#include <stdio.h>

#include <string.h>


#ifndef GLOBAL_DECL

#include "global.h"     // Global data types and variables

#endif



// Constructor.
//
// caller - the getter that owns this parser; stored in `parent`.
// setts  - configuration object; stored in `settings`, and its `supervisor`
//          and `show_extern` flags are copied into the parser.
parser::parser(getter* caller, config* setts)
{
    parent      = caller;
    settings    = setts;

    // Cache the two flags consulted while parsing
    show_extern = setts -> show_extern;
    supervisor  = setts -> supervisor;
}


// ******** Lexical Analyser *********


// Lexical analyser: stores the next lexeme from the circular read-ahead
// buffer in `result` as a NUL-terminated string.
//
// Leading spaces and newlines are skipped.  A lexeme is then either a single
// delimiter character ('>', '<', '=', '"', '&', ';') or the run of characters
// up to, but not including, the next delimiter (the delimiters above plus
// space, newline, NUL and EOF).  If no delimiter is found anywhere in the
// buffer, the first BUFFER_SIZE-1 characters are returned as one lexeme.
//
// result  - receives the lexeme; at most BUFFER_SIZE characters (including
//           the terminating NUL) are written.
// returns - true when a lexeme was stored, false when the input is exhausted.
bool parser::get_lexeme(char* result)
{
    int scan   = 0;     // Buffer position examined while looking for the lexeme end
    int copied = 0;     // Number of characters stored in result so far

    // On first call to function fill the buffer
    if (!buffer_init)
    {
        for (scan = 0; scan < BUFFER_SIZE; scan++)
            buffer[scan] = source -> read();
        buffer_init = true;
        head = 0;
    }

    // Check if the end of file has been reached and the buffer exhausted
    if ((buffer[head] == EOF) || (buffer[head] == '\0'))
        return false;

    // Extra spaces and carriage returns before lexemes are ignored
    while ((buffer[head] == ' ') || (buffer[head] == '\n'))
        read_file();

    // The input may have ended with white space.  Without this second check
    // the end-of-file marker itself would be handed back as a one character
    // lexeme and copied to the output by the caller.
    if ((buffer[head] == EOF) || (buffer[head] == '\0'))
        return false;

    // Search through to find end of lexeme
    scan = head;

    do
    {
        switch ( buffer[scan] )
        {
            case ' ':
            case '>':
            case '<':
            case '=':
            case '\"':
            case '\n':
            case '&':
            case ';':
            case '\0':
            case EOF:
                if (scan == head)   // Lexeme is only 1 character
                {
                    result[0] = buffer[scan];
                    result[1] = '\0';
                    read_file();
                    return true;
                }

                // End of lexeme found: copy everything in front of the
                // delimiter.  read_file() refills each slot as it is consumed
                // and advances head, so the loop stops when head reaches the
                // delimiter position.
                copied = 0;
                do
                {
                    result[copied++] = buffer[head];
                    read_file();
                } while (head != scan);

                result[copied] = '\0';
                return true;
        }

        scan++;
        if (scan == BUFFER_SIZE) scan -= BUFFER_SIZE;   // Wrap round the circular buffer
    } while ( scan != head );

    // No lexeme ending found! - Lexeme must be longer than the buffer.
    // Return entire buffer as a lexeme
    copied = 0;
    do
    {
        result[copied++] = buffer[head];
        read_file();
    } while (copied < BUFFER_SIZE-1);

    result[copied] = '\0';
    return true;
}

void parser::get_trigger()
{
    block_opts* current;

    current = pg_block_opts -> get_item();
    
    if (current != NULL)
        strcpy(trigger, current -> trigger_text);
    else
        trigger[0] = NULL;
}

// Prepares block ignoring for a new page: clears the current trigger, rewinds
// the block option list and loads the first trigger from it.  If the first
// item has start_ignored set, the page starts inside an ignored block.
//
// A page without a block option list (pg_block_opts == NULL) is accepted and
// simply has no triggers, in the same way get_table_opts() accepts a missing
// table option list.
void parser::setup_blocks()
{
    block_opts* current;

    trigger[0] = '\0';

    // No block options supplied for this page - nothing to set up
    if (pg_block_opts == NULL)
        return;

    pg_block_opts -> move_first();
    current = pg_block_opts -> get_item();

    if (current != NULL)
    {
        // NOTE(review): no length check - assumes trigger_text fits in trigger
        strcpy(trigger, current -> trigger_text);
        if (current -> start_ignored)
            ignore_block();
    }
}

void parser::read_file()
{
    int loop, loop2 = 0;
    bool found = true;

    // Read a new character into the buffer

    buffer[head++] = source -> read();
    if (head==BUFFER_SIZE) head-=BUFFER_SIZE;
    
    // Search for start or end of ignored section

    // This is a hideously high overhead in terms of processing time

    if (trigger[0] != NULL)
    {
        loop = head;
        do
        {
            if (trigger[loop2++] != buffer[loop++])
                found = false;
            if (loop==BUFFER_SIZE) loop-=BUFFER_SIZE;
        } while ((loop2 < (signed)strlen(trigger)) && (found));

        if (found)
        {
            // Toggle between un/ignored

            if (ignored_block)
                unignore_block();
            else
                ignore_block();
            // Get next trigger

            get_trigger();
        }
    }
}

// Reads a single raw character from the buffer, bypassing lexeme splitting.
//
// result  - receives the character at the head of the buffer.
// returns - false (leaving *result untouched) when the head of the buffer
//           holds EOF or NUL, true otherwise.
bool parser::get_character(char* result)
{
    const bool exhausted = (buffer[head] == EOF) || (buffer[head] == '\0');

    if (exhausted)
        return false;

    *result = buffer[head];
    read_file();            // Consume the character just handed out
    return true;
}

// **************** Parser ******************


// Looks up how the table with the given number should be treated.
//
// index   - table number to look up.
// returns - the ignore_table value of the first list entry whose table_no
//           equals index, or whose range (table_no .. table_range_end, only
//           when ignore_range is set) contains index.  Display when there is
//           no list or no entry matches.
//
// NOTE(review): the list is walked by calling get_item() repeatedly, so
// get_item() presumably advances to the next entry - confirm against the
// list class.
ignore_type parser::get_table_opts(unsigned int index)
{
    if (pg_table_opts == NULL)
        return Display;

    pg_table_opts -> move_first();

    for (table_opts* entry = pg_table_opts -> get_item();
         entry != NULL;
         entry = pg_table_opts -> get_item())
    {
        // Exact match on the table number
        if (entry -> table_no == index)
            return entry -> ignore_table;

        // Entry describes a range of tables that includes this one
        if (    (entry -> ignore_range)
             && (index >= entry -> table_no)
             && (index <= entry -> table_range_end) )
            return entry -> ignore_table;
    }

    return Display;
}

// Marks the start of an ignored section.  In supervisor mode the section is
// still written, wrapped in small italic font markup; otherwise the
// destination's ignored_section flag is set.
void parser::ignore_section()
{
    ignored_section = true;

    if (!supervisor)
    {
        destination -> ignored_section = true;
        return;
    }

    destination -> write("</FONT><FONT SIZE=\"2\"><I>");
}

// Marks the end of an ignored section.  In supervisor mode the small italic
// markup is closed and the page font size restored; otherwise the
// destination's ignored_section flag is cleared.
void parser::unignore_section()
{
    ignored_section = false;

    if (!supervisor)
    {
        destination -> ignored_section = false;
        return;
    }

    destination -> write("</FONT></I><FONT SIZE=\"");
    destination -> write ( font_size );
    destination -> write("\">");
}

// Marks the start of an ignored block.  In supervisor mode a visible marker
// is written and the block is shown in small italic font; otherwise the
// destination's ignored_section flag is set.
void parser::ignore_block()
{
    ignored_block = true;

    if (!supervisor)
    {
        destination -> ignored_section = true;
        return;
    }

    destination -> write("<BR><B>Start of Ignored Block</B><BR>");
    destination -> write("</FONT><FONT SIZE=\"2\"><I>");
}

// Marks the end of an ignored block.  In supervisor mode the small italic
// markup is closed, the page font size restored and a visible marker written.
// Otherwise a line break is written and then the destination's
// ignored_section flag is cleared (in that order).
void parser::unignore_block()
{
    ignored_block = false;

    if (!supervisor)
    {
        destination -> write("<BR>");
        destination -> ignored_section = false;
        return;
    }

    destination -> write("</FONT></I><FONT SIZE=\"");
    destination -> write ( font_size );
    destination -> write("\"><BR><B>End of Ignored Block</B><BR>");
}

// Parses one HTML page and writes the converted page.
//
// fname_in         - name of the file to read.
// fname_out        - name of the file to write.
// url, description - passed to the destination's writeheaders(); url is also
//                    passed to writefooters().
// pg_table_details - table options for this page (may be NULL, see
//                    get_table_opts()).
// subpage          - stored in is_subpage.
// size             - font size text, copied into font_size.
// pg_block_details - block options for this page.
//
// returns - false if either file reported an error on opening, true once the
//           page has been processed.
bool parser::parse( char* fname_in, char* fname_out, char* url, char* description,
                    tableoptslist* pg_table_details, bool subpage, char* size,
                    blockoptslist* pg_block_details)
{
    // Open files
    source      = new fileaccess( fname_in,  "r", settings );
    destination = new fileaccess( fname_out, "w", settings );

    if ( (source -> error) || (destination -> error) )
    {
        // Release both file objects before giving up, so that a failed open
        // does not leak them
        delete source;
        delete destination;
        source      = NULL;
        destination = NULL;
        return false;
    }

    // Set initial values for variables
    is_subpage      = subpage;
    buffer_init     = false;
    in_link         = false;
    frame_index     = 1;
    table_number    = 1;
    table_ignoring  = -1;
    ignored_section = false;
    ignored_block   = false;
    stack_index     = 0;
    link_no         = 1;
    pg_table_opts   = pg_table_details;
    pg_block_opts   = pg_block_details;

    // NOTE(review): no length check - assumes size fits in font_size
    strcpy(font_size, size);

    // Write HTML headers
    destination -> writeheaders(url, description);

    // Setup the block ignore policies
    setup_blocks();

    // Transfer control to first state.  The result is not used: ST_text()
    // only comes back when the input has been exhausted.
    ST_text();

    // Make sure you do not ignore the footer!
    unignore_block();

    // Write HTML footers
    destination -> writefooters(url);

    // Close files
    delete source;
    delete destination;
    source      = NULL;
    destination = NULL;

    return true;
}

// States:


bool parser::ST_text()                  // Initial state - Main text of page

{
    char current_lexeme[BUFFER_SIZE];

    do
    {
        // Call to lexical analyser

        if (!get_lexeme(current_lexeme))
            return false;               // EOF

        
        // Special character Non-Terminal symbol

        if (current_lexeme[0] == '&')       { if (!ST_special())    return false; }

        // Tag Non-Terminal symbol

        else if (current_lexeme[0] == '<')  { if (!ST_utag())       return false; }

        // Normal text

        else
        {
            // Output character

            destination -> write ( current_lexeme );
            destination -> write ( ' ' );
        }

    } while (true);

    return true;
}

bool parser::ST_utag()                  // Unknown Tag state

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed


    if (!get_lexeme(current_lexeme))
        return false;                   // EOF

    
    if ( !stricmp(current_lexeme, "TITLE") )   { if(!ST_title())        return false;}
    if ( !stricmp(current_lexeme, "BR") )      { if(!ST_break())        return false;}
    if ( !stricmp(current_lexeme, "!--") )     { if(!ST_comment())      return false;}
    if ( !stricmp(current_lexeme, "SCRIPT") )  { if(!ST_script())       return false;}
    if ( !stricmp(current_lexeme, "STYLE") )   { if(!ST_style())        return false;}
    if ( !stricmp(current_lexeme, "CAPTION") ) { if(!ST_caption())      return false;}
    if ( !stricmp(current_lexeme, "TABLE") )   { if(!ST_table())        return false;}
    if ( !stricmp(current_lexeme, "/TABLE") )  { if(!ST_tableend())     return false;}
    if ( !stricmp(current_lexeme, "TR") )      { if(!ST_tablerow())     return false;}
    if ( !stricmp(current_lexeme, "TD") )      { if(!ST_tablecol())     return false;}
    if ( !stricmp(current_lexeme, "TH") )      { if(!ST_tablecol())     return false;}
    if ( !stricmp(current_lexeme, "FRAME") )   { if(!ST_frame())        return false;}
    if ( !stricmp(current_lexeme, "NOFRAMES") ){ if(!ST_noframes())     return false;}
    if ( !stricmp(current_lexeme, "FRAMESET") ){ if(!ST_frameset())     return false;}
    if ( !stricmp(current_lexeme, "/A") )      { if(!ST_linkend())      return false;}
    
    if ( !stricmp(current_lexeme, "A") )
        { if(!ST_link())  return false; else return true; }
    if ( !stricmp(current_lexeme, "IMG") )
        { if(!ST_image()) return false; else return true; }

    do 
    {
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

    } while (current_lexeme[0] != '>');

    return true;
}

bool parser::ST_special()               // Special character

{
    char current_lexeme[BUFFER_SIZE];   // Current lexeme

    char prev_lexeme[BUFFER_SIZE];      // Previous lexeme

    int loop;

    if (!get_lexeme(prev_lexeme))
        return false;                   // EOF


    if (prev_lexeme[0] == '<')
    {
        destination -> write("and ");
        if (!ST_utag())
            return false;
        else
            return true;
    }
    
    if (!get_lexeme(current_lexeme))
        return false;                   // EOF


    if (current_lexeme[0] == '<')
    {
        destination -> write("and ");
        destination -> write(prev_lexeme);
        destination -> write(' ');
        if (!ST_utag())
            return false;
        else
            return true;
    }
    else if ((current_lexeme[0] != ';'))// Not a special character after all!

    {   
        destination -> write("and ");
        destination -> write(prev_lexeme);
        destination -> write(' ');
        destination -> write(current_lexeme);
        destination -> write(' ');
        return true;
    }

    loop = 0;
    do
    {
        // Check through the list of special characters to see if it exists

        
        if (    (!strcmp(prev_lexeme, SpecialChars[loop].NE) )
             || ( (prev_lexeme[0] == '#') && (atoi(&prev_lexeme[1]) == SpecialChars[loop].code) )
           )
        {
            if ( SpecialChars[loop].text != NULL )
            {
                destination -> write( "<B>" );
                destination -> write( SpecialChars[loop].text );
                destination -> write( "</B>" );
                destination -> write(' ');
            }
            else
            {
                destination -> write( (char)SpecialChars[loop].code );
                destination -> write(' ');
            }
        }
        loop++;
    
    } while (SpecialChars[loop].NE != NULL);

    return true;
}

// Line break state: writes a line break to the destination.
// returns - always true.
bool parser::ST_break()
{
    destination -> write("<BR>");

    return true;
}

// End of link state: clears the in_link flag and closes the anchor in the
// destination.
// returns - always true.
bool parser::ST_linkend()
{
    in_link = false;
    destination -> write("</A>");

    return true;
}

// Image state - entered after "<IMG" has been read.
//
// Reads lexemes up to the closing '>'.  The text of each ALT attribute found
// is written to the destination.  If the image is inside a link, external
// links are being shown and there was no (non-empty) ALT text, a placeholder
// is written instead so that the link still has something to show.
//
// returns - false when the input ends, true otherwise.
bool parser::ST_image()
{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed
    bool has_alt = false;
    char alt_text[MAXLEN_ALT];

    // Start from an empty string: the buffer is written out and tested below
    alt_text[0] = '\0';

    do
    {
        if (!get_lexeme(current_lexeme))
            return false;   // EOF

        if ( !stricmp(current_lexeme, "ALT") )
        {
            has_alt = true;

            // ST_alt() may return true without storing any text (when ALT is
            // not followed by '='), so make sure nothing stale or
            // uninitialised is written out.
            alt_text[0] = '\0';

            if (!ST_alt(alt_text))
                return false;
            destination -> write(alt_text);
        }

    } while (current_lexeme[0] != '>');

    // If in link, showing externs, and no ALT tag, then insert "Image"
    if ((show_extern) && (in_link) && ((!has_alt) || (alt_text[0] == '\0')))
        destination -> write(" <B>Image</B> ");

    return true;
}

// Alternative textual description - entered after "ALT" has been read.
//
// Builds the text to show in place of an image: the attribute value wrapped
// in bold quotation marks.  An unquoted value is a single lexeme; a quoted
// value is every lexeme up to the closing quote, separated by spaces.  Values
// that are too long are truncated.
//
// text    - receives the result; must have room for MAXLEN_ALT characters.
//           It is set to the empty string when ALT is not followed by '='
//           or when the quoted value is empty.
// returns - false when the input ends (text is then not meaningful), true
//           otherwise.
bool parser::ST_alt(char* text)
{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed
    bool first_char = true;             // Flag indicating first lexeme in alt text

    // Longest the text may grow before the closing marker is added.  The
    // marker takes 9 characters plus the terminating NUL.
    // NOTE(review): assumes MAXLEN_ALT is comfortably larger than 19.
    const size_t limit = MAXLEN_ALT - 19;

    // Never hand back an unset buffer, whichever path is taken below
    text[0] = '\0';

    if (!get_lexeme(current_lexeme))
        return false;                   // EOF

    if ( current_lexeme[0] != '=' )     // Unexpected symbol found, exit
        return true;

    if (!get_lexeme(current_lexeme))
        return false;                   // EOF

    if ( current_lexeme[0] != '\"' )    // Alt text has no quotes around it
    {
        strcpy(text, " <B>");
        strcat(text, "\"</B>");
        strncat(text, current_lexeme, limit);   // Copies at most `limit` characters
        strcat(text, "<B>\"</B> ");
        return true;
    }

    for (;;)
    {
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

        if (current_lexeme[0] == '\"')  // Closing quote
        {
            if (!first_char)
                strcat(text, "<B>\"</B> ");
            else
                text[0] = '\0';         // Why do people use empty alt tags???

            return true;
        }

        if (first_char)
        {
            strcpy(text, " <B>");
            strcat(text, "\"</B>");
            first_char = false;
        }
        else if (strlen(text) < limit)
        {
            // Only separate words while there is still room.  Appending the
            // space unconditionally pushed the length past the limit, after
            // which the unsigned "space left" calculation wrapped round and
            // whole lexemes were copied beyond the end of the buffer.
            strcat(text, " ");
        }

        // Append as much of the lexeme as still fits
        const size_t used = strlen(text);
        if (used < limit)
            strncat(text, current_lexeme, limit - used);
    }
}

bool parser::ST_script()                // Script in page - remove completely

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed


    do
    {       
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

    
    } while (stricmp(current_lexeme, "/SCRIPT"));

    return true;
}

bool parser::ST_style()                 // Style sheets in page - remove completely

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed


    do
    {       
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

    
    } while (stricmp(current_lexeme, "/STYLE"));

    return true;
}

// Table state - entered after "<TABLE" has been read.
//
// Does nothing inside an ignored block.  Otherwise the table number is
// remembered on the table stack for the matching table end, and the table is
// introduced according to its options:
//   Display  - a line break; in supervisor mode also a "Not announced" label.
//   Ignore   - starts an ignored section (unless one is already active); in
//              supervisor mode also an "Ignored." label.
//   Announce - a "Table start." announcement, or in supervisor mode an
//              "Announced" label.
// Supervisor labels list the numbers of the enclosing tables in front of the
// table's own number, to ease setting up.
//
// returns - always true.
bool parser::ST_table()
{
    char blank[16];     // Text form of a table number; roomy enough for any int
    int loop;

    if (!ignored_block)
    {
        // Remember for table end.  Once the stack is full nothing is pushed
        // (see the bottom of this function), so do not write past its end.
        if (stack_index < STACK_DEPTH)
            table_stack[stack_index] = table_number;

        switch (get_table_opts(table_number))
        {
            case Display:                   // Table should just be printed normally
                destination -> write("<BR>");
                if (supervisor)
                {
                    destination -> write("<B>Table ");
                    if (stack_index > 0)    // Print list of nestings to ease setting up
                    {
                        destination -> write("( ");
                        for (loop = 0; loop<stack_index; loop++)
                        {
                            destination -> write(itoa(table_stack[loop], blank, 10));
                            if (loop<stack_index-1)
                                destination -> write(" -> ");
                        }
                        destination -> write(" ) -> ");
                    }
                    destination -> write(itoa(table_number, blank, 10));
                    destination -> write(" Not announced</B>");
                    destination -> write("<BR>");
                }
                break;

            case Ignore:                    // Table should be ignored
                if (!ignored_section)
                {
                    table_ignoring = table_number;
                    ignore_section();
                }
                if (supervisor)
                {
                    destination -> write("<BR>");
                    destination -> write("<B>Table ");
                    if (stack_index > 0)    // Print list of nestings to ease setting up
                    {
                        destination -> write("( ");
                        for (loop = 0; loop<stack_index; loop++)
                        {
                            destination -> write(itoa(table_stack[loop], blank, 10));
                            if (loop<stack_index-1)
                                destination -> write(" -> ");
                        }
                        destination -> write(" ) -> ");
                    }
                    destination -> write(itoa(table_number, blank, 10));
                    destination -> write(" Ignored.</B> ");
                }
                else
                    destination -> write("<BR>");
                break;

            case Announce:                  // Table should be announced
                destination -> write("<BR>");
                destination -> write("<B>Table ");
                if (supervisor)
                {
                    // Same nesting list as the other two cases: the numbers
                    // of the enclosing tables, taken from the stack.
                    if (stack_index > 0)
                    {
                        destination -> write("( ");
                        for (loop = 0; loop<stack_index; loop++)
                        {
                            destination -> write(itoa(table_stack[loop], blank, 10));
                            if (loop<stack_index-1)
                                destination -> write(" -> ");
                        }
                        destination -> write(" ) -> ");
                    }
                    destination -> write(itoa(table_number, blank, 10));
                    // Close the bold tag opened above, as the other labels do
                    destination -> write(" Announced </B>");
                }
                else
                    destination -> write("start.</B>");
                destination -> write("<BR>");
                break;
        }

        // Tables nested deeper than the stack allows are not numbered
        if (stack_index<STACK_DEPTH)
        {
            table_number++;
            stack_index++;
        }
    }
    return true;
}

// Table row state: each table row starts on a new line in the destination.
// returns - always true.
bool parser::ST_tablerow()
{
    destination -> write("<BR>");

    return true;
}

// Table end state - entered after "</TABLE" has been read.
//
// Does nothing inside an ignored block, or when there are more table ends
// than table starts.  Otherwise the number of the table being closed is
// popped from the table stack and the end of the table is marked according to
// its options (in supervisor mode always, otherwise only for announced
// tables).  If this table started the current ignored section, the section
// ends here.
//
// returns - always true.
bool parser::ST_tableend()
{
    unsigned int    index;
    char            blank[16];  // Text form of a table number; roomy enough for any int

    if (!ignored_block)
    {
        if (stack_index>0)
            index = table_stack[--stack_index];
        else
            return true;    // More table ends than starts!

        switch(get_table_opts(index))
        {
        case Announce:
            destination -> write("<BR>");
            destination -> write("<B>End of table");
            if (supervisor)
            {
                destination -> write(" ");
                destination -> write(itoa(index, blank, 10));
                destination -> write(" Announced ");
            }
            destination -> write("</B>");
            destination -> write("<BR>");
            break;

        case Ignore:
            if (supervisor)
            {
                destination -> write("<BR>");
                destination -> write("<B>End of table ");
                destination -> write(itoa(index, blank, 10));
                destination -> write(" Ignored</B> ");
            }
            break;

        case Display:
            if (supervisor)
            {
                destination -> write("<BR>");
                destination -> write("<B>End of table ");
                destination -> write(itoa(index, blank, 10));
                destination -> write("</B> ");
            }
            break;
        }

        // This table started the ignored section, so it ends with it
        if (index == table_ignoring)
        {
            unignore_section();
        }
    }

    return true;
}

// Table column state (used for both TD and TH): separates cells with two
// spaces in the destination.
// returns - always true.
bool parser::ST_tablecol()
{
    destination -> write("  ");

    return true;
}

bool parser::ST_caption()               // Table caption

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed


    destination -> write( "<B> Caption is: </B>" );

    do
    {       
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

    
    } while (current_lexeme[0] != '>');

    do
    {
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

        
        if (current_lexeme[0] != '<')
        {
            destination -> write( current_lexeme );
            destination -> write( ' ' );
        }

    } while (current_lexeme[0] != '<');

    return true;
}

bool parser::ST_title()                 // Page title

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed


    destination -> write( "<B>Page Title: </B>" );

    do
    {       
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

    
    } while (current_lexeme[0] != '>');

    do
    {
        if (!get_lexeme(current_lexeme))
            return false;               // EOF

        
        if (current_lexeme[0] != '<')
        {
            destination -> write( current_lexeme );
            destination -> write( ' ' );
        }

    } while (current_lexeme[0] != '<');
    
    destination -> write("<BR>");
    destination -> write("<BR>");

    return true;
}

// HTML comment state - entered after "<!--" has been read.
//
// Consumes characters up to and including the first pair of adjacent dashes
// ("--").  In supervisor mode the comment is echoed between COMMENT and
// END COMMENT markers, one character behind the input, so the last character
// echoed is the first dash of the closing pair.  A '<' inside the comment is
// echoed as "&lt;".
//
// The same characters are consumed whether or not supervisor mode is on.
//
// returns - false when the input ends before the comment does, true
//           otherwise.
bool parser::ST_comment()
{
    char current_char = '\0';
    char prev_char = '\0';

    if (supervisor)
        destination -> write("<BR><B>COMMENT:</B> <!--");

    do
    {
        prev_char = current_char;

        if (!get_character(&current_char))
            return false;               // EOF

        // Echo the previous character.  There is none on the first pass, and
        // a NUL must not be written to the output.
        if ((supervisor) && (prev_char != '\0'))
        {
            if (prev_char == '<')
                destination -> write("&lt;");   // Escape the start of a tag
            else
                destination -> write( prev_char );
        }

    } while (!((current_char == '-') && (prev_char == '-')));

    if (supervisor)
        destination -> write(" <B>END COMMENT</B><BR>");

    return true;
}

bool parser::ST_link()                  // Link

{
    char current_lexeme[BUFFER_SIZE];   // Lexeme currently being processed

    char blank[5];

    in_link = true;

    do
    {       
        if (!get_lexeme(current_lexeme))
            return false;               // EOF


        if ( !stricmp(current_lexeme, "HREF") ){ if(!ST_href()) return false; }

    } while (current_lexeme[0] != '>');

    if (!ignored_section)
    {
        if ((!