//#include <Winsock2.h>


#ifndef GLOBAL_DECL

#include "global.h"     // Global data types and variables

#endif


// Builds a getter around the collaborating objects it needs for a download run.
//
//   connect - socket wrapper used to fetch pages
//   setts   - program configuration (cancel flag, font size, ...)
//   lgfile  - log file that every retrieval is recorded in
//   stat    - status window that progress lines are sent to
//
// The pointers are only stored here; nothing visible in this file releases
// them, so they are presumably owned by the caller - TODO confirm.
getter::getter(wsock* connect, config* setts, fileaccess* lgfile, statuswnd* stat)
{
    this -> status     = stat;
    this -> logfile    = lgfile;
    this -> settings   = setts;
    this -> connection = connect;
}

bool getter::RetrieveURL(char* url, char* result, data_item* page_info, unsigned int link_no)
{
    char   out_path[MAXLEN_LFILENAME];          // Temporary filename storage

    char   in_path[MAXLEN_LFILENAME];           // Temporary filename storage

    bool   valid_url;                           // Flag to store whether or not the url is valid

    char   fname[MAXLEN_FILENAME];              // Filename of file to be saved

    char   error[MAXLEN_ERROR];                 // Error description

    char   extension[5];                        // File extension of file to be downloaded

    char   new_url[MAXLEN_URL];                 // Newly created URL

    char   description[MAXLEN_DESCRIPTION+10];  // Description of page

    char   filter[MAXLEN_FILTER];               // Subpage URL filter

    char   status_txt[800];                     // Status text to send to the status window

    bool   is_subpage = true;                   // Is this a subpage?

    bool   filter_valid;                        // Does the page match the filter?

    char   blank[5];                            // Used for integer to string conversion

    int    loop, loop2;                         // Loop variables

    FILE*  fp;                                  // File handle used to check if file exists


    sub_opts*       current_subopts;            // Current subpage options

    blockoptslist   pg_block_options;           // Copy of block options list to send to parser

    parser*         fileparse;                  // Pointer to an instance of the parser object



    // Check if main page - if so direct a pointer at the page's information

    if (page_info != NULL)
    {
        is_subpage = false;
        subpage_no = 0;
        current_page = page_info;
        // Store description for use with sub pages

        strcpy(description, page_info -> description);
        strcpy(last_description, description);
    }
    else    // Page is a subpage

    {
        // Restore description from main page

        strcpy(description, last_description);
        strcat(description, " - Sub Page ");

        // Loop through subpage filters

        current_page -> sub_options.move_first();
        current_subopts = current_page -> sub_options.get_item();

        // Check if URL matches one of the url filters

        filter_valid = false;

        while ((current_subopts != NULL) && (!filter_valid))
        {
            strcpy(filter, current_subopts -> filter);

            if ((filter[0] != NULL) && (strlen(url) > strlen(filter)))
            {
                for (loop2=0; loop2<(signed)(strlen(url)-strlen(filter)+1); loop2++)
                {
                    if (strnicmp(&(url[loop2]), filter, strlen(filter)) == 0)
                        filter_valid = true;
                }
            }
            else
                filter_valid = true;

            // Check if the link number is within the bounds of one of the link limits

            if (link_no < current_subopts -> first_link) filter_valid = false;
            if (link_no > current_subopts -> last_link)  filter_valid = false;

            // If suitable filter not found get the next one

            if (!filter_valid)
                current_subopts = current_page -> sub_options.get_item();
        }
    }

    // Sort out the URL - these can be a real mess!

    valid_url = url_splitter(url, new_url, error, is_subpage);

    // Generate filenames

    strcpy(fname, &new_url[7]);
    // Get rid of non-allowed characters

    for (loop=0; loop<(signed)strlen(fname); loop++)
        if ((fname[loop] == '/') || (fname[loop] == '?') ||
            (fname[loop] == ':') || (fname[loop] == '*') ||
            (fname[loop] == '?') || (fname[loop] == '\"') ||
            (fname[loop] == '>') || (fname[loop] == '|')     )
            fname[loop] = '.';
    strcpy(in_path,  "./source pages/");
    strcat(in_path,  fname);
    strcat(in_path,  ".htm");
    strcpy(out_path, "./converted pages/");
    strcat(out_path, fname);
    strcat(out_path, ".htm");


    // Download has been cancelled!

    if (settings -> cancel)
    {
        logfile -> lock();
        logfile -> write("Canc");
        logfile -> write( current_page -> description, 25 );
        logfile -> write( " = " );
        logfile -> write( new_url, 45 );
        logfile -> write( " Stopped - Download Cancelled" );
        logfile -> unlock();
        return false;
    }

    // Subpage does not match any of the subpage filters - do not download it

    if ((is_subpage) && (!filter_valid))
    {
        strcpy(result, new_url);
        logfile -> lock();
        logfile -> write( "\nSkip " );
        logfile -> write( current_page -> description, 25 );
        logfile -> write( " = " );
        logfile -> write( new_url, 45 );
        logfile -> write( " does not match subpage filters" );
        logfile -> unlock();
        return false;
    }
    
    // Check if file extension is one of the allowed extensions

    if (strlen(new_url) > 4)
    {
        strcpy(extension, &new_url[strlen(new_url) - 4]);   // Non-Allowed Extensions:

        if ((!stricmp(extension, ".zip") )  ||
            (!stricmp(extension, ".bmp") )  ||
            (!stricmp(extension, ".jpg") )  ||
            (!stricmp(extension, ".gif") )  ||
            (!stricmp(extension, ".tif") )  ||
            (!stricmp(extension, ".arj") )  ||
            (!stricmp(extension, ".tgz") )  ||
            (!stricmp(extension, ".tar") )  ||
            (!stricmp(extension, ".doc") )  ||
            (!stricmp(extension, ".rtf") )  ||
            (!stricmp(extension, ".ace") ) )
        {
            error_page("This page/file had an extension that is not allowed, you can get it at this url:", out_path, new_url);
            valid_url = false;
        }
    }


    if (!valid_url) // URL is invalid

    {
        error_page(error, out_path, new_url);
    }
    else
    {
        // Output filename used by calling function to create link to new page

        strcpy(result, fname);
        strcat(result, ".htm");

        // Increment subpage number (purely cosmetic)

        if (is_subpage)
            subpage_no++;

        // Check if page has already been downloaded

        fp = fopen( in_path, "r" );
        if ( fp == NULL )           // File does not exist - needs downloading

        {
            // Add page to status window

            if (is_subpage)
            {
                strcat(description, itoa(subpage_no, blank, 10));
            }
            strcpy(status_txt, "Downloading ");
            strcat(status_txt, description);
            status -> addline( status_txt );

            // Add page to logfile

            logfile -> lock();
            if (is_subpage)
            {
                logfile -> write( "\nSP" );
                if (subpage_no < 10)
                    logfile -> write( '0' );    
                logfile -> write( itoa(subpage_no, blank, 10) );
                logfile -> write( ' ' );
                logfile -> write( current_page -> description, 25 );
            }
            else
            {
                logfile -> write( "\nMain " );
                logfile -> write( current_page -> description, 25 );
            }
            logfile -> write( " = " );
            logfile -> write( new_url, 45 );
            logfile -> write( " Downloading." );
            logfile -> unlock();

            // Retrieve the page from the world wide web

            if ( !connection -> get_page( new_url, in_path ) )
            {
                strcpy(error, connection -> error);
                error_page(error, out_path, new_url);
            }
            else
            {
                // Create an instance of the parser object to parse the page

                fileparse = new parser(this, settings);
                
                // Parse the downloaded page

                if (is_subpage)
                {
                    // Make a copy of the relevant subpage block options

                    //pg_block_options = current_subopts -> block_options;


                    fileparse -> parse(in_path, out_path, new_url, description,
                                        &(current_subopts -> table_options), true,
                                        settings -> font_size,
                                        &(current_subopts -> block_options));
                }
                else
                {
                    fileparse -> parse(in_path, out_path, new_url, description,
                                        &(current_page -> table_options), false,
                                        settings -> font_size,
                                        &(current_page -> block_options));
                }

                delete fileparse;
            }
        }
        else
            fclose(fp);
    }

    return true;
}

bool getter::url_splitter(char* url, char* new_url, char* error, bool is_subpage)
{
    char *last_slash;
    char *first_slash;
    char domain[MAXLEN_DOMAIN];
    char path[MAXLEN_PATH];
    char old_path[MAXLEN_PATH];
    char filename[MAXLEN_FILENAME];
    bool has_domain = false;
    int loop;

    // Initialise the variables to blank values

    domain[0] = NULL;
    path[0] = NULL;
    filename[0] = NULL;
    old_path[0] = NULL;
    first_slash = NULL;
    last_slash = NULL;
    new_url[0] = NULL;

    strcpy(new_url, url);

    //IF url STARTS WITH "http://" THEN

    if (!strnicmp(url, "http://", 7))
        has_domain = true;
    else if (!strnicmp(url, "mailto:", 7))
    {
        strcpy(error,"This is a link to an e-mail address.");
        return false;
    }
    else if (strstr(url, ":") != NULL)
    {
        strcpy(error,"ERROR - Can only download pages using HTTP.");
        return false;
    }
    
    if (has_domain)
    {
        //url := RIGHT OF url AFTER AND NOT INCLUDING "http://"

        strcpy(url, &url[7]);
        //IF "/" NOT IN url THEN

        if (strchr(url, '/') == NULL)
        {
            //domain := url

            if (strlen(url) > MAXLEN_DOMAIN)
            {
                strcpy(error,"ERROR - Domain Name Too Long.");
                return false;
            }
            strcpy(domain, url);
            //path := "/"

            strcpy(path, "/");
        }
        else
        {
            //domain := LEFT OF url UP TO AND NOT INCLUDING FIRST "/"

            loop = 0;
            while ((url[loop] != '/') && (loop < MAXLEN_DOMAIN))
                domain[loop] = url[loop++];
            domain[loop] = NULL;
            if (loop == MAXLEN_DOMAIN)
            {
                strcpy(error,"ERROR - Domain Name Too Long.");
                return false;
            }
            //url := RIGHT OF url AFTER AND INCLUDING FIRST "/"

            first_slash = strchr(url, '/');
            strcpy(url, first_slash);
            //filename := RIGHT OF url AFTER AND NOT INCLUDING LAST "/"

            last_slash = strrchr(url, '/');
            strcpy(filename, last_slash + 1);
            //url := LEFT OF url UP TO AND INCLUDING LAST "/"

            url[last_slash - url + 1] = NULL;
            //path := url

            if (strlen(url) > MAXLEN_PATH)
            {
                strcpy(error,"ERROR - URL Path Too Long.");
                return false;
            }
            strcpy(path, url);
        }
    }
    else
    {
        //IF "/" NOT IN url THEN

        last_slash = strrchr(url, '/');
        if (last_slash == NULL)
        {
            //path := "/"

            strcpy(path, "/");
            //filename := url

            if (strlen(url) > MAXLEN_FILENAME)
            {
                strcpy(error,"ERROR - URL Filename Too Long.");
                return false;
            }
            strcpy(filename, url);
        }
        else
        {
            //filename := RIGHT OF url AFTER AND NOT INCLUDING LAST "/"

            if (strlen(last_slash + 1) > MAXLEN_FILENAME)
            {
                strcpy(error,"ERROR - URL Filename Too Long.");
                return false;
            }
            strcpy(filename, last_slash + 1);
            //url := LEFT OF url UP TO AND INCLUDING LAST "/"

            url[last_slash - url + 1] = NULL;
            //path := url

            if (strlen(url) > MAXLEN_PATH)
            {
                strcpy(error,"ERROR - URL Path Too Long.");
                return false;
            }
            strcpy(path, url);
            
        }
    }

    // Insert missing bits

    if ((domain[0] == NULL) && (is_subpage))    // Relative link to a subpage

    {
        // Use stored domain

        strcpy(domain, stored_domain);

        if ((path[0] != '/') || ((path[0] == '/') && (path[1] == NULL)) )
        {
            // Use stored path

            strcpy(old_path, stored_path);

            // Combine new path with stored path

            if (strlen(path) + strlen(old_path) > MAXLEN_PATH)
            {
                strcpy(error,"ERROR - URL Path Too Long.");
                return false;
            }
            strcat(old_path, path);
            strcpy(path, old_path);
        }
    }
    else if (is_subpage)    // Absolute link to subpage

    {
        if (stricmp(domain, stored_domain)) // Sub page is in different domain

        {
            strcpy(error,"ERROR - This sub page was not within the domain of the main page.");
            return false;
        }
    }
    else if ((domain[0] == NULL) && (!is_subpage))  // Relative link to main page??!?

    {
        strcpy(error,"ERROR - The URL specified is invalid.");
        return false;
    }
    else    // Absolute link to main page

    {
        strcpy(stored_domain, domain);  // Store domain

        strcpy(stored_path, path);      // Store path

    }

    // Re-construct filename from domain-path-filename parts

    if (7 + strlen(domain) + strlen(path) + strlen(filename) > MAXLEN_URL)
    {
        strcpy(error,"ERROR - URL Too Long.");
        return false;
    }
    strcpy(new_url, "http://" );
    strcat(new_url, domain    );
    strcat(new_url, path      );
    strcat(new_url, filename  );

    return true;
}


void getter::error_page(char* error_desc, char* fname, char* url)
{
    fileaccess outfile( fname, "w", settings );

    if ( !outfile.error )
    {
        // Write HTML headers

        outfile.write( "<HTML>\n" );
        outfile.write( "<BODY>\n<TITLE>ERROR</TITLE>\n<FONT SIZE=\"" );
        outfile.write( settings -> font_size );
        outfile.write( "\">" );

        // Write error description and link

        outfile.write( error_desc );
        outfile.write( "<BR><BR>The URL was: <A HREF=\"" );
        outfile.write( url );
        outfile.write( "\">" );
        outfile.write( url );
        outfile.write( "</A>" );

        // Write HTML footers

        outfile.write ( "\n<BR><BR><A HREF=\"javascript:history.back(1)\">Go back to previous page</A><BR>" );
        outfile.write( "<A HREF=\"mailto:webnewsspeak@cc.umist.ac.uk?subject=" );
        outfile.write( "Page or link error\">Report errors or dead links</A>\n" );
        outfile.write( "</FONT></B>\n" );
        outfile.write( "</BODY>\n" );
        outfile.write( "</HTML>" );
    }

    logfile -> lock();
    logfile -> write( "\nERR! " );
    logfile -> write( current_page -> description, 25 );
    logfile -> write( " = " );
    logfile -> write( url, 45 );
    logfile -> write( " " );
    logfile -> write( error_desc );
    logfile -> unlock();

}

// syntax highlighted by Code2HTML, v. 0.8.11