#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <mem.h>

/*
   Processes output from WORD_DB.C to mechanically solve a single
   substitution cryptogram.

   WORD_DB takes a list of words (through stdin) and creates:
      - via stdout a fixed field list of length, word, 'normalized' word
        and 'sorted' word.  Format is %02d %-30s %-30s %-30s (mind those
        spaces!)
      - to a file called DICTSTAT.DAT, a file in which the first line records
        the numbers of words encountered when it was last run, and each
        additonal line lists (in order) a word length and the number of words
        found of that length, in the format: length, nwords, e.g., 8,19461
        (19,461 words of length 8).  If all is good, the list of word counts
        should sum to the total from line one.

   This program takes two arguments, the 'dictionary' file, and the
   'DICTSTAT.DAT' that goes with it.  (The idea here is that dictionaries of
   multiple sizes can be prepared, and with a little creativity with file naming
   could all be used with this program.)

   IMPORTANT: The 'dictionary' file is assumed to be a sorted version of the
      stdout output from WORD_DB.  For example, say WORD_DB was invoked as:

         word_db < WORDS.LST > WORDDATA.TXT

      ... so far so good,  WORDDATA.TXT contains the stdout output. Now do the
      following:

         sort < WORDDATA.TXT > SORT_LEN.TXT

      Now SORT_LEN.TXT is usable as a 'dictionary' for this program.  The system
      call effectively sorts first by length, and then by word (alphabetically).


   MCV  2002
*/
 
#define FIELD_LEN      250 /* How long each name or value can be       */
#define MAXBUFSIZE    4096
#define MAX_NV_PAIRS   200 /* How many name=value pairs we can process */

#define LINELENGTH               128
#define TRUE                       1
#define FALSE                      0

/* I don't have a dictionary with words bigger than 28 characters.  If you do */
/* then adjust this number!                                                   */
#define MAXWORDLENGTH             30
#define LENGTHWIDTH                2
#define SPACES                     3
#define N_ENDCHARS                 2

#define MAX_WORDS              30000

#define DICTIONARYDB   "SORT_LEN.TXT"
#define STATISTICSDB   "DICTSTAT.DAT"
#define WORD_TAG       "scrambled"

/* On the local PC the current home page is Default.htm, but it is index.htm */
/* at the biz site.                                                          */

#define HOMEPAGE       "index.htm"

typedef struct stat_node {
   unsigned  count;
   long      offset;
} stat_rec;

stat_rec word_stats[MAXWORDLENGTH];
     
typedef struct word_node {
   char  *word;
   char  *normalized;
   char  *sorted;
   struct word_node *next;
} word_rec;

word_rec *word_list;
word_rec *answers;

typedef struct name_value_st
{
   char name[FIELD_LEN + 1];
   char value[FIELD_LEN + 1];
} name_value;

name_value nv_pairs[MAX_NV_PAIRS];

char *errors[] = {
/*  0 */ "Not used.  Error condition 0 indicates success."
/*  1 */,"Could not open input file to read."
/*  2 */,"Could not open output file to write."
/*  3 */,"Bad read from stdin"
/*  4 */,"Data conflicts between dictionary and statistics"
/*  5 */,"Memory allocation error"
/*  6 */,"Read error"
/*  7 */,"No data received!"
/*  8 */,"Input length not > 0!"
};

unsigned n_words;
char     dict_str[LINELENGTH];    
int      reclen;

void  add_jumble_html(void);
void  cleanup_word_rec(word_rec *);  
int   get_input(void); 
void  html_content(void);
void  html_start(char *);
void  html_header(int,char *);
void  html_text(char *);
void  html_end(void);       
void  load_nv_pair(char *,int);
void  main(void);
void  normalize(char *,char *);
void  play_jumble(char *);         
void  send_error(int,char *);
void  sort_str(char *,char *);
char *stblank(int);
void  strip_junk(char *); 
void  unescape_url(char *);
void  unpad(char *);  
char  x2c(char *);

/* ----------------------------------------------------- */
void load_nv_pair(char *t,int nv_entry)
{
   /* Assumes nv_pairs array is currently full of NULL characters */

   int   chars_processed = 0;
   char *src;
   char *dest;

   /* Get the part before the '=' sign */
   src  = t;
   dest = nv_pairs[nv_entry].name;
   while ((*src) && (*src != '=') && (chars_processed < FIELD_LEN))
   {
      /* Convert '+'s to spaces */
      if (*src == '+')
         *dest = ' ';
      else
         *dest = *src;

      dest++;
      src++;
      chars_processed++;
   }

   /* Skip the '=' character */
   if (*src == '=')
   {
      /* Get the part after the '=' character */

      src++;
      dest = nv_pairs[nv_entry].value;
      chars_processed = 0;
      while ((*src) && (*src != '=') && (chars_processed < FIELD_LEN))
      {
         /* Change a '+' to a ' ' */
         if (*src == '+')
            *dest = ' ';
         else
            *dest = *src;

         dest++;
         src++;
         chars_processed++;
      }
   }

   /* Now need to decode %XX characters from the two fields */
   unescape_url(nv_pairs[nv_entry].name);
   unescape_url(nv_pairs[nv_entry].value);
}
/* ----------------------------------------------------- */
void unescape_url(char *url)
{                                 
   /* Routine borrowed from examples supplied with the NCSA server */
   int x, y;

   for (x = 0, y = 0;url[y]; ++x, ++y)
   {
      if ((url[x] = url[y]) == '%')
      {
         url[x] = x2c(&url[y + 1]);
         y += 2;
      }
   }

   url[x] = '\0';
}
/* ----------------------------------------------------- */
char x2c(char *what)
{
   char digit;

   digit  = (char)(what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0'));
   digit *= (char)16;
   digit += (char)(what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0'));

   return(digit);
}
/* ----------------------------------------------------- */
void  html_content(void)
{
   printf("Content-type: text/html\r\n\r\n");
}
/* ----------------------------------------------------- */
void  html_start(char *s)
{
   printf("<HTML>\r\n");
   printf("<HEAD>\r\n");
   printf("<TITLE>%s</TITLE>\r\n",s);
   printf("</HEAD>\r\n");
   printf("<BODY>\r\n");
}
/* ----------------------------------------------------- */
void  html_header(int level,char *s)
{
   if ((level < 1) || (level > 6))
      level = 6;

   if (strlen(s) == 0)
      return;

   printf("<H%d>%s</H%d>\r\n",level,s,level);
}
/* ----------------------------------------------------- */
void  html_text(char *s)
{
   printf("%s\r\n",s);
}
/* ----------------------------------------------------- */
void  html_end(void)
{
   printf("</BODY>\r\n");
   printf("</HTML>\r\n");
}
/* ----------------------------------------------------- */
void send_error(int num,char *extra)
{
   char s[LINELENGTH + 1];

   sprintf(s,"%s (%d: %s)",
      errors[num],num, (extra != NULL ? extra : ""));

   html_content();
   html_start("ERROR:");
   html_text(s);
   html_end();
}
/* ----------------------------------------------------- */
int  get_input(void)
{
   int   n;
   int   got_data;
   char *ip_data = 0;
   int   ip_length = 0;
   char  buffer[MAXBUFSIZE];
   char *t;

   memset(buffer,'\0',(FIELD_LEN * 2) + 2);
   
   got_data = FALSE;

   t = getenv("REQUEST_METHOD");

   if (t)
   {
      if (strcmp(t,"POST") == 0)
      {
         t = getenv("CONTENT_LENGTH");
         if (t)
         {
            ip_length = atoi(t);
            ip_data   = stblank(ip_length);
            if (fread(ip_data, 1, ip_length, stdin) != (size_t)ip_length)
            {
               send_error(3,"get_input");
               return(0);
            }

            got_data = TRUE;
         }
      }
      else if (strcmp(t,"GET") == 0)
      {
         t = getenv("QUERY_STRING");
         if (t)
         {
            ip_length = strlen(t);
            ip_data   = stblank(ip_length);

            strcpy(ip_data,t);

            got_data = TRUE;
         }
      }
   }

   if (got_data == FALSE)
   {
      send_error(7,"get_input");
      return(0);
   }

   if (ip_length <= 0)
   {
      send_error(8,"get_input");
      return(0);
   }
   
   html_text("<BR>");

   n = 0;
   strcpy(buffer,ip_data);
   t = strtok(buffer,"&");
   while (t != NULL)
   {
      /* Decode and load the pair */
      load_nv_pair(t,n);
      n++;

      /* Move onto the next name=value pair */
      t = strtok(NULL,"&");
   }

   return(1);
}        
/* ----------------------------------------------------------- */
void sort_str(char *s,char *t)
{
   int  i, j, w;
   char c;

   // Assumes that user has allocated t


   strcpy(t,s);

   w = strlen(t);

   if (w < 2)
      return;

   for (i = 1;i < w;i++)
   {
      c = *(t + i);
      j = i;
      while (*(t + j - 1) > c)
      {
         *(t + j) = *(t + j - 1);
         j = j - 1;
         if (j == 0)
            break;
      }
      *(t + j) = c;
   }
}
/* ----------------------------------------------------------- */
char *stblank(int n)
{
   char *t;

   if ((t = (char *)calloc(n + 1, sizeof(char))) == NULL)
      send_error(2,"stblank()");

   return(t);
}
/* ----------------------------------------------------------- */
void cleanup_word_rec(word_rec *rec)
{
   word_rec *temp, *next;

   /* We want to cleanup the list EXCEPT FOR the first pointer, which */
   /* is our anchor for other parts of the program.                   */

   free(rec -> word);
   free(rec -> normalized);
   free(rec -> sorted);

   temp = rec -> next;
   while (temp != NULL)
   {
      next = temp -> next;

      free(temp -> word);
      free(temp -> normalized);
      free(temp -> sorted);

      free(temp);

      temp = next;
   }

   rec -> word       = stblank(1);
   rec -> normalized = stblank(1);
   rec -> sorted     = stblank(1);
   rec -> next       = NULL;

   return;
}
/* ----------------------------------------------------------- */
void gather_list(int len,word_rec *list)
{
   unsigned  count;
   long      offset;
   FILE     *dictionary;
   int       i;
   word_rec *temp;
   char      str[LINELENGTH];
   int       w;
   char      word[MAXWORDLENGTH]; 
   char      n_word[MAXWORDLENGTH];
   char      s_word[MAXWORDLENGTH];

   cleanup_word_rec(list);

   count  = word_stats[len].count;
   if (count == 0)
      return;

   offset = word_stats[len].offset;

   dictionary = fopen(dict_str,"rb");
   if (dictionary == NULL)
      send_error(1,dict_str);

   temp = list;

   fseek(dictionary,(long)(offset * (long)reclen),SEEK_SET);
   for (i = 0;i < (int)count;i++)
   {

      if (fread(str,reclen,1,dictionary) != 1)
         send_error(6,"word_rec");

      sscanf(str,"%d %s %s %s",
         &w,word,n_word,s_word);
      unpad(word);
      unpad(n_word);
      unpad(s_word);

      temp -> word       = stblank(len);
      temp -> normalized = stblank(len);
      temp -> sorted     = stblank(len);
      if (i == (int)(count - 1))
         temp -> next = NULL;
      else
         if ((temp -> next = (word_rec *)malloc(sizeof(word_rec))) == NULL)
            send_error(5,"word_rec");

      strcpy(temp -> word,      word  );
      strcpy(temp -> normalized,n_word);
      strcpy(temp -> sorted,    s_word);

      temp = temp -> next;
   }
}
/* ----------------------------------------------------------- */
int sorted_match(char *s)
{
   int       w;
   int       ctr;
   word_rec *temp;
   word_rec *match;   
   char      s_word[MAXWORDLENGTH];
    
   ctr = 0;

   sort_str(s,s_word);

   w = strlen(s_word);

   if (w == 0)
      return(ctr);

   gather_list(w,word_list);

   cleanup_word_rec(answers);

   temp = word_list;
   while (temp != NULL)
   {
      if (strcmp(s_word,temp -> sorted) == 0)
      {
         ctr++;

         if (ctr == 1)
         {
            match = answers;
         }
         else
         {
            if ((match -> next = (word_rec *)malloc(sizeof(word_rec))) == NULL)
               send_error(5,"answers");
            match = match -> next;
         }
         match -> next = NULL;

         match -> word       = stblank(w);
         match -> normalized = stblank(w);
         match -> sorted     = stblank(w);

         strcpy(match -> word,      temp -> word);
         strcpy(match -> normalized,temp -> normalized);
         strcpy(match -> sorted,    temp -> sorted);
      }

      temp = temp -> next;
   }

   /* Return number of matches */
   return(ctr);

}                                           
/* ----------------------------------------------------------- */
void unpad(char *s)
{
   int   i;
   int   w;

   w = strlen(s);
   if (w > 0)
   {
      i = (w - 1);
      while ((i >= 0) && (*(s + i) == ' '))
      {
         *(s + i) = '\0';
         i--;
      }
   }
}
/* ----------------------------------------------------------- */
void strip_junk(char *s)
{
   int   i;
   int   w;
   char  c;

   w = strlen(s);
   for (i = 0;i < w;i++)
   {
      c = *(s + i);

      if ((c == '\r') || (c == '\n'))
         *(s + i) = '\0';
   }
}
/* ----------------------------------------------------------- */
void normalize(char *s,char *t)
{
   int letters[26];
   int  i;
   int  w;
   int  ctr;
   char c;
   int  slot;

   /* Declare an array of flags for each of the 26 lowercase letters */
   for (i = 0;i < 26;i++)
      letters[i] = FALSE;

   w = strlen(s);

   ctr = 0;
   for (i = 0;i < w;i++)
   {
      c = *(s + i);

      if ((c < 'a') || (c > 'z'))
      {
         *(t + i) = c;
         continue;
      }

      slot = (int)(c - 'a');

      /* Map each letter found to its order of discovery in the given word. */
      /* e.g., in the word "dad", the 'd' is discovered 1st, so we assign   */
      /* it number 1.  'a' is discovered 2nd, so it gets 2.  When we find   */
      /* the next 'd', we have a map that tells us it matches the 1st       */
      /* letter discovered for this particular word.                        */

      if (letters[slot] == FALSE)
      {
         ctr++;
         letters[slot] = ctr;
      }

      /* We then build a 'normalized' word in which the letters are mapped  */
      /* to the lowercase alphabet according to order of discovery.  So 'a' */
      /* will represent the first letter discovered, 'b' the second, etc.   */
      
      *(t + i) = (char)('a' + (letters[slot] - 1));
   }

   *(t + w) = '\0';
}                                                                   
/* ----------------------------------------------------------- */
void play_jumble(char *s)
{
   int       w;
   int       n;
   word_rec *temp;          
   char      str[LINELENGTH + 1];
   int       ctr;

   /* word_list and answers are declared as global variables.  We  */
   /* need to provide them some initial values so that the cleanup */
   /* routines won't get tripped up.                               */
   
   if ((word_list = (word_rec *)malloc(sizeof(word_rec))) == NULL)
      send_error(5,"word_rec");

   word_list -> word       = stblank(1);
   word_list -> normalized = stblank(1);
   word_list -> sorted     = stblank(1);
   word_list -> next       = NULL;

   if ((answers   = (word_rec *)malloc(sizeof(word_rec))) == NULL)
      send_error(5,"answers");

   answers   -> word       = stblank(1);
   answers   -> normalized = stblank(1);
   answers   -> sorted     = stblank(1);
   answers   -> next       = NULL;

   unpad(s);
   w = strlen(s);

   if (w == 0)
   {
      html_text("Empty string!<BR><BR>");
      return;
   }

   ctr = 0;

   n = sorted_match(s);
   if (n == 0)
      html_text("Sorry, no solution found<BR><BR>");
   else
   {
      temp = answers;
      while (temp != NULL)
      {
         ctr++;
         sprintf(str,"%2d: %s<BR>",ctr,temp->word);
         html_text(str);
         temp = temp -> next;
      }
   }

   return;
}
/* ----------------------------------------------------------- */
void main(void)
{      
   FILE     *f1;       
   char      str[LINELENGTH + 1];
   int       i;
   long      total;
   int       length;
   unsigned  count;
   int       n;
   char      buffer[MAXBUFSIZE];   
   char      puzzle_word[MAXWORDLENGTH];

   for (i = 0;i < MAX_NV_PAIRS;i++)
   {
      strcpy(nv_pairs[i].name, "");
      strcpy(nv_pairs[i].value,"");
   }

   html_content();
   html_start("Solving the Jumble");


   if (!get_input())
      exit(EXIT_FAILURE);

   strcpy(puzzle_word,"");

   n = 0;
   while (strlen(nv_pairs[n].name) != 0)
   {
      if (strcmp(nv_pairs[n].name,WORD_TAG) == 0)
         strcpy(puzzle_word,nv_pairs[n].value);

      n++;
   }

   strcpy(dict_str,DICTIONARYDB);

   f1 = fopen(STATISTICSDB,"rt");
   if (f1 == NULL)
      send_error(1,STATISTICSDB);

   fgets(str,LINELENGTH,f1);
   sscanf(str,"%u",&n_words);

   for (i = 0;i < MAXWORDLENGTH;i++)
   {
      word_stats[i].count  = 0;
      word_stats[i].offset = 0;
   }

   total = (unsigned)0;
   while (fgets(str,LINELENGTH,f1) != NULL)
   {
      if (sscanf(str,"%d,%u",&length,&count) != 2)
         continue;

      if ((length < 0) || (length >= MAXWORDLENGTH))
         continue;

      word_stats[length].count  = count;
      word_stats[length].offset = total;

      total += (long)count;
   }
   fclose(f1);

   
   if ((unsigned)n_words != (unsigned)total)
      send_error(4,"Word counts do not match");


   /* reclen is the length of each dictionary record.  Each word record */
   /* consists of a LENGTHWIDTH integer, 3 MAXWORDLENGTH permutations   */
   /* of the word (the word itself and its normalized and sorted        */
   /* versions, SPACES spaces separating these fields, and N_ENDCHARS   */
   /* end characters (\r and \n I guess).                               */

   reclen = LENGTHWIDTH + (3 * MAXWORDLENGTH) + SPACES + N_ENDCHARS;

   sprintf(buffer,"Unscrambling: %s<BR>",puzzle_word);

   html_header(2,buffer);

   play_jumble(puzzle_word);
   
   html_end();

   add_jumble_html();


   exit(EXIT_SUCCESS);
}
/* ----------------------------------------------------------- */
void add_jumble_html()
{                 
   printf("<FORM ACTION=\"jmbl_cgi.exe\" METHOD=POST>\r\n");
   printf("<p><font face=\"Verdana\">Enter a scrambled word: </font>&nbsp;<INPUT NAME=scrambled SIZE=20 MAXLENGTH=40>&nbsp;&nbsp;&nbsp;&nbsp;\r\n");
   printf("<INPUT TYPE=submit VALUE=\"Unscramble!\" name=\"unscramble\"> </p>\r\n");
   printf("</FORM>\r\n");
   printf("<p>&nbsp;</p>\r\n");
   printf("<p>&nbsp;</p>\r\n");
   printf("<p><i><font face=\"Verdana\" size=\"2\"><a href=\"../%s\">Back to Mark Van Dine, LLC Home</a></font></i></p>\r\n",
      HOMEPAGE);
}
/* ----------------------------------------------------------- */
