/*
  fmw.c - Find Multiple Words anagram utility.

  Jason Hood, 7 to 11 August, 2006.

  Replacement for the MULTI utility in the WORDY and YAWL packages.  This
  version is a lot quicker and doesn't have duplicated combinations.  It
  does, however, sort by length; -s will sort each word in the anagram,
  which can then be piped into sort, if an alphabetical listing is desired.
*/

// Program version and release date.  Both are string literals so that they
// can be pasted directly into the banner text printed by help().
#define PVERS "1.00"
#define PDATE "11 August, 2006"


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined( _WIN32 ) || (defined( __MSDOS__ ) && !defined( __DJGPP__ ))
#include <io.h>
#else
#include <unistd.h>
#endif

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

// Home-made boolean type, used instead of <stdbool.h>.
//#include <stdbool.h>
#define bool  char
#define true  1
#define false 0

// NOTE(review): these look like the DJGPP and MinGW runtime hooks for turning
// off wildcard expansion of the command line (so '?' in the letters is not
// treated as a filename pattern) - confirm against the runtime documentation.
#if defined( __DJGPP__ )
char** __crt0_glob_function( char* dummy ) { return 0; }
#elif defined( __MINGW32__ )
int _CRT_glob = 0;
#endif

// Shorthand for the explicitly unsigned and signed character types.
typedef unsigned char uchar;
typedef   signed char schar;


// Size of the word buffers and of every table indexed by word length or by
// word position.
#define MAX_WORD 100	// UKACD 1.7 has an 89-character phrase

// Settings gathered from the command line by process_options() and refined by
// process_letters().
struct
{
  // Index of the last word of the anagram, i.e. the number of words minus
  // one; -1 means any number of words is acceptable.
  int  words;
  int  min;		// minimum word length to consider
  int  max;		// maximum word length to consider
  bool sort;		// sort the words of each anagram alphabetically?
  bool len;		// were individual word lengths given?
  char sep;		// character to separate each word
  // Filename of the word list; "-" means standard input.  When left empty,
  // main() reads standard input if it is redirected, otherwise it fills in the
  // default list path.
  char name[512];
}
option =
{
  -1,			// find any number of words
  1,			// all words
  MAX_WORD,		// all words (probably)
  false,		// leave sorted by length
  false,		// no lengths
  ' ',                  // use space to separate words
  ""			// no list named on the command line (see name above)
};


// Create my own character map.
//
// Each entry is a set of flag bits describing how the character is treated
// when reading the word list:
//   L  the character counts as a letter of the word;
//   D  a digit - it carries the letter bit as well, plus a bit of its own;
//   I  the character is skipped when it appears before a word;
//   T  the character is stripped when it appears after a word;
//   A  both of the above.
// eofmap has one extra entry in front so that charmap[EOF] is valid (and has
// no flags), which assumes EOF is -1.
#define L  1		// letter
#define D  3		// treat digits as letters for this program
#define I  4		// leading (initial) annotation
#define T  8		// trailing annotation
#define A 12		// leading and trailing annotation
uchar eofmap[257] =
{
  0,					// EOF
  A,A,A,A, A,A,A,A, A,A,I,A, A,A,A,A,	// 00-0F newline
  A,A,A,A, A,A,A,A, A,A,A,A, A,A,A,A,	// 10-1F
  A,I,A,A, A,A,A,0, A,A,A,A, A,A,I,A,	// 20-2F exclamation, apostrophe, period
  D,D,D,D, D,D,D,D, D,D,A,A, A,A,A,I,	// 30-3F digits, question mark
  A,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 40-4F uppercase
  L,L,L,L, L,L,L,L, L,L,L,A, A,A,A,A,	// 50-5F uppercase
  A,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 60-6F lowercase
  L,L,L,L, L,L,L,L, L,L,L,A, A,A,A,A,	// 70-7F lowercase
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 80-8F regard all extended-ASCII
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 90-9F  as letters
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// A0-AF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// B0-BF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// C0-CF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// D0-DF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// E0-EF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// F0-FF
};
uchar* const charmap = eofmap + 1;
// The argument must be EOF or a value in the range of unsigned char.
#define is_alpha( c ) (charmap[(c)] & L)
// D shares its low bit with L, so a plain mask would accept every letter as
// a digit; require all of D's bits to be present.
#define is_digit( c ) ((charmap[(c)] & D) == D)
#define is_lead( c )  (charmap[(c)] & I)
#define is_trail( c ) (charmap[(c)] & T)

// Case-folding table indexed by character value: 'A'-'Z' (0x41-0x5A) map to
// 'a'-'z' and every other value maps to itself.  The initializer is exactly
// 256 characters, so the array has no terminating NUL.
uchar casemap[256] =	// translate upper to lower to ignore case
{
  "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
  "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
  "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
  "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
  "\x40"     "abcdefghijklmnopqrstuvwxyz"     "\x5B\x5C\x5D\x5E\x5F"
  "\x60"     "abcdefghijklmnopqrstuvwxyz"     "\x7B\x7C\x7D\x7E\x7F"
  "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
  "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
  "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
  "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
  "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
  "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
  "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
  "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
};


// The letter set.  process_letters() fills in letlen and letcnt;
// match_anagram() takes letters out of letcnt and records each one on the
// fndlet stack so that the caller can put them back afterwards.
int   letlen;			// number of letters to anagram
schar letcnt[256];		// the anagram letter counts ('?' holds the wildcards)
uchar fndlet[MAX_WORD]; 	// matched letters to restore the count
int   fndlen;			// number of matched letters

struct wlist			// linked list of words
{
  struct wlist* next;
  int	 len;			// number of letters in bare
  char*  word;			// the word as read; stored directly after bare
  // The lowercase letters of the word.  The node is allocated with enough
  // extra room for both strings; the two bytes declared here pay for their
  // terminating NULs.
  uchar  bare[2];		// dynamically allocated (NUL for both strings)
};

// Indexed by word length.  Once create() has finished, words[0] is the head
// of a single list joining every length in ascending order.
struct wlist* words[MAX_WORD];	// head of each list of word lengths
struct wlist* tails[MAX_WORD];	// tail of each list of word lengths
int    minlen;			// the shortest word actually read

struct wlist* multi[MAX_WORD];	// the words found
// wordlen[0] holds the lengths in the order given on the command line;
// wordlen[1] is the same set sorted ascending by process_letters().
int    wordlen[2][MAX_WORD];	// desired lengths of each word


// Print the banner (and the usage text unless VER is true), then exit.
void help( bool ver )
#ifdef __GNUC__
__attribute__((noreturn))
#endif
;

// Put the default word-list directory in option.name; returns its end.
char* get_list_path( const char* argv0 );
// Parse the command line; returns the argv index of the letters (0 if none).
int   process_options( int argc, char* argv[] );
// Count the letters of PAT and reconcile the length options with them.
void  process_letters( uchar* pat );
// Take the letters of WORD out of the letter counts; false if they don't fit.
bool  match_anagram( uchar* word );
// Read LIST and build the per-length lists of usable words.
void  create( FILE* list );
// Read the next acceptable word; returns the length of BARE, 0 at end of file.
int   read_word( FILE* list, char* word, uchar* bare );
// Recursively find and display every combination of words.
void  build( int len, int cnt, struct wlist* w );
// Print the cnt+1 words currently held in multi[].
void  display( int cnt );


// Entry point: parse the options, pick the word list, then find and print
// every combination of words that uses up the letters.
// Returns 1 if no letters were given or the list cannot be opened, else 0.
int main( int argc, char* argv[] )
{
  FILE* list  = NULL;
  int   where = process_options( argc, argv );

  if (where == 0)
  {
    puts( "Error: missing letters." );
    return 1;
  }

  if (option.name[0] == '\0')
  {
    // No list was named: prefer redirected input, else the default list.
    if (isatty( 0 ))
    {
      strcpy( get_list_path( argv[0] ), "word.lst" );
    }
    else
    {
      list = stdin;
    }
  }
  else if (strcmp( option.name, "-" ) == 0)
  {
    list = stdin;
  }

  if (list == NULL)
  {
    list = fopen( option.name, "r" );
    if (list == NULL)
    {
      printf( "Error: unable to open \"%s\".\n", option.name );
      return 1;
    }
  }

  process_letters( (uchar*)argv[where] );
  create( list );
  build( letlen, 0, words[0] );

  fclose( list );

  return 0;
}


// Find all the words that can be made from any of the letters.
void create( FILE* list )
{
  char	 word[MAX_WORD];
  uchar  bare[MAX_WORD];
  int	 len, lens[MAX_WORD], max = 0;
  struct wlist* w ;

  minlen = MAX_WORD;
  memset( lens, 0, sizeof(lens) );
  while ((len = read_word( list, word, bare )) != 0)
  {
    if (match_anagram( bare ))
    {
      w = malloc( sizeof(struct wlist) + len + strlen( word ) );
      if (w == NULL)
      {
	puts( "Error: out of memory." );
	exit( 1 );
      }
      w->next = NULL;
      w->len  = len;
      w->word = (char*)w->bare + len+1;
      memcpy( w->bare, bare, len+1 );
      strcpy( w->word, word );
      if (words[len] == NULL)
      {
	words[len] = w;
      }
      else
      {
	tails[len]->next = w;
      }
      tails[len] = w;
      ++lens[len];

      if (len < minlen)
      {
	minlen = len;
      }
      if (len > max)
      {
	max = len;
      }

      do
      {
	++letcnt[fndlet[--fndlen]];
      } while (fndlen != 0);
    }
  }

  if (max == 0)
  {
    exit( 0 );
  }

  // Check we at least have the number of each length.
  if (option.len)
  {
    for (len = option.words; len >= 0; --len)
    {
      lens[0] = 1;
      while (len > 0 && wordlen[1][len-1] == wordlen[1][len])
      {
	++lens[0];
	--len;
      }
      if (lens[wordlen[1][len]] < lens[0])
      {
	exit( 0 );
      }
    }
  }

  // Combine all the separate lists into one.
  words[0] = words[minlen];
  w	   = tails[minlen];
  for (len = minlen+1; len <= max; ++len)
  {
    if (words[len] == NULL)
    {
      continue;
    }
    w->next = words[len];
    w = tails[len];
  }
}


// Recursively search the list for words.
// len: the remaining letters in the set
// cnt: the position of the word
// w:	the head of the list
// Each word tried at this position is stored in multi[cnt]; once the letters
// are used up the set is handed to display().  Every word that matches has
// its letters given back to letcnt before the next one is tried, so the
// letter counts are the same on return as they were on entry.
void build( int len, int cnt, struct wlist* w )
{
  int fnd;

  // With explicit lengths, the word at this position has to be
  // wordlen[1][cnt] letters long, so skip straight to that length's list.
  if (option.len)
  {
    if (w->len < wordlen[1][cnt])
    {
      w = words[wordlen[1][cnt]];
    }
  }
  // The last word must use all remaining letters.
  else if (cnt == option.words || len == minlen)
  {
    if (w->len < len)
    {
      w = words[len];
    }
  }
  // From here on the list is in ascending length order, so running past the
  // wanted length means no later word can fit either.
  for (; w; w = w->next)
  {
    if (option.len && w->len > wordlen[1][cnt])
    {
      break;
    }
    if (len != w->len)
    {
      if (w->len > len) 		// ran out of letters
      {
	break;
      }
      if (len - w->len < minlen)	// matching this word would not leave
      { 				//  enough letters for the next, so
	w = words[len]; 		//  this must use all the remaining
	if (!w)
	{
	  break;
	}
      }
    }
    // Remember where the matched-letter stack stood, so that only this
    // word's letters are put back afterwards.
    fnd = fndlen;
    if (match_anagram( w->bare ))
    {
      multi[cnt] = w;
      len -= w->len;
      if (len == 0)
      {
	// All letters used: show the set if it has an acceptable word count.
	if (option.words == -1 || cnt == option.words)
	{
	  display( cnt );
	}
      }
      else if (w->len > len)
      {
	// Fewer letters remain than this word has.  The next word starts its
	// search from this one, so it could never fit; neither could anything
	// after a longer word here.  Restore the letters and give up.
	do
	{
	  ++letcnt[fndlet[--fndlen]];
	} while (fndlen != fnd);
	break;
      }
      else
      {
	// Search for the next word starting at this one, which keeps the
	// lengths non-decreasing and avoids repeating a set in another order.
	build( len, cnt+1, w );
      }
      // Give this word's letters back before trying the next candidate.
      do
      {
	++letcnt[fndlet[--fndlen]];
      } while (fndlen != fnd);
      len += w->len;
    }
  }
}


// Display the words, sorting as necessary.
// cnt: index of the last word in multi[], so cnt+1 words are printed.
// The words are written to standard output on one line, separated by
// option.sep.  multi[] itself is never reordered; any sorting is done on a
// local copy.
void display( int cnt )
{
  int m, j;
  struct wlist*  w[MAX_WORD];
  struct wlist** W = multi;

  if (option.sort || option.len)
  {
    memcpy( w, multi, sizeof(struct wlist*) * (cnt+1) );
    W = w;
  }

  if (option.sort)
  {
    // Simple sort to put the words in alphabetical order: each pass moves
    // the greatest of w[0..j] into w[j].
    for (j = cnt; j > 0; --j)
    {
      for (m = j-1; m >= 0; --m)
      {
        if (strcmp( (char*)w[m]->bare, (char*)w[j]->bare ) > 0)
        {
          struct wlist* t = w[m];
          w[m] = w[j];
          w[j] = t;
        }
      }
    }
  }

  if (option.len)
  {
    // Stable "selection" sort to put the words in the right length order,
    // i.e. the order the lengths were given in (wordlen[0]).
    for (m = cnt; m > 0; --m)
    {
      if (w[m]->len != wordlen[0][m])
      {
        for (j = m-1; j >= 0; --j)
        {
          if (w[j]->len == wordlen[0][m])
          {
            // Rotate w[j] up to position m.  The source and destination
            // overlap, so this has to be memmove, not memcpy.
            struct wlist* t = w[j];
            memmove( w+j, w+j+1, (m - j) * sizeof(struct wlist*) );
            w[m] = t;
            break;
          }
        }
      }
    }
  }

  for (m = 0; m < cnt; ++m)
  {
    printf( "%s%c", W[m]->word, option.sep );
  }
  puts( W[m]->word );
}


// Read a word from the file LIST.  WORD is the word as it exists in the file
// (without annotations); BARE has punctuation removed and is made lowercase.
// Both buffers must have room for MAX_WORD characters.
// Words whose bare length is outside option.min..option.max are skipped.
// Returns the length of BARE, 0 if no more words.
// Exits the program (status 1) if a line does not fit in the buffers.
int read_word( FILE* list, char* word, uchar* bare )
{
  int	 len;
  int	 c;
  uchar* w;

  do
  {
    w	= (uchar*)word;
    len = 0;

    // Skip leading annotations, which includes the newline of the previous
    // line and any blank lines.
    do
    {
      c = getc( list );
    } while (is_lead( c ));
    if (c == EOF)
    {
      return len;
    }

    // Copy the rest of the line.  The limit is tested before each store so
    // that an over-long line can never write beyond WORD or BARE (BARE gets
    // at most one character for each one stored in WORD).
    do
    {
      if ((char*)w - word >= MAX_WORD-2)
      {
	puts( "Error: word too long (buffer overflow)." );
	exit( 1 );
      }
      *w++ = c;
      if (is_alpha( c ))
      {
	bare[len++] = casemap[c];
      }
      c = getc( list );
    } while (c != '\n' && c != EOF);

    // Back up over trailing annotations.  The first character stored is not
    // a leading annotation, and in the character map every trailing
    // annotation is also a leading one, so this stops within the buffer.
    while (is_trail( *--w ))
    {
      //if (is_alpha( *w ))		// trail and alpha don't overlap
      //{				//  in this program
      //  --len;
      //}
    }
  } while (len < option.min || len > option.max);

  // w is on the last character kept; terminate both strings.
  w[1] = bare[len] = '\0';

  return len;
}


// Count the letters.
// LET is the letter set from the command line; it is modified in place
// ('.' becomes '?', letters become lowercase).  Characters that are neither
// wildcards nor letters are ignored.  Fills in letcnt and letlen.
// If word lengths were given, copies them into wordlen[1] in ascending
// order, checks that they add up to the number of letters and takes
// option.min/option.max from them.  Otherwise option.max is reduced to the
// most letters one word could have.
// Exits the program (status 1) if there are too many letters or the lengths
// don't agree with the letters.
void process_letters( uchar* let )
{
  int max;
  int j, k;

  for (; *let; ++let)
  {
    if (*let == '.' || *let == '?')
    {
      *let = '?';
    }
    else if (!is_alpha( *let ))
    {
      continue;
    }
    // letlen is later used to index words[] and bounds the depth of fndlet[]
    // and multi[], all of which have MAX_WORD entries; letcnt is only a
    // signed char.  Refuse anything that would not fit.
    if (letlen >= MAX_WORD-1)
    {
      printf( "Error: too many letters (maximum %d).\n", MAX_WORD-1 );
      exit( 1 );
    }
    *let = casemap[*let];
    ++letcnt[*let];
    ++letlen;
  }

  if (option.len)
  {
    // Put the word lengths in order using a bubble sort.
    memcpy( wordlen[1], wordlen[0], (option.words + 1) * sizeof(int) );
    max = 0;
    for (j = option.words; j > 0; --j)
    {
      for (k = j-1; k >= 0; --k)
      {
	if (wordlen[1][k] > wordlen[1][j])
	{
	  int t = wordlen[1][k];
	  wordlen[1][k] = wordlen[1][j];
	  wordlen[1][j] = t;
	}
      }
      max += wordlen[1][j];		// total the lengths along the way
    }
    max += wordlen[1][j];		// j is 0 here: add the shortest
    if (max != letlen)
    {
      printf( "Error: word lengths (%d) don't agree with letters (%d).\n",
	      max, letlen );
      exit( 1 );
    }
    option.min = wordlen[1][0];
    option.max = wordlen[1][option.words];
  }
  else
  {
    // Every other word needs at least option.min letters, which limits how
    // long any one word can be.
    max = letlen - (option.min * ((option.words == -1) ? 1 : option.words));
    if (option.max > max)
    {
      option.max = max;
    }
  }
}


// Try to take every letter of WORD out of the anagram set, using a wildcard
// ('?') for any letter that has run out.
// Precondition: WORD is at least one letter.
// On success the letters stay removed from letcnt and are pushed on fndlet
// (the caller puts them back); returns true.
// On failure everything taken by this call is put back; returns false.
bool match_anagram( uchar* word )
{
  const int mark = fndlen;	// stack depth on entry
  uchar*    p    = word;

  for (;;)
  {
    int ch = *p;

    if (letcnt[ch] == 0)
    {
      if (letcnt['?'] == 0)
      {
        // Neither the letter nor a wildcard is left: undo and fail.
        while (fndlen != mark)
        {
          --fndlen;
          ++letcnt[fndlet[fndlen]];
        }
        return false;
      }
      ch = '?';
    }

    --letcnt[ch];
    fndlet[fndlen] = ch;
    ++fndlen;

    ++p;
    if (*p == '\0')
    {
      break;
    }
  }

  return true;
}


int process_options( int argc, char* argv[] )
{
  int arg;
  int set = 0;

  if (argc == 1 || strcmp( argv[1], "--help" ) == 0)
  {
    help( false );
  }
  if (strcmp( argv[1], "--version" ) == 0)
  {
    help( true );
  }

  for (arg = 1; arg < argc; ++arg)
  {
    if (argv[arg][0] == '-' || argv[arg][0] == '/')
    {
      int j;
      for (j = 1; argv[arg][j]; ++j)
      {
	switch (argv[arg][j])
	{
	  case '?': help( false );

	  case 'f':
	  {
	    char* name;
	    if (argv[arg][j+1] == '\0')
	    {
	      if (argv[arg+1] == NULL)
	      {
		puts( "Error: missing filename." );
		exit( 1 );
	      }
	      name = argv[++arg];
	    }
	    else
	    {
	      name = argv[arg]+j+1;
	    }
	    if (*name == '=')
	    {
	      strcpy( get_list_path( argv[0] ), name+1 );
	    }
	    else
	    {
	      strcpy( option.name, name );
	    }
	  }
	  goto next;

	  case 's': option.sort = true; break;
	  case 't': option.sep  = '\t'; break;

	  case '/': break;

	  default:
	    printf( "Error: unknown option '%c'.\n", argv[arg][j] );
	    exit( 1 );
	  break;
	}
      }
    }
    else if (is_digit( (uchar)*argv[arg] ))
    {
      int   len[2];
      char* end;
      len[0] = (int)strtol( argv[arg], &end, 10 );
      if (len[0] < 100)
      {
	if (*end == '\0')
	{
	  if (set == 0)
	  {
	    option.min = len[0];
	  }
	  else
	  {
	    option.words = len[0] - 1;
	  }
	  continue;
	}
	if (*end == '=' || *end == '-' || *end == '+')
	{
	  if (end[1] == '\0')
	  {
	    if (*end == '=')
	    {
	      option.min = option.max = len[0];
	    }
	    else if (*end == '+')
	    {
	      option.min = len[0];
	    }
	    else // (*end == '-')
	    {
	      option.max = len[0];
	    }
	    continue;
	  }
	  if (is_digit( (uchar)end[1] ))
	  {
	    if (set == 0)
	    {
	      len[1] = (int)strtol( end+1, &end, 10 );
	      if (len[1] < 100 && *end == '\0')
	      {
		option.min = len[0];
		option.max = len[1];
		continue;
	      }
	    }
	    else
	    {
	      wordlen[0][0] = len[0];
	      option.words = 0;
	      do
	      {
		wordlen[0][++option.words] = (int)strtol( end+1, &end, 10 );
	      } while (*end == '-' || *end == '+' || *end == '=');
	      option.len = true;
	      continue;
	    }
	  }
	}
      }
      set = arg;
    }
    else
    {
      set = arg;
    }
  next: ;
  }

  return set;
}


// Work out the directory that holds the default word list and store it at
// the start of option.name.  For Windows and DOS that is the directory of the
// executable (taken from the module filename or from ARGV0); elsewhere it is
// /usr/share/dict/ and ARGV0 is not used.
// Returns a pointer into option.name just past the directory, where the
// caller writes the filename.
char* get_list_path( const char* argv0 )
{
#if defined( _WIN32 ) || defined( __MSDOS__ )
  char* scan;
  char* leaf = option.name;
# ifdef _WIN32
  GetModuleFileName( NULL, option.name, sizeof(option.name) );
# else
  strcpy( option.name, argv0 );
# endif
  // The filename starts after the last path separator or drive colon.
  for (scan = option.name; *scan != '\0'; ++scan)
  {
    switch (*scan)
    {
      case '\\':
      case '/':
      case ':':
        leaf = scan + 1;
        break;
    }
  }
  return leaf;
#else
  static const char dict_dir[] = "/usr/share/dict/";
  strcpy( option.name, dict_dir );
  return option.name + (sizeof(dict_dir) - 1);
#endif
}


// Print the program banner and, unless VER is true, the usage text as well;
// then exit with status 0.  Never returns.
void help( bool ver )
{
  // The banner is shown for both the version and the full help.
  puts( "Find Multiple Words by Jason Hood <jadoxa@yahoo.com.au>.\n"
        "Version " PVERS " (" PDATE ").  Freeware.\n"
        "http://fw.adoxa.cjb.net/"
      );

  if (ver)
  {
    exit( 0 );
  }

  puts( "\n"
        "Extract words out of a set of letters.\n"
        "\n"
        "fmw [-f list] [-st] [range] letters [words]\n"
        "\n"
        "-f list   use LIST for the word list (\"-\" for standard input)\n"
        "-s        sort the words alphabetically\n"
        "-t        use a tab to separate the words\n"
        "range     the minimum and maximum word lengths to consider:\n"
        "  N-      N or fewer letters\n"
        "  N=      exactly N letters\n"
        "  N[+]    N or more letters\n"
        "  N-M     between N and M letters, inclusive\n"
        "letters   the set of letters to search ('.' or '?' are wild)\n"
        "words     the number of words in the set (which can be 1), or\n"
        "  1-2-... the length of each word in the set\n"
        "\n"
        "The default word list is taken from standard input if it is redirected,\n"
        "otherwise from "
#if defined( _WIN32 ) || defined( __MSDOS__ )
                       "WORD.LST in the same directory as the executable.\n"
#else
                       "/usr/share/dict/word.lst.\n"
#endif
        "If LIST starts with '=', it will be substituted with the above path."
      );

  exit( 0 );
}
