/*
  fw.c - Find Words: word-based pattern matching.

  Jason Hood, 23 July to 12 August, 2006.

  A single program to replace most of those in the WORDY package.
  In addition, it will (not) ignore case and punctuation.

  Todo:
  * allow sets and digits within anagrams;
  * allow multiple files;
  * allow AND and OR operations (AND is indirectly supported via piping).
*/

// Program version and release date; both are pasted into the banner that
// help() prints.
#define PVERS "1.00"
#define PDATE "12 August, 2006"


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined( _WIN32 ) || (defined( __MSDOS__ ) && !defined( __DJGPP__ ))
#include <io.h>
#else
#include <unistd.h>
#endif

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

// A home-grown boolean is used in place of <stdbool.h>.  Because bool is
// really a char, "bool" arrays in this file are plain byte arrays.
#define bool  char
#define true  1
#define false 0

// NOTE(review): these look like the DJGPP and MinGW run-time hooks that turn
// off wildcard expansion of the command line, so that '*' and '?' reach the
// program as part of the pattern - confirm against the compiler manuals.
#if defined( __DJGPP__ )
char** __crt0_glob_function( char* dummy ) { return 0; }
#elif defined( __MINGW32__ )
int _CRT_glob = 0;
#endif

// Shorthand for the byte types used for characters and small counters.
typedef unsigned char uchar;
typedef   signed char schar;


// Size of the word buffers and of the per-length list tables.
#define MAX_WORD 100	// UKACD 1.7 has an 89-character phrase

// The command-line options, initialised to their defaults.  They are set by
// process_options() and adjusted by process_pattern() to fit the pattern.
struct
{
  int  min;		// minimum word length to consider
  int  max;		// maximum word length to consider
  bool range;		// were the above specified?
  bool unique;		// should digits match unique letters?
  bool literal; 	// should dot and digits be matched literally?
  bool ignore;		// should case be ignored?
  bool balance; 	// should anagram balances be displayed?
  bool not;		// succeed when the pattern fails?
  int  diff;		// find slightly different words? (the N of -N, else 0)
  int  sort;		// sort by length? (0 = no, 1 = shortest first,
			//  2 = longest first)
  char name[512];	// filename of the word list
}
option =
{
  1,			// all words
  MAX_WORD,		// all words (probably)
  false,		// no range specified
  true, 		// digits should match unique letters
  false,		// dot and digits are wild
  true, 		// ignore case
  false,		// don't display anagram balances
  false,		// match the pattern
  0,			// pattern matching
  0,			// don't sort by length
  ""			// empty: main() uses standard input if it is redirected,
			//  otherwise "word.lst" in the get_list_path() directory
};


// Create my own character map.  Each entry is a set of the flag bits below;
// the flags may be changed at run time (set_alpha, and main() for -p).
// NOTE: the single-letter macros stay defined for the rest of the file, so
// these names must not be used as identifiers anywhere below.
#define L  1		// letter, including extended-ASCII and sometimes
			//  punctuation and digits
#define D  2		// digit
#define I  4		// leading (initial) annotation
#define T  8		// trailing annotation
#define A 12		// leading and trailing annotation (I | T)
// The table has one extra entry in front so that it can be indexed with EOF
// through charmap (which relies on EOF being -1); that entry has no flags.
uchar eofmap[257] =
{
  0,					// EOF
  A,A,A,A, A,A,A,A, A,A,I,A, A,A,A,A,	// 00-0F newline
  A,A,A,A, A,A,A,A, A,A,A,A, A,A,A,A,	// 10-1F
  A,I,A,A, A,A,A,0, A,A,A,A, A,A,I,A,	// 20-2F exclamation, apostrophe, period
  D,D,D,D, D,D,D,D, D,D,A,A, A,A,A,I,	// 30-3F digits, question mark
  A,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 40-4F uppercase
  L,L,L,L, L,L,L,L, L,L,L,A, A,A,A,A,	// 50-5F uppercase
  A,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 60-6F lowercase
  L,L,L,L, L,L,L,L, L,L,L,A, A,A,A,A,	// 70-7F lowercase
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 80-8F regard all extended-ASCII
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// 90-9F  as letters
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// A0-AF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// B0-BF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// C0-CF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// D0-DF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// E0-EF
  L,L,L,L, L,L,L,L, L,L,L,L, L,L,L,L,	// F0-FF
};
uchar* const charmap = eofmap + 1;	// charmap[EOF] .. charmap[255]
// The argument of these macros must be EOF or an unsigned character value.
#define set_alpha( c ) charmap[c] |= L	// treat C as a letter from now on
#define is_alpha( c ) (charmap[c] & L)
#define is_digit( c ) (charmap[c] & D)
#define is_lead( c )  (charmap[c] & I)	// skipped at the start of a line
#define is_trail( c ) (charmap[c] & T)	// stripped from the end of a line

// Case-folding table: 'A'-'Z' map to 'a'-'z' and every other character maps
// to itself.  main() replaces it with the identity mapping when case is to be
// matched.  The letters are written as separate string literals so that the
// preceding hexadecimal escape does not swallow them.  The initialiser is
// exactly 256 characters, so the array holds no terminating NUL.
uchar casemap[256] =	// translate upper to lower to ignore case
{
  "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
  "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
  "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
  "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
  "\x40"     "abcdefghijklmnopqrstuvwxyz"     "\x5B\x5C\x5D\x5E\x5F"
  "\x60"     "abcdefghijklmnopqrstuvwxyz"     "\x7B\x7C\x7D\x7E\x7F"
  "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
  "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
  "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
  "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
  "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
  "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
  "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
  "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
};


#define MAX_STARS 16	 // maximum number of unknown sequences

// The compiled pattern, filled in by process_pattern().  The pattern text is
// split in place at each '*': pat[0] is the prefix, pat[pats] the suffix and
// anything in between is an infix.  The per-component arrays are indexed
// 0..pats and have MAX_STARS entries each.
struct
{
  int	 pats;		 // number of individual patterns, less one (# of stars)
  int	 len[MAX_STARS]; // length of each
  uchar* pat[MAX_STARS]; // pattern of each
  int	 length;	 // sum of all lengths
  int	 infix; 	 // length of all the "inner" patterns
  int	 suffix;	 // length of the last pattern
  bool	 dig[MAX_STARS]; // does this pattern contain digits?
  bool	 digits;	 // does the overall pattern contain digits?
  uchar  digit[10];	 // letter corresponding to digit (0 = not yet matched)
  bool	 seen[32];	 // has this letter been seen? (a bit set, see SET/TST)
  int	 anagram;	 // length of anagram (number of letters that must be
			 //  found; -1 when only excluded letters were given)
  schar  anapat[256];	 // the anagram letter counts ('?' holds the number of
			 //  blanks; '*' is non-zero if extra letters are allowed)
  uchar  ananot[32];	 // the letters not allowed in anagram wildcards
			 //  (a bit set, see SET/TST)
  bool	 exact; 	 // exactly anagram letters and no more
}
pattern;

// Character sets from "[...]" in the pattern.  process_pattern() replaces each
// set in the pattern text with its one-based index, which is why the indices
// are limited to the control characters; match_pattern() treats any pattern
// character below 32 as such an index.
#define MAX_SETS 31	 // control characters, without NUL
uchar set[MAX_SETS][32]; // 32 bytes * 8 bits = 256 characters
int   sets;		 // number of sets used so far
// Bit-set helpers; C must be an unsigned character value and S an array of
// (at least) 32 bytes.
#define IDX( c ) ((c) >> 3)			// character to "byte"
#define BIT( c ) (1 << ((c) & 7))		// character to "bit"
#define SET( c, s ) s[IDX(c)] |= BIT(c) 	// add character to the set
#define TST( c, s ) (s[IDX(c)] & BIT(c))	// is character in the set?

// Working copy of pattern.anapat for the word being tested.  Only the CNTLEN
// entries starting at the lowest anagram character are refreshed for each
// word (copied from ANABEG to CNTBEG).
schar  count[256];	// count of letters matching the anagram
schar* cntbeg;		// pointer to lowest letter
int    cntlen;		// letter count
schar* anabeg;		// pointer to lowest letter in pattern.anapat

// Words held back for sorting by length.  Each node is allocated by insert()
// with room for the whole word after the structure; the one-element array
// provides the space for the terminating NUL.
struct wlist			// linked list of words
{
  struct wlist* next;
  char	 word[1];		// dynamically allocated
};

// One list per word length, indexed by the length itself; words are appended
// at the tail so each list keeps the order of the word list.
struct wlist* words[MAX_WORD];	// head of each list of word lengths
struct wlist* tails[MAX_WORD];	// tail of each list of word lengths


// Display the help selected by TYPE and exit; it never returns.
void help( char type )
#ifdef __GNUC__
__attribute__((noreturn))
#endif
;

// Forward declarations; each function is described at its definition.
char*  get_list_path( const char* argv0 );
int    process_options( int argc, char* argv[] );
void   process_pattern( uchar* pat );
uchar* process_set( uchar* pat, uchar* s );
bool   match_pattern( const uchar* pat, uchar* word );
bool   match_infix( uchar* word, int len );
bool   match_anagram( uchar* word, int len );
void   match( FILE* list );
void   diff( FILE* list );
int    read_word( FILE* list, uchar* word, uchar* bare );
void   make_balance( char* word, uchar* bare, int len );
void   insert( char* word, int len );
void   display( void );


// Program entry point: read the options, choose and open the word list,
// adjust the character tables to suit the options and then run the search.
// Returns 0 on success, 1 if the pattern is missing or the list cannot be
// opened.
int main( int argc, char* argv[] )
{
  FILE* list = NULL;
  int   pat;
  int   ch;

  pat = process_options( argc, argv );

  // A range on its own is a valid request ("all five-letter words"), but a
  // difference search always needs something to compare with.
  if (pat == 0 && (!option.range || option.diff))
  {
    puts( "Error: missing pattern." );
    return 1;
  }

  // No list named: use redirected input if there is any, else the default
  // list.  A name of "-" explicitly asks for standard input.
  if (option.name[0] == '\0')
  {
    if (isatty( 0 ))
    {
      strcpy( get_list_path( argv[0] ), "word.lst" );
    }
    else
    {
      list = stdin;
    }
  }
  else if (strcmp( option.name, "-" ) == 0)
  {
    list = stdin;
  }

  if (list == NULL)
  {
    list = fopen( option.name, "r" );
    if (list == NULL)
    {
      printf( "Error: unable to open \"%s\".\n", option.name );
      return 1;
    }
  }

  // Matching case: make the case map an identity mapping.
  if (!option.ignore)
  {
    for (ch = 0; ch < 256; ++ch)
    {
      casemap[ch] = ch;
    }
  }

  // Literal matching: the dot and the digits become ordinary letters.
  if (option.literal)
  {
    set_alpha( '.' );
    for (ch = '0'; ch <= '9'; ++ch)
    {
      set_alpha( ch );
      charmap[ch] &= ~D;
    }
  }

  if (pat != 0)
  {
    process_pattern( (uchar*)argv[pat] );
  }

  if (option.diff != 0)
  {
    diff( list );
  }
  else
  {
    match( list );
    display();
  }

  fclose( list );

  return 0;
}


// Forget the letters assigned to the pattern's digits by the previous word.
// Does nothing if the pattern has no digits.
static void reset_digits( void )
{
  if (!pattern.digits)
  {
    return;
  }
  memset( pattern.digit, 0, sizeof(pattern.digit) );
  if (option.unique)
  {
    memset( pattern.seen, false, sizeof(pattern.seen) );
  }
}


// Test every word in the file LIST against the pattern and pass the
// successful ones to insert().  The components are tested in the order
// prefix, suffix, infix, anagram.
// Normally a word is accepted only if every component present matches.
// With the "not" option a word is rejected as soon as any component matches;
// words shorter than the pattern's minimum cannot match and are accepted
// outright.
void match( FILE* list )
{
  uchar word[MAX_WORD], bare[MAX_WORD];
  int   len;
  const bool bal = (pattern.anagram && option.balance);

  if (!option.not)
  {
    while ((len = read_word( list, word, bare )) != 0)
    {
      reset_digits();
      if (pattern.len[0] && !match_pattern( pattern.pat[0], bare ))
      {
        continue;
      }
      if (pattern.suffix &&
          !match_pattern( pattern.pat[pattern.pats], bare+len-pattern.suffix ))
      {
        continue;
      }
      if (pattern.infix && !match_infix( bare, len ))
      {
        continue;
      }
      if (pattern.anagram && !match_anagram( bare, len ))
      {
        continue;
      }
      if (bal)
      {
        make_balance( (char*)word, bare, len );
      }
      insert( (char*)word, len );
    }
    return;
  }

  {
    // Remember the shortest length that could match, then let read_word()
    // return the shorter words as well (unless a range was requested).
    const int min = option.min;
    if (!option.range && pattern.pats)
    {
      option.min = 1;
    }

    while ((len = read_word( list, word, bare )) != 0)
    {
      if (len >= min)
      {
        reset_digits();
        if (pattern.len[0] && match_pattern( pattern.pat[0], bare ))
        {
          continue;
        }
        if (pattern.suffix &&
            match_pattern( pattern.pat[pattern.pats], bare+len-pattern.suffix ))
        {
          continue;
        }
        if (pattern.infix && match_infix( bare, len ))
        {
          continue;
        }
        if (pattern.anagram && match_anagram( bare, len ))
        {
          continue;
        }
      }
      insert( (char*)word, len );
    }
  }
}


// Display the anagram balances.  Use '-' to indicate letters that have not
// been used and '+' to indicate letters that have replaced wildcards.
void make_balance( char* word, uchar* bare, int len )
{
  bool minus = false, plus = false;
  int  wlen, pos;
  int  c, d;

  pos = wlen = strlen( word );
  for (c = 0; c < 256; ++c)
  {
    if (count[c] > 0)
    {
      if (!minus)
      {
	word[pos++] = '\t';
	if (wlen < 8)
	{
	  word[pos++] = '\t';
	}
	word[pos++] = '-';
	minus = true;
      }
      do
      {
	word[pos++] = c;
      } while (--count[c] != 0);
    }
  }

  // Sort the letters.
  for (c = len - 1; c > 0; --c)
  {
    for (d = c - 1; d >= 0; --d)
    {
      if (bare[d] > bare[c])
      {
	int t = bare[c];
	bare[c] = bare[d];
	bare[d] = t;
      }
    }
  }

  for (c = 0; c < len; ++c)
  {
    if (bare[c])
    {
      if (!minus)
      {
	word[pos++] = '\t';
	if (wlen < 8)
	{
	  word[pos++] = '\t';
	}
	minus = true;
      }
      if (!plus)
      {
	word[pos++] = '+';
	plus = true;
      }
      word[pos++] = bare[c];
    }
  }

  if (pos >= MAX_WORD)
  {
    puts( "Error: buffer overflow." );
    exit( 1 );
  }

  word[pos] = '\0';
}


// Output WORD, whose length (in letters) is LEN.  Without sorting, or when
// the word belongs to the first length group to be shown, it is printed
// straight away; otherwise a copy is appended to the list for its length to
// be printed later by display().  If memory runs out the words collected so
// far are displayed to make room; the program stops if that does not help.
void insert( char* word, int len )
{
  struct wlist* node;
  size_t size;

  switch (option.sort)
  {
    case 0:
      puts( word );
      return;

    case 1:
      if (len == option.min)
      {
        puts( word );
        return;
      }
      break;

    case 2:
      if (len == option.max)
      {
        puts( word );
        return;
      }
      break;

    default:
      break;
  }

  // The structure already has room for the NUL.
  size = sizeof(struct wlist) + strlen( word );
  node = malloc( size );
  if (node == NULL)
  {
    display();
    node = malloc( size );
    if (node == NULL)
    {
      puts( "Error: out of memory." );
      exit( 1 );
    }
  }
  strcpy( node->word, word );
  node->next = NULL;

  if (words[len] != NULL)
  {
    tails[len]->next = node;
  }
  else
  {
    words[len] = node;
  }
  tails[len] = node;
}


// Display all the words found (so far) and release the memory.
void display( void )
{
  int len, end, delta;
  struct wlist* w;

  if (option.sort == 0)
  {
    return;
  }
  if (option.sort == 1)
  {
    len = option.min + 1;
    end = option.max + 1;
    delta = 1;
  }
  else
  {
    len = option.max - 1;
    end = option.min - 1;
    delta = -1;
  }

  for (; len != end; len += delta)
  {
    for (w = words[len]; w;)
    {
      struct wlist* t = w->next;
      puts( w->word );
      free( w );
      w = t;
    }
  }
}


// Find words in file LIST that are slightly different to the pattern.
// Precondition: only words of the same length are read.
void diff( FILE* list )
{
  uchar  word[MAX_WORD], bare[MAX_WORD];
  int	 len;
  int	 diff;

  while ((len = read_word( list, word, bare )) != 0)
  {
    diff = 0;
    if (pattern.anagram)
    {
      memcpy( cntbeg, anabeg, cntlen );
    }
    while (--len >= 0)
    {
      if (bare[len] != pattern.pat[0][len] && pattern.pat[0][len] != '?')
      {
	if (pattern.anagram)
	{
	  if (count[bare[len]] == 0)
	  {
	    if (count['?'] == 0)
	    {
	      goto next;
	    }
	    --count['?'];
	  }
	  else
	  {
	    --count[bare[len]];
	  }
	}
	if (++diff > option.diff)
	{
	  goto next;
	}
      }
    }
    if (diff == option.diff)
    {
      puts( (char*)word );
    }
  next: ;
  }
}


// Read a word from the file LIST; WORD contains the word as it is in the file
// (without annotations); BARE contains just the (lowercase) letters and it is
// NOT NUL-terminated.  Returns the length of BARE, 0 if no more words.
// Both buffers must hold MAX_WORD bytes.  Lines whose number of letters is
// outside option.min..option.max are skipped.  The program is stopped if a
// line is too long for the buffers.
int read_word( FILE* list, uchar* word, uchar* bare )
{
  uchar* wrd;
  int    len;
  int    c;

  do
  {
    wrd = word;
    len = 0;

    // Skip the leading annotations, which include the newline.
    do
    {
      c = getc( list );
    } while (is_lead( c ));
    if (c == EOF)
    {
      return len;
    }

    // Copy the rest of the line.
    do
    {
      *wrd++ = c;
      // Test the length as each character is stored, so that an overlong
      // line is detected before it can overrun either buffer.
      if (wrd - word >= MAX_WORD-1)
      {
        puts( "Error: word too long (buffer overflow)." );
        exit( 1 );
      }
      if (is_alpha( c ))
      {
        bare[len++] = casemap[c];
      }
      c = getc( list );
    } while (c != '\n' && c != EOF);

    // Strip the trailing annotations, discounting any that were stored as
    // letters.  The first character is never a trailing annotation (it would
    // have been skipped above), so this always stops within the buffer.
    while (is_trail( *--wrd ))
    {
      if (is_alpha( *wrd ))
      {
        --len;
      }
    }
  } while (len < option.min || len > option.max);

  wrd[1] = '\0';

  return len;
}


// Determine the attributes of the pattern.
// PAT is the pattern argument from the command line; it is modified in place
// (case folded, '.' turned into '?', split into components at each '*', sets
// replaced by their index) and the global pattern structure keeps pointers
// into it.  option.min and option.max are adjusted to suit the pattern.  The
// optional anagram, introduced by ';' or ':', is turned into the letter
// counts of pattern.anapat.  Any error in the pattern stops the program.
void process_pattern( uchar* pat )
{
  int   min, wild;
  uchar amin = 255, amax = 0;

// Keep track of the lowest and highest character used in the anagram.
#define ANA( c )        \
  do                    \
  {                     \
    if ((c) < amin)     \
    {                   \
      amin = (c);       \
    }                   \
    if ((c) > amax)     \
    {                   \
      amax = (c);       \
    }                   \
  } while (0)

  pattern.pat[0] = pat;
  min = wild = 0;

  if (option.diff)
  {
    // Difference search: a single fixed-length pattern of letters and
    // wildcards, optionally followed by the replacement letters.
    for (; *pat; ++pat)
    {
      if (*pat == ';' || *pat == ':')
      {
        if (pat == pattern.pat[0])
        {
          puts( "Error: pattern required in order to find difference." );
          exit( 1 );
        }
        break;
      }
      if (*pat == '.')
      {
        if (!option.literal)
        {
          *pat = '?';
        }
      }
      else if (*pat == '*' || *pat == '[')
      {
        puts( "Error: only '.' or '?' allowed when finding differences." );
        exit( 1 );
      }
      else
      {
        *pat = casemap[*pat];
      }
      ++min;
    }
    option.min = option.max = min;
    if (option.diff > min)
    {
      option.diff = min;
    }

    if (*pat == ';' || *pat == ':')
    {
      while (*++pat)
      {
        if (*pat == '*' || *pat == '[' || *pat == '=' || *pat == '/')
        {
          puts( "Error: only '.' or '?' allowed in difference anagrams." );
          exit( 1 );
        }
        if (*pat == '.')
        {
          *pat = '?';
        }
        else
        {
          *pat = casemap[*pat];
        }
        ++pattern.anagram;
        ++pattern.anapat[*pat];
        ANA( *pat );
      }
      // Make up the number of replacement letters with blanks.
      if (pattern.anagram < option.diff)
      {
        pattern.anapat['?'] += option.diff - pattern.anagram;
        ANA( '?' );
      }
      // With enough blanks any letter will do, so there is nothing to test.
      if (pattern.anapat['?'] >= option.diff)
      {
        pattern.anagram = 0;
      }
      anabeg = pattern.anapat + amin;
      cntbeg = count + amin;
      cntlen = amax - amin + 1;
    }

    return;
  }

  if (*pat == '!')
  {
    option.not = true;
    ++pat;
  }
  for (; *pat; ++pat)
  {
    if (*pat == ';' || *pat == ':')
    {
      break;
    }
    if (*pat == '*')
    {
      // The component arrays have MAX_STARS entries and each star starts a
      // new component, so the last usable index is MAX_STARS-1.
      if (pattern.pats >= MAX_STARS - 1)
      {
        puts( "Error: pattern contains too many components." );
        exit( 1 );
      }
      *pat = '\0';
      pattern.pat[++pattern.pats] = pat + 1;
    }
    else
    {
      if (is_alpha( *pat ))
      {
        *pat = casemap[*pat];
      }
      else if (*pat == '.' || *pat == '?')
      {
        *pat = '?';
        ++wild;
      }
      else if (is_digit( *pat ))
      {
        pattern.digits = pattern.dig[pattern.pats] = true;
      }
      else if (*pat == '[')
      {
        uchar* e;
        if (sets == MAX_SETS)
        {
          puts( "Error: too many sets." );
          exit( 1 );
        }
        e = process_set( pat, set[sets++] );
        // Replace the whole set with its (one-based) index.  The source and
        // destination overlap, so strcpy() cannot be used to close the gap.
        *pat = sets;
        memmove( pat + 1, e + 1, strlen( (char*)e + 1 ) + 1 );
      }
      else
      {
        // Punctuation in the pattern has to be matched in the word.
        set_alpha( *pat );
      }
      ++pattern.len[pattern.pats];
      ++min;
    }
  }
  pattern.length = min;
  pattern.suffix = (pattern.pats == 0) ? 0 : pattern.len[pattern.pats];
  pattern.infix  = min - (pattern.len[0] + pattern.suffix);

  if (pattern.pats == 0)
  {
    if (min)
    {
      option.min = option.max = min;
    }
  }
  else
  {
    if (min > option.min)
    {
      option.min = min;
    }
    if (min > option.max)
    {
      option.max = min;
    }
  }

  if (*pat == ';' || *pat == ':')
  {
    bool   only  = (pat == pattern.pat[0]);
    bool   slash = false;
    bool   not   = false;
    int    len   = 0;
    int    cnt   = 0;
    uchar* ana;                 // the first character of the anagram

    *pat++ = '\0';
    ana = pat;
    for (; is_digit( *pat ); ++pat)
    {
      cnt = cnt * 10 + *pat - '0';
    }
    if (cnt)
    {
      switch (*pat)
      {
        case '*': pattern.anapat['*'] = -1;
                  // fall through
        case '=': pattern.exact = true;
                  // fall through
        case '/': pattern.anagram = cnt;
                  slash = true;
                  if (*pat != '*')
                  {
                    cnt = 0;
                  }
                  ++pat;
        break;
        default:
          // Not a count after all: go back to the start of the anagram and
          // treat the digits as its letters.
          cnt = 0;
          pat = ana;
        break;
      }
    }
    for (; *pat; ++pat)
    {
      if (*pat == '*')
      {
        pattern.anapat['*'] = -1;
      }
      else if (*pat == '=')
      {
        uchar* p;
        if (slash)
        {
          puts( "Error: only one '=' or '/' allowed in an anagram." );
          exit( 1 );
        }
        // Go back and add the letters to the exclusion.
        for (p = ana; p < pat; ++p)
        {
          if (*p != '*' && *p != '/' && *p != '.' && *p != '?')
          {
            SET( casemap[*p], pattern.ananot );
          }
        }
        pattern.exact = true;
        slash = true;
      }
      else if (*pat == '/')
      {
        if (slash)
        {
          puts( "Error: only one '=' or '/' allowed in an anagram." );
          exit( 1 );
        }
        slash = true;
      }
      else if (*pat == '[')
      {
        if (pat[1] != '^' && pat[1] != '!')
        {
          puts( "Error: set is not supported in anagrams." );
          exit( 1 );
        }
        // Start at the negation, so the letters themselves are collected.
        pat = process_set( pat+1, pattern.ananot );
        not = true;
      }
      else
      {
        // Letters before the '=' or '/' are required.
        if (!slash || pattern.anagram == 0)
        {
          ++pattern.anagram;
        }
        if (*pat == '.' || *pat == '?')
        {
          if (cnt)
          {
            puts( "Error: blanks not allowed with count." );
            exit( 1 );
          }
          ++pattern.anapat['?'];
          ANA( '?' );
        }
        else
        {
          ++pattern.anapat[casemap[*pat]];
          ANA( casemap[*pat] );
          if (pattern.exact)
          {
            if (cnt)
            {
              pattern.anapat[casemap[*pat]] = cnt;
            }
            SET( casemap[*pat], pattern.ananot );
          }
        }
        ++len;
      }
    }
    if (!pattern.anagram)
    {
      if (!not)
      {
        puts( "Error: empty or useless anagram pattern." );
        exit( 1 );
      }
      // Only excluded letters were given: nothing is required and nothing
      // needs to be copied to the counts.
      pattern.anagram = -1;
      pattern.anapat['*'] = -1;
      amin = 1;
      amax = 0;
    }
    else if (pattern.anagram > len)
    {
      pattern.anagram = len;
    }
    if (only)
    {
      if (!option.range)
      {
        option.min = (slash) ? pattern.anagram : len;
        if (!pattern.anapat['*'])
        {
          option.max = len;
        }
      }
      else
      {
        if (option.min < pattern.anagram)
        {
          if (slash)
          {
            option.min = pattern.anagram;
            if (option.max < pattern.anagram)
            {
              option.max = pattern.anagram;
            }
          }
          else
          {
            pattern.anagram = option.min;
          }
        }
        if (pattern.anagram < option.min || option.max > pattern.anagram)
        {
          pattern.anapat['*'] = -1;
        }
      }
    }
    else if (pattern.anagram > 1)
    {
      if (!slash)
      {
        pattern.anagram = 1;
      }
      if (len < option.min - (min - wild))
      {
        pattern.anapat['?'] += option.min - (min - wild) - len;
        ANA( '?' );
      }
    }
    anabeg = pattern.anapat + amin;
    cntbeg = count + amin;
    cntlen = amax - amin + 1;
  }
}


// Turn a set into a bit mask of the characters it accepts.  P points to the
// opening '[' and S is the mask to add to, which must have (at least) 32
// bytes.  The set may start with '!' or '^' to negate it and may contain
// ranges ("a-z"); a ']' directly after the opening is taken as a member.
// Every member also becomes a letter in the character map.  Returns a pointer
// to the closing ']'; the program is stopped if there is none.
uchar* process_set( uchar* p, uchar* s )
{
  bool negate;
  int  lo, hi;
  int  i;

  ++p;
  negate = (*p == '!' || *p == '^');
  if (negate)
  {
    ++p;
  }

  for (;;)
  {
    if (*p == '\0')
    {
      puts( "Error: set missing ']'." );
      exit( 1 );
    }

    lo = hi = casemap[*p];
    // A dash is only a range if there is something after it.
    if (p[1] == '-' && p[2] != '\0' && p[2] != ']')
    {
      p += 2;
      hi = casemap[*p];
      if (hi < lo)
      {
        const int swap = lo;
        lo = hi;
        hi = swap;
      }
    }

    while (lo <= hi)
    {
      SET( lo, s );
      set_alpha( lo );
      ++lo;
    }

    if (*++p == ']')
    {
      break;
    }
  }

  if (negate)
  {
    // Flip every bit to accept what is NOT in the set.
    for (i = 0; i < 32; ++i)
    {
      s[i] = (uchar)~s[i];
    }
  }

  return p;
}


// Match the NUL-terminated pattern PAT against the start of WORD.  A '?'
// accepts any letter; a character below 32 is the (one-based) index of a set;
// a digit must stand for the same letter every time (and, with the unique
// option, for a different letter to every other digit); anything else must be
// the letter itself.  Each letter of WORD matched by something other than '?'
// is set to zero, even if the match fails later on.
// Precondition: WORD (containing only letters) is at least as long as PAT.
bool match_pattern( const uchar* pat, uchar* word )
{
  for (; *pat != '\0'; ++pat, ++word)
  {
    const uchar want = *pat;
    uchar have;

    if (want == '?')
    {
      continue;
    }

    have = *word;
    if (want < 32)
    {
      if (!TST( have, set[want-1] ))
      {
        return false;
      }
    }
    else if (is_digit( want ))
    {
      const int d = want - '0';
      if (pattern.digit[d] != 0)
      {
        if (pattern.digit[d] != have)
        {
          return false;
        }
      }
      else
      {
        // First use of this digit: claim the letter for it.
        if (option.unique)
        {
          if (TST( have, pattern.seen ))
          {
            return false;
          }
          SET( have, pattern.seen );
        }
        pattern.digit[d] = have;
      }
    }
    else if (want != have)
    {
      return false;
    }

    *word = 0;
  }

  return true;
}


// Match the infix patterns, in order, against the part of WORD between the
// prefix and the suffix.  WORD holds LEN letters.  The letters matched are
// set to zero (see match_pattern); when an anagram is to follow, letters
// zeroed by a failed attempt are put back, so that only the letters of the
// successful match are missing.  Returns true if every infix was found.
// Precondition: prefix and suffix characters have already matched and the
//               remaining length of WORD is at least infix characters.
bool match_infix( uchar* word, int len )
{
  const uchar* end;
  uchar digit[10];
  uchar seen[32];
  uchar wrd[MAX_WORD];
  uchar first;
  int   p;

  // The last place the first infix can start and still leave room for the
  // other infixes and the suffix.  (Searching any further would match letters
  // belonging to the suffix, or whatever lies beyond the end of the word.)
  end   = word + len - pattern.suffix - pattern.infix;
  word += pattern.len[0];

  for (p = 1; p < pattern.pats; ++p)
  {
    const int plen = pattern.len[p];

    // Store the current digit status in able to restore it in case of failure.
    if (pattern.dig[p])
    {
      memcpy( digit, pattern.digit, sizeof(pattern.digit) );
      if (option.unique)
      {
        memcpy( seen, pattern.seen, sizeof(pattern.seen) );
      }
    }

    first = *pattern.pat[p];
    if (is_alpha( first ))
    {
      // Look for the first letter before trying the rest of the pattern.
      const uchar* pat = pattern.pat[p] + 1;
      do
      {
        if (casemap[*word++] == first)
        {
          // Store the letters about to be tested.
          if (pattern.anagram)
          {
            memcpy( wrd, word, plen - 1 );
          }
          if (match_pattern( pat, word ))
          {
            *--word = 0;
            goto next;
          }
          if (pattern.dig[p])
          {
            memcpy( pattern.digit, digit, sizeof(pattern.digit) );
            if (option.unique)
            {
              memcpy( pattern.seen, seen, sizeof(pattern.seen) );
            }
          }
          if (pattern.anagram)
          {
            memcpy( word, wrd, plen - 1 );
          }
        }
      } while (word <= end);
    }
    else
    {
      do
      {
        // Store the letters about to be tested.
        if (pattern.anagram)
        {
          memcpy( wrd, word, plen );
        }
        if (match_pattern( pattern.pat[p], word ))
        {
          goto next;
        }
        if (pattern.dig[p])
        {
          memcpy( pattern.digit, digit, sizeof(pattern.digit) );
          if (option.unique)
          {
            memcpy( pattern.seen, seen, sizeof(pattern.seen) );
          }
        }
        if (pattern.anagram)
        {
          memcpy( word, wrd, plen );
        }
      } while (++word <= end);
    }
    return false;

  next:
    // Continue after this infix; the next one has that much more room.
    word += plen;
    end  += plen;
  }

  return true;
}


// Determine if the non-matched (non-zero) letters in the LEN letters of WORD
// can be found in the anagram set.  A letter is taken from the anagram's
// letters if possible (and is then set to zero in WORD); otherwise it must
// not be an excluded letter and needs either a blank or the anagram to allow
// extra letters.  Succeeds if the required number of anagram letters were
// used - exactly that number for an exact anagram.
bool match_anagram( uchar* word, int len )
{
  int need = pattern.anagram;
  int pos;

  // Start with a fresh copy of the letter counts.
  memcpy( cntbeg, anabeg, cntlen );

  for (pos = len - 1; pos >= 0; --pos)
  {
    const uchar ch = word[pos];

    if (ch == 0)
    {
      continue;
    }

    if (count[ch] != 0)
    {
      --count[ch];
      word[pos] = 0;
      --need;
      continue;
    }

    if (TST( ch, pattern.ananot ))
    {
      return false;
    }

    if (count['?'] != 0)
    {
      --count['?'];
      --need;
    }
    else if (!pattern.anapat['*'])
    {
      return false;
    }
  }

  if (pattern.exact)
  {
    return (need == 0);
  }
  return (need <= 0);
}


int process_options( int argc, char* argv[] )
{
  int arg;
  int pat = 0;

  if (argc == 1 || strcmp( argv[1], "--help" ) == 0)
  {
    help( 0 );
  }
  if (strcmp( argv[1], "--version" ) == 0)
  {
    help( 'v' );
  }

  for (arg = 1; arg < argc; ++arg)
  {
    if (argv[arg][0] == '-' || argv[arg][0] == '/')
    {
      int j;
      for (j = 1; argv[arg][j]; ++j)
      {
	switch (argv[arg][j])
	{
	  case '?': help( argv[arg][j+1] );

	  case 'b': option.balance = true;  break;
	  case 'c': option.ignore  = false; break;
	  case 'd': option.unique  = false; break;
	  case 'p': option.literal = true;  break;
	  case 's': option.sort    = 1;     break;
	  case 'S': option.sort    = 2;     break;

	  case 'f':
	  {
	    char* name;
	    if (argv[arg][j+1] == '\0')
	    {
	      if (argv[arg+1] == NULL)
	      {
		puts( "Error: missing filename." );
		exit( 1 );
	      }
	      name = argv[++arg];
	    }
	    else
	    {
	      name = argv[arg]+j+1;
	    }
	    if (*name == '=')
	    {
	      strcpy( get_list_path( argv[0] ), name+1 );
	    }
	    else
	    {
	      strcpy( option.name, name );
	    }
	  }
	  goto next;

	  case '/': break;

	  default:
	    if (is_digit( (uchar)argv[arg][j] ))
	    {
	      option.diff = option.diff * 10 + argv[arg][j] - '0';
	      break;
	    }
	    printf( "Error: unknown option '%c'.\n", argv[arg][j] );
	    exit( 1 );
	  break;
	}
      }
    }
    else if (argv[arg][0] == '!' && argv[arg][1] == '\0')
    {
      option.not = true;
    }
    else if (is_digit( (uchar)*argv[arg] ))
    {
      int   len[2];
      char* end;
      len[0] = (int)strtol( argv[arg], &end, 10 );
      if (len[0] < 100)
      {
	if (*end == '\0')
	{
	  option.min = option.max = len[0];
	  option.range = true;
	  continue;
	}
	if (*end == '+' || *end == '-')
	{
	  if (end[1] == '\0')
	  {
	    if (*end == '+')
	    {
	      option.min = len[0];
	    }
	    else // (*end == '-')
	    {
	      option.max = len[0];
	    }
	    option.range = true;
	    continue;
	  }
	  if (is_digit( (uchar)end[1] ))
	  {
	    len[1] = (int)strtol( end+1, &end, 10 );
	    if (len[1] < 100 && *end == '\0')
	    {
	      option.min = len[0];
	      option.max = len[1];
	      option.range = true;
	      continue;
	    }
	  }
	}
      }
      pat = arg;
    }
    else
    {
      pat = arg;
    }
  next: ;
  }

  return pat;
}


// Determine the default path of the word list.  For Windows and DOS it is the
// same directory as the executable (found from the module name or ARGV0);
// for Unix it is /usr/share/dict/.
// Fills option.name with the path and returns a pointer to where the
// filename should be placed.  For Windows and DOS the name of the executable
// is still there, to be overwritten by the caller.
char* get_list_path( const char* argv0 )
{
#if defined( _WIN32 ) || defined( __MSDOS__ )
  char* scan;
  char* leaf = option.name;

# ifdef _WIN32
  GetModuleFileName( NULL, option.name, sizeof(option.name) );
# else
  strcpy( option.name, argv0 );
# endif

  // The filename starts after the last separator, if there is one.
  scan = option.name;
  while (*scan != '\0')
  {
    const char ch = *scan++;
    if (ch == '\\' || ch == '/' || ch == ':')
    {
      leaf = scan;
    }
  }
  return leaf;
#else
  static const char dict_dir[] = "/usr/share/dict/";

  strcpy( option.name, dict_dir );
  return option.name + (sizeof(dict_dir) - 1);
#endif
}


// Display the help and exit the program (with a status of 0).  TYPE selects
// what is shown: 'x' for the example patterns; 'w' for the equivalents to
// the WORDY programs; 'v' for just the program name and version; anything
// else for the name and version followed by the full help.
void help( char type )
{
  switch (type)
  {
    case 'x':
      puts( "word         is \"word\" in the list?\n"
            "w..d..s      seven-letter words starting with \"w\", having \"d\" in the middle\n"
            "               and ending with \"s\"\n"
            "pre*in*end   words starting with \"pre\", containing \"in\" and ending with \"end\"\n"
            "*112233*     words containing three consecutive (and different) doubles\n"
            "5            all the five-letter words\n"
            "5+           all the five-, six-, seven-, ... letter words\n"
            "5-           all the one- to five-letter words\n"
            "5-7 *[st]    all the five- to seven-letter words that end with \"s\" or \"t\"\n"
            ";abcd*       all the words that contain the letters \"abcd\"\n"
            ";/abcd*      all the words that contain at least one from \"abcd\"\n"
            "..;abcd      all the two-letter words that can be made out of \"abcd\"\n"
            "2 ;abcd      as above\n"
            ";ab/cd       all the two- to four-letter words that can be made out of \"abcd\"\n"
            ";abcd..      all the six-letter words that contain the letters \"abcd\"\n"
            "6 ;abcd      as above\n"
            "6 ;abc/d     all the six-letter words that contain three or four from \"abcd\"\n"
            "6 ;2/abcd    all the six-letter words that contain at least two from \"abcd\"\n"
            "6 ;/abcd     all the six-letter words that contain at least one from \"abcd\"\n"
            "6 ;abcd=     all the six-letter words that contain exactly one each of \"abcd\"\n"
            "6 ;2=abcd    all the six-letter words that contain exactly two from \"abcd\"\n"
            "-1 bear;yt   words different to \"bear\" by one letter, which is \"y\" or \"t\"\n"
            "*a*e*i*o*u*;[!aeiou]   words that have the vowels exactly in order"
          );
      break;

    case 'w':
      puts( "WORDY\t\t\tFW\n"
            "-----\t\t\t----\n"
            "search  XXXX FILENAME\tfw -f FILENAME XXXX\n"
            "prefix  super\t\tfw super*\n"
            "suffix  ing\t\tfw *ing\n"
            "inset   work\t\tfw *work*\n"
            "pattern 1231234\t\tfw 1231234\n"
            "xfind   w?ee?\t\tfw w?ee?\n"
            "xfind   ???????\t\tfw 7\n"
            "anagram retin??\t\tfw ;/retin??\n"
            "bingo   abcde??\t\tfw ;abcde??\n"
            "hookto  band adeinos\tfw *band*;adeinos\n"
            "unscr   owkni?7r\tfw ......r;owkni.\n"
            "\t\t\tfw 7 *r;owkni\n"
            "ofset   abcd 3\t\tfw ;abc/d*\n"
            "\t\t\tfw ;3/abcd*\n"
            "xtract  qt~er\t\tfw ;qt*[!er]\n"
            "select  aqtz~er 3\tfw ;aqt=z*[!er]\n"
            "vowels  4\t\tfw ;4*aeiou\n"
            "1dif    bear\t\tfw -1 bear\n"
            "1dif    bear 2\t\tfw -2 bear"
          );
      break;

    default:
      puts( "Find Words by Jason Hood <jadoxa@yahoo.com.au>.\n"
            "Version "PVERS" ("PDATE").  Freeware.\n"
            "http://fw.adoxa.cjb.net/"
          );
      if (type != 'v')
      {
        puts( "\n"
              "Search a word list for words matching a pattern.\n"
              "\n"
              "fw [-?[x|w]] [-bcdpsS] [-f list] [-N] [range] [!] [pattern]\n"
              "\n"
              "-?        this help\n"
              "-?x       example patterns\n"
              "-?w       equivalents to WORDY\n"
              "-b        show anagram balances\n"
              "-c        match case\n"
              "-d        allow digits to be duplicated\n"
              "-f list   use LIST for the word list (\"-\" for standard input)\n"
              "-p        treat dot and digits as letters, not wildcards\n"
              "-s        sort by length, shortest first\n"
              "-S        sort by length, longest first\n"
              "-N        find words different by N letters\n"
              "range     the minimum and maximum word lengths to consider:\n"
              "  N-      N or fewer letters\n"
              "  N       exactly N letters\n"
              "  N+      N or more letters\n"
              "  N-M     between N and M letters, inclusive\n"
              "!         find words which do NOT match the pattern\n"
              "pattern   the pattern to match, which may contain:\n"
              "  *             match any number of letters\n"
              "  ?             match a single letter\n"
              "  N             a digit 0-9 matches the same letter it first matched\n"
              "  [SET]         match a single letter from SET\n"
              "  [!SET]        match a single letter NOT from SET\n"
              "  ;SET          words CONSISTING of ALL the letters from SET (may include '?')\n"
              "  ;SET*         words CONTAINING ALL the letters from SET\n"
              "  ;SET*[!NOT]   as above, but exclude words with letters from NOT\n"
              "  ;[!SET]       words not containing the letters from SET\n"
              "  ;/SET         words CONSISTING of ANY letters from SET\n"
              "  ;/SET*        words CONTAINING ANY letters from SET\n"
              "  ;SE/T         words consisting of AT LEAST TWO of the letters from SET; or\n"
              "                words consisting of ALL or ALL BUT ONE of the letters from SET\n"
              "  ;N/SET        words consisting of AT LEAST N letters from SET;\n"
              "  ;SE=T*        words containing ONLY TWO of the letters from SET; or\n"
              "                words containing ALL BUT ONE of the letters from SET\n"
              "  ;N=SET        words CONSISTING of EXACTLY N letters from SET\n"
              "  ;N=SET*       words CONTAINING EXACTLY N letters from SET\n"
              "  ;N*SET        words CONTAINING EXACTLY N letters from SET (with duplicates)\n"
              "  PAT;SET       words matching PAT and CONTAINING ANY from SET\n"
              "\n"
              "Notes: the range will be adjusted to fit the pattern;\n"
              "       '.' is an alternative to '?';\n"
              "       '^' is an alternative to '!';\n"
              "       ':' is an alternative to ';';\n"
              "       punctuation is usually ignored, but if it is in the pattern\n"
              "        it must match in the word.\n"
              "\n"
              "The default word list is taken from standard input if it is redirected,\n"
              "otherwise from "
#if defined( _WIN32 ) || defined( __MSDOS__ )
                             "WORD.LST in the same directory as the executable.\n"
#else
                             "/usr/share/dict/word.lst.\n"
#endif
              "If LIST starts with '=', it will be substituted with the above path."
            );
      }
      break;
  }
  exit( 0 );
}
