// This program tests the validity of a Blitzkrieg DAWG file, and demonstrates the new Dawg-Node configuration. // Updated on Monday, December 29, 2011. #include #include #include #define MAX 15 #define MAX_INPUT 120 #define BIG_IT_UP -32 #define LOWER_IT 32 #define SIZE_OF_CHARACTER_SET 26 #define WILD_CARD '?' #define DAWG_DATA "Traditional_Dawg_For_Word-List.dat" #define WORD_LIST_OUTPUT "New_Dawg_Word_List.txt" // These values define the format of the "Dawg" node encoding. #define CHILD_BIT_SHIFT 10 #define END_OF_WORD_BIT_MASK 0X00000200 #define END_OF_LIST_BIT_MASK 0X00000100 #define LETTER_BIT_MASK 0X000000FF // Define the boolean type as an enumeration. typedef enum {FALSE = 0, TRUE = 1} Bool; typedef Bool* BoolPtr; // When reading strings from a file, the new-line character is appended, and this macro will remove it before processing. #define CUT_OFF_NEW_LINE(string) (string[strlen(string) - 1] = '\0') // For speed, define these two simple functions as macros. They modify the "LettersToWorkWith" string in the recursive anagrammer. #define REMOVE_CHAR_FROM_STRING(thestring, theposition, shiftsize) ( memmove(thestring + theposition, thestring + theposition + 1, shiftsize) ) #define INSERT_CHAR_IN_STRING(ts, tp, thechar, shiftsize) ( (memmove(ts + tp + 1, ts + tp, shiftsize)), (ts[tp] = thechar) ) // These are the predefined characters that exist in the DAWG data file. const unsigned char CharacterSet[SIZE_OF_CHARACTER_SET] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; // Enter the graph at the correct position without scrolling through a predefined list of level "0" nodes. const unsigned char EntryNodeIndex[SIZE_OF_CHARACTER_SET] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }; // This function converts any lower case letters in the string "RawWord", into capitals, so that the whole string is made of capital letters. void MakeMeAllCapital(char *RawWord){ int X; for ( X = 0; X < strlen(RawWord); X++ ){ if ( RawWord[X] >= 'a' && RawWord[X] <= 'z' ) RawWord[X] = RawWord[X] + BIG_IT_UP; } } // This function removes any char from "ThisString" which does not exist in "CharacterSet". void RemoveIllegalChars(unsigned char *ThisString){ int X; int Y; int Length = strlen(ThisString); for ( X = 0; X < Length; X++ ) { if ( ThisString[X] == WILD_CARD ) goto CheckNextPosition; for ( Y = 0; Y < SIZE_OF_CHARACTER_SET; Y++ ) { if ( ThisString[X] == CharacterSet[Y] ) goto CheckNextPosition; } // The "char" in position "X" is illegal. memmove(ThisString + X, ThisString + X + 1, Length - X); X -= 1; Length -= 1; CheckNextPosition:; } } // Return the index position of character "ThisChar", as it appears in "CharacterSet", and it must exist in the set. unsigned char CharToIndexConversion(unsigned char ThisChar){ int Y; if ( ThisChar == WILD_CARD ) return 255; for ( Y = 0; Y < SIZE_OF_CHARACTER_SET; Y++ ) { if ( ThisChar == CharacterSet[Y] ) return Y; } } // This is a simple Bubble Sort. void Alphabetize(unsigned char *Word){ int X; int Y; int WildCardCount = 0; unsigned char WorkingChar; int WordSize = strlen(Word); for( X = 1; X < WordSize; X++ ) { for( Y = 0; Y <= (WordSize - X - 1); Y++ ) { if ( CharToIndexConversion(Word[Y]) > CharToIndexConversion(Word[Y + 1]) ) { WorkingChar = Word[Y + 1]; Word[Y + 1] = Word[Y]; Word[Y] = WorkingChar; } } } if ( Word[WordSize - 1] == WILD_CARD ) { for ( X = WordSize - 1; X >= 0; X-- ) { if ( Word[X] == WILD_CARD ) WildCardCount += 1; } memmove(Word + WildCardCount, Word, WordSize - WildCardCount); memset(Word, WILD_CARD, WildCardCount); } } // Define the "Dawg" functionality as macros for speed. "DAWG_CHILD()" Now uses only a single bit shift operation. #define DAWG_LETTER(thearray, theindex) (thearray[theindex]&LETTER_BIT_MASK) #define DAWG_END_OF_WORD(thearray, theindex) (thearray[theindex]&END_OF_WORD_BIT_MASK) #define DAWG_NEXT(thearray, theindex) ((thearray[theindex]&END_OF_LIST_BIT_MASK)? 0: theindex + 1) #define DAWG_CHILD(thearray, theindex) (thearray[theindex]>>CHILD_BIT_SHIFT) // A recursive depth first traversal of "TheDawg" lexicon to produce a readable wordlist in "TheStream". void DawgTraverseLexiconRecurse(unsigned int *TheDawg, int CurrentIndex, int FillThisPosition, char *WorkingString, int *TheCount, FILE *TheStream){ int PassOffIndex; WorkingString[FillThisPosition] = DAWG_LETTER(TheDawg, CurrentIndex); if ( DAWG_END_OF_WORD(TheDawg, CurrentIndex) ) { *TheCount += 1; WorkingString[FillThisPosition + 1] = '\0'; // Include the Windows Carriage Return char. fprintf(TheStream, "|%6d|-|%-15s|\r\n", *TheCount, WorkingString); } if ( PassOffIndex = DAWG_CHILD(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition + 1, WorkingString, TheCount, TheStream); if ( PassOffIndex = DAWG_NEXT(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition, WorkingString, TheCount, TheStream); } // Move through "ThisDawg" lexicon, and print the words into "ThisStream". void DawgTraverseLexicon(unsigned int *ThisDawg, FILE *ThisStream){ char *BufferWord = (char*)malloc((MAX + 1)*sizeof(char)); int *WordCounter = (int*)malloc(sizeof(int)); *WordCounter = 0; // Include the Windows Carriage Return char. fprintf(ThisStream, "This is the lexicon contained in the file |%s|.\r\n\r\n", DAWG_DATA); DawgTraverseLexiconRecurse(ThisDawg, 1, 0, BufferWord, WordCounter, ThisStream); free(BufferWord); free(WordCounter); } // This function is the core component of this program. // It requires that "UnusedChars" be in alphabetical order because the tradition Dawg is a list based structure. void DawgAnagrammerRecurse(int *DawgOfWar, int CurrentIndex, unsigned char *ToyWithMe, int FillThisPosition, unsigned char *UnusedChars, int SizeOfBank, int *ForTheCounter, Bool WildCard){ int X; char PreviousChar = '\0'; char CurrentChar; int TempIndex = DAWG_CHILD(DawgOfWar, CurrentIndex); ToyWithMe[FillThisPosition] = DAWG_LETTER(DawgOfWar, CurrentIndex) + (WildCard? LOWER_IT: 0); if ( DAWG_END_OF_WORD(DawgOfWar, CurrentIndex) ) { *ForTheCounter += 1; ToyWithMe[FillThisPosition + 1] = '\0'; printf("|%4d| - |%-15s|%s\n", *ForTheCounter, ToyWithMe, SizeOfBank? "\0": "--> TRUE ANAGRAM"); } if ( (SizeOfBank > 0) && (TempIndex != 0) ) { for ( X = 0; X < SizeOfBank; X++ ) { CurrentChar = UnusedChars[X]; if ( CurrentChar == PreviousChar ) continue; if ( CurrentChar == WILD_CARD ) { REMOVE_CHAR_FROM_STRING(UnusedChars, X, SizeOfBank - X); while ( TempIndex ) { DawgAnagrammerRecurse(DawgOfWar, TempIndex, ToyWithMe, FillThisPosition + 1, UnusedChars, SizeOfBank - 1, ForTheCounter, TRUE); TempIndex = DAWG_NEXT(DawgOfWar, TempIndex); } INSERT_CHAR_IN_STRING(UnusedChars, X, CurrentChar, SizeOfBank - X); TempIndex = DAWG_CHILD(DawgOfWar, CurrentIndex); } else { do { if ( CurrentChar == DAWG_LETTER(DawgOfWar, TempIndex) ) { REMOVE_CHAR_FROM_STRING(UnusedChars, X, SizeOfBank - X); DawgAnagrammerRecurse(DawgOfWar, TempIndex, ToyWithMe, FillThisPosition + 1, UnusedChars, SizeOfBank - 1, ForTheCounter, FALSE); INSERT_CHAR_IN_STRING(UnusedChars, X, CurrentChar, SizeOfBank - X); TempIndex = DAWG_NEXT(DawgOfWar, TempIndex); break; } else if ( CurrentChar < DAWG_LETTER(DawgOfWar, TempIndex) ) break; } while ( TempIndex = DAWG_NEXT(DawgOfWar, TempIndex) ); } if ( TempIndex == 0 ) break; PreviousChar = CurrentChar; } } } // This function uses "MasterDawg" to determine the words that can be made from the letters in "CharBank". // The words will be displayed in alphabetical order according to "CharacterSet[]". // The return value is the total number of words found. int DawgAnagrammer(int *MasterDawg, unsigned char * CharBank){ int X; int Y; int Result; int BankSize = strlen(CharBank); int *ForTheCount = (int*)malloc(sizeof(int)); unsigned char *TheWordSoFar = (char*)malloc((MAX + 1)*sizeof(char)); unsigned char *LettersToWorkWith = (unsigned char*)malloc(MAX_INPUT*sizeof(unsigned char)); unsigned char PreviousChar = '\0'; unsigned char CurrentChar; int NumberOfLetters; int CurrentEntryNode; strcpy(LettersToWorkWith, CharBank); Alphabetize(LettersToWorkWith); NumberOfLetters = strlen(LettersToWorkWith); *ForTheCount = 0; for ( X = 0; X < BankSize; X++ ) { CurrentChar = LettersToWorkWith[X]; // Move to the next letter if we have already processed the "CurrentChar". if ( CurrentChar == PreviousChar ) continue; if ( CurrentChar == WILD_CARD ) { REMOVE_CHAR_FROM_STRING(LettersToWorkWith, X, NumberOfLetters - X); for ( Y = 0; Y < SIZE_OF_CHARACTER_SET; Y++ ) { if ( EntryNodeIndex[Y] ) { DawgAnagrammerRecurse(MasterDawg, EntryNodeIndex[Y], TheWordSoFar, 0, LettersToWorkWith, NumberOfLetters - 1, ForTheCount, TRUE); } } INSERT_CHAR_IN_STRING(LettersToWorkWith, X, CurrentChar, NumberOfLetters - X); PreviousChar = CurrentChar; continue; } // Make sure not to enter the graph if NO words begin with "CurrentChar". CurrentEntryNode = EntryNodeIndex[CharToIndexConversion(CurrentChar)]; if ( !CurrentEntryNode ) { PreviousChar = CurrentChar; continue; } REMOVE_CHAR_FROM_STRING(LettersToWorkWith, X, NumberOfLetters - X); DawgAnagrammerRecurse(MasterDawg, CurrentEntryNode, TheWordSoFar, 0, LettersToWorkWith, NumberOfLetters - 1, ForTheCount, FALSE); INSERT_CHAR_IN_STRING(LettersToWorkWith, X, CurrentChar, NumberOfLetters - X); PreviousChar = CurrentChar; } Result = *ForTheCount; free(ForTheCount); free(TheWordSoFar); free(LettersToWorkWith); return Result; } int main() { int NumberOfNodes; int *TheDawgArray; FILE *Lexicon; FILE *WordList; unsigned char *DecisionInput; Bool FetchData = TRUE; unsigned char FirstChar; int InputSize; DecisionInput = (unsigned char *)malloc(MAX_INPUT*sizeof(char)); Lexicon = fopen(DAWG_DATA, "rb"); fread(&NumberOfNodes, sizeof(int), 1, Lexicon); printf("\n The lexicon DAWG contains |%d| nodes.\n", NumberOfNodes); TheDawgArray = (int *)malloc(NumberOfNodes*sizeof(int)); fread(TheDawgArray, sizeof(int), NumberOfNodes, Lexicon); fclose(Lexicon); printf("\n \"%s\" has been read into memory.\n\n", DAWG_DATA); // This program relies on a compressed lexicon data file, // so allow the user to see a readable word list in an output file. while ( FetchData == TRUE ) { printf("\nWould you like to print the Dawg word list into a text file?(Y/N): "); fgets(DecisionInput, MAX_INPUT, stdin); FirstChar = DecisionInput[0]; if ( FirstChar == 'Y' || FirstChar == 'y' ) { WordList = fopen(WORD_LIST_OUTPUT, "w"); DawgTraverseLexicon(TheDawgArray, WordList); fclose(WordList); FetchData = FALSE; } else if ( FirstChar == 'N' || FirstChar == 'n' ) FetchData = FALSE; } FetchData = TRUE; // Now the user can enter strings of letters for anagramming, to see the words that can be made from them. while ( FetchData == TRUE ) { printf("\nEnter anagram letters ( At least 2 letters) - (WildCard = ?): "); fgets(DecisionInput, MAX_INPUT, stdin); CUT_OFF_NEW_LINE(DecisionInput); MakeMeAllCapital(DecisionInput); RemoveIllegalChars(DecisionInput); InputSize = strlen(DecisionInput); if ( InputSize >= 2 ) { printf("\nThis is the set of letters that you just input |%s|.\n\n", DecisionInput); printf("\n|%d| Words were found in the lexicon Dawg.\n", DawgAnagrammer(TheDawgArray, DecisionInput)); } else FetchData = FALSE; } printf("\nThank you for testing the new Blitzkrieg DAWG encoding. GAME OVER.\n\n"); return 0; }