// This program demonstrates the use of a traditional DAWG as applied to in-order anagramming, where the data structure really shines. #include #include #include #define MAX 15 #define MAX_INPUT 120 #define BIG_IT_UP -32 #define DAWG_DATA "Dawg_For_Lexicon.dat" #define WORD_LIST_OUTPUT "Dawg_Word_List.txt" // These values define the format of the "Dawg" node encoding. #define LETTER_BIT_SHIFT 25 #define LETTER_BIT_MASK 1040187392 #define CHILD_INDEX_BIT_MASK 33554431 #define END_OF_WORD_BIT_MASK 2147483648 #define END_OF_LIST_BIT_MASK 1073741824 // Define the boolean type as an enumeration. typedef enum {FALSE = 0, TRUE = 1} Bool; typedef Bool* BoolPtr; // When reading strings from a file, the new-line character is appended, and this macro will remove it before processing. #define CUT_OFF_NEW_LINE(string) (string[strlen(string) - 1] = '\0') // For speed, define these two simple functions as macros. They modify the "LettersToWorkWith" string in the recursive anagrammer. #define REMOVE_CHAR_FROM_STRING(thestring, theposition, shiftsize) ( memmove(thestring + theposition, thestring + theposition + 1, shiftsize) ) #define INSERT_CHAR_IN_STRING(thestring, theposition, thechar, shiftsize) ( (memmove(thestring + theposition + 1, thestring + theposition, shiftsize)), (thestring[theposition] = thechar) ) // This function converts any lower case letters in the string "RawWord", into capitals, so that the whole string is made of capital letters. void MakeMeAllCapital(char *RawWord){ unsigned int X; for ( X = 0; X < strlen(RawWord); X++ ){ if ( RawWord[X] >= 'a' && RawWord[X] <= 'z' ) RawWord[X] = RawWord[X] + BIG_IT_UP; } } // This function removes all non-letter chars from "ThisString". void RemoveIllegalChars(char *ThisString){ unsigned int X; for ( X = 0; X < strlen(ThisString); X++ ) { if ( !(ThisString[X] >= 'A' && ThisString[X] <= 'Z') && !(ThisString[X] >= 'a' && ThisString[X] <= 'z') ) { memmove(ThisString + X, ThisString + X + 1, strlen(ThisString) - X); X -= 1; } } } // This is a simple Bubble Sort. There is no need for a an optimal algorithm here, because the user input of this program will be very short. void Alphabetize(char* Word){ int X; int Y; char WorkingChar; int WordSize = strlen(Word); /* This nested "for" loop structure ensures that the highest letter filters to the back of the string each time that we increment X in the outer loop. */ for( X = 1; X < WordSize; X++ ) { for( Y = 0; Y <= (WordSize - X - 1); Y++ ) { if (Word[Y] > Word[Y + 1]){ WorkingChar = Word[Y + 1]; Word[Y + 1] = Word[Y]; Word[Y] = WorkingChar; } } } } // Define the "Dawg" functionality as macros for speed. #define DAWG_LETTER(thearray, theindex) (((thearray[theindex]&LETTER_BIT_MASK)>>LETTER_BIT_SHIFT) + 'A') #define DAWG_END_OF_WORD(thearray, theindex) ((thearray[theindex]&END_OF_WORD_BIT_MASK)? TRUE: FALSE) #define DAWG_NEXT(thearray, theindex) ((thearray[theindex]&END_OF_LIST_BIT_MASK)? 0: (theindex + 1)) #define DAWG_CHILD(thearray, theindex) (thearray[theindex]&CHILD_INDEX_BIT_MASK) // A recursive depth first traversal of "TheDawg" lexicon to produce a readable wordlist in "TheStream". void DawgTraverseLexiconRecurse(unsigned int *TheDawg, int CurrentIndex, int FillThisPosition, char *WorkingString, int *TheCount, FILE *TheStream){ int PassOffIndex; WorkingString[FillThisPosition] = DAWG_LETTER(TheDawg, CurrentIndex); if ( DAWG_END_OF_WORD(TheDawg, CurrentIndex) ) { *TheCount += 1; WorkingString[FillThisPosition + 1] = '\0'; // Include the Windows Carriage Return char. fprintf(TheStream, "|%6d|-|%-15s|\r\n", *TheCount, WorkingString); } if ( PassOffIndex = DAWG_CHILD(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition + 1, WorkingString, TheCount, TheStream); if ( PassOffIndex = DAWG_NEXT(TheDawg, CurrentIndex) ) DawgTraverseLexiconRecurse(TheDawg, PassOffIndex, FillThisPosition, WorkingString, TheCount, TheStream); } // Move through the entire "ThisDawg" lexicon, and print the words into "ThisStream". void DawgTraverseLexicon(unsigned int *ThisDawg, FILE *ThisStream){ char *BufferWord = (char*)malloc((MAX + 1)*sizeof(char)); int *WordCounter = (int*)malloc(sizeof(int)); *WordCounter = 0; // Include the Windows Carriage Return char. fprintf(ThisStream, "This is the lexicon contained in the file |%s|.\r\n\r\n", DAWG_DATA); DawgTraverseLexiconRecurse(ThisDawg, 1, 0, BufferWord, WordCounter, ThisStream); free(BufferWord); free(WordCounter); } // This function is the core component of this program. It requires that "UnusedChars" be in alphabetical order because the tradition Dawg is a list based structure. void DawgAnagrammerRecurse(unsigned int *DawgOfWar, int CurrentIndex, char *ToyWithMe, int FillThisPosition, char *UnusedChars, int SizeOfBank, int *ForTheCounter){ int X; char PreviousChar = '\0'; char CurrentChar; int TempIndex = DAWG_CHILD(DawgOfWar, CurrentIndex); ToyWithMe[FillThisPosition] = DAWG_LETTER(DawgOfWar, CurrentIndex); //ToyWithMe[FillThisPosition + 1] = '\0'; //printf("UnusedChars|%s|\n", UnusedChars); if ( DAWG_END_OF_WORD(DawgOfWar, CurrentIndex) ) { *ForTheCounter += 1; ToyWithMe[FillThisPosition + 1] = '\0'; printf("|%4d| - |%-15s|\n", *ForTheCounter, ToyWithMe); } if ( (SizeOfBank > 0) && (TempIndex != 0) ) { for ( X = 0; X < SizeOfBank; X++ ) { CurrentChar = UnusedChars[X]; //printf("Looking For |%c|\n", CurrentChar); if ( CurrentChar == PreviousChar ) continue; do { //printf("Compare -|%c| with ^|%c|-|%d|\n", CurrentChar, DAWG_LETTER(DawgOfWar, TempIndex), TempIndex); if ( CurrentChar == DAWG_LETTER(DawgOfWar, TempIndex) ) { //printf("Bingo\n"); REMOVE_CHAR_FROM_STRING(UnusedChars, X, SizeOfBank - X); DawgAnagrammerRecurse(DawgOfWar, TempIndex, ToyWithMe, FillThisPosition + 1, UnusedChars, SizeOfBank - 1, ForTheCounter); //printf("Back|%d|\n", FillThisPosition); INSERT_CHAR_IN_STRING(UnusedChars, X, CurrentChar, SizeOfBank - X); TempIndex = DAWG_NEXT(DawgOfWar, TempIndex); break; } else if ( CurrentChar < DAWG_LETTER(DawgOfWar, TempIndex) ) break; } while ( TempIndex = DAWG_NEXT(DawgOfWar, TempIndex) ); if ( TempIndex == 0 ) break; PreviousChar = CurrentChar; } } } // This function uses "MasterDawg" to determine the all of the words that can be made from the letters in "CharBank". // The return value is the total number of words found. int DawgAnagrammer(unsigned int *MasterDawg, char * CharBank){ int X; int Result; int BankSize = strlen(CharBank); int *ForTheCount = (int*)malloc(sizeof(int)); char *TheWordSoFar = (char*)malloc((MAX + 1)*sizeof(char)); char *LettersToWorkWith = (char*)malloc((MAX_INPUT)*sizeof(char)); char PreviousChar = '\0'; char CurrentChar; int NumberOfLetters; strcpy(LettersToWorkWith, CharBank); NumberOfLetters = strlen(LettersToWorkWith); *ForTheCount = 0; for ( X = 0; X < BankSize; X++ ) { CurrentChar = CharBank[X]; // Move to the next letter if we have already processed the "CurrentChar". if ( CurrentChar == PreviousChar ) continue; // We can assume that every letter in the lexicon exists on the first row, so don't bother looking for them. Just remove the one we are using and plug away. REMOVE_CHAR_FROM_STRING(LettersToWorkWith, X, NumberOfLetters - X); DawgAnagrammerRecurse(MasterDawg, CurrentChar - '@', TheWordSoFar, 0, LettersToWorkWith, NumberOfLetters - 1, ForTheCount); INSERT_CHAR_IN_STRING(LettersToWorkWith, X, CurrentChar, NumberOfLetters - X); PreviousChar = CurrentChar; } Result = *ForTheCount; free(ForTheCount); free(TheWordSoFar); free(LettersToWorkWith); return Result; } int main() { int NumberOfNodes; unsigned int *TheDawgArray; FILE *Lexicon; FILE *WordList; char *DecisionInput; Bool FetchData = TRUE; char FirstChar; int InputSize; DecisionInput = (char*)malloc(MAX_INPUT*sizeof(char)); Lexicon = fopen(DAWG_DATA, "rb"); fread(&NumberOfNodes, sizeof(int), 1, Lexicon); printf("The lexicon DAWG contains |%d| nodes.\n", NumberOfNodes); TheDawgArray = (unsigned int*)malloc(NumberOfNodes*sizeof(unsigned int)); fread(TheDawgArray, sizeof(unsigned int), NumberOfNodes, Lexicon); fclose(Lexicon); printf("\nLexicon data file TWL06 has been read into memory.\n\n"); // This program relies on a compressed lexicon data file, so allow the user to see a readable word list in an output file. while ( FetchData == TRUE ) { printf("\nWould you like to print the Dawg word list into a file?(Y/N): "); fgets(DecisionInput, MAX_INPUT, stdin); FirstChar = DecisionInput[0]; if ( FirstChar == 'Y' || FirstChar == 'y' ) { WordList = fopen(WORD_LIST_OUTPUT, "w"); DawgTraverseLexicon(TheDawgArray, WordList); fclose(WordList); FetchData = FALSE; } else if ( FirstChar == 'N' || FirstChar == 'n' ) FetchData = FALSE; } FetchData = TRUE; // Now the user can enter strings of letters for anagramming, to see the words that can be made from them. while ( FetchData == TRUE ) { printf("\nEnter the string of letters that you want to anagram(2 or more letters): "); fgets(DecisionInput, MAX_INPUT, stdin); CUT_OFF_NEW_LINE(DecisionInput); RemoveIllegalChars(DecisionInput); MakeMeAllCapital(DecisionInput); Alphabetize(DecisionInput); InputSize = strlen(DecisionInput); if ( InputSize >= 2 ) { printf("\nThis is the set of letters that you just input |%s|.\n\n", DecisionInput); printf("\n|%d| Words were found in the lexicon Dawg.\n", DawgAnagrammer(TheDawgArray, DecisionInput)); } else FetchData = FALSE; } printf("\nThank you for playing the GoogleDawgAnagrammer. GAME OVER.\n\n"); return 0; }