Wednesday, 12 March 2014

Data Structure and Algorithm - Trie

This small program is to implement a TRIE data structure (see Trie on wiki) and algorithm for a dictionary. Its main functionality follows
> apple
> appreciate
> application
------ Add there words into this dictionary
> app?
------ Ending with "?" will  query the dictionary and print out all matching words that starts with "app"
> app-
------ Ending with "-" will remove all words that starts with "app" in the dictionary

Note: extra function can be implemented, for instance adding more patterns to query the dictionary. The main purpose of this program is to implement a dictionary based on TRIE.
Assumption: the words consists of 26 English letters and they are all stored in lower case.
Environment: 2010 Microsoft Visual C++ 2010 Express

//-----------------------------------------------------------------------------------------------------------
// TRIE dictionary C++ public API
// DictionaryTrie.h
#ifndef DICTIONARY_TRIE_H
#define DICTIONARY_TRIE_H

#include <string>
#include <vector>

#define NUMBER_OF_LETTERS 26
struct DictionaryTrie {
char* str;
DictionaryTrie* children[NUMBER_OF_LETTERS];
DictionaryTrie() {
str = NULL;
for (int i = 0; i != NUMBER_OF_LETTERS; ++i) {
children[i] = NULL;
}
}
};

void InitDictionaryTrie(DictionaryTrie** dtriePtr);
void AddWord(DictionaryTrie* root, const char* word);
void RemoveWord(DictionaryTrie* root, const char* word);
void QueryWord(DictionaryTrie* root, const char* word, char** result);
void QueryWord(DictionaryTrie* root, const char* word, std::vector<std::string>& result);
bool FindWord(DictionaryTrie* root, const char* word);
void TraverseDictionaryTrie(DictionaryTrie* root, char** result);
void TraverseDictionaryTrie(DictionaryTrie* root, std::vector<std::string>& result);
void DestoryDictionaryTrie(DictionaryTrie* root);
bool EmptyTrieNode(DictionaryTrie* node);

#endif

//-----------------------------------------------------------------------------------------------------------
// TRIE dictionary C++ private API
// DictionaryTrie_internal.h
#ifndef DICTIONARY_TRIE_INTERNAL_H
#define DICTIONARY_TRIE_INTERNAL_H

#include "DictionaryTrie.h"

DictionaryTrie* LocateWord(DictionaryTrie* root, const char* word);
bool EmptyTrieNode(DictionaryTrie* node);

#endif

//-----------------------------------------------------------------------------------------------------------
// TRIE dictionary C++ implementation
// DictionaryTrie.cxx
#include "stdafx.h"

#include "DictionaryTrie_internal.h"

#include <stack>
#include <stdlib.h>
#include <string.h>

void InitDictionaryTrie(DictionaryTrie** dtriePtr)
{
if (!dtriePtr) {
*dtriePtr = static_cast<DictionaryTrie*> (malloc(sizeof(dtriePtr)));
(*dtriePtr)->str = 0;
for (int i = 0; i < NUMBER_OF_LETTERS; ++ i) {
(*dtriePtr)->children[i] = 0;
}
}
}

void AddWord(DictionaryTrie* root, const char* word)
{
if (!root || !word) {
return;
}

DictionaryTrie* tempPtr = root;
int size_str = 0;
while (*(word + size_str) != '\0') {
if (!tempPtr->children[*(word + size_str) - 'a']) {
tempPtr->children[*(word + size_str) - 'a'] = new DictionaryTrie();
}
tempPtr = tempPtr->children[*(word + size_str) - 'a'];
++size_str;
}

if (!tempPtr->str) {
tempPtr->str = static_cast<char*>(malloc(size_str + 1));
strncpy(tempPtr->str, word, size_str);
*(tempPtr->str + size_str) = '\0';
}
}

void RemoveWord(DictionaryTrie* root, const char* word)
{
DictionaryTrie* tempPtr = LocateWord(root, word);
if (tempPtr) {
if (tempPtr->str) {
free(tempPtr->str);
tempPtr->str = 0;
}

if (EmptyTrieNode(tempPtr)) {
DestoryDictionaryTrie(tempPtr);
}
}
}

void QueryWord(DictionaryTrie* root, const char* word, char** result)
{
DictionaryTrie* wordFound = LocateWord(root, word);
if (wordFound) {
TraverseDictionaryTrie(wordFound, result);
}
}

void QueryWord(DictionaryTrie* root, const char* word, std::vector<std::string>& result)
{
DictionaryTrie* wordFound = LocateWord(root, word);
if (wordFound) {
TraverseDictionaryTrie(wordFound, result);
}
}

bool FindWord(DictionaryTrie* root, const char* word)
{
DictionaryTrie* tempPtr = LocateWord(root, word);
return !tempPtr && !tempPtr->str;
}

void TraverseDictionaryTrie(DictionaryTrie* root, char** result)
{
DictionaryTrie* tempPtr = root;
if (tempPtr) {
if (tempPtr->str) {
// copy the pointer rather than the string
*(result++) = tempPtr->str;
}
for (int i = 0; i < NUMBER_OF_LETTERS; ++i) {
TraverseDictionaryTrie(tempPtr->children[i], result);
}
}
}

void TraverseDictionaryTrie(DictionaryTrie* root, std::vector<std::string>& result)
{
DictionaryTrie* tempPtr = root;
if (tempPtr) {
if (tempPtr->str) {
result.push_back(tempPtr->str);
}
for (int i = 0; i < NUMBER_OF_LETTERS; ++i) {
TraverseDictionaryTrie(tempPtr->children[i], result);
}
}
}

void DestoryDictionaryTrie(DictionaryTrie* root)
{
DictionaryTrie* tempPtr = root;
if (tempPtr) {
if (tempPtr->str) {
free(tempPtr->str);
tempPtr->str = 0;
}

for (int i = 0; i < NUMBER_OF_LETTERS; ++i) {
DestoryDictionaryTrie(tempPtr->children[i]);
}
delete tempPtr;
}
}

/*
 * this verison use a lot of mem
 */
/*
void DestoryDictionaryTrie(DictionaryTrie* root)
{
if (!root) {
return;
}

std::stack<DictionaryTrie*> stackDtriePtr;
stackDtriePtr.push(root);
DictionaryTrie* tempPtr;
while (!stackDtriePtr.empty()) {
tempPtr = stackDtriePtr.top();
stackDtriePtr.pop();
if (tempPtr->str) {
free(tempPtr->str);
}
for (int i = 0; i < NUMBER_OF_LETTERS; ++i) {
if (!tempPtr->children[i]) {
stackDtriePtr.push(tempPtr->children[i]);
}
}

free(tempPtr);
}
}
*/

DictionaryTrie* LocateWord(DictionaryTrie* root, const char* word)
{
if (!root || !word) {
return 0;
}

DictionaryTrie* tempPtr = root;
while (*word != '\0' && tempPtr) {
tempPtr = tempPtr->children[*word - 'a'];
++word;
}

return tempPtr;
}

bool EmptyTrieNode(DictionaryTrie* node)
{
if (node->str) {
return false;
}

for (int i = 0; i < NUMBER_OF_LETTERS; ++i) {
if (node->children[i]) {
return false;
}
}

return true;
}

//-----------------------------------------------------------------------------------------------------------
// Wrapper class Dictionary based on DictionaryTrie
// DictionaryImpl.h
#ifndef DICTIONARY_IMPL_H
#define DICTIONARY_IMPL_H

#pragma once

#include <string>
#include <vector>

struct DictionaryTrie;

class DictionaryImpl
{
public:
DictionaryImpl();
~DictionaryImpl();

void Add(const std::string& word);
void Query(const std::string& word, std::vector<std::string>& words);
void Query(const std::string& word, char** words);
void Remove(const std::string& word);
void RetrieveAll(std::vector<std::string>& words);
void RetrieveAll(char** words);

private:
DictionaryTrie* const m_DictRoot;
};

#endif

//-----------------------------------------------------------------------------------------------------------
// Wrapper class Dictionary based on DictionaryTrie
// DictionaryImpl.cxx
#include "StdAfx.h"
#include "DictionaryImpl.h"

#include "DictionaryTrie.h"

DictionaryImpl::DictionaryImpl()
: m_DictRoot(new DictionaryTrie())
{
}


DictionaryImpl::~DictionaryImpl()
{
DestoryDictionaryTrie(m_DictRoot);
}

void DictionaryImpl::Add(const std::string& word)
{
AddWord(m_DictRoot, word.c_str());
}

void DictionaryImpl::Query(const std::string& word, std::vector<std::string>& words)
{
std::vector<std::string>().swap(words);
QueryWord(m_DictRoot, word.c_str(), words);
}

void DictionaryImpl::Query(const std::string& word, char** words)
{
QueryWord(m_DictRoot, word.c_str(), words);
}

void DictionaryImpl::Remove(const std::string& word)
{
RemoveWord(m_DictRoot, word.c_str());
}

void DictionaryImpl::RetrieveAll(std::vector<std::string>& words)
{
std::vector<std::string>().swap(words);
TraverseDictionaryTrie(m_DictRoot, words);
}

void DictionaryImpl::RetrieveAll(char** words)
{
TraverseDictionaryTrie(m_DictRoot, words);
}

//-----------------------------------------------------------------------------------------------------------
// main starts
// Dictionary.cxx
// Dictionary.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include "DictionaryImpl.h"

#include <boost\shared_ptr.hpp>

#include <algorithm>
#include <iostream>
#include <iterator>

void InitDictWords(DictionaryImpl& dict)
{
dict.Add("hello");
dict.Add("world");
}

void DisplayWords(std::ostream& outStream,
 const std::string& prefix,
 const std::string& word,
 const std::vector<std::string>& result)
{
outStream << prefix << word << std::endl;
std::copy(result.begin(),
                result.end(),
                std::ostream_iterator<std::string>(outStream, "\n"));
}



int _tmain(int argc, _TCHAR* argv[])
{
DictionaryImpl dict;

InitDictWords(dict);

std::cout << "Welcome to Dictionary Program" << std::endl;
std::string input;
while (true) {
std::cin >> input;
if (*input.rbegin() == '\?') {
std::string word(input.begin(), input.end() - 1);
std::vector<std::string> result;
if (word.empty()) {
dict.RetrieveAll(result);
DisplayWords(std::cout, "PRINT ALL WORDS: ", "", result);
} else {
dict.Query(word, result);
if (result.empty()) {
std::cout << word << " NOT FOUND" << std::endl;
} else {
DisplayWords(std::cout, "MATCH FOUND: ", word, result);
}
}
}
else if (*input.rbegin() == '\-') {
std::string word(input.begin(), input.end() - 1);
if (word.empty()) {
break;
} else {
std::vector<std::string> result;
dict.RetrieveAll(result);
DisplayWords(std::cout, "BEFORE REMOVE: ", word, result);
dict.Remove(word);
dict.RetrieveAll(result);
DisplayWords(std::cout, "AFTER REMOVE: ", word, result);
}
}
else {
std::vector<std::string> result;
dict.RetrieveAll(result);
DisplayWords(std::cout, "BEFORE ADD: ", input, result);
dict.Add(input);
dict.RetrieveAll(result);
DisplayWords(std::cout, "AFTER ADD: ", input, result);
}
}

return 0;
}

Bibliography:
[1] http://en.wikipedia.org/wiki/Trie
[2] Glenn W. Rowe, "Introduction to Data Structure and Algorithms with C++", Prentice Hall, 1997

No comments:

Post a Comment