Thursday 5 May 2016

Compare Exotic String

1. Problem Description
This is a Google interview question for software engineer from careercup. Here is the description from the original thread,
"
Provide a function that allow to compare two strings lexicography, having in mind that these words may contain digraphs (two letters together represents a single one i.e in Spanish ch is a single character ). 
This in order to be able to sort a list of words.
urodba April 07, 2016 in United States Report Duplicate | Flag ".

2. Data Structure and Algorithm
The idea follows
    - Build all possible characters into a Trie structure
    - Build a hash map for each valid entry in Trie, for exmaple
        * The pointer in Trie as the key and has a integer as value
        * Character "a" as 65 (in ASCII table)
        * Character "cz" as 100 (any number to represent its value comparing to other characters)
    - Search a valid entry from the starting of the string in the Trie until failed to find or reach the end
        * Find the longest valid entry in Trie
        * Compare two valid entries in two given strings
            ~ Return a positive value if lhs > rhs
            ~ Return a negative value if lhs < rhs
            ~ Return 0 if equal

Initially I was thinking to use a hash map instead of Trie to build up the character table. But the disadvantage of hash map is that have to continue searching for the valid entry in the character table up the maximal key size. For example if a valid entry can be made up to 10 English character, then have to search 10 times (equal to the maximal key size) to enumerate all the possibility. In the case of just one valid entry is 10 characters long ("abcdefghij") in the character table. Still need to search 10 times even starting with "x".
A character table of Trie structure does not have this problem. And in computation wise Trie search needs only one pointer checking and should outpaces harsh map, which needs to compute hash code then pointer checking.

3. C++ Implementation
The Trie structure used in this implemented can be found in Data Structure and Algorithm - TrieData Structure and Algorithm - Trie and Palindrome (Google Interview Question) and Trie - Live Stream Words Detector.

// ********************************************************************************
// Implementation
// ********************************************************************************

// header
#include "DictionaryTrie.h"

#include <exception>
#include <string>
#include <unordered_map>
#include <vector>

struct ExoticChar
{
    ExoticChar(const std::string &ec, const int v)
        : eChar(ec), val(v)
    {}

    ExoticChar& operator=(const ExoticChar &rhs) {
        this->eChar = rhs.eChar;
        this->val = rhs.val;
        return *this;
    }

    std::string eChar;
    int val;
};

class EscException : public std::exception
{
public:
    EscException(const std::string &msg)
        : m_Msg(msg)
    {}

    const char* what() const {
        return m_Msg.c_str();
    }

private:
    const std::string m_Msg;
};

class ExoticStringComparator
{
public:
    ExoticStringComparator(const std::vector<ExoticChar> &staticTable);
    ~ExoticStringComparator();

    int CompareExoticString(const std::string &lhs, const std::string &rhs) const;
private:
    DictionaryTrie* m_StaticMapRoot;
    std::unordered_map<DictionaryTrie*, int> m_KeyValMap;
};

// cpp
#include "ExoticStringComparator.h"


ExoticStringComparator::ExoticStringComparator(const std::vector<ExoticChar> &staticTable)
    : m_StaticMapRoot(NULL)
{
    if (staticTable.empty()) {
        throw EscException("ExoticStringComparator creation - empty table");
    }

    m_StaticMapRoot = new DictionaryTrie();
    DictionaryTrie *tempPtr;
    for (auto it = staticTable.begin(); it != staticTable.end(); ++it) {
        tempPtr = AddWord(m_StaticMapRoot, it->eChar.c_str());
        if (tempPtr) {
            m_KeyValMap.insert(std::make_pair(tempPtr, it->val));
        }
    }
}


ExoticStringComparator::~ExoticStringComparator()
{
    DestoryDictionaryTrie(m_StaticMapRoot);
    m_StaticMapRoot = NULL;
}

int ExoticStringComparator::CompareExoticString(const std::string &lhs, const std::string &rhs) const
{
    auto lhsTempIt = lhs.begin();
    auto rhsTempIt = rhs.begin();

    int result = 0;
    DictionaryTrie *lhsTemp, *lhsCur;
    DictionaryTrie *rhsTemp, *rhsCur;
    //while (lhsCurIt != lhs.end() && rhsCurIt != rhs.end()) {
    while (lhsTempIt != lhs.end() && rhsTempIt != rhs.end()) {
        lhsTemp = m_StaticMapRoot;
        while (lhsTemp = FindPath(lhsTemp, *lhsTempIt)) {
            if (lhsTemp->str) {
                lhsCur = lhsTemp;
            }

            ++lhsTempIt;
            if (lhsTempIt == lhs.end()) {
                break;
            }
        }

        rhsTemp = m_StaticMapRoot;
        while (rhsTemp = FindPath(rhsTemp, *rhsTempIt)) {
            if (rhsTemp->str) {
                rhsCur = rhsTemp;
            }

            ++rhsTempIt;
            if (rhsTempIt == rhs.end()) {
                break;
            }
        }

        auto lhsValIt = m_KeyValMap.find(lhsCur);
        auto rhsValIt = m_KeyValMap.find(rhsCur);
        if (lhsValIt == m_KeyValMap.end() || rhsValIt == m_KeyValMap.end()) {
            throw EscException("CompareExoticString: invalid map");
        }
        result = lhsValIt->second - rhsValIt->second;
        if (result != 0) {
            return result;
        }
    }

    if (lhsTempIt != lhs.end()) {
        return 1;
    }
    else if (rhsTempIt != rhs.end()) {
        return -1;
    }
    return result;
}
// ********************************************************************************
// Test
// ********************************************************************************
void TestExoticStringComparator()
{
    {
        // non-exotic testing
        const std::vector<ExoticChar> charTable = {
            { "a", 'a' },
            { "b", 'b' },
            { "c", 'c' },
            { "d", 'd' },
            { "e", 'e' },
            { "f", 'f' },
            { "g", 'g' },
            { "h", 'h' },
            { "i", 'i' },
            { "j", 'j' },
            { "k", 'k' },
            { "l", 'l' },
            { "m", 'm' },
            { "n", 'n' },
            { "o", 'o' },
            { "p", 'p' },
            { "q", 'q' },
            { "r", 'r' },
            { "s", 's' },
            { "t", 't' },
            { "u", 'u' },
            { "v", 'v' },
            { "w", 'w' },
            { "x", 'x' },
            { "y", 'y' },
            { "z", 'z' } };
           
        ExoticStringComparator esc(charTable);
        assert(esc.CompareExoticString("lkasdfajsd", "lkasdfajsd") == 0);
        assert(esc.CompareExoticString("lkasdfajsd", "lkasdfahsd") > 0);
        assert(esc.CompareExoticString("lkasdfajsd", "lkasdfaxsd") < 0);
        assert(esc.CompareExoticString("lkasdfajsdz", "lkasdfajsd") > 0);
        assert(esc.CompareExoticString("lkasdfajsd", "lkasdfajsdz") < 0);
   
    }

    {
        // exotic testing
        const std::vector<ExoticChar> charTable = {
            { "a", 'a' },
            { "b", 'b' },
            { "c", 'c' },
            { "d", 'd' },
            { "e", 'e' },
            { "f", 'f' },
            { "g", 'g' },
            { "h", 'h' },
            { "i", 'i' },
            { "j", 'j' },
            { "k", 'k' },
            { "l", 'l' },
            { "m", 'm' },
            { "n", 'n' },
            { "o", 'o' },
            { "p", 'p' },
            { "q", 'q' },
            { "r", 'r' },
            { "s", 's' },
            { "t", 't' },
            { "u", 'u' },
            { "v", 'v' },
            { "w", 'w' },
            { "x", 'x' },
            { "y", 'y' },
            { "z", 'z' },
            { "ae", 'z' + 1 },
            { "oe", 'z' + 2 },
            { "ue", 'a' - 1 },
            { "sch", 'a' - 2 }, };

        ExoticStringComparator esc(charTable);
        assert(esc.CompareExoticString("ae", "oe") < 0);
        assert(esc.CompareExoticString("ae", "ue") > 0);
        assert(esc.CompareExoticString("oe", "sch") > 0);
        assert(esc.CompareExoticString("oe", "ue") > 0);
        assert(esc.CompareExoticString("lkasdfaesd", "lkasdfajsdz") > 0);
        assert(esc.CompareExoticString("lkasdfschaesd", "lkasdfaajsdz") < 0);
        assert(esc.CompareExoticString("lkasdfschaesd", "lkasdfschoesd") < 0);

    }
}

1 comment:

  1. Hey I was not able to follow your algorithm. I didn't understand what exactly does trie contains and how do we define longest valid entry.

    Thanks

    ReplyDelete