Wednesday 27 January 2016

Dynamic Programming - Find If A Given Tree Has Duplicate Sub-Tree (Part III)

1. Algorithm
This is an improvement of the solution on the previous blog, Dynamic Programming - Find If A Given Tree Has Duplicate Sub-Tree (Part II). The solution presented in Part II has to go though the entire tree to calculate the hash code for each node and then iterate over the hash map to find hash key with collisions to test node-by-node. This guarantees O(N) solution, where N is the size of the entire tree. However this problem needs only to find out if duplicate trees exist or not. There is no need to carry on computing the hash code for the entire tree, if one of duplication can be detected already.

The improvement is very straightforward. Compute the hash code for each node via  post-order traversal. When insert the hash key into the hash map, immediately test if collision exists. If yes, immediately test if the duplication of sub tree exist or not. If found duplication then stop calculating hash code and return true. Otherwise carry on computing the hash code for the rest the tree.

Pseudo-code:
1. Calculate the hash code for this node in the same way via post-order traversal
2. Insert into the hash code into the hash map
    * Insert into the hash map directly if hash code not present yet
    * Node-by-node comparison of this node against hash value entries if present already
        - Return true, if node-by-node comparison is true
        - Insert hash code into the hash map otherwise
3. Repeat Step 1 and Step 2 until exhaust the entire true
4. Return false

In the solution on Part II it guarantees linear solution and always linear, not more and not less. However this solution guarantees linear solution too. But slightly better in average. The worse case will be in the case that the only duplication happens in level 1 (root node - level 0). Otherwise in any other cases this solution does not require to calculate the entire tree. The best cases will be that the duplication sub-trees reside in the left part of the tree.

2. C++ Implementation
// ********************************************************************************
// Implementation
// ********************************************************************************
template <typename T>
bool FindDuplicateTreeInEntries(const std::unordered_set<TreeNode<T>*> entries,
                                TreeNode<T>* refEntry,
                                TreeNode<T>* &entry1,
                                TreeNode<T>* &entry2)
{
    if (!MoreThanNNodes_R(refEntry, 2)) {
        return false;
    }
    auto itEnd = entries.end();
    for (auto it = entries.begin(); it != itEnd; ++it) {
        if (MoreThanNNodes_R(*it, 2) && CompareTwoTrees_R(*it, refEntry)) {
            entry1 = *it;
            entry2 = refEntry;
            return true;
        }
    }

    return false;
}

template <typename T>
size_t CalculateHashMapAndFindSubTree_R(TreeNode<T>* root,
    std::unordered_map<size_t, std::unordered_set<TreeNode<T>*>> &hm,
    TreeNode<T>* &entry1,
    TreeNode<T>* &entry2,
    bool &found)
{
    // Prime number used to calculate the hash code in Step 1
    const size_t PrimeOfCurNode = 1009;
    const size_t PrimeOfLeftNode = 2909;
    const size_t PrimeOfRightNode = 4021;

    // Return directly if duplication is found
    // Get out of recursion directly
    if (!root || found) {
        return 0;
    }

    size_t hashKey = *root->data * PrimeOfCurNode;
    if (root->left) {
        // Find in the left branch: Step 1 post-order traversal
        hashKey += PrimeOfLeftNode * CalculateHashMapAndFindSubTree_R(root->left,
                                                                      hm,
                                                                      entry1,
                                                                      entry2,
                                                                      found);
    }
    if (!found && root->right) {
        // Continue finding in the right branch: Step 1 post-order traversal
        hashKey += PrimeOfRightNode * CalculateHashMapAndFindSubTree_R(root->right,
                                                                       hm,
                                                                       entry1,
                                                                       entry2,
                                                                       found);
    }
    if (!found) {
        // Calculate hash code for the current root: Step 1 post-order traversal
        size_t hashValue = std::hash<size_t>{}(hashKey);
        auto foundIt = hm.find(hashValue);
        if (foundIt == hm.end()) {
            // Insert into hash map if not present yet: Step 2
            std::unordered_set<TreeNode<T>*> entry;
            entry.insert(root);
            hm.insert(std::make_pair(hashValue, entry));
        }
        else {
            std::unordered_set<TreeNode<T>*>& refEntry = foundIt->second;
            if (refEntry.find(root) == refEntry.end()) {
                // Node-by-node comparison: Step 2
                found = FindDuplicateTreeInEntries(refEntry, root, entry1, entry2);
                if (!found) {
                    // Insert into hash map if node-by-node comparison is false: Step 2
                    refEntry.insert(root);
                }
            }
        }
        // Return hash value for next calculation: Step 3
        return hashValue;
    }
    return 0;
}

template <typename T>
bool HaveDuplicateSubTree3_R(TreeNode<T> *root,
                                                      TreeNode<T>* &entry1,
                                                      TreeNode<T>* &entry2)
{
    std::unordered_map<size_t, std::unordered_set<TreeNode<T>*>> nodeHM;
    bool found = false;
    CalculateHashMapAndFindSubTree_R(root, nodeHM, entry1, entry2, found);
    return found;
}

// ********************************************************************************
// Test
// ********************************************************************************
void TestFindDuplicateSubTree3()
{
    {
        TreeNode<int>* root = nullptr;
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
    }

    {
        const std::vector<int> testInput = { 1, 2 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root);
        auto rightestNode = FindRightestNode(root);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root);
        auto rightestNode = FindRightestNode(root);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->left);
        auto rightestNode = FindRightestNode(root->left);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 1);
        assert(*rightestNode->data == 4);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1;
        TreeNode<int>* entry2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput = { 100, 101, 102 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput);
        auto subTree2 = ConstructTreeRecursive(subTreeInput);
        assert(GetTreeSize_R(subTree1) == subTreeInput.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == true);
        assert(entry1 != nullptr);
        assert(entry2 != nullptr);
        assert(CheckTwoTreesWithSameValues(entry1, subTree1));
        assert(CheckTwoTreesWithSameValues(entry2, subTree2));
        assert(CheckTwoTreesWithSameValues(entry1, entry2));

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1 = nullptr;
        TreeNode<int>* entry2 = nullptr;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        const std::vector<int> subTreeInput1 = { 100, 101, 102 };
        const std::vector<int> subTreeInput2 = { 102, 101, 100 };
        auto subTree1 = ConstructTreeRecursive(subTreeInput1);
        auto subTree2 = ConstructTreeRecursive(subTreeInput2);
        assert(GetTreeSize_R(subTree1) == subTreeInput1.size());
        assert(GetTreeSize_R(subTree2) == subTreeInput2.size());
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        assert(entry1 == nullptr);
        assert(entry2 == nullptr);

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1 = nullptr;
        TreeNode<int>* entry2 = nullptr;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        TreeNode<int>* subTree1 = new TreeNode<int>(1000);
        subTree1->left = new TreeNode<int>(1001);
        subTree1->left->left = new TreeNode<int>(1002);
        TreeNode<int>* subTree2 = new TreeNode<int>(1000);
        subTree2->right = new TreeNode<int>(1001);
        subTree2->right->right = new TreeNode<int>(1002);
        assert(GetTreeSize_R(subTree1) == 3);
        assert(GetTreeSize_R(subTree2) == 3);
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        assert(entry1 == nullptr);
        assert(entry2 == nullptr);

        DeleteTree(&root);
    }
    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1 = nullptr;
        TreeNode<int>* entry2 = nullptr;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        TreeNode<int>* subTree1 = new TreeNode<int>(1000);
        subTree1->left = new TreeNode<int>(1001);
        subTree1->right = new TreeNode<int>(1002);
        TreeNode<int>* subTree2 = new TreeNode<int>(1000);
        subTree2->right = new TreeNode<int>(1001);
        subTree2->right->right = new TreeNode<int>(1002);
        assert(GetTreeSize_R(subTree1) == 3);
        assert(GetTreeSize_R(subTree2) == 3);
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        assert(entry1 == nullptr);
        assert(entry2 == nullptr);

        DeleteTree(&root);
    }

    {
        const std::vector<int> testInput = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        auto root = ConstructTreeRecursive(testInput);
        assert(root != nullptr);
        assert(GetTreeSize_R(root) == testInput.size());
        TreeNode<int>* entry1 = nullptr;
        TreeNode<int>* entry2 = nullptr;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);

        TreeNode<int>* subTree1 = new TreeNode<int>(1000);
        subTree1->right = new TreeNode<int>(1002);
        subTree1->right->right = new TreeNode<int>(1001);
        TreeNode<int>* subTree2 = new TreeNode<int>(1000);
        subTree2->right = new TreeNode<int>(1001);
        subTree2->right->right = new TreeNode<int>(1002);
        assert(GetTreeSize_R(subTree1) == 3);
        assert(GetTreeSize_R(subTree2) == 3);
        assert(CheckTwoTreesWithSameValues(subTree1, subTree2) == true);

        auto leftestNode = FindLeftestNode(root->right);
        auto rightestNode = FindRightestNode(root->right);
        assert(leftestNode != nullptr);
        assert(leftestNode->left == nullptr);
        assert(rightestNode != nullptr);
        assert(rightestNode->right == nullptr);
        assert(*leftestNode->data == 6);
        assert(*rightestNode->data == 10);

        leftestNode->left = subTree1;
        rightestNode->right = subTree2;
        assert(HaveDuplicateSubTree3_R(root, entry1, entry2) == false);
        assert(entry1 == nullptr);
        assert(entry2 == nullptr);

        DeleteTree(&root);
    }
}

No comments:

Post a Comment