Tuesday, 29 December 2015

Find the Shortest Sub-string Containing S1, S2 and S3

1. Problem Description
- witold.pluta42 about a month ago in Europe | Report Duplicate | Flag ". 

2. Solution
Given the sting as S and 3 smaller string S1, S2 and S3, the solution has O(1) memory complexity and O(N) computation complexity, where N is the size of S. The idea is to go through the string, find the position of S1, S2 and S3, always keep the latest position of 3 and track the best result.

Pseudo-code:
    1. Initialization:
        * The starting position of idxS1, idxS2, idxS3 in S as -1
        * The length of 3 sub-string as len1, len2 and len3
        * minLen: the minimal value of len1, len2 and len3
        * maxLen: the maximal value of len1, len2 and len3
        * The length of S as len
        * The result sub-string, result, as S
    2. Start from the beginning of string S, idx = 0
    3. Check if S1 exists from idx
        Go to Step 4, if yes.
        Go to Step 5, if no
    4. Update idxS1 = idx, and calculate the sub-string if idxS1, idxS2 and idxS3 all >= 0
        * The start position of sub-string: the minimal value of idxS1, idxS2 and idxS3
        * The end position: the maximal value of idxS1+len1, idxS2+len2 and idxS3+len3
        * Swap it with result it is shorter
    5. Check if S2 exists from idx
        Go to Step 6, if yes.
        Go to Step 7, if no
    6. Update idxS2 = idx, and calculate the sub-string if idxS1, idxS2 and idxS3 all >= 0
        The start position of substring: the minimal value of idxS1, idxS2 and idxS3
        The end position: the maximal value of idxS1+len1, idxS2+len2 and idxS3+len3
        Swap it with result it is shorter
    7. Check if S3 exists from idx
        Go to Step 8, if yes.
        Go to Step 9, if no
    9. Update idxS3 = idx, and calculate the sub-string if idxS1, idxS2 and idxS3 all >= 0
        The start position of substring: the minimal value of idxS1, idxS2 and idxS3
        The end position: the maximal value of idxS1+len1, idxS2+len2 and idxS3+len3
        Swap it with result it is shorter
    10, Go to Step 12, if result's length is equal to maxLen
    11. Increment idx by 1 until it reaches (len - minLen) and repeat Step 3~10
    12. Check if idxS1, idxS2 and idxS3 are all >=0
         Return the sub-string of result from S, if yes
         Return empty string otherwise

Note: 
Step 10: the condition only happens when the longest string of S1, S2 and S3 contains other 2. Then the shortest sub-string will be the longest string of S1, S2 and S3.
Step 11: No need to loop until the end of S, because when idx is >= (len-minLen), then Step 3, 5 and 7 will be always false.

3. C++ Implementation
// ********************************************************************************
// Implementation
// ********************************************************************************
#include <algorithm>
#include <cmath>
#include <string>
#include <vector>

struct SubString {
    SubString(size_t idx, size_t length)
        : startIndex(idx)
        , len(length)
    {}

    size_t startIndex;
    size_t len;

    size_t EndIndex() const {
        return startIndex + len;
    }

    bool operator > (SubString const & rhs) {
        return this->len > rhs.len;
    }
};

SubString FindSubStringContainS1S2S3(SubString const &s1,
                                     SubString const &s2,
                                     SubString const &s3)
{
    size_t startIndex = std::min(std::min(s1.startIndex, s2.startIndex), s3.startIndex);
    size_t endIndex = std::max(std::max(s1.EndIndex(), s2.EndIndex()), s3.EndIndex());
    return SubString(startIndex, endIndex - startIndex);
}

void FindShortestSubString(int idxS1, int idxS2, int idxS3,
                          size_t len1, size_t len2, size_t len3,
                          SubString& bestResult)
{
    if (idxS1 < 0 || idxS2 < 0 || idxS3 < 0) {
        return;
    }

    SubString temp = FindSubStringContainS1S2S3(SubString(idxS1, len1),
                                                SubString(idxS2, len2),
                                                SubString(idxS3, len3));
    if (bestResult.operator>(temp)) {
        bestResult = temp;
    }
}

std::string FindTheShortestSubStringContainS123(std::string const &input,
                                                std::string const &S1,
                                                std::string const &S2,
                                                std::string const &S3)
{
    if (input.empty() || S1.empty() || S2.empty() || S3.empty()) {
        return std::string();
    }

    const size_t len1 = S1.size();
    const size_t len2 = S2.size();
    const size_t len3 = S3.size();
    const size_t minLen = std::min(std::min(len1, len2), len3);
    const size_t maxLen = std::max(std::max(len1, len2), len3);
    const size_t lenInput = input.size();
    const char* cstrInput = input.c_str();
    const char* cstrS1 = S1.c_str();
    const char* cstrS2 = S2.c_str();
    const char* cstrS3 = S3.c_str();

    int idxS1 = -1;
    int idxS2 = -1;
    int idxS3 = -1;
    SubString bestResult(0, lenInput);
    for (size_t index = 0; (index + minLen) < lenInput; ++index) {
       if (strncmp(cstrInput + index, cstrS1, len1) == 0) {
            idxS1 = index;
            FindShortestSubString(idxS1, idxS2, idxS3, len1, len2, len3, bestResult);
        }
        if (strncmp(cstrInput + index, cstrS2, len2) == 0) {
            idxS2 = index;
            FindShortestSubString(idxS1, idxS2, idxS3, len1, len2, len3, bestResult);
        }
        if (strncmp(cstrInput + index, cstrS3, len3) == 0) {
            idxS3 = index;
            FindShortestSubString(idxS1, idxS2, idxS3, len1, len2, len3, bestResult);
        }

        if (bestResult.len == maxLen) {
            break;
        }
    }

    if (idxS1 < 0 || idxS2 < 0 || idxS3 < 0) {
        return std::string();
    }

    return input.substr(bestResult.startIndex, bestResult.len);
}

// ********************************************************************************
// Test
// ********************************************************************************
#include <cassert>
void TestFindTheShortestSubStringContainS1S2S3()
{
    {
        const std::string input("abc0123456789aksdfjasd");
        const std::string s1("0123");
        const std::string s2("3456");
        const std::string s3("");
        std::string result = FindTheShortestSubStringContainS123(input, s1, s2, s3);
        assert(result.empty() == true);
    }

    {
        const std::string input("abc0123456789aksdfjasd");
        const std::string s1("0123456");
        const std::string s2("3456");
        const std::string s3("1234");
        std::string result = FindTheShortestSubStringContainS123(input, s1, s2, s3);
        assert(result == std::string("0123456"));
    }

    {
        const std::string input("abc0123456789aksdfjasd");
        const std::string s1("0123");
        const std::string s2("3456");
        const std::string s3("6789");
        std::string result = FindTheShortestSubStringContainS123(input, s1, s2, s3);
        assert(result == std::string("0123456789"));
    }

    {
        const std::string input("sdfa01234ad23456dfad6789abc0123456789aksdfjasd");
        const std::string s1("0123");
        const std::string s2("3456");
        const std::string s3("6789");
        std::string result = FindTheShortestSubStringContainS123(input, s1, s2, s3);
        assert(result == std::string("0123456789"));
    }

    {
        const std::string input(
               "sdfa01234ad23456dfad6789abc0123456789aksdfjasd0123skd3456kjsd6789jhs");
        const std::string s1("0123");
        const std::string s2("3456");
        const std::string s3("6789");
        std::string result = FindTheShortestSubStringContainS123(input, s1, s2, s3);
        assert(result == std::string("0123456789"));
    }
}

No comments:

Post a Comment