Sunday, 14 December 2014

Dynamic Programming - Maximizing Gold : Cooperative Play and Known Pots

1. Analysis
In the last two blog we have discussed competitive play with unknown pots, cooperative play with unknown pots and competitive play with known pots. In this blog I am going to discuss cooperative play with known pots.

Comparing with competitive play with known pots the difference is on the second player's strategy. In competitive play with known pots the second player is to maximize the gold as the first player. However in cooperative play with known pots the second player is to minimize the gold in order to let the first player maximize the gold. In another word in competitive play both players have to bring up their best game, however in cooperative play the first player has to play the best and the second player has to play the worst.

2. DP's Sub-problem
As we discussed above, the strategy for the first player is to maximize the gold and the strategy for the second player is to minimize the gold. Or in other word the second player is to pick the gold pot in the first player's favor which allows the first player to maximize the gold.

Given a serial gold pots (P1, P2, ..., Pn), and at any stage of play F(Pi, ..., Pj), where i, j are within [1, n] and i is not greater than j.
For the first player:
    - Pick the left pot, then the return is Pi + F(Pi+1, ..., Pj)
    - Pick the right pot, then the return is Pj + F(Pi, ..., Pj-1)
    - And take the bigger return
For the first player:
    - Pick the left pot, then the return for the first player is F(Pi+1, ..., Pj)
    - Pick the right pot, then the return for the first player is F(Pi, ..., Pj-1)
    - Pick the pot that result in a bigger return for the first player

3. C++ Implementation
As competitive play with known pots, the caching technique is used to save the duplicate calculation as well.

// ********************************************************************************
// IMPLEMENTATION
// ********************************************************************************
#include <unordered_map>
#include <vector>

struct GoldPotKey {
    size_t startIndex;
    size_t endIndex;

    GoldPotKey(size_t s, size_t e)
        : startIndex(s), endIndex(e)
    {}

    bool operator==(const GoldPotKey& rhs) const {
        return startIndex == rhs.startIndex &&
               endIndex == rhs.endIndex;
    }
};

struct GoldPotKeyHash {
    size_t operator()(const GoldPotKey& key) const {
        return std::hash<double>()(key.startIndex) ^ std::hash<size_t>()(key.endIndex);
    }

    bool operator()(const GoldPotKey& k1, const GoldPotKey& k2) const {
        return k1 == k2;
    }
};

typedef size_t REWARD;
typedef std::unordered_map<GoldPotKey, REWARD, GoldPotKeyHash, GoldPotKeyHash>
                                 GoldPotRewardHashMap;

size_t MaxGoldCooperativeAndKnown_DP(const std::vector<size_t>& goldPots,
                                     const size_t startIndex,
                                     const size_t endIndex,
                                     const bool isFirstPlayer,
                                     GoldPotRewardHashMap& gprhm)
{
    if (startIndex == (endIndex - 1)) {
        if (isFirstPlayer) {
            return goldPots[startIndex];
        }
        return 0;
    }

    const GoldPotKey gpk{ startIndex, endIndex };
    {
        GoldPotRewardHashMap::const_iterator foundIter = gprhm.find(gpk);
        if (foundIter != gprhm.end()) {
            return foundIter->second;
        }
    }

    size_t returnOfOpponentIfTakeTheLeftPot;
    size_t returnOfOpponentIfTakeTheRightPot;
    if (isFirstPlayer)
    {
        returnOfOpponentIfTakeTheLeftPot = goldPots[startIndex] +       
                        MaxGoldCooperativeAndKnown_DP(goldPots,
                                                startIndex + 1, endIndex, false, gprhm);
        returnOfOpponentIfTakeTheRightPot = goldPots[endIndex - 1] + 
                        MaxGoldCooperativeAndKnown_DP(goldPots,
                                                startIndex, endIndex - 1, false, gprhm);
    }
    else {
        returnOfOpponentIfTakeTheLeftPot =
                        MaxGoldCooperativeAndKnown_DP(goldPots, startIndex + 1,
                                                endIndex, true, gprhm);
        returnOfOpponentIfTakeTheRightPot =
                        MaxGoldCooperativeAndKnown_DP(goldPots,
                                                startIndex, endIndex - 1, true, gprhm);
    }
    // the second player tries to maximize his/her gold, and take the bigger
    // return and the left is for the first player
    size_t expectedReward = 
                returnOfOpponentIfTakeTheLeftPot > returnOfOpponentIfTakeTheRightPot ?
                returnOfOpponentIfTakeTheLeftPot : returnOfOpponentIfTakeTheRightPot;

    gprhm.insert(std::make_pair(gpk, expectedReward));

    return expectedReward;
}

size_t MaxGoldCooperativeAndKnown(const std::vector<size_t>& goldPots)
{
    if (goldPots.empty()) {
        return 0;
    }

    size_t totalPots = goldPots.size();
    if (totalPots == 1) {
        return goldPots[0];
    }

    GoldPotRewardHashMap gprhm;
    return MaxGoldCooperativeAndKnown_DP(goldPots, 0, goldPots.size(), true, gprhm);
}

// ********************************************************************************
// TEST
// ********************************************************************************
#include <cassert>

void TestCases()
{
    std::vector<size_t> goldPots = { 1, 2 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 2);

    goldPots = { 1, 3, 2 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 5);

    goldPots = { 1, 2, 10, 3 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 13);

    goldPots = { 4, 1, 2, 10 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 14);

    goldPots = { 4, 1, 2, 10, 3 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 17);

    goldPots = { 5, 4, 1, 2, 10 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 19);

    goldPots = { 5, 4, 1, 2, 10, 3 };
    assert(MaxGoldCooperativeAndKnown(goldPots) == 19);
}
// ********************************************************************************

Wednesday, 10 December 2014

Dynamic Programming - Maximizing Gold : Competitive Play and Known Pots

1. Analysis 
As the two players are to play competitively, both sides are trying to gain as much gold as possible. The first player has the privilege to pick first then has the edge. The second player has to do the best given the remaining pots after the first player's pick.

The ultimate goal for the first player is to achieve the global maximal gold. As we discussed in the last blog, there might be multiple ways of picking to reach that global maximal gold. But for some scenarios each picking will be critical if there is only one global maximal and only one way of picking to reach it.

Let's take a look at how this game is played. First of all the first player has the advantage to pick first. Afterwards the second player, the first player, ... in turn until running out of the gold pots. For each picking it is independent on what has happened before. Even thought players make some mistakes, for now this picking they have to decide has to bring them the most gold in the remaining gold pots. As we know that each picking is independent and this is a maximization problem, dynamic programming is good solution for this kind of problems

2. DP's Sub-problem
Given a serial gold pots (P1, P2, ..., Pn) the game play is F(P1, ..., Pn). The first player has the advantage as taking the pick first. Two options are available, picking P1 or Pn. The goal to maximize the gold, therefore pick P1 or Pn which can bring more gold.
    - If pick P1, then the return of gold is P1 + F(P2, ..., Pn)
    - If pick Pn, then the return of gold is Pn + F(P1, ..., Pn-1)
    - Pick the pot that has more return

After the very first pick, both players are trying to maximize their return. In other words both players are trying to pick the pot in the least favor of the opponent, because the total amount of gold is fixed. The more you gain, the less the opponent gets. At any stage of play F(Pi, ..., Pj), where i <= j and both within [1, n]. All the gold remaining is SUM = sigma(Pk), where k within[i, j].
    - If pick Pi, then the return is Pi + F(Pi+1, ..., Pj)
            and the return of the opponent is SUM - (Pi + F(Pi+1, ..., Pj)
    - If pick Pj, then the return is Pj + F(Pi, ..., Pj-1)
            and the return of the opponent is SUM - (Pj + F(Pi, ..., Pj-1)
    - Pick the pot in the least favor of the opponent
            * Pick Pi if, SUM - (Pi + F(Pi+1, ..., Pj) < SUM - (Pj + F(Pi, ..., Pj-1)
            * Pick Pj if, SUM - (Pi + F(Pi+1, ..., Pj) > SUM - (Pj + F(Pi, ..., Pj-1)

Keep in mind in this game we are looking for the return of the first player. The DP function has to return the gain of the first player even though it is the 2nd player is playing. This is why we have interpret the goal of two player is to pick the plot in the least favor of the opponent.

3. C++ Implementation
Caching techniques has been used to save the duplicate searching path. Because two players are adopting the same strategy, the real important thing is remaining gold pots. Therefore the pair of start and end indices of gold pots is used as the hash key.

// ********************************************************************************
// IMPLEMENTATION
// ********************************************************************************
#include <numeric>
#include <unordered_map>
#include <vector>

struct GoldPotKey {
    size_t startIndex;
    size_t endIndex;

    GoldPotKey(size_t s, size_t e)
        : startIndex(s), endIndex(e)
    {}

    bool operator==(const GoldPotKey& rhs) const {
        return startIndex == rhs.startIndex &&
               endIndex == rhs.endIndex;
    }
};

struct GoldPotKeyHash {
    size_t operator()(const GoldPotKey& key) const {
        return std::hash<double>()(key.startIndex) ^ std::hash<size_t>()(key.endIndex);
    }

    bool operator()(const GoldPotKey& k1, const GoldPotKey& k2) const {
        return k1 == k2;
    }
};

typedef size_t REWARD;
typedef std::unordered_map<GoldPotKey, REWARD, GoldPotKeyHash, GoldPotKeyHash>
                        GoldPotRewardHashMap;

size_t MaxGoldCompetitiveAndKnown_DP(const std::vector<size_t>& goldPots,
                                     const size_t startIndex,
                                     const size_t endIndex,
                                     const size_t sum,
                                     GoldPotRewardHashMap& gprhm)
{
    if (startIndex == (endIndex - 1)) {
        return 0;
    }

    const GoldPotKey gpk{ startIndex, endIndex };
    {
        GoldPotRewardHashMap::const_iterator foundIter = gprhm.find(gpk);
        if (foundIter != gprhm.end()) {
            return foundIter->second;
        }
    }

    size_t returnOfOpponentIfTakeTheLeftPot = goldPots[startIndex] +
                                MaxGoldCompetitiveAndKnown_DP(goldPots,
                                       startIndex + 1, endIndex, sum - goldPots[startIndex], gprhm);
    size_t returnOfOpponentIfTakeTheRightPot = goldPots[endIndex - 1] +
                                MaxGoldCompetitiveAndKnown_DP(goldPots,
                                       startIndex, endIndex - 1, sum - goldPots[endIndex - 1], gprhm);
    // the second player tries to maximize his/her gold, and take the bigger
    // return and the left is for the first player
    REWARD expectedReward;
    if (returnOfOpponentIfTakeTheLeftPot < returnOfOpponentIfTakeTheRightPot) {
        expectedReward = sum - returnOfOpponentIfTakeTheRightPot;
    }
    else {
        expectedReward = sum - returnOfOpponentIfTakeTheLeftPot;
    }

    gprhm.insert(std::make_pair(gpk, expectedReward));

    return expectedReward;
}

size_t MaxGoldCompetitiveAndKnown(const std::vector<size_t>& goldPots)
{
    if (goldPots.empty()) {
        return 0;
    }

    size_t totalPots = goldPots.size();
    if (totalPots == 1) {
        return goldPots[0];
    }

    GoldPotRewardHashMap gprhm;
    size_t sum = std::accumulate(goldPots.begin(), goldPots.end(), 0);
    size_t returnOfTakeTheLeft = goldPots[0] +
        MaxGoldCompetitiveAndKnown_DP(goldPots, 1, totalPots, sum - goldPots[0], gprhm);
    size_t reutrnOfTakeTheRight = goldPots[totalPots - 1] +
        MaxGoldCompetitiveAndKnown_DP(goldPots, 0, totalPots - 1,
                               sum - goldPots[totalPots - 1], gprhm);
    size_t maxGold = returnOfTakeTheLeft > reutrnOfTakeTheRight ?
                                  returnOfTakeTheLeft : reutrnOfTakeTheRight;
    return maxGold;
}
// ********************************************************************************
// TEST
// ********************************************************************************
#include <cmath>

void TestCases()
{
    std::vector<size_t> goldPots = { 1, 2 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 2);
 
    goldPots = { 1, 3, 2 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 3);
 
    goldPots = { 1, 2, 10, 3 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 11);
 
    goldPots = { 4, 1, 2, 10 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 12);
 
    goldPots = { 4, 1, 2, 10, 3 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 9);
 
    goldPots = { 5, 4, 1, 2, 10 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 15);
 
    goldPots = { 5, 4, 1, 2, 10, 3 };
    assert(MaxGoldCompetitiveAndKnown(goldPots) == 16);
  }
// ********************************************************************************

Monday, 8 December 2014

Dynamic Programming - Maximizing Gold : Competitive/Cooperative Play with Unknown Pots

1. Problem Description
Yet again this is a Google Interview Questions for Intern from careerup. The following is the original problem description of that thread.

"Question: Two players A and B are playing a game. Pots of gold, each with 
varying number of coins are placed in a single line. The rules of the game are:
1) Players play turn by turn.
2) On each turn a player can pick a pot of gold from either end of the line. He
gets all the gold in that pot. The next pot of gold on that end is now available
for picking.
What is the maximum number of gold can the first player get ?"
2. Analysis 
As some contributors on that thread pointed out that the owner of this thread was not very clear about two questions
    - Are two players playing competitively or cooperatively?
    - Are the gold pots are known to two players?

If the gold pots are unknown to two player and the only known pots are these two at the two ends available to pick, then there is not really much strategy to play. As the very limited information is available, like
    - Two pots available to pick at the two ends
    - The gold that two players are holding before the next pick
Assume that the number of gold in each pot, the number of pots are completely random and each game is independent.  Therefor repeating many games does not really bring statistical hint for next games. All the two players can do is to make a pick based on the very limited information.

If the gold pots are known to two players, not matter if the players are playing competitively or cooperatively there is a global optimal/maximal gold that the first player can gain. If not single global maximal (many ways of picking to reach the maximal gold for the first player, for instance all pots with the same amount of gold), one of the ways to pick is what we are looking for. If only one global maximal available, the each picking of the first player will be critical because there might be one only way of picking that can take the first player to the unique global maximal. This will be a game between two players who can think deep forward.

In this blog I will discuss the scenarios that the gold pots are known to the two players. And these two players can play either competitively or cooperatively.

3. Solution
As I briefly discussed in last section, very  limited information are available to the two players.Only the two pots at the two ends and the gold that they are currently holding now are available to be considered. My strategy is rather simple as well. Focus on the short term run as not enough information available to plan in the long run.
    - Competitive play: both players pick the bigger pots at the two ends
    - Cooperative play: the first player picks the bigger pot and the 2nd pick the smaller pot

4. C++ Implementation
// ********************************************************************************
// IMPLEMENTATION
// ********************************************************************************
#include <vector>

size_t MaxGoldCooperativeAndUnknown(const std::vector<size_t>& goldPots)
{
    if (goldPots.empty()) {
        return 0;
    }

    size_t endIndex = goldPots.size() - 1;
    if (endIndex == 0) {
        return goldPots[0];
    }

    size_t gold = 0;
    size_t whoseTurn = 0; // Even: first player; Odd: second player
    for (size_t startIndex = 0; startIndex < endIndex;) {
        if (goldPots[startIndex] > goldPots[endIndex]) {
            if (whoseTurn & 1) {
                --endIndex; // the second player pick up smaller one
            }
            else {
                gold += goldPots[startIndex];
                ++startIndex; // the first player pick up bigger pot
            }
        }
        else
        {
            if (whoseTurn & 1) {
                ++startIndex; // the second player pick up smaller one
            }
            else {
                gold += goldPots[endIndex];
                --endIndex; // // the first player pick up bigger pot
            }
   
        }
        ++whoseTurn;
    }

    return gold;
}

size_t MaxGoldCompetitiveAndUnknown(const std::vector<size_t>& goldPots)
{
    if (goldPots.empty()) {
        return 0;
    }

    size_t endIndex = goldPots.size() - 1;
    if (endIndex == 0) {
        return goldPots[0];
    }

    size_t gold = 0;
    size_t whoseTurn = 0; // Even: first player; Odd: second player
    for (size_t startIndex = 0; startIndex < endIndex;) {
        if (goldPots[startIndex] > goldPots[endIndex]) {
            if (whoseTurn & 1) {
                ++startIndex; // the second player pick up bigger one
            }
            else {
                gold += goldPots[startIndex];
                ++startIndex; // the first player pick up bigger pot
            }
        }
        else
        {
            if (whoseTurn & 1) {
                --endIndex; // the second player pick up bigger one
            }
            else {
                gold += goldPots[endIndex];
                --endIndex; // the first player pick up bigger pot
            }

        }
        ++whoseTurn;
    }

    return gold;
}
// ********************************************************************************

5. Reflection on the Strategy
Because of limited information available and the simple strategy, this is really not a fun game to play. And the game does not really depends on how well the two players can play and it completely depends on how the gold pots is generated.

Another question is if this is a win-or-lose game. If yes, then the player can consider taking disadvantage pick for the short run in order to gain more in the long run. For instance if one player knows that the gold is much less than the opponent. Then probably it is worth taking the risk to take some bold picks in order to swing the game. This can't be called a strategy but be really just a gamble, because there is not really any evidence to bring you different game result by taking disadvantage picks based on the assumption in Section 3.

Surely with known gold pots, it would be much fun to play. Because each pick of the first player will potentially be deciding moment if he/she can reach the global maximal gold. I will discuss it in my next two blogs.

Tuesday, 25 November 2014

Data Structure and Algorithm - Detect the Repetitive Sub-string

1. Problem Description
Yet again this is a Google Interview Question for Software Engineer/Developer on careerup. Here is the description on that thread.

"Given a string (1-d array) , find if there is any sub-sequence that repeats itself. 
Here, sub-sequence can be a non-contiguous pattern, with the same relative order. 

Eg: 

1. abab <------yes, ab is repeated 
2. abba <---- No, a and b follow different order 
3. acbdaghfb <-------- yes there is a followed by b at two places 
4. abcdacb <----- yes a followed by b twice 

The above should be applicable to ANY TWO (or every two) characters in the string and optimum over time. 

In the sense, it should be checked for every pair of characters in the string."

2. Solution
As we are looking for repeating sub-string, therefore any character that appears only once will not count. We only care about these characters that appears  multiple times.

A string with characters appearing more than twice
Given a string as "...asdfasxabcbdkldjfadxkldjkadxadsfads...", character 'x' appears 3 or more times, then I believe that this is a repetitive string, because the second 'x' follows up the first one and the third one follows up the second. So it is repetitive.
Therefore a string has any character appearing more than twice, then it is a repetitive string.

A string with all characters appearing no greater twice
As we discussed above, those characters that appear only once are out of interest and they can be removed as noise. Assume that we remove the noise and only characters that appear twice remain. Let's examine what they should look like. Let's take an arbitrary example, a, b, c and d appear twice. If we have starting string as "abcd", what could be the next string in order to make this string non-repetitive. Obviously here are 4 options.
    - If it is 'a', then definitely this is a repetitive string because b, c, and d will appear after second 'a'.
    - If 'b', then definitely this is a repetitive string because c and d will appear after second 'b'.
    - If 'c', then definitely this is a repetitive string because d will appear after second 'c'.
    - If 'd', undecided and dependent on what the next follows up.

This is actually telling us a very interesting story. If a string, whose characters all appear twice, has to be a palindrome, then it will be qualified as non-repetitive string. Otherwise it will definitely be a repetitive string. Here summarize the pseudo-code:
    - Detect each character's appearance. Return true, if any appears more than twice
    - Strip out all characters that appear only once.
    - Check if the rest string is palindrome. If yes, return false otherwise return true.

Complexity analysis
A linear solution is implemented. Use a hash map to record/detect the appearance of characters, which can be used to strip out all the characters that appear only once.Then checking if the remaining string is palindrome can be done in linear solution as well by doing comparison from both ends. So overall this is a linear solution on both algorithm complexity and space complexity.

3. C++ Implementation
// ********************************************************************************
// IMPLEMENTATION
// ********************************************************************************
#include <string>
#include <unordered_map>
#include <vector>

#include <cassert>

typedef std::unordered_map<size_t, size_t> FrequencyMap;

bool IsPalindrome(const std::string& myStr)
{
    if (myStr.empty()) {
        return false;
    }

    const size_t len = myStr.size();
    if (len == 1) {
        return true;
    }

    for (size_t start = 0, end = len - 1; start < end; ++start, --end) {
        if (myStr[start] != myStr[end]) {
            return false;
        }
    }

    return true;
}

bool DetectFrequencyAndReturnFalseIGreaterThan2(const std::string& myStr, FrequencyMap& freqMap)
{
    for (std::string::const_iterator cIt = myStr.begin(); cIt != myStr.end(); ++cIt) {
        FrequencyMap::iterator found = freqMap.find(*cIt);
        if (found == freqMap.end()) {
            freqMap.insert(std::make_pair(*cIt, 1));
        }
        else {
            ++found->second;
            if (found->second > 2) {
                return true;
            }
        }
    }

    return false;
}

std::string StripCharsAppearingOnlyOnce(const std::string& myStr, const FrequencyMap& freqMap)
{
    std::string result;
    for (std::string::const_iterator cIt = myStr.begin(); cIt != myStr.end(); ++cIt) {
        FrequencyMap::const_iterator found = freqMap.find(*cIt);
        assert(found != freqMap.end());
        if (found->second == 2) {
            result.push_back(*cIt);
        }
    }

    return result;
}

bool FindRepeatedSequence(const std::string& myStr)
{
    FrequencyMap freqMap;
    if (DetectFrequencyAndReturnFalseIGreaterThan2(myStr, freqMap)) {
        return true;
    }

    std::string stringWithCharAppearingTwice = StripCharsAppearingOnlyOnce(myStr, freqMap);
    if (stringWithCharAppearingTwice.empty() || stringWithCharAppearingTwice.size() < 4) {
        return false;
    }

    return !IsPalindrome(stringWithCharAppearingTwice);
}

// ********************************************************************************
// TEST
// ********************************************************************************
void TestCases()
{
    assert(FindRepeatedSequence("aaa") == true);
    assert(FindRepeatedSequence("abab") == true);
    assert(FindRepeatedSequence("abba") == false);
    assert(FindRepeatedSequence("acbdaghfb") == true);
    assert(FindRepeatedSequence("abcdacb") == true);
}
// ********************************************************************************

Thursday, 20 November 2014

Data Structure and Algorithm - Find the Largest Number within K Swaps

1. Problem Description
Yet again this is another Google Interview Question for Software Developer/Engineer in careerup. Here is the original description of the thread.

"Given a number M (N-digit integer) and K-swap operations(a swap 
operation can swap 2 digits), devise an algorithm to get the maximum possible integer? 
Examples: 
M = 132 K = 1 output = 312 
M = 132 K = 2 output = 321 
M = 7899 k = 2 output = 9987 
M = 8799 and K = 2 output = 9987"

2. Solution
Let's take a close look at this problem. For any number there is always a global maximal and minimal value. The global maximal value is to number that have all the digits in the descending order from the left to the right direction. The minimal value is the number that have all the digits in ascending order except that the starting digit can be 0.)

How much swap to get the maximal value?
Given a number with any arbitrary order, at the most it needs (N-1) swaps to get the global maximal value, where N is the number of digits this number has. Simply each time find the biggest digit and swap it to the left side. For instance,
    - Number = 8765432109
        * 9765432108
        * 9865432107
        * 9875432106
        * 9876432105
        * 9876532104
        * 9876542103
        * 9876543102
        * 9876543201
        * 9876543210
This example is the worst case that need 9 (10 digits - 1) swaps, because each swap will result in the biggest digit out of place. The last swap will make the two digits in place, and this is where (-1) comes from in (N - 1).

Swap distance
We all know about the edit distance between two strings. Literally it represents how many modifications is needed to make two strings equal, assuming that each adding, removing and modifying as 1 distance.
Then what is swap distance? Assume we represent a  number with std::string with 10 chars, '0' - '9'. The swap distance represents how many swaps is needed to reach the global maximal value given a number in any shape with each swap as 1 distance. And we already know that the swap distance will be no more than (N-1), where N is the size of the string.

Solution of this problem
Here we know that there is a global maximal value for any number. And we always can reach this global maximal value if given enough swaps. So if given K swaps, we would like to reach a number as close to the global maximal value as possible. This means that first we need to find the right digits to swap and secondly these digits should be in descending order after swaps.

How to find the right digits for these K swaps and how to swap? The idea is to match the global maximal value for the left to the right. Swap as many as biggest digits to the left and keep them arranged in descending order when swapping. Here is the pseudo-code:
    I). Find the global maximal value
    II). Compare the Number with its global maximal and locate the first K edit distance from the left side to the right. (Not exactly correct. Will discuss later)
    III). Keep the indices of these digits that appearing at the edit difference
    IV). Sort the digits in the descending order and put them back to the indices recorded above
Here are two examples
    - N: 8765432109, K = 3
        * 9876543210 as the global maximal value
        * The first 3 edit difference is: 876 vs. 987
        * The indices of these 4 digits are: 0, 1, 2, 9
        * Sort the 4 digits in the descending order: 9, 8, 7, 6
        * Put 4 digits back: 9@position 0, 8@position 1, 7@position 2 and 6@position 9
        * After 3 swaps, then the maximal number we can achieve: 9875432106

    - N: 876959, K = 3
        * 998765 as the global maximal value
        * The first 3 edit difference is: 876 vs. 998
        * The indices of these 5 digits are: 0, 1, 2, 3,  and 5
        * Sort the 5 digits in the descending order: 9, 9, 8, 7, 6
        * Put 5 digits back: 9@position 0, 9@position 1, 8@position 2, 7@position 3 and 6@position 5
        * After 3 swaps, the maximal number we can achieve: 998756

As I mentioned on the 2nd step finding the K edit distance between Number and its global maximal value is not exactly correct. It does not work for some scenarios. The reason behind is that the swap distance is different from the edit distance, because one swap could make two digits into the correct place if these two digits are taking their counterpart's desired location. This should count only 1 swap distance but it counts 2 edit distance. Here is an example.
    - N: 689945932999, K = 5
        * 999999865432 as the global maximal value
        * The first 5 edit difference is: 68xx459 vs. 99xx998
        * The key here @position 1 and @position 6: pair (8, 9) and (9, 8). Swapping the digits on these two locations will make both digits into the correct place. But only one swap but two edit distance.
        * So above we have not yet consume K swaps yet. So we have to keep comparing Number and the global maximal value until exhausting K swaps.
        * The first 5 swap difference is:  68xx4593 vs. 99xx9986
        * Now keep the indices, sort the digits and put them back. The result: 999999862543

In order to calculate the correct swap distances the previous pairs have to be tracked. If two pairs have the same number but in different order (x, y) <=> (y, x), then the swap distance counts only 1. In the above pseduo-code, finding K swap distances instead of K edit distances in the 2nd step will make this algorithm fly.

Complexity analysis
It takes O(N) to find the global maximal value. Do not sort the string in the descending because it takes at least O(N*logN) (see Sort and Binary Search). Keep in mind that there are only 10 digits. Simply count the occurrence of each digits and rearrange them in the descending order. This is an O(N) solution.

Finding the K swap distance takes O(N). Simply go through from left to the right. But at the same time we need to track if (x,y)/(y,x) appear. In my implementation hash map is used, therefore it is O(1) solution for tracking the pair. Overall it is O(N) to find the K swap distance.

 The digits to swap need to be sorted as well, and this can be done in the same way as to find the global maximal value. Therefore it is O(K).

At the same time we need to keep tracking the indices of the digits to swap. The digits to swap are sorted and put back from the left to right. The largest digit to the lowest index and the smallest digit to the largest index. So the indices has to be sorted as well. In my implementation a binary map is used therefor it takes O(K*logK). Still O(K) is achievable if using hash set plus a vector in the way of using hash map above to tracking the (x,y) and (y.x) pair.

In my implementation it is a O(N) + O(K*logK) solution in computation complexity and O(N) in space complexity. Still a O(N+K) solution is achievable in computation complexity.

3. C++ Implementation
// ********************************************************************************
// Implementation
// ********************************************************************************
#include <set>
#include <string>
#include <unordered_map>

const int TOTAL_NUM_OF_DIGITS = 10;

struct SwapPair {
    SwapPair(char src, char dst)
    : a(src < dst ? src : dst),
    b(src > dst ? src : dst)
    {}

    bool operator()(const SwapPair& rhs) const {
        return a == rhs.a && b == rhs.b;
    }

    char a;
    char b;
};

struct SwapPairInfo {
    SwapPairInfo(char src, char dst, size_t f)
    : aLessThanb(src < dst), frequency(f)
    {}

    bool aLessThanb;
    size_t frequency;
};

struct SwapPairHash{
    size_t operator()(const SwapPair& key) const {
        return std::hash<long>()(key.a) ^
            std::hash<long>()(key.b);
    }

    bool operator()(const SwapPair& k1, const SwapPair& k2) const {
        return k1.operator()(k2);
    }
};

// to track the (x,y) and (y,x) pair
typedef std::unordered_map<SwapPair, SwapPairInfo, SwapPairHash, SwapPairHash> SwapPairHashMap;

std::string FindLargestNumber(const std::string& numbers,
                                                   std::vector<size_t> allDigitsMap[TOTAL_NUM_OF_DIGITS])
{
    for (size_t charIdx = numbers.size(); charIdx > 0; --charIdx) {
        assert(numbers[charIdx - 1] >= '0' && numbers[charIdx - 1] <= '9');
        allDigitsMap[numbers[charIdx - 1] - '0'].push_back(charIdx - 1);
    }

    std::string result;
    for (int num = TOTAL_NUM_OF_DIGITS - 1; num >= 0; --num) {
        if (!allDigitsMap[num].empty()) {
            for (size_t index = 0; index < allDigitsMap[num].size(); ++index) {
                result.push_back('0' + num);
            }
        }
    }

    return result;
}

std::string SwapKcharsBySorting(const std::string& numbers, const std::set<size_t>& swapIndices)
{
    size_t digitsCount[TOTAL_NUM_OF_DIGITS] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (std::set<size_t>::const_iterator cIter = swapIndices.begin();
                             cIter != swapIndices.end(); ++cIter) {
        ++digitsCount[numbers[*cIter] - '0'];
    }

    std::vector<char> sortedSwapChars;
    for (int num = TOTAL_NUM_OF_DIGITS - 1; num >= 0; --num) {
        if (digitsCount[num] > 0) {
            for (size_t index = 0; index < digitsCount[num]; ++index) {
                sortedSwapChars.push_back('0' + num);
            }
        }
    }

    std::string result(numbers);
    size_t index = 0;
    for (std::set<size_t>::const_iterator cIter = swapIndices.begin();
        cIter != swapIndices.end(); ++cIter, ++index) {
        result[*cIter] = sortedSwapChars[index];
    }

    return result;
}

std::string KswapSolution(const std::string& numbers, const size_t k)
{
    if (k == 0 || numbers.empty()) {
        return numbers;
    }

    std::vector<size_t> digitsMap[TOTAL_NUM_OF_DIGITS];
    const std::string largestNum = FindLargestNumber(numbers, digitsMap);
    assert(largestNum.size() == numbers.size());

    if ((k + 1) >= numbers.size()) {
        // special case to reach the maximal value any way
        return largestNum;
    }


    // find the index to swap
    SwapPairHashMap swapPairHM;
    size_t swapCount = k;
    std::set<size_t> swapIndices;
    size_t tempIndexOfDigitsMap[TOTAL_NUM_OF_DIGITS] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (size_t index = 0; index < numbers.size() && swapCount > 0; ++index) {
        if (largestNum[index] != numbers[index]) {
            const SwapPair sp(numbers[index], largestNum[index]);
            SwapPairHashMap::iterator found = swapPairHM.find(sp);
            if (found == swapPairHM.end()) {
               // keep (x,y)
               swapPairHM.insert(std::make_pair(sp,
                    SwapPairInfo(numbers[index], largestNum[index], 1)));
                --swapCount;
                swapIndices.insert(index);
                swapIndices.insert(digitsMap[largestNum[index] - '0'].at(
                    tempIndexOfDigitsMap[largestNum[index] - '0']));
                ++tempIndexOfDigitsMap[largestNum[index] - '0'];
            }
            else {
                bool aLessThanb = numbers[index] < largestNum[index];
                if (found->second.aLessThanb == aLessThanb) {
                    // (x, y) again
                    ++found->second.frequency;
                    --swapCount;
                }
                else {
                    // (y,x) found
                    if (found->second.frequency > 0) {
                        // there is (x,y) available, then do not reduce swapCount
                        --found->second.frequency;
                    }
                    else {
                        // there is not (x,y) available, then change it to (y, x) pair
                        ++found->second.frequency;
                        found->second.aLessThanb = aLessThanb;
                        --swapCount;
                    }
                }

                swapIndices.insert(index);
                swapIndices.insert(digitsMap[largestNum[index] - '0'].at(
                    tempIndexOfDigitsMap[largestNum[index] - '0']));
                ++tempIndexOfDigitsMap[largestNum[index] - '0'];
            }
        }
    }

    return SwapKcharsBySorting(numbers, swapIndices);
}

// ********************************************************************************
// TEST
// ********************************************************************************

#include <cassert>
void TestCornerCases()
{
    assert(KswapSolution("", 1).empty());
    assert(KswapSolution("9", 1) == "9");
    assert(KswapSolution("19", 1) == "91");
    assert(KswapSolution("19", 0) == "19");
    assert(KswapSolution("1234", 3) == "4321");
    assert(KswapSolution("119798699", 8) == "999987611");
}

void TestCases()
{
    assert(KswapSolution("132", 1) == "312");
    assert(KswapSolution("132", 2) == "321");
    assert(KswapSolution("7899", 2) == "9987");
    assert(KswapSolution("8799", 2) == "9987");
    assert(KswapSolution("1189119999", 5) == "9999981111");
    assert(KswapSolution("191899", 3) == "999811");
    assert(KswapSolution("191899", 2) == "999811");
    assert(KswapSolution("34155", 2) == "55143");
    assert(KswapSolution("12520", 2) == "52210");
    assert(KswapSolution("876959", 3) == "998756");
    assert(KswapSolution("6899459999", 5) == "9999998654");
    assert(KswapSolution("68994579999", 5) == "99999987654");
    assert(KswapSolution("689984599382999", 5) == "999999988382654");
    assert(KswapSolution("68994593999", 5) == "99999986543");
    assert(KswapSolution("68994597999", 5) == "99999987654");
    assert(KswapSolution("689945932999", 5) == "999999862543");
    assert(KswapSolution("876959", 2) == "996857");
    assert(KswapSolution("123456789098765432199998888777777266655", 350)
                == "999999888888777777776666655554433222110");
    assert(KswapSolution("123456789098765432199998888777777266655", 35)
                == "999999888888777777776666655554433222110");
    assert(KswapSolution("123456789098765432199998888777777266655", 9)
                == "999999888878765432165438210777777266655");
}
// ********************************************************************************


Sunday, 2 November 2014

Data Structure and Algorithm - Reversing a Sub-string to Get the Least Lexicographical String

1. Problem Description
This is a Google Interview Question SDE-2s from careerup. The following is the original description from the website.

"You are given a string S. Each character of S is either ‘a’, or ‘b’. You wish to reverse exactly one sub-string of S such that the new string is lexicographically smaller than all the other strings that you can get by reversing exactly one sub-string. 
For example, given ‘abab’, you may choose to reverse the substring ‘ab’ that starts from index 2 (0-based). This gives you the string ‘abba’. But, if you choose the reverse the substring ‘ba’ starting from index 1, you will get ‘aabb’. There is no way of getting a smaller string, hence reversing the substring in the range [1, 2] is optimal. 

Input: 
First line contains a number T, the number of test cases. 
Each test case contains a single string S. The characters of the string will be from the set { a, b }. 

Output: 
For each test case, print two numbers separated by comma; for example “x,y” (without the quotes and without any additional whitespace). “x,y” describe the starting index (0-based) and ending index respectively of the substring that must be reversed in order to acheive the smallest lexicographical string. If there are multiple possible answers, print the one with the smallest ‘x’. If there are still multiple answers possible, print the one with the smallest ‘y’. 

Constraints: 
1 ? T ? 100 
1 ? length of S ? 1000 

Sample Input: 

abab 
abba 
bbaa 
aaaa 
babaabba 

Sample Output: 
1,2 
1,3 
0,3 
0,0 
0,4"

2. Solution
This solution has algorithm complexity of O(N) and space complexity of O(1). The fundamental idea is that the result string will have as many 'a's as possible in the left hand after reversing a sub-string, because the lexicographical comparison starts from the left to the right and stops when the first character is not equal.

The start of the sub-string
This is reasonably easy question. The answer is that the sub-string starts from the first 'b' in the string from the left hand direction, if there is 'a' existing afterwards. Otherwise starts from 0.
If there is no 'b' existing in the string, then the string consists of only 'a's. Therefore the string is already at the least lexicographical state and the sub-string {0, 0} should be return according to the problem requirement - return the sub-string with the least indices if multiple choices exist. For instance 'aaaa', there is no 'b' and reversing any sub-strings will have the same result. According to the requirement the first character {0, 0} is the correct answer.
If there is 'b' exiting, then the very first 'b' from the left hand side is the starting point of the sub-string. For instance ''aaabbaabbbabbab', then the sub-string should start from 3 (the index of string starts from 0).

The end of the sub-string
It must end with 'a' that appears after the start of the sub-string, if such 'a' exists. Otherwise ends at 0. Let's take a look at a few scenarios.

Scenario 1: there is no such 'a' after the first 'b'
If there is no 'a' existing after the very first 'b', then this string is already at its least lexicographical state, Therefore the program should return {0, 0}. For instance 'aabbbb', the very first 'b' is at position 2 and afterwards there is no 'a' exits. And this string is already at the least lexicographical state, and no reversing can possibly make it lexicographically less.

Scenario 2: there is only one continuous 'a's appearing after the first 'b'
The sub-string will be starts from the very first 'b' and ends at the last 'a'. For instance 'aabbaaabbb', the very first 'b' is at position 2 and the last 'a' after the first 'b' is at position 6, 'bbaaa'. Reversing this sub-string {2, 6} will have 'aaaaabbbbb', which is the sort of string as Scenario 1 that is already at the least lexicographical state. So the correct answer for this scenario is {firstB, lastAAfterFirstB}.

Scenario 3: multiple continuous 'a's after the first 'b' but only one instance with most 'a's
There can be any number of continuous 'a's appearing after the first 'b'. These continuous 'a's consist of any number of 'a's but only one continuous 'a's have the most 'a's. For instance,
    'aabbaaabbbaaaabbbbaaaaaabb'
    - The first 'b': at position 2
    - 3 continuous 'a's after the first 'b': 'aaa', 'aaaa' and 'aaaaaa'
    - The continuous 'a' having the most 'a's are 'aaaaaa'. With 6 'a's
Now we have 3 candidates of sub-strings
    'bbaaa'                            
    'bbaaabbbaaaa'
    'bbaaabbbaaaabbbbaaaaaa'
And the last candidate is what we want, because after reversing it will have longest 'a's on the left hand side of continous 8 'a's. Other two candidates will have  5 and 6. Therefore the correct answer for this scenario is {firstB, lastAOfLongestContinuousA_AfterFirstB}. In the above example {2, 23}.

Scenario 4: multiple continuous 'a's after the first 'b' and multiple instances with most 'a's
All the above scenarios can be easily achieved with O(N) algorithm complexity and O(1) space complexity. For this scenario that I am most struggled with is to find a solution with O(N) algorithm complexity.
Initially I was thinking of recording all the instances with most 'a's, then go thought all the candidates and find the one that reversing can result the least lexicographical string (or find the one with the least indices if multiple candidates have the same result). But in this way for some extreme cases it won't guarantee O(N) algorithm complexity, for instance
    'bababababababababa'
    - the very fist 'b' in the left at position 0
    - After the very fist 'b', all the continuous 'a's have size of 1.
    - There are N/2 candidates (N is the length of the string), record them
    - Compare the N/2 candidates and find out the correct one
    - It is a O(N^2) solution

A solution with O(N^2) algorithm complexity is not what I was targeting obviously. After carefully examining this cases, a linear solution found. The key idea behind this solution is to how to pick the correct candidate (the candidate has the same length and most 'a's). Here is the pseudo-code
    1. Go through each character in the reverse direction of the string until firstB (include firstB)
    2. Record the startIndex, the numberOfA and LenToNextContinousA of the longest continuous 'a's
    3. Keep update the startIndex, numberOfA and LenToNextContinousA, if found a longer one
    4. If found 2nd continuous 'a' with the same number 'a'. Compare the following LenToNextContinousA - numberOfA character with str[startIndex - LenToNextContinousA + 1, startIndex - numberOfA]. If less, then update startIndex, LenToNextContinousA to the 2nd continuous 'a'.
    5. Until hit firstB.
Step 4 is to decide to pick up the candidate. Here is the explanation. Considering two adjacent two continuous 'a's with longest 'a's found so far.
    "a...absssssbaaaabxxxxxxbaaaabzzzzz", suppose
    reverse(xxxxxxxx) = yyyyyyyy
    reverse(sssss) = ooooo, then we have two candicates
    Result 1: a...aaaaabooooobbxxxxxxbaaaab....  <= Candidate 1
    Result 2: a...aaaaabyyyyyybaaaabsssssbb....    <= Candidate 2
Compare Result 1 and Result 2, then the two lexicographical equality is decided by the string marked as red color booooobb and  byyyyyyb. Because in the purple color part Result 2 is always less than Result 1, because xxxx can not be aaaa. Therefore we can conclude,
    - If  booooobb < byyyyyyb, then Result 1 < Result 2 and take Candidate 1
    - If  booooobb > byyyyyyb, then Result 2 < Result 1 and take Candidate 2
    - If  booooobb = byyyyyyb, then Result 2 < Result 1 and take Candidate 2

So far now we have gone through all the scenarios and the sub-string can be found in a single linear search on the string. It is a linear solution and requires constant space.

3. C++ Implementation
// Implementation
// ********************************************************************************
#include <string>

#include <cassert>

struct AabbResult {
    AabbResult(int s, int e)
        : start(s), end(e)
    {}

    int start;
    int end;
};

AabbResult AabbSolution(const std::string& str)
{
    if (str.empty()) {
        return AabbResult{ -1, -1 };
    }

    size_t firstB = str.find_first_of('b');
    if (firstB == std::string::npos) {
        // all are 'a', so need to reverse
        return AabbResult{ 0, 0 };
    }

    bool foundA = false;
    size_t rFirstA = 0;
    size_t rNextA = 0;
    size_t MaxNumOfContinousA = 0;
    for (size_t index = str.length() - 1; index > firstB; --index)
    {
        if (!foundA && rFirstA == 0 && str[index] == 'a') {
            foundA = true;
            rFirstA = index;
            continue;
        }

        if (foundA && str[index] != 'a') {
            // find the first continuous 'a' in the reverse order 
            // and record its length
            foundA = false;
            MaxNumOfContinousA = rFirstA - index;
        }

        if (!foundA && rFirstA > 0 && str[index] == 'a') {
            // find the start of the 2nd continuous 'a' in the
            // reverse order
            rNextA = index;
            break;
        }
    }

    if (rFirstA == 0) {
        // not found any 'a' after first 'b'
        // eg: aaabbbb
        return AabbResult{ 0, 0 };
    }

    if (rNextA == 0) {
        // only one continuous 'a' between firstB and rFirstA
        // aaabbbaaaabb
        return AabbResult( static_cast<int>(firstB),
                           static_cast<int>(rFirstA) );
    }

    // There are multiplie multipile continuous 'a' after firstB
    // eg: aaabbbaaabbaaabbaabb
    size_t strLenBetweenLongestA = 0;
    size_t rLongestAIndex = rFirstA;
    size_t numOfContinuousA = 0;
    bool   sameMaxLengthContinousAFound = false;
    size_t numToContinueComparing = rFirstA - rNextA;
    foundA = false;
    size_t tempIndex = 0;
    for (size_t index = rNextA + 1; index > firstB; --index, ++tempIndex) {
        if (sameMaxLengthContinousAFound) {
            // Step 4 on Scenario 4
            if (numToContinueComparing > 0) {
                if (str[index - 1] > str[rLongestAIndex - MaxNumOfContinousA - tempIndex]) {
                    // keep the continuous 'a' as it is still the least lexicographical one
                    // Case of booooobb > byyyyyyb
                    sameMaxLengthContinousAFound = false;
                }
                else if (str[index - 1] < str[rLongestAIndex - MaxNumOfContinousA - tempIndex]) {
                    // Case of booooobb < byyyyyyb         
                    // The newly found continuous 'a's is the least lexicographical one. 
                    // Update the record
                    rLongestAIndex = rFirstA;
                }

                --numToContinueComparing;
            }
            else {
                // keep the continuous 'a' as it is still the least lexicographical one
                // Case of booooobb = byyyyyyb
                sameMaxLengthContinousAFound = false;
            }
        }

        if (!foundA && str[index - 1] == 'a') {
            strLenBetweenLongestA = rLongestAIndex - (index - 1);
            rNextA = index - 1;
            numOfContinuousA = 1;
            foundA = true;
            continue;
        }

        if (foundA) {
            if (str[index - 1] == 'a') {
                ++numOfContinuousA;
                if (numOfContinuousA > MaxNumOfContinousA) {
                    // found a even longer continuous 'a's
                    // clear the flag to cancel the comparison of Step 4 on Scenario 4
                    sameMaxLengthContinousAFound = false;
                }
            }
            else {
                if (numOfContinuousA > MaxNumOfContinousA) {
                    // found a even longer continuous 'a's
                    // update the record
                    rLongestAIndex = rNextA;
                    MaxNumOfContinousA = numOfContinuousA;
                    sameMaxLengthContinousAFound = false;
                }
                else if (numOfContinuousA == MaxNumOfContinousA) {
                    // found two continuous 'a' with the same number of 'a'
                    // Set the flag to start the comparison of Step 4 on Scenario 4
                    sameMaxLengthContinousAFound = true;
                    numToContinueComparing = strLenBetweenLongestA - MaxNumOfContinousA;
                    tempIndex = 0;
                    rFirstA = rNextA;
                }

                foundA = false;
            }
        }

    }

    if (sameMaxLengthContinousAFound) {
        assert(numToContinueComparing > 0);
        // found two continuous 'a' with the same number of 'a'
        // not finished Step 4 on Scenario 4 yet before hitting firstB
        // conitnue to the reversed part
        for (size_t index = 0; index < numToContinueComparing; ++index, ++tempIndex) {
            if (str[rFirstA + index + 1] >
                              str[rLongestAIndex - MaxNumOfContinousA - tempIndex]) {
                // Case of booooobb > byyyyyyb, and
                // Case of booooobb = byyyyyyb
                return AabbResult{ static_cast<int>(firstB),
                                   static_cast<int>(rLongestAIndex) };
            }
            else if (str[rFirstA + index + 1] <
                             str[rLongestAIndex - MaxNumOfContinousA - tempIndex]) {
                // Case of booooobb < byyyyyyb
                return AabbResult{ static_cast<int>(firstB),
                                   static_cast<int>(rFirstA) };
            }
        }
    }

    return AabbResult{ static_cast<int>(firstB),
                       static_cast<int>(rLongestAIndex) };
}

// ********************************************************************************
// Test
// ********************************************************************************
void TestCaseFromCareerup()
{
    AabbResult result = AabbSolution("");
    assert(result.start == -1 && result.end == -1);
     
    result = AabbSolution("abab");
    assert(result.start == 1 && result.end == 2);

    result = AabbSolution("abba");
    assert(result.start == 1 && result.end == 3);
 
    result = AabbSolution("bbaa");
    assert(result.start == 0 && result.end == 3);
 
    result = AabbSolution("aaaa");
    assert(result.start == 0 && result.end == 0);
 
    result = AabbSolution("bbbbbbbbbbbbbbbb");
    assert(result.start == 0 && result.end == 0);

    result = AabbSolution("aaaaaaaaaaaaaa");
    assert(result.start == 0 && result.end == 0);
}

void TestCasesSingleHighestContinousA()
{
    AabbResult result = AabbSolution("babaabba");
    assert(result.start == 0 && result.end == 4);

    result = AabbSolution("babaabbaaaaabbbbaaaaaaabbbaabbaabababab");
    assert(result.start == 0 && result.end == 22);

    result = AabbSolution("aababaabbaaaaabbbbaaaaaaabbbaabbaabababab");
    assert(result.start == 2 && result.end == 24);
}

void TestCasesMultipleHighestContinuousAs()
{
    AabbResult result = AabbSolution("abbaaabbbbbbaaa"); // abbbaaabbbbbbaaa
    assert(result.start == 1 && result.end == 14);
    result = AabbSolution("abbbaaabbbbbbaaa");
    assert(result.start == 1 && result.end == 15);
    result = AabbSolution("abbaaababbbbaaa");
    assert(result.start == 1 && result.end == 5);
    result = AabbSolution("baabaabbaa");
    assert(result.start == 0 && result.end == 5);

    // a babaaa babaaa babaaa babaaa babaaa
    result = AabbSolution("ababaaababaaa");
    assert(result.start == 1 && result.end == 12);
    result = AabbSolution("ababaaababaaababaaa");
    assert(result.start == 1 && result.end == 18);
    result = AabbSolution("ababaaababaaababaaababaaa");
    assert(result.start == 1 && result.end == 24);
    result = AabbSolution("ababaaababaaababaaababaaababaaa");
    assert(result.start == 1 && result.end == 30);
}

// ********************************************************************************

Dynamic Programming - Task Scheduling

1. Problem Description

"There are at most eight servers in a data center. Each server has got a capacity/memory limit. There can be at most 8 tasks that need to be scheduled on those servers. Each task requires certain capacity/memory to run, and each server can handle multiple tasks as long as the capacity limit is not hit. Write a program to see if all of the given tasks can be scheduled or not on the servers? 

Ex: 
Servers capacity limits: 8, 16, 8, 32 
Tasks capacity needs: 18, 4, 8, 4, 6, 6, 8, 8 
For this example, the program should say 'true'. 

Ex2: 
Server capacity limits: 1, 3 
Task capacity needs: 4 
For this example, program should return false. 

Got some idea that this needs to be solved using dynamic programming concept, but could not figure out exact solution."

2. Solution
As the interviewee indicated that dynamic programming will be a very good approach to tackle this problem. I think he/she is correct. As usual the key of dynamic programming is to find the sub problem.

Assuming given the problem as
    Servers: {S1, S2, ......, Sn}
    Tasks:   {T1, T2, ......, Tm}
Try to assigned T1 to Si, i within [1, n], if T1 <= Si (the server has to have enough resource to accommodate the task).  Then we have a sub-problem as,
    Severs: {S1, S2, ..., Si-1, Si - T1, Si+1, ..., Sn}
    Tasks:  {T2, ......, Tm}

Corner Cases or Terminating Conditions:
    - Task is empty then return true.
    - If all resource available is less than all task required, then return false.
        Sum(Si) < Sum(Tj) 
    - If a task requires more resource than that any servers can provide, then return false.
        Max(Tj) > Max(Si)
    - Any server that provides less resource than the least any task requires can be ignored
        Si < Min(Tj)
   - If the tasks comes a single task and can be accommodated by servers, then return true.
They are all valid cases but not necessary to implement them all. In my solution I just implemented the 1st and the last and the solution works fine.

3. C++ Implementation
// Implementation
// ********************************************************************************
bool SchedulingSolutionDP(std::vector<int>& serverCenter,
                           const std::vector<int>& tasks, const size_t taskIndex)
{
    if (tasks.empty()) {
        return true;
    }

    if (taskIndex >= (tasks.size() - 1)) {
        // the last corner case
        for (size_t index = 0; index < serverCenter.size(); ++index) {
            if (tasks[taskIndex] <= serverCenter[index]) {
                return true;
            }
        }
        return false;
    }

    for (size_t index = 0; index < serverCenter.size(); ++index) {
        if (tasks[taskIndex] <= serverCenter[index]) {
            // create sub-problem and call recursively
            serverCenter[index] -= tasks[taskIndex];
            if (SchedulingSolutionDP(serverCenter, tasks, taskIndex + 1)){
                // recover to the original problem
                serverCenter[index] += tasks[taskIndex];
                return true;
            }
            // recover to the original problem
            serverCenter[index] += tasks[taskIndex];
        }
    }

    return false;
}
// ********************************************************************************
// Test
// ********************************************************************************
void TestCases()
{
    {
        std::vector<int> serverCenter = { 8, 16, 8, 32 };
        std::vector<int> tasks = { 18, 4, 8, 4, 6, 6, 8, 8 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == true);
    }

    {
        std::vector<int> serverCenter = { 1, 3 };
        std::vector<int> tasks = { 4 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == false);
    }

    {
        std::vector<int> serverCenter = { 7, 6 };
        std::vector<int> tasks = { 5, 3, 3, 2 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == true);
    }

    {
        std::vector<int> serverCenter = { 1, 3, 1, 2, 3, 4 };
        std::vector<int> tasks = { 4, 3, 1, 2, 3, 1 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == true);
    }

    {
        std::vector<int> serverCenter = { 2, 5, 10, 4, 13, 19, 23, 11, 8, 17 };
        std::vector<int> tasks = { 30, 2, 1 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == false);
    }

    {
        std::vector<int> serverCenter = { 8, 6, 2 };
        std::vector<int> tasks = { 5, 4, 3, 2, 1 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == true);
    }

    {
        std::vector<int> serverCenter = { 8, 6, 2 };
        std::vector<int> tasks = { 5, 4, 4, 2, 1 };
        assert(SchedulingSolutionDP(serverCenter, tasks, 0) == true);
    }
}
// ********************************************************************************