Thursday, 14 August 2014

Data Structure and Algorithm - Linked List: Merge Sort

As I presented the common API for Linked List in my last blog entry, Data Structure and Algorithm - Linked List (Part I), in this blog I would like to present a sorting algorithm for Linked List, merge sort. Please refer to the basic idea about sorting in Data Structure and Algorithm - Sort and Binary Search.

Why Merge Sort
- Simply idea: divide and conquer
- Easy adapted to parallel computing
- Space complexity: O(1) for Linked List, comparing with other sorting algorithm
- Better performance than other more efficient algorithm in theory due to the traverse of Linked List
- Guarantee to be stable sort
- Guarantee to the worst case of O(N*logN)

// C implementation of Merge Sort on Linked List
//********************************************************************************
// LinkedListSort.h
#ifndef LINKED_LIST_SORT_H
#define LINKED_LIST_SORT_H

#pragma once

#include "LinkedListAlgo.h"
#include "LinkedListElement.h"

template<typename T>
void LL_MergeSort(LinkedListElement<T>** head)
{
    if (head == nullptr || *head == nullptr || (*head)->next == nullptr) {
        return;
    }

    LinkedListElement<T>* front = nullptr;
    LinkedListElement<T>* back = nullptr;
    LL_FrontBackSplit(*head, &front, &back);
    LL_MergeSort(&front);
    LL_MergeSort(&back);
    *head = LL_SortedMerge(front, back);
}

template<typename T>
void LL_FrontBackSplit(LinkedListElement<T>* src,
                       LinkedListElement<T>** front,
                       LinkedListElement<T>** back)
{
    if (src == nullptr) {
        return;
    }

    LinkedListElement<T>* slow = src;
    LinkedListElement<T>* fast = src->next;
    while (fast != nullptr) {
        fast = fast->next;
        if (fast != nullptr) {
            slow = slow->next;
            fast = fast->next;
        }
    }

    *front = src;
    *back = slow->next;
    slow->next = nullptr;
}

template<typename T>
LinkedListElement<T>* LL_SortedMerge(LinkedListElement<T>* l1,
                                                     LinkedListElement<T>* l2)
{
    LinkedListElement<T> temp;
    LinkedListElement<T>* cur = &temp;

    while (l1 != nullptr || l2 != nullptr) {
        if (l1 == nullptr) {
            cur->next = l2;
            return temp.next;
        }
        if (l2 == nullptr) {
            cur->next = l1;
            return temp.next;
        }
        if (l1->data <= l2->data) {
            cur->next = l1;
            cur = cur->next;
            l1 = l1->next;
        }
        else {
            cur->next = l2;
            cur = cur->next;
            l2 = l2->next;
        }
    }

    return temp.next;
}

template<typename T>
LinkedListElement<T>* LL_SortedMergeRecursively(LinkedListElement<T>* l1,
                                                                     LinkedListElement<T>* l2)
{
    if (l1 == nullptr) {
        return l2;
    }
    if (l2 == nullptr) {
        return l1;
    }

    LinkedListElement<T>* result = nullptr;
    if (l1->data <= l2->data) {
        result = l1;
        result->next = LL_SortedMergeRecursively(l1->next, l2);
    }
    else {
        result = l2;
        result->next = LL_SortedMergeRecursively(l1, l2->next);
    }

    return result;
}

template<typename T>
void LL_RemoveDuplicates(LinkedListElement<T>** head)
{
    if (head == nullptr || *head == nullptr || (*head)->next == nullptr) {
        return;
    }

    LinkedListElement<T>* curValPtr = *head;
    LinkedListElement<T>* nextValPtr = (*head)->next;
    while (nextValPtr != nullptr) {
        if (nextValPtr->data == curValPtr->data) {
            curValPtr->next = nextValPtr->next;
            delete nextValPtr;
            nextValPtr = curValPtr->next;
        }
        else {
            curValPtr = nextValPtr;
            nextValPtr = nextValPtr->next;
        }
    }

    return;
}

/*
 * create a new list which is intersection of two lists
 * no duplicate in this list
 * l1 and l2: has to be sorted linked list
 */
template<typename T>
LinkedListElement<T>* LL_SortedIntersect(LinkedListElement<T>* l1,
                                                          LinkedListElement<T>* l2)
{
    if (l1 == nullptr || l2 == nullptr) {
        return nullptr;
    }

    LinkedListElement<T>* head = nullptr;
    ptrdiff_t found;
    size_t pos = 0;
    LinkedListElement<T>* curValPtr = nullptr;
    while (l1 != nullptr) {
        if (curValPtr == nullptr || curValPtr->data != l1->data) {
            curValPtr = l1;

            found = LL_Find(l2, pos, curValPtr->data);
            if (found >= 0) {
                LL_PushFront(&head, curValPtr->data);
                pos = static_cast<size_t>(found + 1);
            }
        }
        l1 = l1->next;
    }

    return head;
}

template<typename T>
bool LL_SortedInsert(LinkedListElement<T>** head, const T& val)
{
    if (head == nullptr) {
        return false;
    }

    if (*head == nullptr) {
        LL_PushFront(head, val);
        return true;
    }

    if (val <= (*head)->data) {
        LL_PushFront(head, val);
        return true;
    }

    LinkedListElement<T>* prev = *head;
    LinkedListElement<T>* cur = (*head)->next;

    while (cur != nullptr) {
        if (cur->data >= val) {
            prev->next = new LinkedListElement<T>(val);
            prev->next->next = cur;
            return true;
        }
        prev = cur;
        cur = cur->next;
    }

    // hit here not found the position
    // LL_PushBack(head, val);
    prev->next = new LinkedListElement<T>(val);
    return true;
}

#endif

// test.cpp
void TestLinkedListSort()
{
    LinkedListElement<int>* head1 = nullptr;
    // construct list: 10, 9, 20, 8, 7, 7, 20, 30, 50, 20, 4, 3,
    LL_PushFront(&head1, 3);
    LL_PushFront(&head1, 4);
    LL_PushFront(&head1, 20);
    LL_PushFront(&head1, 50);
    LL_PushFront(&head1, 30);
    LL_PushFront(&head1, 20);
    LL_PushFront(&head1, 7);
    LL_PushFront(&head1, 7);
    LL_PushFront(&head1, 8);
    LL_PushFront(&head1, 20);
    LL_PushFront(&head1, 9);
    LL_PushFront(&head1, 10);
    assert(LL_GetSize(head1) == 12);

    LinkedListElement<int>* head2 = nullptr;
    // const list: 12, 17, 56, 19, 18, 10, 9, 12, 6, 8, 10
    LL_PushFront(&head2, 10);
    LL_PushFront(&head2, 8);
    LL_PushFront(&head2, 6);
    LL_PushFront(&head2, 12);
    LL_PushFront(&head2, 9);
    LL_PushFront(&head2, 10);
    LL_PushFront(&head2, 18);
    LL_PushFront(&head2, 19);
    LL_PushFront(&head2, 56);
    LL_PushFront(&head2, 17);
    LL_PushFront(&head2, 12);
    assert(LL_GetSize(head2) == 11);

    LL_MergeSort(&head1); // 3, 4, 7, 7, 8, 9, 10, 20, 20, 20, 30, 50

    int val;
    assert(LL_GetNth(head1, 0, val) == true);
    assert(val == 3);
    assert(LL_GetNth(head1, 1, val) == true);
    assert(val == 4);
    assert(LL_GetNth(head1, 2, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head1, 3, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head1, 4, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head1, 5, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head1, 6, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head1, 7, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 8, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 9, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 10, val) == true);
    assert(val == 30);
    assert(LL_GetNth(head1, 11, val) == true);
    assert(val == 50);
    assert(LL_GetNth(head1, 12, val) == false);

    LL_MergeSort(&head2); // 6, 8, 9, 10, 10, 12, 12, 17, 18, 19, 56
    assert(LL_GetNth(head2, 0, val) == true);
    assert(val == 6);
    assert(LL_GetNth(head2, 1, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head2, 2, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head2, 3, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head2, 4, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head2, 5, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head2, 6, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head2, 7, val) == true);
    assert(val == 17);
    assert(LL_GetNth(head2, 8, val) == true);
    assert(val == 18);
    assert(LL_GetNth(head2, 9, val) == true);
    assert(val == 19);
    assert(LL_GetNth(head2, 10, val) == true);
    assert(val == 56);
    assert(LL_GetNth(head2, 11, val) == false);

    LinkedListElement<int>* front = nullptr;
    LinkedListElement<int>* back = nullptr;
    LL_FrontBackSplit(head1, &front, &back);
    assert(front == head1);
    assert(back->data == 10);
    assert(LL_GetSize(front) == 6);
    assert(LL_GetSize(back) == 6);
    head1 = LL_SortedMerge(front, back); // 3, 4, 7, 7, 8, 9, 10, 20, 20, 20, 30, 50
    assert(LL_GetSize(head1) == 12);
    assert(LL_GetNth(head1, 0, val) == true);
    assert(val == 3);
    assert(LL_GetNth(head1, 1, val) == true);
    assert(val == 4);
    assert(LL_GetNth(head1, 2, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head1, 3, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head1, 4, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head1, 5, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head1, 6, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head1, 7, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 8, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 9, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head1, 10, val) == true);
    assert(val == 30);
    assert(LL_GetNth(head1, 11, val) == true);
    assert(val == 50);
    assert(LL_GetNth(head1, 12, val) == false);

    LL_FrontBackSplit(head2, &front, &back); // 6, 8, 9, 10, 10, 12, 12, 17, 18, 19, 56
    assert(front == head2);
    assert(back->data == 12);
    assert(LL_GetSize(front) == 6);
    assert(LL_GetSize(back) == 5);
    head2 = LL_SortedMergeRecursively(front, back);
    assert(LL_GetNth(head2, 0, val) == true);
    assert(val == 6);
    assert(LL_GetNth(head2, 1, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head2, 2, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head2, 3, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head2, 4, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head2, 5, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head2, 6, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head2, 7, val) == true);
    assert(val == 17);
    assert(LL_GetNth(head2, 8, val) == true);
    assert(val == 18);
    assert(LL_GetNth(head2, 9, val) == true);
    assert(val == 19);
    assert(LL_GetNth(head2, 10, val) == true);
    assert(val == 56);
    assert(LL_GetNth(head2, 11, val) == false);

    // 810, 9, 8
    LinkedListElement<int>* intersect = LL_SortedIntersect(head1, head2);
    assert(LL_GetSize(intersect) == 3);
    assert(LL_GetNth(intersect, 0, val) == true);
    assert(val == 10);
    assert(LL_GetNth(intersect, 1, val) == true);
    assert(val == 9);
    assert(LL_GetNth(intersect, 2, val) == true);
    assert(val == 8);
    assert(LL_GetNth(intersect, 3, val) == false);
    LL_Delete(&intersect);


    // 3, 4, 6, 7, 7, 8, 8, 9, 9, 10, 10, 10, 12, 12, 17, 18, 19, 20, 20, 20, 30, 50, 56
    LinkedListElement<int>* head = LL_SortedMergeRecursively(head1, head2);
    assert(LL_GetSize(head) == 23);
    assert(LL_GetNth(head, 0, val) == true);
    assert(val == 3);
    assert(LL_GetNth(head, 1, val) == true);
    assert(val == 4);
    assert(LL_GetNth(head, 2, val) == true);
    assert(val == 6);
    assert(LL_GetNth(head, 3, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head, 4, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head, 5, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head, 6, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head, 7, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head, 8, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head, 9, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head, 10, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head, 11, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head, 12, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head, 13, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head, 14, val) == true);
    assert(val == 17);
    assert(LL_GetNth(head, 15, val) == true);
    assert(val == 18);
    assert(LL_GetNth(head, 16, val) == true);
    assert(val == 19);
    assert(LL_GetNth(head, 17, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head, 18, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head, 19, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head, 20, val) == true);
    assert(val == 30);
    assert(LL_GetNth(head, 21, val) == true);
    assert(val == 50);
    assert(LL_GetNth(head, 22, val) == true);
    assert(val == 56);
    assert(LL_GetNth(head, 23, val) == false);

    // 3, 4, 6, 7, 8, 9, 10, 12, 17, 18, 19, 20, 30, 50, 56
    LL_RemoveDuplicates(&head);
    assert(LL_GetSize(head) == 15);
    assert(LL_GetNth(head, 0, val) == true);
    assert(val == 3);
    assert(LL_GetNth(head, 1, val) == true);
    assert(val == 4);
    assert(LL_GetNth(head, 2, val) == true);
    assert(val == 6);
    assert(LL_GetNth(head, 3, val) == true);
    assert(val == 7);
    assert(LL_GetNth(head, 4, val) == true);
    assert(val == 8);
    assert(LL_GetNth(head, 5, val) == true);
    assert(val == 9);
    assert(LL_GetNth(head, 6, val) == true);
    assert(val == 10);
    assert(LL_GetNth(head, 7, val) == true);
    assert(val == 12);
    assert(LL_GetNth(head, 8, val) == true);
    assert(val == 17);
    assert(LL_GetNth(head, 9, val) == true);
    assert(val == 18);
    assert(LL_GetNth(head, 10, val) == true);
    assert(val == 19);
    assert(LL_GetNth(head, 11, val) == true);
    assert(val == 20);
    assert(LL_GetNth(head, 12, val) == true);
    assert(val == 30);
    assert(LL_GetNth(head, 13, val) == true);
    assert(val == 50);
    assert(LL_GetNth(head, 14, val) == true);
    assert(val == 56);
    assert(LL_GetNth(head, 15, val) == false);

    assert(LL_SortedInsert(&head, 1) == true);
// 1, 3, 4, 6, 7, 8, 9, 10, 12, 17, 18, 19, 20, 30, 50, 56
    assert(LL_GetSize(head) == 16);
    assert(LL_Find(head, 1) == 0);
// 1, 3, 4, 6, 7, 8, 9, 10, 12, 17, 18, 19, 20, 30, 50, 56, 60
    assert(LL_SortedInsert(&head, 60) == true);
    assert(LL_GetSize(head) == 17);
    assert(LL_Find(head, 60) == 16); 
// 1, 3, 4, 6, 7, 8, 9, 10, 10, 12, 17, 18, 19, 20, 30, 50, 56, 60
    assert(LL_SortedInsert(&head, 10));
    ptrdiff_t pos = LL_Find(head, 10);
    assert(pos == 7);
    pos = LL_Find(head, pos + 1, 10);
    assert(pos == 0);

    LL_Delete(&head);
}
//********************************************************************************

Bibliography:
[1] http://cslibrary.stanford.edu/105/LinkedListProblems.pdf
[2] Glenn W. Rowe, Data Structure & Algorithm C++, Prentice Hall, 1996
[3] http://en.wikipedia.org/wiki/Merge_sort
[4] http://en.wikipedia.org/wiki/Sorting_algorithm

No comments:

Post a Comment