Monday, October 1, 2012

Linked Lists and some known operations

Linked Lists

The intention of this post is to create a LinkedList class and implement all generally asked questions on linked lists. So here it is:

class Node {
public:
    Node( int nData ) : m_nData( nData ), m_pNext( NULL ) {};
    ~Node() { m_nData = 0; };
    
    void    SetData( int nData )        { m_nData = nData;};
    int     GetData() const             { return m_nData;};

    void    SetNext( Node* pNode )      { m_pNext = pNode;};
    Node*   GetNext() const             { return m_pNext;};

    void    Print( bool bReverse = false ) const;
private:
    int   m_nData;
    Node* m_pNext;
};

void Node:: Print( bool bReverse /*= false*/ ) const {
    if( bReverse && !m_pNext )
        std::cout << "NULL";

    if( bReverse )
        std::cout << "<-- span="span"> << m_nData;
    else
        std::cout << m_nData << " --> ";

    if( !bReverse && !m_pNext ) {
        //this is the last node
        std::cout << "NULL";
    }
}

class LinkedList {
public:
    LinkedList() : m_pHead( NULL ) {};
    ~LinkedList() { Clear();};

    Node*  AddNode( int nData, bool bSorted = false );
    Node*  AddHead( int nData );
    Node*  AddTail( int nData );
    Node*  AddAt( size_t nPos, int nData );

    size_t Length() const;
    void   Clear();
    bool   Delete( Node* pNode );
    bool   IsEmpty() const { return m_pHead == NULL; };

    Node*  GetNthLastNode( size_t nN ) const;
    bool   IsCyclic() const;
    Node*  FindCycleNode() const;
    int    FindCycleLength() const;
    Node*  FindMidNode() const;
    Node*  FindIntersectingNode( Node* pList1, Node* pList2 ) const;
    bool   IsPalindrome() const;


    void   Reverse();
    void   ReversePrint( bool bRecursive = true );
    void   Print() const;

    static size_t Length( Node* pHead );
private:
    //member functions
    void  ReversePrint( Node* pNode ) const;
    Node* IsPalindrome( Node* pLeft, Node* pRight );

    //attributes
    Node* m_pHead;
};//class LinkedList

void LinkedList::Print() const {
    Node* pStart = m_pHead;
    while( pNode ) {
        pNode->Print( false );
        pNode = pNode->GetNext();
    }
}

void LinkedList::ReversePrint( Node* pNode ) const {
    if( !pNode )
        return;
    else {
        ReversePrint( pNode->GetNext() );
        pNode->Print( true );
    }
}
 

/*
 * There are two do this:
 * 1. Recursive solution: In this we recursively call the function
 *    till we reach the end of the list and while returning back
 *    display the content of the node
 * 2. Iterative: Reverse the list, print it and then reverse it back
 *    again. However this entails changing the list
 * 3. Iterative: If there is a need to not to change the list
 *    then a stack can be used. Iterate the list, push elements
 *    onto the stack. Next pop the elements and print the poped 
 *    element poped.

void LinkedList::ReversePrint( bool bRecursive /*= true*/ ) {
    if( bRecursive )
        ReversePrint( m_pHead );
    else {
        //iterative solution
        //Reverse the linked list O(n)
        //print the list O(n)
        //Reverse it back again O(n)
        Reverse();
        Print();
        Reverse();

    }


Node* LinkedList::IsPalindrome( Node* pLeft, Node* pRight ) {
    if( !pRight )
        return pLeft;

    //Calling IsPalindrome recursively will 
    //go on till we hit the end of the list
    pLeft = IsPalindrome( pLeft, pRight->GetNext() );
    //Now we have hit the end of the list
    if( pLeft ) {
        if( pLeft->GetData() == pRight->GetData() ) {
            pLeft = pLeft->GetNext() ? pLeft->GetNext() : pLeft;
        }
    }

    return pLeft;
}

/*
 * A palindrome is a word, phrase, number, or other sequence of
 * units that may be read the same way in either direction, with
 * general allowances for adjustments to punctuations and word 
 * dividers.
 * There are many ways to implement this algorithm
 * 1. Find the mid-point, then reverse the second part of the list
 * then compare the firs half and 2nd half and then reverse the 2nd
 * half back to get the original list
 * 2. Using recursion, in this case we divide the linked list into
 * smaller lists and check whether the smaller lists are palindrome
 * This implementation uses the recursion technique time complexity
 * is O(n) and space complexity is O(n).
bool LinkedList::IsPalindrome() const {
    return NULL == IsPalindrome( m_pHead, m_pHead ) ? false : true;
}

/*
 * If two intersecting linked lists intersect they will form a
 * Y kind of pattern. The way to find the intersecting node
 * is to find the length of both the lists.
 * Then find the one which is bigger.
 * Iterate the bigger list delta (difference in lengths) nodes
 * Now start moving both the lists simultaneously
 * The place at which both the lists point to the same node
 * is the intersecting point, if they do not meet, then they
 * are not intersecting.
 */

Node* LinkedList::FindIntersectingNode( Node* pList1, Node* pList2 ) const {
    if( !pList1 || !pList2 )
        return NULL;

    //find the length of both the lists
    size_t nLen1 = LinkedList::Length( pList1 );
    size_t nLen2 = LinkedList::Length( pList2 );

    //find the delta
    size_t nDelta = nLen1 > nLen2 ? nLen1 - nLen2 : nLen2 - nLen1;
    Node* pLongerList = nLen1 > nLen2 ? pList1 : pList2;
    Node* pSmallerList = nLen1 > nLen2 ? pList2 : pList1;

    //iterate nDelta nodes in the longer list
    size_t nCount = 0;
    
    while( nCount++ < nDelta ) {
        pLongerList = pLongerList->GetNext();
    }

    //now iterate both the lists
    //till we come across a scenario where both the nodes
    //are same or we reach the end
    //NOTE: Since both the lists now are exactly the same size
    //hence we can check for NULL of one pointer itself
    while( pLongerList != pSmallerList || pLongerList != NULL ) {
        pLongerList = pLongerList->GetNext();
        pSmallerList = pSmallerList->GetNext();
    }

    return pLongerList;
}

/*
 * This function finds the middle node of the linked list
 * The way to do this is to start with two pointers, both
 * starting from head. The slow pointer will move one step
 * whereas the fast pointer will move two steps at a time
 * When the fast pointer hits the last, the slow pointer
 * will be pointing to the middle node.
 * NOTE: Need to take care of end of list.  
Node* LinkedList::FindMidNode() const {
    Node* pSlow = m_pHead;
    Node* pFast = m_pHead;

    if( IsEmpty() )
        return NULL;

    bool bToggle = false;
    while( pSlow->GetNext() ) {
        if( bToggle ) {
            pFast = pFast->GetNext();
        }
        else {
            pFast = pFast->GetNext();
            pSlow = pSlow->GetNext();
        }
        bToggle = !bToggle;    }

    return pSlow;
}

/*
 * The purpose of this function is to delete the Node pointer
 * to in the Node* pointer. The code is not supposed to use the
 * head pointer. So this means this should be done in O(1) time
 */
bool LinkedList::Delete( Node* pNode ) {
    if( !pNode )
        return false;

    //a->b->c->e
    //delete b
    //Copy the data of c in b
    //point b to its next next
    //delete b's next
    //a->b->e
    //NOTE: The idea is to copy over the contents
    //of the next node into this node
    //then point this node to next' next.
    //and then delete the next node/
    //This will NOT work if the given Node is the
    //last node,
    if( pNode->GetNext() ) {
        pNode->SetData( pNode->GetNext()->GetData() );
        Node* pNext = pNode->GetNext();
        pNode->SetNext( pNext->GetNext() );
        delete pNext;
        return true;
    }
    else {
        //this is the last node, cannot do anything
        return false;
    }

}
void LinkedList::Reverse() {
    //1->2->3->1->5->6
    //6->5->1->3->2->1
    //The approach is to iterate the list and 
    //swap the nodes

    Node* pCurr = m_pHead;
    Node* pPrev = NULL;
    Node* pNext = NULL;

    while( pCurr ) {
        //store the next
        pNext = pCurr->GetNext();
        //set the current's next to previous
        pCurr->SetNext( pPrev );
        //previous becomes the current pointer
        pPrev = pCurr;
        //next node is the pNext
        pCurr = pNext; 
    }

    m_pHead = pPrev;
}
int LinkedList::FindCycleLength() const {
    Node* pSlow = m_pHead;
    Node* pFast = m_pHead;
    bool  bCycle = false;

    while( pSlow && pFast ) {
        pFast = pFast->GetNext();
        if( pSlow == pFast ) {
            bCycle = true;
            break;
        }

        if( !pFast )
            break;

        pFast = pFast->GetNext();
        if( pSlow == pFast ) {
            bCycle = true;
            break;
        }

        pSlow = pSlow->GetNext();
    }

    if( bCycle ) {
       //now both slow and fast are pointing to the same node
       //now we need to loop and count
       int nCount = 0;
       pFast = pFast->GetNext();

       while( pSlow != pFast ) {
          pFast = pFast->GetNext();
          ++nCount;
       }

       return nCount;
    }
    else
        return -1;
}
/*
 * The approach to find the Node, where the cycle starts is
 * very similar to the IsCyclic function.
 * Once a cycle is detected, then reset the slow pointer to the
 * beginning of the linked list and start moving both the slow
 * and the fast node simultaneously, one node at a time.
 * The node at which they meet is the Node where the cycle starts
 * Note: This algorithm is also known as Floyd Cycle finding
 * algorithm.
 * There is one more algorithm called Brent's Algorithm.
 *
 */
Node* LinkedList::FindCycleNode() const {
    Node* pSlow = m_pHead;
    Node* pFast = m_pHead;
    bool  bCycle = false;

    while( pSlow && pFast ) {
        pFast = pFast->GetNext();
        if( pSlow == pFast ) {
            bCycle = true;
            break;
        }

        if( !pFast )
            break;

        pFast = pFast->GetNext();
        if( pSlow == pFast ) {
            bCycle = true;
            break;
        }

        pSlow = pSlow->GetNext();

    }

    if( bCycle ) {
        pSlow = m_pHead;
        while( pSlow != pFast ) {
            pFast = pFast->GetNext();
            pSlow = pSlow->GetNext();
        }
        return pSlow;
    }
    else
        return NULL;
}
/*
 * The way to find whether a node is cyclic or not
 * is to use two pointers. Move the ahead pointer two node at a time
 * whereas move the slower node one node at a time.
 * If the faster node reaches NULL then it is not cyclic
 * At some point of time the slow and the fast node will meet.
 * If they meet then the linked list is cyclic.
 */
bool LinkedList::IsCyclic() const {
    Node* pSlow = NULL;
    Node* pFast = NULL;

    while( pSlow && pFast ) {
        pFast = pFast->GetNext();
        if( pSlow == pFast )
            return true;
        if( !pFast )
            break;
        
        pFast = pFast->GetNext();
        if( pSlow == pFast )
            return true;
        pSlow = pSlow->GetNext();
    }

    return false;
}

/*
 * The way to do this is to use 2 pointers.
 * Move one pointer nN positions from the head.
 * (Take care of scenarios wherein we reach the end before the nN position)
 * Now move both the pointers simultaneously. When the first pointer reaches the end, the node at which the first pointer is pointing will be the NthLastNode
 */
Node* LinkedList::GetNthLastNode( size_t nN ) const {
    Node* pAhead  = m_pHead;
    Node* pBehind = m_pHead;

    //Move the pAhead node by nN positions
    size_t nCurrPos = 0;
    while( pAhead && nCurrPos++ < nN ) {
        pAhead = pAhead->GetNext();
    }
    
    if( !pAhead ) {
        //we reached the end before nN nodes
        return NULL;
    }

    //Now move both the nodes simultaneosuly till pAhead reaches
    //the end
    while( pAhead ) {
        pAhead  = pAhead->GetNext();
        pBehind = pBehind->GetNext();
    }

    return pBehind;
}
/*
 * This function returns the length of the linked list
 * Note: In a real world implementation, you would want to
 * store the length as a private member and keep it updated
 * on every add/remove operation happening in the linked list.
 * Node* pHead: Pointer to the head of the linked list
 */
size_t LinkedList::Length( Node* pHead ){
    size_t nLength = 0;

    Node* pNode = pHead;
    while( pNode ) {
        ++nLength;
        pNode = pNode->GetNext();
    }

    return nLength;
}

size_t LinkedList::Length() const {
    return LinkedList::Length( m_pHead );
}

/*
 * This functions adds a new node in the linked list
 * if the bSorted flag is true, then the data is added in
 * ascending order
 */
Node* LinkedList::AddNode( int nData, bool bSorted /*= false*/ ) {
    if( !m_pHead ) {
        m_pHead = new Node( nData );
        return m_pHead;
    }

    //so this is not a empty node
    //if the node is to be added in a sorted order
    //then we find the position at which the node
    //needs to be inserted and then add it here
    //else we add a new head
    if( bSorted ) {
        //search the position where it needs to be inserted
        Node* pCurr = m_pHead;
        Node* pPrev = NULL;
        while( pCurr && pCurr->GetData() < nData ) {
            pPrev = pCurr;
            pCurr = pCurr->GetNext();
        }

        Node* pNode = new Node( nData );
        pNode->SetNext( pPrev->GetNext() );
        pPrev->SetNext( pNode );

        return pNode;
    }
    else {
        Node* pNode = m_pHead;
        m_pHead = new Node( nData );
        m_pHead->SetNext( pNode );
        return m_pHead;
    }
}

Node* LinkedList::AddHead( int nData ) {
    return AddNode( nData, false );
}

Node* LinkedList::AddTail( int nData ) {
    Node *pNode = new Node( nData );
    Node *pCurr = m_pHead;
    Node *pPrev = NULL;

    while( pCurr ) {
        pPrev = pCurr;
        pCurr = pCurr->GetNext();
    }
    
    pPrev->SetNext( pNode );


    return pNode;
}

Node* LinkedList::AddAt( size_t nPos, int nData ) {
   if( 0 == nPos )
       return AddHead( nData );

   size_t nCurrPos = 0;

   Node *pCurr = m_pHead;
   Node *pPrev = NULL;

   while( pCurr && nCurrPos++ < nPos ) {
       pPrev = pCurr;
       pCurr = pCurr->GetNext();

   }

   Node* pNode = new Node( nData );
   pNode->SetNext( pPrev->GetNext() );
   pPrev->SetNext( pNode );

   return pNode;
}

void LinkedList::Clear() {
    Node* pPrev = NULL;
    while( m_pHead ) {
        pPrev = m_pHead;
        m_pHead = m_pHead->GetNext();
        delete pPrev;
    }
}

Thursday, September 20, 2012

Find max sub array in a given array

This is also known as Kadane's algorithm (here). 

To quote from wikipedia: This problem was posed as a simplified model for maximum likelihood of patter matching in digitized images.

In Computer Science this is a task to find a contiguous subarray within a 1-D array of numbers (containing at least 1 +tive number), which has the largest sum.

Here are a couple of approaches to solve this problem. The brute force method is to iterate over the array twice and sum up the contiguous cells until you find the block with the maximum sum. However the complexity of this algorithm O(n2).

Brute force method

int FindMaxSum( int[] array, int nSize, int& nMaxStartIndex, int& nMaxEndIndex) {
    int nMaxSum = INT_MIN;
    nMaxStartIndex = 0;
    nMaxEndIndex = nSize > ? nSize - 1 : 0;

    if( nSize == 2 ) {
        nMaxSum = array[ 0 ] + array[ 1 ];
    }
    else if( nSize == 1 ) {
        nMaxSum = array[ 0 ];
    }

    else if( nSize == 0 ) {
        nMaxSum = 0;
    }
    else {
        int nCurrentSum = 0;
        for( int i = 2 ; i < nSize; ++i ) {
            for( int j = 0 ; j < nSize - i; j++ ) {
                for( int k = j; k < k + i; k++ )
                    nCurrentSum += array[ k ];
                if( nMaxSum < nCurrentSum ) {
                    nMaxSum = nCurrentSum;
                    nMaxStartIndex = j;
                    nMaxEndIndex = k;
                }
            }

        }

    }
    
    return nMaxSum;
}

O(n) Algorithm

Obviously the above O(n2) is not preferred, that is why Jay Kadane came up with an approach which solves this in O(n) time. Here is a C++ implementation of the same.

int FindMaxSum( int[] array, int nSize, int& nMaxStartIndex, int& nMaxEndIndex) {
    int nMaxSum = INT_MIN;
    int nCurrSum = 0;
    int nCurrStartIndex = 0;

    for( int nCurrEndIndex = 0 ; nCurrEndIndex < nSize; ++nCurrEndIndex ) {
        nCurrSum += array[ nCurrEndIndex ];
         if( nMaxSum < nCurrSum ) {
           nMaxSum        = nCurrSum;
           nMaxStartIndex = nCurrStartIndex;
           nMaxEndIndex   = nCurrEndIndex;
        }

        if( nCurrSum < 0 ) {
            nCurrSum = 0;
            nCurrStartIndex = nCurrEndIndex + 1;
        }

    }

    return nMaxSum;
}

I will soon update this thread to extend Kadane's algorithm to a 2-D array.

Wednesday, September 19, 2012

How to find whether a string is an anagram

First things first, what is an anagram? Anagram is a word, phrase, name or a sentence formed by rearranging the letters of an another word, phrase, name of sentence.The key rule here is that all letters of the name/phrase/word/sentence should be used ONCE and only ONCE. For example
  • cinema and iceman.
  • Longest Anagram ever created is here.
  • Stone age and Stage one.
  • A chevrolet and Love the car
  • A carton of cigarettes and I got a taste for cancer.
Some more examples are here.

Anyways back to Computer Science, how to write an algorithm, which can check whether a given word/phrase/sentence is an anagram or not.
The rule for an anagram is that both the strings should be formed using the exactly the same letters as shown above in the example.

Lets take cinema and iceman
In the first word cinema,
c occurs once, i occurs once and so do the rest of the letters i.e., n, e, m and a. Now observe that in the anagram word iceman:
i occurs once, i occurs once and so do the rest of the letters i.e, e, m, a and n. The key takeaway is that all these letters occur only once. If the second letter uses a letter, which is not present in the first, then it is not an anagram.

Two approaches:
  1. A simple approach would be to sort both the words/phrase/sentence and compare for equality. If they are equal then they are anagrams otherwise not.
  2. Another approach is to  check if the two strings have identical counts for each unique char.
The approach I am proposing is similar to the second approach above. It is:
  1. Take the first string and create a binary sorted tree out of it. Note the data that the node will contain would be the letter and a count, which basically is used to keep track of occurrence of that particular character.
  2. Once the tree is populated with the first string, now add the second string also in the BST. The only difference is that if while populating the data, you end up in a situation where a new node has to be created, then you know it is not an anagram.
  3. There is one more case and which is that the unique letters are same, however their counts are different. So while adding the second string to the BST, if you run into a existing letter, decrement that nodes letter count.
  4. If we exhaust all the elements of the second tree as well, do a tree-traversal using any mechanism (pre/post/in/level), the requirement is that the letter count should be zero for it to an anagram.
class Node {
    public:
        Node( wchar_t wchElement ) : m_wchElement( wchElement ),
                                     m_nCount( 1 ),
                                     m_pLeft( NULL ),
                                     m_pRight( NULL ) {};
        ~Node() {
            delete m_pLeft;
            m_pLeft = NULL;
            delete m_pRight;
            m_pRight = NULL;
        };

        int  GetCount() const       { return m_nCount;};
        void IncrementCount()       { ++m_nCount;};
        void DecrementCount()       { --m_nCount;};

        wchar_t GetData() const     { return m_wchElement;};
        Node* GetLeft() const       { return m_pLeft;};
        Node* GetRight() const      { return m_pRight;};

        void  SetLeft( Node* pNode) { m_pLeft = pNode;};
        void  SetRight( Node* pNode){ m_pRight = pRight;};
    private:
        //hidden constructor
        Node();
        wchar_t m_wchElement;
        int     m_nCount;
        Node*   m_pLeft;
        Node*   m_pRight;
};

class BST {
    public:
        BST(): m_pRoot( NULL ) {}
        ~BST() {
            //function to empty the tree, left as an exercise
            //for the reader
        }

        Node* Add( wchar_t wchElement, bool bIncrementCount = true)
        bool  IsAnagram( wchar_t* pszString1, wchar_t* pszString2 );
        bool  PreOrder( Node* pNode );
    private:
        Node* m_pRoot;
};

bool BST::Add( wchar_t wchElement, bool bIncrementCount=/*true*/) {
    //Note: When adding the first string
    //the return values are of no use
    //they will come into use while adding the second string
    //i.e, when bIncrementCount is false.
    bool bAnagram = true;
    if( !m_pRoot ) {
        m_pRoot = new Node( wchElement );
        return bAnagram;
    }

    Node* pRoot = m_pRoot;
    while( pRoot ) {
        if( pRoot->GetData() == wchElement ) {
            bIncrementCount ? pRoot->IncrementCount():
                              pRoot->DecrementCount();
        }
        elseif( pRoot->GetData() < wchElement ) {
            if( pRoot->GetLeft() ) {
                pRoot = pRoot->GetLeft();
            }
            else {
                if( nIncrementCount ) {
                    pRoot->SetLeft( new Node( wchElement ) );
                }
                else {
                    //we are adding the second string
                    //and we have found a unique element
                    //which is not in the first string
                    //so this is not a anagram.
                    bAnagram = false;
                    break;
                }
            }
        }
        else {
            //greater so operate on the right sub-tree
            if( pRoot->GetRight() ) {
                pRoot = pRoot->GetRight();
            }
            else {
                if( bIncrementCount ) {
                    pRoot->SetRight( new Node( wchElement ) );
                }
                else {
                    //we are adding the second string
                    //and we have found a unique element
                    //which is not in the first string
                    //so this is not a anagram.
                    bAnagram = false;
                    break;
                }
            }
        }
    }

    return bAnagram;
}

bool BST::IsAnagram( wchar_t *pszString1, wchar_t *pszString2 ) {
    wchar_t *pStart = pszString1;
    while( *pStart ) {
        Add( *pStart++, true );
    }

    //add the pszString2, while adding the next string
    //we will get to know any character which is unique
    //in the 2nd string.
    //however there may be strings like abcd and abcc, or
    //like xxxx and xxx. For such cases while populating the
    //BST we will not come to know, so for such cases we will later
    //use one of the traversal techniques to see how many elements
    //still have a non-zero count. If they have then it is not an 
    //anagram
    pStart = pszString2;
    while( *pStart ) {
        if( !Add( *pStart++, false ) )
            return false;
    }
    //Here we will use one of the traversals techniques to traverse
    //all the nodes and check the count at each node
    //if we find any node having non-zero count, then we know for 
    //sure that it is not an anagram.

    return PreOrder( m_pRoot );
}

//rLR
bool BST::PreOrder( Node* pNode ) {
    if( !pNode )
        return true;
    if( pNode->GetCount() != 0 )
        return false;
    if( PreOrder( pNode->GetLeft() ) )
        return PreOrder( pNode->GetRight() );
    else
        return false;
}

int wmain( int argc, wchar_t** argv ) {
     wchar_t *pszString1 = L"cinema";
     wchar_t *pszString2 = L"iceman";

     BST oBST;
     if( oBST.IsAnagram( pszString1, pszString2 ) ) {
         std::wcout << L"Is Anagram";
     }
     else {
         std::wcout << L"NOT is Anagram";
     }

     std::wcout << std::endl;

     return 0;
}

I agree there is scope of improvement in the above code, however it basically demonstrates a way to find whether strings/words/phrases are anagrams. The above code can be modified to read the two strings from files, or from user input, or from any other input mechanism.