Monday, December 29, 2014

Java Code to create Prefix Tree and run string matcher on it


/* package whatever

import java.util.*;
import java.lang.*;
import java.io.*;

/* Name of the class has to be "Main" only if the class is public. */
class PrefixTreeUser
{
    public static void main (String[] args) throws java.lang.Exception
    {
        //Basic exact match
        {
            StringMatcher matcher = new StringMatcher();
            matcher.add_exact_match("contactus", 1);
            assert(matcher.lookup("contactus") == 1);
        }
        //Basic prefix test
        {
            StringMatcher matcher = new StringMatcher();
            matcher.add_prefix_match("img", 1);
            assert(matcher.lookup("imgcutepuppy") == 1);
            assert(matcher.lookup("htmlcutepuppy") == -1);
        }
        //Basic longest match test
        {
            StringMatcher matcher = new StringMatcher();
            matcher.add_prefix_match("img", 1);
            matcher.add_prefix_match("imghd", 2);
            assert(matcher.lookup("imgcutepuppy") == 1);
            assert(matcher.lookup("imghdcutepuppy") == 2);
        }

    }
}

=========================================================================

class StringMatcher
{

final private PrefixTree prefixTree;

public StringMatcher()
{
    prefixTree = new PrefixTree();
}
   
/// Add an exact string match, i.e. ÒabcÓ in
/// the documentation above. Adding an exact match for an
/// existing `exact_str` will overwrite the previous `id`.
/// @param exact_str string to match with the id.
/// @param id that is mapped to this string.
public void add_exact_match(String exact_str, int id) {
    prefixTree.createPathFor(exact_str, true, id);
}

/// Add a prefix string match
/// the documentation above. Adding a prefix match for an
/// existing `prefix_str` will overwrite the previous `id`.
/// @param prefix_str to match with the id.
/// @param id(>=0) that is mapped to this string.
public void add_prefix_match(String prefix_str, int id) {
    prefixTree.createPathFor(prefix_str, false, id);
}

/// Get the id for the specified string.
/// @param input to lookup the id for
/// @returns -1 if there is no match or the id
public int lookup(String input) {
    return prefixTree.scanPathFor(input);
}

/// Delete the exact matches for the given string, i.e. if we have
/// an add_exact_match(str 2), delete_exact_match will remove the
/// match of str to 2.
/// @param exact_str exact match to delete.
/// @return true if a match was deleted.
boolean delete_exact_match(String exact_str) {
    return false;
}

/// Delete the prefix matches for the given string, i.e. if we have
/// a add_prefix_match(str, 2), delete_prefix_match will remove the
/// match of str to 2.
/// @param prefix_str prefix match to delete.
/// @return true if a match was deleted.
boolean delete_prefix_match(String prefix_str) {
    return false;
}

}

=========================================================================

class Node
{
    private char alphabet;
    protected Node[] next;
    private int partialMatch;
    private int exactMatch;
   
    public Node(char c)
    {
        next = new Node[Util.MAX_ALPHABETS_RANGE];
        setAlphabet(c);
    }
   
    // @return get's char value
    public char getAlphabet()
    {
        return alphabet;
    }
   
    // @param alphabet value to be set
    public void setAlphabet(final char alphabetValue)
    {
        alphabet = alphabetValue;
    }
   
    // @return partialMatch of the node alphabet
    public int getPartialMatch()
    {
        return partialMatch;
    }
   
    // @param partialMatchValue to be set for node's alphabet
    public void setPartialMatch(final int partialMatchValue)
    {
        partialMatch = partialMatchValue;
    }
   
    // @return exactMatch of this node's alphabet
    public int getExactMatch()
    {
        return exactMatch;
    }
   
    // @param exactMatchValue to be set for this node's alphabet
    public void setExactMatch(final int exactMatchValue)
    {
        exactMatch = exactMatchValue;
    }
   
    // @param childNumber is alphabet's numeric value
    // @return child node corresponding to that numeric value
    public Node getChildAt(final int childNumber)
    {
        if(childNumber >= Util.MAX_ALPHABETS_RANGE)
        {
            // Can do better here. For now, lets not worry about exceptions too much.
            throw new RuntimeException("Your tree is expecting more than alphabets range");
        }
       
        return next[childNumber];
    }
   
    // @param childNumber to be set
    // @param childNode to be set for this node
    public void setChildAt(final int childNumber, final Node childNode)
    {
        if(childNumber >= Util.MAX_ALPHABETS_RANGE)
        {
            // Can do better here. For now, lets not worry about exceptions too much.
            throw new RuntimeException("Your tree is expecting more than alphabets range");
        }
       
        next[childNumber] = childNode;
    }
}

=========================================================================

class Util
{
    public static final int MAX_ALPHABETS_RANGE=52;
   
    public static int convertCharToNumber(char c)
    {
        int i = c;
        if( i >= 65 && i <= 90 )
        {
            return ( i - 65 );
        }
        else if ( i >= 97 && i <= 122 )
        {
            // Arithmetic not solved for readability
            return ( i - 97 + 26);
        }
        else
        {
            throw new RuntimeException("Only [azAZ+] characters please!");
        }
    }
}

=========================================================================

class PrefixTree
{
    public Node[] head;
   
    public PrefixTree()
    {
        head = new Node[Util.MAX_ALPHABETS_RANGE];
    }
   
    // Creates prefix tree path
    // @param prefixStr string for which path is created
    // @param isExactMatchvalue true if called for exact match
    // @param matchValue value to store for nodes in the path
    public void createPathFor(final String prefixStr, final Boolean isExactMatchValue, final int matchValue)    
    {
        int index = Util.convertCharToNumber( prefixStr.charAt(0) );
        Node currentNode = head[ index ];
       
        if( currentNode == null )
        {
            head[ index ] = currentNode = new Node( prefixStr.charAt(0) );
            if( prefixStr.length() == 1 )
            {
                if( isExactMatchValue == true )
                {
                    currentNode.setExactMatch(matchValue);
                }
                else
                {
                    currentNode.setPartialMatch(matchValue);
                }
           
                return;
            }
        }
       
       
        for(int i=1; i < prefixStr.length(); i++ )
        {
            char c = prefixStr.charAt(i);
            int currentIndex = Util.convertCharToNumber(c);
           
            if( currentNode.next[currentIndex] == null )
            {

                currentNode.next[currentIndex] = new Node(c);
                currentNode = currentNode.next[currentIndex];
               
                if( isExactMatchValue )
                {
                    if( i == prefixStr.length() - 1 )
                    {
                        currentNode.setExactMatch(matchValue);
                    }
                }
                else
                {
                    currentNode.setPartialMatch(matchValue);
                }
            }
            else
            {
                if ( !isExactMatchValue && currentNode.getPartialMatch() != 0 )
                {
                    currentNode.setPartialMatch(matchValue);
                }
               
            }
        }
    }
   
    // Scans prefix tree to return the matched value for the given string
    // @param prefixStr string in question for which value is to be returned
    // @param isExactMatch either to return exact or partial match
    public int scanPathFor(final String prefixStr)
    {
        int prefixMatch=-1;
        Node currentNode = head[ Util.convertCharToNumber( prefixStr.charAt(0) ) ];
       
        if( currentNode == null )
        {
            return -1;
        }
       
        int i;
        for( i=1; i < prefixStr.length(); i++)
        {
            char c = prefixStr.charAt(i);
            int currentIndex = Util.convertCharToNumber(c);           
           
            if( currentNode.next[currentIndex] != null )
            {
                prefixMatch = currentNode.getPartialMatch();
                currentNode = currentNode.next[currentIndex];
            }
            else
            {
                break;
            }
        }
       
        if( i == 0 )
        {
            return -1;
        }

        if( i == prefixStr.length() )
        {
            return currentNode.getExactMatch();
        }
        else
        {
            return prefixMatch;
        }
    }
}
=========================================================================

No comments:

Post a Comment