C# Dictionary Tree Algrithm with O(N) Time


  • 0
    J

    Thought:
    If we enum each letter of the input string and try to match the word in the dictionary, the Time complexity is O(N* M* words average length). In real life, we normally have a big dictionary contains thousands of words , then it will be extremely slow.
    Solution:
    Here, we build a dictionary prefix Trie first, then keep tracking how many dictionary entries it can match in current letter.

    1. When current letter matches a word that parsing to the end of dictionary trie, we get a new start-end index pair that can add bold , and we can merge the start-end index into existing bold tag list.
    2. If we can not parse by the dictionary, we just drop it from the tracking list.
    3. So we only need to scan the string once and maintain the tracking list using dictionary tree. It will be a O(N) Time complexity.
    public class Solution
        {
            Dictionary<char, Node> DicTree = new Dictionary<char, Node>();
    
            public string AddBoldTag(string s, string[] dict)
            {
                
                if (string.IsNullOrEmpty(s)) return s;
    
                BuildDicTree(dict);
    
                Dictionary<int, Node> trackingList = new Dictionary<int, Node>();
    
                List<int> boldStartIndex = new List<int>();
                List<int> boldEndIndex = new List<int>();
    
                for (int i = 0; i < s.Length; i++)
                {                
                    Dictionary<int, Node> newtrackingList = new Dictionary<int, Node>();
                    foreach (var n in trackingList)
                    {
                        var matchedNode = n.Value.GetChild(s[i]);
                        
                        if (matchedNode != null)
                        {
                            // dictionary contains the prefix
                            newtrackingList[n.Key] = matchedNode;
                            if (matchedNode.isEnd)
                            {
                                UpdateBoldIndex(boldStartIndex, boldEndIndex, n.Key, i);
                            }
                        }
                        // if matchedNode == null means dictionary does not have the word
                        // the newtrackinglist will not contain it
                    }
    
                    trackingList = newtrackingList;
    
                    if (DicTree.ContainsKey(s[i]))
                    {
                        trackingList.Add(i, DicTree[s[i]]);
                        if (DicTree[s[i]].isEnd)
                        {
                            UpdateBoldIndex(boldStartIndex, boldEndIndex, i, i);
                        }
                    }
                }
    
                var newstr = GetUpdateString(s, boldStartIndex, boldEndIndex);
                return newstr;
    
            }
    
            // input the string and all start and end index of the <b> </b>
            // say: s = "akcd" start=[0, 3], end = [1, 3]. then output will be "<b>ak</b>c<b>d</b>"
            public string GetUpdateString(string s, List<int> start, List<int> end)
            {
                if (start.Count < 1)
                {
                    return s;
                }
    
                string news = "";
                for (int i = 0; i < start.Count; i++)
                {
                    if (i == 0)
                    {
                        news += s.Substring(0, start[0]) + "<b>" + s.Substring(start[0], end[0] - start[0] + 1) + "</b>";
                    }
                    else
                    {
                        news += s.Substring(end[i - 1] + 1, start[i] - end[i - 1] - 1) + "<b>" + s.Substring(start[i], end[i] - start[i] + 1) + "</b>";
                    }
                }
                
                
                if (end[start.Count - 1] < s.Length - 1)
                {
                    news += s.Substring(end[start.Count - 1] + 1);
                }
    
                return news;
            }
    
            public void UpdateBoldIndex(List<int> start, List<int> end, int newstart, int newEnd)
            {
                
                int i = 0;
                while (i < start.Count)
                {
                    if (newstart <= end[i] + 1)                
                    {
                        // the newstart has overlap with the previous one
                        // merge them
                        start[i] = Math.Min(start[i], newstart);
                        end[i] = Math.Max(end[i], newEnd);
    
                        break;
                    }
    
                    i++;
                }
    
                if (i == start.Count)
                {
                    start.Add(newstart);
                    end.Add(newEnd);
                }
                else
                {
                    // remove the small fragement
                    while (start.Count > i + 1)
                    {
                        start.RemoveAt(start.Count - 1);
                        end.RemoveAt(end.Count - 1);
                    }
                }
            }
    
            public void BuildDicTree(string[] dict)
            {            
                for (int i = 0; i < dict.Length; i++)
                {
                    var s = dict[i];
                    if (string.IsNullOrEmpty(s)) continue;
    
                    Node preNode = null ;
                    for (int j = 0; j < s.Length; j++)
                    {
                        var c = s[j];
                        bool isEnd = j == s.Length - 1;
    
                        if (j == 0)
                        {
                            if (DicTree.ContainsKey(c))
                            {
                                if (isEnd) DicTree[c].isEnd = true;
                                preNode = DicTree[c];
                            }
                            else
                            {
                                var n = new Node(c, isEnd);
                                DicTree.Add(c, n);
                                preNode = n;
                            }
                        }
                        else
                        {
                            var n = new Node(c, isEnd);
                            preNode = preNode.SetChild(n);
                            
                        }
                    }
                }
            }
    
            public class Node
            {
                public char c;
                public Dictionary<char, Node> child = new Dictionary<char, Node>();
                public bool isEnd;
    
                public Node(char c, bool isEnd)
                {
                    this.c = c;
                    this.isEnd = isEnd;
                }
    
                public Node SetChild(Node n)
                {
                    if (child.ContainsKey(n.c))
                    {
                        if (n.isEnd)
                        {
                            child[n.c].isEnd = true;                        
                        }
                        return child[n.c];
                    }
                    else
                    {
                        child.Add(n.c, n);
                        return n;
                    }
                }
    
                public Node GetChild(char c)
                {
                    if (child.ContainsKey(c))
                    {
                        return child[c];
                    }
    
                    return null;
                }
            }
    }
    
    

  • 0

    What's a "tire"? (There's even another solution using that word, at first I thought it's a typo but maybe it isn't?)

    When you say "O(N)", it looks like you forgot that building the "tire" takes time as well (though I'm not sure because you're not saying what N is).


  • 0
    J

    @StefanPochmann
    oh, it is a typo, should be trie.
    Suppose the string length is N, the dictionary contains M words, and the words average length is K. Then Building the Dictionary tree need O(M * K) time.
    And searching the string need O(N) time.
    So the total time is O(N) + O(M*K).
    But please note, we only need to build the dictionary trie once in the real life. And keep using it for different incoming strings.
    That's why I say the time is O(N)


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.