Simple to understand Python solution using list preprocessing and BFS, beats 95%


  • 23
    from collections import deque
    
    
    class Solution(object):
        def ladderLength(self, beginWord, endWord, wordList):
            
            def construct_dict(word_list):
                d = {}
                for word in word_list:
                    for i in range(len(word)):
                        s = word[:i] + "_" + word[i+1:]
                        d[s] = d.get(s, []) + [word]
                return d
                
            def bfs_words(begin, end, dict_words):
                queue, visited = deque([(begin, 1)]), set()
                while queue:
                    word, steps = queue.popleft()
                    if word not in visited:
                        visited.add(word)
                        if word == end:
                            return steps
                        for i in range(len(word)):
                            s = word[:i] + "_" + word[i+1:]
                            neigh_words = dict_words.get(s, [])
                            for neigh in neigh_words:
                                if neigh not in visited:
                                    queue.append((neigh, steps + 1))
                return 0
            
            d = construct_dict(wordList | set([beginWord, endWord]))
            return bfs_words(beginWord, endWord, d)

  • 1
    C

    The preprocessing part is really clever!


  • 1
    X

    admirable solution!
    I have rewrite it in c++

    class Solution {
    public:
        int ladderLength(string beginWord, string endWord, unordered_set<string> &wordList) {
    		wordList.insert(endWord);
            int len = beginWord.size();
            unordered_map<string,vector<string>> dict;
            for( auto word : wordList ){
                for( int i=0; i<len ;i++ ){
                    string key(word);
                    key[i] = '_';
                    dict[key].push_back(word);
                }
            }
    
            unordered_set<string> visited;
            queue<string> leafs;
            leafs.push(beginWord);
            int deepth = 2;
            int currLayer = 1,nextLayer=0;
            while( !leafs.empty() ){
                string word = leafs.front();
                leafs.pop();
                currLayer--;
    
                for( int i=0 ; i<len ; i++ ){
                    string key(word);
                    key[i] = '_';
                    for( auto neighbor : dict[key] ){
                        if( neighbor == endWord){
                            return deepth;
                        }
                        if( visited.count(neighbor) == 0 ){
                            leafs.push(neighbor);
                            visited.insert(neighbor);
                            nextLayer++;
                        }
                    }
                }
                if( currLayer==0 ){
                    deepth++;
                    currLayer=nextLayer;
                    nextLayer=0;
                }
            }
    
            return 0;
        }
    };
    

  • 4
    E

    @agave

    I came up with the exact same way to find neighbors! I originally tried to use a mapping of both the patterns to the words and the words to their patterns, but the overhead of building the second mapping (just the appending cost) was high enough to offset the benefits (at least on shorter wordlists).

    Even though your posted solution definitely doesn't beat 95% of solution at the time of my writing (the best I got with it as you have it posted was around 77%) it's still a great solution and I wanted to share some of my modifications (got the run time down to 196ms which is just above 88%). I have some fundamental improvements to the algorithm, and a few pythonic changes as well.

    Algorithmic improvements:
    In BFS you should always check a node to determine if it meets the goal criteria BEFORE adding it to the queue. If you wait to check till you pop the node your time complexity goes from O(b^d) to O(b^(d+1)). This makes a pretty big difference.

    Second, because this is a BFS, once a node has been seen, that is the earliest it could have possibly been seen so nodes should be added to the visited set as soon as they are seen, not after they are popped from the queue. This reduces the size of the queue and the number of append operations.

    Third, a simple pythoninc improvement is to use collections.defaultdict instead of the dict.get method. This is for a couple of reasons, the first is that it makes the code a bit simpler, and the second is that dict.get is slow in comparison.

    That's pretty much it. Thanks again for sharing your solution!

    Here's my code:

    class Solution(object):
        def ladderLength(self, beginWord, endWord, wordList):
                    
            def make_p2w(word_list, w_idxs):
                """Creates a map of all combinations of words with missing letters mapped 
                to all words in the list that match that pattern.
                E.g. hot -> {'_ot': ['hot'], 'h_t': ['hot'], 'ho_': ['hot']}
                """
                p2w = collections.defaultdict(list)
                
                for word in word_list:
                    for i, j in w_idxs:
                        p = word[:i] + "_" + word[j:]
                        p2w[p].append(word)
                return p2w
                
            def bfs_words(begin, end, w_idxs, p2w):
                queue = collections.deque([(begin, 1)])
                visited = set([begin])
                            
                while queue:
                    # Get the next node to explore from the top of the queue
                    word, depth = queue.popleft()
                    
                    # Get the node's children 
                    # By recreating all possible patterns for that string
                    for i,j in w_idxs:
                        p = word[:i] + "_" + word[j:]
                        neighbor_words = p2w[p]
                        # Iterate through children
                        for nw in neighbor_words:
                            if nw not in visited:
                                # Goal check (before adding to the queue)
                                if nw == end:
                                    return depth+1
                                # Add to visited
                                # These is no reason to wait to mark nodes as visited. Because this is 
                                # a BFS, once a node has been seen that is the earliest it could have
                                # possibly been seen so any other path to that node would either be 
                                # longer or the same length as what we've already observed.
                                visited.add(nw)                            
                                # Add to the end of the queue
                                queue.append((nw, depth+1))
                return 0
            
            # Get word length and character indexes
            wl = len(beginWord)
            w_indexes = zip(range(wl), range(1, wl+1))
            # Preprocess words into a map
            patterns2words = make_p2w(wordList | set([beginWord, endWord]), w_indexes)
            # Do the search
            return bfs_words(beginWord, endWord, w_indexes, patterns2words)
    

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.