[Inverted index] C++ better than O(n) query time


  • 0
    X

    In search engine, when we retrieve doc id from inverted index, the iterator usually has two functions: next and jump. The time complexity better than O(N), worse than O(logN).

    class WordDistance {
    public:
        unordered_map<string, vector<int> > wordmap;
        WordDistance(vector<string>& words) {
            for (int i = 0; i < words.size(); ++i) {
                wordmap[words[i]].push_back(i);
            }
        }
        // jump to the target pos
        int jumpTo(vector<int>& vec, int curpos, int target) {
            vector<int>::iterator lb = lower_bound(vec.begin()+curpos,vec.end(),target);
            int pos = lb-vec.begin();
            --pos;
            if (pos <= curpos) pos=curpos+1;
            return pos;
        }
        int shortest(string word1, string word2) {
            int res = INT_MAX;
            int pos1 = 0, pos2 = 0;
            while (pos1 < wordmap[word1].size() && pos2 < wordmap[word2].size()) {
                res = min(res, abs(wordmap[word1][pos1] - wordmap[word2][pos2]));
                if (wordmap[word1][pos1] < wordmap[word2][pos2]) {
                    pos1 = jumpTo(wordmap[word1], pos1, wordmap[word2][pos2]);
                } else {
                    pos2 = jumpTo(wordmap[word2], pos2, wordmap[word1][pos1]);
                }
            }
            return res;
        }
    };
    

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.