Python regexp + merge intervals

  • 0

    Three steps:

    1. For each word in "dict", find all occurrence and create an Interval for each (start, end)
    2. Sorted intervals and merge overlapped intervals
    3. Now we have each Interval represent the start & end boundaries where bold tag should be inserted, then use it to build the res string.
    class Solution(object):
        Interval = collections.namedtuple('Interval', ['start', 'end'])
        def addBoldTag(self, s, dict):
            intervals = []
            for word in dict:
                pattern = '(?={})'.format(word)
                length = len(word)
                idx = [m.start() for m in re.finditer(pattern, s)]
                for i in idx:
                    intervals.append(self.Interval(i, i+length-1))
            intervals = sorted(intervals, key=lambda i: i.start)
            merged = []
            for inter in intervals:
                if not merged or inter.start > merged[-1].end+1:
                    start, end = merged.pop()
                    merged.append(self.Interval(start, max(end, inter.end)))
            return self.buildstr(s, merged)
        def buildstr(self, s, intervals):
            begidx = 0
            res = ''
            for inter in intervals:
                res += s[begidx:inter.start] + '<b>' + s[inter.start:inter.end+1] + '</b>'
                begidx = inter.end+1
            res += s[begidx:]
            return res

Log in to reply

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.