Recursive decent parser in python


  • 0
    Y
    class Parser(object):
    
        class SyntaxError(Exception):
            pass
    
        def __init__(self, code):
            self.code = code
            self.i = 0
            self.n = len(code)
    
        def peek(self, n=1):
            beg = self.i
            end = beg + n
            if end > self.n:
                return self.code[beg:]
            else:
                return self.code[beg:end]
    
        def has_next(self):
            return self.i < self.n
    
        def next(self, n=1):
            beg = self.i
            end = beg + n
            if end > self.n:
                self.i = self.n
                return self.code[beg:]
            else:
                self.i = end
                return self.code[beg:end]
    
        def error(self):
            raise Parser.SyntaxError(self.code[self.i:])
    
        def accept(self, s):
            if self.peek(len(s)) == s:
                return True
            else:
                return False
    
        def expect(self, s):
            if self.next(len(s)) != s:
                self.error()
        # tags
        def parse_tag(self):
            tag_name = self.parse_open_tag()
            self.parse_tag_content()
            self.parse_closed_tag(tag_name)
    
        def parse_tag_name(self):
            tag_name = ''
            while self.peek().isupper():
                tag_name += self.next()
                if len(tag_name) > 9:
                    self.error()
    
            if len(tag_name) == 0:
                self.error()
    
            return tag_name
    
        def parse_open_tag(self):
            self.expect('<')
            tag_name = self.parse_tag_name()
            self.expect('>')
            return tag_name
    
        def parse_closed_tag(self, expected_tag_name):
            self.expect('</')
            tag_name = self.parse_tag_name()
            if tag_name != expected_tag_name:
                self.error()
            self.expect('>')
    
        # content
        def parse_tag_content(self):
    
            def parse_one_time():
                if self.accept('<!'):       # it should be a cdata
                    self.parse_cdata()
                elif self.accept('<'):      # it should be a tag
                    self.parse_tag()
                elif self.has_next() and not self.accept('</'):  # it should be common strings
                    while self.has_next() and self.peek() != '<':
                        self.next()
                else:
                    self.error()
    
            while self.has_next() and not self.accept('</'):
                parse_one_time()
    
        def parse_cdata(self):
            self.expect('<![CDATA[')
            self.parse_cdata_content()
            self.expect(']]>')
    
        def parse_cdata_content(self):
            while self.has_next():
                if self.peek(3) != ']]>':
                    self.next()
                else:
                    break
    
    class Solution(object):
        def isValid(self, code):
            """
            :type code: str
            :rtype: bool
            """
            parser = Parser(code)
            try:
                parser.parse_tag()
                return not parser.has_next()
            except Parser.SyntaxError:
                return False
    

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.