Without bit manipulation - single pass


  • 0
    S
    class Solution(object):
        """UTF-8 validation using plain integer range comparisons."""

        def validUtf8(self, data):
            """Return True if ``data`` is laid out as a sequence of UTF-8 characters.

            Each element of ``data`` is treated as one byte. The lead byte of a
            character determines how many continuation bytes must follow it:

                0   - 127  (0xxxxxxx)  -> single-byte character, no continuation
                192 - 223  (110xxxxx)  -> 1 continuation byte
                224 - 239  (1110xxxx)  -> 2 continuation bytes
                240 - 247  (11110xxx)  -> 3 continuation bytes

            Every continuation byte must lie in 128 - 191 (10xxxxxx). Any other
            lead value (128 - 191, 248 and above, or negative) is rejected, as
            is a character that runs past the end of ``data``. Only this
            structural layout is checked; an empty list is considered valid.

            :type data: List[int]
            :rtype: bool
            """
            total = len(data)
            pos = 0
            while pos < total:
                lead = data[pos]

                # Classify the lead byte by range to learn how many
                # continuation bytes the character needs.
                if 0 <= lead < 128:
                    trailing = 0
                elif 192 <= lead < 224:
                    trailing = 1
                elif 224 <= lead < 240:
                    trailing = 2
                elif 240 <= lead < 248:
                    trailing = 3
                else:
                    # Stray continuation byte or a value no lead byte may take.
                    return False

                # Index of the last byte belonging to this character.
                last = pos + trailing
                if last >= total:
                    # The character is truncated by the end of the input.
                    return False

                # All bytes after the lead must be continuation bytes.
                if not all(128 <= data[k] < 192 for k in range(pos + 1, last + 1)):
                    return False

                pos = last + 1
            return True

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.