Use one INT to store 4 ASCII chars (c++ with explanation comments)


  • 0
    X
    /// Serializes a list of strings into a single string and back.
    ///
    /// Wire format:
    ///   - an empty list is encoded as "=";
    ///   - otherwise every string is encoded with encodeS() and followed by '.'.
    ///
    /// A single string is encoded as a sequence of decimal numbers, each followed
    /// by a space, where every number packs up to 4 bytes (first byte in the
    /// lowest 8 bits). The final character is a digit 1..4 telling how many bytes
    /// the last number really holds. The empty string is encoded as "-".
    ///
    /// The encoded form therefore only contains digits, spaces, '-', '.' and '=',
    /// so the payload itself may contain any byte value, including '.' and ' '.
    class Codec {
    public:
        /// Encodes a list of strings to a single string.
        ///
        /// @param strs strings to encode; not modified.
        /// @return "=" for an empty list, otherwise the '.'-terminated
        ///         concatenation of encodeS() of every element.
        std::string encode(std::vector<std::string>& strs) {
            if (strs.empty())
                return "=";
            std::string out;
            for (const std::string& str : strs) {
                out += encodeS(str);
                out += '.';
            }
            return out;
        }

        /// Decodes a single string produced by encode() to a list of strings.
        ///
        /// Characters after the last '.' are ignored, because only a '.' completes
        /// an element.
        ///
        /// @param s encoded text.
        /// @return the decoded strings, in their original order.
        std::vector<std::string> decode(std::string s) {
            std::vector<std::string> result;
            if (s == "=")
                return result;
            std::string token;
            for (const char c : s) {
                if (c == '.') {
                    result.push_back(decodeS(token));
                    token.clear();
                } else {
                    token += c;
                }
            }
            return result;
        }

        /// Encodes a single string.
        ///
        /// @param strs the string to encode (any byte values are allowed).
        /// @return "-" for the empty string, otherwise the space-separated packed
        ///         numbers followed by the byte count (1..4) of the last number.
        std::string encodeS(const std::string& strs) {
            if (strs.empty())
                return "-";
            const std::size_t size = strs.size();
            std::string s;
            std::size_t lastGroup = 0;
            for (std::size_t i = 0; i < size; i += 4) {
                const std::size_t remaining = size - i;
                lastGroup = remaining < 4 ? remaining : 4;
                // unsigned long is at least 32 bits wide, enough for 4 bytes.
                unsigned long packed = 0;
                for (std::size_t k = 0; k < lastGroup; ++k) {
                    // Go through unsigned char: a plain char may be signed, and a
                    // sign-extended byte >= 0x80 would corrupt the bytes above it.
                    const unsigned char byte = static_cast<unsigned char>(strs[i + k]);
                    packed |= static_cast<unsigned long>(byte) << (8 * k);
                }
                // Use a space to terminate each number.
                s += std::to_string(packed);
                s += ' ';
            }
            // The number of bytes stored in the last number is not recoverable
            // from the number itself, so append it as the final character.
            s += std::to_string(lastGroup);
            return s;
        }

        /// Decodes a single string produced by encodeS().
        ///
        /// @param s encoded text of one string.
        /// @return the decoded string; "" when s is "-" or empty.
        /// @throws std::invalid_argument or std::out_of_range (from std::stoul)
        ///         when a number in s is malformed.
        std::string decodeS(const std::string& s) {
            // An empty token is never produced by encodeS(); treating it as the
            // empty string avoids the size() - 1 wrap-around below.
            if (s.empty() || s == "-")
                return "";
            std::string digits;
            std::string out;
            const std::size_t last = s.size() - 1;
            for (std::size_t i = 0; i < last; ++i) {
                if (s[i] == ' ') {
                    const unsigned long packed = std::stoul(digits);
                    // Always emit 4 bytes; the surplus of the final group is
                    // cut off after the loop.
                    for (int shift = 0; shift < 32; shift += 8)
                        out += static_cast<char>((packed >> shift) & 0xFFul);
                    digits.clear();
                } else {
                    digits.push_back(s[i]);
                }
            }
            // Based on the number of bytes stored in the last number, drop the
            // padding bytes that were emitted for it.
            const int used = s[last] - '0';
            std::size_t padding = 0;
            if (used >= 0 && used <= 4)
                padding = static_cast<std::size_t>(4 - used);
            if (padding > out.size())
                padding = out.size();
            return out.substr(0, out.size() - padding);
        }
    };
    
    // Your Codec object will be instantiated and called as such:
    // Codec codec;
    // codec.decode(codec.encode(strs));

  • 0
    A

    I don't think this is a good way to approach this problem. It is true that int has 32 bits and char has 8 bits.
    But what if our string consists of 4 chars that each have the bit pattern '10000000'? Then the int becomes 10000000100000001000000010000000, which is 2155905152 (as an unsigned int). Converting this int to a string results in 10 chars for only 4 chars of input => anti-compression.


  • 0
    X

    I agree this is not a space-saving solution, but it encodes well and is at least more secure than some other solutions.


  • 0

    @MintMen Can you point out some of those less secure solutions?


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.