My 0ms super-fast solution


  • 0
    I

    This version implements several tricks to go blindingly fast.

    • bitset<> to track the available digits in row, column and tile
    • lookup table to avoid division/modulo when computing tile number
    • lookup tables to identify right most zero and number of zeros in a bitset
    • sorting the list of open positions to tackle the most constrained positions first
    • one level of lookahead to detect blind alleys earlier and reduce the total recursion space

    All that combined results in a "0ms" DFS solution.

    const int right_most_zero[ 512 ] =
    {
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,
        0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,-1    
    };
    
    const int num_zero[512] = 
    {
        9,8,8,7,8,7,7,6,8,7,7,6,7,6,6,5,
        8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,
        8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
        8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
        7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
        6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
        5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
        5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
        4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0
    };
    
    const int til_no[9][9] =
    {
        { 0, 0, 0, 1, 1, 1, 2, 2, 2 },
        { 0, 0, 0, 1, 1, 1, 2, 2, 2 },
        { 0, 0, 0, 1, 1, 1, 2, 2, 2 },
        { 3, 3, 3, 4, 4, 4, 5, 5, 5 },
        { 3, 3, 3, 4, 4, 4, 5, 5, 5 },
        { 3, 3, 3, 4, 4, 4, 5, 5, 5 },
        { 6, 6, 6, 7, 7, 7, 8, 8, 8 },
        { 6, 6, 6, 7, 7, 7, 8, 8, 8 },
        { 6, 6, 6, 7, 7, 7, 8, 8, 8 }
    };
     
    class Solution {
        char board[9][9];
        bitset<9> col_bits[9];
        bitset<9> row_bits[9];
        bitset<9> til_bits[9];
        vector< pair< int, int > > open;
        
        bitset<9> avail_at( int x, int y ) {
            int t = til_no[y][x];
            return row_bits[y] | col_bits[x] | til_bits[t];
        }
        
        int num_choices_at( int x, int y ) {
            return num_zero[ avail_at( x, y ).to_ulong() ];
        }
        
        int num_choices_at( pair<int, int> p ) {
            return num_choices_at( p.first, p.second );
        }
        
        bool recurse( int level ) {
            if ( level == -1 )
                return true;
            
            // Look ahead one level to see if it has fewer open choices.
            // If it does, swap places with it.  This should trigger 
            // backtrack earlier if we're in a blind alley.
            if ( level > 0 ) {
                int ch_here = num_choices_at( open[level  ] );
                int ch_next = num_choices_at( open[level-1] );
                
                // If there are no choices on either of these two levels,
                // just leave.  We're in a blind alley.
                if ( !ch_here || !ch_next )
                    return false;
                
                if ( ch_next < ch_here )
                    swap( open[level], open[level-1] );
            }
                
            const auto& xy    = open[ level ];
            const auto  x     = xy.first;
            const auto  y     = xy.second;
            const auto  t     = til_no[y][x];
            const auto  avail = avail_at( x, y );
            auto try_bits     = avail.to_ulong( );
    
            for ( int rmz = right_most_zero[ try_bits ] ; rmz >= 0 ; 
                  try_bits |= 1 << rmz, rmz = right_most_zero[ try_bits ] ) {
                row_bits[y].set( rmz );
                col_bits[x].set( rmz );
                til_bits[t].set( rmz );
                
                board[y][x] = rmz;
                if ( recurse( level - 1 ) )
                    return true;
                    
                row_bits[y].reset( rmz );
                col_bits[x].reset( rmz );
                til_bits[t].reset( rmz );
            }
            
            return false;
        }
        
    public:
        void solveSudoku(vector<vector<char>>& board_) {
            for ( int i = 0; i < 9 ; ++i ) {
                col_bits[i] = 0;
                row_bits[i] = 0;
                til_bits[i] = 0;
            }
            open.clear();
            open.reserve(81);
            
            for ( int y = 0 ; y < 9 ; ++y ) {
                for ( int x = 0 ; x < 9 ; ++x ) {
                    int digit = board_[y][x];
                    int digit_val = digit == '.' ? -1 : digit - '1';
                    
                    board[y][x] = digit_val;
                    if ( digit_val >= 0 ) {
                        int t = til_no[y][x];
                        
                        row_bits[y].set( digit_val );
                        col_bits[x].set( digit_val );
                        til_bits[t].set( digit_val );
                    } else {
                        open.emplace_back( x, y );
                    }
                }
            }
            
            // Already solved?  Boo-yah!
            if ( !open.size() )  
                return;
                
            // Sort the open list in decreasing order of 
            // available choices, most constrained are at end
            sort( begin( open ), end( open ), 
                [this]( const pair<int,int>& x, const pair<int,int>& y ) {
                    int choices_at_x = this->num_choices_at( x );
                    int choices_at_y = this->num_choices_at( y );
                    return choices_at_y < choices_at_x;
            });
            
            // Solve it
            recurse( open.size() - 1 );
            
            // Copy it back into the original board
            for ( int y = 0 ; y < 9 ; ++y ) {
                for ( int x = 0 ; x < 9 ; ++x ) {
                    board_[y][x] = board[y][x] +'1';
                }
            }
        }
    };

  • 0
    This post is deleted!

  • 0
    I

    I'm replying to a comment that I received an email for, but I don't see here in the forum yet.

    It turns out I'm leaving some performance on the table by using vector<pair<int,int>> for my open list. If I modify my code to use std::array<> here, it speeds up further.

    Running the original example through the modified code shows that that one change eliminates about 40% of the cycles. Not fast enough to reach the DLX solution, perhaps, but much closer. I'll add the modified code above.


  • 0
    I

    The code:

    class Solution2 {
        char board[9][9];
        bitset<9> col_bits[9];
        bitset<9> row_bits[9];
        bitset<9> til_bits[9];
        array< pair<char,char>, 81 > open;
    
        bitset<9> avail_at( int x, int y ) {
            int t = til_no[y][x];
            return row_bits[y] | col_bits[x] | til_bits[t];
        }
    
        int num_choices_at( int x, int y ) {
            return num_zero[ avail_at( x, y ).to_ulong() ];
        }
    
        int num_choices_at( pair<char, char> p ) {
            return num_choices_at( p.first, p.second );
        }
    
        bool recurse( int level ) {
            if ( level == -1 )
                return true;
    
            // Look ahead one level to see if it has fewer open choices.
            // If it does, swap places with it.  This should trigger 
            // backtrack earlier if we're in a blind alley.
            if ( level > 0 ) {
                int ch_here = num_choices_at( open[level  ] );
                int ch_next = num_choices_at( open[level-1] );
    
                // If there are no choices on either of these two levels,
                // just leave.  We're in a blind alley.
                if ( !ch_here || !ch_next )
                    return false;
    
                if ( ch_next < ch_here )
                    swap( open[level], open[level-1] );
            }
    
            const auto& xy    = open[ level ];
            const auto  x     = xy.first;
            const auto  y     = xy.second;
            const auto  t     = til_no[y][x];
            const auto  avail = avail_at( x, y );
            auto try_bits     = avail.to_ulong( );
    
            for ( int rmz = right_most_zero[ try_bits ] ; rmz >= 0 ; 
                  try_bits |= 1 << rmz, rmz = right_most_zero[ try_bits ] ) {
                row_bits[y].set( rmz );
                col_bits[x].set( rmz );
                til_bits[t].set( rmz );
    
                board[y][x] = rmz;
                if ( recurse( level - 1 ) )
                    return true;
    
                row_bits[y].reset( rmz );
                col_bits[x].reset( rmz );
                til_bits[t].reset( rmz );
            }
    
            return false;
        }
    
    public:
        void solveSudoku(vector<vector<char>>& board_) {
            for ( int i = 0; i < 9 ; ++i ) {
                col_bits[i] = 0;
                row_bits[i] = 0;
                til_bits[i] = 0;
            }
    
            int num_open = 0;
    
            for ( int y = 0 ; y < 9 ; ++y ) {
                for ( int x = 0 ; x < 9 ; ++x ) {
                    int digit = board_[y][x];
                    int digit_val = digit == '.' ? -1 : digit - '1';
    
                    board[y][x] = digit_val;
                    if ( digit_val >= 0 ) {
                        int t = til_no[y][x];
    
                        row_bits[y].set( digit_val );
                        col_bits[x].set( digit_val );
                        til_bits[t].set( digit_val );
                    } else {
                        open[ num_open++ ] = pair<char,char>( x, y );
                    }
                }
            }
    
            // Already solved?  Boo-yah!
            if ( !num_open )  
                return;
    
            // Sort the open list in decreasing order of 
            // available choices, most constrained are at end
            sort( begin( open ), begin( open ) + num_open, 
                [this]( const pair<char,char>& x, const pair<char,char>& y ) {
                    int choices_at_x = this->num_choices_at( x );
                    int choices_at_y = this->num_choices_at( y );
                    return choices_at_y < choices_at_x;
            });
    
            // Solve it
            recurse( num_open - 1 );
    
            // Copy it back into the original board
            for ( int y = 0 ; y < 9 ; ++y ) {
                for ( int x = 0 ; x < 9 ; ++x ) {
                    board_[y][x] = board[y][x] +'1';
                }
            }
        }
    };
    

  • 0
    I

    If you increase the lookahead to 2, and you include the coordinate in the sort criteria (so the search occurs in raster order among elements with equal available choices), and convert the lookup tables to 'char', that pulls another 10-15% out.


  • 0
    I

    Actually... I had messed my wrapper up, and it was trying to re-solve a solved Sudoku! :-P

    With the changes I made, I actually see very little speedup on the original board given in the problem description. I see over 3x speedup on a much harder puzzle from my Sudoku puzzle book.

    {
        { '.', '.', '.', '.', '3', '4', '6', '.', '1' },
        { '.', '.', '.', '5', '6', '.', '.', '3', '7' },
        { '3', '2', '.', '.', '.', '.', '4', '5', '.' },
        { '.', '3', '.', '1', '8', '.', '.', '.', '2' },
        { '.', '.', '.', '.', '.', '.', '.', '.', '.' },
        { '7', '.', '.', '.', '9', '6', '.', '1', '.' },
        { '.', '9', '3', '.', '.', '.', '.', '4', '6' },
        { '4', '1', '.', '.', '5', '9', '.', '.', '.' },
        { '6', '.', '7', '4', '1', '.', '.', '.', '.' },
    }
    

  • 0
    B

    In fact, I can't understand it. Can you give a more specific explanation?


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.