preprocessed columns, rows and squares sets, the first version was implemented with HashSet, unfortunately the performance was around 69ms(40%), for the small fixed amount of elements, array fits better than set.

the termination condition is the number of empty cells equals 0, which is prebuilt as well.

```
public void solveSudoku(char[][] board) {
boolean[][] rows = new boolean[9][9];
boolean[][] cols = new boolean[9][9];
boolean[][] sqrs = new boolean[9][9];
int target = 81;
for(int i = 0; i < 9; i++)
for(int j = 0; j < 9; j++) {
if (board[i][j] != '.') {
rows[i][board[i][j] - '1'] = true;
sqrs[3 * (i / 3) + j / 3][board[i][j] - '1'] = true;
cols[j][board[i][j] - '1'] = true;
target--;
}
}
dfs(rows, cols, sqrs, board, target, 0, new boolean[1]);
}
private void dfs(boolean[][] rows, boolean[][] cols, boolean[][] sqrs, char[][] board, int target, int pos, boolean[] flag) {
if (target == 0) {
flag[0] = true;
return;
}
int r = pos / 9, c = pos % 9;
if (board[r][c] != '.') {
dfs(rows, cols, sqrs, board, target, pos + 1, flag);
} else {
int sq = 3 * (r / 3) + c / 3;
for (int i = 0; i < 9; i++) {
if (!rows[r][i] && !cols[c][i] && !sqrs[sq][i]) {
board[r][c] = (char)('1' + i);
rows[r][i] = cols[c][i] = sqrs[sq][i] = true;
dfs(rows, cols, sqrs, board, target - 1, pos + 1, flag);
if (flag[0]) return;
rows[r][i] = cols[c][i] = sqrs[sq][i] = false;
}
}
board[r][c] = '.';
}
}
```