Inspired by Leetcode 522 student attendance record ii.

See what @lixx2100 said in Improving the runtime from O(n) to O(log n):

This quick powering method can be applied to a lot of DP questions that only use O(1) memory, which means transform function can be written as DP[i] = A* DP[i-1], in which A is a matrix.

```
class Solution {
public:
int numWays(int n, int k) {
if(!k || !n) return 0;
vector<int> dp({k, 0});
vector<vector<int>> A({
{k-1, k-1}, {1, 0}
});
n -= 1;
while(n > 0){
if(n % 2 == 1){
vector<int> tmp = dp;
dp[0] = A[0][0] * tmp[0] + A[0][1] * tmp[1];
dp[1] = A[1][0] * tmp[0] + A[1][1] * tmp[1];
}
A = sq(A);
n /= 2;
}
return dp[0] + dp[1];
}
vector<vector<int>> sq(vector<vector<int>>& A){
vector<vector<int>> C(2, vector<int>(2, 0));
for(int i = 0; i < 2; i ++){
for(int j = 0; j < 2; j ++){
for(int k = 0; k < 2; k ++){
C[i][j] += A[i][k] * A[k][j];
}
}
}
return C;
}
};
```