My first accepted solution was this:

```
class Solution {
public:
bool canPartition(vector<int>& nums) {
int all = accumulate(nums.begin(), nums.end(), 0);
if (all % 2 != 0)
return false;
unordered_set<int> sums;
sums.insert(0);
for (int num : nums) {
unordered_set<int> nextSums = sums;
for (int sum : sums) {
if (sum + num == all - (sum + num))
return true;
else if (sum + num < all - (sum + num))
nextSums.insert(sum + num);
}
sums = nextSums;
}
return false;
}
};
```

It barely gets past TL: 1836 ms.

Next, a DP solution based on this one:

```
class Solution {
public:
bool canPartition(vector<int>& nums) {
int sum = accumulate(nums.begin(), nums.end(), 0);
if (sum % 2 != 0)
return false;
int target = sum / 2;
vector<bool> hasSum(target + 1, false);
hasSum[0] = true;
int len = 1;
for (int num : nums) {
if (num > target) // A single number greater than half the sum?
return false; // No way we can partition this!
for (int i = len - 1; i >= 0; --i) {
if (hasSum[i] && i + num < hasSum.size()) {
if (i + num == target)
return true;
hasSum[i + num] = true;
len = max(len, i + sum + 1);
}
}
}
return hasSum[target];
}
};
```

43 ms! Much better. But simple naive DFS based on this solution

```
class Solution {
public:
bool canPartition(vector<int>& nums) {
int sum = accumulate(nums.begin(), nums.end(), 0);
if (sum % 2 != 0)
return false;
return hasSubsetSum(nums, 0, sum / 2);
}
private:
bool hasSubsetSum(const vector<int> &nums, size_t start, int target) {
if (target < 0)
return false;
if (target == 0)
return true;
for (size_t i = start; i < nums.size(); ++i)
if (hasSubsetSum(nums, i + 1, target - nums[i]))
return true;
return false;
}
};
```

is 3 ms! How come? Isn't it *exponential*? Is it just the test cases are picked or something more?