This is the first version which costs 152ms.

```
void nSum(vector<int>& nums, int target, int n, vector<vector<int>>& ans, vector<int>& res, int start, int len) {
if (n == 0) {
if (target == 0)
ans.push_back(res);
return;
}
for (int i = start;i <= len - n;++i) {
if (nums[i] * n > target || nums.back() * n < target)
break;
res.push_back(nums[i]);
nSum(nums,target-nums[i],n-1,ans,res,i+1,len);
res.pop_back();
while (i+1 <= len - n && nums[i] == nums[i+1])
++i;
}
}
vector<vector<int>> fourSum(vector<int>& nums, int target) {
vector<vector<int>> ans;
vector<int> res;
sort(nums.begin(),nums.end());
nSum(nums,target,4,ans,res,0,nums.size());
return ans;
}
```

This is the second version, much faster, 32ms! I believe it can be optimized to run faster!

```
void nSum(vector<int>& nums, int target, int n, vector<vector<int>>& ans, vector<int>& res, int start, int len) {
if (n == 1) {
if (binary_search(nums.begin()+start,nums.end(),target)) {
res.push_back(target);
ans.push_back(res);
res.pop_back();
}
return;
}
for (int i = start;i <= len - n;++i) {
if (nums[i] * n > target || nums.back() * n < target)
break;
res.push_back(nums[i]);
nSum(nums,target-nums[i],n-1,ans,res,i+1,len);
res.pop_back();
while (i+1 <= len - n && nums[i] == nums[i+1])
++i;
}
}
vector<vector<int>> fourSum(vector<int>& nums, int target) {
vector<vector<int>> ans;
vector<int> res;
sort(nums.begin(),nums.end());
nSum(nums,target,4,ans,res,0,nums.size());
return ans;
}
```

I always love this kind of easy code, my coding style is programming different problems in a common paradigm, and this one is simple standard backtracking with pruning!