Inspired by 2sum, I have an O(n^2) algorithm but it takes several hundreds of ms to pass all testcases. Anyone has any idea why that happens? ~~Is it because of the repeat calls of unordered_set constructor and desctructor?~~

EDIT: I tried to use a vector of unordered_set to reduce the # calls of destructor, and it's still very slow.

```
vector<vector<int>> threeSum(vector<int>& nums) {
vector<vector<int>> res;
sort(nums.begin(), nums.end());
for (int i = 0; i < nums.size(); ++i) {
if (i > 0 && nums[i] == nums[i-1]) continue;
long a = nums[i];
if (a > 0) break;
unordered_set<int> dp;
dp.reserve(nums.size() - i);
unordered_set<int> used;
for (int j = i + 1; j < nums.size(); ++j) {
if (used.find(nums[j]) != used.end()) continue;
long b = a + nums[j];
if (b + nums[i+1] > 0) break;
if (dp.find(-b) != dp.end()) {
res.push_back(vector<int>({nums[i], -b, nums[j]}));
used.insert(nums[j]);
}
dp.insert(nums[j]);
}
}
// sort(res.begin(), res.end());
return res;
}
```