The idea is like BFS. Try to swap from start position = 0, until start position reaches the end of the input vector.

```
class Solution {
public:
vector<vector<int>> permute(vector<int>& nums) {
vector<vector<int>> res;
if (nums.empty())
{
return res;
}
deque<QueueItem> q;
q.push_back(QueueItem(nums, 0));
while (!q.empty())
{
QueueItem& qi = q.front();
res.push_back(qi.vec);
for (int j = qi.start; j < nums.size() - 1; ++j)
{
for (int i = j + 1; i < nums.size(); ++i)
{
q.push_back(QueueItem(qi.vec, j + 1));
swap(q.back().vec[j], q.back().vec[i]);
}
}
q.pop_front();
}
return res;
}
private:
struct QueueItem
{
vector<int> vec;
int start;
QueueItem(const vector<int>& v, int s) : vec(v), start(s) {}
};
};
```