I created a 2 pass solution first. It ran on Leetcode 3ms. Then my mind really refused to think about one pass. Why would one pass be faster then 2 pass one? Well... assuming that we touch memory one time and bring it into CPU cache for computations vs. 2 times, maybe it could be faster. But anyway we would have to make one write pass and that write pass might be significantly longer depending on the way we make those writes.

OK, I made a one pass solution and Leetcode ran it the same 3ms. I looked at solutions of others and they seemed pretty much the same. But I created a test, which populates 3 big arrays and the size of them is ~2GB. I run the test from VS2017 Release x64 on a 8GB laptop.

The time I am interested in is for that sorting section between the puts("On you marks!") and return statements.

And I see that my 2 pass solution runs 0.7 sec while everything else - mine and two other solutions - 3.4 sec. In fact, normal std::sort does it faster - 3.2 sec.

Since I do not pass seed to rand the sequence of rand produced numbers is the same between runs and my control printf of sizes show that. The code is below. Just replace the class names in main for your own verifications.

```
#include "stdafx.h"
#include <vector>
#include <algorithm>
using namespace std;
// Mine 2 pass. Leedcode reports 3ms solution for this
class SolutionX
{
public:
void sortColors(vector<int>& nums)
{
size_t counts[3] = { 0, 0, 0 };
for (auto& n : nums)
++counts[n];
auto Write = [&nums](int v, size_t startPos, size_t count)
{
auto iter = nums.begin() + startPos;
while (count > 0)
{
*iter++ = v;
--count;
}
};
Write(0, 0, counts[0]);
Write(1, counts[0], counts[1]);
Write(2, counts[1] + counts[0], counts[2]);
}
};
// Mine. One pass. Leetcode reports 3ms.
class Solution
{
public:
void sortColors(vector<int>& nums)
{
auto left = find_if(nums.begin(), nums.end(), [](int v) { return v != 0; });
if (left == nums.end())
return;
auto right = find_if(nums.rbegin(), nums.rend(), [](int v) { return v != 2; });
if (right == nums.rend())
return;
// To pointers to avoid some debug only validations.
auto zeroW = &*left;
auto oneW = &*left;
auto twoW = &*right;
while (oneW <= twoW)
{
switch (*oneW)
{
case 0:
swap(*zeroW++, *oneW);
oneW = max(zeroW, oneW);
break;
case 2:
swap(*oneW, *twoW--);
break;
default:
++oneW;
}
}
}
};
// Popular solution by other dev.
class SolutionO {
public:
void sortColors(vector<int>& nums)
{
int tmp = 0, low = 0, mid = 0, high = nums.size() - 1;
while (mid <= high)
{
if (nums[mid] == 0)
{
tmp = nums[low];
nums[low] = nums[mid];
nums[mid] = tmp;
low++;
mid++;
}
else if (nums[mid] == 1)
{
mid++;
}
else if (nums[mid] == 2)
{
tmp = nums[high];
nums[high] = nums[mid];
nums[mid] = tmp;
high--;
}
}
}
};
// A solution posted by another dev.
class SolutionO1 {
public:
void sortColors(vector<int>& nums) {
int i = 0, j = i, k = nums.size() - 1;
while (j <= k) {
if (nums[j] == 0) swap(nums[i++], nums[j++]);
else if (nums[j] == 1) j++;
else swap(nums[k--], nums[j]);
}
}
};
int main()
{
Solution s;
vector<vector<int>> vectors(3);
for (int t = 0; t < 3; ++t)
{
size_t N = rand() * 10000;
vector<int> r1(N);
for (size_t i = 0; i < N; ++i)
{
int n = rand();
r1[i] = n < (RAND_MAX / 3) ? 0 : (n < (2 * RAND_MAX / 3) ? 1 : 2);
}
vectors[t] = move(r1);
}
puts("On you marks!");
for (int t = 0; t < 3; ++t)
{
auto& r1 = vectors[t];
s.sortColors(r1);
// sort(r1.begin(), r1.end());
printf("%d\n", (int)r1.size());
}
return 0;
}
```