Instead of using a map for marking the vertical order, I just insert to the front of the result. This will have O(n) time for n level so the amortized time for insertion is still O(1). Therefore, the time complexity if O(num_nodes), space is O(num_nodes) because the BFS queue will have O(num_nodes) in the last level.

```
vector<vector<int>> verticalOrder(TreeNode* root) {
vector<vector<int>> result;
if (!root)
return result;
queue<pair<TreeNode*, int>> bfsq;
int level = 0, min_level = 0;
bfsq.emplace(root, level);
while (!bfsq.empty()) {
auto cur = bfsq.front();
bfsq.pop();
int cur_level = cur.second;
if (min_level > cur_level) {
min_level = cur_level;
result.insert(result.begin(), vector<int>());
}
if (cur_level - min_level == result.size())
result.emplace_back(vector<int>());
result[cur_level - min_level].emplace_back(cur.first->val);
if (cur.first->left)
bfsq.emplace(cur.first->left, cur_level - 1);
if (cur.first->right)
bfsq.emplace(cur.first->right, cur_level + 1);
}
return result;
}
```