Reservoir sampling with sample test code to check the probabilities


  • 0

    Nothing new. Same old reservoir sampling code with a sample tester method to check that the probability that a given index is chosen is equal.

    class RandomPickIndex
        {
            private int[] values;
            Random randGen = new Random();
            public RandomPickIndex(int[] values)
            {
                this.values = values;
            }
    
            public int Pick(int target)
            {
                
                int chosenIndex = -1;
                int size = 0;
                for(int i = 0; i < values.Length; i++)
                {
                    if (values[i] != target)
                    {
                        continue;
                    }
                    // Choose a random number between 0 and size + 1
                    int randomNumber = randGen.Next(size+1);
                    // If size is 1, the sample space is (0, 1). The probability that i is chosen is P(i) = 1/2
                    // If size is 2, the sample space is (0, 1, 2). The probability that i is chosen is 1/3 and the chosen Index gets replaced.
                    // P(j, c) - probability that index j is the chosen index in current iteration c, when j is not the current index
                    // P(j, c) = P(j, c-1) * NOT P(i).
                    // P(j, c) = (1/2) * (1-(1/3)) = 1/3;
    
                    if (randomNumber == size)
                    {
                        chosenIndex = i;
                    }
    
                    size++;
                    
                }
    
                return chosenIndex;
            }
            
            private static void TestChances(int[] values)
            {
                Dictionary<int, double> expected = values.GroupBy(n => n).ToDictionary(k => k.Key, v => (double)1 / v.Count());
                var picker = new RandomPickIndex(values);
                long trials = 100000;
                foreach (var target in expected.Keys)
                {
                    Dictionary<int, long> picks = new Dictionary<int, long>();
                    for (int t = 0; t < trials; t++)
                    {
                        int index = picker.Pick(target);
                        if (!picks.ContainsKey(index))
                        {
                            picks.Add(index, 0);
                        }
    
                        picks[index]++;
                    }
                    double error = 0.01;
                    foreach (var pick in picks)
                    {
                        double p = (double)pick.Value / trials;
                        double exp = expected[target];
                        if (p >= exp - error && p <= exp + error)
                        {
                            Console.WriteLine("Target = {3}, P({0}) = {1}, E({0}) = {2}, OK", pick.Key, p, exp, target);
                        }
                        else
                        {
                            Console.WriteLine("Target = {3}, P({0}) = {1}, E({0}) = {2}, ERROR", pick.Key, p, exp, target);
                        }
                    }
                }
            }
        }
    

    Sample output from the tester

    int[] values = new int[] { 1, 1, 3, 2, 3, 2, 2, 2, 3, 4, 4, 5 };
    TestChances(values);
    

    Output

    Target = 1, P(1) = 0.50251, E(1) = 0.5, OK
    Target = 1, P(0) = 0.49749, E(0) = 0.5, OK
    Target = 3, P(8) = 0.3347, E(8) = 0.333333333333333, OK
    Target = 3, P(2) = 0.33097, E(2) = 0.333333333333333, OK
    Target = 3, P(4) = 0.33433, E(4) = 0.333333333333333, OK
    Target = 2, P(5) = 0.2508, E(5) = 0.25, OK
    Target = 2, P(6) = 0.25227, E(6) = 0.25, OK
    Target = 2, P(3) = 0.2482, E(3) = 0.25, OK
    Target = 2, P(7) = 0.24873, E(7) = 0.25, OK
    Target = 4, P(9) = 0.50132, E(9) = 0.5, OK
    Target = 4, P(10) = 0.49868, E(10) = 0.5, OK
    Target = 5, P(11) = 1, E(11) = 1, OK
    

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.