Switch to unified view

a b/third_party/nucleus/util/samplers.h
1
/*
2
 * Copyright 2018 Google LLC.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright notice,
9
 *    this list of conditions and the following disclaimer.
10
 *
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in the
13
 *    documentation and/or other materials provided with the distribution.
14
 *
15
 * 3. Neither the name of the copyright holder nor the names of its
16
 *    contributors may be used to endorse or promote products derived from this
17
 *    software without specific prior written permission.
18
 *
19
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
 * POSSIBILITY OF SUCH DAMAGE.
30
 *
31
 */
32
33
#ifndef THIRD_PARTY_NUCLEUS_UTIL_SAMPLERS_H_
34
#define THIRD_PARTY_NUCLEUS_UTIL_SAMPLERS_H_
35
36
#include <random>
37
38
#include "absl/log/check.h"
39
#include "third_party/nucleus/platform/types.h"
40
41
namespace nucleus {
42
43
// Helper class for randomly sampling a fraction of values.
44
//
45
// The API is simple: on average, only a fraction_to_keep fraction of the calls
// to Keep() will return true.
46
//
47
// So keeping a 10% fraction of the values in a vector<int> x is:
48
//
49
// FractionalSampler sampler(0.10, seed_uint);
50
// for( int v : x ) {
51
//  if (sampler.Keep()) {
52
//    ...
53
//  }
54
// }
55
class FractionalSampler {
56
 public:
57
  // Creates a new FractionalSampler that keeps fraction_to_keep elements on
58
  // average among N calls to Keep().
59
  explicit FractionalSampler(double fraction_to_keep, uint64 random_seed)
60
      : fraction_to_keep_(fraction_to_keep),
61
        generator_(random_seed),
62
        uniform_(0.0, 1.0) {
63
    CHECK_GE(fraction_to_keep, 0.0) << "Must be between 0.0 and 1.0";
64
    CHECK_LE(fraction_to_keep, 1.0) << "Must be between 0.0 and 1.0";
65
  }
66
67
  // Randomly return true approximately fraction_to_keep of the time.
68
  bool Keep() const { return uniform_(generator_) <= fraction_to_keep_; }
69
70
  // Gets the fraction of elements that will be kept.
71
  double FractionKept() const { return fraction_to_keep_; }
72
73
 private:
74
  const double fraction_to_keep_;
75
  // Raw RNG, of a type compatible with STL distribution functions.
76
  mutable std::mt19937_64 generator_;
77
  // Distribution sampler, to sample uniformly from [0,1).
78
  mutable std::uniform_real_distribution<> uniform_;
79
};
80
81
}  // namespace nucleus
82
83
#endif  // THIRD_PARTY_NUCLEUS_UTIL_SAMPLERS_H_