1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License..
//! Sampling from random distributions.
//!
//! This is a generalization of `Rand` to allow parameters to control the
//! exact properties of the generated values, e.g. the mean and standard
//! deviation of a normal distribution. The `Sample` trait is the most
//! general, and allows for generating values that change some state
//! internally. The `IndependentSample` trait is for generating values
//! that do not need to record state.
use std::marker;
use crate::{Rand, Rng};
pub use self::exponential::Exp;
pub use self::gamma::{ChiSquared, FisherF, Gamma, StudentT};
pub use self::normal::{LogNormal, Normal};
pub use self::range::Range;
pub mod exponential;
pub mod gamma;
pub mod normal;
pub mod range;
/// Types that can be used to create a random instance of `Support`.
pub trait Sample<Support> {
/// Generate a random value of `Support`, using `rng` as the
/// source of randomness.
fn sample<R: Rng>(&mut self, rng: &mut R) -> Support;
}
/// `Sample`s that do not require keeping track of state.
///
/// Since no state is recorded, each sample is (statistically)
/// independent of all others, assuming the `Rng` used has this
/// property.
// FIXME maybe having this separate is overkill (the only reason is to
// take &self rather than &mut self)? or maybe this should be the
// trait called `Sample` and the other should be `DependentSample`.
pub trait IndependentSample<Support>: Sample<Support> {
/// Generate a random value.
fn ind_sample<R: Rng>(&self, rng: &mut R) -> Support;
}
/// A wrapper for generating types that implement `Rand` via the
/// `Sample` & `IndependentSample` traits.
#[derive(Debug)]
pub struct RandSample<Sup> {
_marker: marker::PhantomData<fn() -> Sup>,
}
impl<Sup> Copy for RandSample<Sup> {}
impl<Sup> Clone for RandSample<Sup> {
fn clone(&self) -> Self {
*self
}
}
impl<Sup: Rand> Sample<Sup> for RandSample<Sup> {
fn sample<R: Rng>(&mut self, rng: &mut R) -> Sup {
self.ind_sample(rng)
}
}
impl<Sup: Rand> IndependentSample<Sup> for RandSample<Sup> {
fn ind_sample<R: Rng>(&self, rng: &mut R) -> Sup {
rng.gen()
}
}
impl<Sup> RandSample<Sup> {
pub fn new() -> RandSample<Sup> {
RandSample {
_marker: marker::PhantomData,
}
}
}
impl<Sup> Default for RandSample<Sup> {
fn default() -> RandSample<Sup> {
Self::new()
}
}
/// A value with a particular weight for use with `WeightedChoice`.
#[derive(Copy, Clone, Debug)]
pub struct Weighted<T> {
/// The numerical weight of this item
pub weight: u32,
/// The actual item which is being weighted
pub item: T,
}
/// A distribution that selects from a finite collection of weighted items.
///
/// Each item has an associated weight that influences how likely it
/// is to be chosen: higher weight is more likely.
///
/// The `Clone` restriction is a limitation of the `Sample` and
/// `IndependentSample` traits. Note that `&T` is (cheaply) `Clone` for
/// all `T`, as is `u32`, so one can store references or indices into
/// another vector.
///
/// # Example
///
/// ```rust
/// use sgx_rand::distributions::{Weighted, WeightedChoice, IndependentSample};
///
/// let mut items = vec!(Weighted { weight: 2, item: 'a' },
/// Weighted { weight: 4, item: 'b' },
/// Weighted { weight: 1, item: 'c' });
/// let wc = WeightedChoice::new(&mut items);
/// let mut rng = sgx_rand::thread_rng();
/// for _ in 0..16 {
/// // on average prints 'a' 4 times, 'b' 8 and 'c' twice.
/// println!("{}", wc.ind_sample(&mut rng));
/// }
/// ```
#[derive(Debug)]
pub struct WeightedChoice<'a, T: 'a> {
items: &'a mut [Weighted<T>],
weight_range: Range<u32>,
}
impl<'a, T: Clone> WeightedChoice<'a, T> {
/// Create a new `WeightedChoice`.
///
/// Panics if:
/// - `v` is empty
/// - the total weight is 0
/// - the total weight is larger than a `u32` can contain.
pub fn new(items: &'a mut [Weighted<T>]) -> WeightedChoice<'a, T> {
// strictly speaking, this is subsumed by the total weight == 0 case
assert!(
!items.is_empty(),
"WeightedChoice::new called with no items"
);
let mut running_total: u32 = 0;
// we convert the list from individual weights to cumulative
// weights so we can binary search. This *could* drop elements
// with weight == 0 as an optimisation.
for item in items.iter_mut() {
running_total = match running_total.checked_add(item.weight) {
Some(n) => n,
None => panic!(
"WeightedChoice::new called with a total weight \
larger than a u32 can contain"
),
};
item.weight = running_total;
}
assert!(
running_total != 0,
"WeightedChoice::new called with a total weight of 0"
);
WeightedChoice {
items,
// we're likely to be generating numbers in this range
// relatively often, so might as well cache it
weight_range: Range::new(0, running_total),
}
}
}
impl<'a, T: Clone> Sample<T> for WeightedChoice<'a, T> {
fn sample<R: Rng>(&mut self, rng: &mut R) -> T {
self.ind_sample(rng)
}
}
impl<'a, T: Clone> IndependentSample<T> for WeightedChoice<'a, T> {
fn ind_sample<R: Rng>(&self, rng: &mut R) -> T {
// we want to find the first element that has cumulative
// weight > sample_weight, which we do by binary since the
// cumulative weights of self.items are sorted.
// choose a weight in [0, total_weight)
let sample_weight = self.weight_range.ind_sample(rng);
// short circuit when it's the first item
if sample_weight < self.items[0].weight {
return self.items[0].item.clone();
}
let mut idx = 0;
let mut modifier = self.items.len();
// now we know that every possibility has an element to the
// left, so we can just search for the last element that has
// cumulative weight <= sample_weight, then the next one will
// be "it". (Note that this greatest element will never be the
// last element of the vector, since sample_weight is chosen
// in [0, total_weight) and the cumulative weight of the last
// one is exactly the total weight.)
while modifier > 1 {
let i = idx + modifier / 2;
if self.items[i].weight <= sample_weight {
// we're small, so look to the right, but allow this
// exact element still.
idx = i;
// we need the `/ 2` to round up otherwise we'll drop
// the trailing elements when `modifier` is odd.
modifier += 1;
} else {
// otherwise we're too big, so go left. (i.e. do
// nothing)
}
modifier /= 2;
}
self.items[idx + 1].item.clone()
}
}
mod ziggurat_tables;
/// Sample a random number using the Ziggurat method (specifically the
/// ZIGNOR variant from Doornik 2005). Most of the arguments are
/// directly from the paper:
///
/// * `rng`: source of randomness
/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0.
/// * `X`: the $x_i$ abscissae.
/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$)
/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$
/// * `pdf`: the probability density function
/// * `zero_case`: manual sampling from the tail when we chose the
/// bottom box (i.e. i == 0)
// the perf improvement (25-50%) is definitely worth the extra code
// size from force-inlining.
#[inline(always)]
fn ziggurat<R: Rng, P, Z>(
rng: &mut R,
symmetric: bool,
x_tab: ziggurat_tables::ZigTable,
f_tab: ziggurat_tables::ZigTable,
mut pdf: P,
mut zero_case: Z,
) -> f64
where
P: FnMut(f64) -> f64,
Z: FnMut(&mut R, f64) -> f64,
{
const SCALE: f64 = (1u64 << 53) as f64;
loop {
// reimplement the f64 generation as an optimisation suggested
// by the Doornik paper: we have a lot of precision-space
// (i.e. there are 11 bits of the 64 of a u64 to use after
// creating a f64), so we might as well reuse some to save
// generating a whole extra random number. (Seems to be 15%
// faster.)
//
// This unfortunately misses out on the benefits of direct
// floating point generation if an RNG like dSMFT is
// used. (That is, such RNGs create floats directly, highly
// efficiently and overload next_f32/f64, so by not calling it
// this may be slower than it would be otherwise.)
// FIXME: investigate/optimise for the above.
let bits: u64 = rng.gen();
let i = (bits & 0xff) as usize;
let f = (bits >> 11) as f64 / SCALE;
// u is either U(-1, 1) or U(0, 1) depending on if this is a
// symmetric distribution or not.
let u = if symmetric { 2.0 * f - 1.0 } else { f };
let x = u * x_tab[i];
let test_x = if symmetric { x.abs() } else { x };
// algebraically equivalent to |u| < x_tab[i+1]/x_tab[i] (or u < x_tab[i+1]/x_tab[i])
if test_x < x_tab[i + 1] {
return x;
}
if i == 0 {
return zero_case(rng, u);
}
// algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1
if f_tab[i + 1] + (f_tab[i] - f_tab[i + 1]) * rng.gen::<f64>() < pdf(x) {
return x;
}
}
}