Skip to content

Commit

Permalink
Implement exp_m1 and exp2_m1
Browse files Browse the repository at this point in the history
This switches between a Taylor-esque approximation based on x + x/2,
and the "obvious" exp(x) - 1 based on the size of x, with the exact
polynomial and the size threshold chosen by `optimiser`.

Fixes #10.
  • Loading branch information
huonw committed Jan 26, 2019
1 parent 55d4bef commit b16ad3c
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 1 deletion.
28 changes: 27 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ extern crate fast_math;
extern crate criterion;
use criterion::{Criterion, Fun, black_box};

use std::f32::consts::LN_2;

fn bench<Fast, Raw, Std>(c: &mut Criterion, name: &str, values: &'static [f32],
fast: &'static Fast, raw: &'static Raw, std: &'static Std)
where
Expand Down Expand Up @@ -129,6 +131,29 @@ fn bench_exp2(c: &mut Criterion) {
bench(c, "exp2", values, &fast_math::exp2, &fast_math::exp2_raw, &f32::exp2)
}

fn bench_exp_m1(c: &mut Criterion) {
let values = &[
1.0 * 0.85708036, 1.0 * -2.43390621, 1.0 * 2.80163358, 1.0 * -2.55126348, 1.0 * 3.18046186,
1.0 * -2.88689427, 1.0 * 0.32215155, 1.0 * -0.07701401, 1.0 * 1.22922506, 1.0 * -0.4580259,
1.0 * 0.01257442, 1.0 * -4.23107197, 1.0 * 0.89538113, 1.0 * -1.65219582, 1.0 * 0.14632742,
1.0 * -1.68663984, 1.0 * 1.88125115, 1.0 * -2.16773942, 1.0 * 1.27461936, 1.0 * -1.03091265
];
bench(c, "exp_m1", values, &fast_math::exp_m1, &fast_math::exp_m1_raw, &f32::exp_m1)
}

fn bench_exp2_m1(c: &mut Criterion) {
let values = &[
1.0 * 0.85708036, 1.0 * -2.43390621, 1.0 * 2.80163358, 1.0 * -2.55126348, 1.0 * 3.18046186,
1.0 * -2.88689427, 1.0 * 0.32215155, 1.0 * -0.07701401, 1.0 * 1.22922606, 1.0 * -0.4580259,
1.0 * 0.01257442, 1.0 * -4.23107197, 1.0 * 0.89538113, 1.0 * -1.65219582, 1.0 * 0.14632742,
1.0 * -1.68663984, 1.0 * 1.88125115, 1.0 * -2.16773942, 1.0 * 1.27461936, 1.0 * -1.03091265
];
fn exp2_m1(x: f32) -> f32 {
(x * LN_2).exp_m1()
}
bench(c, "exp2_m1", values, &fast_math::exp2_m1, &fast_math::exp2_m1_raw, &exp2_m1)
}

fn bench_atan2(c: &mut Criterion) {
let baseline = Fun::new(
"baseline",
Expand Down Expand Up @@ -158,5 +183,6 @@ fn bench_atan2(c: &mut Criterion) {
c.bench_functions("scalar/atan2", vec![baseline, full, std], values);
}

criterion_group!(benches, bench_log2, bench_exp, bench_exp2, bench_atan, bench_atan2);
criterion_group!(benches, bench_log2, bench_exp, bench_exp2, bench_exp_m1, bench_exp2_m1,
bench_atan, bench_atan2);
criterion_main!(benches);
32 changes: 32 additions & 0 deletions examples/exhaustive-exp_m1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
extern crate fast_math;
extern crate ieee754;
use ieee754::Ieee754;

use std::f32::consts::LN_2;

fn main() {
let mut max_rel = 0f32;
let mut max_abs = 0f32;
for x in (-128.0 * LN_2).upto(128.0 * LN_2) {
let e = fast_math::exp_m1(x);
let t = x.exp_m1();
let diff = (e - t).abs();
let rel = e.rel_error(t).abs();
max_abs = max_abs.max(diff);
max_rel = max_rel.max(rel);
}
println!("exp_m1 : absolute: {:.8e}, relative: {:.8}", max_abs, max_rel);

let (abs, rel) = (-128.0).upto(128.0)
.map(|x| {
let e = fast_math::exp2_m1(x);
let t = (x * LN_2).exp_m1();
let diff = (e - t).abs();
let rel = e.rel_error(t).abs();
(diff, rel)
})
.fold((0_f32, 0_f32), |(a, a_), (b, b_)| (a.max(b), a_.max(b_)));

println!("exp2_m1: absolute: {:.8e}, relative: {:.8}", abs, rel);

}
1 change: 1 addition & 0 deletions optimiser/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ fn main() {
"atan" => run(problems::Atan, n),
"exp" => run(problems::Exp, n),
"exp2" => run(problems::Exp2, n),
"exp_m1" => run(problems::ExpM1, n),
"log2" => run(problems::Log2, n),
s => panic!("unknown argument '{}'", s),
}
Expand Down
35 changes: 35 additions & 0 deletions optimiser/src/problems.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,41 @@ impl Approximation for Exp2 {
}
}

pub struct ExpM1;
impl Approximation for ExpM1 {
fn name() -> &'static str { "exp_m1" }

const NUM_PARAMS: usize = 3;
fn ranges() -> Vec<(f32, f32, Option<f32>)> {
vec![(0.0, 1.0, Some(0.2)),
(0.0, 2.0, Some(1.0)),
(0.0, 1.0, Some(0.5))]
}

const MIN: f32 = -1.0;
const MAX: f32 = 1.0;
fn exact_test_values() -> Vec<f32> {
vec![0.0]
}

fn exact(x: f64) -> f64 {
x.exp_m1()
}
fn approx(x: f32, params: ArrayView1<f64>) -> f32 {
assert_eq!(params.len(), Self::NUM_PARAMS);
let limit = params[0] as f32;
let add = params[1] as f32;
let mul = params[2] as f32;

let value = if x.abs() <= limit {
x * (add + mul * x)
} else {
fast_math::exp(x) - 1.0
};
value
}
}

pub struct Log2;
impl Approximation for Log2 {
fn name() -> &'static str { "log2" }
Expand Down
156 changes: 156 additions & 0 deletions src/exp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ impl Base {
Base::Two => 1.0,
}
}
#[inline(always)]
fn ln(self) -> f32 {
match self {
Base::E => 1.0,
Base::Two => f::LN_2,
}
}

#[inline(always)]
fn upper_limit(self) -> f32 {
Expand Down Expand Up @@ -59,6 +66,33 @@ fn exp_impl(x: f32, base: Base) -> f32 {
}
}

const EXP_M1_THRESHOLD: f32 = 0.25153902;
const EXP_M1_ADD: f32 = 1.0053172;
const EXP_M1_MUL: f32 = 0.5004446;
#[inline(always)]
fn exp_m1_raw_impl(x: f32, base: Base) -> f32 {
if x.abs() <= EXP_M1_THRESHOLD / base.ln() {
// premultiply because these can be done at compile time
let add = EXP_M1_ADD * base.ln();
let mul = EXP_M1_MUL * base.ln() * base.ln();
x * (add + mul * x)
} else {
exp_raw_impl(x, base) - 1.0
}
}

#[inline(always)]
fn exp_m1_impl(x: f32, base: Base) -> f32 {
if x.abs() <= EXP_M1_THRESHOLD / base.ln() {
// premultiply because these can be done at compile time
let add = EXP_M1_ADD * base.ln();
let mul = EXP_M1_MUL * base.ln() * base.ln();
x * (add + mul * x)
} else {
exp_impl(x, base) - 1.0
}
}

/// Compute a fast approximation to 2<sup><code>x</code></sup> for
/// -151 &le; `x` &le; 151.
///
Expand Down Expand Up @@ -89,6 +123,32 @@ pub fn exp2(x: f32) -> f32 {
exp_impl(x, Base::Two)
}

/// Compute a fast approximation to 2<sup><code>x</code></sup> - 1 for
/// -128 &le; `x` &le; 128.
///
/// This will return unspecified nonsense if `x` does not satisfy
/// those requirements. Use `exp2_m1` if correct handling is required
/// (at the expense of some speed).
///
/// The maximum relative error is less than 0.011.
#[inline]
pub fn exp2_m1_raw(x: f32) -> f32 {
exp_m1_raw_impl(x, Base::Two)
}

/// Compute a fast approximation to 2<sup><code>x</code></sup> - 1.
///
/// The maximum relative error is less than 0.011.
///
/// If `x` is NaN, `exp2_m1` returns NaN.
///
/// See also `exp2_m1_raw` which only works on -128 &le; `x` &le; 128,
/// but is 10-50% faster.
#[inline]
pub fn exp2_m1(x: f32) -> f32 {
exp_m1_impl(x, Base::Two)
}

/// Compute a fast approximation to *e*<sup><code>x</code></sup> for
/// -104 &le; `x` &le; 104.
///
Expand Down Expand Up @@ -121,6 +181,32 @@ pub fn exp(x: f32) -> f32 {
exp_impl(x, Base::E)
}

/// Compute a fast approximation to *e*<sup><code>x</code></sup> - 1 for
/// -88 &le; `x` &le; 88.
///
/// This will return unspecified nonsense if `x` does not satisfy
/// those requirements. Use `exp_m1` if correct handling is required
/// (at the expense of some speed).
///
/// The maximum relative error is less than 0.011.
#[inline]
pub fn exp_m1_raw(x: f32) -> f32 {
exp_m1_raw_impl(x, Base::E)
}

/// Compute a fast approximation to *e*<sup><code>x</code></sup> - 1.
///
/// The maximum relative error is less than 0.011.
///
/// If `x` is NaN, `exp_m1` returns NaN.
///
/// See also `exp_m1_raw` which only works on -88 &le; `x` &le; 88,
/// but is 10-30% faster.
#[inline]
pub fn exp_m1(x: f32) -> f32 {
exp_m1_impl(x, Base::E)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -196,4 +282,74 @@ mod tests {
assert!((exp2(0.0) - 1.0).abs() < 0.002);
assert_eq!(exp2(f32::INFINITY), f32::INFINITY);
}

const EXP_M1_REL_ERR: f32 = 0.0054;
#[test]
fn exp_m1_rel_err_exhaustive() {
let mut max = 0.0;
for i in 0..PREC + 1 {
for j in -5..6 {
for &sign in &[-1.0, 1.0] {
let x = sign * (1.0 + i as f32 / PREC as f32) * 2f32.powi(j * 2);
let e = exp_m1(x);
let t = x.exp_m1();
let rel = e.rel_error(t).abs();

if t.classify() == num::FpCategory::Subnormal {
// subnormal should be approximately right
assert!(rel <= 1.0,
"{:.8}: e = {:.8e}, t = {:.8e}. {:.4}", x, e, t, rel);
} else {
if rel > max { max = rel }
// e == t handles the infinity case
assert!(rel <= EXP_M1_REL_ERR,
"{:.8}: e = {:.8e}, t = {:.8e}. {:.4}", x, e, t, rel);
}
}
}
}
println!("maximum {}", max);
}

#[test]
fn exp2_m1_rel_err_exhaustive() {
let mut max = 0.0;
for i in 0..PREC + 1 {
for j in -5..6 {
for &sign in &[-1.0, 1.0] {
let x = sign * (1.0 + i as f32 / PREC as f32) * 2f32.powi(j * 2);
let e = exp2_m1(x);
let t = (x * f32::consts::LN_2).exp_m1();
let rel = e.rel_error(t).abs();
if t.classify() == num::FpCategory::Subnormal {
// subnormal should be approximately right
assert!(rel <= 1.0,
"{:.8}: e = {:.8e}, t = {:.8e}. {:.4}", x, e, t, rel);
} else {
if rel > max { max = rel }
// e == t handles the infinity case
assert!(rel <= EXP_M1_REL_ERR,
"{:.8}: e = {:.8e}, t = {:.8e}. {:.4}", x, e, t, rel);
}
}
}
}
println!("maximum {}", max);
}

#[test]
fn exp_m1_edge_cases() {
assert!(exp_m1(f32::NAN).is_nan());
assert_eq!(exp_m1(f32::NEG_INFINITY), -1.0);
assert_eq!(exp_m1(0.0), 0.0);
assert_eq!(exp_m1(f32::INFINITY), f32::INFINITY);
}

#[test]
fn exp2_m1_edge_cases() {
assert!(exp2_m1(f32::NAN).is_nan());
assert_eq!(exp2_m1(f32::NEG_INFINITY), -1.0);
assert_eq!(exp2_m1(0.0), 0.0);
assert_eq!(exp2_m1(f32::INFINITY), f32::INFINITY);
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ extern crate ieee754;
pub use log::{log2, log2_raw};
pub use atan::{atan_raw, atan, atan2};
pub use exp::{exp_raw, exp2_raw, exp, exp2};
pub use exp::{exp_m1_raw, exp_m1, exp2_m1_raw, exp2_m1};

mod log;
mod atan;
Expand Down

0 comments on commit b16ad3c

Please sign in to comment.