From 0cb4f59e16a48ca091e7c2cb38c66f00f0ab892a Mon Sep 17 00:00:00 2001 From: Ben Sully Date: Thu, 19 Dec 2024 16:39:12 +0000 Subject: [PATCH 1/4] feat: invert power transform/scaling order On main we power transform then standard scale the results, matching sklearn. This PR tries out scaling then power transforming instead. --- js/augurs-transforms-js/src/lib.rs | 59 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/js/augurs-transforms-js/src/lib.rs b/js/augurs-transforms-js/src/lib.rs index 1dde91b..cd08773 100644 --- a/js/augurs-transforms-js/src/lib.rs +++ b/js/augurs-transforms-js/src/lib.rs @@ -1,7 +1,5 @@ //! JavaScript bindings for augurs transformations, such as power transforms, scaling, etc. -use std::cell::RefCell; - use serde::{Deserialize, Serialize}; use tsify_next::Tsify; use wasm_bindgen::prelude::*; @@ -35,8 +33,7 @@ pub enum PowerTransformAlgorithm { #[wasm_bindgen] pub struct PowerTransform { inner: Transform, - standardize: bool, - scale_params: RefCell>, + scale_params: Option, } #[wasm_bindgen] @@ -46,11 +43,23 @@ impl PowerTransform { /// @experimental #[wasm_bindgen(constructor)] pub fn new(opts: PowerTransformOptions) -> Result { + let (scale_params, inner) = if opts.standardize { + let scale_params = StandardScaleParams::from_data(opts.data.iter().copied()); + let scaler = Transform::standard_scaler(scale_params.clone()); + let scaled: Vec<_> = scaler.transform(opts.data.iter().copied()).collect(); + ( + Some(scale_params), + Transform::power_transform(&scaled).map_err(|e| JsError::new(&e.to_string()))?, + ) + } else { + ( + None, + Transform::power_transform(&opts.data).map_err(|e| JsError::new(&e.to_string()))?, + ) + }; Ok(PowerTransform { - inner: Transform::power_transform(&opts.data) - .map_err(|e| JsError::new(&e.to_string()))?, - standardize: opts.standardize, - scale_params: RefCell::new(None), + inner, + scale_params, }) } @@ -62,17 +71,13 @@ impl PowerTransform { /// @experimental 
#[wasm_bindgen] pub fn transform(&self, data: VecF64) -> Result, JsError> { - let transformed: Vec<_> = self - .inner - .transform(data.convert()?.iter().copied()) - .collect(); - if !self.standardize { - Ok(transformed) - } else { - let scale_params = StandardScaleParams::from_data(transformed.iter().copied()); + let data = data.convert()?; + if let Some(scale_params) = &self.scale_params { let scaler = Transform::standard_scaler(scale_params.clone()); - self.scale_params.replace(Some(scale_params)); - Ok(scaler.transform(transformed.iter().copied()).collect()) + let scaled: Vec<_> = scaler.transform(data.iter().copied()).collect(); + Ok(self.inner.transform(scaled.iter().copied()).collect()) + } else { + Ok(self.inner.transform(data.iter().copied()).collect()) } } @@ -85,17 +90,13 @@ impl PowerTransform { /// @experimental #[wasm_bindgen(js_name = "inverseTransform")] pub fn inverse_transform(&self, data: VecF64) -> Result, JsError> { - match (self.standardize, self.scale_params.borrow().as_ref()) { - (true, Some(scale_params)) => { - let inverse_scaler = Transform::standard_scaler(scale_params.clone()); - let data = data.convert()?; - let scaled = inverse_scaler.inverse_transform(data.iter().copied()); - Ok(self.inner.inverse_transform(scaled).collect()) - } - _ => Ok(self - .inner - .inverse_transform(data.convert()?.iter().copied()) - .collect()), + let data = data.convert()?; + if let Some(scale_params) = &self.scale_params { + let scaler = Transform::standard_scaler(scale_params.clone()); + let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); + Ok(scaler.inverse_transform(inverse_transformed).collect()) + } else { + Ok(self.inner.inverse_transform(data.iter().copied()).collect()) } } From 7a8586f4ea9eacd102186bb21af5d861a58af9cb Mon Sep 17 00:00:00 2001 From: Chris Marchbanks Date: Thu, 19 Dec 2024 15:54:25 -0700 Subject: [PATCH 2/4] Scale the data before or after a transformation Allow users to specify if they do not want to scale 
their data, or to scale it either before or after doing a power transformation. This allows either matching the sklearn behavior of scaling the data after the transformation, or scaling it before the transformation, which can help with data that floors at non-zero values. --- js/augurs-transforms-js/src/lib.rs | 104 +++++++++++++++++------------ 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/js/augurs-transforms-js/src/lib.rs b/js/augurs-transforms-js/src/lib.rs index cd08773..8e0dc55 100644 --- a/js/augurs-transforms-js/src/lib.rs +++ b/js/augurs-transforms-js/src/lib.rs @@ -1,5 +1,7 @@ //! JavaScript bindings for augurs transformations, such as power transforms, scaling, etc. +use std::cell::RefCell; + use serde::{Deserialize, Serialize}; use tsify_next::Tsify; use wasm_bindgen::prelude::*; @@ -33,7 +35,8 @@ pub enum PowerTransformAlgorithm { #[wasm_bindgen] pub struct PowerTransform { inner: Transform, - scale_params: Option, + standardize: Standardize, + scale_params: RefCell>, } #[wasm_bindgen] @@ -43,61 +46,70 @@ impl PowerTransform { /// @experimental #[wasm_bindgen(constructor)] pub fn new(opts: PowerTransformOptions) -> Result { - let (scale_params, inner) = if opts.standardize { - let scale_params = StandardScaleParams::from_data(opts.data.iter().copied()); - let scaler = Transform::standard_scaler(scale_params.clone()); - let scaled: Vec<_> = scaler.transform(opts.data.iter().copied()).collect(); - ( - Some(scale_params), - Transform::power_transform(&scaled).map_err(|e| JsError::new(&e.to_string()))?, - ) - } else { - ( - None, - Transform::power_transform(&opts.data).map_err(|e| JsError::new(&e.to_string()))?, - ) - }; Ok(PowerTransform { - inner, - scale_params, + inner: Transform::power_transform(&opts.data) + .map_err(|e| JsError::new(&e.to_string()))?, + standardize: opts.standardize.unwrap_or_default(), + scale_params: RefCell::new(None), }) } /// Transform the given data. 
/// - /// The transformed data is then scaled using a standard scaler (unless - /// `standardize` was set to `false` in the constructor). + /// The data is also scaled either before or after being transformed as per the standardize + /// option. /// /// @experimental #[wasm_bindgen] pub fn transform(&self, data: VecF64) -> Result, JsError> { let data = data.convert()?; - if let Some(scale_params) = &self.scale_params { - let scaler = Transform::standard_scaler(scale_params.clone()); - let scaled: Vec<_> = scaler.transform(data.iter().copied()).collect(); - Ok(self.inner.transform(scaled.iter().copied()).collect()) - } else { - Ok(self.inner.transform(data.iter().copied()).collect()) - } + Ok(match self.standardize { + Standardize::None => self.inner.transform(data.iter().copied()).collect(), + Standardize::Before => { + let scale_params = StandardScaleParams::from_data(data.iter().copied()); + let scaler = Transform::standard_scaler(scale_params.clone()); + self.scale_params.replace(Some(scale_params)); + let scaled: Vec<_> = scaler.transform(data.iter().copied()).collect(); + self.inner.transform(scaled.iter().copied()).collect() + } + Standardize::After => { + let transformed: Vec<_> = self.inner.transform(data.iter().copied()).collect(); + + let scale_params = StandardScaleParams::from_data(transformed.iter().copied()); + let scaler = Transform::standard_scaler(scale_params.clone()); + self.scale_params.replace(Some(scale_params)); + scaler.transform(transformed.iter().copied()).collect() + } + }) } /// Inverse transform the given data. /// - /// The data is first scaled back to the original scale using the standard scaler - /// (unless `standardize` was set to `false` in the constructor), then the - /// inverse power transform is applied. + /// The data is also inversely scaled according to the standardize option. 
The ordering is + /// opposite the order done in transform, i.e if transform scales first then transforms, then + /// inverse_transform transforms then scales. /// /// @experimental #[wasm_bindgen(js_name = "inverseTransform")] pub fn inverse_transform(&self, data: VecF64) -> Result, JsError> { let data = data.convert()?; - if let Some(scale_params) = &self.scale_params { - let scaler = Transform::standard_scaler(scale_params.clone()); - let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); - Ok(scaler.inverse_transform(inverse_transformed).collect()) - } else { - Ok(self.inner.inverse_transform(data.iter().copied()).collect()) - } + Ok( + match (self.standardize, self.scale_params.borrow().as_ref()) { + (Standardize::Before, Some(scale_params)) => { + let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); + let inverse_scaler = Transform::standard_scaler(scale_params.clone()); + inverse_scaler + .inverse_transform(inverse_transformed) + .collect() + } + (Standardize::After, Some(scale_params)) => { + let inverse_scaler = Transform::standard_scaler(scale_params.clone()); + let scaled = inverse_scaler.inverse_transform(data.iter().copied()); + self.inner.inverse_transform(scaled).collect() + } + _ => self.inner.inverse_transform(data.iter().copied()).collect(), + }, + ) } /// Get the algorithm used by the power transform. @@ -122,8 +134,19 @@ impl PowerTransform { } } -fn default_standardize() -> bool { - true +/// When to standardize the data. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Tsify)] +#[serde(rename_all = "camelCase")] +#[tsify(from_wasm_abi)] +pub enum Standardize { + /// Only run a power transform, do not standardize the data. + None, + /// Standardize the data before running the power transform. This may provide better results for data + /// with a non-zero floor. + Before, + /// Standardize the data after running the power transform. This matches the default in sklearn. 
+ #[default] + After, } /// Options for the power transform. @@ -137,8 +160,7 @@ pub struct PowerTransformOptions { /// Whether to standardize the data after applying the power transform. /// - /// This is generally recommended, and defaults to `true`. - #[serde(default = "default_standardize")] + /// This is generally recommended, and defaults to [`Standardize::After`] to match sklearn. #[tsify(optional)] - pub standardize: bool, + pub standardize: Option, } From 8e460e4a3f7643c94cc7e805900f847df38d1ebe Mon Sep 17 00:00:00 2001 From: Chris Marchbanks Date: Thu, 19 Dec 2024 16:19:46 -0700 Subject: [PATCH 3/4] Replace RefCell with &mut self --- js/augurs-transforms-js/src/lib.rs | 44 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/js/augurs-transforms-js/src/lib.rs b/js/augurs-transforms-js/src/lib.rs index 8e0dc55..a5e38d2 100644 --- a/js/augurs-transforms-js/src/lib.rs +++ b/js/augurs-transforms-js/src/lib.rs @@ -1,7 +1,5 @@ //! JavaScript bindings for augurs transformations, such as power transforms, scaling, etc. 
-use std::cell::RefCell; - use serde::{Deserialize, Serialize}; use tsify_next::Tsify; use wasm_bindgen::prelude::*; @@ -36,7 +34,7 @@ pub enum PowerTransformAlgorithm { pub struct PowerTransform { inner: Transform, standardize: Standardize, - scale_params: RefCell>, + scale_params: Option, } #[wasm_bindgen] @@ -50,7 +48,7 @@ impl PowerTransform { inner: Transform::power_transform(&opts.data) .map_err(|e| JsError::new(&e.to_string()))?, standardize: opts.standardize.unwrap_or_default(), - scale_params: RefCell::new(None), + scale_params: None, }) } @@ -61,14 +59,14 @@ impl PowerTransform { /// /// @experimental #[wasm_bindgen] - pub fn transform(&self, data: VecF64) -> Result, JsError> { + pub fn transform(&mut self, data: VecF64) -> Result, JsError> { let data = data.convert()?; Ok(match self.standardize { Standardize::None => self.inner.transform(data.iter().copied()).collect(), Standardize::Before => { let scale_params = StandardScaleParams::from_data(data.iter().copied()); let scaler = Transform::standard_scaler(scale_params.clone()); - self.scale_params.replace(Some(scale_params)); + self.scale_params = Some(scale_params); let scaled: Vec<_> = scaler.transform(data.iter().copied()).collect(); self.inner.transform(scaled.iter().copied()).collect() } @@ -77,7 +75,7 @@ impl PowerTransform { let scale_params = StandardScaleParams::from_data(transformed.iter().copied()); let scaler = Transform::standard_scaler(scale_params.clone()); - self.scale_params.replace(Some(scale_params)); + self.scale_params = Some(scale_params); scaler.transform(transformed.iter().copied()).collect() } }) @@ -93,23 +91,21 @@ impl PowerTransform { #[wasm_bindgen(js_name = "inverseTransform")] pub fn inverse_transform(&self, data: VecF64) -> Result, JsError> { let data = data.convert()?; - Ok( - match (self.standardize, self.scale_params.borrow().as_ref()) { - (Standardize::Before, Some(scale_params)) => { - let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); - let 
inverse_scaler = Transform::standard_scaler(scale_params.clone()); - inverse_scaler - .inverse_transform(inverse_transformed) - .collect() - } - (Standardize::After, Some(scale_params)) => { - let inverse_scaler = Transform::standard_scaler(scale_params.clone()); - let scaled = inverse_scaler.inverse_transform(data.iter().copied()); - self.inner.inverse_transform(scaled).collect() - } - _ => self.inner.inverse_transform(data.iter().copied()).collect(), - }, - ) + Ok(match (self.standardize, self.scale_params.clone()) { + (Standardize::Before, Some(scale_params)) => { + let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); + let inverse_scaler = Transform::standard_scaler(scale_params.clone()); + inverse_scaler + .inverse_transform(inverse_transformed) + .collect() + } + (Standardize::After, Some(scale_params)) => { + let inverse_scaler = Transform::standard_scaler(scale_params.clone()); + let scaled = inverse_scaler.inverse_transform(data.iter().copied()); + self.inner.inverse_transform(scaled).collect() + } + _ => self.inner.inverse_transform(data.iter().copied()).collect(), + }) } /// Get the algorithm used by the power transform. From 64d5d3dc0e7a9b32673bd6d438d3a4e2b52426e9 Mon Sep 17 00:00:00 2001 From: Chris Marchbanks Date: Thu, 19 Dec 2024 17:11:04 -0700 Subject: [PATCH 4/4] WIP: Fix errors with scaling before transformation If we scale before transforming, some values may become negative, and a Box-Cox transformation (which requires strictly positive input) is no longer valid. This commit waits to choose which transformation algorithm to use until after potentially scaling the data. 
--- crates/augurs-forecaster/src/transforms.rs | 2 +- js/augurs-transforms-js/src/lib.rs | 38 +++++++++++++++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/crates/augurs-forecaster/src/transforms.rs b/crates/augurs-forecaster/src/transforms.rs index 4d878bf..1bfa393 100644 --- a/crates/augurs-forecaster/src/transforms.rs +++ b/crates/augurs-forecaster/src/transforms.rs @@ -43,7 +43,7 @@ impl Transforms { } /// A transformation that can be applied to a time series. -#[derive(Debug)] +#[derive(Debug, Clone)] #[non_exhaustive] pub enum Transform { /// Linear interpolation. diff --git a/js/augurs-transforms-js/src/lib.rs b/js/augurs-transforms-js/src/lib.rs index a5e38d2..305715e 100644 --- a/js/augurs-transforms-js/src/lib.rs +++ b/js/augurs-transforms-js/src/lib.rs @@ -32,7 +32,7 @@ pub enum PowerTransformAlgorithm { #[derive(Debug)] #[wasm_bindgen] pub struct PowerTransform { - inner: Transform, + inner: Option, standardize: Standardize, scale_params: Option, } @@ -45,8 +45,7 @@ impl PowerTransform { #[wasm_bindgen(constructor)] pub fn new(opts: PowerTransformOptions) -> Result { Ok(PowerTransform { - inner: Transform::power_transform(&opts.data) - .map_err(|e| JsError::new(&e.to_string()))?, + inner: None, standardize: opts.standardize.unwrap_or_default(), scale_params: None, }) @@ -62,16 +61,32 @@ impl PowerTransform { pub fn transform(&mut self, data: VecF64) -> Result, JsError> { let data = data.convert()?; Ok(match self.standardize { - Standardize::None => self.inner.transform(data.iter().copied()).collect(), + Standardize::None => { + let transform = + Transform::power_transform(&data).map_err(|e| JsError::new(&e.to_string()))?; + let result = transform.transform(data.iter().copied()).collect(); + self.inner = Some(transform); + result + } Standardize::Before => { let scale_params = StandardScaleParams::from_data(data.iter().copied()); let scaler = Transform::standard_scaler(scale_params.clone()); self.scale_params = 
Some(scale_params); let scaled: Vec<_> = scaler.transform(data.iter().copied()).collect(); - self.inner.transform(scaled.iter().copied()).collect() + + let transform = Transform::power_transform(&scaled) + .map_err(|e| JsError::new(&e.to_string()))?; + let result = transform.transform(scaled.iter().copied()).collect(); + self.inner = Some(transform); + + result } Standardize::After => { - let transformed: Vec<_> = self.inner.transform(data.iter().copied()).collect(); + let transform = + Transform::power_transform(&data).map_err(|e| JsError::new(&e.to_string()))?; + + let transformed: Vec<_> = transform.transform(data.iter().copied()).collect(); + self.inner = Some(transform); let scale_params = StandardScaleParams::from_data(transformed.iter().copied()); let scaler = Transform::standard_scaler(scale_params.clone()); @@ -91,9 +106,10 @@ impl PowerTransform { #[wasm_bindgen(js_name = "inverseTransform")] pub fn inverse_transform(&self, data: VecF64) -> Result, JsError> { let data = data.convert()?; + let transformer = self.inner.clone().unwrap(); Ok(match (self.standardize, self.scale_params.clone()) { (Standardize::Before, Some(scale_params)) => { - let inverse_transformed = self.inner.inverse_transform(data.iter().copied()); + let inverse_transformed = transformer.inverse_transform(data.iter().copied()); let inverse_scaler = Transform::standard_scaler(scale_params.clone()); inverse_scaler .inverse_transform(inverse_transformed) @@ -102,12 +118,15 @@ impl PowerTransform { (Standardize::After, Some(scale_params)) => { let inverse_scaler = Transform::standard_scaler(scale_params.clone()); let scaled = inverse_scaler.inverse_transform(data.iter().copied()); - self.inner.inverse_transform(scaled).collect() + transformer.inverse_transform(scaled).collect() } - _ => self.inner.inverse_transform(data.iter().copied()).collect(), + _ => transformer + .inverse_transform(data.iter().copied()) + .collect(), }) } + /* /// Get the algorithm used by the power transform. 
/// /// @experimental @@ -128,6 +147,7 @@ impl PowerTransform { _ => unreachable!(), } } + */ } /// When to standardize the data.