Skip to content

Commit

Permalink
Merge pull request #20 from perpetual-ml/iter_mem_limit
Browse files Browse the repository at this point in the history
memory and iter limit added
  • Loading branch information
deadsoul44 authored Oct 28, 2024
2 parents 8be403a + a94ef73 commit ccfdd49
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 49 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "perpetual"
version = "0.5.2"
version = "0.6.0"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
Expand All @@ -21,9 +21,9 @@ codegen-units = 1

[dependencies]
rayon = "1.8"
thiserror = "1.0.64"
serde_json = { version = "1.0.129", features = ["float_roundtrip"] }
serde = { version = "1.0.209", features = ["derive"] }
thiserror = "1.0.65"
serde_json = { version = "1.0.132", features = ["float_roundtrip"] }
serde = { version = "1.0.213", features = ["derive"] }
approx = "0.5"
log = "0.4"
rand = "0.8.5"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pip install perpetual
To use in a Rust project, add the following to your Cargo.toml file to get the package from [crates.io](https://crates.io/crates/perpetual).

```toml
perpetual = "0.5.2"
perpetual = "0.6.0"
```

## Paper
Expand Down
12 changes: 9 additions & 3 deletions benches/perpetual_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,11 @@ pub fn tree_benchmarks(c: &mut Criterion) {
.fit(
black_box(&data),
black_box(&y),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
black_box(None),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
Expand All @@ -169,9 +171,11 @@ pub fn tree_benchmarks(c: &mut Criterion) {
.fit(
black_box(&data),
black_box(&y),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
black_box(None),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
Expand All @@ -180,7 +184,9 @@ pub fn tree_benchmarks(c: &mut Criterion) {
})
});
let mut booster = PerpetualBooster::default();
booster.fit(&data, &y, None, None, 0.1, None, None, None).unwrap();
booster
.fit(&data, &y, 0.1, None, None, None, None, None, None, None)
.unwrap();
booster_train.bench_function("Predict Booster", |b| {
b.iter(|| booster.predict(black_box(&data), false))
});
Expand Down
13 changes: 12 additions & 1 deletion examples/cal_housing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,18 @@ fn main() -> Result<(), Box<dyn Error>> {
.set_num_threads(Some(*num_threads));

let now = SystemTime::now();
model.fit(&matrix_train, &y_train, None, None, *budget, None, None, None)?;
model.fit(
&matrix_train,
&y_train,
*budget,
None,
None,
None,
None,
None,
None,
None,
)?;
println!("now.elapsed: {:?}", now.elapsed().unwrap().as_secs_f32());

let trees = model.get_prediction_trees();
Expand Down
2 changes: 1 addition & 1 deletion examples/cover_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ fn main() -> Result<(), Box<dyn Error>> {
.map(|y| if (*y as i32) == i { 1.0 } else { 0.0 })
.collect();

model.fit(&matrix_train, &y_tr, None, None, *budget, None, None, None)?;
model.fit(&matrix_train, &y_tr, *budget, None, None, None, None, None, None, None)?;
println!("Completed fitting model number: {}", i);

let trees = model.get_prediction_trees();
Expand Down
2 changes: 1 addition & 1 deletion examples/titanic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// the relevant `set_` methods for any parameters you would like to
// adjust.
let mut model = PerpetualBooster::default().set_objective(Objective::LogLoss);
model.fit(&matrix, &y, None, None, *budget, None, None, None)?;
model.fit(&matrix, &y, *budget, None, None, None, None, None, None, None)?;

println!("Model prediction: {:?} ...", &model.predict(&matrix, true)[0..10]);

Expand Down
4 changes: 2 additions & 2 deletions python-package/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "py-perpetual"
version = "0.5.2"
version = "0.6.0"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
Expand All @@ -19,7 +19,7 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
pyo3 = { version = "0.22.5", features = ["extension-module"] }
perpetual_rs = {package="perpetual", version = "0.5.2", path = "../" }
perpetual_rs = {package="perpetual", version = "0.6.0", path = "../" }
numpy = "0.22.0"
ndarray = "0.16.1"
serde_plain = { version = "1.0" }
Expand Down
2 changes: 1 addition & 1 deletion python-package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "perpetual"
version = "0.5.2"
version = "0.6.0"
description = "A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization"
license = { file = "LICENSE" }
keywords = [
Expand Down
11 changes: 10 additions & 1 deletion python-package/python/perpetual/booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def fit(
reset: Union[bool, None] = None,
categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
timeout: Union[float, None] = None,
iteration_limit: Union[int, None] = None,
memory_limit: Union[float, None] = None,
) -> Self:
"""Fit the gradient booster on a provided dataset.
Expand All @@ -168,12 +170,17 @@ def fit(
training the model. If None is passed, a weight of 1 will be used for every record.
Defaults to None.
budget: a positive number for fitting budget. Increasing this number will more
likely result in increased accuracy.
likely result in more boosting rounds and increased predictive power.
alpha: only used in quantile regression.
reset: whether to reset the model or continue training.
categorical_features: The names or indices for categorical features.
`auto` for Polars or Pandas categorical data type.
timeout: optional fit timeout in seconds
iteration_limit: optional limit for the number of boosting rounds. The default value is 1000 boosting rounds.
In most cases, the algorithm stops automatically before hitting this limit.
If you want to experiment with a very high budget (>2.0), you can also increase this limit.
memory_limit: optional limit for memory allocation in GB. If not set, memory will be allocated based on
the available memory and the algorithm's requirements.
"""

features_, flat_data, rows, cols, categorical_features_, cat_mapping = (
Expand Down Expand Up @@ -247,6 +254,8 @@ def fit(
reset=reset,
categorical_features=categorical_features_, # type: ignore
timeout=timeout,
iteration_limit=iteration_limit,
memory_limit=memory_limit,
)

return self
Expand Down
6 changes: 5 additions & 1 deletion python-package/src/booster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ impl PerpetualBooster {
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
) -> PyResult<()> {
let flat_data = flat_data.as_slice()?;
let data = Matrix::new(flat_data, rows, cols);
Expand All @@ -156,12 +158,14 @@ impl PerpetualBooster {
match self.booster.fit(
&data,
y,
budget,
sample_weight_,
alpha,
budget,
reset,
categorical_features,
timeout,
iteration_limit,
memory_limit,
) {
Ok(m) => Ok(m),
Err(e) => Err(PyValueError::new_err(e.to_string())),
Expand Down
6 changes: 5 additions & 1 deletion python-package/src/multi_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ impl MultiOutputBooster {
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
) -> PyResult<()> {
let flat_data = flat_data.as_slice()?;
let data = Matrix::new(flat_data, rows, cols);
Expand All @@ -178,12 +180,14 @@ impl MultiOutputBooster {
match self.booster.fit(
&data,
&y_data,
budget,
sample_weight_,
alpha,
budget,
reset,
categorical_features,
timeout,
iteration_limit,
memory_limit,
) {
Ok(m) => Ok(m),
Err(e) => Err(PyValueError::new_err(e.to_string())),
Expand Down
Loading

0 comments on commit ccfdd49

Please sign in to comment.