diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09a66d60256..cd78f3680e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -228,27 +228,25 @@ jobs: miniforge-variant: Mambaforge miniforge-version: latest use-mamba: true - - name: ASV installation - run: pip install asv==0.5.1 - name: Running benchmarks run: | git remote add upstream https://github.com/modin-project/modin.git git fetch upstream - if git diff upstream/master --name-only | grep -q "^asv_bench/"; then - # ASV correctly creates environments for testing only from the branch - # with `master` name - git checkout -b master + if git diff upstream/main --name-only | grep -q "^asv_bench/"; then cd asv_bench - asv check -v + + mamba env create -f ../environment-dev.yml + conda activate modin + pip install .. asv machine --yes # check Modin on Ray - asv run --quick --strict --show-stderr --launch-method=spawn \ + asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \ -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log # check pure pandas - MODIN_ASV_USE_IMPL=pandas asv run --quick --strict --show-stderr --launch-method=spawn \ + MODIN_ASV_USE_IMPL=pandas asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \ -b ^benchmarks -b ^io | tee benchmarks.log # Otherwise, ASV considers that the environment has already been created, although ASV command is run for another config, @@ -256,13 +254,14 @@ jobs: rm -f -R .asv/env/ # TODO: Remove manual environment creation after fix https://github.com/airspeed-velocity/asv/issues/1310 + conda deactivate mamba env create -f ../requirements/env_hdk.yml conda activate modin_on_hdk pip install asv==0.5.1 pip install .. # check Modin on HDK - MODIN_ENGINE=native MODIN_STORAGE_FORMAT=hdk MODIN_EXPERIMENTAL=true asv run --quick --strict --show-stderr \ + MODIN_ENGINE=native MODIN_STORAGE_FORMAT=hdk MODIN_EXPERIMENTAL=true asv run --quick --dry-run --python=same --strict --show-stderr \ --launch-method=forkserver --python=same --config asv.conf.hdk.json \ -b ^hdk | tee benchmarks.log else diff --git a/asv_bench/asv.conf.dask.json b/asv_bench/asv.conf.dask.json index 6da9cf2419c..e91cb3efc2f 100644 --- a/asv_bench/asv.conf.dask.json +++ b/asv_bench/asv.conf.dask.json @@ -13,6 +13,10 @@ // project being benchmarked "repo": "..", + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], + // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[dask]"], diff --git a/asv_bench/asv.conf.hdk.json b/asv_bench/asv.conf.hdk.json index 3c3f4519068..32745c30bf0 100644 --- a/asv_bench/asv.conf.hdk.json +++ b/asv_bench/asv.conf.hdk.json @@ -13,6 +13,10 @@ // project being benchmarked "repo": "..", + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], + // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 9b7ea10f741..437afc0e62a 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -13,6 +13,10 @@ // project being benchmarked "repo": "..", + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], + // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[ray]"], diff --git a/asv_bench/asv.conf.unidist.json b/asv_bench/asv.conf.unidist.json index 49d5e953909..4a93c5551d8 100644 --- a/asv_bench/asv.conf.unidist.json +++ b/asv_bench/asv.conf.unidist.json @@ -13,6 +13,10 @@ // project being benchmarked "repo": "..", + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], + // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}[unidist]"], diff --git a/asv_bench/benchmarks/benchmarks.py b/asv_bench/benchmarks/benchmarks.py index d352c708383..8c5e73be214 100644 --- a/asv_bench/benchmarks/benchmarks.py +++ b/asv_bench/benchmarks/benchmarks.py @@ -1029,20 +1029,17 @@ class TimeDropDuplicatesDataframe: param_names = ["shape"] def setup(self, shape): - from pandas import DataFrame - rows, cols = shape N = rows // 10 K = 10 - # Assigning a large number of columns - inefficient in Modin, so use pandas - temp_df = DataFrame() - # dataframe would have cols-1 keys(strings) and one value(int) column + data = {} + # dataframe would have cols-1 keys(strings) and one value(int) column for col in range(cols - 1): - temp_df["key" + str(col + 1)] = IMPL.Index( + data["key" + str(col + 1)] = IMPL.Index( [f"i-{i}" for i in range(N)], dtype=object ).values.repeat(K) - self.df = IMPL.DataFrame(temp_df) - self.df["value"] = np.random.randn(N * K) + data["value"] = np.random.randn(N * K) + self.df = IMPL.DataFrame(data) execute(self.df) def time_drop_dups(self, shape):