Simplify the input argument of _construct_global_block_info_list
(#70)
#123
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: installs the package with its "examples" extra via uv and runs
# the CIFAR-10 example scripts (single-process and DDP) for one epoch each.
name: examples
# Triggered on every push and on every pull request.
on: [push, pull_request]
jobs:
  examples:
    name: "Python 3.10"
    # NOTE(review): the runner label suggests a 4-core Ubuntu machine with a
    # T4 GPU; confirm against the repository/organization runner settings.
    runs-on: 4-core-ubuntu-gpu-t4
    steps:
      - uses: actions/checkout@v4
      - name: Set up and update uv.
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          source $HOME/.local/bin/env
          uv self update
      - name: Install Python.
        run: uv python install 3.10
      - name: Create venv and install the package.
        run: |
          uv venv && source .venv/bin/activate
          uv pip install ".[examples]"
      # Each step re-activates the venv before invoking the example.
      # The "on CPU" variants set CUDA_VISIBLE_DEVICES to an empty string.
      - name: Run default example on CPU.
        run: |
          source .venv/bin/activate
          CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024
      - name: Run default example on GPU.
        run: |
          source .venv/bin/activate
          python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024
      # The CPU DDP run launches two processes with the gloo backend; the GPU
      # DDP run launches a single process and passes no --backend flag.
      - name: Run DDP example on CPU.
        run: |
          source .venv/bin/activate
          CUDA_VISIBLE_DEVICES="" torchrun --standalone --nnodes=1 --nproc_per_node=2 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 15 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024 --backend gloo
      - name: Run DDP example on GPU.
        run: |
          source .venv/bin/activate
          torchrun --standalone --nnodes=1 --nproc_per_node=1 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024