Skip to content

Commit

Permalink
Fix validate nightly binaries (pytorch#1372)
Browse files Browse the repository at this point in the history
Summary:

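Fix validate nightly binaries. The validation script now creates a `build_binary` conda environment and runs every install and test step inside it via `conda run -n build_binary`. It installs torch, fbgemm-gpu, and torchrec with pip from the download.pytorch.org wheel index for the selected channel (nightly, test, or release) instead of installing PyTorch with conda, and derives the wheel tag (cu118, cu121, or cpu) from the GPU arch type and version. The reusable workflow also gains a `test` channel option, and the `pull_request` path filter of the nightly validation workflow is commented out.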

Differential Revision: D48928848
  • Loading branch information
hlhtsang authored and facebook-github-bot committed Oct 5, 2023
1 parent 369526d commit 7a1ef9a
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 24 deletions.
70 changes: 51 additions & 19 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,72 @@

export PYTORCH_CUDA_PKG=""

conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"

conda run -n build_binary python --version

# Install pytorch, torchrec and fbgemm as per
# installation instructions on following page
# https://github.com/pytorch/torchrec#installations
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"

if [[ ${MATRIX_GPU_ARCH_TYPE} = 'rocm' ]]; then
echo "We don't support rocm"
exit 0
fi

if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then
# shellcheck disable=SC2086
conda install -y pytorch ${PYTORCH_CUDA_PKG} -c pytorch-nightly -c nvidia
pip install torchrec_nightly
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
export CUDA_VERSION="cu118"
else
# shellcheck disable=SC2086
conda install -y pytorch ${PYTORCH_CUDA_PKG} -c pytorch -c nvidia
pip install torchrec
export CUDA_VERSION="cpu"
fi

if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cpu' || ${MATRIX_GPU_ARCH_TYPE} = 'rocm' ]]; then
if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then
pip uninstall fbgemm-gpu-nightly -y
pip install fbgemm-gpu-nightly-cpu
# figure out CUDA VERSION
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
if [[ ${MATRIX_GPU_ARCH_VERSION} = '11.8' ]]; then
export CUDA_VERSION="cu118"
else
pip uninstall fbgemm-gpu -y
pip install fbgemm-gpu-cpu
export CUDA_VERSION="cu121"
fi
else
export CUDA_VERSION="cpu"
fi

# figure out URL
if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/nightly/${CUDA_VERSION}"
elif [[ ${MATRIX_CHANNEL} = 'test' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/test/${CUDA_VERSION}"
elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
fi

# install pytorch
# switch back to conda once torch nightly is fixed
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
# fi
conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"

# install fbgemm
conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"

# install requirements
conda run -n build_binary pip install torchmetrics==1.0.3

# install torchrec
conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"

# Run small import test
python -c "import torch; import fbgemm_gpu; import torchrec"
conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"

# check directory
ls -R

# Finally run smoke test
pip install torchx
# python 3.11 needs torchx-nightly
conda run -n build_binary pip install torchx-nightly iopath
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
else
torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
fi
3 changes: 2 additions & 1 deletion .github/workflows/validate-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ on:
workflow_dispatch:
inputs:
channel:
description: "Channel to use (nightly, release)"
description: "Channel to use (nightly, release, test)"
required: true
type: choice
options:
- release
- nightly
- test
ref:
description: 'Reference to checkout, defaults to empty'
default: ""
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/validate-nightly-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ on:
- .github/workflows/validate-binaries.yml
- .github/scripts/validate-binaries.sh
pull_request:
paths:
- .github/workflows/validate-nightly-binaries.yml
- .github/workflows/validate-binaries.yml
- .github/scripts/validate-binaries.sh
# paths:
# - .github/workflows/validate-nightly-binaries.yml
# - .github/workflows/validate-binaries.yml
# - .github/scripts/validate-binaries.sh
jobs:
nightly:
uses: ./.github/workflows/validate-binaries.yml
Expand Down

0 comments on commit 7a1ef9a

Please sign in to comment.