Skip to content

Commit

Permalink
Fix validate nightly binaries (pytorch#1372)
Browse files Browse the repository at this point in the history
Summary:

Fix the validate-nightly-binaries workflow.

Differential Revision: D48928848
  • Loading branch information
hlhtsang authored and facebook-github-bot committed Oct 5, 2023
1 parent 369526d commit 32876f0
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 25 deletions.
83 changes: 64 additions & 19 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,85 @@

export PYTORCH_CUDA_PKG=""

conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"

conda run -n build_binary python --version

# Install pytorch, torchrec and fbgemm as per
# installation instructions on following page
# https://github.com/pytorch/torchrec#installations

if [[ ${MATRIX_GPU_ARCH_TYPE} = 'rocm' ]]; then
echo "We don't support rocm"
exit 0
fi

if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
export CUDA_VERSION="cu118"
else
export CUDA_VERSION="cpu"
fi

if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then
# shellcheck disable=SC2086
conda install -y pytorch ${PYTORCH_CUDA_PKG} -c pytorch-nightly -c nvidia
pip install torchrec_nightly
# figure out CUDA VERSION
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
if [[ ${MATRIX_GPU_ARCH_VERSION} = '11.8' ]]; then
export CUDA_VERSION="cu118"
else
export CUDA_VERSION="cu121"
fi
else
# shellcheck disable=SC2086
conda install -y pytorch ${PYTORCH_CUDA_PKG} -c pytorch -c nvidia
pip install torchrec
export CUDA_VERSION="cpu"
fi

if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cpu' || ${MATRIX_GPU_ARCH_TYPE} = 'rocm' ]]; then
if [[ ${MATRIX_CHANNEL} = 'pypi_release' ]]; then
echo "checking pypi release"
pip install torch
pip install fbgemm-gpu
pip install torchrec
else
# figure out URL
if [[ ${MATRIX_CHANNEL} = 'nightly' ]]; then
pip uninstall fbgemm-gpu-nightly -y
pip install fbgemm-gpu-nightly-cpu
else
pip uninstall fbgemm-gpu -y
pip install fbgemm-gpu-cpu
export PYTORCH_URL="https://download.pytorch.org/whl/nightly/${CUDA_VERSION}"
elif [[ ${MATRIX_CHANNEL} = 'test' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/test/${CUDA_VERSION}"
elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
fi

# install pytorch
# switch back to conda once torch nightly is fixed
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
# fi
conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"

# install fbgemm
conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"

# install requirements from pypi
conda run -n build_binary pip install torchmetrics==1.0.3

# install torchrec
conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"

# Run small import test
conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"
fi

# Run small import test
python -c "import torch; import fbgemm_gpu; import torchrec"
# check directory
ls -R

# check if cuda available
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"

# check cuda version
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"

# Finally run smoke test
pip install torchx
# python 3.11 needs torchx-nightly
conda run -n build_binary pip install torchx-nightly iopath
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
else
torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
fi
4 changes: 3 additions & 1 deletion .github/workflows/validate-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ on:
workflow_dispatch:
inputs:
channel:
description: "Channel to use (nightly, release)"
description: "Channel to use (nightly, release, test, pypi_release)"
required: true
type: choice
options:
- release
- nightly
- test
- pypi_release
ref:
description: 'Reference to checkout, defaults to empty'
default: ""
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/validate-nightly-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ on:
- .github/workflows/validate-binaries.yml
- .github/scripts/validate-binaries.sh
pull_request:
paths:
- .github/workflows/validate-nightly-binaries.yml
- .github/workflows/validate-binaries.yml
- .github/scripts/validate-binaries.sh
# paths:
# - .github/workflows/validate-nightly-binaries.yml
# - .github/workflows/validate-binaries.yml
# - .github/scripts/validate-binaries.sh
jobs:
nightly:
uses: ./.github/workflows/validate-binaries.yml
with:
channel: nightly
channel: pypi_release

0 comments on commit 32876f0

Please sign in to comment.