From 8443ce7dfe567aeef8ac18634c05a6bac0bf9e0d Mon Sep 17 00:00:00 2001 From: Henry Tsang Date: Tue, 3 Oct 2023 10:43:34 -0700 Subject: [PATCH] Update install methods after fbgemm onboarded onto nova (#1420) Summary: wip Differential Revision: D49852074 --- .github/scripts/install_fbgemm.sh | 38 ++++++++++++++++++++++++ .github/workflows/build-wheels-linux.yml | 2 +- .github/workflows/docs.yml | 9 +++--- .github/workflows/pyre.yml | 4 +-- .github/workflows/unittest_ci.yml | 5 ++++ .github/workflows/unittest_ci_cpu.yml | 13 ++++---- install-requirements.txt | 2 +- requirements.txt | 2 +- setup.py | 31 +------------------ 9 files changed, 60 insertions(+), 46 deletions(-) create mode 100644 .github/scripts/install_fbgemm.sh diff --git a/.github/scripts/install_fbgemm.sh b/.github/scripts/install_fbgemm.sh new file mode 100644 index 000000000..ef4a60cec --- /dev/null +++ b/.github/scripts/install_fbgemm.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +echo "I am here" + +echo "MATRIX_GPU_ARCH_TYPE" +echo "$MATRIX_GPU_ARCH_TYPE" + +echo "GPU_ARCH_TYPE" +echo "$GPU_ARCH_TYPE" +echo "$gpu_arch_type" + +echo "MATRIX_CHANNEL" +echo "$MATRIX_CHANNEL" + + +echo "CHANNEL" +echo "$CHANNEL" +echo "$channel" + + +if [ "$MATRIX_GPU_ARCH_TYPE" = "cpu" ]; then + CUDA_VERSION="cpu" +elif [ "$MATRIX_GPU_ARCH_VERSION" = "11.8" ]; then + CUDA_VERSION="cu118" +elif [ "$MATRIX_GPU_ARCH_VERSION" = "12.1" ]; then + CUDA_VERSION="cu121" +fi + +echo "$CUDA_VERSION" + +if [ "$MATRIX_CHANNEL" = "nightly" ]; then + ${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CUDA_VERSION" +fi diff --git a/.github/workflows/build-wheels-linux.yml b/.github/workflows/build-wheels-linux.yml index 459d082ec..07399f30f 100644 --- a/.github/workflows/build-wheels-linux.yml +++ b/.github/workflows/build-wheels-linux.yml @@ -54,7 +54,7 @@ jobs: test-infra-ref: main build-matrix: ${{ needs.filter-matrix.outputs.matrix }} pre-script: "" - post-script: "" + post-script: .github/scripts/install_fbgemm.sh package-name: torchrec smoke-test-script: "" trigger-event: ${{ github.event_name }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f40cba633..b9b96a514 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,12 +52,13 @@ jobs: - name: Install PyTorch shell: bash run: | - conda install -n build_binary -y -c pytorch-nightly "pytorch-nightly"::pytorch[build="*cpu*"] + conda install -n build_binary --yes pytorch cpuonly -c pytorch-nightly + - name: Install fbgemm + run: | + conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu - name: Install TorchRec run: | - conda run -n build_binary python -m pip install torchrec_nightly - conda run -n build_binary python -m pip uninstall fbgemm-gpu-nightly -y - conda run -n build_binary python -m pip install fbgemm-gpu-nightly-cpu + pip install torchrec --index-url https://download.pytorch.org/whl/nightly/cpu - name: Test fbgemm_gpu and torchrec installation shell: bash run: | diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml index 1e17da79b..8be068996 100644 --- a/.github/workflows/pyre.yml +++ b/.github/workflows/pyre.yml @@ -19,8 +19,8 @@ jobs: uses: actions/checkout@v2 - name: Install dependencies run: > - conda install --yes -c pytorch-nightly "pytorch-nightly"::pytorch[build="*cpu*"] && - sed -i 's/fbgemm-gpu-nightly/fbgemm-gpu-nightly-cpu/g' requirements.txt && + conda install --yes pytorch cpuonly -c pytorch-nightly && + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu && pip install -r requirements.txt && pip install pyre-check-nightly==$(cat .pyre_configuration | grep version | awk '{print $2}' | sed 's/\"//g') - name: Pyre check diff --git a/.github/workflows/unittest_ci.yml b/.github/workflows/unittest_ci.yml index 650a03c11..edf7a9fc1 100644 --- a/.github/workflows/unittest_ci.yml +++ b/.github/workflows/unittest_ci.yml @@ -166,6 +166,11 @@ jobs: shell: bash run: | conda install -n build_binary -y pytorch pytorch-cuda=11.8 -c pytorch-nightly -c nvidia + - name: Install fbgemm + shell: bash + run: | + conda run -n build_binary \ + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu118 # download wheel from GHA - name: Download wheel uses: actions/download-artifact@v2 diff --git a/.github/workflows/unittest_ci_cpu.yml b/.github/workflows/unittest_ci_cpu.yml index f15ab7c57..0b23b12e3 100644 --- a/.github/workflows/unittest_ci_cpu.yml +++ b/.github/workflows/unittest_ci_cpu.yml @@ -47,14 +47,16 @@ jobs: conda run -n build_binary python --version conda install -n build_binary \ --yes \ - -c pytorch-nightly \ - "pytorch-nightly"::pytorch[build="*cpu*"] + pytorch cpuonly -c pytorch-nightly conda run -n build_binary \ python -c "import torch.distributed" - sed -i 's/fbgemm-gpu-nightly/fbgemm-gpu-nightly-cpu/g' requirements.txt + conda run -n build_binary \ + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu + conda run -n build_binary \ + python -c "import fbgemm_gpu" + echo "fbgemm_gpu succeeded" conda run -n build_binary \ pip install -r requirements.txt - export CU_VERSION="cpu" conda run -n build_binary \ python setup.py bdist_wheel \ --python-tag=${{ matrix.python-tag }} @@ -64,9 +66,6 @@ jobs: conda run -n build_binary \ python -c "import numpy" echo "numpy succeeded" - conda run -n build_binary \ - python -c "import fbgemm_gpu" - echo "fbgemm_gpu succeeded" conda install -n build_binary -y pytest conda run -n build_binary \ python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors -k 'not test_sharding_gloo_cw and not test_load_state_dict_prefix and not test_load_state_dict_cw_multiple_shards' --ignore-glob=**/test_utils/ diff --git a/install-requirements.txt b/install-requirements.txt index 8543a91aa..68a1d0b26 100644 --- a/install-requirements.txt +++ b/install-requirements.txt @@ -1,3 +1,3 @@ -fbgemm-gpu-nightly +fbgemm-gpu torchmetrics==1.0.3 tqdm diff --git a/requirements.txt b/requirements.txt index 85dc26fac..a6913a2e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ black cmake -fbgemm-gpu-nightly +fbgemm-gpu hypothesis==6.70.1 iopath numpy diff --git a/setup.py b/setup.py index c7fb225b8..6a790b808 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,6 @@ import os import subprocess import sys -from datetime import date from pathlib import Path from typing import List @@ -45,17 +44,6 @@ def _export_version(version, sha): fileobj.write("git_version = {}\n".format(repr(sha))) -def get_channel(): - # Channel typically takes on the following values: - # - NIGHTLY: for nightly published binaries - # - TEST: for binaries build from release candidate branches - return os.getenv("CHANNEL") - - -def get_cu_version(): - return os.getenv("CU_VERSION", "cpu") - - def parse_args(argv: List[str]) -> argparse.Namespace: parser = argparse.ArgumentParser(description="torchrec setup") return parser.parse_known_args(argv) @@ -64,9 +52,6 @@ def parse_args(argv: List[str]) -> argparse.Namespace: def main(argv: List[str]) -> None: args, unknown = parse_args(argv) - # Set up package version - channel = get_channel() - with open( os.path.join(os.path.dirname(__file__), "README.MD"), encoding="utf8" ) as f: @@ -81,21 +66,7 @@ def main(argv: List[str]) -> None: version, sha = _get_version() _export_version(version, sha) - if channel != "nightly": - if "fbgemm-gpu-nightly" in install_requires: - install_requires.remove("fbgemm-gpu-nightly") - install_requires.append("fbgemm-gpu") - - cu_version = get_cu_version() - if cu_version == "cpu": - if "fbgemm-gpu-nightly" in install_requires: - install_requires.remove("fbgemm-gpu-nightly") - install_requires.append("fbgemm-gpu-nightly-cpu") - if "fbgemm-gpu" in install_requires: - install_requires.remove("fbgemm-gpu") - install_requires.append("fbgemm-gpu-cpu") - - print(f"-- torchrec building version: {version} CU Version: {cu_version}") + print(f"-- torchrec building version: {version}") packages = find_packages( exclude=(