update docstring and fair loader behaviour #6
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# Reusable workflow: it is only triggered via `workflow_call` and is
# parameterized by the inputs below.
on:
  workflow_call:
    inputs:
      # PyTorch version; installed as `torch==<torch>`.
      torch:
        type: string
        required: true
      # Python version suffix; the interpreter is invoked as `python<py>`.
      py:
        type: string
        required: true
      # Build variant. Selects the CI container image tag and the PyTorch
      # wheel index; values starting with `cu` enable the CUDA build.
      variant:
        type: string
        required: true
      # CPU architecture; used in the container image name, the manylinux
      # platform tag, and the artifact name.
      arch:
        type: string
        default: 'x86_64'
      # Underscore-separated list of sanitizers, or 'nosan' for none.
      sanitizers:
        type: string
        default: 'nosan'
      # Prefix of the name of the staged artifact.
      artifacts_name:
        type: string
        default: 'wheels'
      # If non-empty, passed to `tools/set-project-version.sh` before the
      # build.
      version_override:
        type: string
        default: ''
      # If true, the variant is appended to the project version as a local
      # version label.
      label_version_with_variant:
        type: boolean
        default: true
      # If true, pytest is run with `--integration`.
      run_integration_tests:
        type: boolean
        default: false
      # If true, pytest is run with `--device cuda:0` instead of
      # `--device cpu`.
      run_on_device:
        type: boolean
        default: false
jobs:
  # Builds the fairseq2n native library, packages the fairseq2n and fairseq2
  # wheels, and stages them together with the native test binary.
  build:
    name: Build
    runs-on:
      labels: 4-core-ubuntu
    container:
      image: ghcr.io/facebookresearch/fairseq2-ci-manylinux_${{ inputs.arch }}:2-${{ inputs.variant }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Check-out the repository
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Create the Python virtual environment
        run: |
          python${{ inputs.py }} -m venv ~/venv
          # Put the virtual environment first on PATH for all later steps.
          echo ~/venv/bin >> "$GITHUB_PATH"
      - name: Install PyTorch
        run: |
          pip install --extra-index-url https://download.pytorch.org/whl/${{ inputs.variant }}\
            torch==${{ inputs.torch }}
      - name: Install requirements
        run: |
          pip install --requirement fairseq2n/python/requirements-build.txt
      - name: Override project version
        if: inputs.version_override
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          VERSION_OVERRIDE: ${{ inputs.version_override }}
        run: |
          # Left unquoted on purpose to keep the same word splitting as the
          # previous direct interpolation.
          tools/set-project-version.sh $VERSION_OVERRIDE
      - name: Set build variant as local version label
        if: inputs.label_version_with_variant
        env:
          VARIANT: ${{ inputs.variant }}
        run: |
          version=$(cat VERSION)
          # If the version has already a local label, append the variant.
          if [[ $version == *+* ]]; then
            tools/set-project-version.sh $version.$VARIANT
          else
            tools/set-project-version.sh $version+$VARIANT
          fi
      - name: Build fairseq2n
        working-directory: fairseq2n
        env:
          VARIANT: ${{ inputs.variant }}
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          # We build our CUDA kernels for Volta and Ampere.
          if [[ $VARIANT == cu* ]]; then
            cuda_archs="70-real;80-real;80-virtual"
            cuda=ON
          else
            cuda=OFF
          fi
          # Since they do not play well together, we perform LTO only if no
          # sanitizer is enabled.
          if [[ $SANITIZERS == nosan ]]; then
            build_type=Release
            lto=ON
          else
            build_type=Debug
            lto=OFF
          fi
          # Every underscore in SANITIZERS is turned into a semicolon so that
          # CMake receives the sanitizers as a list.
          cmake\
            -GNinja\
            -DCMAKE_BUILD_TYPE=$build_type\
            -DCMAKE_CUDA_ARCHITECTURES=$cuda_archs\
            -DFAIRSEQ2N_PERFORM_LTO=$lto\
            -DFAIRSEQ2N_SANITIZERS="${SANITIZERS//_/;}"\
            -DFAIRSEQ2N_TREAT_WARNINGS_AS_ERRORS=ON\
            -DFAIRSEQ2N_USE_CUDA=$cuda\
            -DFAIRSEQ2N_PYTHON_DEVEL=OFF\
            -B build
          cmake --build build
      - name: Package fairseq2n
        working-directory: fairseq2n/python
        run: |
          pip wheel\
            --use-pep517\
            --no-build-isolation\
            --no-deps\
            --config-settings "--build-option=--plat-name"\
            --config-settings "--build-option=manylinux2014_${{ inputs.arch }}"\
            --wheel-dir build/wheelhouse\
            .
      - name: Package fairseq2
        run: |
          pip wheel --no-deps --wheel-dir build/wheelhouse .
      - name: Upload wheels and native tests to staging
        uses: actions/upload-artifact@v3
        with:
          # Must match the name used by the download step of the test job.
          name: ${{ inputs.artifacts_name }}-pt${{ inputs.torch }}-py${{ inputs.py }}-linux_${{ inputs.arch }}-${{ inputs.variant }}-${{ inputs.sanitizers }}
          path: |
            build/wheelhouse/*.whl
            fairseq2n/build/tests/run-tests
            fairseq2n/python/build/wheelhouse/*.whl
          retention-days: 1

  # Installs the staged wheels into a fresh virtual environment and runs the
  # native and Python test suites against them.
  test:
    name: Test
    needs: build
    runs-on:
      labels: 4-core-ubuntu
    container:
      image: ghcr.io/facebookresearch/fairseq2-ci-manylinux_${{ inputs.arch }}:2-${{ inputs.variant }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Download wheels and native tests from staging
        uses: actions/download-artifact@v3
        with:
          # Must match the name used by the upload step of the build job.
          name: ${{ inputs.artifacts_name }}-pt${{ inputs.torch }}-py${{ inputs.py }}-linux_${{ inputs.arch }}-${{ inputs.variant }}-${{ inputs.sanitizers }}
          path: ~/artifacts/
      - name: Check-out the repository
        uses: actions/checkout@v3
      - name: Create the Python virtual environment
        run: |
          python${{ inputs.py }} -m venv ~/venv
          # Put the virtual environment first on PATH for all later steps.
          echo ~/venv/bin >> "$GITHUB_PATH"
      - name: Install PyTorch
        run: |
          pip install --extra-index-url https://download.pytorch.org/whl/${{ inputs.variant }}\
            torch==${{ inputs.torch }}
      - name: Install requirements
        env:
          ARCH: ${{ inputs.arch }}
        run: |
          pip install --requirement requirements-devel.txt
          if [[ $ARCH == x86_64 ]]; then
            # Our Docker images have glibc 2.17, but tbb versions later than
            # 2021.9 are built against a newer glibc.
            pip install tbb==2021.8
          fi
      - name: Install fairseq2n
        run: |
          pip install --no-cache-dir ~/artifacts/fairseq2n/python/build/wheelhouse/*.whl
      - name: Install fairseq2
        run: |
          for whl in ~/artifacts/build/wheelhouse/*.whl; do
            pip install --no-cache-dir $whl
            # Install the wheel again with its 'arrow' extra. The requirement
            # must name the distribution contained in the wheel (fairseq2)
            # and use an unquoted extra to be a valid direct reference.
            pip install --no-cache-dir "fairseq2[arrow] @ file://$whl"
          done
      - name: Set the sanitizer variables
        if: inputs.sanitizers != 'nosan'
        env:
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          {
            # We can't enable container overflow checks due to false positives
            # in non-instrumented dependencies.
            echo ASAN_OPTIONS=detect_container_overflow=0
            echo LSAN_OPTIONS=suppressions=fairseq2n/LSan.supp,exitcode=0,log_path=$HOME/lsan.out
            for sanitizer in ${SANITIZERS//_/ }; do
              # LIBASAN and LIBTSAN environment variables are defined in the
              # container image.
              case $sanitizer in
                asan)
                  echo SANITIZER_LIB=$(echo $LIBASAN)
                  break
                  ;;
                tsan)
                  echo SANITIZER_LIB=$(echo $LIBTSAN)
                  break
                  ;;
              esac
            done
          } >> "$GITHUB_ENV"
      - name: Run native tests
        run: |
          # The executable bit is set here before the staged binary is run.
          chmod 755 ~/artifacts/fairseq2n/build/tests/run-tests
          site_packages=~/venv/lib/python${{ inputs.py }}/site-packages
          LD_LIBRARY_PATH=~/venv/lib:$site_packages/torch/lib:$site_packages/fairseq2n/lib:$LD_LIBRARY_PATH\
            ~/artifacts/fairseq2n/build/tests/run-tests
      - name: Run Python tests
        env:
          RUN_INTEGRATION_TESTS: ${{ inputs.run_integration_tests }}
          RUN_ON_DEVICE: ${{ inputs.run_on_device }}
        run: |
          if [[ $RUN_INTEGRATION_TESTS == true ]]; then
            integration=--integration
          fi
          if [[ $RUN_ON_DEVICE != true ]]; then
            dev=cpu
          else
            dev=cuda:0
          fi
          # SANITIZER_LIB is only set by the sanitizer step; it is empty
          # otherwise.
          LD_PRELOAD=${SANITIZER_LIB} pytest -rP --device $dev $integration --verbose
      - name: Check the output of the leak sanitizer
        if: env.LSAN_OPTIONS != ''
        run: |
          # Unfortunately Python leaks quite a bit of memory, so we cannot rely
          # on the raw output of LSan. As a rudimentary workaround, we check if
          # any stack frame has a symbol containing the word 'fairseq2'.
          for f in ~/lsan.out.*; do
            # When no log file was written the glob stays unexpanded; skip it
            # instead of failing the step on a non-existent path.
            [[ -e $f ]] || continue
            cat $f
            if grep --silent 'fairseq2' $f; then
              exit 1
            fi
          done