# Changes to support native Data loader and readers #477

# CI workflow: triggered by pushes and pull requests; runs one job per
# matrix combination of OS image, profiler mode and gcc version.
name: Python Package using Conda
on:
  - push
  - pull_request
jobs:
  build-and-test:
    strategy:
      # Keep the other matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        # NOTE(review): GitHub has retired the ubuntu-20.04 hosted image;
        # confirm whether this label should move to a newer release.
        os:
          - ubuntu-20.04
        profiler:
          - DEFAULT
          - DLIO_PROFILER
        gcc:
          - 10
    name: "${{ matrix.os }}-${{ matrix.profiler }}-${{ matrix.gcc }}"
    runs-on: "${{ matrix.os }}"
    env:
      # Location of the virtualenv shared (and cached) across steps.
      VENV: '/home/runner/work/venv'
      # Exposed to the shell steps, which branch on this value.
      DLIO_PROFILER: "${{ matrix.profiler }}"
      # Compiler names; also used as apt package names when installing tools.
      CC: "gcc-${{ matrix.gcc }}"
      CXX: "g++-${{ matrix.gcc }}"
    steps:
# Fetch the repository contents into the workspace.
- uses: actions/checkout@v3
# Install the Python interpreter used by the remaining steps.
- name: Set up Python 3.10
  uses: actions/setup-python@v3
  with:
    # Quoted so the version is always read as a string, never as a number.
    python-version: '3.10.5'
# Restore the virtualenv directory from an earlier run when the key matches.
# actions/cache v1/v2 have been retired by GitHub and fail the job outright,
# so v4 is used; `path`, `key` and the `cache-hit` output work the same way.
# NOTE(review): the key only hashes setup.py, and the "Install DLIO" step
# (which pip-installs this checkout into the venv) is skipped on a cache hit,
# so a restored venv may carry an older build of the package - confirm intent.
- name: Cache install modules
  id: cache-modules
  uses: actions/cache@v4
  with:
    path: ${{ env.VENV }}
    key: ${{ env.VENV }}-${{ env.DLIO_PROFILER }}-${{ hashFiles('setup.py') }}
# Install the matrix compiler, MPI and (for the profiler build) hwloc headers.
- name: Install System Tools
  run: |
    # apt-get is the script-stable front end (apt warns that its CLI is not);
    # -y keeps the installs non-interactive regardless of runner apt config.
    sudo apt-get update
    sudo apt-get install -y "$CC" "$CXX" libc6
    sudo apt-get install -y mpich
    if [[ $DLIO_PROFILER == 'DLIO_PROFILER' ]]; then
      sudo apt-get install -y libhwloc-dev
    fi
# Build the virtualenv and install this package into it.
# Skipped when the cache step reported a hit.
- name: Install DLIO
  if: steps.cache-modules.outputs.cache-hit != 'true'
  run: |
    echo "Profiler ${DLIO_PROFILER} gcc $CC"
    python -m pip install --upgrade pip
    # NOTE(review): virtualenv is installed here, but the environment below is
    # created with the stdlib venv module - presumably removable; confirm.
    pip install virtualenv
    python -m venv "${VENV}"
    source "${VENV}/bin/activate"
    # Choose the pip extras: the profiler build adds dlio_profiler to test.
    extras='test'
    if [[ $DLIO_PROFILER == 'DLIO_PROFILER' ]]; then
      extras='test,dlio_profiler'
    fi
    pip install ".[${extras}]"
# Runs the selected test_gen_data cases, one pytest invocation per case,
# each launched with `mpirun -np 2`.
- name: test_gen_data
  run: |
    source "${VENV}/bin/activate"
    # Selectors are quoted so the shell never treats [...] as a glob pattern.
    for fmt in png npz jpeg tfrecord hdf5; do
      RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_gen_data[dlio_${fmt}-tensorflow-dlio_tensorflow]" -v
    done
    RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_gen_data[dali_npz-pytorch-native_dali]" -v
# Runs the selected storage-root data-generation cases, one pytest
# invocation per format, each launched with `mpirun -np 2`.
# NOTE(review): the step name says "custom_storage_root" while the -k
# selector is test_storage_root_gen_data; selector kept as-is - confirm it
# matches the test function name.
- name: test_custom_storage_root_gen_data
  run: |
    source "${VENV}/bin/activate"
    for fmt in png npz jpeg tfrecord hdf5; do
      RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_storage_root_gen_data[dlio_${fmt}-tensorflow-dlio_tensorflow]" -v
    done
# Runs the selected test_train cases, one pytest invocation per case,
# each launched with `mpirun -np 2`.
- name: test_train
  run: |
    source "${VENV}/bin/activate"
    # $1 is the bracketed parameter id of the test_train case to run.
    run_case() {
      RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_train[$1]" -v
    }
    # The 0/1 suffixes distinguish two cases that share the same id text.
    run_case dlio_png-tensorflow-dlio_tensorflow0
    for fmt in npz jpeg tfrecord hdf5 csv; do
      run_case "dlio_${fmt}-tensorflow-dlio_tensorflow"
    done
    for combo in pytorch-dlio_pytorch tensorflow-dlio_dali pytorch-dlio_dali; do
      for fmt in png npz jpeg hdf5 csv; do
        run_case "dlio_${fmt}-${combo}"
      done
    done
    run_case dlio_png-tensorflow-dlio_tensorflow1
    run_case tf_tfrecord-tensorflow-native_tensorflow
    run_case dali_tfrecord-pytorch-native_dali
    run_case dali_npz-pytorch-native_dali
# Runs the selected test_custom_storage_root_train cases, one pytest
# invocation per case, each launched with `mpirun -np 2`.
- name: test_custom_storage_root_train
  run: |
    source "${VENV}/bin/activate"
    # $1 is the bracketed parameter id of the case to run.
    run_case() {
      RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_custom_storage_root_train[$1]" -v
    }
    for fmt in png npz jpeg tfrecord hdf5 csv; do
      run_case "dlio_${fmt}-tensorflow-dlio_tensorflow"
    done
    for fmt in png npz jpeg hdf5 csv; do
      run_case "dlio_${fmt}-pytorch-dlio_pytorch"
    done
# Runs the tests selected by `-k test_checkpoint_epoch` with `mpirun -np 2`.
- name: test_checkpoint_epoch
  run: |
    source "${VENV}/bin/activate"
    export RDMAV_FORK_SAFE=1
    mpirun -np 2 pytest -v -k test_checkpoint_epoch
# Runs the tests selected by `-k test_checkpoint_step` with `mpirun -np 2`.
- name: test_checkpoint_step
  run: |
    # Activate once, before the only command that needs the virtualenv.
    source "${VENV}/bin/activate"
    RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_checkpoint_step -v
# Runs the tests selected by `-k test_eval` with `mpirun -np 2`.
- name: test_eval
  run: |
    # Activate once, before the only command that needs the virtualenv.
    source "${VENV}/bin/activate"
    RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_eval -v
# Runs the test_multi_threads cases for both frameworks with the middle
# parameter set to 0, 1 and 2, each launched with `mpirun -np 2`.
- name: test_multi_threads
  run: |
    source "${VENV}/bin/activate"
    for framework in tensorflow pytorch; do
      for count in 0 1 2; do
        RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k "test_multi_threads[${framework}-${count}-dlio_${framework}]" -v
      done
    done
# Runs the resnet50 workload twice with `mpirun -np 2`: first with
# generate_data=True/train=False, then with train=True/generate_data=False.
- name: test-tf-loader-tfrecord
  run: |
    source "${VENV}/bin/activate"
    # num_files_train is passed once (16) so the effective value is unambiguous.
    RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=resnet50 \
      ++workload.workflow.train=False \
      ++workload.workflow.generate_data=True \
      ++workload.dataset.num_files_train=16 \
      ++workload.dataset.num_samples_per_file=16
    RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=resnet50 \
      ++workload.workflow.train=True \
      ++workload.workflow.generate_data=False \
      ++workload.dataset.num_files_train=16 \
      ++workload.dataset.num_samples_per_file=16
# Runs the unet3d workload twice with `mpirun -np 2`: first with
# generate_data=True/train=False, then with train=True/generate_data=False.
- name: test-torch-loader-npz
  run: |
    source "${VENV}/bin/activate"
    # $1 -> workload.workflow.train, $2 -> workload.workflow.generate_data.
    # Every other override is identical for both runs.
    run_unet3d() {
      RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=unet3d \
        ++workload.train.computation_time=0.05 \
        ++workload.evaluation.eval_time=0.01 \
        ++workload.train.epochs=2 \
        ++workload.workflow.train="$1" \
        ++workload.workflow.generate_data="$2" \
        ++workload.dataset.num_files_train=16 \
        ++workload.dataset.num_files_eval=16 \
        ++workload.reader.read_threads=2 \
        ++workload.dataset.record_length=4096 \
        ++workload.dataset.record_length_stdev=0
    }
    run_unet3d False True
    run_unet3d True False
# Runs the unet3d workload with framework=tensorflow twice with
# `mpirun -np 2`: first with generate_data=True/train=False, then with
# train=True/generate_data=False.
# NOTE(review): data_loader is set under `workload.data_reader` while
# read_threads is set under `workload.reader`; both keys are kept as-is -
# confirm which section name the benchmark actually reads.
- name: test-tf-loader-npz
  run: |
    source "${VENV}/bin/activate"
    # $1 -> workload.workflow.train, $2 -> workload.workflow.generate_data.
    # Every other override is identical for both runs.
    run_unet3d_tf() {
      RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=unet3d \
        ++workload.framework=tensorflow \
        ++workload.data_reader.data_loader=dlio_tensorflow \
        ++workload.train.computation_time=0.05 \
        ++workload.evaluation.eval_time=0.01 \
        ++workload.train.epochs=2 \
        ++workload.workflow.train="$1" \
        ++workload.workflow.generate_data="$2" \
        ++workload.dataset.num_files_train=16 \
        ++workload.dataset.num_files_eval=16 \
        ++workload.reader.read_threads=2 \
        ++workload.dataset.record_length=4096 \
        ++workload.dataset.record_length_stdev=0
    }
    run_unet3d_tf False True
    run_unet3d_tf True False