# cosmoflow_pytorch/ci.yml

# Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Additional CI definitions merged into this pipeline. Both files live under
# the same directory that BENCHMARK_PATH names below; judging by their file
# names they hold the nightly and dry-run jobs (contents not visible here).
include:
  - 'cosmoflow/pytorch/ci_nightly.yml'
  - 'cosmoflow/pytorch/ci_dryrun.yml'

# Top-level variables shared by the jobs of this pipeline.
# CI variable values reach the jobs as strings, so numeric-looking values are
# quoted to keep the YAML parser from typing them as integers.
variables:
  BENCHMARK: cosmoflow
  FRAMEWORK: pytorch
  # Same directory the `include` entries of this file point into.
  BENCHMARK_PATH: cosmoflow/pytorch
  MODEL_NAME: Cosmoflow
  # NOTE(review): '0' presumably means "disabled" for both switches below;
  # confirm against the scripts that read them.
  TIME_TAGS: '0'
  NVTX_FLAG: '0'

# Concrete job built from the `.build_job` template (defined outside this
# file). The templates further down in this file list it under `needs`.
cosmoflow_pytorch:
  extends: '.build_job'

# Dot-prefixed template (not a runnable job by itself): specialises
# `.dgxa100_job`, which is defined outside this file, and requires the
# `cosmoflow_pytorch` job declared above.
.cosmoflow_pytorch_dgxa100_job:
  extends: .dgxa100_job
  needs:
    - cosmoflow_pytorch

# Strong-scaling template built on `.dr_dgxa100_job` (defined outside this
# file); requires the `cosmoflow_pytorch` job.
#
# NEXP is derived from the MLCommons logging rules:
#   https://github.com/mlcommons/logging/blob/master/mlperf_logging/benchmark_meta.py#L19
# cosmoflow needs N=10 strong scaling runs, we do 2N.
# NOTE(review): the remark above says 2N, yet NEXP below is 10 (= N).
# Confirm whether the doubling is applied elsewhere or the remark is stale.
.dr_strong_cosmoflow_pytorch_dgxa100_job:
  extends: .dr_dgxa100_job
  needs:
    - cosmoflow_pytorch
  variables:
    # Quoted: CI variable values are handed to the job as strings.
    NEXP: '10'

# Weak-scaling template built on `.dr_dgxa100_job` (defined outside this
# file); requires the `cosmoflow_pytorch` job.
# Weak scaling is always just one experiment, hence NEXP = 1.
.dr_weak_cosmoflow_pytorch_dgxa100_job:
  extends: .dr_dgxa100_job
  needs:
    - cosmoflow_pytorch
  variables:
    # Quoted: CI variable values are handed to the job as strings.
    NEXP: '1'