Skip to content

Commit

Permalink
Review changes: integration tests, TYPECHECKING, Linting
Browse files Browse the repository at this point in the history
  • Loading branch information
Bobbins228 committed Mar 27, 2024
1 parent f88d547 commit 559c19a
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 850 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/components-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ jobs:
platform: "linux.20_04.4x"
- scheduler: "ray"
platform: ubuntu-20.04
- scheduler: "kueue"
container_repo: localhost:5000/torchx
platform: "linux.20_04.4x"
fail-fast: false
runs-on: ${{ matrix.platform }}
permissions:
Expand Down Expand Up @@ -81,6 +84,11 @@ jobs:
run: |
scripts/setup_minikube.sh
- name: Start Kueue enabled Kubernetes
if: ${{ matrix.scheduler == 'kueue'}}
run: |
scripts/setup_minikube_kueue.sh
- name: Run Components Integration Tests
env:
INTEGRATION_TEST_STORAGE: ${{ secrets.INTEGRATION_TEST_STORAGE }}
Expand Down
30 changes: 17 additions & 13 deletions scripts/kueue_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,34 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse

from integ_test_utils import build_images, BuildInfo, MissingEnvError, push_images
from torchx.components.dist import ddp
from torchx.runner import get_runner
from integ_test_utils import (
build_images,
BuildInfo,
push_images,
MissingEnvError
)
import argparse
from torchx.specs import AppState
from torchx.util.types import none_throws


def argparser() -> argparse.ArgumentParser:
    """Build the command-line parser for the Kueue integration test script.

    Returns:
        An ``argparse.ArgumentParser`` that accepts ``--container_repo``
        (a string, ``None`` when omitted) and the boolean flag ``--dryrun``
        (``False`` when omitted).
    """
    parser = argparse.ArgumentParser(
        description="Kueue dist trainer integration test runner."
    )
    parser.add_argument("--container_repo", type=str)
    # Register --dryrun exactly once: argparse rejects a second add_argument()
    # call for an option string that is already defined on the parser.
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Does not actually submit the app, just prints the scheduler request",
    )
    return parser


def build_and_push_image(container_repo: str) -> BuildInfo:
    """Run ``build_images()`` and hand the result to ``push_images``.

    Args:
        container_repo: forwarded to ``push_images`` as its
            ``container_repo`` keyword argument.

    Returns:
        The object returned by ``build_images()``.
    """
    build_info = build_images()
    push_images(build_info, container_repo=container_repo)
    return build_info


def run_kueue_test(dryrun: bool = False):
# Gather args & build image
print("Building image")
Expand All @@ -46,7 +50,7 @@ def run_kueue_test(dryrun: bool = False):
j="1x1",
)
# Pass config variables
cfg={"namespace":"torchx-dev", "local_queue":"torchx-local-queue"}
cfg = {"namespace": "torchx-dev", "local_queue": "torchx-local-queue"}
print("Submitting job")
if dryrun:
dryrun_info = runner.dryrun(app, "kueue", cfg)
Expand All @@ -60,6 +64,7 @@ def run_kueue_test(dryrun: bool = False):
if none_throws(final_status).state != AppState.SUCCEEDED:
raise Exception(f"Dist app failed with status: {final_status}")


def main() -> None:
args = argparser().parse_args()

Expand All @@ -68,7 +73,6 @@ def main() -> None:
except MissingEnvError:
print("Skip runnig tests, executed only docker build step")


if __name__ == "__main__":
main()


3 changes: 0 additions & 3 deletions torchx/schedulers/kubernetes_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,14 @@
from torchx.specs.api import (
AppDef,
AppState,
BindMount,
CfgVal,
DeviceMount,
macros,
ReplicaState,
ReplicaStatus,
RetryPolicy,
Role,
RoleStatus,
runopts,
VolumeMount,
)
from torchx.util.role_to_pod import role_to_pod
from torchx.util.strings import normalize_str
Expand Down
Loading

0 comments on commit 559c19a

Please sign in to comment.