From c021460b3b43fa610bfee87d90d8ef996d3efda0 Mon Sep 17 00:00:00 2001
From: Chun-Wei Chen
Date: Wed, 26 Jul 2023 13:47:19 -0700
Subject: [PATCH] Add models from Hugging Face/transformers from MLAgility (#615)

* popular_on_huggingface/bert-base-uncased.py
Signed-off-by: jcwchen
* add transformers models
Signed-off-by: jcwchen
* remove gpt1 and gpt2 for now
Signed-off-by: jcwchen
* config
Signed-off-by: jcwchen
* get model name from build_dir
Signed-off-by: jcwchen
* find_model_hash_name
Signed-off-by: jcwchen
* subprocess.PIPE
Signed-off-by: jcwchen
* new models
Signed-off-by: jcwchen
* 7 models
Signed-off-by: jcwchen
* only keep 4
Signed-off-by: jcwchen
* remove 4
Signed-off-by: jcwchen
* remove albert-base-v2
Signed-off-by: jcwchen
* del model and sess
Signed-off-by: jcwchen
* check_path
Signed-off-by: jcwchen
* drop models in CI
Signed-off-by: jcwchen
* add bert_generation
Signed-off-by: jcwchen
* --binary
Signed-off-by: jcwchen
* disable bert_generation.py
Signed-off-by: jcwchen
* no binary
Signed-off-by: jcwchen
* cancel in progress
Signed-off-by: jcwchen
* binary
Signed-off-by: jcwchen
* minimal
Signed-off-by: jcwchen
* --mini
Signed-off-by: jcwchen
* manually check
Signed-off-by: jcwchen
* only keep
Signed-off-by: jcwchen
* run_test_dir
Signed-off-by: jcwchen
* coma
Signed-off-by: jcwchen
* cache_converted_dir = "~/.cache"
Signed-off-by: jcwchen
* delete and clean cache
Signed-off-by: jcwchen
* clean
Signed-off-by: jcwchen
* clean all
Signed-off-by: jcwchen
* only clean
Signed-off-by: jcwchen
* --cache-dir", cache_converted_dir
Signed-off-by: jcwchen
* disable openai_clip-vit-large-patch14
Signed-off-by: jcwchen
* disable
Signed-off-by: jcwchen
* only keep 4
Signed-off-by: jcwchen
* comma
Signed-off-by: jcwchen
* runs-on: macos-latest
Signed-off-by: jcwchen
* not using conda
Signed-off-by: jcwchen
* final_model_path
Signed-off-by: jcwchen
* git-lfst pull dir
Signed-off-by: jcwchen
* git diff
Signed-off-by: jcwchen
* use onnx.load to compare
Signed-off-by: jcwchen
* test_utils.pull_lfs_file(final_model_path)
Signed-off-by: jcwchen
* only test changed models
Signed-off-by: jcwchen
* test_utils
Signed-off-by: jcwchen
* get_cpu_info
Signed-off-by: jcwchen
* ext names
Signed-off-by: jcwchen
* test_utils.get_changed_models()
Signed-off-by: jcwchen
* compare 2
Signed-off-by: jcwchen
* fix init
Signed-off-by: jcwchen
* transformers==4.29.2
Signed-off-by: jcwchen
* test
Signed-off-by: jcwchen
* initializer
Signed-off-by: jcwchen
* update bert-generation
Signed-off-by: jcwchen
* fixed numpy
Signed-off-by: jcwchen
* print(f"initializer {k}")
Signed-off-by: jcwchen
* update bert from mac
Signed-off-by: Chun-Wei Chen
* remove bert-generation
Signed-off-by: jcwchen
* mlagility_subdir_count number
Signed-off-by: jcwchen
* remove unused onnx
Signed-off-by: jcwchen
---------

Signed-off-by: jcwchen
Signed-off-by: Chun-Wei Chen
---
 .github/workflows/codeql.yml                  |  4 ++
 .github/workflows/linux_ci.yml                |  4 ++
 .github/workflows/mlagility_validation.yml    | 17 ++++---
 .github/workflows/windows_ci.yml              |  4 ++
 ...distilbert-base-multilingual-cased-16.onnx |  3 ++
 .../test_data_set_0/input_0.pb                |  3 ++
 .../test_data_set_0/input_1.pb                |  3 ++
 .../test_data_set_0/output_0.pb               |  3 ++
 .../distilbert-base-uncased-16.onnx           |  3 ++
 .../test_data_set_0/input_0.pb                |  3 ++
 .../test_data_set_0/input_1.pb                |  3 ++
 .../test_data_set_0/output_0.pb               |  3 ++
 .../openai_clip-vit-large-patch14-16.onnx     |  3 ++
 .../test_data_set_0/input_0.pb                |  3 ++
 .../test_data_set_0/input_1.pb                |  3 ++
 .../test_data_set_0/input_2.pb                |  3 ++
 .../test_data_set_0/output_0.pb               |  3 ++
 .../test_data_set_0/output_1.pb               |  3 ++
 .../test_data_set_0/output_2.pb               |  3 ++
 .../test_data_set_0/output_3.pb               |  3 ++
 .../test_data_set_0/output_4.pb               |  3 ++
 .../test_data_set_0/output_5.pb               |  3 ++
 .../test_data_set_0/output_6.pb               |  3 ++
 .../test_data_set_0/output_7.pb               |  3 ++
 models/mlagility/requirements.txt             |  2 +
 workflow_scripts/check_model.py               |  3 +-
 .../generate_onnx_hub_manifest.py             |  3 +-
 workflow_scripts/mlagility_config.py          | 11 +++++
 workflow_scripts/run_mlagility.py             | 49 +++++++++++++------
 workflow_scripts/test_models.py               | 34 ++-----------
 workflow_scripts/test_utils.py                | 35 +++++++++++++
 31 files changed, 170 insertions(+), 56 deletions(-)
 create mode 100644 models/mlagility/distilbert-base-multilingual-cased/distilbert-base-multilingual-cased-16.onnx
 create mode 100644 models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_0.pb
 create mode 100644 models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_1.pb
 create mode 100644 models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/output_0.pb
 create mode 100644 models/mlagility/distilbert-base-uncased/distilbert-base-uncased-16.onnx
 create mode 100644 models/mlagility/distilbert-base-uncased/test_data_set_0/input_0.pb
 create mode 100644 models/mlagility/distilbert-base-uncased/test_data_set_0/input_1.pb
 create mode 100644 models/mlagility/distilbert-base-uncased/test_data_set_0/output_0.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/openai_clip-vit-large-patch14-16.onnx
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_0.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_1.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_2.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_0.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_1.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_2.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_3.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_4.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_5.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_6.pb
 create mode 100644 models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_7.pb

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 3e89b2d06..4333f3f9d 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -19,6 +19,10 @@ on:
   schedule:
     - cron: '31 11 * * 4'
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   analyze:
     name: Analyze
diff --git a/.github/workflows/linux_ci.yml b/.github/workflows/linux_ci.yml
index a6a856458..0218105c4 100644
--- a/.github/workflows/linux_ci.yml
+++ b/.github/workflows/linux_ci.yml
@@ -9,6 +9,10 @@ on:
   pull_request:
     branches: [ main, new-models]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   # This workflow contains a single job called "build"
   build:
diff --git a/.github/workflows/mlagility_validation.yml b/.github/workflows/mlagility_validation.yml
index 1727e27e8..dc2120f58 100644
--- a/.github/workflows/mlagility_validation.yml
+++ b/.github/workflows/mlagility_validation.yml
@@ -6,20 +6,23 @@ on:
   pull_request:
     branches: [ main, new-models]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: macos-latest
     strategy:
       matrix:
-        python-version: ['3.8']
+        python-version: ["3.8"]
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
       name: Checkout repo
-    - uses: conda-incubator/setup-miniconda@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
       with:
-        miniconda-version: "latest"
-        activate-environment: mla
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies and mlagility
@@ -34,4 +37,4 @@ jobs:
       run: |
         # TODO: remove the following after mlagility has resovled version contradict issue
         pip install -r models/mlagility/requirements.txt
-        python workflow_scripts/run_mlagility.py
+        python workflow_scripts/run_mlagility.py --drop
diff --git a/.github/workflows/windows_ci.yml b/.github/workflows/windows_ci.yml
index 4ebcb3804..33ac19230 100644
--- a/.github/workflows/windows_ci.yml
+++ b/.github/workflows/windows_ci.yml
@@ -9,6 +9,10 @@ on:
   pull_request:
     branches: [ main, new-models]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   # This workflow contains a single job called "build"
   build:
diff --git a/models/mlagility/distilbert-base-multilingual-cased/distilbert-base-multilingual-cased-16.onnx b/models/mlagility/distilbert-base-multilingual-cased/distilbert-base-multilingual-cased-16.onnx
new file mode 100644
index 000000000..9bf573b39
--- /dev/null
+++ b/models/mlagility/distilbert-base-multilingual-cased/distilbert-base-multilingual-cased-16.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85d586f31ae0e722a13697f549bc416361307a713ea47b5199be92b6164c7a92
+size 909131662
diff --git a/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_0.pb b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_0.pb
new file mode 100644
index 000000000..34397ac0a
--- /dev/null
+++ b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b51430931d0d2b17a2942c5540cde3939bddc347a40e0af5f29af4c317b0946
+size 99
diff --git a/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_1.pb b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_1.pb
new file mode 100644
index 000000000..949f3f7d6
--- /dev/null
+++ b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a22700ede7e250a4f9ff93623dc5cc01aef737f881f961b4154db14a59cf625b
+size 104
diff --git a/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/output_0.pb b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/output_0.pb
new file mode 100644
index 000000000..fa4da89c6
--- /dev/null
+++ b/models/mlagility/distilbert-base-multilingual-cased/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bba97c91fda88a98490536ee15d3c9c223c43fa29b96a54f212988fbd71750e
+size 4781900
diff --git a/models/mlagility/distilbert-base-uncased/distilbert-base-uncased-16.onnx b/models/mlagility/distilbert-base-uncased/distilbert-base-uncased-16.onnx
new file mode 100644
index 000000000..6f812def0
--- /dev/null
+++ b/models/mlagility/distilbert-base-uncased/distilbert-base-uncased-16.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd40116da3771da0923a50182d78b273f6dec6fafd260f8bcf2f0b172e81e669
+size 361805958
diff --git a/models/mlagility/distilbert-base-uncased/test_data_set_0/input_0.pb b/models/mlagility/distilbert-base-uncased/test_data_set_0/input_0.pb
new file mode 100644
index 000000000..d7c4811e8
--- /dev/null
+++ b/models/mlagility/distilbert-base-uncased/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deee60b4924abe2a7c31468e66d6fe9c199f064cf75002050e374b093babce33
+size 99
diff --git a/models/mlagility/distilbert-base-uncased/test_data_set_0/input_1.pb b/models/mlagility/distilbert-base-uncased/test_data_set_0/input_1.pb
new file mode 100644
index 000000000..dfb157e59
--- /dev/null
+++ b/models/mlagility/distilbert-base-uncased/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e7562da67d6e4be8a5c03829ac3b4c6a58574f72c28bdb8be7598c1c70d0c36
+size 104
diff --git a/models/mlagility/distilbert-base-uncased/test_data_set_0/output_0.pb b/models/mlagility/distilbert-base-uncased/test_data_set_0/output_0.pb
new file mode 100644
index 000000000..f4c95ab66
--- /dev/null
+++ b/models/mlagility/distilbert-base-uncased/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4196ec1ac7e782e70d622bcd117d5e2b15c25753bc2628eda93ec2f1e6db9dd6
+size 1220899
diff --git a/models/mlagility/openai_clip-vit-large-patch14/openai_clip-vit-large-patch14-16.onnx b/models/mlagility/openai_clip-vit-large-patch14/openai_clip-vit-large-patch14-16.onnx
new file mode 100644
index 000000000..ca4d42113
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/openai_clip-vit-large-patch14-16.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d93300d1aa183d6e7e2380c0e0c6199f3de5d284a8c762edc9455ca39143673a
+size 1711045098
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_0.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_0.pb
new file mode 100644
index 000000000..936fd197a
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3aa2c14c16de9e8698d44800be79677cfcdcc71a3b3f5fa6dd1a2b6278995c75
+size 131
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_1.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_1.pb
new file mode 100644
index 000000000..7d802df8b
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:314cf7c88c16efa52c08e3a069c2ef730a1f7855f1465042d99afdcca2a7b5fc
+size 602142
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_2.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_2.pb
new file mode 100644
index 000000000..9c3f6a477
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/input_2.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b909a0efafb3432a67732a64a245b46863b1345c0ed5b0b8bcd072a23b35081
+size 136
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_0.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_0.pb
new file mode 100644
index 000000000..05ed9371c
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_0.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8774b4b70d2338ccb0b2a593f189a68b09ab70bdc01253ab7ce1e7b9bf210736
+size 22
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_1.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_1.pb
new file mode 100644
index 000000000..4bcb5e081
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_1.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318e6f4a6661e384ad80b1366524e23c1e707a9c6d27fdc9ce0aa3eef53e0989
+size 38
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_2.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_2.pb
new file mode 100644
index 000000000..e4a1cf016
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_2.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38edc8c89d4b8908ef8d877878508d3b83ce4a00ed8454650e4b17a5ea782925
+size 6173
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_3.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_3.pb
new file mode 100644
index 000000000..90c1f8172
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_3.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b005a3c5250ca15b0cf6dfad2c35ce4508892a70f8a47372bc87d1f32dc1ae5d
+size 3104
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_4.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_4.pb
new file mode 100644
index 000000000..26b91e2c0
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_4.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ee79f7627c45bfe1ec1bf082e88a1ec87572f8fa5133b704419fc6e54e25302
+size 43039
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_5.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_5.pb
new file mode 100644
index 000000000..cbe269624
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_5.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d104ccf42bfb41389f55ee8425e69147eee62a83f2cf4d4119a07be1178989d
+size 6173
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_6.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_6.pb
new file mode 100644
index 000000000..5a3aa6d37
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_6.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0a126cf51c1f4ec4ab0992774da04264ff3ff25ca41843ad4d3ecef07a9361f
+size 1052705
diff --git a/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_7.pb b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_7.pb
new file mode 100644
index 000000000..4535dadb4
--- /dev/null
+++ b/models/mlagility/openai_clip-vit-large-patch14/test_data_set_0/output_7.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d5f9e890a2735baa209d8cf120ec84dbf303c308a2949d0482b0781b36da3b2
+size 4125
diff --git a/models/mlagility/requirements.txt b/models/mlagility/requirements.txt
index 68b485fb9..f95ba81c8 100644
--- a/models/mlagility/requirements.txt
+++ b/models/mlagility/requirements.txt
@@ -1,2 +1,4 @@
+numpy==1.24.4
 torch==2.0.1
 torchvision==0.15.2
+transformers==4.29.2
diff --git a/workflow_scripts/check_model.py b/workflow_scripts/check_model.py
index 2119e4e1a..33dce8001 100644
--- a/workflow_scripts/check_model.py
+++ b/workflow_scripts/check_model.py
@@ -16,7 +16,8 @@ def has_vnni_support():
 
 def run_onnx_checker(model_path):
     model = onnx.load(model_path)
-    onnx.checker.check_model(model, full_check=True)
+    del model
+    onnx.checker.check_model(model_path, full_check=True)
 
 
 def ort_skip_reason(model_path):
diff --git a/workflow_scripts/generate_onnx_hub_manifest.py b/workflow_scripts/generate_onnx_hub_manifest.py
index 4e3dff8cb..0e22478b0 100644
--- a/workflow_scripts/generate_onnx_hub_manifest.py
+++ b/workflow_scripts/generate_onnx_hub_manifest.py
@@ -14,8 +14,7 @@
 import onnx
 from onnx import shape_inference
 import argparse
-from test_models import get_changed_models
-from test_utils import pull_lfs_file
+from test_utils import get_changed_models, pull_lfs_file
 
 
 # Acknowledgments to pytablereader codebase for this function
diff --git a/workflow_scripts/mlagility_config.py b/workflow_scripts/mlagility_config.py
index b0b8200bb..b5acd5323 100644
--- a/workflow_scripts/mlagility_config.py
+++ b/workflow_scripts/mlagility_config.py
@@ -15,4 +15,15 @@
     "torch_hub/densenet121.py",
     "torch_hub/inception_v3.py",
     "torch_hub/googlenet.py",
+    # "transformers/bert_generation.py",  # inconsistent model created by mlagility
+    # "popular_on_huggingface/bert-base-uncased.py",
+    # "popular_on_huggingface/xlm-roberta-large.py",
+    # "popular_on_huggingface/bert-large-uncased.py",
+    "popular_on_huggingface/openai_clip-vit-large-patch14.py",
+    # "popular_on_huggingface/xlm-roberta-base.py",  # output nan
+    # "popular_on_huggingface/roberta-base.py",  # output nan
+    "popular_on_huggingface/distilbert-base-uncased.py",
+    # "popular_on_huggingface/distilroberta-base.py",  # output nan
+    "popular_on_huggingface/distilbert-base-multilingual-cased.py",
+    # "popular_on_huggingface/albert-base-v2",  # Status Message: indices element out of data bounds, idx=8 must be within the inclusive range [-2,1]
 ]
diff --git a/workflow_scripts/run_mlagility.py b/workflow_scripts/run_mlagility.py
index d1092a0a4..50e9b0110 100644
--- a/workflow_scripts/run_mlagility.py
+++ b/workflow_scripts/run_mlagility.py
@@ -7,6 +7,7 @@
 import subprocess
 import sys
 import ort_test_dir_utils
+import test_utils
 
 
 def get_immediate_subdirectories_count(dir_name):
@@ -21,7 +22,7 @@ def find_model_hash_name(stdout):
             line = line.replace("\\", "/")
             # last part of the path is the model hash name
             return line.split("/")[-1]
-    raise Exception(f"Cannot find Build dir in {stdout}.")
+    raise Exception(f"Cannot find Build dir in {stdout}.")
 
 
 ZOO_OPSET_VERSION = "16"
@@ -33,34 +34,45 @@ def find_model_hash_name(stdout):
 
 
 def main():
+    # calculate first; otherwise the directories might be deleted by shutil.rmtree
+    mlagility_subdir_count = get_immediate_subdirectories_count(mlagility_models_dir)
+
     parser = argparse.ArgumentParser(description="Test settings")
+    parser.add_argument("--all_models", required=False, default=False, action="store_true",
+                        help="Test all ONNX Model Zoo models instead of only changed models")
     parser.add_argument("--create", required=False, default=False, action="store_true",
                         help="Create new models from mlagility if not exist.")
+    parser.add_argument("--drop", required=False, default=False, action="store_true",
+                        help="Drop downloaded models after verification. (For space limitation in CIs)")
     parser.add_argument("--skip", required=False, default=False, action="store_true",
                         help="Skip checking models if already exist.")
+
     args = parser.parse_args()
     errors = 0
-
+    changed_models_set = set(test_utils.get_changed_models())
+    print(f"Changed models: {changed_models_set}")
     for model_info in models_info:
-        directory_name, model_name = model_info.split("/")
+        _, model_name = model_info.split("/")
         model_name = model_name.replace(".py", "")
         model_zoo_dir = model_name
+        print(f"----------------Checking {model_zoo_dir}----------------")
+        final_model_dir = osp.join(mlagility_models_dir, model_zoo_dir)
+        final_model_name = f"{model_zoo_dir}-{ZOO_OPSET_VERSION}.onnx"
+        final_model_path = osp.join(final_model_dir, final_model_name)
+        if not args.all_models and final_model_path not in changed_models_set:
+            print(f"Skip checking {final_model_path} because it is not changed.")
+            continue
+        if osp.exists(final_model_path) and args.skip:
+            print(f"Skip checking {model_zoo_dir} because {final_model_path} already exists.")
+            continue
         try:
-            print(f"----------------Checking {model_zoo_dir}----------------")
-            final_model_dir = osp.join(mlagility_models_dir, model_zoo_dir)
-            final_model_name = f"{model_zoo_dir}-{ZOO_OPSET_VERSION}.onnx"
-            final_model_path = osp.join(final_model_dir, final_model_name)
-            if osp.exists(final_model_path) and args.skip:
-                print(f"Skip checking {model_zoo_dir} because {final_model_path} already exists.")
-                continue
             cmd = subprocess.run(["benchit", osp.join(mlagility_root, model_info), "--cache-dir", cache_converted_dir,
                                   "--onnx-opset", ZOO_OPSET_VERSION, "--export-only"],
                                  cwd=cwd_path, stdout=subprocess.PIPE, stderr=sys.stderr, check=True)
             model_hash_name = find_model_hash_name(cmd.stdout)
-            print(model_hash_name)
             mlagility_created_onnx = osp.join(cache_converted_dir, model_hash_name, "onnx", model_hash_name + base_name)
             if args.create:
                 ort_test_dir_utils.create_test_dir(mlagility_created_onnx, "./", final_model_dir)
@@ -75,14 +87,21 @@ def main():
         except Exception as e:
             errors += 1
             print(f"Failed to check {model_zoo_dir} because of {e}.")
-
+        if args.drop:
+            subprocess.run(["benchit", "cache", "delete", "--all", "--cache-dir", cache_converted_dir],
+                           cwd=cwd_path, stdout=sys.stdout, stderr=sys.stderr, check=True)
+            subprocess.run(["benchit", "cache", "clean", "--all", "--cache-dir", cache_converted_dir],
+                           cwd=cwd_path, stdout=sys.stdout, stderr=sys.stderr, check=True)
+            shutil.rmtree(final_model_dir, ignore_errors=True)
+            shutil.rmtree(cache_converted_dir, ignore_errors=True)
+    total_count = len(models_info) if args.all_models else len(changed_models_set)
     if errors > 0:
-        print(f"All {len(models_info)} model(s) have been checked, but {errors} model(s) failed.")
+        print(f"All {total_count} model(s) have been checked, but {errors} model(s) failed.")
         sys.exit(1)
     else:
-        print(f"All {len(models_info)} model(s) have been checked.")
+        print(f"All {total_count} model(s) have been checked.")
+
 
-    mlagility_subdir_count = get_immediate_subdirectories_count(mlagility_models_dir)
     if mlagility_subdir_count != len(models_info):
         print(f"Expected {len(models_info)} model(s) in {mlagility_models_dir}, but got {mlagility_subdir_count} model(s) under models/mlagility."
               f"Please check if you have added new model(s) to models_info in mlagility_config.py.")
diff --git a/workflow_scripts/test_models.py b/workflow_scripts/test_models.py
index 9a99f6674..04cec1890 100644
--- a/workflow_scripts/test_models.py
+++ b/workflow_scripts/test_models.py
@@ -23,25 +23,6 @@ def get_all_models():
     return model_list
 
 
-def get_changed_models():
-    model_list = []
-    cwd_path = Path.cwd()
-    # git fetch first for git diff on GitHub Action
-    subprocess.run(["git", "fetch", "origin", "main:main"],
-                   cwd=cwd_path, stdout=subprocess.PIPE,
-                   stderr=subprocess.PIPE)
-    # obtain list of added or modified files in this PR
-    obtain_diff = subprocess.Popen(["git", "diff", "--name-only", "--diff-filter=AM", "origin/main", "HEAD"],
-                                   cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdoutput, _ = obtain_diff.communicate()
-    diff_list = stdoutput.split()
-
-    # identify list of changed ONNX models in ONXX Model Zoo
-    model_list = [str(model).replace("b'", "").replace("'", "")
-                  for model in diff_list if onnx_ext_name in str(model) or tar_ext_name in str(model)]
-    return model_list
-
-
 def main():
     parser = argparse.ArgumentParser(description="Test settings")
     # default all: test by both onnx and onnxruntime
@@ -53,12 +34,12 @@ def main():
     parser.add_argument("--create", required=False, default=False, action="store_true",
                         help="Create new test data by ORT if it fails with existing test data")
     parser.add_argument("--all_models", required=False, default=False, action="store_true",
-                        help="Test all ONNX Model Zoo models instead of only chnaged models")
+                        help="Test all ONNX Model Zoo models instead of only changed models")
     parser.add_argument("--drop", required=False, default=False, action="store_true",
                         help="Drop downloaded models after verification. (For space limitation in CIs)")
     args = parser.parse_args()
 
-    model_list = get_all_models() if args.all_models else get_changed_models()
+    model_list = get_all_models() if args.all_models else test_utils.get_changed_models()
 
     # run lfs install before starting the tests
     test_utils.run_lfs_install()
@@ -106,16 +87,7 @@ def main():
                 print("[PASS] {} is checked by onnx. ".format(model_name))
             if args.target == "onnxruntime" or args.target == "all":
                 try:
-                    # git lfs pull those test_data_set_* folders
-                    root_dir = Path(model_path).parent
-                    for _, dirs, _ in os.walk(root_dir):
-                        for dir in dirs:
-                            if "test_data_set_" in dir:
-                                test_data_set_dir = os.path.join(root_dir, dir)
-                                for _, _, files in os.walk(test_data_set_dir):
-                                    for file in files:
-                                        if file.endswith(".pb"):
-                                            test_utils.pull_lfs_file(os.path.join(test_data_set_dir, file))
+                    test_utils.pull_lfs_directory(Path(model_path).parent)
                     check_model.run_backend_ort_with_data(model_path)
                     print("[PASS] {} is checked by onnxruntime. ".format(model_name))
                 except Exception as e:
diff --git a/workflow_scripts/test_utils.py b/workflow_scripts/test_utils.py
index 8384c63d8..d7d62052c 100644
--- a/workflow_scripts/test_utils.py
+++ b/workflow_scripts/test_utils.py
@@ -25,6 +25,18 @@ def pull_lfs_file(file_name):
     print(f'LFS pull completed for {file_name} with return code= {result.returncode}')
 
 
+def pull_lfs_directory(directory_name):
+    # git lfs pull those test_data_set_* folders
+    for _, dirs, _ in os.walk(directory_name):
+        for dir in dirs:
+            if "test_data_set_" in dir:
+                test_data_set_dir = os.path.join(directory_name, dir)
+                for _, _, files in os.walk(test_data_set_dir):
+                    for file in files:
+                        if file.endswith(".pb"):
+                            pull_lfs_file(os.path.join(test_data_set_dir, file))
+
+
 def run_lfs_prune():
     result = subprocess.run(['git', 'lfs', 'prune'], cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     print(f'LFS prune completed with return code= {result.returncode}')
@@ -62,3 +74,26 @@ def remove_tar_dir():
 def remove_onnxruntime_test_dir():
     if os.path.exists(TEST_ORT_DIR) and os.path.isdir(TEST_ORT_DIR):
         rmtree(TEST_ORT_DIR)
+
+
+def get_changed_models():
+    tar_ext_name = ".tar.gz"
+    onnx_ext_name = ".onnx"
+    model_list = []
+    cwd_path = Path.cwd()
+    # TODO: use the main branch instead of new-models
+    branch_name = "new-models"  # "main"
+    # git fetch first for git diff on GitHub Action
+    subprocess.run(["git", "fetch", "origin", f"{branch_name}:{branch_name}"],
+                   cwd=cwd_path, stdout=subprocess.PIPE,
+                   stderr=subprocess.PIPE)
+    # obtain list of added or modified files in this PR
+    obtain_diff = subprocess.Popen(["git", "diff", "--name-only", "--diff-filter=AM", "origin/" + branch_name, "HEAD"],
+                                   cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdoutput, _ = obtain_diff.communicate()
+    diff_list = stdoutput.split()
+
+    # identify list of changed ONNX models in ONNX Model Zoo
+    model_list = [str(model).replace("b'", "").replace("'", "")
+                  for model in diff_list if onnx_ext_name in str(model) or tar_ext_name in str(model)]
+    return model_list