Test fixes (#812)
* clean up unused files

* fix tests: HF_TOKEN not available on PR runs, add evaluation.md to tests

* markup docs

* fix evaluation.md

* add markup to native execution md

* install wget for gguf.md testing, prevent evaluation.md failures

* remove secrets from yml files

* update

* remove copy-pasted code from the macos and macos-mps tests

* typo

* format
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 7b473a0 commit db5f13a
Showing 10 changed files with 253 additions and 134 deletions.
19 changes: 18 additions & 1 deletion .ci/scripts/run-docs
@@ -72,6 +72,23 @@ if [ "$1" == "advanced" ]; then
  echo "*******************************************"
  bash -x ./run-advanced.sh
  echo "::endgroup::"
  echo "TBD"
fi

if [ "$1" == "evaluation" ]; then

exit 0

echo "::group::Create script to run evaluation"
python3 scripts/updown.py --file docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-evaluation.sh
echo "::endgroup::"

echo "::group::Run evaluation"
echo "*******************************************"
cat ./run-evaluation.sh
echo "*******************************************"
bash -x ./run-evaluation.sh
fi
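
For context, each documentation branch in run-docs appears to follow the same pattern: scripts/updown.py converts a markdown walkthrough into a shell script, a trailing "exit 1" is appended as a safety net, and the result is executed with bash -x. A minimal sketch of the guard, with the doc name purely illustrative; it assumes a well-formed generated script exits on its own before the appended line is reached:

  # Generate a runnable script from a markdown walkthrough.
  python3 scripts/updown.py --file docs/evaluation.md > ./run-evaluation.sh
  # Safety net: if the updown processor silently emitted a truncated script,
  # the trailing exit 1 fails the job instead of letting it pass vacuously.
  echo "exit 1" >> ./run-evaluation.sh
  bash -x ./run-evaluation.sh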

80 changes: 38 additions & 42 deletions .github/workflows/run-readme-pr-macos.yml
@@ -33,23 +33,16 @@ jobs:
sysctl machdep.cpu.core_count
echo "::endgroup::"
echo "::group::Create script to run README"
python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-readme.sh
echo "::endgroup::"
.ci/scripts/run-docs readme
echo "::group::Run README"
echo "*******************************************"
cat ./run-readme.sh
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
bash -x ./run-readme.sh
echo "::endgroup::"


test-quantization-macos:
runs-on: macos-14-xlarge
runs-on: macos-14-xlarge
steps:
- name: Checkout code
uses: actions/checkout@v2
@@ -75,19 +68,7 @@ jobs:
sysctl machdep.cpu.core_count
echo "::endgroup::"
echo "::group::Create script to run quantization"
python3 scripts/updown.py --file docs/quantization.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-quantization.sh
echo "::endgroup::"
echo "::group::Run quantization"
echo "*******************************************"
cat ./run-quantization.sh
echo "*******************************************"
bash -x ./run-quantization.sh
echo "::endgroup::"
.ci/scripts/run-docs quantization
echo "::group::Completion"
echo "tests complete"
@@ -97,7 +78,6 @@ jobs:

test-gguf-macos:
runs-on: macos-14-xlarge
secrets: inherit
steps:
- name: Checkout code
uses: actions/checkout@v2
@@ -110,7 +90,6 @@
with:
xcode-version: '15.3'
- name: Run script
secrets-env: "HF_TOKEN_PERIODIC"
run: |
set -x
# NS: Remove previous installation of torch first
@@ -124,25 +103,42 @@
sysctl machdep.cpu.core_count
echo "::endgroup::"
# echo "::group::Install newer objcopy that supports --set-section-alignment"
# yum install -y devtoolset-10-binutils
# export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# echo "::endgroup::"
echo "::group::Create script to run gguf"
python3 scripts/updown.py --file docs/GGUF.md > ./run-gguf.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-gguf.sh
echo "::endgroup::"
.ci/scripts/run-docs gguf
echo "::group::Run gguf"
echo "*******************************************"
cat ./run-gguf.sh
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
bash -x ./run-gguf.sh
echo "::endgroup::"
test-advanced-macos:
runs-on: macos-14-xlarge
steps:
- name: Checkout code
uses: actions/checkout@v2
- uses: actions/setup-python@v4
with:
python-version: '3.10.11'
- name: Setup Xcode
if: runner.os == 'macOS'
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '15.3'
- name: Run script
run: |
set -x
# NS: Remove previous installation of torch first
# as this script does not install anything into the conda env but rather as a system dep
pip3 uninstall -y torch || true
set -eou pipefail
echo "::group::Print machine info"
uname -a
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
echo "::endgroup::"
.ci/scripts/run-docs advanced
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
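
The macOS jobs above now delegate to the shared .ci/scripts/run-docs entry point instead of generating their scripts inline in each workflow. A hedged sketch of exercising the same entry point locally, assuming a torchchat checkout with the documented prerequisites installed:

  # The same invocations the CI jobs use, one topic per call:
  .ci/scripts/run-docs readme
  .ci/scripts/run-docs quantization
  .ci/scripts/run-docs gguf
  .ci/scripts/run-docs advanced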
93 changes: 59 additions & 34 deletions .github/workflows/run-readme-pr-mps.yml
@@ -25,29 +25,17 @@ jobs:
sysctl machdep.cpu.core_count
echo "::endgroup::"
# echo "::group::Install newer objcopy that supports --set-section-alignment"
# yum install -y devtoolset-10-binutils
# export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# echo "::endgroup::"
echo "::group::Create script to run README"
python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-readme.sh
echo "::endgroup::"

echo "::group::Run README"
echo "*******************************************"
cat ./run-readme.sh
.ci/scripts/run-docs readme
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
bash -x ./run-readme.sh
echo "::endgroup::"
test-quantization-mps-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable # needs MPS, was macos-m1-stable
runner: macos-m1-14
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
@@ -64,25 +52,62 @@
sysctl machdep.cpu.core_count
echo "::endgroup::"
# echo "::group::Install newer objcopy that supports --set-section-algnment"
# yum install -y devtoolset-10-binutils
# export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
# echo "::endgroup::"
echo "::group::Create script to run quantization"
python3 scripts/updown.py --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-quantization.sh
echo "::endgroup::"

echo "::group::Run quantization"
.ci/scripts/run-docs quantization
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
cat ./run-quantization.sh
echo "::endgroup::"
test-gguf-mps-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-14 # needs MPS, was macos-m1-stable
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
conda activate test-quantization-mps-macos
# NS: Remove previous installation of torch first
# as this script does not install anything into the conda env
# but rather as a system dep
pip3 uninstall -y torch || true
set -eou pipefail
echo "::group::Print machine info"
uname -a
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
echo "::endgroup::"
.ci/scripts/run-docs gguf
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
bash -x ./run-quantization.sh
echo "::endgroup::"

test-advanced-mps-macos:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-14 # needs MPS, was macos-m1-stable
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
conda activate test-quantization-mps-macos
# NS: Remove previous installation of torch first
# as this script does not install anything into the conda env
# but rather as a system dep
pip3 uninstall -y torch || true
set -eou pipefail
echo "::group::Print machine info"
uname -a
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
echo "::endgroup::"
.ci/scripts/run-docs advanced
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
48 changes: 24 additions & 24 deletions .github/workflows/run-readme-pr.yml
@@ -10,10 +10,8 @@ on:
jobs:
test-readme-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -38,7 +36,6 @@ jobs:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -104,10 +101,8 @@ jobs:
test-gguf-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -130,10 +125,8 @@ jobs:
test-gguf-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -157,10 +150,8 @@ jobs:
test-advanced-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -184,10 +175,8 @@ jobs:
test-advanced-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -208,12 +197,10 @@ jobs:
echo "*******************************************"
echo "::endgroup::"
test-torchtune-any:
test-evaluation-any:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
with:
runner: linux.g5.4xlarge.nvidia.gpu
secrets-env: "HF_TOKEN_PERIODIC"
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
@@ -227,18 +214,31 @@ jobs:
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
echo "::group::Create script to run torchtune"
python3 scripts/updown.py --file docs/torchtune.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-torchtune.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-torchtune.sh
echo "::endgroup::"
.ci/scripts/run-docs evaluation
echo "::group::Run advanced"
echo "*******************************************"
cat ./run-torchtune.sh
echo "::group::Completion"
echo "tests complete"
echo "*******************************************"
bash -x ./run-torchtune.sh
echo "::endgroup::"
test-evaluation-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
with:
runner: linux.g5.4xlarge.nvidia.gpu
gpu-arch-type: cuda
gpu-arch-version: "12.1"
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation
echo "::group::Completion"
echo "tests complete"
2 changes: 2 additions & 0 deletions docs/ADVANCED-USERS.md
@@ -10,6 +10,8 @@ Torchchat is currently in a pre-release state and under extensive development.

[shell default]: HF_TOKEN="${SECRET_HF_TOKEN_PERIODIC}" huggingface-cli login

[shell default]: ./install_requirements.sh

[shell default]: TORCHCHAT_ROOT=${PWD} ./scripts/install_et.sh


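The [shell default]: lines added to docs/ADVANCED-USERS.md look like markdown link-reference definitions, which ordinary renderers hide; scripts/updown.py appears to extract them as setup commands when turning the doc into a runnable test script. A hedged sketch of the effect, with both the excerpt and the generated output illustrative rather than taken verbatim from the repository:

  # Markdown source (hypothetical excerpt):
  #   [shell default]: ./install_requirements.sh
  # After updown.py processing, the generated run script would presumably
  # contain the command verbatim, so requirements are installed before the
  # commands captured from the doc's code blocks are executed:
  ./install_requirements.sh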