Generate the dependency matrix by code for GitHub Actions (#1431)

k2-fsa · Dec 25, 2023 · c855a58 · c855a58
1 parent e5bb1ae
commit c855a58
Show file tree

Hide file tree

Showing 8 changed files with 280 additions and 184 deletions.
diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile
@@ -31,10 +31,12 @@ LABEL github_repo="https://github.com/k2-fsa/icefall"
 RUN pip install --no-cache-dir \
       torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
       k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
+      \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
       dill \
       graphviz \
+      kaldi-decoder \
       kaldi_native_io \
       kaldialign \
       kaldifst \

diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
+
+
+import json
+
+
+def version_gt(a, b):
+    """Return True if version ``a`` is strictly newer than version ``b``.
+
+    Only the first two dot-separated components (major, minor) take part in
+    the comparison; anything after them (patch, dev suffix) is ignored.
+
+    Args:
+      a: A version string such as "3.10" or "2.1.0".
+      b: The version string to compare against.
+
+    Returns:
+      True if (major, minor) of ``a`` is greater than (major, minor) of ``b``.
+
+    Raises:
+      ValueError: If either string has fewer than two components or a
+        major/minor component is not an integer.
+    """
+    # Compare as integers, not strings: as strings "9" > "10", which would
+    # make "3.9" look newer than "3.10".
+    a_major, a_minor = map(int, a.split(".")[:2])
+    b_major, b_minor = map(int, b.split(".")[:2])
+    return (a_major, a_minor) > (b_major, b_minor)
+
+
+def version_ge(a, b):
+    """Return True if version ``a`` is newer than or equal to version ``b``.
+
+    Only the first two dot-separated components (major, minor) take part in
+    the comparison; anything after them (patch, dev suffix) is ignored.
+
+    Args:
+      a: A version string such as "3.10" or "2.1.0".
+      b: The version string to compare against.
+
+    Returns:
+      True if (major, minor) of ``a`` is greater than or equal to
+      (major, minor) of ``b``.
+
+    Raises:
+      ValueError: If either string has fewer than two components or a
+        major/minor component is not an integer.
+    """
+    # Compare as integers, not strings: as strings "9" >= "10", which would
+    # make "3.9" look at least as new as "3.10".
+    a_major, a_minor = map(int, a.split(".")[:2])
+    b_major, b_minor = map(int, b.split(".")[:2])
+    return (a_major, a_minor) >= (b_major, b_minor)
+
+
+def get_torchaudio_version(torch_version):
+    """Map a torch version string to the torchaudio version to install.
+
+    Args:
+      torch_version: A torch version string, e.g. "2.0.1".
+
+    Returns:
+      The torchaudio version string paired with ``torch_version``. Versions
+      not listed in the table below map to themselves.
+    """
+    # Releases whose torchaudio version number differs from torch's.
+    special_cases = {
+        "1.13.0": "0.13.0",
+        "1.13.1": "0.13.1",
+        "2.0.0": "2.0.1",
+        "2.0.1": "2.0.2",
+    }
+    return special_cases.get(torch_version, torch_version)
+
+
+def get_matrix():
+    """Build the list of matrix entries, one per python/torch combination.
+
+    Returns:
+      A list of dicts with the keys "k2-version", "kaldifeat-version",
+      "version", "python-version", "torch-version" and "torchaudio-version".
+      Entries are ordered by python version first, then by torch version.
+    """
+    k2_version = "1.24.4.dev20231220"
+    kaldifeat_version = "1.25.3.dev20231221"
+    version = "1.1"
+    python_version = ["3.8", "3.9", "3.10", "3.11"]
+    torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
+
+    # A combination is kept unless python is newer than 3.10 while torch is
+    # not newer than 2.0.
+    # NOTE(review): the original remark here was "torchaudio <= 1.13.x
+    # supports only python <= 3.10", yet the condition also drops torch 2.0.x
+    # for such python versions -- confirm that this is intended.
+    return [
+        {
+            "k2-version": k2_version,
+            "kaldifeat-version": kaldifeat_version,
+            "version": version,
+            "python-version": py,
+            "torch-version": torch,
+            "torchaudio-version": get_torchaudio_version(torch),
+        }
+        for py in python_version
+        for torch in torch_version
+        if not version_gt(py, "3.10") or version_gt(torch, "2.0")
+    ]
+
+
+def main():
+    """Print the build matrix to stdout as JSON.
+
+    The list returned by get_matrix() is wrapped under the "include" key.
+    """
+    payload = {"include": get_matrix()}
+    print(json.dumps(payload))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/docker/run.sh → .github/scripts/librispeech/ASR/run.sh b/.github/scripts/docker/run.sh → .github/scripts/librispeech/ASR/run.sh
@@ -1,11 +1,12 @@
 #!/usr/bin/env bash
+
 set -ex
 
-cd /icefall
-export PYTHONPATH=/icefall:$PYTHONPATH
-python3 -c "import torch; print(torch.__file__)"
-python3 -c "import torchaudio; print(torchaudio.__version__)"
-python3 -c "import icefall; print(icefall.__file__)"
+log() {
+  # Echo the arguments, prefixed with a timestamp and the caller's
+  # file name, line number and function name.
+  # This function is from espnet
+  local src_file=${BASH_SOURCE[1]##*/}
+  local call_line=${BASH_LINENO[0]}
+  local caller=${FUNCNAME[1]}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${src_file}:${call_line}:${caller}) $*"
+}
 
 cd egs/librispeech/ASR
 

diff --git a/.github/scripts/yesno/ASR/run.sh b/.github/scripts/yesno/ASR/run.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+  # Echo the arguments, prefixed with a timestamp and the caller's
+  # file name, line number and function name.
+  # This function is from espnet
+  local src_file=${BASH_SOURCE[1]##*/}
+  local call_line=${BASH_LINENO[0]}
+  local caller=${FUNCNAME[1]}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${src_file}:${call_line}:${caller}) $*"
+}
+
+# The path is relative, so the script must be started from a directory that
+# contains egs/ (NOTE(review): presumably the repository root -- confirm).
+# Every path used below is relative to egs/yesno/ASR.
+cd egs/yesno/ASR
+
+log "data preparation"
+./prepare.sh
+
+log "training"
+python3 ./tdnn/train.py
+
+log "decoding"
+python3 ./tdnn/decode.py
+
+log "export to pretrained.pt"
+
+# The same checkpoint selection (--epoch 14 --avg 2) is used by every export
+# in this script.
+# NOTE(review): presumably this writes ./tdnn/exp/pretrained.pt, which the
+# next command loads -- confirm against export.py.
+python3 ./tdnn/export.py --epoch 14 --avg 2
+
+# Run the exported checkpoint with HLG on two wave files.
+python3 ./tdnn/pretrained.py \
+  --checkpoint ./tdnn/exp/pretrained.pt \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test exporting to torchscript"
+# NOTE(review): presumably --jit 1 writes ./tdnn/exp/cpu_jit.pt, which the
+# next command loads -- confirm against export.py.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+python3 ./tdnn/jit_pretrained.py \
+  --nn-model ./tdnn/exp/cpu_jit.pt \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test exporting to onnx"
+# NOTE(review): the two models loaded below (model-epoch-14-avg-2.onnx and
+# model-epoch-14-avg-2.int8.onnx) are presumably both written by this single
+# export -- confirm against export_onnx.py.
+python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
+
+log "Test float32 model"
+python3 ./tdnn/onnx_pretrained.py \
+  --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test int8 model"
+python3 ./tdnn/onnx_pretrained.py \
+  --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test decoding with H"
+# Repeats the torchscript export from the "Test exporting to torchscript"
+# step, so this section does not rely on that step's output still existing.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+# Unlike the HLG-based runs above, this one is given H.fst and tokens.txt
+# and is run on three wave files.
+python3 ./tdnn/jit_pretrained_decode_with_H.py \
+    --nn-model ./tdnn/exp/cpu_jit.pt \
+    --H ./data/lang_phone/H.fst \
+    --tokens ./data/lang_phone/tokens.txt \
+    ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
+    ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
+
+log "Test decoding with HL"
+# Same torchscript export once more, for the same reason as above.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+# This run is given HL.fst and words.txt and is run on three wave files.
+python3 ./tdnn/jit_pretrained_decode_with_HL.py \
+    --nn-model ./tdnn/exp/cpu_jit.pt \
+    --HL ./data/lang_phone/HL.fst \
+    --words ./data/lang_phone/words.txt \
+    ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
+    ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
+
+log "Show generated files"
+# List the contents of the two directories the steps above read models and
+# language files from.
+ls -lh tdnn/exp
+ls -lh data/lang_phone
diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml
@@ -7,18 +7,31 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # Runs generate_build_matrix.py and exposes its JSON output as the job
+  # output `matrix`, which build-cpu-docker expands with fromJson().
+  generate_build_matrix:
+    # Only run in the listed owners' repositories.
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # outputting for debugging purposes
+          echo "${MATRIX}"
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the file named by $GITHUB_OUTPUT instead. The
+          # script prints single-line JSON, so the name=value form suffices.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
   build-cpu-docker:
+    needs: generate_build_matrix
     name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
-        k2-version: ["1.24.4.dev20231220"]
-        kaldifeat-version: ["1.25.3.dev20231221"]
-        version: ["1.1"]
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
 
     steps:
       # refer to https://github.com/actions/checkout
@@ -45,25 +58,14 @@ jobs:
         run: |
           cd .github/scripts/docker
           torch_version=${{ matrix.torch-version }}
+          torchaudio_version=${{ matrix.torchaudio-version }}
 
-          # see https://pytorch.org/audio/stable/installation.html#compatibility-matrix
-          if [[ $torch_version == 1.13.0 ]]; then
-            torchaudio_version=0.13.0
-          elif [[ $torch_version == 1.13.1 ]]; then
-            torchaudio_version=0.13.1
-          elif [[ $torch_version == 2.0.0 ]]; then
-            torchaudio_version=2.0.1
-          elif [[ $torch_version == 2.0.1 ]]; then
-            torchaudio_version=2.0.2
-          else
-            torchaudio_version=$torch_version
-          fi
           echo "torch_version: $torch_version"
           echo "torchaudio_version: $torchaudio_version"
 
           version=${{ matrix.version }}
 
-          tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
+          tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
           echo "tag: $tag"
 
           docker build \