Always calculate rotary embedding caches in model builder #3431
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: "MacOS CPU ARM64 Build" | |
on: | |
workflow_dispatch: | |
push: | |
branches: | |
- main | |
- rel-* | |
pull_request: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
cancel-in-progress: true | |
env: | |
ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" | |
ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" | |
jobs: | |
mac-cpu-arm64-build: | |
runs-on: macos-latest # arm64 | |
steps: | |
- name: Checkout OnnxRuntime GenAI repo | |
uses: actions/checkout@v4 | |
with: | |
submodules: true | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: '3.12.x' | |
- name: Setup Java 17 | |
uses: actions/setup-java@v4 | |
with: | |
java-version: '17' | |
distribution: 'temurin' | |
cache: 'gradle' | |
- name: Setup Gradle | |
uses: gradle/actions/setup-gradle@v3 | |
with: | |
gradle-version: '8.6' | |
- name: Get the Latest OnnxRuntime Nightly Version | |
run: | | |
ORT_NIGHTLY_VERSION=$(curl -s "${{ env.ORT_NIGHTLY_REST_API }}" | jq -r '.value[0].versions[0].normalizedVersion') | |
echo "$ORT_NIGHTLY_VERSION" | |
echo "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" >> $GITHUB_ENV | |
- name: Download OnnxRuntime Nightly | |
run: | | |
nuget install ${{ env.ORT_PACKAGE_NAME }} -version ${{ env.ORT_NIGHTLY_VERSION }} -x | |
- name: Extract OnnxRuntime library and header files | |
run: | | |
mkdir -p ort/lib | |
mv ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/ | |
mv ${{ env.ORT_PACKAGE_NAME }}/runtimes/osx-arm64/native/* ort/lib/ | |
- name: Configure CMake | |
run: | | |
cmake --preset macos_arm64_cpu_release | |
- name: Build with CMake | |
run: | | |
cmake --build --preset macos_arm64_cpu_release --parallel | |
continue-on-error: false | |
- name: Install the python wheel and test dependencies | |
run: | | |
python3 -m venv genai-macos-venv | |
source genai-macos-venv/bin/activate | |
python3 -m pip install -r test/python/requirements.txt | |
python3 -m pip install -r test/python/macos/torch/requirements.txt | |
python3 -m pip install -r test/python/macos/ort/requirements.txt | |
python3 -m pip install build/cpu/osx-arm64/wheel/onnxruntime_genai*.whl --no-deps | |
- name: Remove the ort lib and header files | |
run: | | |
rm -rf ort | |
- name: Verify Build Artifacts | |
if: always() | |
continue-on-error: true | |
run: | | |
ls -l ${{ github.workspace }}/build/cpu/osx-arm64 | |
ls -l ${{ github.workspace }}/build/cpu/osx-arm64/wheel | |
# This will also download all the test models to the test/test_models directory | |
# These models are used by the python tests as well as C#, C++ and others. | |
- name: Run the python tests | |
run: | | |
source genai-macos-venv/bin/activate | |
export HF_TOKEN="12345" | |
export ORTGENAI_LOG_ORT_LIB=1 | |
python3 -m pip install requests | |
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models | |
- name: Build the C# API and Run the C# Tests | |
run: | | |
export ORTGENAI_LOG_ORT_LIB=1 | |
cd test/csharp | |
dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/osx-arm64" --verbosity normal | |
- name: Build the Java API and Run the Java Tests | |
run: | | |
set -e -x | |
source genai-macos-venv/bin/activate | |
python3 build.py --config=Release --build_dir build/cpu/osx-arm64 --build_java --parallel --cmake_generator "Unix Makefiles" --macos MacOSX --osx_arch arm64 --apple_deploy_target 12.0 --apple_sysroot macosx | |
- name: Run tests | |
run: | | |
set -e -x | |
export ORTGENAI_LOG_ORT_LIB=1 | |
export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$GITHUB_WORKSPACE/build/cpu/osx-arm64 | |
./build/cpu/osx-arm64/unit_tests |