Simplify parquet_hybrid_test.py to speedup the execution #8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# A workflow to run mvn verify check
# Workflow-level settings: name, trigger, run concurrency and shared environment.
name: 'mvn[compile,RAT,scalastyle,docgen]'

# Runs for pull requests when they are opened, receive new commits, or are reopened.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened

# Runs are grouped by workflow name plus PR number (or the ref when no PR number
# is available); starting a new run in a group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  # Flags appended to the mvn invocations in the jobs below. The folded scalar
  # (>-) joins these lines into one space-separated string; $HOME is left
  # literal here and is expanded by the shell that runs the command.
  COMMON_MVN_FLAGS: >-
    -Ddist.jar.compress=false
    -DskipTests
    -Dmaven.scaladoc.skip
    -Dmaven.artifact.threads=10
    --batch-mode
    -Dmaven.wagon.http.retryHandler.count=3
    -Dmaven.wagon.httpconnectionManager.ttlSeconds=30
    -Daether.connector.http.connectionMaxTtl=30
    -Drapids.secondaryCacheDir=$HOME/.m2/repository/.sbt/1.0/zinc/org.scala-sbt

jobs:
# Job (nested under `jobs:`): computes a per-day cache key, restores/populates the
# ~/.m2 cache, and publishes the Spark/JDK/Scala 2.13 build matrices as job outputs
# that the downstream jobs consume through `needs.cache-dependencies.outputs.*`.
  cache-dependencies:
    runs-on: ubuntu-latest
    outputs:
      dailyCacheKey: ${{ steps.generateCacheKey.outputs.dailyCacheKey }}
      defaultSparkVersion: ${{ steps.allShimVersionsStep.outputs.defaultSparkVersion }}
      sparkTailVersions: ${{ steps.allShimVersionsStep.outputs.tailVersions }}
      sparkJDKVersions: ${{ steps.allShimVersionsStep.outputs.jdkVersions }}
      scala213Versions: ${{ steps.allShimVersionsStep.outputs.scala213Versions }}
    steps:
      - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: 8
      - name: Generate daily cache key
        id: generateCacheKey
        run: |
          set -x
          # Key = OS + hash of all pom.xml files + PR base branch + today's date.
          cacheKey="${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-$(date +'%Y-%m-%d')"
          # Append (-a) rather than truncate: the key is needed both as an env var
          # (for the cache step below) and as a step output (for the job outputs).
          echo "dailyCacheKey=$cacheKey" | tee -a "$GITHUB_ENV" "$GITHUB_OUTPUT"
      - name: Cache local Maven repository
        id: cache
        uses: actions/cache@v4
        with:
          path: ~/.m2
          key: ${{ env.dailyCacheKey }}
          restore-keys: ${{ runner.os }}-maven-
      - name: populate-daily-cache
        # Only needed when today's exact key was not already in the cache.
        if: steps.cache.outputs.cache-hit != 'true'
        run: |
          set -x
          # Up to 3 retries, sleeping 30s, 60s, 120s between attempts.
          max_retry=3; delay=30; i=1
          # NOTE(review): the `for` loop is the left operand of `&&`, so only the
          # exit status of its last command decides whether a retry happens;
          # earlier failures inside the loop do not trigger one. Confirm this
          # best-effort behavior is intended.
          while true; do
            for pom in pom.xml scala2.13/pom.xml
            do
              # Resolve dependencies offline for every build version listed in
              # the `included_buildvers` property (commas stripped, one per xargs call).
              mvn ${{ env.COMMON_MVN_FLAGS }} --file $pom help:evaluate -pl dist \
                -Dexpression=included_buildvers \
                -DforceStdout -PnoSnapshots -q | tr -d ',' | \
                xargs -n 1 bash -c \
                  'mvn ${{ env.COMMON_MVN_FLAGS }} --file $1 -Dbuildver=$2 de.qaware.maven:go-offline-maven-plugin:resolve-dependencies' _ $pom
              # compile base versions to cache scala compiler and compiler bridge
              mvn ${{ env.COMMON_MVN_FLAGS }} --file $pom \
                process-test-resources -pl sql-plugin-api -am
            done && break || {
              if [[ $i -le $max_retry ]]; then
                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
              else
                echo "mvn command failed. Exit 1"; exit 1
              fi
            }
          done
      - name: all shim versions
        id: allShimVersionsStep
        run: |
          set -x
          . jenkins/version-def.sh
          # Build a comma-separated list of JSON objects; printf repeats the format
          # once per array element, and ${var:1} drops the leading comma.
          svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS_TAIL[@]}")
          svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
          # get private artifact version
          privateVer=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)
          # do not add empty snapshot versions or when private version is released one (does not include snapshot shims)
          if [[ ${#SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]} -gt 0 && $privateVer == *"-SNAPSHOT" ]]; then
            svArrBodySnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":true}" "${SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]}")
            svArrBodySnapshot=${svArrBodySnapshot:1}
            # Format and argument are quoted so `[%s]` is never treated as a glob
            # and the JSON body is passed as a single word.
            svJsonStr=$(printf '{"include":[%s]}' "$svArrBodyNoSnapshot,$svArrBodySnapshot")
          else
            svJsonStr=$(printf '{"include":[%s]}' "$svArrBodyNoSnapshot")
          fi
          echo "tailVersions=$svJsonStr" >> "$GITHUB_OUTPUT"
          # default version
          echo "defaultSparkVersion=${SPARK_BASE_SHIM_VERSION}" >> "$GITHUB_OUTPUT"
          jdkHeadVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":8}" "${SPARK_BASE_SHIM_VERSION}")
          # jdk11
          jdk11VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
          # jdk17
          jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
          # jdk
          jdkVersionArrBody=$jdkHeadVersionArrBody$jdk11VersionArrBody$jdk17VersionArrBody
          jdkVersionArrBody=${jdkVersionArrBody:1}
          jdkVersionJsonStr=$(printf '{"include":[%s]}' "$jdkVersionArrBody")
          echo "jdkVersions=$jdkVersionJsonStr" >> "$GITHUB_OUTPUT"
          # Re-source the version definitions with SCALA_BINARY_VER=2.13 set and
          # build the Scala 2.13 matrix the same way.
          SCALA_BINARY_VER=2.13
          . jenkins/version-def.sh
          svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]}")
          svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
          # get private artifact version
          privateVer=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)
          # do not add empty snapshot versions or when private version is released one (does not include snapshot shims)
          if [[ ${#SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]} -gt 0 && $privateVer == *"-SNAPSHOT" ]]; then
            svArrBodySnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":true}" "${SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]}")
            svArrBodySnapshot=${svArrBodySnapshot:1}
            svJsonStr=$(printf '{"include":[%s]}' "$svArrBodyNoSnapshot,$svArrBodySnapshot")
          else
            svJsonStr=$(printf '{"include":[%s]}' "$svArrBodyNoSnapshot")
          fi
          echo "scala213Versions=$svJsonStr" >> "$GITHUB_OUTPUT"
# Job (nested under `jobs:`): runs `mvn package` for the integration_tests, tests
# and tools modules (plus the modules they depend on, via -am) once per entry of
# the sparkTailVersions matrix. Entries flagged isSnapshot may fail without
# failing the workflow (continue-on-error).
  package-tests:
    needs: cache-dependencies
    continue-on-error: ${{ matrix.isSnapshot }}
    strategy:
      matrix: ${{ fromJSON(needs.cache-dependencies.outputs.sparkTailVersions) }}
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
      - name: Setup Java and Maven Env
        uses: actions/setup-java@v4
        with:
          # temurin replaces the discontinued `adopt` feed and matches the
          # distribution used by the cache-dependencies job.
          distribution: temurin
          java-version: 8
      - name: Cache local Maven repository
        uses: actions/cache@v4
        with:
          path: ~/.m2
          # Same key that cache-dependencies generated for today.
          key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }}
      - name: check runtime before tests
        run: |
          env | grep JAVA
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
      - name: package tests check
        run: |
          # https://github.com/NVIDIA/spark-rapids/issues/8847
          # specify expected versions
          export JAVA_HOME=${JAVA_HOME_8_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
          # test command, will retry for 3 times if failed
          # (sleeping 30s, 60s, 120s between attempts).
          max_retry=3; delay=30; i=1
          while true; do
            mvn package \
              -pl integration_tests,tests,tools -am -P 'individual,pre-merge' \
              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
              if [[ $i -le $max_retry ]]; then
                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
              else
                echo "mvn command failed. Exit 1"; exit 1
              fi
            }
          done
# Job (nested under `jobs:`): regenerates the Scala 2.13 build files, fails if they
# differ from what is checked in, then runs `mvn package` from the scala2.13
# directory once per entry of the scala213Versions matrix. Entries flagged
# isSnapshot may fail without failing the workflow (continue-on-error).
  package-tests-scala213:
    needs: cache-dependencies
    continue-on-error: ${{ matrix.isSnapshot }}
    strategy:
      matrix: ${{ fromJSON(needs.cache-dependencies.outputs.scala213Versions) }}
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
      - name: Setup Java and Maven Env
        uses: actions/setup-java@v4
        with:
          # temurin replaces the discontinued `adopt` feed and matches the
          # distribution used by the cache-dependencies job.
          distribution: temurin
          java-version: 8
      - name: Cache local Maven repository
        uses: actions/cache@v4
        with:
          path: ~/.m2
          # Same key that cache-dependencies generated for today.
          key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }}
      - name: check runtime before tests
        run: |
          env | grep JAVA
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
      - name: package tests check
        run: |
          # https://github.com/NVIDIA/spark-rapids/issues/8847
          # specify expected versions
          export JAVA_HOME=${JAVA_HOME_8_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
          # verify Scala 2.13 build files
          ./build/make-scala-version-build-files.sh 2.13
          # verify git status: any changed or untracked path mentioning scala2.13
          # means the generated files are out of sync with the repository.
          if [ -n "$(echo -n $(git status -s | grep 'scala2.13'))" ]; then
            # `git add -N` records intent-to-add so new files show up in the diff.
            git add -N scala2.13/* && git diff 'scala2.13/*'
            echo "Generated Scala 2.13 build files don't match what's in repository"
            exit 1
          fi
          # change to Scala 2.13 Directory
          cd scala2.13
          # test command, will retry for 3 times if failed
          # (sleeping 30s, 60s, 120s between attempts).
          max_retry=3; delay=30; i=1
          while true; do
            mvn package \
              -pl integration_tests,tests,tools -am -P 'individual,pre-merge' \
              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
              if [[ $i -le $max_retry ]]; then
                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
              else
                echo "mvn command failed. Exit 1"; exit 1
              fi
            }
          done
# Job (nested under `jobs:`): runs `mvn verify` with the individual and pre-merge
# profiles once per (spark-version, java-version) pair of the sparkJDKVersions
# matrix.
  verify-all-modules:
    needs: cache-dependencies
    runs-on: ubuntu-latest
    strategy:
      matrix: ${{ fromJSON(needs.cache-dependencies.outputs.sparkJDKVersions) }}
    steps:
      - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
      - name: Setup Java and Maven Env
        uses: actions/setup-java@v4
        with:
          # temurin replaces the discontinued `adopt` feed and matches the
          # distribution used by the cache-dependencies job.
          distribution: temurin
          java-version: ${{ matrix.java-version }}
      - name: Cache local Maven repository
        uses: actions/cache@v4
        with:
          path: ~/.m2
          # Same key that cache-dependencies generated for today.
          key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }}
      - name: check runtime before tests
        run: |
          env | grep JAVA
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
      - name: Build JDK
        run: |
          # https://github.com/NVIDIA/spark-rapids/issues/8847
          # specify expected versions
          # The workflow expression is substituted before the shell runs, so this
          # reads e.g. ${JAVA_HOME_11_X64} for java-version 11.
          export JAVA_HOME=${JAVA_HOME_${{ matrix.java-version }}_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
          # test command, will retry for 3 times if failed
          # (sleeping 30s, 60s, 120s between attempts).
          max_retry=3; delay=30; i=1
          while true; do
            mvn verify \
              -P "individual,pre-merge" -Dbuildver=${{ matrix.spark-version }} \
              ${{ env.COMMON_MVN_FLAGS }} && break || {
              if [[ $i -le $max_retry ]]; then
                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
              else
                echo "mvn command failed. Exit 1"; exit 1
              fi
            }
          done
# Job (nested under `jobs:`): generates a Maven wrapper for each Maven version in
# the matrix and runs `./mvnw install` with the individual and pre-merge profiles
# against the default Spark version published by cache-dependencies.
  install-modules:
    needs: cache-dependencies
    runs-on: ubuntu-latest
    strategy:
      matrix:
        maven-version: [3.6.3, 3.8.8, 3.9.3]
    steps:
      - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
      - name: Setup Java
        uses: actions/setup-java@v4
        with:
          # temurin replaces the discontinued `adopt` feed and matches the
          # distribution used by the cache-dependencies job.
          distribution: temurin
          java-version: 11
      - name: Cache local Maven repository
        uses: actions/cache@v4
        with:
          path: ~/.m2
          # Same key that cache-dependencies generated for today.
          key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }}
      - name: Setup Maven Wrapper
        # Creates ./mvnw pinned to the Maven version under test.
        run: mvn wrapper:wrapper -Dmaven=${{ matrix.maven-version }}
      - name: check runtime before tests
        run: |
          env | grep JAVA
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
      - name: Install with Maven ${{ matrix.maven-version }}
        run: |
          # https://github.com/NVIDIA/spark-rapids/issues/8847
          # specify expected versions
          export JAVA_HOME=${JAVA_HOME_11_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
          # test command, will retry for 3 times if failed
          # (sleeping 30s, 60s, 120s between attempts).
          max_retry=3; delay=30; i=1
          while true; do
            ./mvnw install \
              -P "individual,pre-merge" \
              -Dbuildver=${{ needs.cache-dependencies.outputs.defaultSparkVersion }} \
              ${{ env.COMMON_MVN_FLAGS }} && break || {
              if [[ $i -le $max_retry ]]; then
                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
              else
                echo "mvn command failed. Exit 1"; exit 1
              fi
            }
          done