Skip to content

perf: benchmark lance vs parquet read time, write time, and compressed size #8589

perf: benchmark lance vs parquet read time, write time, and compressed size

perf: benchmark lance vs parquet read time, write time, and compressed size #8589

Workflow file for this run

name: Python
on:
push:
branches:
- main
pull_request:
paths:
- python/**
- rust/**
- protos/**
- .github/workflows/python.yml
- .github/workflows/build_linux_wheel/**
- .github/workflows/build_mac_wheel/**
- .github/workflows/run_tests/**
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
# key, so we set it to make sure it is always consistent.
CARGO_TERM_COLOR: always
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
RUST_BACKTRACE: "1"
CI: "true"
jobs:
lint:
timeout-minutes: 45
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: python
env:
# Need up-to-date compilers for kernels
CC: gcc-12
CXX: g++-12
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.11 # Ray does not support 3.12 yet.
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- name: Install linting tools
run: |
pip install ruff==0.4.1 maturin tensorflow tqdm ray[data]
pip install torch --index-url https://download.pytorch.org/whl/cpu
- name: Lint Python
run: |
ruff format --check python
ruff check python
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Lint Rust
run: |
cargo fmt --all -- --check
cargo clippy --all-features --tests -- -D warnings
- name: Build
run: |
python -m venv venv
source venv/bin/activate
pip install torch tqdm --index-url https://download.pytorch.org/whl/cpu
pip install maturin
maturin develop --extras tests,ray
- name: Run doctest
run: |
source venv/bin/activate
pytest --doctest-modules python/lance
linux:
timeout-minutes: 45
strategy:
matrix:
python-minor-version: [ "9", "12" ]
name: "Python Linux 3.${{ matrix.python-minor-version }} x86_64"
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: python
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
prefix-key: "manylinux2014" # use this to flush the cache
- uses: ./.github/workflows/build_linux_wheel
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: sudo rm -rf target/wheels
linux-arm:
timeout-minutes: 45
#runs-on: warp-ubuntu-latest-arm64-4x
runs-on: buildjet-4vcpu-ubuntu-2204-arm
name: Python Linux 3.${{ matrix.python-minor-version }} ARM
strategy:
matrix:
python-minor-version: [ "12" ]
defaults:
run:
shell: bash
working-directory: python
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
run: |
sudo apt update
sudo apt install -y -qq wget software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install -y python3.${{ matrix.python-minor-version }}
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.${{ matrix.python-minor-version }} 1
sudo apt install -y python3-pip python3.${{ matrix.python-minor-version }}-distutils
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
prefix-key: "manylinux2014" # use this to flush the cache
- uses: ./.github/workflows/build_linux_wheel
with:
arm-build: "true"
manylinux: "2_24"
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: sudo rm -rf target/wheels
mac:
timeout-minutes: 45
name: Python macOS 3.12 ARM
runs-on: "macos-14"
defaults:
run:
shell: bash
working-directory: python
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_mac_wheel
- uses: ./.github/workflows/run_tests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: rm -rf target/wheels
windows:
runs-on: windows-latest
timeout-minutes: 90
defaults:
run:
shell: powershell
working-directory: python
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
- uses: ./.github/workflows/build_windows_wheel
- uses: ./.github/workflows/run_tests
aws-integtest:
timeout-minutes: 45
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash
working-directory: python
services:
minio:
image: lazybit/minio
ports:
- 9000:9000
env:
MINIO_ACCESS_KEY: ACCESSKEY
MINIO_SECRET_KEY: SECRETKEY
options: --name=minio --health-cmd "curl http://localhost:9000/minio/health/live"
dynamodb-local:
image: amazon/dynamodb-local
ports:
- 8000:8000
env:
AWS_ACCESS_KEY_ID: ACCESSKEY
AWS_SECRET_ACCESS_KEY: SECRETKEY
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11" # TODO: upgrade when ray supports 3.12
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
prefix-key: "manylinux2014" # use this to flush the cache
- uses: ./.github/workflows/build_linux_wheel
- name: Install dependencies
run: |
pip install ray[data]
pip install torch --index-url https://download.pytorch.org/whl/cpu
- uses: ./.github/workflows/run_integtests
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: sudo rm -rf target/wheels