Skip to content

Commit

Permalink
feat(ci): Add integration test workflow with AWS Bedrock access (#128)
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyhoo authored Nov 14, 2024
1 parent 7559e8d commit dd38a67
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 20 deletions.
23 changes: 23 additions & 0 deletions .github/actions/test-assistant/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Composite action: assume an AWS role via OIDC, then run the repository's
# test_assistant.sh against one test submodule (e.g. "integration").
name: "Test Assistant"
inputs:
  submodule-to-test:
    description: "The submodule to run tests against such as integration"
    required: true
  aws-role-arn:
    description: "AWS role ARN to assume"
    required: true

runs:
  using: "composite"
  steps:
    # OIDC federation — requires `permissions: id-token: write` in the
    # calling workflow. Credentials are limited to 30 minutes
    # (role-duration-seconds: 1800).
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ inputs.aws-role-arn }}
        aws-region: us-west-2
        role-duration-seconds: 1800

    # `bash -l {0}` runs a login shell so the conda environment activated by
    # setup-miniconda in the calling workflow is on PATH.
    - name: Run Tests
      shell: bash -l {0}
      run: |
        chmod +x ./.github/workflow_scripts/test_assistant.sh && ./.github/workflow_scripts/test_assistant.sh '${{ inputs.submodule-to-test }}'
7 changes: 7 additions & 0 deletions .github/workflow_scripts/env_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ function setup_build_env {
python -m pip install ruff
}

# Install the test toolchain: pytest plus pytest-xdist (parallel execution,
# used by test_assistant.sh via `pytest -n 2`).
function setup_test_env {
    # Upgrade pip first so the installs below use the current resolver.
    python -m pip install --upgrade pip
    # One invocation instead of one per package: pip resolves the two
    # packages' dependencies together and avoids redundant startup cost.
    python -m pip install pytest pytest-xdist
}


# Editable install of the package with its dev extras.
function install_all {
    # Quote ".[dev]" — unquoted, the shell treats [dev] as a glob pattern and
    # would silently substitute a matching filename (e.g. ".d") if one existed.
    python -m pip install -e ".[dev]"
}
12 changes: 12 additions & 0 deletions .github/workflow_scripts/test_assistant.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Run the unit-test suite for a single submodule.
# Usage: test_assistant.sh <module>    e.g. test_assistant.sh integration

# Fail fast and echo commands from the very first statement (the original
# set -ex came after the argument assignment, leaving it untraced/unchecked).
set -ex

MODULE="$1"

# Guard against a missing argument — `set -e` does not catch an empty
# expansion, and an empty MODULE would silently run the whole test tree.
if [ -z "$MODULE" ]; then
    echo "usage: $0 <module>" >&2
    exit 1
fi

# Quote the dirname expansion so paths with spaces don't word-split.
source "$(dirname "$0")/env_setup.sh"

install_all
setup_test_env

# -n 2: two pytest-xdist workers; results.xml feeds CI test reporting.
python -m pytest -n 2 --junitxml=results.xml "tests/unittests/${MODULE}/"
92 changes: 92 additions & 0 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
name: Continuous Integration
on:
  push:
  pull_request_target:
    types: [labeled, synchronize, opened]

# id-token: write is required for the OIDC AWS role assumption performed by
# the test-assistant composite action.
permissions:
  id-token: write
  contents: read

jobs:
  # Security gate for pull_request_target: fork PRs run with repo secrets, so
  # a PR is only allowed through if the actor has write permission OR a
  # maintainer applied the 'safe to test' label.
  permission_check:
    runs-on: ubuntu-latest
    steps:
      - name: Check for Actor Permission
        id: check
        continue-on-error: true
        uses: prince-chrismc/check-actor-permissions-action@v3
        with:
          github_token: ${{ github.token }}
          permission: write
      - name: Debug Information
        if: ${{ github.event_name == 'pull_request_target' }}
        run: |
          echo "Event Name: ${{ github.event_name }}"
          echo "Labels: ${{ toJson(github.event.pull_request.labels) }}"
          echo "Permitted: ${{ steps.check.outputs.permitted }}"
          echo "Safe to Test Label Present: ${{ contains(github.event.pull_request.labels.*.name, 'safe to test') }}"
      - name: Check PR Safe to Run
        if: ${{ github.event_name == 'pull_request_target' && !contains(github.event.pull_request.labels.*.name, 'safe to test') && steps.check.outputs.permitted == 'false' }}
        run: exit 1
      - name: Remove Safe to Test Label # One commit is safe doesn't mean the next commit is safe.
        if: ${{ github.event_name == 'pull_request_target' }}
        # NOTE(review): the action reference was garbled in the page scrape
        # ("[email protected]"); restored to the well-known label-removal action.
        uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: 'safe to test'
  integration_test:
    needs: permission_check
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, windows-latest, ubuntu-latest]
        # Versions quoted — an unquoted 3.10 would parse as the float 3.1.
        python: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout repository
        if: ${{ github.event_name != 'pull_request_target' }}
        uses: actions/checkout@v4

      # For pull_request_target the default checkout is the BASE branch;
      # explicitly check out the PR head (safe only behind permission_check).
      - name: Checkout repository(Pull Request Target)
        if: ${{ github.event_name == 'pull_request_target' }}
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Setup Miniconda
        uses: conda-incubator/setup-miniconda@v3
        with:
          activate-environment: assistant_py3
          environment-file: .github/workflows_env/unittest_env.yml
          auto-update-conda: true
          python-version: ${{ matrix.python }}
          miniconda-version: "latest"

      # Pinned libomp formula for LightGBM/XGBoost compatibility on macOS.
      - name: Setup OMP for macOS
        if: matrix.os == 'macos-latest'
        shell: bash -l {0}
        run: |
          wget https://raw.githubusercontent.com/Homebrew/homebrew-core/fb8323f2b170bd4ae97e1bac9bf3e2983af3fdb0/Formula/libomp.rb
          if brew list | grep -q libomp; then
            brew unlink libomp
          fi
          brew install libomp.rb
          rm libomp.rb
      - name: Check if changes beside docs
        uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: |
            other_than_docs:
              - '!(docs/**)**'
      - name: Integration Test
        if: steps.changes.outputs.other_than_docs == 'true'
        uses: ./.github/actions/test-assistant
        with:
          aws-role-arn: ${{ secrets.AWS_CI_ROLE_ARN }}
          submodule-to-test: integration
3 changes: 3 additions & 0 deletions .github/workflows_env/unittest_env.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Conda environment consumed by setup-miniconda in the CI workflow.
name: assistant_py3
dependencies:
  - pip
  - setuptools>=61.0
  - pip:
      # NOTE(review): nose is unmaintained and fails to import on
      # Python >= 3.10, yet the CI matrix includes 3.10/3.11 — confirm it is
      # still needed before keeping it here.
      - nose
      - flake8
      # pytest + xdist back the `pytest -n 2` invocation in test_assistant.sh.
      - pytest
      - pytest-xdist
41 changes: 25 additions & 16 deletions src/autogluon_assistant/assistant.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import logging
import signal
from typing import Any, Dict, Union
import sys
import threading
from contextlib import contextmanager
from typing import Any, Dict, Optional, Union

from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf
Expand All @@ -19,25 +22,31 @@
TestIDColumnInference,
TrainIDColumnInference,
)
from .transformer import TransformTimeoutError

logger = logging.getLogger(__name__)


class timeout:
    """Context manager that raises TransformTimeoutError when the wrapped
    block runs longer than ``seconds``.

    NOTE(review): built on SIGALRM, which exists only on Unix — this class
    cannot work on Windows (the reason it was replaced by the
    platform-aware ``timeout`` context manager). It also permanently
    clobbers any previously installed SIGALRM handler.
    """

    def __init__(self, seconds=1, error_message="Transform timed out"):
        # seconds: wall-clock budget; error_message: text carried by the
        # TransformTimeoutError raised on expiry.
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        # SIGALRM handler installed by __enter__.
        raise TransformTimeoutError(self.error_message)

    def __enter__(self):
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, type, value, traceback):
        # Cancel any pending alarm; the previous handler is NOT restored.
        signal.alarm(0)
@contextmanager
def timeout(seconds: int, error_message: Optional[str] = None):
    """Raise ``TimeoutError`` if the enclosed block runs longer than ``seconds``.

    Parameters
    ----------
    seconds
        Wall-clock budget for the block, in whole seconds.
    error_message
        Message carried by the raised ``TimeoutError`` (may be ``None``).

    Raises
    ------
    TimeoutError
        When the budget is exceeded.
    """
    if sys.platform == "win32":
        # Windows has no SIGALRM. BUG FIX: the previous implementation made
        # the threading.Timer raise TimeoutError in the *timer thread* — that
        # exception dies with the thread and never interrupts the protected
        # block. Instead, ask the interpreter to deliver KeyboardInterrupt to
        # the main thread and translate it into TimeoutError here.
        import _thread

        timer = threading.Timer(seconds, _thread.interrupt_main)
        timer.start()
        try:
            yield
        except KeyboardInterrupt:
            # Limitation: a user Ctrl-C inside the block is indistinguishable
            # from the timer firing and is also reported as a timeout.
            raise TimeoutError(error_message) from None
        finally:
            timer.cancel()
    else:
        # Unix: SIGALRM delivers the timeout to the main thread directly.
        def handle_timeout(signum, frame):
            raise TimeoutError(error_message)

        previous_handler = signal.signal(signal.SIGALRM, handle_timeout)
        signal.alarm(seconds)
        try:
            yield
        finally:
            # Cancel any pending alarm and restore the handler active before
            # entry (the original clobbered it permanently).
            signal.alarm(0)
            signal.signal(signal.SIGALRM, previous_handler)


class TabularPredictionAssistant:
Expand Down
1 change: 0 additions & 1 deletion src/autogluon_assistant/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def fit_predict(self, task: TabularPredictionTask) -> Any:


class AutogluonTabularPredictor(Predictor):

def __init__(self, config: Any):
self.config = config
self.metadata: Dict[str, Any] = defaultdict(dict)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


class CAAFETransformer(BaseFeatureTransformer):

identifier = "caafe"

def __init__(
Expand Down Expand Up @@ -78,7 +77,6 @@ def _fit_dataframes(
dataset_description: str = "",
**kwargs,
) -> None:

if problem_type not in ("binary", "multiclass"):
logger.info("Feature transformer CAAFE only supports classification problems.")
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,6 @@ def _calculate_and_evaluate(self, candidate_features, train_idx, val_idx):


class OpenFETransformer(BaseFeatureTransformer):

identifier = "openfe"

def __init__(self, n_jobs: int = 1, num_features_to_keep: int = 10, **kwargs) -> None:
Expand Down
59 changes: 59 additions & 0 deletions tests/unittests/integration/test_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os

import pandas as pd
import pytest

from autogluon_assistant import run_assistant


@pytest.fixture
def titanic_data_path(tmp_path):
    """Stage the Titanic dataset for an end-to-end assistant run.

    Downloads the train/test splits from the AutoGluon S3 bucket into a
    temporary directory, writes a plain-text task description beside them,
    and returns the directory path as a ``str``.

    NOTE(review): requires outbound HTTPS access to S3 at test time — confirm
    CI runners permit this.
    """
    # Create data directory
    data_dir = tmp_path / "titanic_data"
    data_dir.mkdir()

    # Download and save train/test data
    train_url = "https://autogluon.s3.amazonaws.com/datasets/titanic/train.csv"
    test_url = "https://autogluon.s3.amazonaws.com/datasets/titanic/test.csv"

    # Round-trip through pandas so files land as clean CSVs without an index.
    pd.read_csv(train_url).to_csv(data_dir / "train.csv", index=False)
    pd.read_csv(test_url).to_csv(data_dir / "test.csv", index=False)

    # Create description file
    description = """
Binary classification task to predict passenger survival on the Titanic.
Target Variable:
- Survived: Survival (0 = No; 1 = Yes)
Features include:
- Pclass: Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
- Sex: Gender
- Age: Age in years
- SibSp: Number of siblings/spouses aboard
- Parch: Number of parents/children aboard
- Fare: Passenger fare
- Embarked: Port of embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
Evaluation metric: Binary classification accuracy
"""

    # "descriptions.txt" is the filename the assistant looks for in task_path.
    with open(data_dir / "descriptions.txt", "w") as f:
        f.write(description)

    return str(data_dir)


def test_titanic_prediction(titanic_data_path):
    """End-to-end smoke test: run the assistant on the staged Titanic data
    and sanity-check the prediction file it produces."""
    # Produce predictions with the medium-quality preset.
    predictions_file = run_assistant(task_path=titanic_data_path, presets="medium_quality")
    assert os.path.exists(predictions_file)

    # The output must contain one binary Survived value per test row.
    expected_rows = len(pd.read_csv(os.path.join(titanic_data_path, "test.csv")))
    predicted = pd.read_csv(predictions_file)

    assert "Survived" in predicted.columns
    assert len(predicted) == expected_rows
    assert predicted["Survived"].isin([0, 1]).all()

0 comments on commit dd38a67

Please sign in to comment.