Skip to content

Commit

Permalink
ci/transformers: add baseline checks for test cases
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Rogozhkin <[email protected]>
  • Loading branch information
dvrogozh committed Jan 9, 2025
1 parent ce10e7e commit aa79416
Show file tree
Hide file tree
Showing 2 changed files with 285 additions and 8 deletions.
269 changes: 269 additions & 0 deletions .github/scripts/check-transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
import argparse
import sys

from junitparser import JUnitXml, Error, Failure, Skipped

# Command line: one or more positional JUnit XML report paths
# (nargs='+' makes argparse reject an empty list).
parser = argparse.ArgumentParser()
parser.add_argument('junitxml', nargs='+')
# Parsed at module level, so the script must be run directly, not imported.
args = parser.parse_args()

# Baseline of test cases that are expected to fail.
#
# Outer key: the test class name as it appears in the JUnit report.
# Inner key: the test name within that class.
# Inner value:
#   * None               - a plain known failure;
#   * {'flicky': True}   - a test with an unstable outcome; is_flicky() checks
#                          this entry and such cases are reported in their own
#                          sections instead of as failures or passes.
failing_cases = {
'tests.benchmark.test_benchmark.BenchmarkTest': {
'test_inference_encoder_decoder_with_configs': None,
'test_inference_fp16': None,
'test_inference_no_configs': None,
'test_inference_no_configs_only_pretrain': None,
'test_inference_no_model_no_architectures': None,
'test_inference_torchscript': None,
'test_inference_with_configs': None,
'test_save_csv_files': None,
'test_trace_memory': None,
'test_train_encoder_decoder_with_configs': None,
'test_train_no_configs': None,
'test_train_no_configs_fp16': None,
'test_train_with_configs': None,
},
'tests.generation.test_logits_process.LogitsProcessorTest': {
'test_watermarking_processor': None,
},
'tests.generation.test_utils.GenerationIntegrationTests': {
'test_assisted_decoding_encoder_decoder_shared_encoder': None,
'test_assisted_decoding_num_assistant_tokens_heuristic_schedule': None,
'test_assisted_generation_early_exit': None,
'test_custom_logits_processor': None,
'test_default_max_length_warning': None,
'test_eos_token_id_int_and_list_beam_search': None,
'test_eos_token_id_int_and_list_top_k_top_sampling': None,
'test_generate_compile_fullgraph_tiny': None,
'test_generated_length_assisted_generation': None,
'test_max_new_tokens_encoder_decoder': None,
'test_min_length_if_input_embeds': None,
'test_model_kwarg_assisted_decoding_decoder_only': None,
'test_model_kwarg_assisted_decoding_encoder_decoder': None,
'test_model_kwarg_encoder_signature_filtering': None,
'test_prepare_inputs_for_generation_decoder_llm': None,
'test_stop_sequence_stopping_criteria': None,
},
'tests.models.detr.test_image_processing_detr.DetrImageProcessingTest': {
'test_fast_is_faster_than_slow': { 'flicky': True },
},
'tests.models.dpt.test_modeling_dpt_auto_backbone.DPTModelTest': {
'test_batching_equivalence': { 'flicky': True },
},
'tests.models.fuyu.test_modeling_fuyu.FuyuModelTest': {
'test_prompt_lookup_decoding_matches_greedy_search': { 'flicky': True },
},
'tests.models.git.test_modeling_git.GitModelTest': {
'test_generate_continue_from_past_key_values': { 'flicky': True },
'test_inputs_embeds_matches_input_ids': None,
},
'tests.models.hiera.test_modeling_hiera.HieraModelTest': {
'test_torch_fx': None,
'test_torch_fx_output_loss': None,
},
'tests.models.mamba.test_modeling_mamba.MambaIntegrationTests': {
'test_simple_generate_1_cpu': None,
},
'tests.models.pix2struct.test_modeling_pix2struct.Pix2StructModelTest': {
'test_new_cache_format_0': None,
'test_new_cache_format_1': None,
'test_new_cache_format_2': None,
},
'tests.models.speecht5.test_modeling_speecht5.SpeechT5ForTextToSpeechIntegrationTests': {
'test_batch_generation': None,
},
'tests.pipelines.test_pipelines_automatic_speech_recognition.AutomaticSpeechRecognitionPipelineTests': {
'test_small_model_pt_seq2seq': None,
},
'tests.pipelines.test_pipelines_common.CustomPipelineTest': {
'test_custom_code_with_string_tokenizer': None,
},
'tests.pipelines.test_pipelines_depth_estimation.DepthEstimationPipelineTests': {
'test_multiprocess': None,
},
'tests.pipelines.test_pipelines_image_to_text.ImageToTextPipelineTests': {
'test_small_model_pt': None,
},
'tests.pipelines.test_pipelines_summarization.SummarizationPipelineTests': {
'test_small_model_pt': None,
},
'tests.pipelines.test_pipelines_text_generation.TextGenerationPipelineTests': {
'test_small_model_pt': None,
'test_stop_sequence_stopping_criteria': None,
},
'tests.pipelines.test_pipelines_video_classification.VideoClassificationPipelineTests': {
'test_small_model_pt': None,
},
'tests.pipelines.test_pipelines_visual_question_answering.VisualQuestionAnsweringPipelineTests': {
'test_small_model_pt_blip2': None,
},
'tests.pipelines.test_pipelines_zero_shot_image_classification.ZeroShotImageClassificationPipelineTests': {
'test_small_model_pt': None,
'test_small_model_pt_fp16': None,
},
'tests.test_pipeline_mixin.AutomaticSpeechRecognitionPipelineTests': {
'test_small_model_pt_seq2seq': None,
},
'tests.test_pipeline_mixin.DepthEstimationPipelineTests': {
'test_multiprocess': None,
},
'tests.test_pipeline_mixin.ImageToTextPipelineTests': {
'test_small_model_pt': None,
},
'tests.test_pipeline_mixin.SummarizationPipelineTests': {
'test_small_model_pt': None,
},
'tests.test_pipeline_mixin.TextGenerationPipelineTests': {
'test_small_model_pt': None,
'test_stop_sequence_stopping_criteria': None,
},
'tests.test_pipeline_mixin.VideoClassificationPipelineTests': {
'test_small_model_pt': None,
},
'tests.test_pipeline_mixin.VisualQuestionAnsweringPipelineTests': {
'test_small_model_pt_blip2': None,
},
'tests.test_pipeline_mixin.ZeroShotImageClassificationPipelineTests': {
'test_small_model_pt': None,
'test_small_model_pt_fp16': None,
},
}

# Buckets that the classification loop fills with test cases from the reports.
# Failed/errored cases that are not listed in failing_cases.
new_failures = []
# Failed/errored cases that are listed in failing_cases (and not flicky).
known_failures = []
# Passed or skipped cases that are listed in failing_cases (and not flicky).
new_passes = []
# Cases marked flicky in failing_cases whose result is anything but "skipped".
flickies = []
# Cases marked flicky in failing_cases whose result is "skipped".
skipped_flickies = []

def get_classname(case):
    """Return ``case.classname`` with every whitespace run collapsed to one space."""
    tokens = case.classname.split()
    return ' '.join(tokens)

def get_name(case):
    """Return ``case.name`` with every whitespace run collapsed to one space."""
    tokens = case.name.split()
    return ' '.join(tokens)

def get_result(case):
    """Map a test case to "passed", "error", "skipped" or "failed".

    Only the first entry of ``case.result`` is inspected. A case with no
    result entries, or whose first entry is none of the three known types,
    is reported as "passed".
    """
    if not case.result:
        return "passed"
    first = case.result[0]
    outcomes = (
        (Error, "error"),
        (Skipped, "skipped"),
        (Failure, "failed"),
    )
    for result_type, label in outcomes:
        if isinstance(first, result_type):
            return label
    return "passed"

def get_message(case):
    """Return the first line of the message attached to the case's first result.

    Returns an empty string when the case has no result entries, or when the
    first result has no message (``None``) or an empty one. Previously those
    last two situations raised AttributeError / IndexError and aborted the
    whole report.
    """
    if not case.result:
        return ""
    message = case.result[0].message
    if not message:
        return ""
    lines = message.splitlines()
    # Only the first line is kept so the message fits in one table cell.
    return f"{lines[0]}" if lines else ""

def is_known_failure(classname, name):
    """Return True when ``failing_cases`` has an entry for this class and test name."""
    return classname in failing_cases and name in failing_cases[classname]

def is_flicky(classname, name):
    """Return True when the baseline entry for this test has a truthy 'flicky' flag.

    Tests missing from ``failing_cases`` and entries whose value is ``None``
    are not flicky.
    """
    if classname not in failing_cases:
        return False
    per_class = failing_cases[classname]
    if name not in per_class:
        return False
    entry = per_class[name]
    if entry is None:
        return False
    return bool('flicky' in entry and entry['flicky'])

# Load every report given on the command line, then sort each test case into
# exactly one of the result buckets (or none, for an ordinary pass/skip).
xmls = [JUnitXml.fromfile(f) for f in args.junitxml]
for xml in xmls:
    for suite in xml:
        for case in suite:
            classname = get_classname(case)
            name = get_name(case)
            result = get_result(case)
            flicky = is_flicky(classname, name)
            if flicky:
                # Flicky tests are reported separately whatever their outcome;
                # only a skipped one is singled out as a baseline change.
                if result == "skipped":
                    skipped_flickies.append(case)
                else:
                    flickies.append(case)
                continue
            known = is_known_failure(classname, name)
            if result in ("passed", "skipped"):
                # A baseline entry that no longer fails (passed or skipped)
                # is reported so the baseline can be adjusted.
                if known:
                    new_passes.append(case)
            elif known:
                known_failures.append(case)
            else:
                new_failures.append(case)

def print_md_row(row, print_header):
    """Print ``row`` as one line of a Markdown table.

    Args:
        row: mapping of column title to cell value; insertion order defines
            the column order.
        print_header: when true, first print the header line (the keys) and
            the dash delimiter line, then the row itself.

    Pipe characters inside keys and values are escaped as ``\\|``. Left raw,
    a pipe in e.g. a failure message would be read as a column separator
    and break the table layout.
    """
    def cell(value):
        return f"{value}".replace("|", "\\|")

    if print_header:
        header = " | ".join(cell(key) for key in row)
        print(f"| {header} |")
        header = " | ".join("-" * len(key) for key in row)
        print(f"| {header} |")
    line = " | ".join(cell(value) for value in row.values())
    print(f"| {line} |")

def print_cases(cases):
    """Print ``cases`` as a Markdown table with one row per test case.

    The columns are class name, test name, status and first message line.
    The table header is printed before the first row only, so an empty
    ``cases`` prints nothing.
    """
    for position, case in enumerate(cases):
        columns = {
            'Class name': get_classname(case),
            'Test name': get_name(case),
            'Status': get_result(case),
            'Message': get_message(case),
        }
        print_md_row(columns, position == 0)

# Becomes True once any report section has been printed.
printed = False


def print_break(needed):
    """Print one empty line when ``needed`` is truthy; otherwise do nothing."""
    if not needed:
        return
    print("")

# Report sections in print order: (heading, bucket, optional note line).
# Empty buckets are skipped; a blank line separates consecutive sections.
report_sections = (
    ("### New failures", new_failures, None),
    ("### Known failures", known_failures, None),
    (
        "### New passes",
        new_passes,
        "> Adjust baseline: some tests which previously failed now pass.",
    ),
    (
        "### Skipped flickies",
        skipped_flickies,
        "> Adjust baseline: some flicky tests are now skipped.",
    ),
    ("### Flickies", flickies, None),
)

for heading, bucket, note in report_sections:
    if not bucket:
        continue
    print_break(printed)
    print(heading)
    print_cases(bucket)
    if note is not None:
        # GitHub-style note callout asking for a baseline update.
        print("")
        print("> [!NOTE]")
        print(note)
    printed = True

# Exit status, first match wins:
#   1 - there are new failures,
#   2 - some known failures no longer fail,
#   3 - some flicky tests are skipped,
#   0 - otherwise.
exit_code = 0
for code, bucket in ((1, new_failures), (2, new_passes), (3, skipped_flickies)):
    if bucket:
        exit_code = code
        break

sys.exit(exit_code)
24 changes: 16 additions & 8 deletions .github/workflows/_linux_transformers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ jobs:
conda remove --all -y -n huggingface_transformers_test || rm -rf $(dirname ${CONDA_EXE})/../envs/huggingface_transformers_test
conda create -y -n huggingface_transformers_test python=${{ env.python }}
source activate huggingface_transformers_test
pip install junitparser
- name: Prepare Stock XPU Pytorch
run: |
pwd
Expand Down Expand Up @@ -135,22 +136,22 @@ jobs:
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE -k backbone tests || \
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml -k backbone tests || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/*.py
env:
TEST_CASE: 'tests_py'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/*.py || true
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/*.py || true
- name: Run tests/benchmark
env:
TEST_CASE: 'tests_benchmark'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/benchmark || true
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/benchmark || true
- name: Run tests/generation
env:
TEST_CASE: 'tests_generation'
Expand All @@ -160,7 +161,7 @@ jobs:
# Excluding tests due to:
# * torch.distributed.* not yet supported by XPU
pattern="not TestFSDPGeneration"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/generation -k "$pattern" || true
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/generation -k "$pattern" || true
- name: Run tests/models
env:
TEST_CASE: 'tests_models'
Expand All @@ -175,7 +176,7 @@ jobs:
not test_model_parallel_equal_results and \
not test_resize_embeddings_untied and \
not test_resize_tokens_embeddings"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/models -k "$pattern" || true
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/models -k "$pattern" || true
- name: Run tests/pipelines
env:
TEST_CASE: 'tests_pipelines'
Expand All @@ -184,7 +185,7 @@ jobs:
cd transformers
# Some tests are known to fail w/o clear pattern
# TODO: drop ||true after triage and fixes
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/pipelines || true
python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/pipelines || true
- name: Run tests/trainer
env:
TEST_CASE: 'tests_trainer'
Expand All @@ -199,7 +200,7 @@ jobs:
not TestTrainerDistributed and \
not TestTrainerDistributedXPU and \
not TestFSDPTrainer"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer -k "$pattern" || \
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/utils
env:
Expand All @@ -210,13 +211,15 @@ jobs:
# Excluding tests due to:
# * Network proxy connection issue, reason unknown
pattern="not test_load_img_url_timeout"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils -k "$pattern" || \
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Check for errors in tests
run: |
FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//')
echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]"
test -z "$FAILED_CASES"
source activate huggingface_transformers_test
python3 torch-xpu-ops/.github/scripts/check-transformers.py transformers/reports/*.xml
- name: Clean HF home directory and cache
if: ${{ always() }}
run: |
Expand Down Expand Up @@ -251,6 +254,11 @@ jobs:
echo "| $test_group | $errors | $failed | $deselected | $passed | $skipped |"
done
} >> $GITHUB_STEP_SUMMARY
- name: Print baseline difference
if: ${{ ! cancelled() }}
run: |
source activate huggingface_transformers_test
python3 torch-xpu-ops/.github/scripts/check-transformers.py transformers/reports/*.xml >> $GITHUB_STEP_SUMMARY || true
- name: Print failure lines
if: ${{ ! cancelled() }}
run: |
Expand Down

0 comments on commit aa79416

Please sign in to comment.