From 8a4a8039a2fe27f85d25a2b3ffa2e34d565f81df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Tue, 1 Aug 2023 09:49:35 +0200 Subject: [PATCH] Refactor with black (#1009) * Refactor with black Signed-off-by: Xavier Dupre * remove unnecessary skip condition Signed-off-by: Xavier Dupre * freeze lightgbm version Signed-off-by: Xavier Dupre * add ruff to github action Signed-off-by: Xavier Dupre * update badge on README.md Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre --- .azure-pipelines/linux-conda-CI.yml | 11 +- .azure-pipelines/win32-conda-CI.yml | 10 +- .github/workflows/black-ruff.yml | 16 + README.md | 10 +- .../bench_plot_onnxruntime_decision_tree.py | 115 +- benchmarks/bench_plot_onnxruntime_hgb.py | 151 +- benchmarks/bench_plot_onnxruntime_linreg.py | 114 +- benchmarks/bench_plot_onnxruntime_logreg.py | 123 +- .../bench_plot_onnxruntime_random_forest.py | 129 +- ...ench_plot_onnxruntime_random_forest_reg.py | 183 +- benchmarks/bench_plot_onnxruntime_svm_reg.py | 93 +- benchmarks/post_graph.py | 66 +- docs/conf.py | 157 +- docs/examples/plot_backend.py | 20 +- docs/examples/plot_benchmark_cdist.py | 57 +- docs/examples/plot_benchmark_pipeline.py | 75 +- docs/examples/plot_black_op.py | 90 +- docs/examples/plot_cast_transformer.py | 82 +- docs/examples/plot_complex_pipeline.py | 106 +- .../plot_convert_decision_function.py | 26 +- docs/examples/plot_convert_model.py | 17 +- docs/examples/plot_convert_syntax.py | 57 +- docs/examples/plot_convert_zipmap.py | 43 +- docs/examples/plot_custom_model.py | 145 +- docs/examples/plot_custom_parser.py | 122 +- .../plot_custom_parser_alternative.py | 109 +- docs/examples/plot_errors_onnxruntime.py | 41 +- docs/examples/plot_gpr.py | 58 +- docs/examples/plot_intermediate_outputs.py | 99 +- docs/examples/plot_investigate_pipeline.py | 39 +- docs/examples/plot_logging.py | 10 +- docs/examples/plot_nmf.py | 42 +- docs/examples/plot_onnx_operators.py | 74 +- docs/examples/plot_pipeline.py | 23 +- docs/examples/plot_pipeline_lightgbm.py | 43 +- docs/examples/plot_pipeline_xgboost.py | 50 +- docs/examples/plot_tfidfvectorizer.py | 150 +- docs/exts/github_link.py | 29 +- docs/exts/sphinx_skl2onnx_extension.py | 104 +- docs/requirements.txt | 2 +- docs/tests/test_documentation_examples.py | 39 +- docs/tests/test_documentation_tutorial.py | 46 +- docs/tests/test_utils_benchmark.py | 11 +- docs/tests/test_utils_classes.py | 1 - docs/tutorial/plot_abegin_convert_pipeline.py | 22 +- docs/tutorial/plot_bbegin_measure_time.py | 71 +- docs/tutorial/plot_catwoe_transformer.py | 58 +- docs/tutorial/plot_cbegin_opset.py | 32 +- docs/tutorial/plot_dbegin_options.py | 68 +- docs/tutorial/plot_dbegin_options_list.py | 62 +- docs/tutorial/plot_dbegin_options_zipmap.py | 49 +- docs/tutorial/plot_ebegin_float_double.py | 73 +- docs/tutorial/plot_fbegin_investigate.py | 33 +- docs/tutorial/plot_gbegin_cst.py | 23 +- docs/tutorial/plot_gbegin_dataframe.py | 83 +- docs/tutorial/plot_gconverting.py | 27 +- docs/tutorial/plot_gexternal_catboost.py | 72 +- docs/tutorial/plot_gexternal_lightgbm.py | 31 +- docs/tutorial/plot_gexternal_lightgbm_reg.py | 83 +- docs/tutorial/plot_gexternal_xgboost.py | 67 +- docs/tutorial/plot_icustom_converter.py | 26 +- docs/tutorial/plot_jcustom_syntax.py | 45 +- .../plot_kcustom_converter_wrapper.py | 15 +- docs/tutorial/plot_lcustom_options.py | 59 +- docs/tutorial/plot_mcustom_parser.py | 45 +- docs/tutorial/plot_ngrams.py | 25 +- docs/tutorial/plot_transformer_discrepancy.py | 43 +- 
docs/tutorial/plot_usparse_xgboost.py | 201 +- docs/tutorial/plot_wext_pyod_forest.py | 93 +- docs/tutorial/plot_woe_transformer.py | 51 +- pyproject.toml | 17 + requirements-dev.txt | 7 +- setup.py | 56 +- skl2onnx/__init__.py | 7 +- skl2onnx/__main__.py | 13 +- skl2onnx/_parse.py | 458 ++-- skl2onnx/_supported_operators.py | 521 +++-- skl2onnx/algebra/automation.py | 116 +- skl2onnx/algebra/complex_functions.py | 276 ++- skl2onnx/algebra/custom_ops.py | 39 +- skl2onnx/algebra/graph_state.py | 417 ++-- skl2onnx/algebra/onnx_operator.py | 638 +++--- skl2onnx/algebra/onnx_operator_mixin.py | 105 +- skl2onnx/algebra/onnx_ops.py | 548 +++-- .../algebra/onnx_subgraph_operator_mixin.py | 1 + skl2onnx/algebra/sklearn_ops.py | 43 +- skl2onnx/algebra/type_helper.py | 46 +- skl2onnx/common/_apply_operation.py | 195 +- skl2onnx/common/_container.py | 404 ++-- skl2onnx/common/_onnx_optimisation_common.py | 78 +- skl2onnx/common/_registration.py | 53 +- skl2onnx/common/_topology.py | 759 ++++--- skl2onnx/common/data_types.py | 139 +- skl2onnx/common/onnx_optimisation_identity.py | 70 +- skl2onnx/common/shape_calculator.py | 109 +- skl2onnx/common/tree_ensemble.py | 243 +- skl2onnx/common/utils.py | 59 +- skl2onnx/common/utils_checking.py | 10 +- skl2onnx/common/utils_classifier.py | 87 +- skl2onnx/common/utils_sklearn.py | 68 +- skl2onnx/convert.py | 120 +- skl2onnx/helpers/investigate.py | 163 +- skl2onnx/helpers/onnx_helper.py | 156 +- skl2onnx/helpers/onnx_rare_helper.py | 24 +- skl2onnx/operator_converters/_gp_kernels.py | 455 ++-- skl2onnx/operator_converters/ada_boost.py | 996 ++++---- .../array_feature_extractor.py | 43 +- skl2onnx/operator_converters/bagging.py | 355 +-- skl2onnx/operator_converters/binariser.py | 54 +- .../calibrated_classifier_cv.py | 525 +++-- skl2onnx/operator_converters/cast_op.py | 25 +- skl2onnx/operator_converters/class_labels.py | 86 +- skl2onnx/operator_converters/common.py | 49 +- skl2onnx/operator_converters/concat_op.py | 10 +- .../cross_decomposition.py | 26 +- skl2onnx/operator_converters/decision_tree.py | 644 ++++-- skl2onnx/operator_converters/decomposition.py | 106 +- .../operator_converters/dict_vectoriser.py | 44 +- .../operator_converters/feature_hasher.py | 152 +- .../operator_converters/feature_selection.py | 56 +- skl2onnx/operator_converters/flatten_op.py | 19 +- .../function_transformer.py | 34 +- .../operator_converters/gamma_regressor.py | 60 +- .../operator_converters/gaussian_mixture.py | 251 +- .../operator_converters/gaussian_process.py | 338 +-- .../operator_converters/gradient_boosting.py | 189 +- .../operator_converters/grid_search_cv.py | 14 +- skl2onnx/operator_converters/id_op.py | 16 +- skl2onnx/operator_converters/imputer_op.py | 114 +- .../operator_converters/isolation_forest.py | 247 +- .../operator_converters/k_bins_discretiser.py | 191 +- skl2onnx/operator_converters/k_means.py | 49 +- skl2onnx/operator_converters/kernel_pca.py | 160 +- .../operator_converters/label_binariser.py | 133 +- skl2onnx/operator_converters/label_encoder.py | 38 +- .../operator_converters/linear_classifier.py | 360 +-- .../operator_converters/linear_regressor.py | 245 +- .../local_outlier_factor.py | 148 +- .../multilayer_perceptron.py | 249 +- skl2onnx/operator_converters/multioutput.py | 73 +- skl2onnx/operator_converters/multiply_op.py | 23 +- skl2onnx/operator_converters/naive_bayes.py | 645 ++++-- .../operator_converters/nearest_neighbours.py | 740 +++--- skl2onnx/operator_converters/normaliser.py | 28 +- .../operator_converters/one_hot_encoder.py | 
164 +- .../one_vs_one_classifier.py | 143 +- .../one_vs_rest_classifier.py | 285 ++- .../operator_converters/ordinal_encoder.py | 113 +- .../ovr_decision_function.py | 112 +- skl2onnx/operator_converters/pipelines.py | 47 +- .../polynomial_features.py | 132 +- .../operator_converters/power_transformer.py | 130 +- .../quadratic_discriminant_analysis.py | 191 +- skl2onnx/operator_converters/random_forest.py | 705 +++--- .../operator_converters/random_projection.py | 13 +- .../random_trees_embedding.py | 24 +- .../operator_converters/ransac_regressor.py | 16 +- skl2onnx/operator_converters/replace_op.py | 43 +- skl2onnx/operator_converters/scaler_op.py | 207 +- skl2onnx/operator_converters/sequence.py | 30 +- .../operator_converters/sgd_classifier.py | 469 ++-- .../operator_converters/sgd_oneclass_svm.py | 57 +- skl2onnx/operator_converters/stacking.py | 229 +- .../support_vector_machines.py | 294 ++- .../operator_converters/text_vectoriser.py | 373 +-- .../operator_converters/tfidf_transformer.py | 66 +- .../operator_converters/tfidf_vectoriser.py | 36 +- .../operator_converters/voting_classifier.py | 172 +- .../operator_converters/voting_regressor.py | 45 +- skl2onnx/operator_converters/zip_map.py | 102 +- skl2onnx/proto/__init__.py | 12 +- .../array_feature_extractor.py | 5 +- skl2onnx/shape_calculators/cast_op.py | 15 +- skl2onnx/shape_calculators/class_labels.py | 3 +- skl2onnx/shape_calculators/concat.py | 68 +- .../shape_calculators/cross_decomposition.py | 17 +- skl2onnx/shape_calculators/dict_vectorizer.py | 12 +- skl2onnx/shape_calculators/ensemble_shapes.py | 99 +- skl2onnx/shape_calculators/feature_hasher.py | 17 +- skl2onnx/shape_calculators/flatten.py | 5 +- .../shape_calculators/function_transformer.py | 15 +- .../shape_calculators/gaussian_process.py | 27 +- skl2onnx/shape_calculators/grid_search_cv.py | 16 +- skl2onnx/shape_calculators/identity.py | 5 +- skl2onnx/shape_calculators/imputer.py | 37 +- .../shape_calculators/isolation_forest.py | 3 +- .../shape_calculators/k_bins_discretiser.py | 15 +- skl2onnx/shape_calculators/k_means.py | 15 +- skl2onnx/shape_calculators/kernel_pca.py | 37 +- skl2onnx/shape_calculators/label_binariser.py | 13 +- skl2onnx/shape_calculators/label_encoder.py | 11 +- .../shape_calculators/linear_classifier.py | 63 +- .../shape_calculators/linear_regressor.py | 89 +- .../shape_calculators/local_outlier_factor.py | 3 +- skl2onnx/shape_calculators/mixture.py | 30 +- skl2onnx/shape_calculators/multioutput.py | 20 +- .../shape_calculators/nearest_neighbours.py | 84 +- skl2onnx/shape_calculators/one_hot_encoder.py | 10 +- .../one_vs_one_classifier.py | 5 +- .../one_vs_rest_classifier.py | 10 +- skl2onnx/shape_calculators/ordinal_encoder.py | 11 +- .../ovr_decision_function.py | 8 +- skl2onnx/shape_calculators/pipelines.py | 9 +- .../shape_calculators/polynomial_features.py | 13 +- .../shape_calculators/power_transformer.py | 7 +- .../quadratic_discriminant_analysis.py | 5 +- .../shape_calculators/random_projection.py | 5 +- .../random_trees_embedding.py | 10 +- skl2onnx/shape_calculators/replace_op.py | 6 +- skl2onnx/shape_calculators/scaler.py | 37 +- skl2onnx/shape_calculators/sequence.py | 5 +- .../shape_calculators/sgd_oneclass_svm.py | 13 +- .../support_vector_machines.py | 38 +- skl2onnx/shape_calculators/svd.py | 36 +- skl2onnx/shape_calculators/text_vectorizer.py | 17 +- .../shape_calculators/tfidf_transformer.py | 8 +- .../shape_calculators/voting_classifier.py | 9 +- .../shape_calculators/voting_regressor.py | 6 +- 
skl2onnx/shape_calculators/zip_map.py | 25 +- skl2onnx/sklapi/cast_regressor.py | 21 +- skl2onnx/sklapi/cast_transformer.py | 21 +- skl2onnx/sklapi/replace_transformer.py | 11 +- skl2onnx/sklapi/sklearn_text.py | 37 +- skl2onnx/sklapi/sklearn_text_onnx.py | 35 +- skl2onnx/sklapi/woe_transformer.py | 46 +- skl2onnx/sklapi/woe_transformer_onnx.py | 335 +-- skl2onnx/tutorial/benchmark.py | 12 +- skl2onnx/tutorial/imagenet_classes.py | 2021 ++++++++--------- tests/benchmark.py | 3 +- tests/test_algebra_cascade.py | 169 +- tests/test_algebra_complex.py | 46 +- tests/test_algebra_converters.py | 38 +- tests/test_algebra_custom_model.py | 58 +- ...test_algebra_custom_model_sub_estimator.py | 221 +- tests/test_algebra_deprecation.py | 30 +- tests/test_algebra_double.py | 28 +- tests/test_algebra_onnx_doc.py | 35 +- ...test_algebra_onnx_operator_mixin_syntax.py | 201 +- tests/test_algebra_onnx_operators.py | 414 ++-- tests/test_algebra_onnx_operators_if.py | 407 ++-- tests/test_algebra_onnx_operators_opset.py | 23 +- tests/test_algebra_onnx_operators_scan.py | 517 +++-- tests/test_algebra_onnx_operators_sparse.py | 51 +- ...st_algebra_onnx_operators_sub_estimator.py | 147 +- tests/test_algebra_onnx_operators_wrapped.py | 79 +- tests/test_algebra_symbolic.py | 147 +- tests/test_algebra_test_helper.py | 60 +- tests/test_algebra_to_onnx.py | 198 +- tests/test_convert.py | 110 +- tests/test_convert_options.py | 353 +-- tests/test_custom_transformer_ordwoe.py | 51 +- tests/test_custom_transformer_tsne.py | 58 +- tests/test_investigate.py | 204 +- tests/test_onnx_helper.py | 118 +- tests/test_onnx_rare_helper.py | 16 +- tests/test_onnxruntime.py | 108 +- tests/test_op10.py | 50 +- tests/test_opset13.py | 83 +- tests/test_optimisation.py | 104 +- tests/test_options.py | 95 +- .../test_other_converter_library_pipelines.py | 68 +- tests/test_parsing_options.py | 109 +- tests/test_raw_name.py | 8 +- tests/test_scikit_pandas.py | 26 +- tests/test_shapes.py | 39 +- tests/test_sklearn_adaboost_converter.py | 299 ++- tests/test_sklearn_array_feature_extractor.py | 84 +- tests/test_sklearn_bagging_converter.py | 285 ++- tests/test_sklearn_binarizer_converter.py | 16 +- ...earn_calibrated_classifier_cv_converter.py | 348 +-- tests/test_sklearn_cast_regressor.py | 137 +- tests/test_sklearn_cast_transformer.py | 174 +- tests/test_sklearn_concat.py | 119 +- tests/test_sklearn_constant_predictor.py | 33 +- ...test_sklearn_count_vectorizer_converter.py | 143 +- ..._sklearn_count_vectorizer_converter_bug.py | 79 +- tests/test_sklearn_custom_nmf.py | 37 +- .../test_sklearn_decision_tree_converters.py | 243 +- .../test_sklearn_dict_vectorizer_converter.py | 161 +- tests/test_sklearn_documentation.py | 91 +- tests/test_sklearn_double_tensor_type_cls.py | 351 +-- tests/test_sklearn_double_tensor_type_reg.py | 136 +- tests/test_sklearn_double_tensor_type_tr.py | 427 ++-- tests/test_sklearn_feature_hasher.py | 225 +- ...st_sklearn_feature_selection_converters.py | 277 ++- tests/test_sklearn_feature_union.py | 145 +- ..._sklearn_function_transformer_converter.py | 106 +- tests/test_sklearn_gamma_regressor.py | 52 +- ...test_sklearn_gaussian_mixture_converter.py | 238 +- ...est_sklearn_gaussian_process_classifier.py | 85 +- ...test_sklearn_gaussian_process_regressor.py | 1264 +++++++---- .../test_sklearn_glm_classifier_converter.py | 606 +++-- tests/test_sklearn_glm_regressor_converter.py | 770 ++++--- ...st_sklearn_gradient_boosting_converters.py | 300 ++- .../test_sklearn_grid_search_cv_converter.py | 240 +- 
tests/test_sklearn_imputer_converter.py | 108 +- tests/test_sklearn_isolation_forest.py | 78 +- ...st_sklearn_k_bins_discretiser_converter.py | 320 ++- tests/test_sklearn_k_means_converter.py | 89 +- tests/test_sklearn_kernel_pca_converter.py | 100 +- .../test_sklearn_label_binariser_converter.py | 39 +- tests/test_sklearn_label_encoder_converter.py | 42 +- tests/test_sklearn_local_outlier_factor.py | 193 +- tests/test_sklearn_mlp_converter.py | 201 +- tests/test_sklearn_multi_output.py | 102 +- tests/test_sklearn_naive_bayes_converter.py | 291 +-- ...est_sklearn_nearest_neighbour_converter.py | 927 ++++---- tests/test_sklearn_normalizer_converter.py | 83 +- .../test_sklearn_one_hot_encoder_converter.py | 275 ++- ...sklearn_one_vs_one_classifier_converter.py | 84 +- ...klearn_one_vs_rest_classifier_converter.py | 373 +-- tests/test_sklearn_ordinal_encoder.py | 71 +- ...passive_aggressive_classifier_converter.py | 39 +- tests/test_sklearn_pca_converter.py | 97 +- tests/test_sklearn_perceptron_converter.py | 39 +- tests/test_sklearn_pipeline.py | 1303 +++++++---- tests/test_sklearn_pipeline_concat_tfidf.py | 359 ++- .../test_sklearn_pipeline_within_pipeline.py | 196 +- tests/test_sklearn_pls_regression.py | 82 +- ...t_sklearn_polynomial_features_converter.py | 122 +- tests/test_sklearn_power_transformer.py | 36 +- ...adratic_discriminant_analysis_converter.py | 202 +- .../test_sklearn_random_forest_converters.py | 723 +++--- tests/test_sklearn_random_projection.py | 17 +- tests/test_sklearn_random_trees_embedding.py | 39 +- tests/test_sklearn_replace_transformer.py | 34 +- tests/test_sklearn_scaler_converter.py | 318 ++- .../test_sklearn_sgd_classifier_converter.py | 416 ++-- ...test_sklearn_sgd_oneclass_svm_converter.py | 24 +- tests/test_sklearn_stacking.py | 512 +++-- tests/test_sklearn_svm_converters.py | 478 ++-- tests/test_sklearn_text.py | 199 +- ...est_sklearn_tfidf_transformer_converter.py | 30 +- ...earn_tfidf_transformer_converter_sparse.py | 26 +- ...test_sklearn_tfidf_vectorizer_converter.py | 614 ++--- ...sklearn_tfidf_vectorizer_converter_char.py | 229 +- ...earn_tfidf_vectorizer_converter_dataset.py | 32 +- ...arn_tfidf_vectorizer_converter_pipeline.py | 168 +- ...klearn_tfidf_vectorizer_converter_regex.py | 534 +++-- tests/test_sklearn_truncated_svd.py | 42 +- ...est_sklearn_voting_classifier_converter.py | 68 +- ...test_sklearn_voting_regressor_converter.py | 53 +- tests/test_sklearn_woe_transformer.py | 315 +-- tests/test_supported_converters.py | 70 +- tests/test_topology_prune.py | 92 +- tests/test_utils/__init__.py | 9 +- .../reference_implementation_afe.py | 4 +- .../reference_implementation_helper.py | 42 +- .../test_utils/reference_implementation_ml.py | 172 +- .../reference_implementation_svm.py | 210 +- .../reference_implementation_text.py | 181 +- .../reference_implementation_tree.py | 212 +- .../reference_implementation_zipmap.py | 24 +- tests/test_utils/tests_helper.py | 125 +- tests/test_utils/utils_backend.py | 42 +- tests/test_utils/utils_backend_onnx.py | 519 +++-- tests/test_utils/utils_backend_onnxruntime.py | 206 +- tests/test_utils_sklearn.py | 226 +- tests/test_variable_names.py | 129 +- tests_onnxmltools/test_columns.py | 64 +- tests_onnxmltools/test_lightgbm.py | 201 +- tests_onnxmltools/test_xgboost_converters.py | 240 +- 362 files changed, 30215 insertions(+), 21464 deletions(-) create mode 100644 .github/workflows/black-ruff.yml create mode 100644 pyproject.toml diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml 
index 0c5f44de7..40d9ff887 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -243,10 +243,6 @@ jobs: fi displayName: 'install onnx' - - script: | - pip install flake8 - displayName: 'install flake8' - - script: | pip install $(onnxrt.version) displayName: 'install onnxruntime' @@ -334,11 +330,10 @@ jobs: displayName: 'pytest-onnxmltools' condition: eq(variables['run.example'], '1') - # Check flake8 after the tests to get more feedback. - # It is checked before the tests on the windows build. - script: | - flake8 skl2onnx tests tests_onnxmltools - displayName: 'flake8' + python -m pip install ruff + ruff skl2onnx tests tests_onnxmltools + displayName: 'ruff' - script: | if [ '$(onnx.target_opset)' != '' ] diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index 9fb907347..21b7e8cd6 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -157,13 +157,9 @@ jobs: - script: | call activate skl2onnxEnvironment - pip install flake8 - displayName: 'install flake8' - - - script: | - call activate skl2onnxEnvironment - flake8 skl2onnx tests tests_onnxmltools - displayName: 'flake8' + python -m pip install ruff + ruff skl2onnx tests tests_onnxmltools + displayName: 'ruff' - script: | call activate skl2onnxEnvironment diff --git a/.github/workflows/black-ruff.yml b/.github/workflows/black-ruff.yml new file mode 100644 index 000000000..c48dc25fd --- /dev/null +++ b/.github/workflows/black-ruff.yml @@ -0,0 +1,16 @@ +name: Black Format Checker +on: [push, pull_request] +jobs: + black-format-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: psf/black@stable + with: + options: "--diff --check" + src: "." + ruff-format-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/README.md b/README.md index 437f56a92..6f79fd921 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,18 @@

-| Linux | Windows | -|-------|---------| -| [![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status/sklearn-onnx-linux-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=5?branchName=master) | [![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status/sklearn-onnx-win32-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=5?branchName=master)| +[![Build Status Linux](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1009%2Fmerge) + +[![Build Status Windows](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1009%2Fmerge) + +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## Introduction *sklearn-onnx* converts [scikit-learn](https://scikit-learn.org/stable/) models to [ONNX](https://github.com/onnx/onnx). Once in the ONNX format, you can use tools like [ONNX Runtime](https://github.com/Microsoft/onnxruntime) for high performance scoring. All converters are tested with [onnxruntime](https://onnxruntime.ai/). +Any external converter can be registered to convert scikit-learn pipelines +including models or transformers coming from external libraries. ## Documentation Full documentation including tutorials is available at [https://onnx.ai/sklearn-onnx/](https://onnx.ai/sklearn-onnx/). diff --git a/benchmarks/bench_plot_onnxruntime_decision_tree.py b/benchmarks/bench_plot_onnxruntime_decision_tree.py index 0d06b1bb5..8b08aca8b 100644 --- a/benchmarks/bench_plot_onnxruntime_decision_tree.py +++ b/benchmarks/bench_plot_onnxruntime_decision_tree.py @@ -14,6 +14,7 @@ import pandas from sklearn import config_context from sklearn.tree import DecisionTreeClassifier + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -29,14 +30,18 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, max_depth): "DecisionTreeClassifier." 
rf = DecisionTreeClassifier(max_depth=max_depth) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] - onx = convert_sklearn(rf, initial_types=initial_types, - options={DecisionTreeClassifier: {'zipmap': False}}) + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] + onx = convert_sklearn( + rf, + initial_types=initial_types, + options={DecisionTreeClassifier: {"zipmap": False}}, + ) f = BytesIO() f.write(onx.SerializeToString()) content = f.getvalue() @@ -51,30 +56,29 @@ def predict_skl_predict_proba(X, model=rf): return rf.predict_proba(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] def predict_onnxrt_predict_proba(X, sess=sess): - return sess.run(outputs[1:], {'X': X})[0] + return sess.run(outputs[1:], {"X": X})[0] - return {'predict': (predict_skl_predict, - predict_onnxrt_predict), - 'predict_proba': (predict_skl_predict_proba, - predict_onnxrt_predict_proba)} + return { + "predict": (predict_skl_predict, predict_onnxrt_predict), + "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba), + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, max_depths, methods, - repeat=10, verbose=False): +def bench(n_obs, n_features, max_depths, methods, repeat=10, verbose=False): res = [] for nfeat in n_features: - ntrain = 100000 X_train = np.empty((ntrain, nfeat)) X_train[:, :] = rand(ntrain, nfeat)[:, :].astype(np.float32) @@ -88,15 +92,12 @@ def bench(n_obs, n_features, max_depths, methods, for n in n_obs: for method in methods: - fct1, fct2 = fcts[method] - if not allow_configuration( - n=n, nfeat=nfeat, max_depth=max_depth): + if not allow_configuration(n=n, nfeat=nfeat, max_depth=max_depth): continue - obs = dict(n_obs=n, nfeat=nfeat, - max_depth=max_depth, method=method) + obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth, method=method) # creates different inputs to avoid caching in any ways Xs = [] @@ -143,11 +144,11 @@ def bench(n_obs, n_features, max_depths, methods, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2) ncols = max(len(set(df.method)), 2) - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -156,31 +157,49 @@ def plot_results(df, verbose=False): for method in sorted(set(df.method)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( - "Time (s) n_obs={}\nmax_depth={}".format( - n_obs, max_depth), - fontsize='x-small') - - color = 'b' - subset = df[(df.method == method) & (df.n_obs == n_obs) & - (df.max_depth == max_depth)] + "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth), + fontsize="x-small", + ) + + color = "b" + subset = df[ + (df.method == method) + & (df.n_obs == n_obs) + & (df.max_depth == max_depth) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") if verbose: print(subset) label = "skl" - subset.plot(x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--') + subset.plot( + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + ) label = "ort" - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color) - - a.legend(loc=0, fontsize='x-small') + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("method={}".format(method), fontsize='x-small') + a.set_title("method={}".format(method), fontsize="x-small") pos += 1 row += 1 @@ -190,13 +209,14 @@ def plot_results(df, verbose=False): @ignore_warnings(category=FutureWarning) def run_bench(repeat=100, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict', 'predict_proba'] + methods = ["predict", "predict_proba"] n_features = [1, 5, 10, 20, 50, 100, 200] max_depths = [2, 5, 10, 20] start = time() - results = bench(n_obs, n_features, max_depths, methods, - repeat=repeat, verbose=verbose) + results = bench( + n_obs, n_features, max_depths, methods, repeat=repeat, verbose=verbose + ) end = time() results_df = pandas.DataFrame(results) @@ -207,21 +227,24 @@ def run_bench(repeat=100, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_decision_tree.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/bench_plot_onnxruntime_hgb.py 
b/benchmarks/bench_plot_onnxruntime_hgb.py index 8a2523120..5c2d0f23d 100644 --- a/benchmarks/bench_plot_onnxruntime_hgb.py +++ b/benchmarks/bench_plot_onnxruntime_hgb.py @@ -24,13 +24,13 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, max_depth, n_estimators): "RandomForestClassifier." - rf = HistGradientBoostingRegressor( - max_depth=max_depth, max_iter=n_estimators) + rf = HistGradientBoostingRegressor(max_depth=max_depth, max_iter=n_estimators) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] onx = convert_sklearn(rf, initial_types=initial_types) f = BytesIO() f.write(onx.SerializeToString()) @@ -42,28 +42,31 @@ def predict_skl_predict(X, model=rf): return rf.predict(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] - return {'predict': ( - predict_skl_predict, - predict_onnxrt_predict, - None, - )} + return { + "predict": ( + predict_skl_predict, + predict_onnxrt_predict, + None, + ) + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, max_depths, n_estimatorss, - methods, repeat=10, verbose=False): +def bench( + n_obs, n_features, max_depths, n_estimatorss, methods, repeat=10, verbose=False +): res = [] for nfeat in n_features: - ntrain = 100000 X_train = np.empty((ntrain, nfeat)).astype(np.float32) X_train[:, :] = rand(ntrain, nfeat)[:, :] @@ -76,17 +79,24 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, for n in n_obs: for method in methods: - fct1, fct2, fct3 = fcts[method] - if not allow_configuration(n=n, nfeat=nfeat, - max_depth=max_depth, - n_estimator=n_estimators, - method=method): + if not allow_configuration( + n=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimator=n_estimators, + method=method, + ): continue - obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth, - n_estimators=n_estimators, method=method) + obs = dict( + n_obs=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimators=n_estimators, + method=method, + ) # creates different inputs to avoid caching in any ways Xs = [] @@ -128,8 +138,7 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, if len(p1.shape) == 1 and len(p2.shape) == 2: p2 = p2.ravel() try: - assert_almost_equal( - p1.ravel(), p2.ravel(), decimal=5) + assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5) except AssertionError as e: warnings.warn(str(e)) return res @@ -139,11 +148,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2) ncols = 2 - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -152,17 +161,19 @@ def plot_results(df, verbose=False): for n_jobs in [1]: a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( - "Time (s) n_obs={}\nmax_depth={}".format( - n_obs, max_depth), fontsize='x-small') - - for color, n_estimators in zip( - 'brgyc', sorted(set(df.n_estimators))): - subset = df[(df.n_obs == n_obs) - & (df.max_depth == max_depth) - & (df.n_estimators == n_estimators)] + "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth), + fontsize="x-small", + ) + + for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))): + subset = df[ + (df.n_obs == n_obs) + & (df.max_depth == max_depth) + & (df.n_estimators == n_estimators) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") @@ -171,19 +182,43 @@ def plot_results(df, verbose=False): label = "skl ne={}".format(n_estimators) subset.plot( - x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--', lw=5) + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + lw=5, + ) label = "ort ne={}".format(n_estimators) - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color, lw=3) + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + lw=3, + ) label = "lite ne={}".format(n_estimators) subset.plot( - x="nfeat", y="time_lite", label=label, ax=a, - logx=True, logy=True, c=color, style='-.', lw=3) - - a.legend(loc=0, fontsize='x-small') + x="nfeat", + y="time_lite", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="-.", + lw=3, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("---", fontsize='x-small') + a.set_title("---", fontsize="x-small") pos += 1 row += 1 @@ -193,14 +228,21 @@ def plot_results(df, verbose=False): @ignore_warnings(category=FutureWarning) def run_bench(repeat=100, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict'] + methods = ["predict"] n_features = [30, 100] max_depths = [10] n_estimatorss = [100, 200] start = time() - results = bench(n_obs, n_features, max_depths, n_estimatorss, - methods, repeat=repeat, verbose=verbose) + results = bench( + n_obs, + n_features, + max_depths, + n_estimatorss, + methods, + repeat=repeat, + verbose=verbose, + ) end = time() results_df = pandas.DataFrame(results) @@ -211,21 +253,24 @@ def run_bench(repeat=100, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": 
numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_hgb.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/bench_plot_onnxruntime_linreg.py b/benchmarks/bench_plot_onnxruntime_linreg.py index aa180f71e..82490c22d 100644 --- a/benchmarks/bench_plot_onnxruntime_linreg.py +++ b/benchmarks/bench_plot_onnxruntime_linreg.py @@ -14,6 +14,7 @@ import pandas from sklearn import config_context from sklearn.linear_model import LinearRegression + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -29,12 +30,13 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, fit_intercept): "LinearRegression." rf = LinearRegression(fit_intercept=fit_intercept) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] onx = convert_sklearn(rf, initial_types=initial_types) f = BytesIO() f.write(onx.SerializeToString()) @@ -47,25 +49,23 @@ def predict_skl_predict(X, model=rf): return rf.predict(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] - return {'predict': (predict_skl_predict, - predict_onnxrt_predict)} + return {"predict": (predict_skl_predict, predict_onnxrt_predict)} ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, fit_intercepts, methods, - repeat=10, verbose=False): +def bench(n_obs, n_features, fit_intercepts, methods, repeat=10, verbose=False): res = [] for nfeat in n_features: - ntrain = 10000 X_train = np.empty((ntrain, nfeat)) X_train[:, :] = rand(ntrain, nfeat)[:, :] @@ -83,16 +83,20 @@ def bench(n_obs, n_features, fit_intercepts, methods, else: loop_repeat = repeat for method in methods: - fct1, fct2 = fcts[method] if not allow_configuration( - n=n, nfeat=nfeat, fit_intercept=fit_intercept): + n=n, nfeat=nfeat, fit_intercept=fit_intercept + ): continue - obs = dict(n_obs=n, nfeat=nfeat, - fit_intercept=fit_intercept, method=method, - repeat=loop_repeat) + obs = dict( + n_obs=n, + nfeat=nfeat, + fit_intercept=fit_intercept, + method=method, + repeat=loop_repeat, + ) # creates different inputs to avoid caching in any ways Xs = [] @@ -128,8 +132,7 @@ def bench(n_obs, n_features, fit_intercepts, methods, if len(p1.shape) == 1 and len(p2.shape) == 2: p2 = p2.ravel() try: - assert_almost_equal( - p1.ravel(), p2.ravel(), decimal=5) + assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5) except AssertionError as e: warnings.warn(str(e)) return res @@ -139,11 +142,11 @@ def bench(n_obs, n_features, fit_intercepts, methods, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.fit_intercept)) * len(set(df.n_obs)), 2) ncols = max(len(set(df.method)), 2) - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -152,48 +155,68 @@ def plot_results(df, verbose=False): for method in sorted(set(df.method)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( "Time (s) n_obs={}\nfit_intercept={}".format( - n_obs, fit_intercept), - fontsize='x-small') - - color = 'b' - subset = df[(df.method == method) & (df.n_obs == n_obs) & - (df.fit_intercept == fit_intercept)] + n_obs, fit_intercept + ), + fontsize="x-small", + ) + + color = "b" + subset = df[ + (df.method == method) + & (df.n_obs == n_obs) + & (df.fit_intercept == fit_intercept) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") if verbose: print(subset) label = "skl" - subset.plot(x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--') + subset.plot( + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + ) label = "ort" - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color) - - a.legend(loc=0, fontsize='x-small') + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("method={}".format(method), fontsize='x-small') + a.set_title("method={}".format(method), fontsize="x-small") pos += 1 row += 1 - plt.suptitle( - "Benchmark for LinearRegression sklearn/onnxruntime", fontsize=16) + plt.suptitle("Benchmark for LinearRegression sklearn/onnxruntime", fontsize=16) @ignore_warnings(category=FutureWarning) def run_bench(repeat=2000, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict'] + methods = ["predict"] n_features = [10, 50, 100] fit_intercepts = [True] start = time() - results = bench(n_obs, n_features, fit_intercepts, methods, - repeat=repeat, verbose=verbose) + results = bench( + n_obs, n_features, fit_intercepts, methods, repeat=repeat, verbose=verbose + ) end = time() results_df = pandas.DataFrame(results) @@ -204,21 +227,24 @@ def run_bench(repeat=2000, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_linreg.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git 
a/benchmarks/bench_plot_onnxruntime_logreg.py b/benchmarks/bench_plot_onnxruntime_logreg.py index f1a2511e6..dc2f1ec37 100644 --- a/benchmarks/bench_plot_onnxruntime_logreg.py +++ b/benchmarks/bench_plot_onnxruntime_logreg.py @@ -14,6 +14,7 @@ import pandas from sklearn import config_context from sklearn.linear_model import LogisticRegression + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -29,14 +30,16 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, fit_intercept): "LogisticRegression." rf = LogisticRegression(fit_intercept=fit_intercept) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] - onx = convert_sklearn(rf, initial_types=initial_types, - options={LogisticRegression: {'zipmap': False}}) + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] + onx = convert_sklearn( + rf, initial_types=initial_types, options={LogisticRegression: {"zipmap": False}} + ) f = BytesIO() f.write(onx.SerializeToString()) content = f.getvalue() @@ -51,30 +54,29 @@ def predict_skl_predict_proba(X, model=rf): return rf.predict_proba(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] def predict_onnxrt_predict_proba(X, sess=sess): - return sess.run(outputs[1:], {'X': X})[0] + return sess.run(outputs[1:], {"X": X})[0] - return {'predict': (predict_skl_predict, - predict_onnxrt_predict), - 'predict_proba': (predict_skl_predict_proba, - predict_onnxrt_predict_proba)} + return { + "predict": (predict_skl_predict, predict_onnxrt_predict), + "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba), + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, fit_intercepts, methods, - repeat=10, verbose=False): +def bench(n_obs, n_features, fit_intercepts, methods, repeat=10, verbose=False): res = [] for nfeat in n_features: - ntrain = 10000 X_train = np.empty((ntrain, nfeat)) X_train[:, :] = rand(ntrain, nfeat)[:, :].astype(np.float32) @@ -94,16 +96,20 @@ def bench(n_obs, n_features, fit_intercepts, methods, else: loop_repeat = repeat for method in methods: - fct1, fct2 = fcts[method] if not allow_configuration( - n=n, nfeat=nfeat, fit_intercept=fit_intercept): + n=n, nfeat=nfeat, fit_intercept=fit_intercept + ): continue - obs = dict(n_obs=n, nfeat=nfeat, - fit_intercept=fit_intercept, method=method, - repeat=loop_repeat) + obs = dict( + n_obs=n, + nfeat=nfeat, + fit_intercept=fit_intercept, + method=method, + repeat=loop_repeat, + ) # creates different inputs to avoid caching in any ways Xs = [] @@ -146,11 +152,11 @@ def bench(n_obs, n_features, fit_intercepts, methods, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.fit_intercept)) * len(set(df.n_obs)), 2) ncols = max(len(set(df.method)), 2) - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -159,48 +165,68 @@ def plot_results(df, verbose=False): for method in sorted(set(df.method)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( "Time (s) n_obs={}\nfit_intercept={}".format( - n_obs, fit_intercept), - fontsize='x-small') - - color = 'b' - subset = df[(df.method == method) & (df.n_obs == n_obs) & - (df.fit_intercept == fit_intercept)] + n_obs, fit_intercept + ), + fontsize="x-small", + ) + + color = "b" + subset = df[ + (df.method == method) + & (df.n_obs == n_obs) + & (df.fit_intercept == fit_intercept) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") if verbose: print(subset) label = "skl" - subset.plot(x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--') + subset.plot( + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + ) label = "ort" - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color) - - a.legend(loc=0, fontsize='x-small') + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("method={}".format(method), fontsize='x-small') + a.set_title("method={}".format(method), fontsize="x-small") pos += 1 row += 1 - plt.suptitle( - "Benchmark for LogisticRegression sklearn/onnxruntime", fontsize=16) + plt.suptitle("Benchmark for LogisticRegression sklearn/onnxruntime", fontsize=16) @ignore_warnings(category=FutureWarning) def run_bench(repeat=1000, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict_proba'] # ['predict', 'predict_proba'] + methods = ["predict_proba"] # ['predict', 'predict_proba'] n_features = [10, 50] fit_intercepts = [True] start = time() - results = bench(n_obs, n_features, fit_intercepts, methods, - repeat=repeat, verbose=verbose) + results = bench( + n_obs, n_features, fit_intercepts, methods, repeat=repeat, verbose=verbose + ) end = time() results_df = pandas.DataFrame(results) @@ -211,21 +237,24 @@ def run_bench(repeat=1000, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) 
df.to_csv("bench_plot_onnxruntime_logreg.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/bench_plot_onnxruntime_random_forest.py b/benchmarks/bench_plot_onnxruntime_random_forest.py index 7fdef4d15..c968b5457 100644 --- a/benchmarks/bench_plot_onnxruntime_random_forest.py +++ b/benchmarks/bench_plot_onnxruntime_random_forest.py @@ -14,6 +14,7 @@ import pandas from sklearn import config_context from sklearn.ensemble import RandomForestClassifier + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -29,12 +30,13 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, max_depth, n_estimators): "RandomForestClassifier." rf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] onx = convert_sklearn(rf, initial_types=initial_types) f = BytesIO() f.write(onx.SerializeToString()) @@ -50,10 +52,10 @@ def predict_skl_predict_proba(X, model=rf): return rf.predict_proba(X) def predict_onnxrt_predict(X, sess=sess): - return numpy.array(sess.run(outputs[:1], {'X': X.astype(np.float32)})) + return numpy.array(sess.run(outputs[:1], {"X": X.astype(np.float32)})) def predict_onnxrt_predict_proba(X, sess=sess): - res = sess.run(outputs[1:], {'X': X.astype(np.float32)})[0] + res = sess.run(outputs[1:], {"X": X.astype(np.float32)})[0] # do not use DataFrame to convert the output into array, # it takes too much time out = numpy.empty((len(res), len(res[0])), dtype=numpy.float32) @@ -62,25 +64,26 @@ def predict_onnxrt_predict_proba(X, sess=sess): out[i, k] = v return out - return {'predict': (predict_skl_predict, - predict_onnxrt_predict), - 'predict_proba': (predict_skl_predict_proba, - predict_onnxrt_predict_proba)} + return { + "predict": (predict_skl_predict, predict_onnxrt_predict), + "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba), + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, max_depths, n_estimatorss, methods, - repeat=10, verbose=False): +def bench( + n_obs, n_features, max_depths, n_estimatorss, methods, repeat=10, verbose=False +): res = [] for nfeat in n_features: - ntrain = 100000 X_train = np.empty((ntrain, nfeat)) X_train[:, :] = rand(ntrain, nfeat)[:, :] @@ -95,16 +98,23 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, methods, for n in n_obs: for method in methods: - fct1, fct2 = fcts[method] if not allow_configuration( - n=n, nfeat=nfeat, - max_depth=max_depth, n_estimator=n_estimators): + n=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimator=n_estimators, + ): continue - obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth, - n_estimators=n_estimators, method=method) + obs = dict( + n_obs=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimators=n_estimators, + method=method, + ) # creates different inputs to avoid caching in any ways Xs = [] @@ -151,11 +161,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, methods, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2) ncols = max(len(set(df.method)), 2) - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -164,33 +174,50 @@ def plot_results(df, verbose=False): for method in sorted(set(df.method)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( - "Time (s) n_obs={}\nmax_depth={}".format( - n_obs, max_depth), - fontsize='x-small') - - for color, n_estimators in zip( - 'brgyc', sorted(set(df.n_estimators))): - subset = df[(df.method == method) & (df.n_obs == n_obs) - & (df.max_depth == max_depth) - & (df.n_estimators == n_estimators)] + "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth), + fontsize="x-small", + ) + + for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))): + subset = df[ + (df.method == method) + & (df.n_obs == n_obs) + & (df.max_depth == max_depth) + & (df.n_estimators == n_estimators) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") if verbose: print(subset) label = "skl ne={}".format(n_estimators) - subset.plot(x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--') + subset.plot( + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + ) label = "ort ne={}".format(n_estimators) - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color) - - a.legend(loc=0, fontsize='x-small') + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("method={}".format(method), fontsize='x-small') + a.set_title("method={}".format(method), fontsize="x-small") pos += 1 row += 1 @@ -200,14 +227,21 @@ def plot_results(df, verbose=False): @ignore_warnings(category=FutureWarning) def run_bench(repeat=100, verbose=False): n_obs = [1, 100] - methods = ['predict', 'predict_proba'] + methods = ["predict", "predict_proba"] n_features = [1, 5, 10, 20, 50, 100] max_depths = [2, 5, 10] n_estimatorss = [1, 10, 100] start = time() - results = bench(n_obs, n_features, max_depths, n_estimatorss, methods, - repeat=repeat, verbose=verbose) + results = bench( + n_obs, + n_features, + max_depths, + n_estimatorss, + methods, + repeat=repeat, + verbose=verbose, + ) end = time() results_df = pandas.DataFrame(results) @@ -218,21 +252,24 @@ def run_bench(repeat=100, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": 
"onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_random_forest.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/bench_plot_onnxruntime_random_forest_reg.py b/benchmarks/bench_plot_onnxruntime_random_forest_reg.py index ffee920b0..c942e72e5 100644 --- a/benchmarks/bench_plot_onnxruntime_random_forest_reg.py +++ b/benchmarks/bench_plot_onnxruntime_random_forest_reg.py @@ -25,13 +25,15 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, max_depth, n_estimators, n_jobs): "RandomForestClassifier." - rf = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators, - n_jobs=n_jobs) + rf = RandomForestRegressor( + max_depth=max_depth, n_estimators=n_estimators, n_jobs=n_jobs + ) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] onx = convert_sklearn(rf, initial_types=initial_types) f = BytesIO() f.write(onx.SerializeToString()) @@ -42,48 +44,62 @@ def fcts_model(X, y, max_depth, n_estimators, n_jobs): if False: import treelite.sklearn import treelite_runtime + try: lite = treelite.sklearn.import_model(rf) name = "lite{}.dll".format(id(rf)) lite.export_lib( - toolchain='msvc' if sys.platform == "win32" else "gcc", - libpath=name, verbose=False) + toolchain="msvc" if sys.platform == "win32" else "gcc", + libpath=name, + verbose=False, + ) lite_predictor = treelite_runtime.Predictor(name, verbose=False) - except (treelite.util.TreeliteError, PermissionError, - UnicodeDecodeError): + except (treelite.util.TreeliteError, PermissionError, UnicodeDecodeError): lite_predictor = None def predict_skl_predict(X, model=rf): return rf.predict(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] def predict_treelite_predict(X, sess=sess): return numpy.array( lite_predictor.predict( - treelite_runtime.Batch.from_npy2d(X.astype(np.float32)))) + treelite_runtime.Batch.from_npy2d(X.astype(np.float32)) + ) + ) - return {'predict': ( - predict_skl_predict, - predict_onnxrt_predict, - None, - )} + return { + "predict": ( + predict_skl_predict, + predict_onnxrt_predict, + None, + ) + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss, - methods, repeat=10, verbose=False): +def bench( + n_obs, + n_features, + max_depths, + n_estimatorss, + n_jobss, + methods, + repeat=10, + verbose=False, +): res = [] for nfeat in n_features: - ntrain = 100000 X_train = np.empty((ntrain, nfeat)).astype(np.float32) X_train[:, :] = rand(ntrain, nfeat)[:, :] @@ -93,25 +109,30 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss, for n_jobs in n_jobss: for max_depth in max_depths: for n_estimators in n_estimatorss: - fcts = fcts_model(X_train, y_train, - max_depth, n_estimators, n_jobs) + fcts = fcts_model(X_train, y_train, max_depth, n_estimators, n_jobs) for n in n_obs: for method in methods: - fct1, fct2, fct3 = fcts[method] if not allow_configuration( - n=n, nfeat=nfeat, - max_depth=max_depth, - n_estimator=n_estimators, - n_jobs=n_jobs, method=method): + n=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimator=n_estimators, + n_jobs=n_jobs, + method=method, + ): continue obs = dict( - n_obs=n, nfeat=nfeat, max_depth=max_depth, - 
n_estimators=n_estimators, method=method, - n_jobs=n_jobs) + n_obs=n, + nfeat=nfeat, + max_depth=max_depth, + n_estimators=n_estimators, + method=method, + n_jobs=n_jobs, + ) # creates different inputs to avoid caching # in any ways @@ -167,7 +188,8 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss, p2 = p2.ravel() try: assert_almost_equal( - p1.ravel(), p2.ravel(), decimal=5) + p1.ravel(), p2.ravel(), decimal=5 + ) except AssertionError as e: warnings.warn(str(e)) return res @@ -177,11 +199,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss, # Plots. ############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2) ncols = max(len(set(df.n_jobs)), 2) - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): @@ -190,17 +212,22 @@ def plot_results(df, verbose=False): for n_jobs in sorted(set(df.n_jobs)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: a.set_ylabel( "Time (s) n_obs={}\nmax_depth={} n_jobs={}".format( - n_obs, max_depth, n_jobs), fontsize='x-small') - - for color, n_estimators in zip( - 'brgyc', sorted(set(df.n_estimators))): - subset = df[(df.n_jobs == n_jobs) & (df.n_obs == n_obs) - & (df.max_depth == max_depth) - & (df.n_estimators == n_estimators)] + n_obs, max_depth, n_jobs + ), + fontsize="x-small", + ) + + for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))): + subset = df[ + (df.n_jobs == n_jobs) + & (df.n_obs == n_obs) + & (df.max_depth == max_depth) + & (df.n_estimators == n_estimators) + ] if subset.shape[0] == 0: continue subset = subset.sort_values("nfeat") @@ -209,20 +236,43 @@ def plot_results(df, verbose=False): label = "skl ne={}".format(n_estimators) subset.plot( - x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--', lw=5) + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + lw=5, + ) label = "ort ne={}".format(n_estimators) subset.plot( - x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color, lw=3) + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + lw=3, + ) label = "lite ne={}".format(n_estimators) subset.plot( - x="nfeat", y="time_lite", label=label, ax=a, - logx=True, logy=True, c=color, style='-.', lw=3) - - a.legend(loc=0, fontsize='x-small') + x="nfeat", + y="time_lite", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="-.", + lw=3, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: - a.set_title("n_jobs={}".format(n_jobs), fontsize='x-small') + a.set_title("n_jobs={}".format(n_jobs), fontsize="x-small") pos += 1 row += 1 @@ -232,15 +282,23 @@ def plot_results(df, verbose=False): @ignore_warnings(category=FutureWarning) def run_bench(repeat=100, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict'] + methods = ["predict"] n_features = [30, 100] max_depths = [10] n_estimatorss = [100, 200] n_jobss = [4] start = time() - results = bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss, - methods, repeat=repeat, verbose=verbose) + results = bench( + n_obs, + n_features, + max_depths, + n_estimatorss, + n_jobss, + methods, + repeat=repeat, + verbose=verbose, + ) end = time() results_df = 
pandas.DataFrame(results) @@ -251,7 +309,7 @@ def run_bench(repeat=100, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy @@ -260,16 +318,19 @@ def run_bench(repeat=100, verbose=False): import skl2onnx import treelite import treelite_runtime - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - {"name": "treelite", "version": treelite.__version__}, - {"name": "treelite_runtime", "version": treelite_runtime.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + {"name": "treelite", "version": treelite.__version__}, + {"name": "treelite_runtime", "version": treelite_runtime.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_random_forest_reg.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/bench_plot_onnxruntime_svm_reg.py b/benchmarks/bench_plot_onnxruntime_svm_reg.py index c1fa9b84c..2f38010c6 100644 --- a/benchmarks/bench_plot_onnxruntime_svm_reg.py +++ b/benchmarks/bench_plot_onnxruntime_svm_reg.py @@ -26,12 +26,13 @@ # Implementations to benchmark. ############################## + def fcts_model(X, y, kernel): "SVR." rf = SVR(kernel=kernel) rf.fit(X, y) - initial_types = [('X', FloatTensorType([None, X.shape[1]]))] + initial_types = [("X", FloatTensorType([None, X.shape[1]]))] onx = convert_sklearn(rf, initial_types=initial_types) f = BytesIO() f.write(onx.SerializeToString()) @@ -43,27 +44,28 @@ def predict_skl_predict(X, model=rf): return rf.predict(X) def predict_onnxrt_predict(X, sess=sess): - return sess.run(outputs[:1], {'X': X})[0] + return sess.run(outputs[:1], {"X": X})[0] - return {'predict': ( - predict_skl_predict, - predict_onnxrt_predict, - )} + return { + "predict": ( + predict_skl_predict, + predict_onnxrt_predict, + ) + } ############################## # Benchmarks ############################## + def allow_configuration(**kwargs): return True -def bench(n_obs, n_features, kernels, - methods, repeat=10, verbose=False): +def bench(n_obs, n_features, kernels, methods, repeat=10, verbose=False): res = [] for nfeat in n_features: - ntrain = 1000 X_train = np.empty((ntrain, nfeat)).astype(np.float32) X_train[:, :] = rand(ntrain, nfeat)[:, :] @@ -75,11 +77,9 @@ def bench(n_obs, n_features, kernels, for n in n_obs: for method in methods: - fct1, fct2 = fcts[method] - if not allow_configuration(n=n, nfeat=nfeat, - kernel=kernel): + if not allow_configuration(n=n, nfeat=nfeat, kernel=kernel): continue obs = dict(n_obs=n, nfeat=nfeat, kernel=kernel) @@ -124,8 +124,7 @@ def bench(n_obs, n_features, kernels, if len(p1.shape) == 1 and len(p2.shape) == 2: p2 = p2.ravel() try: - assert_almost_equal( - p1.ravel(), p2.ravel(), decimal=3) + assert_almost_equal(p1.ravel(), p2.ravel(), decimal=3) except AssertionError as e: warnings.warn(str(e)) return res @@ -135,22 +134,21 @@ def bench(n_obs, n_features, kernels, # Plots. 
############################## + def plot_results(df, verbose=False): nrows = max(len(set(df.n_obs)), 2) ncols = 2 - fig, ax = plt.subplots(nrows, ncols, - figsize=(ncols * 4, nrows * 4)) + fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4)) pos = 0 row = 0 for n_obs in sorted(set(df.n_obs)): a = ax[row, pos] if row == ax.shape[0] - 1: - a.set_xlabel("N features", fontsize='x-small') + a.set_xlabel("N features", fontsize="x-small") if pos == 0: - a.set_ylabel( - "Time (s) n_obs={}".format(n_obs), fontsize='x-small') + a.set_ylabel("Time (s) n_obs={}".format(n_obs), fontsize="x-small") - for color, kernel in zip('brgyc', sorted(set(df.kernel))): + for color, kernel in zip("brgyc", sorted(set(df.kernel))): subset = df[(df.kernel == kernel)] if subset.shape[0] == 0: continue @@ -159,13 +157,30 @@ def plot_results(df, verbose=False): print(subset) label = "skl %s" % kernel - subset.plot(x="nfeat", y="time_skl", label=label, ax=a, - logx=True, logy=True, c=color, style='--', lw=5) + subset.plot( + x="nfeat", + y="time_skl", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + style="--", + lw=5, + ) label = "ort %s" % kernel - subset.plot(x="nfeat", y="time_ort", label=label, ax=a, - logx=True, logy=True, c=color, lw=3) - - a.legend(loc=0, fontsize='x-small') + subset.plot( + x="nfeat", + y="time_ort", + label=label, + ax=a, + logx=True, + logy=True, + c=color, + lw=3, + ) + + a.legend(loc=0, fontsize="x-small") if row == 0: pass # a.set_title("n_jobs={}".format(n_jobs), fontsize='x-small') row += 1 @@ -176,13 +191,12 @@ def plot_results(df, verbose=False): @ignore_warnings(category=FutureWarning) def run_bench(repeat=100, verbose=False): n_obs = [1, 10, 100, 1000, 10000, 100000] - methods = ['predict'] + methods = ["predict"] n_features = [10, 50] kernels = ["linear", "poly", "rbf", "sigmoid"] start = time() - results = bench(n_obs, n_features, kernels, - methods, repeat=repeat, verbose=verbose) + results = bench(n_obs, n_features, kernels, methods, repeat=repeat, verbose=verbose) end = time() results_df = pandas.DataFrame(results) @@ -193,21 +207,24 @@ def run_bench(repeat=100, verbose=False): return results_df -if __name__ == '__main__': +if __name__ == "__main__": from datetime import datetime import sklearn import numpy import onnx import onnxruntime import skl2onnx - df = pandas.DataFrame([ - {"name": "date", "version": str(datetime.now())}, - {"name": "numpy", "version": numpy.__version__}, - {"name": "scikit-learn", "version": sklearn.__version__}, - {"name": "onnx", "version": onnx.__version__}, - {"name": "onnxruntime", "version": onnxruntime.__version__}, - {"name": "skl2onnx", "version": skl2onnx.__version__}, - ]) + + df = pandas.DataFrame( + [ + {"name": "date", "version": str(datetime.now())}, + {"name": "numpy", "version": numpy.__version__}, + {"name": "scikit-learn", "version": sklearn.__version__}, + {"name": "onnx", "version": onnx.__version__}, + {"name": "onnxruntime", "version": onnxruntime.__version__}, + {"name": "skl2onnx", "version": skl2onnx.__version__}, + ] + ) df.to_csv("bench_plot_onnxruntime_svm_reg.time.csv", index=False) print(df) df = run_bench(verbose=True) diff --git a/benchmarks/post_graph.py b/benchmarks/post_graph.py index ff4e64671..2d3276e72 100644 --- a/benchmarks/post_graph.py +++ b/benchmarks/post_graph.py @@ -11,17 +11,20 @@ def autolabel(ax, rects): for rect in rects: height = rect.get_height() - ax.annotate('%1.1fx' % height, - xy=(rect.get_x() + rect.get_width() / 2, height), - xytext=(0, 3), # 3 points vertical offset 
- textcoords="offset points", - ha='center', va='bottom', - fontsize=8) + ax.annotate( + "%1.1fx" % height, + xy=(rect.get_x() + rect.get_width() / 2, height), + xytext=(0, 3), # 3 points vertical offset + textcoords="offset points", + ha="center", + va="bottom", + fontsize=8, + ) def linear_models(): - filename1 = os.path.join(HERE, 'bench_plot_onnxruntime_linreg.csv') - filename2 = os.path.join(HERE, 'bench_plot_onnxruntime_logreg.csv') + filename1 = os.path.join(HERE, "bench_plot_onnxruntime_linreg.csv") + filename2 = os.path.join(HERE, "bench_plot_onnxruntime_logreg.csv") if not os.path.exists(filename1) or not os.path.exists(filename2): return dfr = read_csv(filename1) @@ -45,12 +48,12 @@ def linear_models(): x = numpy.arange(len(labels)) width = 0.90 - rects1 = ax.bar(x, means, width, label='Speedup') + rects1 = ax.bar(x, means, width, label="Speedup") if pos == 0: - ax.set_ylabel('Speedup') - ax.set_title('%s %d features' % (name, nf)) - ax.set_xlabel('batch size') + ax.set_ylabel("Speedup") + ax.set_title("%s %d features" % (name, nf)) + ax.set_xlabel("batch size") ax.set_xticks(x) ax.set_xticklabels(labels) autolabel(ax, rects1) @@ -65,20 +68,20 @@ def linear_models(): def svm_models(): - filename = os.path.join(HERE, 'bench_plot_onnxruntime_svm_reg.csv') + filename = os.path.join(HERE, "bench_plot_onnxruntime_svm_reg.csv") if not os.path.exists(filename): return dfr = read_csv(filename) dfr["speedup"] = dfr["time_skl"] / dfr["time_ort"] print(dfr.tail()) - ncols = len(set(dfr['kernel'])) + ncols = len(set(dfr["kernel"])) fig, axs = plt.subplots(1, ncols, figsize=(14, 4), sharey=True) name = "SVR" nf = 50 pos = 0 - for kernel in sorted(set(dfr['kernel'])): + for kernel in sorted(set(dfr["kernel"])): sub = dfr[(dfr.kernel == kernel) & (dfr.nfeat == nf)] ax = axs[pos] labels = sub.n_obs @@ -87,12 +90,12 @@ def svm_models(): x = numpy.arange(len(labels)) width = 0.90 - rects1 = ax.bar(x, means, width, label='Speedup') + rects1 = ax.bar(x, means, width, label="Speedup") if pos == 0: - ax.set_ylabel('Speedup') - ax.set_title('%s %s - %d features' % (name, kernel, nf)) - ax.set_xlabel('batch size') + ax.set_ylabel("Speedup") + ax.set_title("%s %s - %d features" % (name, kernel, nf)) + ax.set_xlabel("batch size") ax.set_xticks(x) ax.set_xticklabels(labels) autolabel(ax, rects1) @@ -107,8 +110,7 @@ def svm_models(): def rf_models(): - filename = os.path.join( - HERE, 'bench_plot_onnxruntime_random_forest_reg.csv') + filename = os.path.join(HERE, "bench_plot_onnxruntime_random_forest_reg.csv") if not os.path.exists(filename): return dfr = read_csv(filename) @@ -125,8 +127,11 @@ def rf_models(): for est in [100, 200]: for n_jobs in [4]: sub = dfr[ - (dfr.max_depth == max_depth) & (dfr.nfeat == nf) & - (dfr.n_estimators == est) & (dfr.n_jobs == n_jobs)] + (dfr.max_depth == max_depth) + & (dfr.nfeat == nf) + & (dfr.n_estimators == est) + & (dfr.n_jobs == n_jobs) + ] ax = axs[pos] labels = sub.n_obs means = sub.speedup @@ -134,17 +139,18 @@ def rf_models(): x = numpy.arange(len(labels)) width = 0.90 - rects1 = ax.bar(x, means, width, label='Speedup') + rects1 = ax.bar(x, means, width, label="Speedup") if pos == 0: - ax.set_yscale('log') - ax.set_ylim([0.1, max(dfr['speedup'])]) + ax.set_yscale("log") + ax.set_ylim([0.1, max(dfr["speedup"])]) if pos == 0: - ax.set_ylabel('Speedup') + ax.set_ylabel("Speedup") ax.set_title( - '%s\ndepth %d - %d features\n %d estimators %d jobs' - '' % (name, max_depth, nf, est, n_jobs)) - ax.set_xlabel('batch size') + "%s\ndepth %d - %d features\n %d estimators 
%d jobs" + "" % (name, max_depth, nf, est, n_jobs) + ) + ax.set_xlabel("batch size") ax.set_xticks(x) ax.set_xticklabels(labels) autolabel(ax, rects1) diff --git a/docs/conf.py b/docs/conf.py index cbad4d184..a730d3b02 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,56 +7,55 @@ import sys import warnings import skl2onnx -import pydata_sphinx_theme -sys.path.append(os.path.abspath('exts')) +sys.path.append(os.path.abspath("exts")) from github_link import make_linkcode_resolve # noqa # -- Project information ----------------------------------------------------- -project = 'sklearn-onnx' -copyright = '2018-2023, Microsoft' -author = 'Microsoft' +project = "sklearn-onnx" +copyright = "2018-2023, Microsoft" +author = "Microsoft" version = skl2onnx.__version__ release = version # -- General configuration --------------------------------------------------- extensions = [ - 'sphinx.ext.intersphinx', - 'sphinx.ext.imgmath', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', + "sphinx.ext.intersphinx", + "sphinx.ext.imgmath", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", "sphinx.ext.autodoc", - 'sphinx.ext.githubpages', + "sphinx.ext.githubpages", "sphinx_gallery.gen_gallery", - 'sphinx.ext.autodoc', - 'sphinx.ext.graphviz', - 'sphinx_skl2onnx_extension', - 'matplotlib.sphinxext.plot_directive', - 'pyquickhelper.sphinxext.sphinx_cmdref_extension', - 'pyquickhelper.sphinxext.sphinx_collapse_extension', - 'pyquickhelper.sphinxext.sphinx_docassert_extension', - 'pyquickhelper.sphinxext.sphinx_epkg_extension', - 'pyquickhelper.sphinxext.sphinx_exref_extension', - 'pyquickhelper.sphinxext.sphinx_faqref_extension', - 'pyquickhelper.sphinxext.sphinx_gdot_extension', - 'pyquickhelper.sphinxext.sphinx_runpython_extension', + "sphinx.ext.autodoc", + "sphinx.ext.graphviz", + "sphinx_skl2onnx_extension", + "matplotlib.sphinxext.plot_directive", + "pyquickhelper.sphinxext.sphinx_cmdref_extension", + "pyquickhelper.sphinxext.sphinx_collapse_extension", + "pyquickhelper.sphinxext.sphinx_docassert_extension", + "pyquickhelper.sphinxext.sphinx_epkg_extension", + "pyquickhelper.sphinxext.sphinx_exref_extension", + "pyquickhelper.sphinxext.sphinx_faqref_extension", + "pyquickhelper.sphinxext.sphinx_gdot_extension", + "pyquickhelper.sphinxext.sphinx_runpython_extension", "sphinxcontrib.blockdiag", ] -templates_path = ['_templates'] -source_suffix = ['.rst'] +templates_path = ["_templates"] +source_suffix = [".rst"] -master_doc = 'index' +master_doc = "index" language = "en" exclude_patterns = [] -pygments_style = 'default' +pygments_style = "default" # -- Options for HTML output ------------------------------------------------- -html_static_path = ['_static'] +html_static_path = ["_static"] html_theme = "furo" html_logo = "logo_main.png" @@ -67,42 +66,37 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {"https://docs.python.org/": None} # -- Options for Sphinx Gallery ---------------------------------------------- linkcode_resolve = make_linkcode_resolve( - 'skl2onnx', - 'https://github.com/onnx/skl2onnx/blob/{revision}/' - '{package}/{path}#L{lineno}') + "skl2onnx", + "https://github.com/onnx/skl2onnx/blob/{revision}/" "{package}/{path}#L{lineno}", +) intersphinx_mapping = { - 'joblib': ('https://joblib.readthedocs.io/en/latest/', None), - 'python': ('https://docs.python.org/{.major}'.format( - sys.version_info), None), - 'matplotlib': ('https://matplotlib.org/', None), - 'mlinsights': ( - 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/', None), - 'numpy': ('https://docs.scipy.org/doc/numpy/', None), - 'pyquickhelper': ( - 'http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/', None), - 'onnxruntime': ('https://onnxruntime.ai/docs/api/python/', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), - 'seaborn': ('https://seaborn.pydata.org/', None), - 'scikit-learn': ( - 'https://scikit-learn.org/stable/', - None), - 'sklearn': ('https://scikit-learn.org/stable/', None), - 'skl2onnx': ('https://onnx.ai/sklearn-onnx/', None), - 'sklearn-onnx': ('https://onnx.ai/sklearn-onnx/', None), + "joblib": ("https://joblib.readthedocs.io/en/latest/", None), + "python": ("https://docs.python.org/{.major}".format(sys.version_info), None), + "matplotlib": ("https://matplotlib.org/", None), + "mlinsights": ("http://www.xavierdupre.fr/app/mlinsights/helpsphinx/", None), + "numpy": ("https://docs.scipy.org/doc/numpy/", None), + "pyquickhelper": ("http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/", None), + "onnxruntime": ("https://onnxruntime.ai/docs/api/python/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "seaborn": ("https://seaborn.pydata.org/", None), + "scikit-learn": ("https://scikit-learn.org/stable/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "skl2onnx": ("https://onnx.ai/sklearn-onnx/", None), + "sklearn-onnx": ("https://onnx.ai/sklearn-onnx/", None), } sphinx_gallery_conf = { - 'examples_dirs': ['examples', 'tutorial'], - 'gallery_dirs': ['auto_examples', 'auto_tutorial'], - 'capture_repr': ('_repr_html_', '__repr__'), - 'ignore_repr_types': r'matplotlib.text|matplotlib.axes', + "examples_dirs": ["examples", "tutorial"], + "gallery_dirs": ["auto_examples", "auto_tutorial"], + "capture_repr": ("_repr_html_", "__repr__"), + "ignore_repr_types": r"matplotlib.text|matplotlib.axes", # 'binder': { # 'org': 'onnx', # 'repo': 'onnx.ai/sklearn-onnx/', @@ -114,35 +108,32 @@ } epkg_dictionary = { - 'C': 'https://en.wikipedia.org/wiki/C_(programming_language)', - 'C++': 'https://en.wikipedia.org/wiki/C%2B%2B', - 'cython': 'https://cython.org/', - 'DOT': 'https://www.graphviz.org/doc/info/lang.html', - 'ImageNet': 'http://www.image-net.org/', - 'LightGBM': 'https://lightgbm.readthedocs.io/en/latest/', - 'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/', - 'NMF': - 'https://scikit-learn.org/stable/modules/generated/' - 'sklearn.decomposition.NMF.html', - 'numpy': 'https://numpy.org/', - 'onnx': 'https://github.com/onnx/onnx', - 'ONNX': 'https://onnx.ai/', - 'ONNX operators': - 'https://github.com/onnx/onnx/blob/master/docs/Operators.md', - 'ONNX ML operators': - 
'https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md', - 'onnxmltools': 'https://github.com/onnx/onnxmltools', - 'onnxruntime': 'https://microsoft.github.io/onnxruntime/', - 'openmp': 'https://en.wikipedia.org/wiki/OpenMP', - 'pyinstrument': 'https://github.com/joerick/pyinstrument', - 'python': 'https://www.python.org/', - 'pytorch': 'https://pytorch.org/', - 'scikit-learn': 'https://scikit-learn.org/stable/', - 'skorch': 'https://skorch.readthedocs.io/en/stable/', - 'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx', - 'sphinx-gallery': 'https://github.com/sphinx-gallery/sphinx-gallery', - 'xgboost': 'https://xgboost.readthedocs.io/en/latest/', - 'XGBoost': 'https://xgboost.readthedocs.io/en/latest/', + "C": "https://en.wikipedia.org/wiki/C_(programming_language)", + "C++": "https://en.wikipedia.org/wiki/C%2B%2B", + "cython": "https://cython.org/", + "DOT": "https://www.graphviz.org/doc/info/lang.html", + "ImageNet": "http://www.image-net.org/", + "LightGBM": "https://lightgbm.readthedocs.io/en/latest/", + "lightgbm": "https://lightgbm.readthedocs.io/en/latest/", + "NMF": "https://scikit-learn.org/stable/modules/generated/" + "sklearn.decomposition.NMF.html", + "numpy": "https://numpy.org/", + "onnx": "https://github.com/onnx/onnx", + "ONNX": "https://onnx.ai/", + "ONNX operators": "https://github.com/onnx/onnx/blob/master/docs/Operators.md", + "ONNX ML operators": "https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md", + "onnxmltools": "https://github.com/onnx/onnxmltools", + "onnxruntime": "https://microsoft.github.io/onnxruntime/", + "openmp": "https://en.wikipedia.org/wiki/OpenMP", + "pyinstrument": "https://github.com/joerick/pyinstrument", + "python": "https://www.python.org/", + "pytorch": "https://pytorch.org/", + "scikit-learn": "https://scikit-learn.org/stable/", + "skorch": "https://skorch.readthedocs.io/en/stable/", + "sklearn-onnx": "https://github.com/onnx/sklearn-onnx", + "sphinx-gallery": "https://github.com/sphinx-gallery/sphinx-gallery", + "xgboost": "https://xgboost.readthedocs.io/en/latest/", + "XGBoost": "https://xgboost.readthedocs.io/en/latest/", } warnings.filterwarnings("ignore", category=FutureWarning) diff --git a/docs/examples/plot_backend.py b/docs/examples/plot_backend.py index dd127c252..251399c02 100644 --- a/docs/examples/plot_backend.py +++ b/docs/examples/plot_backend.py @@ -44,11 +44,11 @@ # Let's use ONNX backend API to test it. model = onnx.load(name) -rep = backend.prepare(model, 'CPU') -x = np.array([[-1.0, -2.0, 5.0, 6.0], - [-1.0, -2.0, -3.0, -4.0], - [-1.0, -2.0, 7.0, 8.0]], - dtype=np.float32) +rep = backend.prepare(model, "CPU") +x = np.array( + [[-1.0, -2.0, 5.0, 6.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, 7.0, 8.0]], + dtype=np.float32, +) label, proba = rep.run(x) print("label={}".format(label)) print("probabilities={}".format(proba)) @@ -62,11 +62,11 @@ # The backend can also directly load the model # without using *onnx*. 
-rep = backend.prepare(name, 'CPU') -x = np.array([[-1.0, -2.0, -3.0, -4.0], - [-1.0, -2.0, -3.0, -4.0], - [-1.0, -2.0, -3.0, -4.0]], - dtype=np.float32) +rep = backend.prepare(name, "CPU") +x = np.array( + [[-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0]], + dtype=np.float32, +) label, proba = rep.run(x) print("label={}".format(label)) print("probabilities={}".format(proba)) diff --git a/docs/examples/plot_benchmark_cdist.py b/docs/examples/plot_benchmark_cdist.py index b02074b36..9247d7a93 100644 --- a/docs/examples/plot_benchmark_cdist.py +++ b/docs/examples/plot_benchmark_cdist.py @@ -34,15 +34,13 @@ X = np.ones((2, 4), dtype=np.float32) Y = np.ones((3, 4), dtype=np.float32) Y *= 2 -print(cdist(X, Y, metric='euclidean')) +print(cdist(X, Y, metric="euclidean")) #################################### # ONNX -op = OnnxCDist('X', 'Y', op_version=12, output_names=['Z'], - metric='euclidean') -onx = op.to_onnx({'X': X, 'Y': Y}, - outputs=[('Z', FloatTensorType())]) +op = OnnxCDist("X", "Y", op_version=12, output_names=["Z"], metric="euclidean") +onx = op.to_onnx({"X": X, "Y": Y}, outputs=[("Z", FloatTensorType())]) print(onx) @@ -53,9 +51,8 @@ # We compute the output of CDist operator # with onnxruntime. -sess = InferenceSession(onx.SerializeToString(), - providers=["CPUExecutionProvider"]) -res = sess.run(None, {'X': X, 'Y': Y}) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) +res = sess.run(None, {"X": X, "Y": Y}) print(res) ##################################### @@ -67,25 +64,30 @@ def measure_time(name, stmt, context, repeat=100, number=20): tim = Timer(stmt, globals=context) - res = np.array( - tim.repeat(repeat=repeat, number=number)) + res = np.array(tim.repeat(repeat=repeat, number=number)) res /= number mean = np.mean(res) - dev = np.mean(res ** 2) + dev = np.mean(res**2) dev = (dev - mean**2) ** 0.5 return dict( - average=mean, deviation=dev, min_exec=np.min(res), - max_exec=np.max(res), repeat=repeat, number=number, - nrows=context['X'].shape[0], ncols=context['Y'].shape[1], - name=name) + average=mean, + deviation=dev, + min_exec=np.min(res), + max_exec=np.max(res), + repeat=repeat, + number=number, + nrows=context["X"].shape[0], + ncols=context["Y"].shape[1], + name=name, + ) ############################## # scipy time_scipy = measure_time( - "scipy", "cdist(X, Y)", - context={'cdist': cdist, 'X': X, 'Y': Y}) + "scipy", "cdist(X, Y)", context={"cdist": cdist, "X": X, "Y": Y} +) pprint(time_scipy) @@ -93,8 +95,8 @@ def measure_time(name, stmt, context, repeat=100, number=20): # onnxruntime time_ort = measure_time( - "ort", "sess.run(None, {'X': X, 'Y': Y})", - context={'sess': sess, 'X': X, 'Y': Y}) + "ort", "sess.run(None, {'X': X, 'Y': Y})", context={"sess": sess, "X": X, "Y": Y} +) pprint(time_ort) ############################################ @@ -108,20 +110,21 @@ def measure_time(name, stmt, context, repeat=100, number=20): Y = np.random.randn(10, 4).astype(np.float32) time_scipy = measure_time( - "scipy", "cdist(X, Y)", - context={'cdist': cdist, 'X': X, 'Y': Y}) + "scipy", "cdist(X, Y)", context={"cdist": cdist, "X": X, "Y": Y} + ) time_ort = measure_time( - "ort", "sess.run(None, {'X': X, 'Y': Y})", - context={'sess': sess, 'X': X, 'Y': Y}) - metric = dict(N=dim, scipy=time_scipy['average'], - ort=time_ort['average']) + "ort", + "sess.run(None, {'X': X, 'Y': Y})", + context={"sess": sess, "X": X, "Y": Y}, + ) + metric = dict(N=dim, scipy=time_scipy["average"], ort=time_ort["average"]) metrics.append(metric) df 
= DataFrame(metrics) -df['scipy/ort'] = df['scipy'] / df['ort'] +df["scipy/ort"] = df["scipy"] / df["ort"] print(df) -df.plot(x='N', y=['scipy/ort']) +df.plot(x="N", y=["scipy/ort"]) ################################# # **Versions used for this example** diff --git a/docs/examples/plot_benchmark_pipeline.py b/docs/examples/plot_benchmark_pipeline.py index 8db485cc8..5634cfcf2 100644 --- a/docs/examples/plot_benchmark_pipeline.py +++ b/docs/examples/plot_benchmark_pipeline.py @@ -40,7 +40,7 @@ logistic = LogisticRegression() pca = PCA() -pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)]) +pipe = Pipeline(steps=[("pca", pca), ("logistic", logistic)]) digits = datasets.load_digits() X_digits = digits.data[:1000] @@ -53,15 +53,15 @@ # ++++++++++++++++++ -initial_types = [('input', FloatTensorType((None, X_digits.shape[1])))] -model_onnx = convert_sklearn(pipe, initial_types=initial_types, - target_opset=12) +initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))] +model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12) -sess = rt.InferenceSession(model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) +sess = rt.InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) print("skl predict_proba") print(pipe.predict_proba(X_digits[:2])) -onx_pred = sess.run(None, {'input': X_digits[:2].astype(np.float32)})[1] +onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1] df = pd.DataFrame(onx_pred) print("onnx predict_proba") print(df.values) @@ -78,11 +78,15 @@ # ++++++++++ print("scikit-learn") -print(timeit("pipe.predict_proba(X_digits[:1])", - number=10000, globals=globals())) +print(timeit("pipe.predict_proba(X_digits[:1])", number=10000, globals=globals())) print("onnxruntime") -print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]", - number=10000, globals=globals())) +print( + timeit( + "sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]", + number=10000, + globals=globals(), + ) +) ############################################### # Intermediate steps @@ -95,34 +99,47 @@ # an smaller ONNX graph for every operator. 
-steps = collect_intermediate_steps( - pipe, "pipeline", initial_types) +steps = collect_intermediate_steps(pipe, "pipeline", initial_types) assert len(steps) == 2 pipe.predict_proba(X_digits[:2]) for i, step in enumerate(steps): - onnx_step = step['onnx_step'] - sess = rt.InferenceSession(onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)}) - skl_outputs = step['model']._debug.outputs - if 'transform' in skl_outputs: - compare_objects(skl_outputs['transform'], onnx_outputs[0]) - print("benchmark", step['model'].__class__) + onnx_step = step["onnx_step"] + sess = rt.InferenceSession( + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)}) + skl_outputs = step["model"]._debug.outputs + if "transform" in skl_outputs: + compare_objects(skl_outputs["transform"], onnx_outputs[0]) + print("benchmark", step["model"].__class__) print("scikit-learn") - print(timeit("step['model'].transform(X_digits[:1])", - number=10000, globals=globals())) + print( + timeit( + "step['model'].transform(X_digits[:1])", number=10000, globals=globals() + ) + ) else: - compare_objects(skl_outputs['predict_proba'], onnx_outputs[1]) - print("benchmark", step['model'].__class__) + compare_objects(skl_outputs["predict_proba"], onnx_outputs[1]) + print("benchmark", step["model"].__class__) print("scikit-learn") - print(timeit("step['model'].predict_proba(X_digits[:1])", - number=10000, globals=globals())) + print( + timeit( + "step['model'].predict_proba(X_digits[:1])", + number=10000, + globals=globals(), + ) + ) print("onnxruntime") - print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})", - number=10000, globals=globals())) + print( + timeit( + "sess.run(None, {'input': X_digits[:1].astype(np.float32)})", + number=10000, + globals=globals(), + ) + ) ################################# # **Versions used for this example** diff --git a/docs/examples/plot_black_op.py b/docs/examples/plot_black_op.py index 5969a7406..d5c46d639 100644 --- a/docs/examples/plot_black_op.py +++ b/docs/examples/plot_black_op.py @@ -43,32 +43,39 @@ # ++++++++++++++++++ model_onnx = to_onnx( - model, X_train[:1].astype(np.float32), - options={id(model): {'score_samples': True}}, - target_opset=12) -sess = InferenceSession(model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model, + X_train[:1].astype(np.float32), + options={id(model): {"score_samples": True}}, + target_opset=12, +) +sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) xt = X_test[:5].astype(np.float32) print(model.score_samples(xt)) -print(sess.run(None, {'X': xt})[2]) +print(sess.run(None, {"X": xt})[2]) ################################## # Display the ONNX graph. 
pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring", color="yellow", - fillcolor="yellow", style="filled")) + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("mixture.dot") -os.system('dot -O -Gdpi=300 -Tpng mixture.dot') +os.system("dot -O -Gdpi=300 -Tpng mixture.dot") image = plt.imread("mixture.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################### @@ -80,43 +87,58 @@ # produces in that case. model_onnx2 = to_onnx( - model, X_train[:1].astype(np.float32), - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp'}, - target_opset=12) -sess2 = InferenceSession(model_onnx2.SerializeToString(), - providers=["CPUExecutionProvider"]) + model, + X_train[:1].astype(np.float32), + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp"}, + target_opset=12, +) +sess2 = InferenceSession( + model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"] +) xt = X_test[:5].astype(np.float32) print(model.score_samples(xt)) -print(sess2.run(None, {'X': xt})[2]) +print(sess2.run(None, {"X": xt})[2]) ################################## # Display the ONNX graph. pydot_graph = GetPydotGraph( - model_onnx2.graph, name=model_onnx2.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring", color="yellow", - fillcolor="yellow", style="filled")) + model_onnx2.graph, + name=model_onnx2.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("mixture2.dot") -os.system('dot -O -Gdpi=300 -Tpng mixture2.dot') +os.system("dot -O -Gdpi=300 -Tpng mixture2.dot") image = plt.imread("mixture2.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ####################################### # Processing time # +++++++++++++++ -print(timeit(stmt="sess.run(None, {'X': xt})", - number=10000, globals={'sess': sess, 'xt': xt})) +print( + timeit( + stmt="sess.run(None, {'X': xt})", number=10000, globals={"sess": sess, "xt": xt} + ) +) -print(timeit(stmt="sess2.run(None, {'X': xt})", - number=10000, globals={'sess2': sess2, 'xt': xt})) +print( + timeit( + stmt="sess2.run(None, {'X': xt})", + number=10000, + globals={"sess2": sess2, "xt": xt}, + ) +) ################################# # The model using ReduceLogSumExp is much faster. 
@@ -132,21 +154,25 @@ try: to_onnx( - model, X_train[:1].astype(np.float32), - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp', 'Add'}, - target_opset=12) + model, + X_train[:1].astype(np.float32), + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp", "Add"}, + target_opset=12, + ) except RuntimeError as e: - print('Error:', e) + print("Error:", e) ################################# # **Versions used for this example** import sklearn # noqa + print("numpy:", numpy.__version__) print("scikit-learn:", sklearn.__version__) import skl2onnx # noqa + print("onnx: ", onnx.__version__) print("onnxruntime: ", onnxruntime.__version__) print("skl2onnx: ", skl2onnx.__version__) diff --git a/docs/examples/plot_cast_transformer.py b/docs/examples/plot_cast_transformer.py index 11449b808..34efc74f8 100644 --- a/docs/examples/plot_cast_transformer.py +++ b/docs/examples/plot_cast_transformer.py @@ -50,38 +50,32 @@ # The weird data. X, y = make_regression(10000, 10, random_state=3) -X_train, X_test, y_train, _ = train_test_split( - X, y, random_state=3) +X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3) Xi_train, yi_train = X_train.copy(), y_train.copy() Xi_test = X_test.copy() for i in range(X.shape[1]): - Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype( - np.int64) - Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype( - np.int64) + Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(np.int64) + Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(np.int64) max_depth = 10 Xi_test = Xi_test.astype(np.float32) ################################# # A simple model. -model1 = Pipeline([ - ('scaler', StandardScaler()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) -]) +model1 = Pipeline( + [("scaler", StandardScaler()), ("dt", DecisionTreeRegressor(max_depth=max_depth))] +) model1.fit(Xi_train, yi_train) exp1 = model1.predict(Xi_test) ################################# # Conversion into ONNX. -onx1 = to_onnx(model1, X_train[:1].astype(np.float32), - target_opset=15) -sess1 = InferenceSession(onx1.SerializeToString(), - providers=["CPUExecutionProvider"]) +onx1 = to_onnx(model1, X_train[:1].astype(np.float32), target_opset=15) +sess1 = InferenceSession(onx1.SerializeToString(), providers=["CPUExecutionProvider"]) ################################### # And the maximum difference. -got1 = sess1.run(None, {'X': Xi_test})[0] +got1 = sess1.run(None, {"X": Xi_test})[0] def maxdiff(a1, a2): @@ -96,17 +90,21 @@ def maxdiff(a1, a2): # The graph. pydot_graph = GetPydotGraph( - onx1.graph, name=onx1.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring", color="yellow", - fillcolor="yellow", style="filled")) + onx1.graph, + name=onx1.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("cast1.dot") -os.system('dot -O -Gdpi=300 -Tpng cast1.dot') +os.system("dot -O -Gdpi=300 -Tpng cast1.dot") image = plt.imread("cast1.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ######################################## # New pipeline @@ -124,23 +122,27 @@ def maxdiff(a1, a2): # `'div'`) and to use double by inserting an explicit # Cast. 
-model2 = Pipeline([ - ('cast64', CastTransformer(dtype=np.float64)), - ('scaler', StandardScaler()), - ('cast', CastTransformer()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) -]) +model2 = Pipeline( + [ + ("cast64", CastTransformer(dtype=np.float64)), + ("scaler", StandardScaler()), + ("cast", CastTransformer()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] +) model2.fit(Xi_train, yi_train) exp2 = model2.predict(Xi_test) -onx2 = to_onnx(model2, X_train[:1].astype(np.float32), - options={StandardScaler: {'div': 'div_cast'}}, - target_opset=15) +onx2 = to_onnx( + model2, + X_train[:1].astype(np.float32), + options={StandardScaler: {"div": "div_cast"}}, + target_opset=15, +) -sess2 = InferenceSession(onx2.SerializeToString(), - providers=["CPUExecutionProvider"]) -got2 = sess2.run(None, {'X': Xi_test})[0] +sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"]) +got2 = sess2.run(None, {"X": Xi_test})[0] md2 = maxdiff(exp2, got2) print(md2) @@ -149,25 +151,31 @@ def maxdiff(a1, a2): # The graph. pydot_graph = GetPydotGraph( - onx2.graph, name=onx2.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring", color="yellow", - fillcolor="yellow", style="filled")) + onx2.graph, + name=onx2.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("cast2.dot") -os.system('dot -O -Gdpi=300 -Tpng cast2.dot') +os.system("dot -O -Gdpi=300 -Tpng cast2.dot") image = plt.imread("cast2.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** import sklearn # noqa + print("numpy:", np.__version__) print("scikit-learn:", sklearn.__version__) import skl2onnx # noqa + print("onnx: ", onnx.__version__) print("onnxruntime: ", onnxruntime.__version__) print("skl2onnx: ", skl2onnx.__version__) diff --git a/docs/examples/plot_complex_pipeline.py b/docs/examples/plot_complex_pipeline.py index 1e4c58a2c..f9404d944 100644 --- a/docs/examples/plot_complex_pipeline.py +++ b/docs/examples/plot_complex_pipeline.py @@ -50,40 +50,50 @@ from skl2onnx.common.data_types import FloatTensorType, StringTensorType from skl2onnx.common.data_types import Int64TensorType -titanic_url = ('https://raw.githubusercontent.com/amueller/' - 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv') +titanic_url = ( + "https://raw.githubusercontent.com/amueller/" + "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv" +) data = pd.read_csv(titanic_url) -X = data.drop('survived', axis=1) -y = data['survived'] +X = data.drop("survived", axis=1) +y = data["survived"] print(data.dtypes) # SimpleImputer on string is not available for # string in ONNX-ML specifications. # So we do it beforehand. 
-for cat in ['embarked', 'sex', 'pclass']: - X[cat].fillna('missing', inplace=True) +for cat in ["embarked", "sex", "pclass"]: + X[cat].fillna("missing", inplace=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) -numeric_features = ['age', 'fare'] -numeric_transformer = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='median')), - ('scaler', StandardScaler())]) +numeric_features = ["age", "fare"] +numeric_transformer = Pipeline( + steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())] +) -categorical_features = ['embarked', 'sex', 'pclass'] -categorical_transformer = Pipeline(steps=[ - # --- SimpleImputer is not available for strings in ONNX-ML specifications. - # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), - ('onehot', OneHotEncoder(handle_unknown='ignore'))]) +categorical_features = ["embarked", "sex", "pclass"] +categorical_transformer = Pipeline( + steps=[ + # --- SimpleImputer is not available for strings in ONNX-ML specifications. + # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), + ("onehot", OneHotEncoder(handle_unknown="ignore")) + ] +) preprocessor = ColumnTransformer( transformers=[ - ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features), - ]) + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] +) -clf = Pipeline(steps=[('preprocessor', preprocessor), - ('classifier', LogisticRegression(solver='lbfgs'))]) +clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", LogisticRegression(solver="lbfgs")), + ] +) clf.fit(X_train, y_train) @@ -106,9 +116,9 @@ def convert_dataframe_schema(df, drop=None): for k, v in zip(df.columns, df.dtypes): if drop is not None and k in drop: continue - if v == 'int64': + if v == "int64": t = Int64TensorType([None, 1]) - elif v == 'float64': + elif v == "float64": t = FloatTensorType([None, 1]) else: t = StringTensorType([None, 1]) @@ -130,8 +140,9 @@ def convert_dataframe_schema(df, drop=None): # ++++++++++++++++++++++++++++++ try: - model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs, - target_opset=12) + model_onnx = convert_sklearn( + clf, "pipeline_titanic", initial_inputs, target_opset=12 + ) except Exception as e: print(e) @@ -140,12 +151,12 @@ def convert_dataframe_schema(df, drop=None): # That's why the converter checks that there is no unused input. # They need to be removed from the graph inputs. -to_drop = {'parch', 'sibsp', 'cabin', 'ticket', - 'name', 'body', 'home.dest', 'boat'} +to_drop = {"parch", "sibsp", "cabin", "ticket", "name", "body", "home.dest", "boat"} initial_inputs = convert_dataframe_schema(X_train, to_drop) try: - model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs, - target_opset=12) + model_onnx = convert_sklearn( + clf, "pipeline_titanic", initial_inputs, target_opset=12 + ) except Exception as e: print(e) @@ -156,8 +167,7 @@ def convert_dataframe_schema(df, drop=None): initial_inputs = convert_dataframe_schema(X_train, to_drop) -model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs, - target_opset=12) +model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs, target_opset=12) # And save. @@ -196,8 +206,7 @@ def convert_dataframe_schema(df, drop=None): ################################ # We are ready to run *onnxruntime*. 
-sess = rt.InferenceSession("pipeline_titanic.onnx", - providers=["CPUExecutionProvider"]) +sess = rt.InferenceSession("pipeline_titanic.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, inputs) print("predict", pred_onx[0][:5]) print("predict_proba", pred_onx[1][:2]) @@ -207,14 +216,19 @@ def convert_dataframe_schema(df, drop=None): # Let's swith to an array but that requires to convert again with # an additional option zipmap. -model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs, - target_opset=12, - options={id(clf): {'zipmap': False}}) +model_onnx = convert_sklearn( + clf, + "pipeline_titanic", + initial_inputs, + target_opset=12, + options={id(clf): {"zipmap": False}}, +) with open("pipeline_titanic_nozipmap.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) -sess = rt.InferenceSession("pipeline_titanic_nozipmap.onnx", - providers=["CPUExecutionProvider"]) +sess = rt.InferenceSession( + "pipeline_titanic_nozipmap.onnx", providers=["CPUExecutionProvider"] +) pred_onx = sess.run(None, inputs) print("predict", pred_onx[0][:5]) print("predict_proba", pred_onx[1][:2]) @@ -231,20 +245,22 @@ def convert_dataframe_schema(df, drop=None): # # Finally, let's see the graph converted with *sklearn-onnx*. -pydot_graph = GetPydotGraph(model_onnx.graph, name=model_onnx.graph.name, - rankdir="TB", - node_producer=GetOpNodeProducer("docstring", - color="yellow", - fillcolor="yellow", - style="filled")) +pydot_graph = GetPydotGraph( + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_titanic.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic.dot") image = plt.imread("pipeline_titanic.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_convert_decision_function.py b/docs/examples/plot_convert_decision_function.py index 8c15a70fb..886501325 100644 --- a/docs/examples/plot_convert_decision_function.py +++ b/docs/examples/plot_convert_decision_function.py @@ -37,9 +37,8 @@ clr.fit(X_train, y_train) print(clr) -initial_type = [('float_input', FloatTensorType([None, 4]))] -onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) ############################ # Output type @@ -48,9 +47,8 @@ # Let's confirm the output type of the probabilities # is a list of dictionaries with onnxruntime. 
-sess = rt.InferenceSession(onx.SerializeToString(), - providers=["CPUExecutionProvider"]) -res = sess.run(None, {'float_input': X_test.astype(numpy.float32)}) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) +res = sess.run(None, {"float_input": X_test.astype(numpy.float32)}) print("skl", clr.predict_proba(X_test[:1])) print("onnx", res[1][:2]) @@ -59,14 +57,16 @@ # ++++++++++++++++++++++++++++++++ # -initial_type = [('float_input', FloatTensorType([None, 4]))] -options = {id(clr): {'raw_scores': True}} -onx2 = convert_sklearn(clr, initial_types=initial_type, options=options, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +options = {id(clr): {"raw_scores": True}} +onx2 = convert_sklearn( + clr, initial_types=initial_type, options=options, target_opset=12 +) -sess2 = rt.InferenceSession(onx2.SerializeToString(), - providers=["CPUExecutionProvider"]) -res2 = sess2.run(None, {'float_input': X_test.astype(numpy.float32)}) +sess2 = rt.InferenceSession( + onx2.SerializeToString(), providers=["CPUExecutionProvider"] +) +res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)}) print("skl", clr.decision_function(X_test[:1])) print("onnx", res2[1][:2]) diff --git a/docs/examples/plot_convert_model.py b/docs/examples/plot_convert_model.py index 2ccbf5eb8..af00277ef 100644 --- a/docs/examples/plot_convert_model.py +++ b/docs/examples/plot_convert_model.py @@ -32,6 +32,7 @@ from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier + iris = load_iris() X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y) @@ -43,9 +44,8 @@ # Convert a model into ONNX # +++++++++++++++++++++++++ -initial_type = [('float_input', FloatTensorType([None, 4]))] -onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) with open("rf_iris.onnx", "wb") as f: f.write(onx.SerializeToString()) @@ -56,8 +56,7 @@ sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name -pred_onx = sess.run( - [label_name], {input_name: X_test.astype(numpy.float32)})[0] +pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] print(pred_onx) ####################################### @@ -65,17 +64,15 @@ clr = LogisticRegression() clr.fit(X_train, y_train) -initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))] -onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))] +onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) with open("logreg_iris.onnx", "wb") as f: f.write(onx.SerializeToString()) sess = rt.InferenceSession("logreg_iris.onnx") input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name -pred_onx = sess.run([label_name], - {input_name: X_test.astype(numpy.float32)})[0] +pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] print(pred_onx) diff --git a/docs/examples/plot_convert_syntax.py b/docs/examples/plot_convert_syntax.py index 2eced5ec4..bdff42218 100644 --- a/docs/examples/plot_convert_syntax.py +++ b/docs/examples/plot_convert_syntax.py @@ -36,6 +36,7 @@ def 
predict_with_onnxruntime(onx, X): res = sess.run(None, {input_name: X.astype(np.float32)}) return res[0] + ################################# # Simple KMeans # +++++++++++++ @@ -48,8 +49,8 @@ def predict_with_onnxruntime(onx, X): tr.fit(X) onx = convert_sklearn( - tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))], - target_opset=12) + tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12 +) print(predict_with_onnxruntime(onx, X)) ################################# @@ -83,8 +84,7 @@ def predict_with_onnxruntime(onx, X): # before fitting the model. X = np.arange(20).reshape(10, 2) -tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), - target_opset=12) +tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=12) tr.fit(X) onx = tr.to_onnx(X.astype(np.float32)) @@ -97,9 +97,7 @@ def predict_with_onnxruntime(onx, X): # This is a simple scaler. -class CustomOpTransformer(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): - +class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -116,20 +114,22 @@ def transform(self, X): def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = operator.inputs[0].type + return shape_calculator - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: - raise RuntimeError("Parameter inputs should contain at least " - "one name.") + raise RuntimeError("Parameter inputs should contain at least " "one name.") opv = target_opset or self.op_version i0 = self.get_inputs(inputs, 0) W = self.W_.astype(np.float32) S = self.S_.astype(np.float32) - return OnnxDiv(OnnxSub(i0, W, op_version=12), S, - output_names=outputs, - op_version=opv) + return OnnxDiv( + OnnxSub(i0, W, op_version=12), S, output_names=outputs, op_version=opv + ) + ############################# # Way 1 @@ -140,8 +140,8 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ), tr.fit(X) onx = convert_sklearn( - tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))], - target_opset=12) + tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12 +) print(predict_with_onnxruntime(onx, X)) ############################# @@ -171,8 +171,8 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ), X = np.arange(20).reshape(10, 2) tr = wrap_as_onnx_mixin( - make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)), - target_opset=12) + make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)), target_opset=12 +) tr.fit(X) @@ -186,28 +186,37 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ), # Finally, let's see the graph converted with *sklearn-onnx*. 
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer # noqa -pydot_graph = GetPydotGraph(onx.graph, name=onx.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer( - "docstring", color="yellow", - fillcolor="yellow", style="filled")) + +pydot_graph = GetPydotGraph( + onx.graph, + name=onx.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_onnx_mixin.dot") import os # noqa -os.system('dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot') + +os.system("dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot") import matplotlib.pyplot as plt # noqa + image = plt.imread("pipeline_onnx_mixin.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** import sklearn # noqa + print("numpy:", numpy.__version__) print("scikit-learn:", sklearn.__version__) import skl2onnx # noqa + print("onnx: ", onnx.__version__) print("onnxruntime: ", onnxruntime.__version__) print("skl2onnx: ", skl2onnx.__version__) diff --git a/docs/examples/plot_convert_zipmap.py b/docs/examples/plot_convert_zipmap.py index 1b0eaa48e..fbe48207b 100644 --- a/docs/examples/plot_convert_zipmap.py +++ b/docs/examples/plot_convert_zipmap.py @@ -38,9 +38,8 @@ clr.fit(X_train, y_train) print(clr) -initial_type = [('float_input', FloatTensorType([None, 4]))] -onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) ############################ # Output type @@ -50,7 +49,7 @@ # is a list of dictionaries with onnxruntime. sess = rt.InferenceSession(onx.SerializeToString()) -res = sess.run(None, {'float_input': X_test.astype(numpy.float32)}) +res = sess.run(None, {"float_input": X_test.astype(numpy.float32)}) print(res[1][:2]) print("probabilities type:", type(res[1])) print("type for the first observations:", type(res[1][0])) @@ -61,13 +60,14 @@ # # Let's remove the ZipMap operator. -initial_type = [('float_input', FloatTensorType([None, 4]))] -options = {id(clr): {'zipmap': False}} -onx2 = convert_sklearn(clr, initial_types=initial_type, options=options, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +options = {id(clr): {"zipmap": False}} +onx2 = convert_sklearn( + clr, initial_types=initial_type, options=options, target_opset=12 +) sess2 = rt.InferenceSession(onx2.SerializeToString()) -res2 = sess2.run(None, {'float_input': X_test.astype(numpy.float32)}) +res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)}) print(res2[1][:2]) print("probabilities type:", type(res2[1])) print("type for the first observations:", type(res2[1][0])) @@ -80,15 +80,19 @@ # the probabilities into columns. The final model produces # one output for the label, and one output per class. 
-options = {id(clr): {'zipmap': 'columns'}} -onx3 = convert_sklearn(clr, initial_types=initial_type, options=options, - target_opset=12) +options = {id(clr): {"zipmap": "columns"}} +onx3 = convert_sklearn( + clr, initial_types=initial_type, options=options, target_opset=12 +) sess3 = rt.InferenceSession(onx3.SerializeToString()) -res3 = sess3.run(None, {'float_input': X_test.astype(numpy.float32)}) +res3 = sess3.run(None, {"float_input": X_test.astype(numpy.float32)}) for i, out in enumerate(sess3.get_outputs()): - print("output: '{}' shape={} values={}...".format( - out.name, res3[i].shape, res3[i][:2])) + print( + "output: '{}' shape={} values={}...".format( + out.name, res3[i].shape, res3[i][:2] + ) + ) ################################### @@ -98,16 +102,13 @@ X32 = X_test.astype(numpy.float32) print("Time with ZipMap:") -print(repeat(lambda: sess.run(None, {'float_input': X32}), - number=100, repeat=10)) +print(repeat(lambda: sess.run(None, {"float_input": X32}), number=100, repeat=10)) print("Time without ZipMap:") -print(repeat(lambda: sess2.run(None, {'float_input': X32}), - number=100, repeat=10)) +print(repeat(lambda: sess2.run(None, {"float_input": X32}), number=100, repeat=10)) print("Time without ZipMap but with columns:") -print(repeat(lambda: sess3.run(None, {'float_input': X32}), - number=100, repeat=10)) +print(repeat(lambda: sess3.run(None, {"float_input": X32}), number=100, repeat=10)) # The prediction is much faster without ZipMap # on this example. diff --git a/docs/examples/plot_custom_model.py b/docs/examples/plot_custom_model.py index a2a687ad5..ac14d66c3 100644 --- a/docs/examples/plot_custom_model.py +++ b/docs/examples/plot_custom_model.py @@ -63,9 +63,14 @@ class PredictableTSNE(BaseEstimator, TransformerMixin): - - def __init__(self, transformer=None, estimator=None, - normalize=True, keep_tsne_outputs=False, **kwargs): + def __init__( + self, + transformer=None, + estimator=None, + normalize=True, + keep_tsne_outputs=False, + **kwargs + ): """ :param transformer: `TSNE` by default :param estimator: `MLPRegressor` by default @@ -90,11 +95,12 @@ def __init__(self, transformer=None, estimator=None, if not hasattr(transformer, "fit_transform"): raise AttributeError( "Transformer {} does not have a 'fit_transform' " - "method.".format(type(transformer))) + "method.".format(type(transformer)) + ) if not hasattr(estimator, "predict"): raise AttributeError( - "Estimator {} does not have a 'predict' method.".format( - type(estimator))) + "Estimator {} does not have a 'predict' method.".format(type(estimator)) + ) self.normalize = normalize if kwargs: self.set_params(**kwargs) @@ -132,21 +138,22 @@ def fit(self, X, y, sample_weight=None): sig = inspect.signature(self.transformer.fit_transform) pars = {} - for p in ['sample_weight', 'y']: + for p in ["sample_weight", "y"]: if p in sig.parameters and p in params: pars[p] = params[p] target = self.transformer_.fit_transform(X, **pars) sig = inspect.signature(self.estimator.fit) - if 'sample_weight' in sig.parameters: + if "sample_weight" in sig.parameters: self.estimator_ = clone(self.estimator).fit( - X, target, sample_weight=sample_weight) + X, target, sample_weight=sample_weight + ) else: self.estimator_ = clone(self.estimator).fit(X, target) mean = target.mean(axis=0) var = target.std(axis=0) self.mean_ = mean - self.inv_std_ = 1. 
/ var + self.inv_std_ = 1.0 / var exp = (target - mean) * self.inv_std_ got = (self.estimator_.predict(X) - mean) * self.inv_std_ self.loss_ = mean_squared_error(exp, got) @@ -191,11 +198,11 @@ def set_params(self, **values): """ pt, pe, pn = {}, {}, {} for k, v in values.items(): - if k.startswith('e_'): + if k.startswith("e_"): pe[k[2:]] = v - elif k.startswith('t_'): + elif k.startswith("t_"): pt[k[2:]] = v - elif k.startswith('n_'): + elif k.startswith("n_"): pn[k[2:]] = v else: raise ValueError("Unexpected parameter name '{0}'.".format(k)) @@ -217,10 +224,9 @@ def set_params(self, **values): n_samples, n_features = Xd.shape n_samples, n_features -X_train, X_test, y_train, y_test, imgs_train, imgs_test = train_test_split( - Xd, yd, imgs) +X_train, X_test, y_train, y_test, imgs_train, imgs_test = train_test_split(Xd, yd, imgs) -tsne = TSNE(n_components=2, init='pca', random_state=0) +tsne = TSNE(n_components=2, init="pca", random_state=0) def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)): @@ -229,13 +235,17 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)): fig, ax = plt.subplots(1, 2, figsize=figsize) for i in range(X.shape[0]): - ax[0].text(X[i, 0], X[i, 1], str(y[i]), - color=plt.cm.Set1(y[i] / 10.), - fontdict={'weight': 'bold', 'size': 9}) - - if hasattr(offsetbox, 'AnnotationBbox'): + ax[0].text( + X[i, 0], + X[i, 1], + str(y[i]), + color=plt.cm.Set1(y[i] / 10.0), + fontdict={"weight": "bold", "size": 9}, + ) + + if hasattr(offsetbox, "AnnotationBbox"): # only print thumbnails with matplotlib > 1.0 - shown_images = numpy.array([[1., 1.]]) # just something big + shown_images = numpy.array([[1.0, 1.0]]) # just something big for i in range(X.shape[0]): dist = numpy.sum((X[i] - shown_images) ** 2, 1) if numpy.min(dist) < 4e-3: @@ -243,19 +253,18 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)): continue shown_images = numpy.r_[shown_images, [X[i]]] imagebox = offsetbox.AnnotationBbox( - offsetbox.OffsetImage(imgs[i], cmap=plt.cm.gray_r), - X[i]) + offsetbox.OffsetImage(imgs[i], cmap=plt.cm.gray_r), X[i] + ) ax[0].add_artist(imagebox) ax[0].set_xticks([]), ax[0].set_yticks([]) - ax[1].plot(Xp[:, 0], Xp[:, 1], '.') + ax[1].plot(Xp[:, 0], Xp[:, 1], ".") if title is not None: ax[0].set_title(title) return ax X_train_tsne = tsne.fit_transform(X_train) -plot_embedding(X_train_tsne, y_train, imgs_train, - "t-SNE embedding of the digits") +plot_embedding(X_train_tsne, y_train, imgs_train, "t-SNE embedding of the digits") ####################################### # Repeatable t-SNE @@ -267,18 +276,24 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)): ptsne_knn.fit(X_train, y_train) X_train_tsne2 = ptsne_knn.transform(X_train) -plot_embedding(X_train_tsne2, y_train, imgs_train, - "Predictable t-SNE of the digits\n" - "StandardScaler+KNeighborsRegressor") +plot_embedding( + X_train_tsne2, + y_train, + imgs_train, + "Predictable t-SNE of the digits\n" "StandardScaler+KNeighborsRegressor", +) ################################ # We check on test set. 
X_test_tsne2 = ptsne_knn.transform(X_test) -plot_embedding(X_test_tsne2, y_test, imgs_test, - "Predictable t-SNE of the digits\n" - "StandardScaler+KNeighborsRegressor") +plot_embedding( + X_test_tsne2, + y_test, + imgs_test, + "Predictable t-SNE of the digits\n" "StandardScaler+KNeighborsRegressor", +) ####################################### # ONNX - shape_calculator, converter @@ -292,13 +307,12 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)): def predictable_tsne_shape_calculator(operator): - - input = operator.inputs[0] # inputs in ONNX graph + input = operator.inputs[0] # inputs in ONNX graph # output = operator.outputs[0] # output in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + op = operator.raw_operator # scikit-learn model (mmust be fitted) - N = input.type.shape[0] # number of observations - C = op.estimator_._y.shape[1] # dimension of outputs + N = input.type.shape[0] # number of observations + C = op.estimator_._y.shape[1] # dimension of outputs # new output definition operator.outputs[0].type = FloatTensorType([N, C]) @@ -317,8 +331,8 @@ def predictable_tsne_converter(scope, operator, container): :param container: contains the ONNX graph """ # input = operator.inputs[0] # input in ONNX graph - output = operator.outputs[0] # output in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + output = operator.outputs[0] # output in ONNX graph + op = operator.raw_operator # scikit-learn model (mmust be fitted) # First step is the k nearest-neighbours, # we reuse existing converter and declare it as local @@ -329,7 +343,7 @@ def predictable_tsne_converter(scope, operator, container): knn_op.inputs = operator.inputs # We add an intermediate outputs. - knn_output = scope.declare_local_variable('knn_output', FloatTensorType()) + knn_output = scope.declare_local_variable("knn_output", FloatTensorType()) knn_op.outputs.append(knn_output) # We adjust the output of the submodel. @@ -337,27 +351,38 @@ def predictable_tsne_converter(scope, operator, container): shape_calc(knn_op) # We add the normalizer which needs a unique node name. - name = scope.get_unique_operator_name('Scaler') + name = scope.get_unique_operator_name("Scaler") # The parameter follows the specifications of ONNX # https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md#ai.onnx.ml.Scaler - attrs = dict(name=name, - scale=op.inv_std_.ravel().astype(numpy.float32), - offset=op.mean_.ravel().astype(numpy.float32)) + attrs = dict( + name=name, + scale=op.inv_std_.ravel().astype(numpy.float32), + offset=op.mean_.ravel().astype(numpy.float32), + ) # Let's finally add the scaler which connects the output # of the k-nearest neighbours model to output of the whole model # declared in ONNX graph - container.add_node('Scaler', [knn_output.onnx_name], [output.full_name], - op_domain='ai.onnx.ml', **attrs) + container.add_node( + "Scaler", + [knn_output.onnx_name], + [output.full_name], + op_domain="ai.onnx.ml", + **attrs + ) + ################################## # We now need to declare the new converter. -update_registered_converter(PredictableTSNE, 'CustomPredictableTSNE', - predictable_tsne_shape_calculator, - predictable_tsne_converter) +update_registered_converter( + PredictableTSNE, + "CustomPredictableTSNE", + predictable_tsne_shape_calculator, + predictable_tsne_converter, +) #################################### # Conversion to ONNX @@ -367,9 +392,11 @@ def predictable_tsne_converter(scope, operator, container): # to convert. 
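#################################
# The *Scaler* node added by the converter above computes
# ``Y = (X - offset) * scale`` element-wise, which is why ``offset`` is set
# to the learned mean and ``scale`` to the inverse standard deviation.
# A quick numpy illustration with arbitrary values (a sketch, not tied to
# the fitted model):

x_demo = numpy.array([[1.0, 2.0], [3.0, 4.0]], dtype=numpy.float32)
offset_demo = numpy.array([0.5, 1.5], dtype=numpy.float32)
scale_demo = numpy.array([2.0, 0.5], dtype=numpy.float32)
print((x_demo - offset_demo) * scale_demo)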
model_onnx = convert_sklearn( - ptsne_knn, 'predictable_tsne', - [('input', FloatTensorType([None, X_test.shape[1]]))], - target_opset=12) + ptsne_knn, + "predictable_tsne", + [("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=12, +) # And save. with open("predictable_tsne.onnx", "wb") as f: @@ -401,17 +428,21 @@ def predictable_tsne_converter(scope, operator, container): # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_tsne.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_tsne.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_tsne.dot") image = plt.imread("pipeline_tsne.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_custom_parser.py b/docs/examples/plot_custom_parser.py index d79b32bad..ecbe99f95 100644 --- a/docs/examples/plot_custom_parser.py +++ b/docs/examples/plot_custom_parser.py @@ -33,9 +33,7 @@ import os from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer import onnxruntime as rt -from onnxconverter_common.onnx_ops import ( - apply_identity, apply_cast, apply_greater -) +from onnxconverter_common.onnx_ops import apply_identity, apply_cast, apply_greater from skl2onnx import to_onnx, get_model_alias from skl2onnx.proto import onnx_proto from skl2onnx.common._registration import get_shape_calculator @@ -44,20 +42,20 @@ class ValidatorClassifier(BaseEstimator, ClassifierMixin): - def __init__(self, estimator=None, threshold=0.75): ClassifierMixin.__init__(self) BaseEstimator.__init__(self) if estimator is None: - estimator = LogisticRegression(solver='liblinear') + estimator = LogisticRegression(solver="liblinear") self.estimator = estimator self.threshold = threshold def fit(self, X, y, sample_weight=None): sig = inspect.signature(self.estimator.fit) - if 'sample_weight' in sig.parameters: + if "sample_weight" in sig.parameters: self.estimator_ = clone(self.estimator).fit( - X, y, sample_weight=sample_weight) + X, y, sample_weight=sample_weight + ) else: self.estimator_ = clone(self.estimator).fit(X, y) return self @@ -97,8 +95,7 @@ def validate(self, X): # to this new model. 
try: - to_onnx(model, X_train[:1].astype(np.float32), - target_opset=12) + to_onnx(model, X_train[:1].astype(np.float32), target_opset=12) except RuntimeError as e: print(e) @@ -112,27 +109,27 @@ def validate(self, X): def validator_classifier_shape_calculator(operator): - input0 = operator.inputs[0] # inputs in ONNX graph outputs = operator.outputs # outputs in ONNX graph op = operator.raw_operator # scikit-learn model (mmust be fitted) if len(outputs) != 3: raise RuntimeError("3 outputs expected not {}.".format(len(outputs))) - N = input0.type.shape[0] # number of observations - C = op.estimator_.classes_.shape[0] # dimension of outputs + N = input0.type.shape[0] # number of observations + C = op.estimator_.classes_.shape[0] # dimension of outputs + + outputs[0].type = Int64TensorType([N]) # label + outputs[1].type = FloatTensorType([N, C]) # probabilities + outputs[2].type = Int64TensorType([C]) # validation - outputs[0].type = Int64TensorType([N]) # label - outputs[1].type = FloatTensorType([N, C]) # probabilities - outputs[2].type = Int64TensorType([C]) # validation ############################# # Then the converter. def validator_classifier_converter(scope, operator, container): - outputs = operator.outputs # outputs in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + outputs = operator.outputs # outputs in ONNX graph + op = operator.raw_operator # scikit-learn model (mmust be fitted) # We reuse existing converter and declare it # as a local operator. @@ -142,8 +139,8 @@ def validator_classifier_converter(scope, operator, container): val_op.inputs = operator.inputs # We add an intermediate outputs. - val_label = scope.declare_local_variable('val_label', Int64TensorType()) - val_prob = scope.declare_local_variable('val_prob', FloatTensorType()) + val_label = scope.declare_local_variable("val_label", Int64TensorType()) + val_prob = scope.declare_local_variable("val_prob", FloatTensorType()) val_op.outputs.append(val_label) val_op.outputs.append(val_prob) @@ -152,30 +149,36 @@ def validator_classifier_converter(scope, operator, container): shape_calc(val_op) # We now handle the validation. 
- val_max = scope.get_unique_variable_name('val_max') + val_max = scope.get_unique_variable_name("val_max") if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceMax', [val_prob.full_name, axis_name], val_max, - name=scope.get_unique_operator_name('ReduceMax'), - keepdims=0) + "ReduceMax", + [val_prob.full_name, axis_name], + val_max, + name=scope.get_unique_operator_name("ReduceMax"), + keepdims=0, + ) else: container.add_node( - 'ReduceMax', val_prob.full_name, val_max, - name=scope.get_unique_operator_name('ReduceMax'), - axes=[1], keepdims=0) - - th_name = scope.get_unique_variable_name('threshold') + "ReduceMax", + val_prob.full_name, + val_max, + name=scope.get_unique_operator_name("ReduceMax"), + axes=[1], + keepdims=0, + ) + + th_name = scope.get_unique_variable_name("threshold") container.add_initializer( - th_name, onnx_proto.TensorProto.FLOAT, [1], [op.threshold]) - val_bin = scope.get_unique_variable_name('val_bin') + th_name, onnx_proto.TensorProto.FLOAT, [1], [op.threshold] + ) + val_bin = scope.get_unique_variable_name("val_bin") apply_greater(scope, [val_max, th_name], val_bin, container) - val_val = scope.get_unique_variable_name('validate') - apply_cast(scope, val_bin, val_val, container, - to=onnx_proto.TensorProto.INT64) + val_val = scope.get_unique_variable_name("validate") + apply_cast(scope, val_bin, val_val, container, to=onnx_proto.TensorProto.INT64) # We finally link the intermediate output to the shared converter. apply_identity(scope, val_label.full_name, outputs[0].full_name, container) @@ -187,16 +190,18 @@ def validator_classifier_converter(scope, operator, container): # Then the registration. -update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier', - validator_classifier_shape_calculator, - validator_classifier_converter) +update_registered_converter( + ValidatorClassifier, + "CustomValidatorClassifier", + validator_classifier_shape_calculator, + validator_classifier_converter, +) ######################## # And conversion... try: - to_onnx(model, X_test[:1].astype(np.float32), - target_opset=12) + to_onnx(model, X_test[:1].astype(np.float32), target_opset=12) except RuntimeError as e: print(e) @@ -218,9 +223,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): this_operator.inputs.append(inputs[0]) # outputs - val_label = scope.declare_local_variable('val_label', Int64TensorType()) - val_prob = scope.declare_local_variable('val_prob', FloatTensorType()) - val_val = scope.declare_local_variable('val_val', Int64TensorType()) + val_label = scope.declare_local_variable("val_label", Int64TensorType()) + val_prob = scope.declare_local_variable("val_prob", FloatTensorType()) + val_val = scope.declare_local_variable("val_val", Int64TensorType()) this_operator.outputs.append(val_label) this_operator.outputs.append(val_prob) this_operator.outputs.append(val_val) @@ -228,20 +233,23 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): # end return this_operator.outputs + ############################### # Registration. 
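#################################
# A short note on the ``container.target_opset >= 18`` branch above:
# starting with opset 18, *ReduceMax* takes its ``axes`` as an optional
# second input instead of an attribute, hence the extra initializer.
# A minimal sketch of the two node forms (input and output names are
# placeholders):

from onnx import helper  # noqa

node_before_18 = helper.make_node(
    "ReduceMax", ["val_prob"], ["val_max"], axes=[1], keepdims=0
)
node_from_18 = helper.make_node(
    "ReduceMax", ["val_prob", "axes"], ["val_max"], keepdims=0
)
print(node_before_18)
print(node_from_18)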
-update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier', - validator_classifier_shape_calculator, - validator_classifier_converter, - parser=validator_classifier_parser) +update_registered_converter( + ValidatorClassifier, + "CustomValidatorClassifier", + validator_classifier_shape_calculator, + validator_classifier_converter, + parser=validator_classifier_parser, +) ############################# # And conversion again. -model_onnx = to_onnx(model, X_test[:1].astype(np.float32), - target_opset=12) +model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12) ####################################### # Final test @@ -252,7 +260,7 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): X32 = X_test[:5].astype(np.float32) sess = rt.InferenceSession(model_onnx.SerializeToString()) -results = sess.run(None, {'X': X32}) +results = sess.run(None, {"X": X32}) print("--labels--") print("sklearn", model.predict(X32)) @@ -271,17 +279,21 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("validator_classifier.dot") -os.system('dot -O -Gdpi=300 -Tpng validator_classifier.dot') +os.system("dot -O -Gdpi=300 -Tpng validator_classifier.dot") image = plt.imread("validator_classifier.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_custom_parser_alternative.py b/docs/examples/plot_custom_parser_alternative.py index 617d4b838..48b41a0b1 100644 --- a/docs/examples/plot_custom_parser_alternative.py +++ b/docs/examples/plot_custom_parser_alternative.py @@ -42,27 +42,30 @@ from skl2onnx.proto import onnx_proto from skl2onnx.common.data_types import FloatTensorType, Int64TensorType from skl2onnx.algebra.onnx_ops import ( - OnnxGreater, OnnxCast, OnnxReduceMaxApi18, OnnxIdentity + OnnxGreater, + OnnxCast, + OnnxReduceMaxApi18, + OnnxIdentity, ) from skl2onnx.algebra.onnx_operator import OnnxSubEstimator import matplotlib.pyplot as plt class ValidatorClassifier(BaseEstimator, ClassifierMixin): - def __init__(self, estimator=None, threshold=0.75): ClassifierMixin.__init__(self) BaseEstimator.__init__(self) if estimator is None: - estimator = LogisticRegression(solver='liblinear') + estimator = LogisticRegression(solver="liblinear") self.estimator = estimator self.threshold = threshold def fit(self, X, y, sample_weight=None): sig = inspect.signature(self.estimator.fit) - if 'sample_weight' in sig.parameters: + if "sample_weight" in sig.parameters: self.estimator_ = clone(self.estimator).fit( - X, y, sample_weight=sample_weight) + X, y, sample_weight=sample_weight + ) else: self.estimator_ = clone(self.estimator).fit(X, y) return self @@ -102,8 +105,7 @@ def validate(self, X): # to this new model. 
try: - to_onnx(model, X_train[:1].astype(np.float32), - target_opset=12) + to_onnx(model, X_train[:1].astype(np.float32), target_opset=12) except RuntimeError as e: print(e) @@ -117,48 +119,44 @@ def validate(self, X): def validator_classifier_shape_calculator(operator): - - input0 = operator.inputs[0] # first input in ONNX graph - outputs = operator.outputs # outputs in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + input0 = operator.inputs[0] # first input in ONNX graph + outputs = operator.outputs # outputs in ONNX graph + op = operator.raw_operator # scikit-learn model (mmust be fitted) if len(outputs) != 3: raise RuntimeError("3 outputs expected not {}.".format(len(outputs))) - N = input0.type.shape[0] # number of observations - C = op.estimator_.classes_.shape[0] # dimension of outputs + N = input0.type.shape[0] # number of observations + C = op.estimator_.classes_.shape[0] # dimension of outputs + + outputs[0].type = Int64TensorType([N]) # label + outputs[1].type = FloatTensorType([N, C]) # probabilities + outputs[2].type = Int64TensorType([C]) # validation - outputs[0].type = Int64TensorType([N]) # label - outputs[1].type = FloatTensorType([N, C]) # probabilities - outputs[2].type = Int64TensorType([C]) # validation ############################# # Then the converter. def validator_classifier_converter(scope, operator, container): - input0 = operator.inputs[0] # first input in ONNX graph - outputs = operator.outputs # outputs in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + input0 = operator.inputs[0] # first input in ONNX graph + outputs = operator.outputs # outputs in ONNX graph + op = operator.raw_operator # scikit-learn model (mmust be fitted) opv = container.target_opset # The model calls another one. The class `OnnxSubEstimator` # calls the converter for this operator. model = op.estimator_ - onnx_op = OnnxSubEstimator(model, input0, op_version=opv, - options={'zipmap': False}) + onnx_op = OnnxSubEstimator(model, input0, op_version=opv, options={"zipmap": False}) rmax = OnnxReduceMaxApi18(onnx_op[1], axes=[1], keepdims=0, op_version=opv) - great = OnnxGreater(rmax, np.array([op.threshold], dtype=np.float32), - op_version=opv) - valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, - op_version=opv) - - r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], - op_version=opv) - r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], - op_version=opv) - r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], - op_version=opv) + great = OnnxGreater( + rmax, np.array([op.threshold], dtype=np.float32), op_version=opv + ) + valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, op_version=opv) + + r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], op_version=opv) + r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], op_version=opv) + r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], op_version=opv) r1.add_to(scope, container) r2.add_to(scope, container) @@ -169,16 +167,18 @@ def validator_classifier_converter(scope, operator, container): # Then the registration. -update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier', - validator_classifier_shape_calculator, - validator_classifier_converter) +update_registered_converter( + ValidatorClassifier, + "CustomValidatorClassifier", + validator_classifier_shape_calculator, + validator_classifier_converter, +) ######################## # And conversion... 
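#################################
# The same algebra operators can be composed outside of any converter.
# A small standalone sketch, independent from the classifier above,
# which builds and runs a tiny graph thresholding its input at 0.5
# (``thr``, ``X32_demo`` and ``sess_small`` are arbitrary names):

thr = OnnxCast(
    OnnxGreater("X", np.array([0.5], dtype=np.float32), op_version=12),
    to=onnx_proto.TensorProto.INT64,
    op_version=12,
    output_names=["Y"],
)
X32_demo = np.array([[0.1], [0.9]], dtype=np.float32)
small_onnx = thr.to_onnx({"X": X32_demo}, target_opset=12)
sess_small = rt.InferenceSession(small_onnx.SerializeToString())
print(sess_small.run(None, {"X": X32_demo})[0])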
try: - to_onnx(model, X_test[:1].astype(np.float32), - target_opset=12) + to_onnx(model, X_test[:1].astype(np.float32), target_opset=12) except RuntimeError as e: print(e) @@ -200,9 +200,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): this_operator.inputs.append(inputs[0]) # outputs - val_label = scope.declare_local_variable('val_label', Int64TensorType()) - val_prob = scope.declare_local_variable('val_prob', FloatTensorType()) - val_val = scope.declare_local_variable('val_val', Int64TensorType()) + val_label = scope.declare_local_variable("val_label", Int64TensorType()) + val_prob = scope.declare_local_variable("val_prob", FloatTensorType()) + val_val = scope.declare_local_variable("val_val", Int64TensorType()) this_operator.outputs.append(val_label) this_operator.outputs.append(val_prob) this_operator.outputs.append(val_val) @@ -210,20 +210,23 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): # ends return this_operator.outputs + ############################### # Registration. -update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier', - validator_classifier_shape_calculator, - validator_classifier_converter, - parser=validator_classifier_parser) +update_registered_converter( + ValidatorClassifier, + "CustomValidatorClassifier", + validator_classifier_shape_calculator, + validator_classifier_converter, + parser=validator_classifier_parser, +) ############################# # And conversion again. -model_onnx = to_onnx(model, X_test[:1].astype(np.float32), - target_opset=12) +model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12) ####################################### # Final test @@ -234,7 +237,7 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): X32 = X_test[:5].astype(np.float32) sess = rt.InferenceSession(model_onnx.SerializeToString()) -results = sess.run(None, {'X': X32}) +results = sess.run(None, {"X": X32}) print("--labels--") print("sklearn", model.predict(X32)) @@ -253,17 +256,21 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("validator_classifier.dot") -os.system('dot -O -Gdpi=300 -Tpng validator_classifier.dot') +os.system("dot -O -Gdpi=300 -Tpng validator_classifier.dot") image = plt.imread("validator_classifier.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_errors_onnxruntime.py b/docs/examples/plot_errors_onnxruntime.py index c4ddd3e50..5e92fdd30 100644 --- a/docs/examples/plot_errors_onnxruntime.py +++ b/docs/examples/plot_errors_onnxruntime.py @@ -24,6 +24,7 @@ import numpy as np from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument except ImportError: @@ -35,8 +36,9 @@ with open("logreg_iris.onnx", "wb") as f: f.write( skl2onnx.to_onnx( - clr, data.data[:, :2].astype(np.float32), - target_opset=12).SerializeToString()) + clr, data.data[:, :2].astype(np.float32), 
target_opset=12 + ).SerializeToString() + ) example2 = "logreg_iris.onnx" sess = rt.InferenceSession(example2) @@ -50,8 +52,7 @@ # and cannot handle any other kind of floats. try: - x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], - dtype=np.float64) + x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], dtype=np.float64) sess.run([output_name], {input_name: x}) except Exception as e: print("Unexpected type") @@ -92,11 +93,12 @@ # dimension is a multiple of the expected input dimension. for x in [ - np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), - np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), - np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32), - np.array([1.0, 2.0, 3.0], dtype=np.float32), - np.array([[1.0, 2.0, 3.0]], dtype=np.float32)]: + np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), + np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), + np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32), + np.array([1.0, 2.0, 3.0], dtype=np.float32), + np.array([[1.0, 2.0, 3.0]], dtype=np.float32), +]: try: r = sess.run([output_name], {input_name: x}) print("Shape={0} and predicted labels={1}".format(x.shape, r)) @@ -104,15 +106,15 @@ print("Shape={0} and error={1}".format(x.shape, e)) for x in [ - np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), - np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), - np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32), - np.array([1.0, 2.0, 3.0], dtype=np.float32), - np.array([[1.0, 2.0, 3.0]], dtype=np.float32)]: + np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), + np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32), + np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32), + np.array([1.0, 2.0, 3.0], dtype=np.float32), + np.array([[1.0, 2.0, 3.0]], dtype=np.float32), +]: try: r = sess.run(None, {input_name: x}) - print("Shape={0} and predicted probabilities={1}".format( - x.shape, r[1])) + print("Shape={0} and predicted probabilities={1}".format(x.shape, r[1])) except (RuntimeError, InvalidArgument) as e: print("Shape={0} and error={1}".format(x.shape, e)) @@ -121,9 +123,10 @@ # is higher than expects but produces a warning. for x in [ - np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float32), - np.array([[[1.0, 2.0, 3.0]]], dtype=np.float32), - np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=np.float32)]: + np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float32), + np.array([[[1.0, 2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=np.float32), +]: try: r = sess.run([output_name], {input_name: x}) print("Shape={0} and predicted labels={1}".format(x.shape, r)) diff --git a/docs/examples/plot_gpr.py b/docs/examples/plot_gpr.py index 402fbe1b6..b38412ecd 100644 --- a/docs/examples/plot_gpr.py +++ b/docs/examples/plot_gpr.py @@ -37,7 +37,7 @@ dataset = load_diabetes() X, y = dataset.data, dataset.target X_train, X_test, y_train, y_test = train_test_split(X, y) -gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.) +gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.0) gpr.fit(X_train, y_train) print(gpr) @@ -48,14 +48,12 @@ # The documentation suggests the following way to # convert a model into ONNX. 
-initial_type = [('X', FloatTensorType([None, X_train.shape[1]]))] -onx = convert_sklearn(gpr, initial_types=initial_type, - target_opset=12) +initial_type = [("X", FloatTensorType([None, X_train.shape[1]]))] +onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) sess = rt.InferenceSession(onx.SerializeToString()) try: - pred_onx = sess.run( - None, {'X': X_test.astype(numpy.float32)})[0] + pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] except RuntimeError as e: print(str(e)) @@ -73,13 +71,11 @@ # the fixed dimensions by an empty value. # (see next line). -initial_type = [('X', FloatTensorType([None, None]))] -onx = convert_sklearn(gpr, initial_types=initial_type, - target_opset=12) +initial_type = [("X", FloatTensorType([None, None]))] +onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) sess = rt.InferenceSession(onx.SerializeToString()) -pred_onx = sess.run( - None, {'X': X_test.astype(numpy.float32)})[0] +pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] pred_skl = gpr.predict(X_test) print(pred_skl[:10]) @@ -90,10 +86,9 @@ # Let's confirm that by looking at the biggest # differences. -diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - - numpy.squeeze(pred_onx)))[-5:] +diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - numpy.squeeze(pred_onx)))[-5:] print(diff) -print('min(Y)-max(Y):', min(y_test), max(y_test)) +print("min(Y)-max(Y):", min(y_test), max(y_test)) ########################### # Third attempt: use of double @@ -113,22 +108,20 @@ # constant matrix such as the trained coefficients # will be dumped as doubles and not as floats anymore. -initial_type = [('X', DoubleTensorType([None, None]))] -onx64 = convert_sklearn(gpr, initial_types=initial_type, - target_opset=12) +initial_type = [("X", DoubleTensorType([None, None]))] +onx64 = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) sess64 = rt.InferenceSession(onx64.SerializeToString()) -pred_onx64 = sess64.run(None, {'X': X_test})[0] +pred_onx64 = sess64.run(None, {"X": X_test})[0] print(pred_onx64[0, :10]) ################################ # The new differences look much better. -diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - - numpy.squeeze(pred_onx64)))[-5:] +diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - numpy.squeeze(pred_onx64)))[-5:] print(diff) -print('min(Y)-max(Y):', min(y_test), max(y_test)) +print("min(Y)-max(Y):", min(y_test), max(y_test)) #################################### # Size increase @@ -156,11 +149,12 @@ # That's done through the option mechanism # (see :ref:`l-conv-options`). -initial_type = [('X', DoubleTensorType([None, None]))] -options = {GaussianProcessRegressor: {'return_std': True}} +initial_type = [("X", DoubleTensorType([None, None]))] +options = {GaussianProcessRegressor: {"return_std": True}} try: - onx64_std = convert_sklearn(gpr, initial_types=initial_type, - options=options, target_opset=12) + onx64_std = convert_sklearn( + gpr, initial_types=initial_type, options=options, target_opset=12 + ) except RuntimeError as e: print(e) @@ -171,11 +165,12 @@ # predict at least once and then converting again. 
gpr.predict(X_test[:1], return_std=True) -onx64_std = convert_sklearn(gpr, initial_types=initial_type, - options=options, target_opset=12) +onx64_std = convert_sklearn( + gpr, initial_types=initial_type, options=options, target_opset=12 +) sess64_std = rt.InferenceSession(onx64_std.SerializeToString()) -pred_onx64_std = sess64_std.run(None, {'X': X_test[:5]}) +pred_onx64_std = sess64_std.run(None, {"X": X_test[:5]}) pprint.pprint(pred_onx64_std) @@ -188,12 +183,13 @@ # It looks good. Let's do a better checks. -pred_onx64_std = sess64_std.run(None, {'X': X_test}) +pred_onx64_std = sess64_std.run(None, {"X": X_test}) pred_std = gpr.predict(X_test, return_std=True) -diff = numpy.sort(numpy.abs(numpy.squeeze(pred_onx64_std[1]) - - numpy.squeeze(pred_std[1])))[-5:] +diff = numpy.sort( + numpy.abs(numpy.squeeze(pred_onx64_std[1]) - numpy.squeeze(pred_std[1])) +)[-5:] print(diff) ################################# diff --git a/docs/examples/plot_intermediate_outputs.py b/docs/examples/plot_intermediate_outputs.py index deb5483ed..98937ce83 100644 --- a/docs/examples/plot_intermediate_outputs.py +++ b/docs/examples/plot_intermediate_outputs.py @@ -40,7 +40,10 @@ from skl2onnx import convert_sklearn import pprint from skl2onnx.common.data_types import ( - FloatTensorType, StringTensorType, Int64TensorType) + FloatTensorType, + StringTensorType, + Int64TensorType, +) import numpy as np import pandas as pd from sklearn.compose import ColumnTransformer @@ -50,39 +53,49 @@ from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -titanic_url = ('https://raw.githubusercontent.com/amueller/' - 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv') +titanic_url = ( + "https://raw.githubusercontent.com/amueller/" + "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv" +) data = pd.read_csv(titanic_url) -X = data.drop('survived', axis=1) -y = data['survived'] +X = data.drop("survived", axis=1) +y = data["survived"] # SimpleImputer on string is not available # for string in ONNX-ML specifications. # So we do it beforehand. -for cat in ['embarked', 'sex', 'pclass']: - X[cat].fillna('missing', inplace=True) +for cat in ["embarked", "sex", "pclass"]: + X[cat].fillna("missing", inplace=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) -numeric_features = ['age', 'fare'] -numeric_transformer = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='median')), - ('scaler', StandardScaler())]) +numeric_features = ["age", "fare"] +numeric_transformer = Pipeline( + steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())] +) -categorical_features = ['embarked', 'sex', 'pclass'] -categorical_transformer = Pipeline(steps=[ - # --- SimpleImputer is not available for strings in ONNX-ML specifications. - # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), - ('onehot', OneHotEncoder(handle_unknown='ignore'))]) +categorical_features = ["embarked", "sex", "pclass"] +categorical_transformer = Pipeline( + steps=[ + # --- SimpleImputer is not available for strings in ONNX-ML specifications. 
+ # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), + ("onehot", OneHotEncoder(handle_unknown="ignore")) + ] +) preprocessor = ColumnTransformer( transformers=[ - ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features), - ]) - -clf = Pipeline(steps=[('preprocessor', preprocessor), - ('classifier', LogisticRegression(solver='lbfgs'))]) + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] +) + +clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", LogisticRegression(solver="lbfgs")), + ] +) clf.fit(X_train, y_train) @@ -104,9 +117,9 @@ def convert_dataframe_schema(df, drop=None): for k, v in zip(df.columns, df.dtypes): if drop is not None and k in drop: continue - if v == 'int64': + if v == "int64": t = Int64TensorType([None, 1]) - elif v == 'float64': + elif v == "float64": t = FloatTensorType([None, 1]) else: t = StringTensorType([None, 1]) @@ -128,8 +141,7 @@ def convert_dataframe_schema(df, drop=None): # ++++++++++++++++++++++++++++++ try: - model_onnx = convert_sklearn(clf, 'pipeline_titanic', inputs, - target_opset=12) + model_onnx = convert_sklearn(clf, "pipeline_titanic", inputs, target_opset=12) except Exception as e: print(e) @@ -138,14 +150,13 @@ def convert_dataframe_schema(df, drop=None): # *sklearn-onnx* does not. The ONNX version of *OneHotEncoder* # must be applied on columns of the same type. -X_train['pclass'] = X_train['pclass'].astype(str) -X_test['pclass'] = X_test['pclass'].astype(str) +X_train["pclass"] = X_train["pclass"].astype(str) +X_test["pclass"] = X_test["pclass"].astype(str) white_list = numeric_features + categorical_features to_drop = [c for c in X_train.columns if c not in white_list] inputs = convert_dataframe_schema(X_train, to_drop) -model_onnx = convert_sklearn(clf, 'pipeline_titanic', inputs, - target_opset=12) +model_onnx = convert_sklearn(clf, "pipeline_titanic", inputs, target_opset=12) # And save. @@ -211,7 +222,7 @@ def convert_dataframe_schema(df, drop=None): # and textual pipeline: *variable1*, *variable2*. # Let's look into the numerical pipeline first. -num_onnx = select_model_inputs_outputs(model_onnx, 'variable1') +num_onnx = select_model_inputs_outputs(model_onnx, "variable1") save_onnx_model(num_onnx, "pipeline_titanic_numerical.onnx") ################################ @@ -225,7 +236,7 @@ def convert_dataframe_schema(df, drop=None): # We do the same for the textual features. print(model_onnx) -text_onnx = select_model_inputs_outputs(model_onnx, 'variable2') +text_onnx = select_model_inputs_outputs(model_onnx, "variable2") save_onnx_model(text_onnx, "pipeline_titanic_textual.onnx") sess = rt.InferenceSession("pipeline_titanic_textual.onnx") numT = sess.run(None, inputs) @@ -238,33 +249,41 @@ def convert_dataframe_schema(df, drop=None): # Finally, let's see both subgraphs. First, numerical pipeline. 
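#################################
# The names *variable1* and *variable2* are intermediate results of the
# ONNX graph. A small sketch, using ``model_onnx`` defined above, lists
# every intermediate name and helps choosing which output to extract:

intermediate_names = set()
for node in model_onnx.graph.node:
    intermediate_names.update(node.output)
print(sorted(intermediate_names))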
pydot_graph = GetPydotGraph( - num_onnx.graph, name=num_onnx.graph.name, rankdir="TB", + num_onnx.graph, + name=num_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_titanic_num.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic_num.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic_num.dot") image = plt.imread("pipeline_titanic_num.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ###################################### # Then textual pipeline. pydot_graph = GetPydotGraph( - text_onnx.graph, name=text_onnx.graph.name, rankdir="TB", + text_onnx.graph, + name=text_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_titanic_text.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic_text.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic_text.dot") image = plt.imread("pipeline_titanic_text.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_investigate_pipeline.py b/docs/examples/plot_investigate_pipeline.py index 51fc56afc..b131d4fb8 100644 --- a/docs/examples/plot_investigate_pipeline.py +++ b/docs/examples/plot_investigate_pipeline.py @@ -39,8 +39,7 @@ from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline -pipe = Pipeline(steps=[('pca', PCA()), - ('logistic', LogisticRegression())]) +pipe = Pipeline(steps=[("pca", PCA()), ("logistic", LogisticRegression())]) digits = datasets.load_digits() X_digits = digits.data[:1000] @@ -53,14 +52,13 @@ # ++++++++++++++++++ -initial_types = [('input', FloatTensorType((None, X_digits.shape[1])))] -model_onnx = convert_sklearn(pipe, initial_types=initial_types, - target_opset=12) +initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))] +model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12) sess = rt.InferenceSession(model_onnx.SerializeToString()) print("skl predict_proba") print(pipe.predict_proba(X_digits[:2])) -onx_pred = sess.run(None, {'input': X_digits[:2].astype(np.float32)})[1] +onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1] df = pd.DataFrame(onx_pred) print("onnx predict_proba") print(df.values) @@ -76,19 +74,18 @@ # an smaller ONNX graph for every operator. -steps = collect_intermediate_steps(pipe, "pipeline", - initial_types) +steps = collect_intermediate_steps(pipe, "pipeline", initial_types) assert len(steps) == 2 pipe.predict_proba(X_digits[:2]) for i, step in enumerate(steps): - onnx_step = step['onnx_step'] + onnx_step = step["onnx_step"] sess = rt.InferenceSession(onnx_step.SerializeToString()) - onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)}) - skl_outputs = step['model']._debug.outputs - print("step 1", type(step['model'])) + onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)}) + skl_outputs = step["model"]._debug.outputs + print("step 1", type(step["model"])) print("skl outputs") print(skl_outputs) print("onnx outputs") @@ -104,21 +101,21 @@ # needed to *replay* the prediction of the model. 
to_save = { - 'model': steps[1]['model'], - 'data_input': steps[1]['model']._debug.inputs, - 'data_output': steps[1]['model']._debug.outputs, - 'inputs': steps[1]['inputs'], - 'outputs': steps[1]['outputs'], + "model": steps[1]["model"], + "data_input": steps[1]["model"]._debug.inputs, + "data_output": steps[1]["model"]._debug.outputs, + "inputs": steps[1]["inputs"], + "outputs": steps[1]["outputs"], } -del steps[1]['model']._debug +del steps[1]["model"]._debug -with open('classifier.pkl', 'wb') as f: +with open("classifier.pkl", "wb") as f: pickle.dump(to_save, f) -with open('classifier.pkl', 'rb') as f: +with open("classifier.pkl", "rb") as f: restored = pickle.load(f) -print(restored['model'].predict_proba(restored['data_input']['predict_proba'])) +print(restored["model"].predict_proba(restored["data_input"]["predict_proba"])) ################################# # **Versions used for this example** diff --git a/docs/examples/plot_logging.py b/docs/examples/plot_logging.py index fcaad1000..724d35657 100644 --- a/docs/examples/plot_logging.py +++ b/docs/examples/plot_logging.py @@ -42,16 +42,14 @@ # Convert a model into ONNX # +++++++++++++++++++++++++ -initial_type = [('float_input', FloatTensorType([None, 4]))] -onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=12) +initial_type = [("float_input", FloatTensorType([None, 4]))] +onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) sess = rt.InferenceSession(onx.SerializeToString()) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name -pred_onx = sess.run([label_name], - {input_name: X_test.astype(numpy.float32)})[0] +pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] print(pred_onx) ######################################## @@ -74,7 +72,7 @@ # This information may be useful when a custom converter is being # implemented. -logger = logging.getLogger('skl2onnx') +logger = logging.getLogger("skl2onnx") logger.setLevel(logging.DEBUG) logging.basicConfig(level=logging.DEBUG) diff --git a/docs/examples/plot_nmf.py b/docs/examples/plot_nmf.py index a6aec7d9a..4b9be8605 100644 --- a/docs/examples/plot_nmf.py +++ b/docs/examples/plot_nmf.py @@ -32,15 +32,16 @@ import matplotlib.pyplot as plt from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer import onnx -from skl2onnx.algebra.onnx_ops import ( - OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum) +from skl2onnx.algebra.onnx_ops import OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum from skl2onnx.common.data_types import FloatTensorType from onnxruntime import InferenceSession -mat = np.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], - [1, 0, 0, 0], [1, 0, 0, 0]], dtype=np.float64) -mat[:mat.shape[1], :] += np.identity(mat.shape[1]) +mat = np.array( + [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], + dtype=np.float64, +) +mat[: mat.shape[1], :] += np.identity(mat.shape[1]) mod = NMF(n_components=2) W = mod.fit_transform(mat) @@ -93,20 +94,20 @@ def nmf_to_onnx(W, H, op_version=12): and returns the predictions for it. It assumes these indices applies on the training data. 
""" - col = OnnxArrayFeatureExtractor(H, 'col') - row = OnnxArrayFeatureExtractor(W.T, 'row') + col = OnnxArrayFeatureExtractor(H, "col") + row = OnnxArrayFeatureExtractor(W.T, "row") dot = OnnxMul(col, row, op_version=op_version) res = OnnxReduceSum(dot, output_names="rec", op_version=op_version) indices_type = np.array([0], dtype=np.int64) - onx = res.to_onnx(inputs={'col': indices_type, - 'row': indices_type}, - outputs=[('rec', FloatTensorType((None, 1)))], - target_opset=op_version) + onx = res.to_onnx( + inputs={"col": indices_type, "row": indices_type}, + outputs=[("rec", FloatTensorType((None, 1)))], + target_opset=op_version, + ) return onx -model_onnx = nmf_to_onnx(W.astype(np.float32), - H.astype(np.float32)) +model_onnx = nmf_to_onnx(W.astype(np.float32), H.astype(np.float32)) print(model_onnx) ######################################## @@ -116,9 +117,7 @@ def nmf_to_onnx(W, H, op_version=12): def predict_onnx(sess, row_indices, col_indices): - res = sess.run(None, - {'col': col_indices, - 'row': row_indices}) + res = sess.run(None, {"col": col_indices, "row": row_indices}) return res @@ -136,13 +135,16 @@ def predict_onnx(sess, row_indices, col_indices): ################################### # The ONNX graph looks like the following. pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, - rankdir="TB", node_producer=GetOpNodeProducer("docstring")) + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer("docstring"), +) pydot_graph.write_dot("graph_nmf.dot") -os.system('dot -O -Tpng graph_nmf.dot') +os.system("dot -O -Tpng graph_nmf.dot") image = plt.imread("graph_nmf.dot.png") plt.imshow(image) -plt.axis('off') +plt.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_onnx_operators.py b/docs/examples/plot_onnx_operators.py index 234384fbf..9a72fd884 100644 --- a/docs/examples/plot_onnx_operators.py +++ b/docs/examples/plot_onnx_operators.py @@ -46,17 +46,17 @@ from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer # Create one input (ValueInfoProto) -X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [None, 2]) +X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, 2]) # Create one output (ValueInfoProto) -Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [None, 4]) +Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, 4]) # Create a node (NodeProto) node_def = helper.make_node( - 'Pad', # node name - ['X'], # inputs - ['Y'], # outputs - mode='constant', # attributes + "Pad", # node name + ["X"], # inputs + ["Y"], # outputs + mode="constant", # attributes value=1.5, pads=[0, 1, 0, 1], ) @@ -64,18 +64,18 @@ # Create the graph (GraphProto) graph_def = helper.make_graph( [node_def], - 'test-model', + "test-model", [X], [Y], ) # Create the model (ModelProto) -model_def = helper.make_model(graph_def, producer_name='onnx-example') +model_def = helper.make_model(graph_def, producer_name="onnx-example") model_def.opset_import[0].version = 10 -print('The model is:\n{}'.format(model_def)) +print("The model is:\n{}".format(model_def)) onnx.checker.check_model(model_def) -print('The model is checked!') +print("The model is checked!") ##################################### # Same example with sklearn-onnx @@ -87,19 +87,24 @@ from skl2onnx.algebra.onnx_ops import OnnxPad # noqa -pad = OnnxPad('X', output_names=['Y'], mode='constant', value=1.5, - pads=[0, 1, 0, 1], op_version=10) -model_def = pad.to_onnx({'X': X}, 
target_opset=10) +pad = OnnxPad( + "X", + output_names=["Y"], + mode="constant", + value=1.5, + pads=[0, 1, 0, 1], + op_version=10, +) +model_def = pad.to_onnx({"X": X}, target_opset=10) -print('The model is:\n{}'.format(model_def)) +print("The model is:\n{}".format(model_def)) onnx.checker.check_model(model_def) -print('The model is checked!') +print("The model is checked!") #################################### # Inputs and outputs can also be skipped. -pad = OnnxPad(mode='constant', value=1.5, - pads=[0, 1, 0, 1], op_version=10) +pad = OnnxPad(mode="constant", value=1.5, pads=[0, 1, 0, 1], op_version=10) model_def = pad.to_onnx({pad.inputs[0].name: X}, target_opset=10) onnx.checker.check_model(model_def) @@ -112,17 +117,17 @@ # Preprocessing: create a model with two nodes, Y's shape is unknown -node1 = helper.make_node('Transpose', ['X'], ['Y'], perm=[1, 0, 2]) -node2 = helper.make_node('Transpose', ['Y'], ['Z'], perm=[1, 0, 2]) +node1 = helper.make_node("Transpose", ["X"], ["Y"], perm=[1, 0, 2]) +node2 = helper.make_node("Transpose", ["Y"], ["Z"], perm=[1, 0, 2]) graph = helper.make_graph( [node1, node2], - 'two-transposes', - [helper.make_tensor_value_info('X', TensorProto.FLOAT, (2, 3, 4))], - [helper.make_tensor_value_info('Z', TensorProto.FLOAT, (2, 3, 4))], + "two-transposes", + [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))], + [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (2, 3, 4))], ) -original_model = helper.make_model(graph, producer_name='onnx-examples') +original_model = helper.make_model(graph, producer_name="onnx-examples") # Check the model and print Y's shape information onnx.checker.check_model(original_model) @@ -133,12 +138,12 @@ from skl2onnx.algebra.onnx_ops import OnnxTranspose # noqa node = OnnxTranspose( - OnnxTranspose('X', perm=[1, 0, 2], op_version=12), - perm=[1, 0, 2], op_version=12) + OnnxTranspose("X", perm=[1, 0, 2], op_version=12), perm=[1, 0, 2], op_version=12 +) X = np.arange(2 * 3 * 4).reshape((2, 3, 4)).astype(np.float32) # numpy arrays are good enough to define the input shape -model_def = node.to_onnx({'X': X}, target_opset=12) +model_def = node.to_onnx({"X": X}, target_opset=12) onnx.checker.check_model(model_def) ###################################### @@ -147,6 +152,7 @@ def predict_with_onnxruntime(model_def, *inputs): import onnxruntime as ort + sess = ort.InferenceSession(model_def.SerializeToString()) names = [i.name for i in sess.get_inputs()] dinputs = {name: input for name, input in zip(names, inputs)} @@ -163,25 +169,31 @@ def predict_with_onnxruntime(model_def, *inputs): # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_def.graph, name=model_def.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring", color="yellow", - fillcolor="yellow", style="filled")) + model_def.graph, + name=model_def.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_transpose2x.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot") image = plt.imread("pipeline_transpose2x.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** import sklearn # noqa + print("numpy:", numpy.__version__) print("scikit-learn:", sklearn.__version__) import skl2onnx # noqa + print("onnx: ", onnx.__version__) print("onnxruntime: 
", onnxruntime.__version__) print("skl2onnx: ", skl2onnx.__version__) diff --git a/docs/examples/plot_pipeline.py b/docs/examples/plot_pipeline.py index 7c5475005..6eae7ed14 100644 --- a/docs/examples/plot_pipeline.py +++ b/docs/examples/plot_pipeline.py @@ -29,13 +29,14 @@ from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxMul onnx_fct = OnnxAdd( - OnnxMul('X', numpy.array([2], dtype=numpy.float32), - op_version=12), + OnnxMul("X", numpy.array([2], dtype=numpy.float32), op_version=12), numpy.array([[1, 0], [0, 1]], dtype=numpy.float32), - output_names=['Y'], op_version=12) + output_names=["Y"], + op_version=12, +) X = numpy.array([[4, 5], [-2, 3]], dtype=numpy.float32) -model = onnx_fct.to_onnx({'X': X}, target_opset=12) +model = onnx_fct.to_onnx({"X": X}, target_opset=12) print(model) filename = "example1.onnx" @@ -54,25 +55,29 @@ model = ModelProto() -with open(filename, 'rb') as fid: +with open(filename, "rb") as fid: content = fid.read() model.ParseFromString(content) ################################### # We convert it into a graph. -pydot_graph = GetPydotGraph(model.graph, name=model.graph.name, rankdir="TB", - node_producer=GetOpNodeProducer("docstring")) +pydot_graph = GetPydotGraph( + model.graph, + name=model.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer("docstring"), +) pydot_graph.write_dot("graph.dot") ####################################### # Then into an image -os.system('dot -O -Tpng graph.dot') +os.system("dot -O -Tpng graph.dot") ################################ # Which we display... image = plt.imread("graph.dot.png") plt.imshow(image) -plt.axis('off') +plt.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_pipeline_lightgbm.py b/docs/examples/plot_pipeline_lightgbm.py index a57bca298..fb584e5a1 100644 --- a/docs/examples/plot_pipeline_lightgbm.py +++ b/docs/examples/plot_pipeline_lightgbm.py @@ -30,8 +30,12 @@ import onnxruntime as rt from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail from skl2onnx import convert_sklearn, update_registered_converter -from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa -from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes, +) # noqa +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( + convert_lightgbm, +) # noqa import onnxmltools.convert.common.data_types from skl2onnx.common.data_types import FloatTensorType import numpy @@ -49,8 +53,9 @@ X = X[ind, :].copy() y = y[ind].copy() -pipe = Pipeline([('scaler', StandardScaler()), - ('lgbm', LGBMClassifier(n_estimators=3))]) +pipe = Pipeline( + [("scaler", StandardScaler()), ("lgbm", LGBMClassifier(n_estimators=3))] +) pipe.fit(X, y) ###################################### @@ -72,18 +77,23 @@ ########################### # Let's register the new converter. 
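#################################
# The ``options`` argument given to ``update_registered_converter`` below
# only declares which options the LightGBM converter accepts. The values
# themselves are chosen at conversion time, in a dictionary keyed by the
# model class or by ``id(model)``. A minimal sketch of such a dictionary
# (illustrative values only):

conversion_options = {id(pipe.steps[1][1]): {"zipmap": False}}
print(conversion_options)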
update_registered_converter( - LGBMClassifier, 'LightGbmLGBMClassifier', - calculate_linear_classifier_output_shapes, convert_lightgbm, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + LGBMClassifier, + "LightGbmLGBMClassifier", + calculate_linear_classifier_output_shapes, + convert_lightgbm, + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) ################################## # Convert again # +++++++++++++ model_onnx = convert_sklearn( - pipe, 'pipeline_lightgbm', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + pipe, + "pipeline_lightgbm", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, +) # And save. with open("pipeline_lightgbm.onnx", "wb") as f: @@ -118,18 +128,21 @@ # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", - fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline.dot") image = plt.imread("pipeline.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_pipeline_xgboost.py b/docs/examples/plot_pipeline_xgboost.py index 5de606c31..3feaa6d87 100644 --- a/docs/examples/plot_pipeline_xgboost.py +++ b/docs/examples/plot_pipeline_xgboost.py @@ -34,9 +34,13 @@ import skl2onnx from skl2onnx.common.data_types import FloatTensorType from skl2onnx import convert_sklearn, update_registered_converter -from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes, +) # noqa import onnxmltools -from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost # noqa +from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( + convert_xgboost, +) # noqa import onnxmltools.convert.common.data_types data = load_iris() @@ -48,16 +52,18 @@ X = X[ind, :].copy() y = y[ind].copy() -pipe = Pipeline([('scaler', StandardScaler()), - ('lgbm', XGBClassifier(n_estimators=3))]) +pipe = Pipeline([("scaler", StandardScaler()), ("lgbm", XGBClassifier(n_estimators=3))]) pipe.fit(X, y) # The conversion fails but it is expected. try: - convert_sklearn(pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + convert_sklearn( + pipe, + "pipeline_xgboost", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, + ) except Exception as e: print(e) @@ -88,18 +94,23 @@ ########################### # Let's register the new converter. 
update_registered_converter( - XGBClassifier, 'XGBoostXGBClassifier', - calculate_linear_classifier_output_shapes, convert_xgboost, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + XGBClassifier, + "XGBoostXGBClassifier", + calculate_linear_classifier_output_shapes, + convert_xgboost, + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) ################################## # Convert again # +++++++++++++ model_onnx = convert_sklearn( - pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + pipe, + "pipeline_xgboost", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, +) # And save. with open("pipeline_xgboost.onnx", "wb") as f: @@ -127,18 +138,21 @@ # ++++++++++++++++++++++ pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, rankdir="TB", + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", - fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline.dot") image = plt.imread("pipeline.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ################################# # **Versions used for this example** diff --git a/docs/examples/plot_tfidfvectorizer.py b/docs/examples/plot_tfidfvectorizer.py index b33b6765b..96321bfbf 100644 --- a/docs/examples/plot_tfidfvectorizer.py +++ b/docs/examples/plot_tfidfvectorizer.py @@ -32,13 +32,18 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.datasets import fetch_20newsgroups + try: from sklearn.datasets._twenty_newsgroups import ( - strip_newsgroup_footer, strip_newsgroup_quoting) + strip_newsgroup_footer, + strip_newsgroup_quoting, + ) except ImportError: # scikit-learn < 0.24 from sklearn.datasets.twenty_newsgroups import ( - strip_newsgroup_footer, strip_newsgroup_quoting) + strip_newsgroup_footer, + strip_newsgroup_quoting, + ) from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.pipeline import Pipeline @@ -48,15 +53,17 @@ # limit the list of categories to make running this example faster. -categories = ['alt.atheism', 'talk.religion.misc'] -train = fetch_20newsgroups(random_state=1, - subset='train', - categories=categories, - ) -test = fetch_20newsgroups(random_state=1, - subset='test', - categories=categories, - ) +categories = ["alt.atheism", "talk.religion.misc"] +train = fetch_20newsgroups( + random_state=1, + subset="train", + categories=categories, +) +test = fetch_20newsgroups( + random_state=1, + subset="test", + categories=categories, +) ############################## # The first transform extract two fields from the data. 
@@ -78,16 +85,16 @@ def transform(self, posts): # first column = 'subject' and second column = 'body' features = np.empty(shape=(len(posts), 2), dtype=object) for i, text in enumerate(posts): - headers, _, bod = text.partition('\n\n') + headers, _, bod = text.partition("\n\n") bod = strip_newsgroup_footer(bod) bod = strip_newsgroup_quoting(bod) features[i, 1] = bod - prefix = 'Subject:' - sub = '' - for line in headers.split('\n'): + prefix = "Subject:" + sub = "" + for line in headers.split("\n"): if line.startswith(prefix): - sub = line[len(prefix):] + sub = line[len(prefix) :] break features[i, 0] = sub @@ -101,35 +108,42 @@ def transform(self, posts): # The pipeline is almost the same except # we remove the custom features. -pipeline = Pipeline([ - ('union', ColumnTransformer( - [ - ('subject', TfidfVectorizer(min_df=50, max_features=500), 0), - - ('body_bow', Pipeline([ - ('tfidf', TfidfVectorizer()), - ('best', TruncatedSVD(n_components=50)), - ]), 1), - - # Removed from the original example as - # it requires a custom converter. - # ('body_stats', Pipeline([ - # ('stats', TextStats()), # returns a list of dicts - # ('vect', DictVectorizer()), # list of dicts -> feature matrix - # ]), 1), - ], - - transformer_weights={ - 'subject': 0.8, - 'body_bow': 0.5, - # 'body_stats': 1.0, - } - )), - - # Use a LogisticRegression classifier on the combined features. - # Instead of LinearSVC (not fully ready in onnxruntime). - ('logreg', LogisticRegression()), -]) +pipeline = Pipeline( + [ + ( + "union", + ColumnTransformer( + [ + ("subject", TfidfVectorizer(min_df=50, max_features=500), 0), + ( + "body_bow", + Pipeline( + [ + ("tfidf", TfidfVectorizer()), + ("best", TruncatedSVD(n_components=50)), + ] + ), + 1, + ), + # Removed from the original example as + # it requires a custom converter. + # ('body_stats', Pipeline([ + # ('stats', TextStats()), # returns a list of dicts + # ('vect', DictVectorizer()), # list of dicts -> feature matrix + # ]), 1), + ], + transformer_weights={ + "subject": 0.8, + "body_bow": 0.5, + # 'body_stats': 1.0, + }, + ), + ), + # Use a LogisticRegression classifier on the combined features. + # Instead of LinearSVC (not fully ready in onnxruntime). + ("logreg", LogisticRegression()), + ] +) pipeline.fit(train_data, train.target) print(classification_report(pipeline.predict(test_data), test.target)) @@ -149,16 +163,32 @@ def transform(self, posts): seps = { TfidfVectorizer: { "separators": [ - ' ', '.', '\\?', ',', ';', ':', '!', - '\\(', '\\)', '\n', '"', "'", - "-", "\\[", "\\]", "@" + " ", + ".", + "\\?", + ",", + ";", + ":", + "!", + "\\(", + "\\)", + "\n", + '"', + "'", + "-", + "\\[", + "\\]", + "@", ] } } model_onnx = convert_sklearn( - pipeline, "tfidf", + pipeline, + "tfidf", initial_types=[("input", StringTensorType([None, 2]))], - options=seps, target_opset=12) + options=seps, + target_opset=12, +) ################################# # And save. @@ -169,8 +199,8 @@ def transform(self, posts): # Predictions with onnxruntime. sess = rt.InferenceSession("pipeline_tfidf.onnx") -print('---', train_data[0]) -inputs = {'input': train_data[:1]} +print("---", train_data[0]) +inputs = {"input": train_data[:1]} pred_onx = sess.run(None, inputs) print("predict", pred_onx[0]) print("predict_proba", pred_onx[1]) @@ -192,16 +222,18 @@ def transform(self, posts): # Finally, let's see the graph converted with *sklearn-onnx*. 
pydot_graph = GetPydotGraph( - model_onnx.graph, name=model_onnx.graph.name, - rankdir="TB", node_producer=GetOpNodeProducer("docstring", - color="yellow", - fillcolor="yellow", - style="filled")) + model_onnx.graph, + name=model_onnx.graph.name, + rankdir="TB", + node_producer=GetOpNodeProducer( + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("pipeline_tfidf.dot") -os.system('dot -O -Gdpi=300 -Tpng pipeline_tfidf.dot') +os.system("dot -O -Gdpi=300 -Tpng pipeline_tfidf.dot") image = plt.imread("pipeline_tfidf.dot.png") fig, ax = plt.subplots(figsize=(40, 20)) ax.imshow(image) -ax.axis('off') +ax.axis("off") diff --git a/docs/exts/github_link.py b/docs/exts/github_link.py index 5bd939f1f..9d87a68de 100644 --- a/docs/exts/github_link.py +++ b/docs/exts/github_link.py @@ -9,16 +9,16 @@ import sys from functools import partial -REVISION_CMD = 'git rev-parse --short HEAD' +REVISION_CMD = "git rev-parse --short HEAD" def _get_git_revision(): try: revision = subprocess.check_output(REVISION_CMD.split()).strip() except (subprocess.CalledProcessError, OSError): - print('Failed to execute git to get revision') + print("Failed to execute git to get revision") return None - return revision.decode('utf-8') + return revision.decode("utf-8") def _linkcode_resolve(domain, info, package, url_fmt, revision): @@ -36,14 +36,14 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if revision is None: return - if domain not in ('py', 'pyx'): + if domain not in ("py", "pyx"): return - if not info.get('module') or not info.get('fullname'): + if not info.get("module") or not info.get("fullname"): return - class_name = info['fullname'].split('.')[0] - module = __import__(info['module'], fromlist=[class_name]) - obj = attrgetter(info['fullname'])(module) + class_name = info["fullname"].split(".")[0] + module = __import__(info["module"], fromlist=[class_name]) + obj = attrgetter(info["fullname"])(module) # Unwrap the object to get the correct source # file in case that is wrapped by a decorator @@ -61,14 +61,12 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if not fn: return - fn = os.path.relpath(fn, - start=os.path.dirname(__import__(package).__file__)) + fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) try: lineno = inspect.getsourcelines(obj)[1] except Exception: - lineno = '' - return url_fmt.format(revision=revision, package=package, - path=fn, lineno=lineno) + lineno = "" + return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) def make_linkcode_resolve(package, url_fmt): @@ -80,5 +78,6 @@ def make_linkcode_resolve(package, url_fmt): '{path}#L{lineno}') """ revision = _get_git_revision() - return partial(_linkcode_resolve, revision=revision, package=package, - url_fmt=url_fmt) + return partial( + _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt + ) diff --git a/docs/exts/sphinx_skl2onnx_extension.py b/docs/exts/sphinx_skl2onnx_extension.py index e92aac08c..f7cf24cae 100644 --- a/docs/exts/sphinx_skl2onnx_extension.py +++ b/docs/exts/sphinx_skl2onnx_extension.py @@ -17,8 +17,9 @@ import onnxruntime -def skl2onnx_version_role(role, rawtext, text, lineno, inliner, - options=None, content=None): +def skl2onnx_version_role( + role, rawtext, text, lineno, inliner, options=None, content=None +): """ Defines custom role *skl2onnx-version* which returns *skl2onnx* version. 
@@ -27,14 +28,14 @@ def skl2onnx_version_role(role, rawtext, text, lineno, inliner, options = {} if content is None: content = [] - if text == 'v': - version = 'v' + skl2onnx.__version__ - elif text == 'rt': - version = 'v' + onnxruntime.__version__ + if text == "v": + version = "v" + skl2onnx.__version__ + elif text == "rt": + version = "v" + onnxruntime.__version__ else: raise RuntimeError( - "skl2onnx_version_role cannot interpret content '{0}'." - "".format(text)) + "skl2onnx_version_role cannot interpret content '{0}'." "".format(text) + ) node = nodes.literal(version) return [node], [] @@ -44,6 +45,7 @@ class SupportedSkl2OnnxDirective(Directive): Automatically displays the list of models *skl2onnx* can currently convert. """ + required_arguments = False optional_arguments = 0 final_argument_whitespace = True @@ -57,7 +59,7 @@ def run(self): for mod in models: par = nodes.paragraph() par += nodes.Text(mod) - bullets += nodes.list_item('', par) + bullets += nodes.list_item("", par) return ns @@ -66,6 +68,7 @@ class SupportedOnnxOpsDirective(Directive): Automatically displays the list of supported ONNX models *skl2onnx* can use to build converters. """ + required_arguments = False optional_arguments = 0 final_argument_whitespace = True @@ -92,10 +95,10 @@ def make_ref(name): if i + cut * 2 < len(sorted_keys): row.append(make_ref(sorted_keys[i + cut * 2])) else: - row.append('') + row.append("") else: - row.append('') - row.append('') + row.append("") + row.append("") table.append(row) rst = tabulate(table, tablefmt="rst") @@ -106,17 +109,16 @@ def make_ref(name): nested_parse_with_titles(self.state, st, node) main += node - rows.append('') + rows.append("") for name in sorted_keys: rows = [] cl = cls[name] - rows.append('.. _l-onnx-{}:'.format(cl.__name__)) - rows.append('') + rows.append(".. _l-onnx-{}:".format(cl.__name__)) + rows.append("") rows.append(cl.__name__) - rows.append('=' * len(cl.__name__)) - rows.append('') - rows.append( - ".. autoclass:: skl2onnx.algebra.onnx_ops.{}".format(name)) + rows.append("=" * len(cl.__name__)) + rows.append("") + rows.append(".. autoclass:: skl2onnx.algebra.onnx_ops.{}".format(name)) st = StringList(rows) node = nodes.container() nested_parse_with_titles(self.state, st, node) @@ -129,6 +131,7 @@ class SupportedSklearnOpsDirective(Directive): """ Automatically displays the list of available converters. """ + required_arguments = False optional_arguments = 0 final_argument_whitespace = True @@ -155,10 +158,10 @@ def make_ref(name): if i + cut * 2 < len(sorted_keys): row.append(make_ref(sorted_keys[i + cut * 2])) else: - row.append('') + row.append("") else: - row.append('') - row.append('') + row.append("") + row.append("") table.append(row) rst = tabulate(table, tablefmt="rst") @@ -169,17 +172,16 @@ def make_ref(name): nested_parse_with_titles(self.state, st, node) main += node - rows.append('') + rows.append("") for name in sorted_keys: rows = [] cl = cls[name] - rows.append('.. _l-sklops-{}:'.format(cl.__name__)) - rows.append('') + rows.append(".. _l-sklops-{}:".format(cl.__name__)) + rows.append("") rows.append(cl.__name__) - rows.append('=' * len(cl.__name__)) - rows.append('') - rows.append( - ".. autoclass:: skl2onnx.algebra.sklearn_ops.{}".format(name)) + rows.append("=" * len(cl.__name__)) + rows.append("") + rows.append(".. 
autoclass:: skl2onnx.algebra.sklearn_ops.{}".format(name)) st = StringList(rows) node = nodes.container() nested_parse_with_titles(self.state, st, node) @@ -194,6 +196,7 @@ def missing_ops(): """ from sklearn import __all__ from sklearn.base import BaseEstimator + found = [] for sub in __all__: try: @@ -209,11 +212,17 @@ def missing_ops(): issub = issubclass(cl, BaseEstimator) except TypeError: continue - if cl.__name__ in {'Pipeline', 'ColumnTransformer', - 'FeatureUnion', 'BaseEstimator'}: + if cl.__name__ in { + "Pipeline", + "ColumnTransformer", + "FeatureUnion", + "BaseEstimator", + }: continue - if (sub in {'calibration', 'dummy', 'manifold'} and - 'Calibrated' not in cl.__name__): + if ( + sub in {"calibration", "dummy", "manifold"} + and "Calibrated" not in cl.__name__ + ): continue if issub: found.append((cl.__name__, sub, cl)) @@ -226,6 +235,7 @@ class AllSklearnOpsDirective(Directive): Displays the list of models implemented in scikit-learn and whether or not there is an associated converter. """ + required_arguments = False optional_arguments = 0 final_argument_whitespace = True @@ -234,12 +244,19 @@ class AllSklearnOpsDirective(Directive): def run(self): from sklearn import __version__ as skver + found = missing_ops() nbconverters = 0 supported = set(build_sklearn_operator_name_map()) - rows = [".. list-table::", " :header-rows: 1", - " :widths: 10 7 4", - "", " * - Name", " - Package", " - Supported"] + rows = [ + ".. list-table::", + " :header-rows: 1", + " :widths: 10 7 4", + "", + " * - Name", + " - Package", + " - Supported", + ] for name, sub, cl in found: rows.append(" * - " + name) rows.append(" - " + sub) @@ -251,8 +268,7 @@ def run(self): rows.append("") rows.append("scikit-learn's version is **{0}**.".format(skver)) - rows.append( - "{0}/{1} models are covered.".format(nbconverters, len(found))) + rows.append("{0}/{1} models are covered.".format(nbconverters, len(found))) node = nodes.container() st = StringList(rows) @@ -265,9 +281,9 @@ def run(self): def setup(app): # Placeholder to initialize the folder before # generating the documentation. 
- app.add_role('skl2onnxversion', skl2onnx_version_role) - app.add_directive('supported-skl2onnx', SupportedSkl2OnnxDirective) - app.add_directive('supported-onnx-ops', SupportedOnnxOpsDirective) - app.add_directive('supported-sklearn-ops', SupportedSklearnOpsDirective) - app.add_directive('covered-sklearn-ops', AllSklearnOpsDirective) - return {'version': sphinx.__display_version__, 'parallel_read_safe': True} + app.add_role("skl2onnxversion", skl2onnx_version_role) + app.add_directive("supported-skl2onnx", SupportedSkl2OnnxDirective) + app.add_directive("supported-onnx-ops", SupportedOnnxOpsDirective) + app.add_directive("supported-sklearn-ops", SupportedSklearnOpsDirective) + app.add_directive("covered-sklearn-ops", AllSklearnOpsDirective) + return {"version": sphinx.__display_version__, "parallel_read_safe": True} diff --git a/docs/requirements.txt b/docs/requirements.txt index 401b34eff..cf3e6427e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,7 +5,7 @@ coverage flake8 furo joblib -lightgbm +lightgbm<4.0 loky matplotlib mlinsights>=0.3.631 diff --git a/docs/tests/test_documentation_examples.py b/docs/tests/test_documentation_examples.py index 2b23b97fb..7c930431f 100644 --- a/docs/tests/test_documentation_examples.py +++ b/docs/tests/test_documentation_examples.py @@ -15,29 +15,26 @@ def import_source(module_file_path, module_name): if not os.path.exists(module_file_path): raise FileNotFoundError(module_file_path) - module_spec = importlib.util.spec_from_file_location( - module_name, module_file_path) + module_spec = importlib.util.spec_from_file_location(module_name, module_file_path) if module_spec is None: raise FileNotFoundError( - "Unable to find '{}' in '{}'.".format( - module_name, module_file_path)) + "Unable to find '{}' in '{}'.".format(module_name, module_file_path) + ) module = importlib.util.module_from_spec(module_spec) return module_spec.loader.exec_module(module) class TestDocumentationExample(unittest.TestCase): - def test_documentation_examples(self): - this = os.path.abspath(os.path.dirname(__file__)) - fold = os.path.normpath(os.path.join(this, '..', 'examples')) + fold = os.path.normpath(os.path.join(this, "..", "examples")) found = os.listdir(fold) tested = 0 for name in found: if name.startswith("plot_") and name.endswith(".py"): - if (name == "plot_pipeline_lightgbm.py" and - pv.Version(onnxruntime.__version__) < - pv.Version('1.0.0')): + if name == "plot_pipeline_lightgbm.py" and pv.Version( + onnxruntime.__version__ + ) < pv.Version("1.0.0"): continue print("run %r" % name) try: @@ -45,14 +42,14 @@ def test_documentation_examples(self): assert mod is not None except FileNotFoundError: # try another way - cmds = [sys.executable, "-u", - os.path.join(fold, name)] + cmds = [sys.executable, "-u", os.path.join(fold, name)] p = subprocess.Popen( - cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) res = p.communicate() out, err = res - st = err.decode('ascii', errors='ignore') - if len(st) > 0 and 'Traceback' in st: + st = err.decode("ascii", errors="ignore") + if len(st) > 0 and "Traceback" in st: if "No such file or directory: 'dot'" in st: # dot not installed, this part # is tested in onnx framework @@ -61,13 +58,14 @@ def test_documentation_examples(self): # dot not installed, this part # is tested in onnx framework pass - elif ('Please fix either the inputs or ' - 'the model.') in st: + elif ("Please fix either the inputs or " "the model.") in st: # onnxruntime datasets changed 
in master branch, # still the same in released version on pypi pass - elif ('Current official support for domain ai.onnx ' - 'is till opset 12.') in st: + elif ( + "Current official support for domain ai.onnx " + "is till opset 12." + ) in st: # one example is using opset 13 but onnxruntime # only support up to opset 12. pass @@ -78,7 +76,8 @@ def test_documentation_examples(self): raise RuntimeError( "Example '{}' (cmd: {} - exec_prefix='{}') " "failed due to\n{}" - "".format(name, cmds, sys.exec_prefix, st)) + "".format(name, cmds, sys.exec_prefix, st) + ) tested += 1 if tested == 0: raise RuntimeError("No example was tested.") diff --git a/docs/tests/test_documentation_tutorial.py b/docs/tests/test_documentation_tutorial.py index d607bf9a8..6e6f41c61 100644 --- a/docs/tests/test_documentation_tutorial.py +++ b/docs/tests/test_documentation_tutorial.py @@ -13,22 +13,19 @@ def import_source(module_file_path, module_name): if not os.path.exists(module_file_path): raise FileNotFoundError(module_file_path) - module_spec = importlib.util.spec_from_file_location( - module_name, module_file_path) + module_spec = importlib.util.spec_from_file_location(module_name, module_file_path) if module_spec is None: raise FileNotFoundError( - "Unable to find '{}' in '{}'.".format( - module_name, module_file_path)) + "Unable to find '{}' in '{}'.".format(module_name, module_file_path) + ) module = importlib.util.module_from_spec(module_spec) return module_spec.loader.exec_module(module) class TestDocumentationTutorial(unittest.TestCase): - def test_documentation_tutorial(self): - this = os.path.abspath(os.path.dirname(__file__)) - fold = os.path.normpath(os.path.join(this, '..', 'tutorial')) + fold = os.path.normpath(os.path.join(this, "..", "tutorial")) found = os.listdir(fold) tested = 0 for name in found: @@ -39,14 +36,14 @@ def test_documentation_tutorial(self): assert mod is not None except FileNotFoundError: # try another way - cmds = [sys.executable, "-u", - os.path.join(fold, name)] + cmds = [sys.executable, "-u", os.path.join(fold, name)] p = subprocess.Popen( - cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) res = p.communicate() out, err = res - st = err.decode('ascii', errors='ignore') - if len(st) > 0 and 'Traceback' in st: + st = err.decode("ascii", errors="ignore") + if len(st) > 0 and "Traceback" in st: if "No such file or directory: 'dot'" in st: # dot not installed, this part # is tested in onnx framework @@ -55,26 +52,30 @@ def test_documentation_tutorial(self): # dot not installed, this part # is tested in onnx framework pass - elif ("cannot import name 'LightGbmModelContainer' " - "from 'onnxmltools.convert.common." - "_container'") in st: + elif ( + "cannot import name 'LightGbmModelContainer' " + "from 'onnxmltools.convert.common." + "_container'" + ) in st: # onnxmltools not recent enough pass - elif ('Please fix either the inputs or ' - 'the model.') in st: + elif ("Please fix either the inputs or " "the model.") in st: # onnxruntime datasets changed in master branch, # still the same in released version on pypi pass - elif ('Current official support for domain ai.onnx ' - 'is till opset 12.') in st: + elif ( + "Current official support for domain ai.onnx " + "is till opset 12." + ) in st: # one example is using opset 13 but onnxruntime # only support up to opset 12. 
pass elif "'str' object has no attribute 'decode'" in st: # unstable bug in scikit-learn<0.24 pass - elif ("This method should be overwritten for " - "operator") in st: + elif ( + "This method should be overwritten for " "operator" + ) in st: # raised by old version of packages # used in the documentation pass @@ -82,7 +83,8 @@ def test_documentation_tutorial(self): raise RuntimeError( "Example '{}' (cmd: {} - exec_prefix='{}') " "failed due to\n{}" - "".format(name, cmds, sys.exec_prefix, st)) + "".format(name, cmds, sys.exec_prefix, st) + ) tested += 1 if tested == 0: raise RuntimeError("No example was tested.") diff --git a/docs/tests/test_utils_benchmark.py b/docs/tests/test_utils_benchmark.py index dbfa979ef..516081a3d 100644 --- a/docs/tests/test_utils_benchmark.py +++ b/docs/tests/test_utils_benchmark.py @@ -10,19 +10,16 @@ class TestMeasureTime(unittest.TestCase): - def test_vector_count(self): def fct(): X = numpy.ones((1000, 5)) return X - res = measure_time( - "fct", context={"fct": fct}, div_by_number=False, number=100) + + res = measure_time("fct", context={"fct": fct}, div_by_number=False, number=100) self.assertIn("average", res) - res = measure_time( - "fct", context={"fct": fct}, div_by_number=True, number=100) + res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=100) self.assertIn("average", res) - res = measure_time( - "fct", context={"fct": fct}, div_by_number=True, number=1000) + res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=1000) self.assertIn("average", res) diff --git a/docs/tests/test_utils_classes.py b/docs/tests/test_utils_classes.py index a1dbd634f..5a16aaaa9 100644 --- a/docs/tests/test_utils_classes.py +++ b/docs/tests/test_utils_classes.py @@ -9,7 +9,6 @@ class TestUtilsClasses(unittest.TestCase): - def test_classes(self): cl = class_names self.assertIsInstance(cl, dict) diff --git a/docs/tutorial/plot_abegin_convert_pipeline.py b/docs/tutorial/plot_abegin_convert_pipeline.py index f41c5f5c1..9596fe305 100644 --- a/docs/tutorial/plot_abegin_convert_pipeline.py +++ b/docs/tutorial/plot_abegin_convert_pipeline.py @@ -17,13 +17,14 @@ Training a pipeline +++++++++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import numpy from onnxruntime import InferenceSession from sklearn.datasets import load_diabetes from sklearn.ensemble import ( - GradientBoostingRegressor, RandomForestRegressor, - VotingRegressor) + GradientBoostingRegressor, + RandomForestRegressor, + VotingRegressor, +) from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline @@ -39,9 +40,11 @@ reg2 = RandomForestRegressor(random_state=1, n_estimators=5) reg3 = LinearRegression() -ereg = Pipeline(steps=[ - ('voting', VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])), -]) +ereg = Pipeline( + steps=[ + ("voting", VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3)])), + ] +) ereg.fit(X_train, y_train) ################################# @@ -54,8 +57,7 @@ # into single float and ONNX runtimes may not fully # support doubles. -onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), - target_opset=12) +onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), target_opset=12) ################################### # Prediction with ONNX @@ -64,7 +66,7 @@ # The first example uses :epkg:`onnxruntime`. 
sess = InferenceSession(onx.SerializeToString()) -pred_ort = sess.run(None, {'X': X_test.astype(numpy.float32)})[0] +pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] pred_skl = ereg.predict(X_test.astype(numpy.float32)) @@ -113,5 +115,5 @@ def diff(p1, p2): ########################################## # It works almost the same way. -pred_pyrt = oinf.run(None, {'X': X_test.astype(numpy.float32)})[0] +pred_pyrt = oinf.run(None, {"X": X_test.astype(numpy.float32)})[0] print(diff(pred_skl, pred_pyrt)) diff --git a/docs/tutorial/plot_bbegin_measure_time.py b/docs/tutorial/plot_bbegin_measure_time.py index 3acabe3e4..823211506 100644 --- a/docs/tutorial/plot_bbegin_measure_time.py +++ b/docs/tutorial/plot_bbegin_measure_time.py @@ -21,8 +21,10 @@ from sklearn import config_context from sklearn.datasets import make_regression from sklearn.ensemble import ( - GradientBoostingRegressor, RandomForestRegressor, - VotingRegressor) + GradientBoostingRegressor, + RandomForestRegressor, + VotingRegressor, +) from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from onnxruntime import InferenceSession @@ -32,15 +34,14 @@ N = 11000 X, y = make_regression(N, n_features=10) -X_train, X_test, y_train, y_test = train_test_split( - X, y, train_size=0.01) +X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.01) print("Train shape", X_train.shape) print("Test shape", X_test.shape) reg1 = GradientBoostingRegressor(random_state=1) reg2 = RandomForestRegressor(random_state=1) reg3 = LinearRegression() -ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)]) +ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3)]) ereg.fit(X_train, y_train) ################################# @@ -59,12 +60,12 @@ with config_context(assume_finite=True): obs = [] for batch_size, repeat in tqdm(sizes): - context = {"ereg": ereg, 'X': X_test[:batch_size]} + context = {"ereg": ereg, "X": X_test[:batch_size]} mt = measure_time( - "ereg.predict(X)", context, div_by_number=True, - number=10, repeat=repeat) - mt['size'] = context['X'].shape[0] - mt['mean_obs'] = mt['average'] / mt['size'] + "ereg.predict(X)", context, div_by_number=True, number=10, repeat=repeat + ) + mt["size"] = context["X"].shape[0] + mt["mean_obs"] = mt["average"] / mt["size"] obs.append(mt) df_skl = DataFrame(obs) @@ -73,8 +74,7 @@ ##################################### # Graphe. -df_skl.set_index('size')[['mean_obs']].plot( - title="scikit-learn", logx=True, logy=True) +df_skl.set_index("size")[["mean_obs"]].plot(title="scikit-learn", logx=True, logy=True) ############################### # ONNX runtime @@ -83,36 +83,41 @@ # The same is done with the two ONNX runtime # available. 
-onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), - target_opset=14) -sess = InferenceSession(onx.SerializeToString(), - providers=["CPUExecutionProvider"]) +onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), target_opset=14) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) oinf = ReferenceEvaluator(onx) obs = [] for batch_size, repeat in tqdm(sizes): - # scikit-learn - context = {"ereg": ereg, 'X': X_test[:batch_size].astype(numpy.float32)} + context = {"ereg": ereg, "X": X_test[:batch_size].astype(numpy.float32)} mt = measure_time( - "ereg.predict(X)", context, div_by_number=True, - number=10, repeat=repeat) - mt['size'] = context['X'].shape[0] - mt['skl'] = mt['average'] / mt['size'] + "ereg.predict(X)", context, div_by_number=True, number=10, repeat=repeat + ) + mt["size"] = context["X"].shape[0] + mt["skl"] = mt["average"] / mt["size"] # onnxruntime - context = {"sess": sess, 'X': X_test[:batch_size].astype(numpy.float32)} + context = {"sess": sess, "X": X_test[:batch_size].astype(numpy.float32)} mt2 = measure_time( - "sess.run(None, {'X': X})[0]", context, div_by_number=True, - number=10, repeat=repeat) - mt['ort'] = mt2['average'] / mt['size'] + "sess.run(None, {'X': X})[0]", + context, + div_by_number=True, + number=10, + repeat=repeat, + ) + mt["ort"] = mt2["average"] / mt["size"] # ReferenceEvaluator - context = {"oinf": oinf, 'X': X_test[:batch_size].astype(numpy.float32)} + context = {"oinf": oinf, "X": X_test[:batch_size].astype(numpy.float32)} mt2 = measure_time( - "oinf.run(None, {'X': X})[0]", context, div_by_number=True, - number=10, repeat=repeat) - mt['pyrt'] = mt2['average'] / mt['size'] + "oinf.run(None, {'X': X})[0]", + context, + div_by_number=True, + number=10, + repeat=repeat, + ) + mt["pyrt"] = mt2["average"] / mt["size"] # end obs.append(mt) @@ -124,9 +129,9 @@ ##################################### # Graph. -df.set_index('size')[['skl', 'ort', 'pyrt']].plot( - title="Average prediction time per runtime", - logx=True, logy=True) +df.set_index("size")[["skl", "ort", "pyrt"]].plot( + title="Average prediction time per runtime", logx=True, logy=True +) ##################################### # :epkg:`ONNX` runtimes are much faster than :epkg:`scikit-learn` diff --git a/docs/tutorial/plot_catwoe_transformer.py b/docs/tutorial/plot_catwoe_transformer.py index 338cf5939..bf9bed2fe 100644 --- a/docs/tutorial/plot_catwoe_transformer.py +++ b/docs/tutorial/plot_catwoe_transformer.py @@ -68,10 +68,10 @@ def ordenc_to_sklearn(op_mapping): "Converts OrdinalEncoder mapping to scikit-learn OrdinalEncoder." 
cats = [] for column_map in op_mapping: - col = column_map['col'] + col = column_map["col"] while len(cats) <= col: cats.append(None) - mapping = column_map['mapping'] + mapping = column_map["mapping"] res = [] for i in range(mapping.shape[0]): if np.isnan(mapping.index[i]): @@ -88,8 +88,7 @@ def ordenc_to_sklearn(op_mapping): def ordinal_encoder_shape_calculator(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) input_type = operator.inputs[0].type.__class__ input_dim = operator.inputs[0].get_first_dimension() shape = operator.inputs[0].type.shape @@ -104,15 +103,18 @@ def ordinal_encoder_converter(scope, operator, container): X = operator.inputs[0] skl_ord = ordenc_to_sklearn(op.mapping) - cat = OnnxSubEstimator(skl_ord, X, op_version=opv, - output_names=operator.outputs[:1]) + cat = OnnxSubEstimator( + skl_ord, X, op_version=opv, output_names=operator.outputs[:1] + ) cat.add_to(scope, container) update_registered_converter( - OrdinalEncoder, "CategoricalEncoderOrdinalEncoder", + OrdinalEncoder, + "CategoricalEncoderOrdinalEncoder", ordinal_encoder_shape_calculator, - ordinal_encoder_converter) + ordinal_encoder_converter, +) ################################### @@ -130,7 +132,7 @@ def ordinal_encoder_converter(scope, operator, container): ord_onx = to_onnx(enc, X[:1], target_opset=14) sess = InferenceSession(ord_onx.SerializeToString()) -print(sess.run(None, {'X': X[:5]})[0]) +print(sess.run(None, {"X": X[:5]})[0]) ###################################### # That works. @@ -149,7 +151,7 @@ def woeenc_to_sklearn(op_mapping): for column_map in op_mapping.items(): col = column_map[0] while len(cats) <= col: - cats.append('passthrough') + cats.append("passthrough") ws.append(None) mapping = column_map[1] intervals = [] @@ -168,25 +170,22 @@ def woeenc_to_sklearn(op_mapping): return skl -def woe_encoder_parser( - scope, model, inputs, custom_parsers=None): +def woe_encoder_parser(scope, model, inputs, custom_parsers=None): if len(inputs) != 1: - raise RuntimeError( - "Unexpected number of inputs: %d != 1." % len(inputs)) + raise RuntimeError("Unexpected number of inputs: %d != 1." % len(inputs)) if inputs[0].type is None: - raise RuntimeError( - "Unexpected type: %r." % (inputs[0], )) + raise RuntimeError("Unexpected type: %r." 
% (inputs[0],)) alias = get_model_alias(type(model)) this_operator = scope.declare_local_operator(alias, model) this_operator.inputs.append(inputs[0]) this_operator.outputs.append( - scope.declare_local_variable('catwoe', FloatTensorType())) + scope.declare_local_variable("catwoe", FloatTensorType()) + ) return this_operator.outputs def woe_encoder_shape_calculator(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) input_dim = operator.inputs[0].get_first_dimension() shape = operator.inputs[0].type.shape second_dim = None if len(shape) != 2 else shape[1] @@ -199,21 +198,26 @@ def woe_encoder_converter(scope, operator, container): opv = container.target_opset X = operator.inputs[0] - sub = OnnxSubEstimator(op.ordinal_encoder, X, - op_version=opv) + sub = OnnxSubEstimator(op.ordinal_encoder, X, op_version=opv) cast = OnnxCast(sub, op_version=opv, to=np.float32) skl_ord = woeenc_to_sklearn(op.mapping) - cat = OnnxSubEstimator(skl_ord, cast, op_version=opv, - output_names=operator.outputs[:1], - input_types=[FloatTensorType()]) + cat = OnnxSubEstimator( + skl_ord, + cast, + op_version=opv, + output_names=operator.outputs[:1], + input_types=[FloatTensorType()], + ) cat.add_to(scope, container) update_registered_converter( - WOEEncoder, "CategoricalEncoderWOEEncoder", + WOEEncoder, + "CategoricalEncoderWOEEncoder", woe_encoder_shape_calculator, woe_encoder_converter, - parser=woe_encoder_parser) + parser=woe_encoder_parser, +) ################################### @@ -229,4 +233,4 @@ def woe_encoder_converter(scope, operator, container): woe_onx = to_onnx(woe, X[:1], target_opset=14) sess = InferenceSession(woe_onx.SerializeToString()) -print(sess.run(None, {'X': X[:5]})[0]) +print(sess.run(None, {"X": X[:5]})[0]) diff --git a/docs/tutorial/plot_cbegin_opset.py b/docs/tutorial/plot_cbegin_opset.py index 89f3a6458..b0c0bfb38 100644 --- a/docs/tutorial/plot_cbegin_opset.py +++ b/docs/tutorial/plot_cbegin_opset.py @@ -48,7 +48,7 @@ fig, ax = plt.subplots(1, 1) for k in (-1, 1): - ax.plot(X[labels == k, 0], X[labels == k, 1], 'o', label="cl%d" % k) + ax.plot(X[labels == k, 0], X[labels == k, 1], "o", label="cl%d" % k) ax.set_title("Sample") ####################################### @@ -56,8 +56,9 @@ # ++++ -onx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset={'': 15, 'ai.onnx.ml': 2}) +onx = to_onnx( + model, X[:1].astype(numpy.float32), target_opset={"": 15, "ai.onnx.ml": 2} +) print(onx) ########################## @@ -82,20 +83,22 @@ def get_domain_opset(onx): domains = onx.opset_import - res = [{'domain': dom.domain, 'version': dom.version} - for dom in domains] - return {d['domain']: d['version'] for d in res} + res = [{"domain": dom.domain, "version": dom.version} for dom in domains] + return {d["domain"]: d["version"] for d in res} for opset in range(6, onnx_opset_version() + 1): try: - onx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset={'': opset, 'ai.onnx.ml': 2}) + onx = to_onnx( + model, + X[:1].astype(numpy.float32), + target_opset={"": opset, "ai.onnx.ml": 2}, + ) except RuntimeError as e: - print('target: %r error: %r' % (opset, e)) + print("target: %r error: %r" % (opset, e)) continue nodes = len(onx.graph.node) - print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes)) + print("target: %r --> %s %d" % (opset, get_domain_opset(onx), nodes)) ######################################## # It shows that the model cannot be 
converted for opset @@ -112,13 +115,12 @@ def get_domain_opset(onx): for opset in range(9, onnx_opset_version() + 1): for opset_ml in range(1, 4): - tops = {'': opset, 'ai.onnx.ml': opset_ml} + tops = {"": opset, "ai.onnx.ml": opset_ml} try: print("try target_opset:", tops) - onx = to_onnx( - model, X[:1].astype(numpy.float32), target_opset=tops) + onx = to_onnx(model, X[:1].astype(numpy.float32), target_opset=tops) except RuntimeError as e: - print('target: %r error: %r' % (opset, e)) + print("target: %r error: %r" % (opset, e)) continue nodes = len(onx.graph.node) - print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes)) + print("target: %r --> %s %d" % (opset, get_domain_opset(onx), nodes)) diff --git a/docs/tutorial/plot_dbegin_options.py b/docs/tutorial/plot_dbegin_options.py index 1d0468aa6..47b813f3a 100644 --- a/docs/tutorial/plot_dbegin_options.py +++ b/docs/tutorial/plot_dbegin_options.py @@ -69,7 +69,6 @@ from pprint import pformat import numpy -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz from onnx.reference import ReferenceEvaluator from sklearn.ensemble import RandomForestClassifier from sklearn.preprocessing import MinMaxScaler @@ -87,8 +86,9 @@ clr = LogisticRegression() clr.fit(X_train, y_train) -model_def = to_onnx(clr, X_train.astype(numpy.float32), - options={id(clr): {'zipmap': False}}) +model_def = to_onnx( + clr, X_train.astype(numpy.float32), options={id(clr): {"zipmap": False}} +) oinf = ReferenceEvaluator(model_def) print(oinf) @@ -97,8 +97,7 @@ # Using function *id* has one flaw: it is not pickable. # It is just better to use strings. -model_def = to_onnx(clr, X_train.astype(numpy.float32), - options={'zipmap': False}) +model_def = to_onnx(clr, X_train.astype(numpy.float32), options={"zipmap": False}) oinf = ReferenceEvaluator(model_def) print(oinf) @@ -111,14 +110,10 @@ # name convention. -pipe = Pipeline([ - ('norm', MinMaxScaler()), - ('clr', LogisticRegression()) -]) +pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())]) pipe.fit(X_train, y_train) -model_def = to_onnx(pipe, X_train.astype(numpy.float32), - options={'clr__zipmap': False}) +model_def = to_onnx(pipe, X_train.astype(numpy.float32), options={"clr__zipmap": False}) oinf = ReferenceEvaluator(model_def) print(oinf) @@ -132,29 +127,28 @@ # First, with probabilities: -pipe = Pipeline([ - ('norm', MinMaxScaler()), - ('clr', LogisticRegression()) -]) +pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())]) pipe.fit(X_train, y_train) model_def = to_onnx( - pipe, X_train.astype(numpy.float32), - options={id(pipe): {'zipmap': False}}) + pipe, X_train.astype(numpy.float32), options={id(pipe): {"zipmap": False}} +) oinf = ReferenceEvaluator(model_def) -print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]})) +print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]})) ####################################### # Then with raw scores: model_def = to_onnx( - pipe, X_train.astype(numpy.float32), - options={id(pipe): {'raw_scores': True, 'zipmap': False}}) + pipe, + X_train.astype(numpy.float32), + options={id(pipe): {"raw_scores": True, "zipmap": False}}, +) oinf = ReferenceEvaluator(model_def) -print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]})) +print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]})) ######################################### # It did not seem to work... We need to tell @@ -162,22 +156,26 @@ # and not the whole pipeline. 
model_def = to_onnx( - pipe, X_train.astype(numpy.float32), - options={id(pipe.steps[1][1]): {'raw_scores': True, 'zipmap': False}}) + pipe, + X_train.astype(numpy.float32), + options={id(pipe.steps[1][1]): {"raw_scores": True, "zipmap": False}}, +) oinf = ReferenceEvaluator(model_def) -print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]})) +print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]})) ########################################### # There are negative values. That works. # Strings are still easier to use. model_def = to_onnx( - pipe, X_train.astype(numpy.float32), - options={'clr__raw_scores': True, 'clr__zipmap': False}) + pipe, + X_train.astype(numpy.float32), + options={"clr__raw_scores": True, "clr__zipmap": False}, +) oinf = ReferenceEvaluator(model_def) -print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]})) +print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]})) ######################################### @@ -196,9 +194,11 @@ paths, n_nodes_ptr = clrrf.decision_path(X_test[:2]) print(paths.todense()) -model_def = to_onnx(clrrf, X_train.astype(numpy.float32), - options={id(clrrf): {'decision_path': True, - 'zipmap': False}}) +model_def = to_onnx( + clrrf, + X_train.astype(numpy.float32), + options={id(clrrf): {"decision_path": True, "zipmap": False}}, +) sess = InferenceSession(model_def.SerializeToString()) ########################################## @@ -209,7 +209,7 @@ ########################################## # Let's display the last one. -res = sess.run(None, {'X': X_test[:2].astype(numpy.float32)}) +res = sess.run(None, {"X": X_test[:2].astype(numpy.float32)}) print(res[-1]) ############################################################ @@ -225,9 +225,9 @@ opts = v.get_allowed_options() if not isinstance(opts, dict): continue - name = k.replace('Sklearn', '') - print('%s%s %r' % (name, " " * (30 - len(name)), opts)) + name = k.replace("Sklearn", "") + print("%s%s %r" % (name, " " * (30 - len(name)), opts)) for o in opts: all_opts.add(o) -print('all options:', pformat(list(sorted(all_opts)))) +print("all options:", pformat(list(sorted(all_opts)))) diff --git a/docs/tutorial/plot_dbegin_options_list.py b/docs/tutorial/plot_dbegin_options_list.py index b6c02fc7e..16dedc475 100644 --- a/docs/tutorial/plot_dbegin_options_list.py +++ b/docs/tutorial/plot_dbegin_options_list.py @@ -19,8 +19,6 @@ The first converter to change its behaviour depending on a black list of operators is for model *GaussianMixture*. """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz -from onnx.reference import ReferenceEvaluator from timeit import timeit import numpy from onnxruntime import InferenceSession @@ -39,15 +37,18 @@ # ++++++++++++++++++ model_onnx = to_onnx( - model, X_train[:1].astype(numpy.float32), - options={id(model): {'score_samples': True}}, - target_opset=12) -sess = InferenceSession(model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model, + X_train[:1].astype(numpy.float32), + options={id(model): {"score_samples": True}}, + target_opset=12, +) +sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) xt = X_test[:5].astype(numpy.float32) print(model.score_samples(xt)) -print(sess.run(None, {'X': xt})[2]) +print(sess.run(None, {"X": xt})[2]) ################################### @@ -59,26 +60,37 @@ # produces in that case. 
model_onnx2 = to_onnx( - model, X_train[:1].astype(numpy.float32), - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp'}, - target_opset=12) -sess2 = InferenceSession(model_onnx2.SerializeToString(), - providers=["CPUExecutionProvider"]) + model, + X_train[:1].astype(numpy.float32), + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp"}, + target_opset=12, +) +sess2 = InferenceSession( + model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"] +) xt = X_test[:5].astype(numpy.float32) print(model.score_samples(xt)) -print(sess2.run(None, {'X': xt})[2]) +print(sess2.run(None, {"X": xt})[2]) ####################################### # Processing time # +++++++++++++++ -print(timeit(stmt="sess.run(None, {'X': xt})", - number=10000, globals={'sess': sess, 'xt': xt})) +print( + timeit( + stmt="sess.run(None, {'X': xt})", number=10000, globals={"sess": sess, "xt": xt} + ) +) -print(timeit(stmt="sess2.run(None, {'X': xt})", - number=10000, globals={'sess2': sess2, 'xt': xt})) +print( + timeit( + stmt="sess2.run(None, {'X': xt})", + number=10000, + globals={"sess2": sess2, "xt": xt}, + ) +) ################################# # The model using ReduceLogSumExp is much faster. @@ -94,9 +106,11 @@ try: to_onnx( - model, X_train[:1].astype(numpy.float32), - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp', 'Add'}, - target_opset=12) + model, + X_train[:1].astype(numpy.float32), + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp", "Add"}, + target_opset=12, + ) except RuntimeError as e: - print('Error:', e) + print("Error:", e) diff --git a/docs/tutorial/plot_dbegin_options_zipmap.py b/docs/tutorial/plot_dbegin_options_zipmap.py index 9194233ee..d7eae7be8 100644 --- a/docs/tutorial/plot_dbegin_options_zipmap.py +++ b/docs/tutorial/plot_dbegin_options_zipmap.py @@ -49,7 +49,7 @@ # dictionaries. sess = rt.InferenceSession(onx.SerializeToString()) -res = sess.run(None, {'X': X_test}) +res = sess.run(None, {"X": X_test}) print(res[1][:2]) print("probabilities type:", type(res[1])) print("type for the first observations:", type(res[1][0])) @@ -60,12 +60,12 @@ # # Probabilities are now a matrix. -initial_type = [('float_input', FloatTensorType([None, 4]))] -options = {id(clr): {'zipmap': False}} +initial_type = [("float_input", FloatTensorType([None, 4]))] +options = {id(clr): {"zipmap": False}} onx2 = to_onnx(clr, X_train, options=options, target_opset=12) sess2 = rt.InferenceSession(onx2.SerializeToString()) -res2 = sess2.run(None, {'X': X_test}) +res2 = sess2.run(None, {"X": X_test}) print(res2[1][:2]) print("probabilities type:", type(res2[1])) print("type for the first observations:", type(res2[1][0])) @@ -78,14 +78,17 @@ # the probabilities into columns. The final model produces # one output for the label, and one output per class. 
-options = {id(clr): {'zipmap': 'columns'}} +options = {id(clr): {"zipmap": "columns"}} onx3 = to_onnx(clr, X_train, options=options, target_opset=12) sess3 = rt.InferenceSession(onx3.SerializeToString()) -res3 = sess3.run(None, {'X': X_test}) +res3 = sess3.run(None, {"X": X_test}) for i, out in enumerate(sess3.get_outputs()): - print("output: '{}' shape={} values={}...".format( - out.name, res3[i].shape, res3[i][:2])) + print( + "output: '{}' shape={} values={}...".format( + out.name, res3[i].shape, res3[i][:2] + ) + ) ################################### @@ -93,16 +96,13 @@ # +++++++++++++++++++++++++++++ print("Average time with ZipMap:") -print(sum(repeat(lambda: sess.run(None, {'X': X_test}), - number=100, repeat=10)) / 10) +print(sum(repeat(lambda: sess.run(None, {"X": X_test}), number=100, repeat=10)) / 10) print("Average time without ZipMap:") -print(sum(repeat(lambda: sess2.run(None, {'X': X_test}), - number=100, repeat=10)) / 10) +print(sum(repeat(lambda: sess2.run(None, {"X": X_test}), number=100, repeat=10)) / 10) print("Average time without ZipMap but with columns:") -print(sum(repeat(lambda: sess3.run(None, {'X': X_test}), - number=100, repeat=10)) / 10) +print(sum(repeat(lambda: sess3.run(None, {"X": X_test}), number=100, repeat=10)) / 10) # The prediction is much faster without ZipMap # on this example. @@ -120,12 +120,12 @@ # `output_class_labels` can be used to expose the labels # as a third output. -initial_type = [('float_input', FloatTensorType([None, 4]))] -options = {id(clr): {'zipmap': False, 'output_class_labels': True}} +initial_type = [("float_input", FloatTensorType([None, 4]))] +options = {id(clr): {"zipmap": False, "output_class_labels": True}} onx4 = to_onnx(clr, X_train, options=options, target_opset=12) sess4 = rt.InferenceSession(onx4.SerializeToString()) -res4 = sess4.run(None, {'X': X_test}) +res4 = sess4.run(None, {"X": X_test}) print(res4[1][:2]) print("probabilities type:", type(res4[1])) print("class labels:", res4[2]) @@ -134,8 +134,7 @@ # Processing time. print("Average time without ZipMap but with output_class_labels:") -print(sum(repeat(lambda: sess4.run(None, {'X': X_test}), - number=100, repeat=10)) / 10) +print(sum(repeat(lambda: sess4.run(None, {"X": X_test}), number=100, repeat=10)) / 10) ########################################### # MultiOutputClassifier @@ -161,18 +160,22 @@ onx5 = to_onnx(clr, X_train, target_opset=12) sess5 = rt.InferenceSession(onx5.SerializeToString()) -res5 = sess5.run(None, {'X': X_test[:3]}) +res5 = sess5.run(None, {"X": X_test[:3]}) print(res5) ######################################## # Option zipmap is ignored. Labels are missing but they can be # added back as a third output. 
-onx6 = to_onnx(clr, X_train, target_opset=12, - options={'zipmap': False, 'output_class_labels': True}) +onx6 = to_onnx( + clr, + X_train, + target_opset=12, + options={"zipmap": False, "output_class_labels": True}, +) sess6 = rt.InferenceSession(onx6.SerializeToString()) -res6 = sess6.run(None, {'X': X_test[:3]}) +res6 = sess6.run(None, {"X": X_test[:3]}) print("predicted labels", res6[0]) print("predicted probabilies", res6[1]) print("class labels", res6[2]) diff --git a/docs/tutorial/plot_ebegin_float_double.py b/docs/tutorial/plot_ebegin_float_double.py index e95e979b6..f61db244c 100644 --- a/docs/tutorial/plot_ebegin_float_double.py +++ b/docs/tutorial/plot_ebegin_float_double.py @@ -63,15 +63,18 @@ def area_mismatch_rule(N, delta, factor, rule=None): if rule is None: - def rule(t): return numpy.float32(t) + + def rule(t): + return numpy.float32(t) + xst = [] yst = [] xsf = [] ysf = [] for x in range(-N, N): for y in range(-N, N): - dx = (1. + x * delta) * factor - dy = (1. + y * delta) * factor + dx = (1.0 + x * delta) * factor + dy = (1.0 + y * delta) * factor c1 = 1 if numpy.float64(dx) <= numpy.float64(dy) else 0 c2 = 1 if numpy.float32(dx) <= rule(dy) else 0 key = abs(c1 - c2) @@ -90,12 +93,12 @@ def rule(t): return numpy.float32(t) fig, ax = plt.subplots(1, 1, figsize=(5, 5)) -ax.plot(xst, yst, '.', label="agree") -ax.plot(xsf, ysf, '.', label="disagree") +ax.plot(xst, yst, ".", label="agree") +ax.plot(xsf, ysf, ".", label="disagree") ax.set_title("Region where x <= y and (float)x <= (float)y agree") ax.set_xlabel("x") ax.set_ylabel("y") -ax.plot([min(xst), max(xst)], [min(yst), max(yst)], 'k--') +ax.plot([min(xst), max(xst)], [min(yst), max(yst)], "k--") ax.legend() @@ -115,15 +118,14 @@ def rule(t): return numpy.float32(t) Xi_train, yi_train = X_train.copy(), y_train.copy() Xi_test, yi_test = X_test.copy(), y_test.copy() for i in range(X.shape[1]): - Xi_train[:, i] = (Xi_train[:, i] * 2 ** i).astype(numpy.int64) - Xi_test[:, i] = (Xi_test[:, i] * 2 ** i).astype(numpy.int64) + Xi_train[:, i] = (Xi_train[:, i] * 2**i).astype(numpy.int64) + Xi_test[:, i] = (Xi_test[:, i] * 2**i).astype(numpy.int64) max_depth = 10 -model = Pipeline([ - ('scaler', StandardScaler()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) -]) +model = Pipeline( + [("scaler", StandardScaler()), ("dt", DecisionTreeRegressor(max_depth=max_depth))] +) model.fit(Xi_train, yi_train) @@ -143,15 +145,14 @@ def diff(p1, p2): return d.max(), (d / numpy.abs(p1)).max() -onx = to_onnx(model, Xi_train[:1].astype(numpy.float32), - target_opset=15) +onx = to_onnx(model, Xi_train[:1].astype(numpy.float32), target_opset=15) sess = InferenceSession(onx.SerializeToString()) X32 = Xi_test.astype(numpy.float32) skl = model.predict(X32) -ort = sess.run(None, {'X': X32})[0] +ort = sess.run(None, {"X": X32})[0] print(diff(skl, ort)) @@ -191,24 +192,25 @@ def diff(p1, p2): # -model2 = Pipeline([ - ('scaler', StandardScaler()), - ('cast', CastTransformer()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) -]) +model2 = Pipeline( + [ + ("scaler", StandardScaler()), + ("cast", CastTransformer()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] +) model2.fit(Xi_train, yi_train) ########################################## # The discrepencies. 
-onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32), - target_opset=15) +onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32), target_opset=15) sess2 = InferenceSession(onx2.SerializeToString()) skl2 = model2.predict(X32) -ort2 = sess2.run(None, {'X': X32})[0] +ort2 = sess2.run(None, {"X": X32})[0] print(diff(skl2, ort2)) @@ -219,22 +221,27 @@ def diff(p1, p2): # the *dx* is still here. To remove it, we need to use # double in ONNX normalizer. -model3 = Pipeline([ - ('cast64', CastTransformer(dtype=numpy.float64)), - ('scaler', StandardScaler()), - ('cast', CastTransformer()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) -]) +model3 = Pipeline( + [ + ("cast64", CastTransformer(dtype=numpy.float64)), + ("scaler", StandardScaler()), + ("cast", CastTransformer()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] +) model3.fit(Xi_train, yi_train) -onx3 = to_onnx(model3, Xi_train[:1].astype(numpy.float32), - options={StandardScaler: {'div': 'div_cast'}}, - target_opset=15) +onx3 = to_onnx( + model3, + Xi_train[:1].astype(numpy.float32), + options={StandardScaler: {"div": "div_cast"}}, + target_opset=15, +) sess3 = InferenceSession(onx3.SerializeToString()) skl3 = model3.predict(X32) -ort3 = sess3.run(None, {'X': X32})[0] +ort3 = sess3.run(None, {"X": X32})[0] print(diff(skl3, ort3)) diff --git a/docs/tutorial/plot_fbegin_investigate.py b/docs/tutorial/plot_fbegin_investigate.py index 422be0798..daecc4ca8 100644 --- a/docs/tutorial/plot_fbegin_investigate.py +++ b/docs/tutorial/plot_fbegin_investigate.py @@ -27,7 +27,6 @@ has *n* steps, it converts the pipeline with step 1, then the pipeline with steps 1, 2, then 1, 2, 3... """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import numpy from onnx.reference import ReferenceEvaluator from onnxruntime import InferenceSession @@ -45,10 +44,7 @@ data = load_iris() X = data.data -pipe = Pipeline(steps=[ - ('std', StandardScaler()), - ('km', KMeans(3, n_init=3)) -]) +pipe = Pipeline(steps=[("std", StandardScaler()), ("km", KMeans(3, n_init=3))]) pipe.fit(X) ################################# @@ -56,9 +52,8 @@ # overloads the methods *transform* and # returns an ONNX graph for every step. steps = collect_intermediate_steps( - pipe, "pipeline", - [("X", FloatTensorType([None, X.shape[1]]))], - target_opset=17) + pipe, "pipeline", [("X", FloatTensorType([None, X.shape[1]]))], target_opset=17 +) ##################################### # We call method transform to population the @@ -70,14 +65,15 @@ # ONNX and scikit-learn outputs. for step in steps: - print('----------------------------') - print(step['model']) - onnx_step = step['onnx_step'] - sess = InferenceSession(onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'X': X.astype(numpy.float32)}) + print("----------------------------") + print(step["model"]) + onnx_step = step["onnx_step"] + sess = InferenceSession( + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"X": X.astype(numpy.float32)}) onnx_output = onnx_outputs[-1] - skl_outputs = step['model']._debug.outputs['transform'] + skl_outputs = step["model"]._debug.outputs["transform"] # comparison diff = numpy.abs(skl_outputs.ravel() - onnx_output.ravel()).max() @@ -99,17 +95,16 @@ # fails due to nan values or a dimension mismatch. 
-onx = to_onnx(pipe, X[:1].astype(numpy.float32), - target_opset=17) +onx = to_onnx(pipe, X[:1].astype(numpy.float32), target_opset=17) oinf = ReferenceEvaluator(onx, verbose=1) -oinf.run(None, {'X': X[:2].astype(numpy.float32)}) +oinf.run(None, {"X": X[:2].astype(numpy.float32)}) ################################### # And to get a sense of the intermediate results. oinf = ReferenceEvaluator(onx, verbose=3) -oinf.run(None, {'X': X[:2].astype(numpy.float32)}) +oinf.run(None, {"X": X[:2].astype(numpy.float32)}) # This way is usually better if you need to investigate # issues within the code of the runtime for an operator. diff --git a/docs/tutorial/plot_gbegin_cst.py b/docs/tutorial/plot_gbegin_cst.py index b2a9e33ba..0b4e21c18 100644 --- a/docs/tutorial/plot_gbegin_cst.py +++ b/docs/tutorial/plot_gbegin_cst.py @@ -26,18 +26,18 @@ from sklearn.model_selection import train_test_split from skl2onnx import to_onnx from skl2onnx.helpers.onnx_helper import ( - add_output_initializer, select_model_inputs_outputs) + add_output_initializer, + select_model_inputs_outputs, +) data = load_iris() X, y = data.data.astype(numpy.float32), data.target X_train, X_test, y_train, y_test = train_test_split(X, y) -model = LogisticRegression(penalty='elasticnet', C=2., - solver='saga', l1_ratio=0.5) +model = LogisticRegression(penalty="elasticnet", C=2.0, solver="saga", l1_ratio=0.5) model.fit(X_train, y_train) -onx = to_onnx(model, X_train[:1], target_opset=12, - options={'zipmap': False}) +onx = to_onnx(model, X_train[:1], target_opset=12, options={"zipmap": False}) ######################################## # Add training parameter @@ -45,9 +45,8 @@ # new_onx = add_output_initializer( - onx, - ['C', 'l1_ratio'], - [numpy.array([model.C]), numpy.array([model.l1_ratio])]) + onx, ["C", "l1_ratio"], [numpy.array([model.C]), numpy.array([model.l1_ratio])] +) ######################################## # Inference @@ -55,7 +54,7 @@ sess = InferenceSession(new_onx.SerializeToString()) print("output names:", [o.name for o in sess.get_outputs()]) -res = sess.run(None, {'X': X_test[:2]}) +res = sess.run(None, {"X": X_test[:2]}) print("outputs") pprint.pprint(res) @@ -72,11 +71,11 @@ # Next function removes unneeded outputs from a model, # not only the constants. Next model only keeps the probabilities. 
-simple_onx = select_model_inputs_outputs(new_onx, ['probabilities']) +simple_onx = select_model_inputs_outputs(new_onx, ["probabilities"]) sess = InferenceSession(simple_onx.SerializeToString()) print("output names:", [o.name for o in sess.get_outputs()]) -res = sess.run(None, {'X': X_test[:2]}) +res = sess.run(None, {"X": X_test[:2]}) print("outputs") pprint.pprint(res) @@ -102,6 +101,6 @@ sess = InferenceSession(model.SerializeToString()) print("output names:", [o.name for o in sess.get_outputs()]) -res = sess.run(None, {'X': X_test[:2]}) +res = sess.run(None, {"X": X_test[:2]}) print("outputs") pprint.pprint(res) diff --git a/docs/tutorial/plot_gbegin_dataframe.py b/docs/tutorial/plot_gbegin_dataframe.py index 739a056ba..d30dab6e7 100644 --- a/docs/tutorial/plot_gbegin_dataframe.py +++ b/docs/tutorial/plot_gbegin_dataframe.py @@ -20,37 +20,39 @@ import pprint from onnx.reference import ReferenceEvaluator from onnxruntime import InferenceSession -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz -from skl2onnx import to_onnx from pandas import DataFrame from sklearn.pipeline import Pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder from sklearn.ensemble import RandomForestClassifier +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from skl2onnx import to_onnx +from skl2onnx.algebra.type_helper import guess_initial_types -data = DataFrame([ - dict(CAT1='a', CAT2='c', num1=0.5, num2=0.6, y=0), - dict(CAT1='b', CAT2='d', num1=0.4, num2=0.8, y=1), - dict(CAT1='a', CAT2='d', num1=0.5, num2=0.56, y=0), - dict(CAT1='a', CAT2='d', num1=0.55, num2=0.56, y=1), - dict(CAT1='a', CAT2='c', num1=0.35, num2=0.86, y=0), - dict(CAT1='a', CAT2='c', num1=0.5, num2=0.68, y=1), -]) +data = DataFrame( + [ + dict(CAT1="a", CAT2="c", num1=0.5, num2=0.6, y=0), + dict(CAT1="b", CAT2="d", num1=0.4, num2=0.8, y=1), + dict(CAT1="a", CAT2="d", num1=0.5, num2=0.56, y=0), + dict(CAT1="a", CAT2="d", num1=0.55, num2=0.56, y=1), + dict(CAT1="a", CAT2="c", num1=0.35, num2=0.86, y=0), + dict(CAT1="a", CAT2="c", num1=0.5, num2=0.68, y=1), + ] +) -cat_cols = ['CAT1', 'CAT2'] -train_data = data.drop('y', axis=1) +cat_cols = ["CAT1", "CAT2"] +train_data = data.drop("y", axis=1) -categorical_transformer = Pipeline([ - ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))]) +categorical_transformer = Pipeline( + [("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))] +) preprocessor = ColumnTransformer( - transformers=[ - ('cat', categorical_transformer, cat_cols)], - remainder='passthrough') -pipe = Pipeline([('preprocess', preprocessor), - ('rf', RandomForestClassifier())]) -pipe.fit(train_data, data['y']) + transformers=[("cat", categorical_transformer, cat_cols)], remainder="passthrough" +) +pipe = Pipeline([("preprocess", preprocessor), ("rf", RandomForestClassifier())]) +pipe.fit(train_data, data["y"]) ##################################### # Display. @@ -67,19 +69,7 @@ # Function *to_onnx* does not handle dataframes. -try: - onx = to_onnx(pipe, train_data[:1]) -except NotImplementedError as e: - print(e) - -################################### -# But it possible to use an extended one. 
- - -onx = to_onnx_ext( - pipe, train_data[:1], - options={RandomForestClassifier: {'zipmap': False}}) - +onx = to_onnx(pipe, train_data[:1], options={RandomForestClassifier: {"zipmap": False}}) ################################# # Prediction with ONNX @@ -100,13 +90,13 @@ oinf = ReferenceEvaluator(onx) got = oinf.run(None, train_data) print(pipe.predict(train_data)) -print(got['label']) +print(got["label"]) ################################# # And probilities. print(pipe.predict_proba(train_data)) -print(got['probabilities']) +print(got["probabilities"]) ###################################### # It looks ok. Let's dig into the details to @@ -121,6 +111,21 @@ # the input type is the column type. +def guess_schema_from_data(X): + init = guess_initial_types(X) + unique = set() + for _, col in init: + if len(col.shape) != 2: + return init + if col.shape[0] is not None: + return init + if len(unique) > 0 and col.__class__ not in unique: + return init + unique.add(col.__class__) + unique = list(unique) + return [("X", unique[0]([None, sum(_[1].shape[1] for _ in init)]))] + + init = guess_schema_from_data(train_data) pprint.pprint(init) @@ -133,6 +138,7 @@ if c not in cat_cols: train_data[c] = train_data[c].astype(numpy.float32) + init = guess_schema_from_data(train_data) pprint.pprint(init) @@ -140,8 +146,8 @@ # Let's convert with *skl2onnx* only. onx2 = to_onnx( - pipe, initial_types=init, - options={RandomForestClassifier: {'zipmap': False}}) + pipe, initial_types=init, options={RandomForestClassifier: {"zipmap": False}} +) ##################################### # Let's run it with onnxruntime. @@ -149,8 +155,7 @@ # where column names become keys, and column values become # values. -inputs = {c: train_data[c].values.reshape((-1, 1)) - for c in train_data.columns} +inputs = {c: train_data[c].values.reshape((-1, 1)) for c in train_data.columns} pprint.pprint(inputs) ############################# diff --git a/docs/tutorial/plot_gconverting.py b/docs/tutorial/plot_gconverting.py index 880444bc6..3d2f41ab1 100644 --- a/docs/tutorial/plot_gconverting.py +++ b/docs/tutorial/plot_gconverting.py @@ -28,8 +28,7 @@ clr.fit(X_train, y_train) -onx = to_onnx(clr, X, options={'zipmap': False}, - target_opset=15) +onx = to_onnx(clr, X, options={"zipmap": False}, target_opset=15) sess = InferenceSession(onx.SerializeToString()) input_names = [i.name for i in sess.get_inputs()] @@ -46,9 +45,13 @@ # parameter *initial_types*. However, the user must specify the input # types as well. -onx = to_onnx(clr, X, options={'zipmap': False}, - initial_types=[('X56', FloatTensorType([None, X.shape[1]]))], - target_opset=15) +onx = to_onnx( + clr, + X, + options={"zipmap": False}, + initial_types=[("X56", FloatTensorType([None, X.shape[1]]))], + target_opset=15, +) sess = InferenceSession(onx.SerializeToString()) input_names = [i.name for i in sess.get_inputs()] @@ -64,10 +67,13 @@ # It is possible to change the input name by using the # parameter *final_types*. 
-onx = to_onnx(clr, X, options={'zipmap': False}, - final_types=[('L', Int64TensorType([None])), - ('P', FloatTensorType([None, 3]))], - target_opset=15) +onx = to_onnx( + clr, + X, + options={"zipmap": False}, + final_types=[("L", Int64TensorType([None])), ("P", FloatTensorType([None, 3]))], + target_opset=15, +) sess = InferenceSession(onx.SerializeToString()) input_names = [i.name for i in sess.get_inputs()] @@ -92,8 +98,7 @@ def rename_results(proposed_name, existing_names): return result -onx = to_onnx(clr, X, options={'zipmap': False}, - naming=rename_results, target_opset=15) +onx = to_onnx(clr, X, options={"zipmap": False}, naming=rename_results, target_opset=15) sess = InferenceSession(onx.SerializeToString()) input_names = [i.name for i in sess.get_inputs()] diff --git a/docs/tutorial/plot_gexternal_catboost.py b/docs/tutorial/plot_gexternal_catboost.py index 5cf1d72a6..d4ee99478 100644 --- a/docs/tutorial/plot_gexternal_catboost.py +++ b/docs/tutorial/plot_gexternal_catboost.py @@ -18,7 +18,6 @@ Train a CatBoostClassifier ++++++++++++++++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import numpy from onnx.helper import get_attribute_value from sklearn.datasets import load_iris @@ -26,8 +25,14 @@ from sklearn.preprocessing import StandardScaler import onnxruntime as rt from skl2onnx import convert_sklearn, update_registered_converter -from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa -from skl2onnx.common.data_types import FloatTensorType, Int64TensorType, guess_tensor_type +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes, +) # noqa +from skl2onnx.common.data_types import ( + FloatTensorType, + Int64TensorType, + guess_tensor_type, +) from skl2onnx._parse import _apply_zipmap, _get_sklearn_operator_name from catboost import CatBoostClassifier from catboost.utils import convert_to_onnx_object @@ -41,8 +46,9 @@ X = X[ind, :].copy() y = y[ind].copy() -pipe = Pipeline([('scaler', StandardScaler()), - ('lgbm', CatBoostClassifier(n_estimators=3))]) +pipe = Pipeline( + [("scaler", StandardScaler()), ("lgbm", CatBoostClassifier(n_estimators=3))] +) pipe.fit(X, y) ###################################### @@ -55,18 +61,19 @@ # needs to be wrapped. 
-def skl2onnx_parser_castboost_classifier(scope, model, inputs, - custom_parsers=None): +def skl2onnx_parser_castboost_classifier(scope, model, inputs, custom_parsers=None): options = scope.get_options(model, dict(zipmap=True)) - no_zipmap = isinstance(options['zipmap'], bool) and not options['zipmap'] + no_zipmap = isinstance(options["zipmap"], bool) and not options["zipmap"] alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) this_operator.inputs = inputs - label_variable = scope.declare_local_variable('label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) prob_dtype = guess_tensor_type(inputs[0].type) - probability_tensor_variable = scope.declare_local_variable('probabilities', prob_dtype) + probability_tensor_variable = scope.declare_local_variable( + "probabilities", prob_dtype + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(probability_tensor_variable) probability_tensor = this_operator.outputs @@ -74,8 +81,9 @@ def skl2onnx_parser_castboost_classifier(scope, model, inputs, if no_zipmap: return probability_tensor - return _apply_zipmap(options['zipmap'], scope, model, - inputs[0].type, probability_tensor) + return _apply_zipmap( + options["zipmap"], scope, model, inputs[0].type, probability_tensor + ) def skl2onnx_convert_catboost(scope, operator, container): @@ -85,45 +93,55 @@ def skl2onnx_convert_catboost(scope, operator, container): """ onx = convert_to_onnx_object(operator.raw_operator) opsets = {d.domain: d.version for d in onx.opset_import} - if '' in opsets and opsets[''] >= container.target_opset: - raise RuntimeError( - "CatBoost uses an opset more recent than the target one.") + if "" in opsets and opsets[""] >= container.target_opset: + raise RuntimeError("CatBoost uses an opset more recent than the target one.") if len(onx.graph.initializer) > 0 or len(onx.graph.sparse_initializer) > 0: raise NotImplementedError( - "CatBoost returns a model initializers. This option is not implemented yet.") - if (len(onx.graph.node) not in (1, 2) or not onx.graph.node[0].op_type.startswith("TreeEnsemble") or - (len(onx.graph.node) == 2 and onx.graph.node[1].op_type != "ZipMap")): + "CatBoost returns a model initializers. This option is not implemented yet." + ) + if ( + len(onx.graph.node) not in (1, 2) + or not onx.graph.node[0].op_type.startswith("TreeEnsemble") + or (len(onx.graph.node) == 2 and onx.graph.node[1].op_type != "ZipMap") + ): types = ", ".join(map(lambda n: n.op_type, onx.graph.node)) raise NotImplementedError( f"CatBoost returns {len(onx.graph.node)} != 1 (types={types}). " - f"This option is not implemented yet.") + f"This option is not implemented yet." 
+ ) node = onx.graph.node[0] atts = {} for att in node.attribute: atts[att.name] = get_attribute_value(att) container.add_node( - node.op_type, [operator.inputs[0].full_name], + node.op_type, + [operator.inputs[0].full_name], [operator.outputs[0].full_name, operator.outputs[1].full_name], - op_domain=node.domain, op_version=opsets.get(node.domain, None), - **atts) + op_domain=node.domain, + op_version=opsets.get(node.domain, None), + **atts, + ) update_registered_converter( CatBoostClassifier, - 'CatBoostCatBoostClassifier', + "CatBoostCatBoostClassifier", calculate_linear_classifier_output_shapes, skl2onnx_convert_catboost, parser=skl2onnx_parser_castboost_classifier, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) ################################## # Convert # +++++++ model_onnx = convert_sklearn( - pipe, 'pipeline_catboost', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + pipe, + "pipeline_catboost", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, +) # And save. with open("pipeline_catboost.onnx", "wb") as f: diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py index e11f38afe..1149e93d5 100644 --- a/docs/tutorial/plot_gexternal_lightgbm.py +++ b/docs/tutorial/plot_gexternal_lightgbm.py @@ -19,11 +19,14 @@ Train a LightGBM classifier +++++++++++++++++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import onnxruntime as rt from skl2onnx import convert_sklearn, update_registered_converter -from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa -from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes, +) # noqa +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( + convert_lightgbm, +) # noqa from skl2onnx.common.data_types import FloatTensorType import numpy from sklearn.datasets import load_iris @@ -40,8 +43,9 @@ X = X[ind, :].copy() y = y[ind].copy() -pipe = Pipeline([('scaler', StandardScaler()), - ('lgbm', LGBMClassifier(n_estimators=3))]) +pipe = Pipeline( + [("scaler", StandardScaler()), ("lgbm", LGBMClassifier(n_estimators=3))] +) pipe.fit(X, y) ###################################### @@ -58,18 +62,23 @@ # lightgbm/shape_calculators/Classifier.py>`_. update_registered_converter( - LGBMClassifier, 'LightGbmLGBMClassifier', - calculate_linear_classifier_output_shapes, convert_lightgbm, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + LGBMClassifier, + "LightGbmLGBMClassifier", + calculate_linear_classifier_output_shapes, + convert_lightgbm, + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) ################################## # Convert again # +++++++++++++ model_onnx = convert_sklearn( - pipe, 'pipeline_lightgbm', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + pipe, + "pipeline_lightgbm", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, +) # And save. 
with open("pipeline_lightgbm.onnx", "wb") as f: diff --git a/docs/tutorial/plot_gexternal_lightgbm_reg.py b/docs/tutorial/plot_gexternal_lightgbm_reg.py index 9ccbfa594..2f243d002 100644 --- a/docs/tutorial/plot_gexternal_lightgbm_reg.py +++ b/docs/tutorial/plot_gexternal_lightgbm_reg.py @@ -45,15 +45,20 @@ from lightgbm import LGBMRegressor from onnxruntime import InferenceSession from skl2onnx import to_onnx, update_registered_converter -from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes # noqa +from skl2onnx.common.shape_calculator import ( + calculate_linear_regressor_output_shapes, +) # noqa from onnxmltools import __version__ as oml_version -from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( + convert_lightgbm, +) # noqa N = 1000 X = numpy.random.randn(N, 20) -y = (numpy.random.randn(N) + - numpy.random.randn(N) * 100 * numpy.random.randint(0, 1, 1000)) +y = numpy.random.randn(N) + numpy.random.randn(N) * 100 * numpy.random.randint( + 0, 1, 1000 +) reg = LGBMRegressor(n_estimators=1000) reg.fit(X, y) @@ -74,22 +79,25 @@ def skl2onnx_convert_lightgbm(scope, operator, container): options = scope.get_options(operator.raw_operator) - if 'split' in options: - if pv.Version(oml_version) < pv.Version('1.9.2'): + if "split" in options: + if pv.Version(oml_version) < pv.Version("1.9.2"): warnings.warn( "Option split was released in version 1.9.2 but %s is " - "installed. It will be ignored." % oml_version) - operator.split = options['split'] + "installed. It will be ignored." % oml_version + ) + operator.split = options["split"] else: operator.split = None convert_lightgbm(scope, operator, container) update_registered_converter( - LGBMRegressor, 'LightGbmLGBMRegressor', + LGBMRegressor, + "LightGbmLGBMRegressor", calculate_linear_regressor_output_shapes, skl2onnx_convert_lightgbm, - options={'split': None}) + options={"split": None}, +) ################################## # Convert @@ -99,11 +107,15 @@ def skl2onnx_convert_lightgbm(scope, operator, container): # TreeEnsembleRegressor node, or more. *split* parameter is the number of # trees per node TreeEnsembleRegressor. -model_onnx = to_onnx(reg, X[:1].astype(numpy.float32), - target_opset={'': 14, 'ai.onnx.ml': 2}) -model_onnx_split = to_onnx(reg, X[:1].astype(numpy.float32), - target_opset={'': 14, 'ai.onnx.ml': 2}, - options={'split': 100}) +model_onnx = to_onnx( + reg, X[:1].astype(numpy.float32), target_opset={"": 14, "ai.onnx.ml": 2} +) +model_onnx_split = to_onnx( + reg, + X[:1].astype(numpy.float32), + target_opset={"": 14, "ai.onnx.ml": 2}, + options={"split": 100}, +) ########################## # Discrepancies @@ -114,15 +126,14 @@ def skl2onnx_convert_lightgbm(scope, operator, container): X32 = X.astype(numpy.float32) expected = reg.predict(X32) -got = sess.run(None, {'X': X32})[0].ravel() -got_split = sess_split.run(None, {'X': X32})[0].ravel() +got = sess.run(None, {"X": X32})[0].ravel() +got_split = sess_split.run(None, {"X": X32})[0].ravel() disp = numpy.abs(got - expected).sum() disp_split = numpy.abs(got_split - expected).sum() print("sum of discrepancies 1 node", disp) -print("sum of discrepancies split node", - disp_split, "ratio:", disp / disp_split) +print("sum of discrepancies split node", disp_split, "ratio:", disp / disp_split) ###################################### # The sum of the discrepancies were reduced 4, 5 times. 
@@ -140,12 +151,14 @@ def skl2onnx_convert_lightgbm(scope, operator, container): # # The processing time is slower but not much. -print("processing time no split", - timeit.timeit( - lambda: sess.run(None, {'X': X32})[0], number=150)) -print("processing time split", - timeit.timeit( - lambda: sess_split.run(None, {'X': X32})[0], number=150)) +print( + "processing time no split", + timeit.timeit(lambda: sess.run(None, {"X": X32})[0], number=150), +) +print( + "processing time split", + timeit.timeit(lambda: sess_split.run(None, {"X": X32})[0], number=150), +) ############################################# # Split influence @@ -156,23 +169,27 @@ def skl2onnx_convert_lightgbm(scope, operator, container): res = [] for i in tqdm(list(range(20, 170, 20)) + [200, 300, 400, 500]): - model_onnx_split = to_onnx(reg, X[:1].astype(numpy.float32), - target_opset={'': 14, 'ai.onnx.ml': 2}, - options={'split': i}) + model_onnx_split = to_onnx( + reg, + X[:1].astype(numpy.float32), + target_opset={"": 14, "ai.onnx.ml": 2}, + options={"split": i}, + ) sess_split = InferenceSession(model_onnx_split.SerializeToString()) - got_split = sess_split.run(None, {'X': X32})[0].ravel() + got_split = sess_split.run(None, {"X": X32})[0].ravel() disc_split = numpy.abs(got_split - expected).max() res.append(dict(split=i, disc=disc_split)) -df = DataFrame(res).set_index('split') +df = DataFrame(res).set_index("split") df["baseline"] = disc print(df) ########################################## # Graph. _, ax = plt.subplots(1, 1) -df.plot(title="Sum of discrepancies against split\n" - "split = number of tree per node", - ax=ax) +df.plot( + title="Sum of discrepancies against split\n" "split = number of tree per node", + ax=ax, +) # plt.show() diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py index 3cf467ff5..15ed13263 100644 --- a/docs/tutorial/plot_gexternal_xgboost.py +++ b/docs/tutorial/plot_gexternal_xgboost.py @@ -20,7 +20,6 @@ Train a XGBoost classifier ++++++++++++++++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import numpy import onnxruntime as rt from sklearn.datasets import load_iris, load_diabetes, make_classification @@ -32,9 +31,9 @@ from skl2onnx import convert_sklearn, to_onnx, update_registered_converter from skl2onnx.common.shape_calculator import ( calculate_linear_classifier_output_shapes, - calculate_linear_regressor_output_shapes) -from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( - convert_xgboost) + calculate_linear_regressor_output_shapes, +) +from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost from onnxmltools.convert import convert_xgboost as convert_xgboost_booster @@ -47,16 +46,18 @@ X = X[ind, :].copy() y = y[ind].copy() -pipe = Pipeline([('scaler', StandardScaler()), - ('xgb', XGBClassifier(n_estimators=3))]) +pipe = Pipeline([("scaler", StandardScaler()), ("xgb", XGBClassifier(n_estimators=3))]) pipe.fit(X, y) # The conversion fails but it is expected. try: - convert_sklearn(pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + convert_sklearn( + pipe, + "pipeline_xgboost", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, + ) except Exception as e: print(e) @@ -82,18 +83,23 @@ # xgboost/shape_calculators/Classifier.py>`_. 
update_registered_converter( - XGBClassifier, 'XGBoostXGBClassifier', - calculate_linear_classifier_output_shapes, convert_xgboost, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + XGBClassifier, + "XGBoostXGBClassifier", + calculate_linear_classifier_output_shapes, + convert_xgboost, + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) ################################## # Convert again # +++++++++++++ model_onnx = convert_sklearn( - pipe, 'pipeline_xgboost', - [('input', FloatTensorType([None, 2]))], - target_opset={'': 12, 'ai.onnx.ml': 2}) + pipe, + "pipeline_xgboost", + [("input", FloatTensorType([None, 2]))], + target_opset={"": 12, "ai.onnx.ml": 2}, +) # And save. with open("pipeline_xgboost.onnx", "wb") as f: @@ -121,8 +127,11 @@ # ++++++++++++++++++++++++++++++ update_registered_converter( - XGBRegressor, 'XGBoostXGBRegressor', - calculate_linear_regressor_output_shapes, convert_xgboost) + XGBRegressor, + "XGBoostXGBRegressor", + calculate_linear_regressor_output_shapes, + convert_xgboost, +) data = load_diabetes() @@ -130,8 +139,7 @@ y = data.target X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5) -pipe = Pipeline([('scaler', StandardScaler()), - ('xgb', XGBRegressor(n_estimators=3))]) +pipe = Pipeline([("scaler", StandardScaler()), ("xgb", XGBRegressor(n_estimators=3))]) pipe.fit(X_train, y_train) print("predict", pipe.predict(X_test[:5])) @@ -139,8 +147,9 @@ ############################# # ONNX -onx = to_onnx(pipe, X_train.astype(numpy.float32), - target_opset={'': 12, 'ai.onnx.ml': 2}) +onx = to_onnx( + pipe, X_train.astype(numpy.float32), target_opset={"": 12, "ai.onnx.ml": 2} +) sess = rt.InferenceSession(onx.SerializeToString()) pred_onx = sess.run(None, {"X": X_test[:5].astype(numpy.float32)}) @@ -158,18 +167,17 @@ # a different conversion function because it does not # follow :epkg:`scikit-learn` API. -x, y = make_classification(n_classes=2, n_features=5, - n_samples=100, - random_state=42, n_informative=3) -X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5, - random_state=42) +x, y = make_classification( + n_classes=2, n_features=5, n_samples=100, random_state=42, n_informative=3 +) +X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5, random_state=42) dtrain = DMatrix(X_train, label=y_train) -param = {'objective': 'multi:softmax', 'num_class': 3} +param = {"objective": "multi:softmax", "num_class": 3} bst = train_xgb(param, dtrain, 10) -initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))] +initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))] try: onx = convert_xgboost_booster(bst, "name", initial_types=initial_type) @@ -182,6 +190,5 @@ sess = rt.InferenceSession(onx.SerializeToString()) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name - pred_onx = sess.run( - [label_name], {input_name: X_test.astype(numpy.float32)})[0] + pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] print(pred_onx) diff --git a/docs/tutorial/plot_icustom_converter.py b/docs/tutorial/plot_icustom_converter.py index 9db907adf..7a82d94b8 100644 --- a/docs/tutorial/plot_icustom_converter.py +++ b/docs/tutorial/plot_icustom_converter.py @@ -31,7 +31,6 @@ If *X* is a matrix of features, :math:`V=\\frac{1}{n}X'X` is the covariance matrix. We compute :math:`X V^{1/2}`. 
""" -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import pickle from io import BytesIO import numpy @@ -58,15 +57,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): * `self.coef_`: square root of the coveriance matrix """ - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha def fit(self, X, y=None, sample_weights=None): if sample_weights is not None: - raise NotImplementedError( - "sample_weights != None is not implemented.") + raise NotImplementedError("sample_weights != None is not implemented.") self.mean_ = numpy.mean(X, axis=0, keepdims=True) X = X - self.mean_ V = X.T @ X / X.shape[0] @@ -186,7 +184,9 @@ def decorrelate_transformer_converter(scope, operator, container): Y = OnnxMatMul( OnnxSub(X, op.mean_.astype(dtype), op_version=opv), op.coef_.astype(dtype), - op_version=opv, output_names=out[:1]) + op_version=opv, + output_names=out[:1], + ) Y.add_to(scope, container) @@ -195,18 +195,19 @@ def decorrelate_transformer_converter(scope, operator, container): update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_converter) + decorrelate_transformer_converter, +) onx = to_onnx(dec, X.astype(numpy.float32)) -sess = InferenceSession(onx.SerializeToString(), - providers=["CPUExecutionProvider"]) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) -got = sess.run(None, {'X': X.astype(numpy.float32)})[0] +got = sess.run(None, {"X": X.astype(numpy.float32)})[0] def diff(p1, p2): @@ -223,11 +224,10 @@ def diff(p1, p2): onx = to_onnx(dec, X.astype(numpy.float64)) -sess = InferenceSession(onx.SerializeToString(), - providers=["CPUExecutionProvider"]) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float64)) -got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +got = sess.run(None, {"X": X.astype(numpy.float64)})[0] print(diff(exp, got)) ############################################# diff --git a/docs/tutorial/plot_jcustom_syntax.py b/docs/tutorial/plot_jcustom_syntax.py index f509507c5..acdd6795e 100644 --- a/docs/tutorial/plot_jcustom_syntax.py +++ b/docs/tutorial/plot_jcustom_syntax.py @@ -47,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): * `self.coef_`: square root of the coveriance matrix """ - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha def fit(self, X, y=None, sample_weights=None): if sample_weights is not None: - raise NotImplementedError( - "sample_weights != None is not implemented.") + raise NotImplementedError("sample_weights != None is not implemented.") self.mean_ = numpy.mean(X, axis=0, keepdims=True) X = X - self.mean_ V = X.T @ X / X.shape[0] @@ -87,6 +86,7 @@ def transform(self, X): # # The shape calculator does not change. + def decorrelate_transformer_shape_calculator(operator): op = operator.raw_operator input_type = operator.inputs[0].type.__class__ @@ -115,37 +115,40 @@ def decorrelate_transformer_converter(scope, operator, container): # type as the input. 
proto_dtype = guess_proto_type(X.type) - mean_name = scope.get_unique_variable_name('mean') - container.add_initializer(mean_name, proto_dtype, - op.mean_.shape, list(op.mean_.ravel())) + mean_name = scope.get_unique_variable_name("mean") + container.add_initializer( + mean_name, proto_dtype, op.mean_.shape, list(op.mean_.ravel()) + ) - coef_name = scope.get_unique_variable_name('coef') - container.add_initializer(coef_name, proto_dtype, - op.coef_.shape, list(op.coef_.ravel())) + coef_name = scope.get_unique_variable_name("coef") + container.add_initializer( + coef_name, proto_dtype, op.coef_.shape, list(op.coef_.ravel()) + ) - op_name = scope.get_unique_operator_name('sub') - sub_name = scope.get_unique_variable_name('sub') + op_name = scope.get_unique_operator_name("sub") + sub_name = scope.get_unique_variable_name("sub") # This function is defined in package onnxconverter_common. # Most common operators can be added to the graph with # these functions. It handles the case when specifications # changed accross opsets (a parameter becomes an input # for example). - apply_sub(scope, [X.full_name, mean_name], sub_name, container, - operator_name=op_name) + apply_sub( + scope, [X.full_name, mean_name], sub_name, container, operator_name=op_name + ) - op_name = scope.get_unique_operator_name('matmul') - container.add_node( - 'MatMul', [sub_name, coef_name], - out[0].full_name, name=op_name) + op_name = scope.get_unique_operator_name("matmul") + container.add_node("MatMul", [sub_name, coef_name], out[0].full_name, name=op_name) ########################################## # We need to let *skl2onnx* know about the new converter. update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_converter) + decorrelate_transformer_converter, +) onx = to_onnx(dec, X.astype(numpy.float32)) @@ -153,7 +156,7 @@ def decorrelate_transformer_converter(scope, operator, container): sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float32)) -got = sess.run(None, {'X': X.astype(numpy.float32)})[0] +got = sess.run(None, {"X": X.astype(numpy.float32)})[0] def diff(p1, p2): @@ -173,7 +176,7 @@ def diff(p1, p2): sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float64)) -got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +got = sess.run(None, {"X": X.astype(numpy.float64)})[0] print(diff(exp, got)) ############################################# diff --git a/docs/tutorial/plot_kcustom_converter_wrapper.py b/docs/tutorial/plot_kcustom_converter_wrapper.py index 6a4fd37d8..dc9388bed 100644 --- a/docs/tutorial/plot_kcustom_converter_wrapper.py +++ b/docs/tutorial/plot_kcustom_converter_wrapper.py @@ -25,7 +25,6 @@ If *X* is a matrix of features, :math:`V=\\frac{1}{n}X'X` is the covariance matrix. We compute :math:`X V^{1/2}`. """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import pickle from io import BytesIO import numpy @@ -52,7 +51,7 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): * `self.coef_`: square root of the coveriance matrix """ - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha @@ -75,7 +74,7 @@ def test_decorrelate_transformer(): pred = dec.transform(X) cov = pred.T @ pred for i in range(cov.shape[0]): - cov[i, i] = 1. 
+ cov[i, i] = 1.0 assert_almost_equal(numpy.identity(4), cov) st = BytesIO() @@ -152,9 +151,11 @@ def decorrelate_transformer_converter(scope, operator, container): update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_converter) + decorrelate_transformer_converter, +) onx = to_onnx(dec, X.astype(numpy.float32)) @@ -162,7 +163,7 @@ def decorrelate_transformer_converter(scope, operator, container): sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float32)) -got = sess.run(None, {'X': X.astype(numpy.float32)})[0] +got = sess.run(None, {"X": X.astype(numpy.float32)})[0] def diff(p1, p2): @@ -182,7 +183,7 @@ def diff(p1, p2): sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float64)) -got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +got = sess.run(None, {"X": X.astype(numpy.float64)})[0] print(diff(exp, got)) ############################################# diff --git a/docs/tutorial/plot_lcustom_options.py b/docs/tutorial/plot_lcustom_options.py index e9e0b894d..637f19793 100644 --- a/docs/tutorial/plot_lcustom_options.py +++ b/docs/tutorial/plot_lcustom_options.py @@ -22,7 +22,6 @@ ++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz from pandas import DataFrame from skl2onnx.tutorial import measure_time import numpy @@ -31,8 +30,7 @@ from sklearn.datasets import load_iris from skl2onnx import update_registered_converter from skl2onnx.common.data_types import guess_numpy_type -from skl2onnx.algebra.onnx_ops import ( - OnnxSub, OnnxMatMul, OnnxGemm) +from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxMatMul, OnnxGemm from skl2onnx import to_onnx @@ -49,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): * `self.coef_`: square root of the coveriance matrix """ - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha def fit(self, X, y=None, sample_weights=None): if sample_weights is not None: - raise NotImplementedError( - "sample_weights != None is not implemented.") + raise NotImplementedError("sample_weights != None is not implemented.") self.mean_ = numpy.mean(X, axis=0, keepdims=True) X = X - self.mean_ V = X.T @ X / X.shape[0] @@ -107,19 +104,26 @@ def decorrelate_transformer_converter(scope, operator, container): dtype = guess_numpy_type(X.type) options = container.get_options(op, dict(use_gemm=False)) - use_gemm = options['use_gemm'] - print('conversion: use_gemm=', use_gemm) + use_gemm = options["use_gemm"] + print("conversion: use_gemm=", use_gemm) if use_gemm: - Y = OnnxGemm(X, op.coef_.astype(dtype), - (- op.mean_ @ op.coef_).astype(dtype), - op_version=opv, alpha=1., beta=1., - output_names=out[:1]) + Y = OnnxGemm( + X, + op.coef_.astype(dtype), + (-op.mean_ @ op.coef_).astype(dtype), + op_version=opv, + alpha=1.0, + beta=1.0, + output_names=out[:1], + ) else: Y = OnnxMatMul( OnnxSub(X, op.mean_.astype(dtype), op_version=opv), op.coef_.astype(dtype), - op_version=opv, output_names=out[:1]) + op_version=opv, + output_names=out[:1], + ) Y.add_to(scope, container) @@ -129,10 +133,12 @@ def decorrelate_transformer_converter(scope, operator, container): update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", 
decorrelate_transformer_shape_calculator, decorrelate_transformer_converter, - options={'use_gemm': [True, False]}) + options={"use_gemm": [True, False]}, +) onx = to_onnx(dec, X.astype(numpy.float32)) @@ -140,7 +146,7 @@ def decorrelate_transformer_converter(scope, operator, container): sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float32)) -got = sess.run(None, {'X': X.astype(numpy.float32)})[0] +got = sess.run(None, {"X": X.astype(numpy.float32)})[0] def diff(p1, p2): @@ -155,13 +161,12 @@ def diff(p1, p2): ############################################ # We try the non default option, `use_gemm: True`. -onx2 = to_onnx(dec, X.astype(numpy.float32), - options={'use_gemm': True}) +onx2 = to_onnx(dec, X.astype(numpy.float32), options={"use_gemm": True}) sess2 = InferenceSession(onx2.SerializeToString()) exp = dec.transform(X.astype(numpy.float32)) -got2 = sess2.run(None, {'X': X.astype(numpy.float32)})[0] +got2 = sess2.run(None, {"X": X.astype(numpy.float32)})[0] print(diff(exp, got2)) @@ -176,18 +181,18 @@ def diff(p1, p2): X32 = X.astype(numpy.float32) obs = [] -context = {'sess': sess, 'X32': X32} +context = {"sess": sess, "X32": X32} mt = measure_time( - "sess.run(None, {'X': X32})", context, div_by_number=True, - number=100, repeat=1000) -mt['use_gemm'] = False + "sess.run(None, {'X': X32})", context, div_by_number=True, number=100, repeat=1000 +) +mt["use_gemm"] = False obs.append(mt) -context = {'sess2': sess2, 'X32': X32} +context = {"sess2": sess2, "X32": X32} mt2 = measure_time( - "sess2.run(None, {'X': X32})", context, div_by_number=True, - number=10, repeat=100) -mt2['use_gemm'] = True + "sess2.run(None, {'X': X32})", context, div_by_number=True, number=10, repeat=100 +) +mt2["use_gemm"] = True obs.append(mt2) DataFrame(obs).T diff --git a/docs/tutorial/plot_mcustom_parser.py b/docs/tutorial/plot_mcustom_parser.py index 49bcd97dc..84dcd29f2 100644 --- a/docs/tutorial/plot_mcustom_parser.py +++ b/docs/tutorial/plot_mcustom_parser.py @@ -24,15 +24,13 @@ A new transformer +++++++++++++++++ """ -from pyquickhelper.helpgen.graphviz_helper import plot_graphviz import numpy from onnxruntime import InferenceSession from sklearn.base import TransformerMixin, BaseEstimator from sklearn.datasets import load_iris from skl2onnx import update_registered_converter from skl2onnx.common.data_types import guess_numpy_type -from skl2onnx.algebra.onnx_ops import ( - OnnxSub, OnnxMatMul, OnnxGemm) +from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxMatMul, OnnxGemm from skl2onnx import to_onnx, get_model_alias @@ -49,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): * `self.coef_`: square root of the coveriance matrix """ - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha def fit(self, X, y=None, sample_weights=None): if sample_weights is not None: - raise NotImplementedError( - "sample_weights != None is not implemented.") + raise NotImplementedError("sample_weights != None is not implemented.") self.mean_ = numpy.mean(X, axis=0, keepdims=True) X = X - self.mean_ V = X.T @ X / X.shape[0] @@ -110,19 +107,25 @@ def decorrelate_transformer_converter(scope, operator, container): Y1 = OnnxMatMul( OnnxSub(X, op.mean_.astype(dtype), op_version=opv), op.coef_.astype(dtype), - op_version=opv, output_names=out[:1]) + op_version=opv, + output_names=out[:1], + ) - Y2 = OnnxGemm(X, op.coef_.astype(dtype), - (- op.mean_ @ op.coef_).astype(dtype), - 
op_version=opv, alpha=1., beta=1., - output_names=out[1:2]) + Y2 = OnnxGemm( + X, + op.coef_.astype(dtype), + (-op.mean_ @ op.coef_).astype(dtype), + op_version=opv, + alpha=1.0, + beta=1.0, + output_names=out[1:2], + ) Y1.add_to(scope, container) Y2.add_to(scope, container) -def decorrelate_transformer_parser( - scope, model, inputs, custom_parsers=None): +def decorrelate_transformer_parser(scope, model, inputs, custom_parsers=None): alias = get_model_alias(type(model)) this_operator = scope.declare_local_operator(alias, model) @@ -131,35 +134,37 @@ def decorrelate_transformer_parser( # outputs cls_type = inputs[0].type.__class__ - val_y1 = scope.declare_local_variable('nogemm', cls_type()) - val_y2 = scope.declare_local_variable('gemm', cls_type()) + val_y1 = scope.declare_local_variable("nogemm", cls_type()) + val_y2 = scope.declare_local_variable("gemm", cls_type()) this_operator.outputs.append(val_y1) this_operator.outputs.append(val_y2) # ends return this_operator.outputs + ################################### # The registration needs to declare the parser as well. update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, decorrelate_transformer_converter, - parser=decorrelate_transformer_parser) + parser=decorrelate_transformer_parser, +) ############################################# # And conversion. -onx = to_onnx(dec, X.astype(numpy.float32), - target_opset=14) +onx = to_onnx(dec, X.astype(numpy.float32), target_opset=14) sess = InferenceSession(onx.SerializeToString()) exp = dec.transform(X.astype(numpy.float32)) -results = sess.run(None, {'X': X.astype(numpy.float32)}) +results = sess.run(None, {"X": X.astype(numpy.float32)}) y1 = results[0] y2 = results[1] diff --git a/docs/tutorial/plot_ngrams.py b/docs/tutorial/plot_ngrams.py index b64fb9f4a..65d8540c9 100644 --- a/docs/tutorial/plot_ngrams.py +++ b/docs/tutorial/plot_ngrams.py @@ -24,16 +24,17 @@ from skl2onnx.sklapi import TraceableTfidfVectorizer import skl2onnx.sklapi.register # noqa -corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "Is this the first document?", - "", -]).reshape((4, )) +corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "Is this the first document?", + "", + ] +).reshape((4,)) pattern = r"\b[a-z ]{1,10}\b" -mod1 = TfidfVectorizer(ngram_range=(1, 2), - token_pattern=pattern) +mod1 = TfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern) mod1.fit(corpus) @@ -63,8 +64,7 @@ # instead of concatenating every piece into a string. -mod2 = TraceableTfidfVectorizer( - ngram_range=(1, 2), token_pattern=pattern) +mod2 = TraceableTfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern) mod2.fit(corpus) pprint.pprint(mod2.vocabulary_) @@ -72,8 +72,7 @@ ####################################### # Let's check it produces the same results. -assert_almost_equal(mod1.transform(corpus).todense(), - mod2.transform(corpus).todense()) +assert_almost_equal(mod1.transform(corpus).todense(), mod2.transform(corpus).todense()) #################################### # Conversion. Line `import skl2onnx.sklapi.register` @@ -83,7 +82,7 @@ onx = to_onnx(mod2, corpus) sess = InferenceSession(onx.SerializeToString()) -got = sess.run(None, {'X': corpus}) +got = sess.run(None, {"X": corpus}) ################################### # Let's check if there are discrepancies... 
diff --git a/docs/tutorial/plot_transformer_discrepancy.py b/docs/tutorial/plot_transformer_discrepancy.py index 5adb28646..f6c093957 100644 --- a/docs/tutorial/plot_transformer_discrepancy.py +++ b/docs/tutorial/plot_transformer_discrepancy.py @@ -34,8 +34,8 @@ def print_sparse_matrix(m): if mi == ma: ma += 1 mat = numpy.empty(m.shape, dtype=numpy.str_) - mat[:, :] = '.' - if hasattr(m, 'todense'): + mat[:, :] = "." + if hasattr(m, "todense"): dense = m.todense() else: dense = m @@ -43,18 +43,19 @@ def print_sparse_matrix(m): for j in range(m.shape[1]): if dense[i, j] > 0: c = int((dense[i, j] - mi) / (ma - mi) * 25) - mat[i, j] = chr(ord('A') + c) - return '\n'.join(''.join(line) for line in mat) + mat[i, j] = chr(ord("A") + c) + return "\n".join("".join(line) for line in mat) def diff(a, b): if a.shape != b.shape: raise ValueError( - f"Cannot compare matrices with different shapes " - f"{a.shape} != {b.shape}.") + f"Cannot compare matrices with different shapes " f"{a.shape} != {b.shape}." + ) d = numpy.abs(a - b).sum() / a.size return d + ########################################## # Artificial datasets # +++++++++++++++++++ @@ -62,16 +63,18 @@ def diff(a, b): # Iris + a text column. -strings = numpy.array([ - "This a sentence.", - "This a sentence with more characters $^*&'(-...", - """var = ClassName(var2, user=mail@anywhere.com, pwd""" - """=")_~-('&]@^\\`|[{#")""", - "c79857654", - "https://complex-url.com/;76543u3456?g=hhh&h=23", - "01-03-05T11:12:13", - "https://complex-url.com/;dd76543u3456?g=ddhhh&h=23", -]).reshape((-1, 1)) +strings = numpy.array( + [ + "This a sentence.", + "This a sentence with more characters $^*&'(-...", + """var = ClassName(var2, user=mail@anywhere.com, pwd""" + """=")_~-('&]@^\\`|[{#")""", + "c79857654", + "https://complex-url.com/;76543u3456?g=hhh&h=23", + "01-03-05T11:12:13", + "https://complex-url.com/;dd76543u3456?g=ddhhh&h=23", + ] +).reshape((-1, 1)) pprint.pprint(strings) @@ -79,11 +82,7 @@ def diff(a, b): # Fit a TfIdfVectorizer # +++++++++++++++++++++ -tfidf = Pipeline([ - ('pre', ColumnTransformer([ - ('tfidf', TfidfVectorizer(), 0) - ])) -]) +tfidf = Pipeline([("pre", ColumnTransformer([("tfidf", TfidfVectorizer(), 0)]))]) ############################# # We leave a couple of strings out of the training set. 
@@ -107,6 +106,6 @@ def diff(a, b): # +++++++++++++++++++ sess = InferenceSession(onx.SerializeToString()) -got = sess.run(None, {'X': strings})[0] +got = sess.run(None, {"X": strings})[0] print(f"differences={diff(tr, got):g}") print(print_sparse_matrix(got)) diff --git a/docs/tutorial/plot_usparse_xgboost.py b/docs/tutorial/plot_usparse_xgboost.py index 92521f95d..b8ae97aa7 100644 --- a/docs/tutorial/plot_usparse_xgboost.py +++ b/docs/tutorial/plot_usparse_xgboost.py @@ -34,6 +34,7 @@ from sklearn.preprocessing import StandardScaler from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.ensemble import RandomForestClassifier + try: from sklearn.ensemble import HistGradientBoostingClassifier except ImportError: @@ -43,22 +44,25 @@ from skl2onnx.common.data_types import FloatTensorType, StringTensorType from skl2onnx import to_onnx, update_registered_converter from skl2onnx.sklapi import CastTransformer, ReplaceTransformer -from skl2onnx.common.shape_calculator import ( - calculate_linear_classifier_output_shapes) -from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( - convert_xgboost) -from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( - convert_lightgbm) +from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes +from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm update_registered_converter( - XGBClassifier, 'XGBoostXGBClassifier', - calculate_linear_classifier_output_shapes, convert_xgboost, - options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) + XGBClassifier, + "XGBoostXGBClassifier", + calculate_linear_classifier_output_shapes, + convert_xgboost, + options={"nocl": [True, False], "zipmap": [True, False, "columns"]}, +) update_registered_converter( - LGBMClassifier, 'LightGbmLGBMClassifier', - calculate_linear_classifier_output_shapes, convert_lightgbm, - options={'nocl': [True, False], 'zipmap': [True, False]}) + LGBMClassifier, + "LightGbmLGBMClassifier", + calculate_linear_classifier_output_shapes, + convert_lightgbm, + options={"nocl": [True, False], "zipmap": [True, False]}, +) ########################################## @@ -67,7 +71,7 @@ # # Iris + a text column. -cst = ['class zero', 'class one', 'class two'] +cst = ["class zero", "class one", "class two"] data = load_iris() X = data.data[:, :2] @@ -93,19 +97,25 @@ # sparse matrices to be converted into dense matrices. 
-def make_pipelines(df_train, y_train, models=None, - sparse_threshold=1., replace_nan=False, - insert_replace=False): - +def make_pipelines( + df_train, + y_train, + models=None, + sparse_threshold=1.0, + replace_nan=False, + insert_replace=False, +): if models is None: models = [ - RandomForestClassifier, HistGradientBoostingClassifier, - XGBClassifier, LGBMClassifier] + RandomForestClassifier, + HistGradientBoostingClassifier, + XGBClassifier, + LGBMClassifier, + ] models = [_ for _ in models if _ is not None] pipes = [] for model in tqdm(models): - if model == HistGradientBoostingClassifier: kwargs = dict(max_iter=5) elif model == XGBClassifier: @@ -114,79 +124,107 @@ def make_pipelines(df_train, y_train, models=None, kwargs = dict(n_estimators=5) if insert_replace: - pipe = Pipeline([ - ('union', ColumnTransformer([ - ('scale1', StandardScaler(), [0, 1]), - ('subject', - Pipeline([ - ('count', CountVectorizer()), - ('tfidf', TfidfTransformer()), - ('repl', ReplaceTransformer()), - ]), "text"), - ], sparse_threshold=sparse_threshold)), - ('cast', CastTransformer()), - ('cls', model(max_depth=3, **kwargs)), - ]) + pipe = Pipeline( + [ + ( + "union", + ColumnTransformer( + [ + ("scale1", StandardScaler(), [0, 1]), + ( + "subject", + Pipeline( + [ + ("count", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ("repl", ReplaceTransformer()), + ] + ), + "text", + ), + ], + sparse_threshold=sparse_threshold, + ), + ), + ("cast", CastTransformer()), + ("cls", model(max_depth=3, **kwargs)), + ] + ) else: - pipe = Pipeline([ - ('union', ColumnTransformer([ - ('scale1', StandardScaler(), [0, 1]), - ('subject', - Pipeline([ - ('count', CountVectorizer()), - ('tfidf', TfidfTransformer()) - ]), "text"), - ], sparse_threshold=sparse_threshold)), - ('cast', CastTransformer()), - ('cls', model(max_depth=3, **kwargs)), - ]) + pipe = Pipeline( + [ + ( + "union", + ColumnTransformer( + [ + ("scale1", StandardScaler(), [0, 1]), + ( + "subject", + Pipeline( + [ + ("count", CountVectorizer()), + ("tfidf", TfidfTransformer()), + ] + ), + "text", + ), + ], + sparse_threshold=sparse_threshold, + ), + ), + ("cast", CastTransformer()), + ("cls", model(max_depth=3, **kwargs)), + ] + ) try: pipe.fit(df_train, y_train) except TypeError as e: - obs = dict(model=model.__name__, pipe=pipe, error=e, - model_onnx=None) + obs = dict(model=model.__name__, pipe=pipe, error=e, model_onnx=None) pipes.append(obs) continue - options = {model: {'zipmap': False}} + options = {model: {"zipmap": False}} if replace_nan: - options[TfidfTransformer] = {'nan': True} + options[TfidfTransformer] = {"nan": True} # convert with warnings.catch_warnings(record=False): warnings.simplefilter("ignore", (FutureWarning, UserWarning)) model_onnx = to_onnx( pipe, - initial_types=[('input', FloatTensorType([None, 2])), - ('text', StringTensorType([None, 1]))], - target_opset={'': 12, 'ai.onnx.ml': 2}, - options=options) - - with open('model.onnx', 'wb') as f: + initial_types=[ + ("input", FloatTensorType([None, 2])), + ("text", StringTensorType([None, 1])), + ], + target_opset={"": 12, "ai.onnx.ml": 2}, + options=options, + ) + + with open("model.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) sess = rt.InferenceSession(model_onnx.SerializeToString()) - inputs = {"input": df[["c0", "c1"]].values.astype(numpy.float32), - "text": df[["text"]].values} + inputs = { + "input": df[["c0", "c1"]].values.astype(numpy.float32), + "text": df[["text"]].values, + } pred_onx = sess.run(None, inputs) - diff = numpy.abs( - pred_onx[1].ravel() - - 
pipe.predict_proba(df).ravel()).sum() + diff = numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum() - obs = dict(model=model.__name__, - discrepencies=diff, - model_onnx=model_onnx, pipe=pipe) + obs = dict( + model=model.__name__, discrepencies=diff, model_onnx=model_onnx, pipe=pipe + ) pipes.append(obs) return pipes data_sparse = make_pipelines(df, y) -stat = pandas.DataFrame(data_sparse).drop(['model_onnx', 'pipe'], axis=1) -if 'error' in stat.columns: - print(stat.drop('error', axis=1)) +stat = pandas.DataFrame(data_sparse).drop(["model_onnx", "pipe"], axis=1) +if "error" in stat.columns: + print(stat.drop("error", axis=1)) stat ############################ @@ -198,10 +236,10 @@ def make_pipelines(df_train, y_train, models=None, # Let's replace sparse data with dense by using `sparse_threshold=0.` -data_dense = make_pipelines(df, y, sparse_threshold=0.) -stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) -if 'error' in stat.columns: - print(stat.drop('error', axis=1)) +data_dense = make_pipelines(df, y, sparse_threshold=0.0) +stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1) +if "error" in stat.columns: + print(stat.drop("error", axis=1)) stat #################################### @@ -209,10 +247,10 @@ def make_pipelines(df_train, y_train, models=None, # applies on the data. print("sparse") -print(data_sparse[-1]['pipe'].steps[0][-1].transform(df)[:2]) +print(data_sparse[-1]["pipe"].steps[0][-1].transform(df)[:2]) print() print("dense") -print(data_dense[-1]['pipe'].steps[0][-1].transform(df)[:2]) +print(data_dense[-1]["pipe"].steps[0][-1].transform(df)[:2]) #################################### # This shows `RandomForestClassifier @@ -235,10 +273,10 @@ def make_pipelines(df_train, y_train, models=None, # Let's keep sparse data in the scikit-learn pipeline but # replace null values by nan in the onnx graph. -data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=True) -stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) -if 'error' in stat.columns: - print(stat.drop('error', axis=1)) +data_dense = make_pipelines(df, y, sparse_threshold=1.0, replace_nan=True) +stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1) +if "error" in stat.columns: + print(stat.drop("error", axis=1)) stat @@ -253,11 +291,12 @@ def make_pipelines(df_train, y_train, models=None, # It is equivalent to the previous options except it is # more explicit. 
-data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=False, - insert_replace=True) -stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1) -if 'error' in stat.columns: - print(stat.drop('error', axis=1)) +data_dense = make_pipelines( + df, y, sparse_threshold=1.0, replace_nan=False, insert_replace=True +) +stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1) +if "error" in stat.columns: + print(stat.drop("error", axis=1)) stat ###################################### diff --git a/docs/tutorial/plot_wext_pyod_forest.py b/docs/tutorial/plot_wext_pyod_forest.py index 23c40d713..01d977069 100644 --- a/docs/tutorial/plot_wext_pyod_forest.py +++ b/docs/tutorial/plot_wext_pyod_forest.py @@ -27,12 +27,22 @@ from sklearn.preprocessing import MinMaxScaler from skl2onnx.proto import onnx_proto from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, guess_numpy_type) + FloatTensorType, + Int64TensorType, + guess_numpy_type, +) from skl2onnx import to_onnx, update_registered_converter, get_model_alias from skl2onnx.algebra.onnx_ops import ( - OnnxIdentity, OnnxMul, OnnxLess, OnnxConcat, OnnxCast, OnnxAdd, - OnnxClip) + OnnxIdentity, + OnnxMul, + OnnxLess, + OnnxConcat, + OnnxCast, + OnnxAdd, + OnnxClip, +) from skl2onnx.algebra.onnx_operator import OnnxSubEstimator + try: from pyod.models.iforest import IForest except (ValueError, ImportError) as e: @@ -40,21 +50,28 @@ IForest = None if IForest is not None: - data1 = {'First': [500, 500, 400, 100, 200, 300, 100], - 'Second': ['a', 'b', 'a', 'b', 'a', 'b', 'c']} + data1 = { + "First": [500, 500, 400, 100, 200, 300, 100], + "Second": ["a", "b", "a", "b", "a", "b", "c"], + } - df1 = pd.DataFrame(data1, columns=['First', 'Second']) + df1 = pd.DataFrame(data1, columns=["First", "Second"]) dumdf1 = pd.get_dummies(df1) scaler = MinMaxScaler() scaler.partial_fit(dumdf1) sc_data = scaler.transform(dumdf1) - model1 = IForest(n_estimators=10, bootstrap=True, behaviour='new', - contamination=0.1, random_state=np.random.RandomState(42), - verbose=1, n_jobs=-1).fit(sc_data) + model1 = IForest( + n_estimators=10, + bootstrap=True, + behaviour="new", + contamination=0.1, + random_state=np.random.RandomState(42), + verbose=1, + n_jobs=-1, + ).fit(sc_data) feature_names2 = dumdf1.columns - initial_type = [('float_input', - FloatTensorType([None, len(feature_names2)]))] + initial_type = [("float_input", FloatTensorType([None, len(feature_names2)]))] ############################################# @@ -75,6 +92,7 @@ # The parser defines the number of outputs and their type. # The shape calculator defines their dimensions. + def pyod_iforest_parser(scope, model, inputs, custom_parsers=None): alias = get_model_alias(type(model)) this_operator = scope.declare_local_operator(alias, model) @@ -84,8 +102,8 @@ def pyod_iforest_parser(scope, model, inputs, custom_parsers=None): # outputs cls_type = inputs[0].type.__class__ - val_y1 = scope.declare_local_variable('label', Int64TensorType()) - val_y2 = scope.declare_local_variable('probability', cls_type()) + val_y1 = scope.declare_local_variable("label", Int64TensorType()) + val_y2 = scope.declare_local_variable("probability", cls_type()) this_operator.outputs.append(val_y1) this_operator.outputs.append(val_y2) @@ -98,6 +116,7 @@ def pyod_iforest_shape_calculator(operator): operator.outputs[0].type.shape = [N, 1] operator.outputs[1].type.shape = [N, 2] + ############################################ # Then the converter. 
@@ -122,54 +141,60 @@ def pyod_iforest_converter(scope, operator, container): # labels threshold = op.threshold_ - above = OnnxLess(scores, np.array([threshold], dtype=dtype), - op_version=opv) - labels = OnnxCast(above, op_version=opv, to=onnx_proto.TensorProto.INT64, - output_names=out[:1]) + above = OnnxLess(scores, np.array([threshold], dtype=dtype), op_version=opv) + labels = OnnxCast( + above, op_version=opv, to=onnx_proto.TensorProto.INT64, output_names=out[:1] + ) # probabilities train_scores = op.decision_scores_ scaler = MinMaxScaler().fit(train_scores.reshape(-1, 1)) - scores_ = OnnxMul(scores, np.array([-1], dtype=dtype), - op_version=opv) + scores_ = OnnxMul(scores, np.array([-1], dtype=dtype), op_version=opv) print(scaler.min_) print(scaler.scale_) scaled = OnnxMul(scores_, scaler.scale_.astype(dtype), op_version=opv) - scaled_centered = OnnxAdd(scaled, scaler.min_.astype(dtype), - op_version=opv) - clipped = OnnxClip(scaled_centered, np.array([0], dtype=dtype), - np.array([1], dtype=dtype), - op_version=opv) + scaled_centered = OnnxAdd(scaled, scaler.min_.astype(dtype), op_version=opv) + clipped = OnnxClip( + scaled_centered, + np.array([0], dtype=dtype), + np.array([1], dtype=dtype), + op_version=opv, + ) clipped_ = OnnxAdd( - OnnxMul(clipped, np.array([-1], dtype=dtype), - op_version=opv), + OnnxMul(clipped, np.array([-1], dtype=dtype), op_version=opv), np.array([1], dtype=dtype), - op_version=opv) + op_version=opv, + ) - scores_2d = OnnxConcat(clipped_, clipped, axis=1, op_version=opv, - output_names=out[1:]) + scores_2d = OnnxConcat( + clipped_, clipped, axis=1, op_version=opv, output_names=out[1:] + ) labels.add_to(scope, container) scores_2d.add_to(scope, container) + ######################################## # Finally the registration. if IForest is not None: update_registered_converter( - IForest, "PyodIForest", + IForest, + "PyodIForest", pyod_iforest_shape_calculator, pyod_iforest_converter, - parser=pyod_iforest_parser) + parser=pyod_iforest_parser, + ) ############################################# # And the conversion. 
if IForest is not None: - onx = to_onnx(model1, initial_types=initial_type, - target_opset={'': 14, 'ai.onnx.ml': 2}) + onx = to_onnx( + model1, initial_types=initial_type, target_opset={"": 14, "ai.onnx.ml": 2} + ) ############################################### # Checking discrepencies @@ -182,7 +207,7 @@ def pyod_iforest_converter(scope, operator, container): expected_proba = model1.predict_proba(data) sess = InferenceSession(onx.SerializeToString()) - res = sess.run(None, {'float_input': data}) + res = sess.run(None, {"float_input": data}) onx_labels = res[0] onx_proba = res[1] diff --git a/docs/tutorial/plot_woe_transformer.py b/docs/tutorial/plot_woe_transformer.py index bf466eef1..ba45ef414 100644 --- a/docs/tutorial/plot_woe_transformer.py +++ b/docs/tutorial/plot_woe_transformer.py @@ -30,20 +30,19 @@ import matplotlib.pyplot as plt from skl2onnx import to_onnx from skl2onnx.sklapi import WOETransformer + # automatically registers the converter for WOETransformer import skl2onnx.sklapi.register # noqa X = np.arange(10).astype(np.float32).reshape((-1, 1)) -intervals = [ - [(1., 3., False, False), - (5., 7., True, True)]] +intervals = [[(1.0, 3.0, False, False), (5.0, 7.0, True, True)]] weights = [[55, 107]] woe1 = WOETransformer(intervals, onehot=False, weights=weights) woe1.fit(X) prd = woe1.transform(X) -df = pd.DataFrame({'X': X.ravel(), 'woe': prd.ravel()}) +df = pd.DataFrame({"X": X.ravel(), "woe": prd.ravel()}) df ###################################### @@ -57,8 +56,8 @@ woe2.fit(X) prd = woe2.transform(X) df = pd.DataFrame(prd) -df.columns = ['I1', 'I2'] -df['X'] = X +df.columns = ["I1", "I2"] +df["X"] = X df ########################################## @@ -69,8 +68,8 @@ woe.fit(X) prd = woe.transform(X) df = pd.DataFrame(prd) -df.columns = ['I1', 'I2'] -df['X'] = X +df.columns = ["I1", "I2"] +df["X"] = X df ########################################### @@ -82,14 +81,14 @@ # onehot=False onx1 = to_onnx(woe1, X) sess = InferenceSession(onx1.SerializeToString()) -print(sess.run(None, {'X': X})[0]) +print(sess.run(None, {"X": X})[0]) ################################## # onehot=True onx2 = to_onnx(woe2, X) sess = InferenceSession(onx2.SerializeToString()) -print(sess.run(None, {'X': X})[0]) +print(sess.run(None, {"X": X})[0]) ################################################ # ONNX Graphs @@ -98,33 +97,41 @@ # onehot=False pydot_graph = GetPydotGraph( - onx1.graph, name=onx1.graph.name, rankdir="TB", + onx1.graph, + name=onx1.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("woe1.dot") -os.system('dot -O -Gdpi=300 -Tpng woe1.dot') +os.system("dot -O -Gdpi=300 -Tpng woe1.dot") image = plt.imread("woe1.dot.png") fig, ax = plt.subplots(figsize=(10, 10)) ax.imshow(image) -ax.axis('off') +ax.axis("off") ####################################### # onehot=True pydot_graph = GetPydotGraph( - onx2.graph, name=onx2.graph.name, rankdir="TB", + onx2.graph, + name=onx2.graph.name, + rankdir="TB", node_producer=GetOpNodeProducer( - "docstring", color="yellow", fillcolor="yellow", style="filled")) + "docstring", color="yellow", fillcolor="yellow", style="filled" + ), +) pydot_graph.write_dot("woe2.dot") -os.system('dot -O -Gdpi=300 -Tpng woe2.dot') +os.system("dot -O -Gdpi=300 -Tpng woe2.dot") image = plt.imread("woe2.dot.png") fig, ax = plt.subplots(figsize=(10, 10)) ax.imshow(image) -ax.axis('off') +ax.axis("off") 
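A short consistency check could be added at this point to confirm that both ONNX graphs reproduce the transformer outputs. This is a sketch only, reusing woe1, woe2, onx1, onx2 and X defined above; it is not part of the tutorial.

    import numpy as np
    from onnxruntime import InferenceSession

    for woe, onx in [(woe1, onx1), (woe2, onx2)]:
        sess = InferenceSession(onx.SerializeToString())
        got = sess.run(None, {"X": X})[0]
        # Flatten both sides so the comparison is insensitive to (n,) vs (n, 1).
        np.testing.assert_allclose(
            np.asarray(woe.transform(X), dtype=np.float32).ravel(),
            np.asarray(got, dtype=np.float32).ravel(),
            rtol=1e-5,
        )
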
######################################## # Half-line @@ -133,15 +140,13 @@ # An interval may have only one extremity defined and the other # can be infinite. -intervals = [ - [(-np.inf, 3., True, True), - (5., np.inf, True, True)]] +intervals = [[(-np.inf, 3.0, True, True), (5.0, np.inf, True, True)]] weights = [[55, 107]] woe1 = WOETransformer(intervals, onehot=False, weights=weights) woe1.fit(X) prd = woe1.transform(X) -df = pd.DataFrame({'X': X.ravel(), 'woe': prd.ravel()}) +df = pd.DataFrame({"X": X.ravel(), "woe": prd.ravel()}) df ################################# @@ -149,4 +154,4 @@ onxinf = to_onnx(woe1, X) sess = InferenceSession(onxinf.SerializeToString()) -print(sess.run(None, {'X': X})[0]) +print(sess.run(None, {"X": X})[0]) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..c04c85d1c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,17 @@ +[tool.ruff] + +exclude = [ + ".eggs", + ".git", + "build", + "dist", +] + +# Same as Black. +line-length = 88 + +[tool.ruff.mccabe] +max-complexity = 10 + +[tool.ruff.per-file-ignores] +"skl2onnx/algebra/onnx_ops.py" = ["F821"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 4698ca051..ea181f954 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,18 +1,19 @@ # tests -flatbuffers +black pandas py-cpuinfo pybind11 pytest pytest-cov +ruff wheel # docs +furo sphinx -sphinx_modern_theme_modified sphinxcontrib-blockdiag tqdm # docs/examples -lightgbm +lightgbm<4.0 matplotlib diff --git a/setup.py b/setup.py index a1af4338e..d0a04d373 100644 --- a/setup.py +++ b/setup.py @@ -5,53 +5,59 @@ from distutils.core import setup from setuptools import find_packages import os + this = os.path.dirname(__file__) with open(os.path.join(this, "requirements.txt"), "r") as f: - requirements = [_ for _ in [_.strip("\r\n ") - for _ in f.readlines()] if _ is not None] + requirements = [ + _ for _ in [_.strip("\r\n ") for _ in f.readlines()] if _ is not None + ] packages = find_packages() assert packages # read version from the package file. 
-version_str = '1.0.0' -with (open(os.path.join(this, 'skl2onnx/__init__.py'), "r")) as f: - line = [_ for _ in [_.strip("\r\n ") - for _ in f.readlines()] if _.startswith("__version__")] +version_str = "1.0.0" +with open(os.path.join(this, "skl2onnx/__init__.py"), "r") as f: + line = [ + _ + for _ in [_.strip("\r\n ") for _ in f.readlines()] + if _.startswith("__version__") + ] if len(line) > 0: - version_str = line[0].split('=')[1].strip('" ') + version_str = line[0].split("=")[1].strip('" ') README = os.path.join(os.getcwd(), "README.md") with open(README) as f: long_description = f.read() - start_pos = long_description.find('## Introduction') + start_pos = long_description.find("## Introduction") if start_pos >= 0: long_description = long_description[start_pos:] setup( - name='skl2onnx', + name="skl2onnx", version=version_str, description="Convert scikit-learn models to ONNX", long_description=long_description, - long_description_content_type='text/markdown', - license='Apache License v2.0', - author='ONNX', - author_email='onnx-technical-discuss@lists.lfaidata.foundation', - url='https://github.com/onnx/sklearn-onnx', + long_description_content_type="text/markdown", + license="Apache License v2.0", + author="ONNX", + author_email="onnx-technical-discuss@lists.lfaidata.foundation", + url="https://github.com/onnx/sklearn-onnx", packages=packages, include_package_data=True, install_requires=requirements, classifiers=[ - 'Development Status :: 4 - Beta', - 'Environment :: Console', - 'Intended Audience :: Developers', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'License :: OSI Approved :: Apache Software License'], + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: Apache Software License", + ], ) diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py index 4250c8a4f..4dbc458cc 100644 --- a/skl2onnx/__init__.py +++ b/skl2onnx/__init__.py @@ -13,9 +13,7 @@ from .convert import convert_sklearn, to_onnx, wrap_as_onnx_mixin # noqa -from ._supported_operators import ( # noqa - update_registered_converter, get_model_alias -) +from ._supported_operators import update_registered_converter, get_model_alias # noqa from ._parse import update_registered_parser # noqa from .proto import get_latest_tested_opset_version # noqa @@ -35,11 +33,12 @@ def supported_converters(from_sklearn=False): :return: list of supported models as string """ from .common._registration import _converter_pool # noqa + # The two following lines populates the list of supported converters. from . import shape_calculators # noqa from . 
import operator_converters # noqa names = sorted(_converter_pool.keys()) if from_sklearn: - return [_[7:] for _ in names if _.startswith('Sklearn')] + return [_[7:] for _ in names if _.startswith("Sklearn")] return list(names) diff --git a/skl2onnx/__main__.py b/skl2onnx/__main__.py index 30a640af8..77333e9df 100644 --- a/skl2onnx/__main__.py +++ b/skl2onnx/__main__.py @@ -4,27 +4,32 @@ def _help(): - print(dedent(""" + print( + dedent( + """ python -m skl2onnx [command] command is: setup generate rst documentation for every ONNX operator - before building the package""")) + before building the package""" + ) + ) def _setup(): from skl2onnx.algebra.onnx_ops import dynamic_class_creation + dynamic_class_creation(True) def main(argv): - if len(argv) <= 1 or '--help' in argv: + if len(argv) <= 1 or "--help" in argv: _help() return if "setup" in argv: - print('generate rst documentation for every ONNX operator') + print("generate rst documentation for every ONNX operator") _setup() return diff --git a/skl2onnx/_parse.py b/skl2onnx/_parse.py index 7910beb49..0b9c750c1 100644 --- a/skl2onnx/_parse.py +++ b/skl2onnx/_parse.py @@ -4,8 +4,8 @@ import numpy as np from sklearn import pipeline -from sklearn.base import ( - ClassifierMixin, ClusterMixin, is_classifier) +from sklearn.base import ClassifierMixin, ClusterMixin, is_classifier + try: from sklearn.base import OutlierMixin except ImportError: @@ -13,8 +13,12 @@ class OutlierMixin: pass + from sklearn.ensemble import ( - IsolationForest, RandomTreesEmbedding, RandomForestClassifier) + IsolationForest, + RandomTreesEmbedding, + RandomForestClassifier, +) from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.linear_model import BayesianRidge from sklearn.model_selection import GridSearchCV @@ -24,6 +28,7 @@ class OutlierMixin: from sklearn.preprocessing import OneHotEncoder from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC, NuSVC, SVC + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -39,17 +44,20 @@ class OutlierMixin: # changed in 0.20 SimpleImputer = None -from ._supported_operators import ( - _get_sklearn_operator_name, cluster_list, outlier_list) -from ._supported_operators import ( - sklearn_classifier_list, sklearn_operator_name_map) +from ._supported_operators import _get_sklearn_operator_name, cluster_list, outlier_list +from ._supported_operators import sklearn_classifier_list, sklearn_operator_name_map from .common._container import SklearnModelContainerNode from .common._registration import _converter_pool, _shape_calculator_pool from .common._topology import Topology, Variable from .common.data_types import ( - DictionaryType, Int64TensorType, SequenceType, - StringTensorType, TensorType, FloatTensorType, - guess_tensor_type) + DictionaryType, + Int64TensorType, + SequenceType, + StringTensorType, + TensorType, + FloatTensorType, + guess_tensor_type, +) from .common.utils import get_column_indices from .common.utils_checking import check_signature from .common.utils_classifier import get_label_classes @@ -57,36 +65,41 @@ class OutlierMixin: do_not_merge_columns = tuple( - filter(lambda op: op is not None, - [OneHotEncoder, ColumnTransformer])) + filter(lambda op: op is not None, [OneHotEncoder, ColumnTransformer]) +) def _fetch_input_slice(scope, inputs, column_indices): if not isinstance(inputs, list): raise TypeError("Parameter inputs must be a list.") if len(inputs) == 0: - raise RuntimeError("Operator ArrayFeatureExtractor requires at " - "least one inputs.") + 
raise RuntimeError( + "Operator ArrayFeatureExtractor requires at " "least one inputs." + ) if len(inputs) != 1: - raise RuntimeError("Operator ArrayFeatureExtractor does not support " - "multiple input tensors.") - if (isinstance(inputs[0].type, TensorType) and - len(inputs[0].type.shape) == 2 and - inputs[0].type.shape[1] == len(column_indices)): + raise RuntimeError( + "Operator ArrayFeatureExtractor does not support " "multiple input tensors." + ) + if ( + isinstance(inputs[0].type, TensorType) + and len(inputs[0].type.shape) == 2 + and inputs[0].type.shape[1] == len(column_indices) + ): # No need to extract. return inputs array_feature_extractor_operator = scope.declare_local_operator( - 'SklearnArrayFeatureExtractor') + "SklearnArrayFeatureExtractor" + ) array_feature_extractor_operator.inputs = inputs array_feature_extractor_operator.column_indices = column_indices output_variable_name = scope.declare_local_variable( - 'extracted_feature_columns', inputs[0].type) + "extracted_feature_columns", inputs[0].type + ) array_feature_extractor_operator.outputs.append(output_variable_name) return array_feature_extractor_operator.outputs -def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, - alias=None): +def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, alias=None): """ This function handles all non-pipeline models. @@ -101,18 +114,19 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, """ # alias can be None if isinstance(model, str): - raise RuntimeError("Parameter model must be an object not a " - "string '{0}'.".format(model)) + raise RuntimeError( + "Parameter model must be an object not a " "string '{0}'.".format(model) + ) if any(not isinstance(i, Variable) for i in inputs): raise TypeError( - "One input is not a Variable for model %r - %r." - "" % (model, inputs)) + "One input is not a Variable for model %r - %r." "" % (model, inputs) + ) if alias is None: alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) this_operator.inputs = inputs - if hasattr(model, 'onnx_parser'): + if hasattr(model, "onnx_parser"): parser_names = model.onnx_parser() if parser_names is not None: try: @@ -122,9 +136,9 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, "Calling parser %r for model type %r failed due to %r. " "This warnings will become an exception in version 1.11. " "The parser signature should parser(scope=None, " - "inputs=None)." % ( - parser_names, e, type(model)), - DeprecationWarning) + "inputs=None)." % (parser_names, e, type(model)), + DeprecationWarning, + ) names = parser_names() if names is not None: for name in names: @@ -132,35 +146,38 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, this_operator.outputs.append(name) elif isinstance(name, str): var = scope.declare_local_variable( - name, guess_tensor_type(inputs[0].type)) + name, guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(var) elif isinstance(name, tuple) and len(name) == 2: var = scope.declare_local_variable( - name[0], guess_tensor_type(name[1])) + name[0], guess_tensor_type(name[1]) + ) this_operator.outputs.append(var) else: raise RuntimeError( "Unexpected output type %r (value=%r) for " - "operator %r." % ( - type(name), name, type(model))) + "operator %r." 
% (type(name), name, type(model)) + ) return this_operator.outputs - if (type(model) in sklearn_classifier_list - or isinstance(model, ClassifierMixin) - or (isinstance(model, GridSearchCV) - and is_classifier(model))): + if ( + type(model) in sklearn_classifier_list + or isinstance(model, ClassifierMixin) + or (isinstance(model, GridSearchCV) and is_classifier(model)) + ): # For classifiers, we may have two outputs, one for label and # the other one for probabilities of all classes. Notice that # their types here are not necessarily correct and they will # be fixed in shape inference phase. - label_variable = scope.declare_local_variable( - 'label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) if type(model) in [RandomForestClassifier]: prob_dtype = FloatTensorType() else: prob_dtype = guess_tensor_type(inputs[0].type) probability_tensor_variable = scope.declare_local_variable( - 'probabilities', prob_dtype) + "probabilities", prob_dtype + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(probability_tensor_variable) @@ -169,67 +186,69 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, # the other one for scores of all classes. Notice that their # types here are not necessarily correct and they will be fixed # in shape inference phase - label_variable = scope.declare_local_variable( - 'label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) score_tensor_variable = scope.declare_local_variable( - 'scores', guess_tensor_type(inputs[0].type)) + "scores", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(score_tensor_variable) elif type(model) in {IsolationForest, LocalOutlierFactor}: - label_variable = scope.declare_local_variable( - 'label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) score_tensor_variable = scope.declare_local_variable( - 'scores', guess_tensor_type(inputs[0].type)) + "scores", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(score_tensor_variable) options = scope.get_options(model, dict(score_samples=False)) - if options['score_samples']: + if options["score_samples"]: scores_var = scope.declare_local_variable( - 'score_samples', guess_tensor_type(inputs[0].type)) + "score_samples", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(scores_var) elif type(model) in outlier_list or isinstance(model, OutlierMixin): # For outliers, we may have two outputs, one for label and # the other one for scores. 
- label_variable = scope.declare_local_variable( - 'label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) score_tensor_variable = scope.declare_local_variable( - 'scores', guess_tensor_type(inputs[0].type)) + "scores", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(score_tensor_variable) elif isinstance(model, NearestNeighbors): # For Nearest Neighbours, we have two outputs, one for nearest # neighbours' indices and the other one for distances - index_variable = scope.declare_local_variable( - 'index', Int64TensorType()) + index_variable = scope.declare_local_variable("index", Int64TensorType()) distance_variable = scope.declare_local_variable( - 'distance', guess_tensor_type(inputs[0].type)) + "distance", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(index_variable) this_operator.outputs.append(distance_variable) elif type(model) in {GaussianMixture, BayesianGaussianMixture}: - label_variable = scope.declare_local_variable( - 'label', Int64TensorType()) + label_variable = scope.declare_local_variable("label", Int64TensorType()) prob_variable = scope.declare_local_variable( - 'probabilities', guess_tensor_type(inputs[0].type)) + "probabilities", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(label_variable) this_operator.outputs.append(prob_variable) options = scope.get_options(model, dict(score_samples=False)) - if options['score_samples']: + if options["score_samples"]: scores_var = scope.declare_local_variable( - 'score_samples', guess_tensor_type(inputs[0].type)) + "score_samples", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(scores_var) elif type(model) in {SimpleImputer, Imputer}: if isinstance(inputs[0].type, (Int64TensorType, StringTensorType)): otype = inputs[0].type.__class__() else: otype = guess_tensor_type(inputs[0].type) - variable = scope.declare_local_variable('variable', otype) + variable = scope.declare_local_variable("variable", otype) this_operator.outputs.append(variable) else: - if hasattr(model, 'get_feature_names_out'): + if hasattr(model, "get_feature_names_out"): try: out_names = model.get_feature_names_out() except (AttributeError, ValueError): @@ -237,20 +256,17 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, out_names = None this_operator.feature_names_out_ = out_names input_type = guess_tensor_type(inputs[0].type) - variable = scope.declare_local_variable( - 'variable', input_type) + variable = scope.declare_local_variable("variable", input_type) this_operator.outputs.append(variable) options = scope.get_options(model, dict(decision_path=False), fail=False) - if options is not None and options['decision_path']: - dec_path = scope.declare_local_variable( - 'decision_path', StringTensorType()) + if options is not None and options["decision_path"]: + dec_path = scope.declare_local_variable("decision_path", StringTensorType()) this_operator.outputs.append(dec_path) options = scope.get_options(model, dict(decision_leaf=False), fail=False) - if options is not None and options['decision_leaf']: - dec_path = scope.declare_local_variable( - 'decision_leaf', Int64TensorType()) + if options is not None and options["decision_leaf"]: + dec_path = scope.declare_local_variable("decision_leaf", Int64TensorType()) this_operator.outputs.append(dec_path) return this_operator.outputs @@ -270,8 +286,7 @@ def _parse_sklearn_pipeline(scope, model, inputs, custom_parsers=None): 
:return: A list of output variables produced by the input pipeline """ for step in model.steps: - inputs = _parse_sklearn(scope, step[1], inputs, - custom_parsers=custom_parsers) + inputs = _parse_sklearn(scope, step[1], inputs, custom_parsers=custom_parsers) return inputs @@ -287,35 +302,34 @@ def _parse_sklearn_feature_union(scope, model, inputs, custom_parsers=None): # Encode each transform as our IR object for name, transform in model.transformer_list: transformed_result_names.append( - _parse_sklearn( - scope, transform, inputs, - custom_parsers=custom_parsers)[0]) - if (model.transformer_weights is not None and name in - model.transformer_weights): + _parse_sklearn(scope, transform, inputs, custom_parsers=custom_parsers)[0] + ) + if model.transformer_weights is not None and name in model.transformer_weights: transform_result = [transformed_result_names.pop()] # Create a Multiply ONNX node - multiply_operator = scope.declare_local_operator('SklearnMultiply') + multiply_operator = scope.declare_local_operator("SklearnMultiply") multiply_operator.inputs = transform_result multiply_operator.operand = model.transformer_weights[name] multiply_output = scope.declare_local_variable( - 'multiply_output', guess_tensor_type(inputs[0].type)) + "multiply_output", guess_tensor_type(inputs[0].type) + ) multiply_operator.outputs.append(multiply_output) transformed_result_names.append(multiply_operator.outputs[0]) # Create a Concat ONNX node - concat_operator = scope.declare_local_operator('SklearnConcat') + concat_operator = scope.declare_local_operator("SklearnConcat") concat_operator.inputs = transformed_result_names # Declare output name of scikit-learn FeatureUnion union_name = scope.declare_local_variable( - 'union', guess_tensor_type(inputs[0].type)) + "union", guess_tensor_type(inputs[0].type) + ) concat_operator.outputs.append(union_name) return concat_operator.outputs -def _parse_sklearn_column_transformer(scope, model, inputs, - custom_parsers=None): +def _parse_sklearn_column_transformer(scope, model, inputs, custom_parsers=None): """ :param scope: Scope object :param model: A *scikit-learn* *ColumnTransformer* object @@ -326,14 +340,16 @@ def _parse_sklearn_column_transformer(scope, model, inputs, transformed_result_names = [] # Encode each transform as our IR object for name, op, column_indices in model.transformers_: - if op == 'drop': + if op == "drop": continue if isinstance(column_indices, slice): - column_indices = list(range( - column_indices.start - if column_indices.start is not None else 0, - column_indices.stop, column_indices.step - if column_indices.step is not None else 1)) + column_indices = list( + range( + column_indices.start if column_indices.start is not None else 0, + column_indices.stop, + column_indices.step if column_indices.step is not None else 1, + ) + ) elif isinstance(column_indices, (int, str)): column_indices = [column_indices] names = get_column_indices(column_indices, inputs, multiple=True) @@ -355,9 +371,9 @@ def _parse_sklearn_column_transformer(scope, model, inputs, # the default behaviour is to merge columns. 
ty = transform_inputs[0].type.__class__([None, None]) - conc_op = scope.declare_local_operator('SklearnConcat') + conc_op = scope.declare_local_operator("SklearnConcat") conc_op.inputs = transform_inputs - conc_names = scope.declare_local_variable('merged_columns', ty) + conc_names = scope.declare_local_variable("merged_columns", ty) conc_op.outputs.append(conc_names) transform_inputs = [conc_names] @@ -368,23 +384,27 @@ def _parse_sklearn_column_transformer(scope, model, inputs, elif model_obj == "drop": var_out = None else: - raise RuntimeError("Unknown operator alias " - "'{0}'. These are specified in " - "_supported_operators.py." - "".format(model_obj)) + raise RuntimeError( + "Unknown operator alias " + "'{0}'. These are specified in " + "_supported_operators.py." + "".format(model_obj) + ) else: var_out = _parse_sklearn( - scope, model_obj, - transform_inputs, custom_parsers=custom_parsers)[0] - if (model.transformer_weights is not None and name in - model.transformer_weights): + scope, model_obj, transform_inputs, custom_parsers=custom_parsers + )[0] + if ( + model.transformer_weights is not None + and name in model.transformer_weights + ): # Create a Multiply ONNX node - multiply_operator = scope.declare_local_operator( - 'SklearnMultiply') + multiply_operator = scope.declare_local_operator("SklearnMultiply") multiply_operator.inputs.append(var_out) multiply_operator.operand = model.transformer_weights[name] var_out = scope.declare_local_variable( - 'multiply_output', guess_tensor_type(inputs[0].type)) + "multiply_output", guess_tensor_type(inputs[0].type) + ) multiply_operator.outputs.append(var_out) if var_out: transformed_result_names.append(var_out) @@ -392,12 +412,11 @@ def _parse_sklearn_column_transformer(scope, model, inputs, # Create a Concat ONNX node if len(transformed_result_names) > 1: ty = transformed_result_names[0].type.__class__([None, None]) - concat_operator = scope.declare_local_operator('SklearnConcat') + concat_operator = scope.declare_local_operator("SklearnConcat") concat_operator.inputs = transformed_result_names # Declare output name of scikit-learn ColumnTransformer - transformed_column_name = scope.declare_local_variable( - 'transformed_column', ty) + transformed_column_name = scope.declare_local_variable("transformed_column", ty) concat_operator.outputs.append(transformed_column_name) return concat_operator.outputs return transformed_result_names @@ -407,76 +426,77 @@ def _parse_sklearn_grid_search_cv(scope, model, inputs, custom_parsers=None): options = scope.get_options(model) if options: scope.add_options(id(model.best_estimator_), options) - res = parse_sklearn(scope, model.best_estimator_, inputs, - custom_parsers=custom_parsers) - scope.replace_raw_operator( - model.best_estimator_, model, "SklearnGridSearchCV") + res = parse_sklearn( + scope, model.best_estimator_, inputs, custom_parsers=custom_parsers + ) + scope.replace_raw_operator(model.best_estimator_, model, "SklearnGridSearchCV") return res -def _parse_sklearn_random_trees_embedding(scope, model, inputs, - custom_parsers=None): - res = parse_sklearn(scope, model.base_estimator_, inputs, - custom_parsers=custom_parsers) +def _parse_sklearn_random_trees_embedding(scope, model, inputs, custom_parsers=None): + res = parse_sklearn( + scope, model.base_estimator_, inputs, custom_parsers=custom_parsers + ) if len(res) != 1: - raise RuntimeError( - "A regressor only produces one output not %r." % res) + raise RuntimeError("A regressor only produces one output not %r." 
% res) scope.replace_raw_operator( - model.base_estimator_, model, "SklearnRandomTreesEmbedding") + model.base_estimator_, model, "SklearnRandomTreesEmbedding" + ) return res -def _apply_zipmap(zipmap_options, scope, model, input_type, - probability_tensor): - if zipmap_options == 'columns': - zipmap_operator = scope.declare_local_operator('SklearnZipMapColumns') +def _apply_zipmap(zipmap_options, scope, model, input_type, probability_tensor): + if zipmap_options == "columns": + zipmap_operator = scope.declare_local_operator("SklearnZipMapColumns") classes = get_label_classes(scope, model) classes_names = get_label_classes(scope, model, node_names=True) else: - zipmap_operator = scope.declare_local_operator('SklearnZipMap') + zipmap_operator = scope.declare_local_operator("SklearnZipMap") classes = get_label_classes(scope, model) zipmap_operator.inputs = probability_tensor label_type = Int64TensorType([None]) - if (hasattr(model, "classes_") and - isinstance(model.classes_, list) and - isinstance(model.classes_[0], np.ndarray)): + if ( + hasattr(model, "classes_") + and isinstance(model.classes_, list) + and isinstance(model.classes_[0], np.ndarray) + ): # multi-label problem pass elif np.issubdtype(classes.dtype, np.floating): classes = np.array(list(map(lambda x: int(x), classes))) if set(map(lambda x: float(x), classes)) != set(model.classes_): - raise RuntimeError("skl2onnx implicitly converts float class " - "labels into integers but at least one label " - "is not an integer. Class labels should " - "be integers or strings.") + raise RuntimeError( + "skl2onnx implicitly converts float class " + "labels into integers but at least one label " + "is not an integer. Class labels should " + "be integers or strings." + ) zipmap_operator.classlabels_int64s = classes elif np.issubdtype(classes.dtype, np.signedinteger): zipmap_operator.classlabels_int64s = [int(i) for i in classes] - elif (np.issubdtype(classes.dtype, np.unsignedinteger) or - classes.dtype == np.bool_): + elif np.issubdtype(classes.dtype, np.unsignedinteger) or classes.dtype == np.bool_: zipmap_operator.classlabels_int64s = [int(i) for i in classes] else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) zipmap_operator.classlabels_strings = classes label_type = StringTensorType([None]) - zip_label = scope.declare_local_variable('output_label', label_type) + zip_label = scope.declare_local_variable("output_label", label_type) if len(probability_tensor) == 2: zipmap_operator.outputs.append(zip_label) - if zipmap_options == 'columns': + if zipmap_options == "columns": prob_type = probability_tensor[-1].type for cl in classes_names: output_cl = scope.declare_local_variable(cl, prob_type.__class__()) zipmap_operator.outputs.append(output_cl) else: zip_probability = scope.declare_local_variable( - 'output_probability', - SequenceType( - DictionaryType( - label_type, guess_tensor_type(input_type)))) + "output_probability", + SequenceType(DictionaryType(label_type, guess_tensor_type(input_type))), + ) zipmap_operator.outputs.append(zip_probability) zipmap_operator.init_status(is_evaluated=True) @@ -485,65 +505,68 @@ def _apply_zipmap(zipmap_options, scope, model, input_type, def _parse_sklearn_classifier(scope, model, inputs, custom_parsers=None): options = scope.get_options(model, dict(zipmap=True)) - no_zipmap = ( - (isinstance(options['zipmap'], bool) and not options['zipmap']) or - (model.__class__ in [NuSVC, SVC] and not model.probability)) + no_zipmap = 
(isinstance(options["zipmap"], bool) and not options["zipmap"]) or ( + model.__class__ in [NuSVC, SVC] and not model.probability + ) probability_tensor = _parse_sklearn_simple_model( - scope, model, inputs, custom_parsers=custom_parsers) + scope, model, inputs, custom_parsers=custom_parsers + ) if no_zipmap: - if options.get('output_class_labels', False): + if options.get("output_class_labels", False): if not hasattr(model, "classes_"): raise RuntimeError( "Model type %r has no attribute 'classes_'. " "Option 'output_class_labels' is invalid or a new parser " - "must be used." % model.__class__.__name__) + "must be used." % model.__class__.__name__ + ) - clout = scope.declare_local_operator('SklearnClassLabels') + clout = scope.declare_local_operator("SklearnClassLabels") clout.classes = get_label_classes(scope, model) if model.classes_.dtype in (np.int32, np.int64, np.bool_): ctype = Int64TensorType else: ctype = StringTensorType label_type = ctype(clout.classes.shape) - class_labels = scope.declare_local_variable( - 'class_labels', label_type) + class_labels = scope.declare_local_variable("class_labels", label_type) clout.outputs.append(class_labels) outputs = list(probability_tensor) outputs.append(class_labels) return outputs return probability_tensor - if options.get('output_class_labels', False): + if options.get("output_class_labels", False): raise RuntimeError( - "Option 'output_class_labels' is not compatible with option " - "'zipmap'.") + "Option 'output_class_labels' is not compatible with option " "'zipmap'." + ) return _apply_zipmap( - options['zipmap'], scope, model, inputs[0].type, probability_tensor) + options["zipmap"], scope, model, inputs[0].type, probability_tensor + ) -def _parse_sklearn_multi_output_classifier(scope, model, inputs, - custom_parsers=None): +def _parse_sklearn_multi_output_classifier(scope, model, inputs, custom_parsers=None): options = scope.get_options(model, dict(zipmap=True)) - if options['zipmap']: + if options["zipmap"]: warnings.warn( "Option zipmap is ignored for model %r. " "Set option zipmap to False to " "remove this message." % type(model), - UserWarning) + UserWarning, + ) alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) this_operator.inputs = inputs - if hasattr(model, 'classes_'): + if hasattr(model, "classes_"): classes = model.classes_ else: classes = [get_label_classes(scope, m) for m in model.estimators_] if len(set(cl.dtype for cl in classes)) != 1: raise RuntimeError( "Class labels may have only one type %r." 
- "" % set(cl.dtype for cl in classes)) + "" % set(cl.dtype for cl in classes) + ) if classes[0].dtype in (np.int32, np.int64, np.bool_): ctype = Int64TensorType else: @@ -551,18 +574,19 @@ def _parse_sklearn_multi_output_classifier(scope, model, inputs, label = scope.declare_local_variable("label", ctype()) proba = scope.declare_local_variable( - "probabilities", SequenceType(guess_tensor_type(inputs[0].type))) + "probabilities", SequenceType(guess_tensor_type(inputs[0].type)) + ) this_operator.outputs.append(label) this_operator.outputs.append(proba) options = scope.get_options(model) - if options.get('output_class_labels', False): - clout = scope.declare_local_operator('SklearnClassLabels') + if options.get("output_class_labels", False): + clout = scope.declare_local_operator("SklearnClassLabels") clout.is_multi_output = True clout.classes = classes class_labels = scope.declare_local_variable( - "class_labels", - SequenceType(ctype())) + "class_labels", SequenceType(ctype()) + ) clout.outputs.append(class_labels) return list(this_operator.outputs) + [class_labels] @@ -570,24 +594,26 @@ def _parse_sklearn_multi_output_classifier(scope, model, inputs, def _parse_sklearn_gaussian_process(scope, model, inputs, custom_parsers=None): - options = scope.get_options( - model, dict(return_cov=False, return_std=False)) - if options['return_std'] and options['return_cov']: + options = scope.get_options(model, dict(return_cov=False, return_std=False)) + if options["return_std"] and options["return_cov"]: raise RuntimeError( "Not returning standard deviation of predictions when " - "returning full covariance.") + "returning full covariance." + ) alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) mean_tensor = scope.declare_local_variable( - "GPmean", guess_tensor_type(inputs[0].type)) + "GPmean", guess_tensor_type(inputs[0].type) + ) this_operator.inputs = inputs this_operator.outputs.append(mean_tensor) - if options['return_std'] or options['return_cov']: + if options["return_std"] or options["return_cov"]: # covariance or standard deviation covstd_tensor = scope.declare_local_variable( - 'GPcovstd', guess_tensor_type(inputs[0].type)) + "GPcovstd", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(covstd_tensor) return this_operator.outputs @@ -597,14 +623,16 @@ def _parse_sklearn_bayesian_ridge(scope, model, inputs, custom_parsers=None): alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) mean_tensor = scope.declare_local_variable( - "variable", guess_tensor_type(inputs[0].type)) + "variable", guess_tensor_type(inputs[0].type) + ) this_operator.inputs = inputs this_operator.outputs.append(mean_tensor) - if options['return_std']: + if options["return_std"]: # covariance or standard deviation covstd_tensor = scope.declare_local_variable( - 'std', guess_tensor_type(inputs[0].type)) + "std", guess_tensor_type(inputs[0].type) + ) this_operator.outputs.append(covstd_tensor) return this_operator.outputs @@ -629,28 +657,31 @@ def _parse_sklearn(scope, model, inputs, custom_parsers=None, alias=None): for i, inp in enumerate(inputs): if not isinstance(inp, Variable): raise TypeError( - "Unexpected input type %r for input %r: %r." % ( - type(inp), i, inp)) + "Unexpected input type %r for input %r: %r." 
% (type(inp), i, inp) + ) if alias is not None: - outputs = _parse_sklearn_simple_model(scope, model, inputs, - custom_parsers=custom_parsers, - alias=alias) + outputs = _parse_sklearn_simple_model( + scope, model, inputs, custom_parsers=custom_parsers, alias=alias + ) return outputs tmodel = type(model) if custom_parsers is not None and tmodel in custom_parsers: - outputs = custom_parsers[tmodel](scope, model, inputs, - custom_parsers=custom_parsers) + outputs = custom_parsers[tmodel]( + scope, model, inputs, custom_parsers=custom_parsers + ) elif tmodel in sklearn_parsers_map: - outputs = sklearn_parsers_map[tmodel](scope, model, inputs, - custom_parsers=custom_parsers) + outputs = sklearn_parsers_map[tmodel]( + scope, model, inputs, custom_parsers=custom_parsers + ) elif isinstance(model, pipeline.Pipeline): parser = sklearn_parsers_map[pipeline.Pipeline] outputs = parser(scope, model, inputs, custom_parsers=custom_parsers) else: - outputs = _parse_sklearn_simple_model(scope, model, inputs, - custom_parsers=custom_parsers) + outputs = _parse_sklearn_simple_model( + scope, model, inputs, custom_parsers=custom_parsers + ) return outputs @@ -681,23 +712,27 @@ def parse_sklearn(scope, model, inputs, custom_parsers=None, final_types=None): raise RuntimeError( "Unable to add duplicated output '{}', '{}'. " "Output and input must have different names." - "".format(var.onnx_name, name)) + "".format(var.onnx_name, name) + ) outputs.append(var) hidden_outputs = _parse_sklearn( - scope, model, inputs, custom_parsers=custom_parsers) + scope, model, inputs, custom_parsers=custom_parsers + ) if len(hidden_outputs) != len(outputs): raise RuntimeError( "Number of declared outputs is unexpected, declared '{}' " "found '{}'.".format( ", ".join(_.onnx_name for _ in outputs), - ", ".join(_.onnx_name for _ in hidden_outputs))) + ", ".join(_.onnx_name for _ in hidden_outputs), + ) + ) for h, o in zip(hidden_outputs, outputs): if o.type is None: - iop = scope.declare_local_operator('SklearnIdentity') + iop = scope.declare_local_operator("SklearnIdentity") else: - iop = scope.declare_local_operator('SklearnCast') + iop = scope.declare_local_operator("SklearnCast") iop.inputs = [h] iop.outputs = [o] h.init_status(is_leaf=False) @@ -706,20 +741,25 @@ def parse_sklearn(scope, model, inputs, custom_parsers=None, final_types=None): o.type = h.type return outputs - res = _parse_sklearn( - scope, model, inputs, custom_parsers=custom_parsers) + res = _parse_sklearn(scope, model, inputs, custom_parsers=custom_parsers) for r in res: r.init_status(is_leaf=True) return res -def parse_sklearn_model(model, initial_types=None, target_opset=None, - custom_conversion_functions=None, - custom_shape_calculators=None, - custom_parsers=None, - options=None, white_op=None, - black_op=None, final_types=None, - naming=None): +def parse_sklearn_model( + model, + initial_types=None, + target_opset=None, + custom_conversion_functions=None, + custom_shape_calculators=None, + custom_parsers=None, + options=None, + white_op=None, + black_op=None, + final_types=None, + naming=None, +): """ Puts *scikit-learn* object into an abstract container so that our framework can work seamlessly on models created @@ -761,21 +801,26 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None, options = _process_options(model, options) raw_model_container = SklearnModelContainerNode( - model, white_op=white_op, black_op=black_op) + model, white_op=white_op, black_op=black_op + ) # Declare a computational graph. 
It will become a representation of # the input scikit-learn model after parsing. topology = Topology( - raw_model_container, initial_types=initial_types, + raw_model_container, + initial_types=initial_types, target_opset=target_opset, custom_conversion_functions=custom_conversion_functions, custom_shape_calculators=custom_shape_calculators, registered_models=dict( - conv=_converter_pool, shape=_shape_calculator_pool, - aliases=sklearn_operator_name_map)) + conv=_converter_pool, + shape=_shape_calculator_pool, + aliases=sklearn_operator_name_map, + ), + ) # Declare an object to provide variables' and operators' naming mechanism. - scope = topology.declare_scope('__root__', options=options, naming=naming) + scope = topology.declare_scope("__root__", options=options, naming=naming) inputs = scope.input_variables # The object raw_model_container is a part of the topology @@ -786,9 +831,9 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None, raw_model_container.add_input(variable) # Parse the input scikit-learn model as a Topology object. - outputs = parse_sklearn(scope, model, inputs, - custom_parsers=custom_parsers, - final_types=final_types) + outputs = parse_sklearn( + scope, model, inputs, custom_parsers=custom_parsers, final_types=final_types + ) # The object raw_model_container is a part of the topology we're # going to return. We use it to store the outputs of the @@ -796,7 +841,8 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None, if final_types is not None and len(final_types) != len(outputs): raise RuntimeError( "Unexpected number of outputs, expected %d, got %d " - "after parsing." % (len(final_types), len(outputs))) + "after parsing." % (len(final_types), len(outputs)) + ) return topology diff --git a/skl2onnx/_supported_operators.py b/skl2onnx/_supported_operators.py index 1653d076b..b4a5a7278 100644 --- a/skl2onnx/_supported_operators.py +++ b/skl2onnx/_supported_operators.py @@ -9,11 +9,13 @@ # Linear classifiers from sklearn.linear_model import ( - LogisticRegression, LogisticRegressionCV, + LogisticRegression, + LogisticRegressionCV, PassiveAggressiveClassifier, Perceptron, - RidgeClassifier, RidgeClassifierCV, - SGDClassifier + RidgeClassifier, + RidgeClassifierCV, + SGDClassifier, ) from sklearn.svm import LinearSVC, OneClassSVM @@ -21,22 +23,31 @@ from sklearn.linear_model import ( ARDRegression, BayesianRidge, - ElasticNet, ElasticNetCV, + ElasticNet, + ElasticNetCV, HuberRegressor, - Lars, LarsCV, - Lasso, LassoCV, - LassoLars, LassoLarsCV, + Lars, + LarsCV, + Lasso, + LassoCV, + LassoLars, + LassoLarsCV, LassoLarsIC, LinearRegression, - MultiTaskElasticNet, MultiTaskElasticNetCV, - MultiTaskLasso, MultiTaskLassoCV, - OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, + MultiTaskElasticNet, + MultiTaskElasticNetCV, + MultiTaskLasso, + MultiTaskLassoCV, + OrthogonalMatchingPursuit, + OrthogonalMatchingPursuitCV, PassiveAggressiveRegressor, RANSACRegressor, - Ridge, RidgeCV, + Ridge, + RidgeCV, SGDRegressor, - TheilSenRegressor + TheilSenRegressor, ) + try: from sklearn.linear_model import GammaRegressor except ImportError: @@ -66,31 +77,36 @@ from sklearn.svm import LinearSVR from sklearn.discriminant_analysis import ( LinearDiscriminantAnalysis, - QuadraticDiscriminantAnalysis + QuadraticDiscriminantAnalysis, ) # Mixture -from sklearn.mixture import ( - GaussianMixture, BayesianGaussianMixture -) +from sklearn.mixture import GaussianMixture, BayesianGaussianMixture # Multi-class from sklearn.multiclass import ( 
_ConstantPredictor, OneVsRestClassifier, - OneVsOneClassifier + OneVsOneClassifier, ) # Tree-based models from sklearn.ensemble import ( - AdaBoostClassifier, AdaBoostRegressor, - BaggingClassifier, BaggingRegressor, - ExtraTreesClassifier, ExtraTreesRegressor, - GradientBoostingClassifier, GradientBoostingRegressor, + AdaBoostClassifier, + AdaBoostRegressor, + BaggingClassifier, + BaggingRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, + GradientBoostingClassifier, + GradientBoostingRegressor, IsolationForest, - RandomForestClassifier, RandomForestRegressor, RandomTreesEmbedding, - VotingClassifier + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, + VotingClassifier, ) + try: from sklearn.ensemble import VotingRegressor except ImportError: @@ -103,14 +119,14 @@ StackingClassifier = None StackingRegressor = None from sklearn.tree import ( - DecisionTreeClassifier, DecisionTreeRegressor, - ExtraTreeClassifier, ExtraTreeRegressor + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, ) # Gaussian processes -from sklearn.gaussian_process import ( - GaussianProcessClassifier, GaussianProcessRegressor -) +from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor # GridSearchCV from sklearn.model_selection import GridSearchCV @@ -130,6 +146,7 @@ RadiusNeighborsClassifier, RadiusNeighborsRegressor, ) + try: from sklearn.neighbors import ( KNeighborsTransformer, @@ -146,6 +163,7 @@ GaussianNB, MultinomialNB, ) + try: from sklearn.naive_bayes import CategoricalNB except ImportError: @@ -176,14 +194,23 @@ FeatureHasher, ) from sklearn.feature_extraction.text import ( - CountVectorizer, TfidfTransformer, TfidfVectorizer + CountVectorizer, + TfidfTransformer, + TfidfVectorizer, ) from sklearn.feature_selection import ( - GenericUnivariateSelect, RFE, RFECV, - SelectFdr, SelectFpr, SelectFromModel, - SelectFwe, SelectKBest, SelectPercentile, - VarianceThreshold + GenericUnivariateSelect, + RFE, + RFECV, + SelectFdr, + SelectFpr, + SelectFromModel, + SelectFwe, + SelectKBest, + SelectPercentile, + VarianceThreshold, ) + try: # 0.20 from sklearn.impute import SimpleImputer @@ -191,6 +218,7 @@ # 0.19 from sklearn.preprocessing import Imputer as SimpleImputer from sklearn.preprocessing import Binarizer + try: from sklearn.preprocessing import Imputer except ImportError: @@ -207,9 +235,12 @@ # not available in 0.19 KBinsDiscretizer = None from sklearn.preprocessing import ( - LabelBinarizer, LabelEncoder, - Normalizer, OneHotEncoder + LabelBinarizer, + LabelEncoder, + Normalizer, + OneHotEncoder, ) + try: from sklearn.preprocessing import OrdinalEncoder except ImportError: @@ -222,7 +253,7 @@ MinMaxScaler, PolynomialFeatures, RobustScaler, - StandardScaler + StandardScaler, ) try: @@ -234,7 +265,7 @@ try: from sklearn.ensemble import ( HistGradientBoostingClassifier, - HistGradientBoostingRegressor + HistGradientBoostingRegressor, ) except ImportError: # Second verification as these models still require @@ -242,7 +273,7 @@ try: from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import ( # noqa HistGradientBoostingClassifier, - HistGradientBoostingRegressor + HistGradientBoostingRegressor, ) except ImportError: HistGradientBoostingRegressor = None @@ -263,7 +294,7 @@ from .common._registration import register_converter, register_shape_calculator -logger = logging.getLogger('skl2onnx') +logger = logging.getLogger("skl2onnx") # In most cases, scikit-learn operator produces only one output. 
# However, each classifier has basically two outputs; one is the @@ -272,40 +303,45 @@ # classifiers. In the parsing stage, we produce two outputs for objects # included in the following list and one output for everything not in # the list. -sklearn_classifier_list = list(filter(lambda m: m is not None, [ - _ConstantPredictor, - AdaBoostClassifier, - BaggingClassifier, - BernoulliNB, - CategoricalNB, - CalibratedClassifierCV, - ComplementNB, - DecisionTreeClassifier, - ExtraTreeClassifier, - ExtraTreesClassifier, - GaussianNB, - GaussianProcessClassifier, - GradientBoostingClassifier, - HistGradientBoostingClassifier, - KNeighborsClassifier, - LinearDiscriminantAnalysis, - LinearSVC, - LogisticRegression, - LogisticRegressionCV, - MLPClassifier, - MultinomialNB, - NuSVC, - OneVsOneClassifier, - OneVsRestClassifier, - PassiveAggressiveClassifier, - Perceptron, - QuadraticDiscriminantAnalysis, - RandomForestClassifier, - SGDClassifier, - StackingClassifier, - SVC, - VotingClassifier, -])) +sklearn_classifier_list = list( + filter( + lambda m: m is not None, + [ + _ConstantPredictor, + AdaBoostClassifier, + BaggingClassifier, + BernoulliNB, + CategoricalNB, + CalibratedClassifierCV, + ComplementNB, + DecisionTreeClassifier, + ExtraTreeClassifier, + ExtraTreesClassifier, + GaussianNB, + GaussianProcessClassifier, + GradientBoostingClassifier, + HistGradientBoostingClassifier, + KNeighborsClassifier, + LinearDiscriminantAnalysis, + LinearSVC, + LogisticRegression, + LogisticRegressionCV, + MLPClassifier, + MultinomialNB, + NuSVC, + OneVsOneClassifier, + OneVsRestClassifier, + PassiveAggressiveClassifier, + Perceptron, + QuadraticDiscriminantAnalysis, + RandomForestClassifier, + SGDClassifier, + StackingClassifier, + SVC, + VotingClassifier, + ], + ) +) # Clustering algorithms: produces two outputs, label and score for # each cluster in most cases. @@ -320,159 +356,166 @@ # scikit-learn models share a single name, it means their are # equivalent in terms of conversion. 
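The aliasing described above can be observed directly with get_model_alias: the mapping built by build_sklearn_operator_name_map below routes several scikit-learn classes to a single converter alias. A small sketch (the expected values follow from the dictionary defined below):

    from sklearn.linear_model import Lasso, LogisticRegression, Ridge
    from skl2onnx import get_model_alias

    # Several linear models share one converter alias.
    print(get_model_alias(LogisticRegression))  # SklearnLinearClassifier
    print(get_model_alias(Ridge))               # SklearnLinearRegressor
    print(get_model_alias(Lasso))               # SklearnLinearRegressor
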
def build_sklearn_operator_name_map(): - res = {k: "Sklearn" + k.__name__ for k in [ - _ConstantPredictor, - AdaBoostClassifier, - AdaBoostRegressor, - BaggingClassifier, - BaggingRegressor, - BayesianGaussianMixture, - BayesianRidge, - BernoulliNB, - Binarizer, - CalibratedClassifierCV, - CategoricalNB, - CastRegressor, - CastTransformer, - ColumnTransformer, - ComplementNB, - CountVectorizer, - DictVectorizer, - DecisionTreeClassifier, - DecisionTreeRegressor, - ExtraTreeClassifier, - ExtraTreeRegressor, - ExtraTreesClassifier, - ExtraTreesRegressor, - FeatureHasher, - FeatureUnion, - FunctionTransformer, - GammaRegressor, - GaussianNB, - GaussianMixture, - GaussianProcessClassifier, - GaussianProcessRegressor, - GaussianRandomProjection, - GenericUnivariateSelect, - GradientBoostingClassifier, - GradientBoostingRegressor, - HistGradientBoostingClassifier, - HistGradientBoostingRegressor, - Imputer, - IncrementalPCA, - IsolationForest, - KMeans, - LabelBinarizer, - LabelEncoder, - LinearRegression, - LinearSVC, - LinearSVR, - LocalOutlierFactor, - MaxAbsScaler, - MiniBatchKMeans, - MinMaxScaler, - MLPClassifier, - MLPRegressor, - MultinomialNB, - MultiOutputClassifier, - MultiOutputRegressor, - KBinsDiscretizer, - KernelCenterer, - KernelPCA, - KNeighborsClassifier, - KNeighborsRegressor, - KNeighborsTransformer, - KNNImputer, - NearestNeighbors, - NeighborhoodComponentsAnalysis, - Normalizer, - OneClassSVM, - OneHotEncoder, - OneVsOneClassifier, - OneVsRestClassifier, - OrdinalEncoder, - PCA, - PLSRegression, - Pipeline, - PoissonRegressor, - PolynomialFeatures, - PowerTransformer, - QuadraticDiscriminantAnalysis, - RadiusNeighborsClassifier, - RadiusNeighborsRegressor, - RandomForestClassifier, - RandomForestRegressor, - RandomTreesEmbedding, - RANSACRegressor, - ReplaceTransformer, - RFE, - RFECV, - RobustScaler, - SelectFdr, - SelectFpr, - SelectFromModel, - SelectFwe, - SelectKBest, - SelectPercentile, - SGDClassifier, - SGDOneClassSVM, - SimpleImputer, - StackingClassifier, - StackingRegressor, - SVC, - SVR, - TfidfVectorizer, - TfidfTransformer, - TruncatedSVD, - TweedieRegressor, - VarianceThreshold, - VotingClassifier, - VotingRegressor, - ] if k is not None} - res.update({ - ARDRegression: 'SklearnLinearRegressor', - ElasticNet: 'SklearnLinearRegressor', - ElasticNetCV: 'SklearnLinearRegressor', - GridSearchCV: 'SklearnGridSearchCV', - HuberRegressor: 'SklearnLinearRegressor', - LinearRegression: 'SklearnLinearRegressor', - Lars: 'SklearnLinearRegressor', - LarsCV: 'SklearnLinearRegressor', - Lasso: 'SklearnLinearRegressor', - LassoCV: 'SklearnLinearRegressor', - LassoLars: 'SklearnLinearRegressor', - LassoLarsCV: 'SklearnLinearRegressor', - LassoLarsIC: 'SklearnLinearRegressor', - LinearDiscriminantAnalysis: 'SklearnLinearClassifier', - LogisticRegression: 'SklearnLinearClassifier', - LogisticRegressionCV: 'SklearnLinearClassifier', - MultiTaskElasticNet: 'SklearnLinearRegressor', - MultiTaskElasticNetCV: 'SklearnLinearRegressor', - MultiTaskLasso: 'SklearnLinearRegressor', - MultiTaskLassoCV: 'SklearnLinearRegressor', - NuSVC: 'SklearnSVC', - NuSVR: 'SklearnSVR', - OrthogonalMatchingPursuit: 'SklearnLinearRegressor', - OrthogonalMatchingPursuitCV: 'SklearnLinearRegressor', - PassiveAggressiveClassifier: 'SklearnSGDClassifier', - PassiveAggressiveRegressor: 'SklearnLinearRegressor', - Perceptron: 'SklearnSGDClassifier', - QuantileRegressor: 'SklearnLinearRegressor', - Ridge: 'SklearnLinearRegressor', - RidgeCV: 'SklearnLinearRegressor', - RidgeClassifier: 
'SklearnLinearClassifier', - RidgeClassifierCV: 'SklearnLinearClassifier', - SGDRegressor: 'SklearnLinearRegressor', - StandardScaler: 'SklearnScaler', - TheilSenRegressor: 'SklearnLinearRegressor', - }) + res = { + k: "Sklearn" + k.__name__ + for k in [ + _ConstantPredictor, + AdaBoostClassifier, + AdaBoostRegressor, + BaggingClassifier, + BaggingRegressor, + BayesianGaussianMixture, + BayesianRidge, + BernoulliNB, + Binarizer, + CalibratedClassifierCV, + CategoricalNB, + CastRegressor, + CastTransformer, + ColumnTransformer, + ComplementNB, + CountVectorizer, + DictVectorizer, + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, + FeatureHasher, + FeatureUnion, + FunctionTransformer, + GammaRegressor, + GaussianNB, + GaussianMixture, + GaussianProcessClassifier, + GaussianProcessRegressor, + GaussianRandomProjection, + GenericUnivariateSelect, + GradientBoostingClassifier, + GradientBoostingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + Imputer, + IncrementalPCA, + IsolationForest, + KMeans, + LabelBinarizer, + LabelEncoder, + LinearRegression, + LinearSVC, + LinearSVR, + LocalOutlierFactor, + MaxAbsScaler, + MiniBatchKMeans, + MinMaxScaler, + MLPClassifier, + MLPRegressor, + MultinomialNB, + MultiOutputClassifier, + MultiOutputRegressor, + KBinsDiscretizer, + KernelCenterer, + KernelPCA, + KNeighborsClassifier, + KNeighborsRegressor, + KNeighborsTransformer, + KNNImputer, + NearestNeighbors, + NeighborhoodComponentsAnalysis, + Normalizer, + OneClassSVM, + OneHotEncoder, + OneVsOneClassifier, + OneVsRestClassifier, + OrdinalEncoder, + PCA, + PLSRegression, + Pipeline, + PoissonRegressor, + PolynomialFeatures, + PowerTransformer, + QuadraticDiscriminantAnalysis, + RadiusNeighborsClassifier, + RadiusNeighborsRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, + RANSACRegressor, + ReplaceTransformer, + RFE, + RFECV, + RobustScaler, + SelectFdr, + SelectFpr, + SelectFromModel, + SelectFwe, + SelectKBest, + SelectPercentile, + SGDClassifier, + SGDOneClassSVM, + SimpleImputer, + StackingClassifier, + StackingRegressor, + SVC, + SVR, + TfidfVectorizer, + TfidfTransformer, + TruncatedSVD, + TweedieRegressor, + VarianceThreshold, + VotingClassifier, + VotingRegressor, + ] + if k is not None + } + res.update( + { + ARDRegression: "SklearnLinearRegressor", + ElasticNet: "SklearnLinearRegressor", + ElasticNetCV: "SklearnLinearRegressor", + GridSearchCV: "SklearnGridSearchCV", + HuberRegressor: "SklearnLinearRegressor", + LinearRegression: "SklearnLinearRegressor", + Lars: "SklearnLinearRegressor", + LarsCV: "SklearnLinearRegressor", + Lasso: "SklearnLinearRegressor", + LassoCV: "SklearnLinearRegressor", + LassoLars: "SklearnLinearRegressor", + LassoLarsCV: "SklearnLinearRegressor", + LassoLarsIC: "SklearnLinearRegressor", + LinearDiscriminantAnalysis: "SklearnLinearClassifier", + LogisticRegression: "SklearnLinearClassifier", + LogisticRegressionCV: "SklearnLinearClassifier", + MultiTaskElasticNet: "SklearnLinearRegressor", + MultiTaskElasticNetCV: "SklearnLinearRegressor", + MultiTaskLasso: "SklearnLinearRegressor", + MultiTaskLassoCV: "SklearnLinearRegressor", + NuSVC: "SklearnSVC", + NuSVR: "SklearnSVR", + OrthogonalMatchingPursuit: "SklearnLinearRegressor", + OrthogonalMatchingPursuitCV: "SklearnLinearRegressor", + PassiveAggressiveClassifier: "SklearnSGDClassifier", + PassiveAggressiveRegressor: "SklearnLinearRegressor", + Perceptron: 
"SklearnSGDClassifier", + QuantileRegressor: "SklearnLinearRegressor", + Ridge: "SklearnLinearRegressor", + RidgeCV: "SklearnLinearRegressor", + RidgeClassifier: "SklearnLinearClassifier", + RidgeClassifierCV: "SklearnLinearClassifier", + SGDRegressor: "SklearnLinearRegressor", + StandardScaler: "SklearnScaler", + TheilSenRegressor: "SklearnLinearRegressor", + } + ) if None in res: del res[None] return res -def update_registered_converter(model, alias, shape_fct, convert_fct, - overwrite=True, parser=None, options=None): +def update_registered_converter( + model, alias, shape_fct, convert_fct, overwrite=True, parser=None, options=None +): """ Registers or updates a converter for a new model so that it can be converted when inserted in a *scikit-learn* pipeline. @@ -510,21 +553,27 @@ def update_registered_converter(model, alias, shape_fct, convert_fct, must declare this option to let the default parser automatically handle that option. """ # noqa - if (not overwrite and model in sklearn_operator_name_map - and alias != sklearn_operator_name_map[model]): - warnings.warn("Model '{0}' was already registered under alias " - "'{1}'.".format(model, sklearn_operator_name_map[model])) + if ( + not overwrite + and model in sklearn_operator_name_map + and alias != sklearn_operator_name_map[model] + ): + warnings.warn( + "Model '{0}' was already registered under alias " + "'{1}'.".format(model, sklearn_operator_name_map[model]) + ) sklearn_operator_name_map[model] = alias - register_converter(alias, convert_fct, overwrite=overwrite, - options=options) + register_converter(alias, convert_fct, overwrite=overwrite, options=options) register_shape_calculator(alias, shape_fct, overwrite=overwrite) if parser is not None: from ._parse import update_registered_parser + update_registered_parser(model, parser) - elif (options is not None and - ('zipmap' in options or 'output_class_labels' in options)): - from ._parse import ( - _parse_sklearn_classifier, update_registered_parser) + elif options is not None and ( + "zipmap" in options or "output_class_labels" in options + ): + from ._parse import _parse_sklearn_classifier, update_registered_parser + update_registered_parser(model, _parse_sklearn_classifier) @@ -542,7 +591,7 @@ def _get_sklearn_operator_name(model_type): alias = None else: alias = sklearn_operator_name_map[model_type] - logger.debug('[parsing] found alias=%r for type=%r.', alias, model_type) + logger.debug("[parsing] found alias=%r for type=%r.", alias, model_type) return alias @@ -557,9 +606,11 @@ def get_model_alias(model_type): """ res = _get_sklearn_operator_name(model_type) if res is None: - raise RuntimeError("Unable to find alias for model '{}'. " - "The converter is likely missing." - "".format(model_type)) + raise RuntimeError( + "Unable to find alias for model '{}'. " + "The converter is likely missing." 
+ "".format(model_type) + ) return res diff --git a/skl2onnx/algebra/automation.py b/skl2onnx/algebra/automation.py index 5bcd08053..40220d97e 100644 --- a/skl2onnx/algebra/automation.py +++ b/skl2onnx/algebra/automation.py @@ -10,23 +10,25 @@ def _get_doc_template(): try: from jinja2 import Template except ImportError: + class Template: def __init__(self, *args): pass def render(self, **context): - schemas = context['schemas'] + schemas = context["schemas"] rows = [] for sch in schemas: - doc = sch.doc or '' + doc = sch.doc or "" name = sch.name if name is None: raise RuntimeError("An operator must have a name.") - rows.extend([name, "=" * len(name), - "", doc, ""]) + rows.extend([name, "=" * len(name), "", doc, ""]) return "\n".join(rows) - return Template(textwrap.dedent(""" + return Template( + textwrap.dedent( + """ {% for sch in schemas %} {{format_name_with_domain(sch)}} @@ -96,7 +98,9 @@ def render(self, **context): {% endif %} {% endfor %} - """)) + """ + ) + ) _template_operator = _get_doc_template() @@ -106,8 +110,9 @@ def get_domain_list(): """ Returns the list of available domains. """ - return list(sorted(set(map(lambda s: s.domain, - onnx.defs.get_all_schemas_with_history())))) + return list( + sorted(set(map(lambda s: s.domain, onnx.defs.get_all_schemas_with_history()))) + ) def get_rst_doc(op_name=None): @@ -124,16 +129,19 @@ def get_rst_doc(op_name=None): if op_name is None: schemas = onnx.defs.get_all_schemas_with_history() elif isinstance(op_name, str): - schemas = [schema for schema in onnx.defs.get_all_schemas_with_history( - ) if schema.name == op_name] + schemas = [ + schema + for schema in onnx.defs.get_all_schemas_with_history() + if schema.name == op_name + ] if len(schemas) > 1: raise RuntimeError( - "Multiple operators have the same name '{}'.".format(op_name)) + "Multiple operators have the same name '{}'.".format(op_name) + ) elif not isinstance(op_name, list): schemas = [op_name] if len(schemas) == 0: - raise ValueError( - "Unable to find any operator with name '{}'.".format(op_name)) + raise ValueError("Unable to find any operator with name '{}'.".format(op_name)) # from onnx.backend.sample.ops import collect_sample_implementations # from onnx.backend.test.case import collect_snippets @@ -141,7 +149,7 @@ def get_rst_doc(op_name=None): # SAMPLE_IMPLEMENTATIONS = collect_sample_implementations() def format_name_with_domain(sch): if sch.domain: - return '{} ({})'.format(sch.name, sch.domain) + return "{} ({})".format(sch.name, sch.domain) return sch.name def get_type_str(obj): @@ -161,11 +169,11 @@ def get_is_homogeneous(obj): def format_option(obj): opts = [] if OpSchema.FormalParameterOption.Optional == obj.option: - opts.append('optional') + opts.append("optional") elif OpSchema.FormalParameterOption.Variadic == obj.option: - opts.append('variadic') + opts.append("variadic") if get_is_homogeneous(obj): - opts.append('heterogeneous') + opts.append("heterogeneous") if opts: return " (%s)" % ", ".join(opts) return "" @@ -187,28 +195,29 @@ def getname(obj, i): def process_documentation(doc): if doc is None: - doc = '' + doc = "" doc = textwrap.dedent(doc) main_docs_url = "https://github.com/onnx/onnx/blob/master/" rep = { - '[the doc](IR.md)': '`ONNX <{0}docs/IR.md>`_', - '[the doc](Broadcasting.md)': - '`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_', - '
<dl>': '', - '</dl>': '', - '<dt>': '* ', - '<dd>': ' ', - '</dt>': '', - '</dd>': '', - '<tt>': '``', - '</tt>': '``', - '<br>': '\n', + "[the doc](IR.md)": "`ONNX <{0}docs/IR.md>`_", + "[the doc](Broadcasting.md)": ( + "`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_" + ), + "<dl>": "", + "</dl>": "", + "<dt>": "* ", + "<dd>": " ", + "</dt>": "", + "</dd>": "", + "<tt>": "``", + "</tt>": "``", + "<br>
": "\n", } for k, v in rep.items(): doc = doc.replace(k, v.format(main_docs_url)) move = 0 lines = [] - for line in doc.split('\n'): + for line in doc.split("\n"): if line.startswith("```"): if move > 0: move -= 4 @@ -228,21 +237,28 @@ def build_doc_url(sch): doc_url += "-ml" doc_url += ".md" doc_url += "#" - if sch.domain not in (None, '', 'ai.onnx'): + if sch.domain not in (None, "", "ai.onnx"): doc_url += sch.domain + "." return doc_url fnwd = format_name_with_domain tmpl = _template_operator - docs = tmpl.render(schemas=schemas, OpSchema=OpSchema, - len=len, getattr=getattr, sorted=sorted, - format_option=format_option, - getconstraint=getconstraint, - getname=getname, enumerate=enumerate, - format_name_with_domain=fnwd, - process_documentation=process_documentation, - build_doc_url=build_doc_url, - str=str, get_type_str=get_type_str) + docs = tmpl.render( + schemas=schemas, + OpSchema=OpSchema, + len=len, + getattr=getattr, + sorted=sorted, + format_option=format_option, + getconstraint=getconstraint, + getname=getname, + enumerate=enumerate, + format_name_with_domain=fnwd, + process_documentation=process_documentation, + build_doc_url=build_doc_url, + str=str, + get_type_str=get_type_str, + ) return docs @@ -250,23 +266,25 @@ def _get_doc_template_sklearn(): try: from jinja2 import Template except ImportError: + class Template: def __init__(self, *args): pass def render(self, **context): - schemas = context['schemas'] + schemas = context["schemas"] rows = [] for sch in schemas: - doc = sch.doc or '' + doc = sch.doc or "" name = sch.name if name is None: raise RuntimeError("An operator must have a name.") - rows.extend([name, "=" * len(name), - "", doc, ""]) + rows.extend([name, "=" * len(name), "", doc, ""]) return "\n".join(rows) - return Template(textwrap.dedent(""" + return Template( + textwrap.dedent( + """ {% for cl in classes %} .. _l-sklops-{{cl.__name__}}: @@ -285,7 +303,9 @@ def render(self, **context): {{format_doc(cl)}} {% endfor %} - """)) + """ + ) + ) _template_operator_sklearn = _get_doc_template_sklearn() @@ -302,10 +322,12 @@ def get_rst_doc_sklearn(): The function relies on module *jinja2* or replaces it with a simple rendering if not present. """ + def format_doc(cl): return "\n".join(cl.__doc__.split("\n")[1:]) from .sklearn_ops import dynamic_class_creation_sklearn + classes = dynamic_class_creation_sklearn() tmpl = _template_operator_sklearn values = [(k, v) for k, v in sorted(classes.items())] diff --git a/skl2onnx/algebra/complex_functions.py b/skl2onnx/algebra/complex_functions.py index 6aab3c45c..e704ac4ab 100644 --- a/skl2onnx/algebra/complex_functions.py +++ b/skl2onnx/algebra/complex_functions.py @@ -6,66 +6,86 @@ from ..common.data_types import FloatTensorType, DoubleTensorType from ..common.utils import get_unique_subgraph from .onnx_ops import ( - OnnxIdentity, OnnxScan, OnnxTranspose, - OnnxSub, OnnxReduceSumSquareApi18, - OnnxSqrt, OnnxPow, OnnxAbs, OnnxReduceSumApi11) + OnnxIdentity, + OnnxScan, + OnnxTranspose, + OnnxSub, + OnnxReduceSumSquareApi18, + OnnxSqrt, + OnnxPow, + OnnxAbs, + OnnxReduceSumApi11, +) -logger = getLogger('skl2onnx') +logger = getLogger("skl2onnx") -def onnx_squareform_pdist(X, metric='sqeuclidean', dtype=None, - op_version=None, **kwargs): +def onnx_squareform_pdist( + X, metric="sqeuclidean", dtype=None, op_version=None, **kwargs +): """ Returns the ONNX graph which computes ``squareform(pdist(X, metric=metric))``. 
""" - if metric == 'sqeuclidean': + if metric == "sqeuclidean": return _onnx_squareform_pdist_sqeuclidean( - X, dtype=dtype, op_version=op_version, **kwargs) - if metric == 'euclidean': - res = _onnx_squareform_pdist_sqeuclidean( - X, dtype=dtype, op_version=op_version) + X, dtype=dtype, op_version=op_version, **kwargs + ) + if metric == "euclidean": + res = _onnx_squareform_pdist_sqeuclidean(X, dtype=dtype, op_version=op_version) return OnnxSqrt(res, op_version=op_version, **kwargs) - raise NotImplementedError( - "metric='{}' is not implemented.".format(metric)) + raise NotImplementedError("metric='{}' is not implemented.".format(metric)) -def _onnx_squareform_pdist_sqeuclidean(X, dtype=None, op_version=None, - **kwargs): +def _onnx_squareform_pdist_sqeuclidean(X, dtype=None, op_version=None, **kwargs): """ Returns the ONNX graph which computes ``squareform(pdist(X, metric='sqeuclidean'))``. """ unique = get_unique_subgraph() - diff = OnnxSub('next_in', 'next', - op_version=op_version) - id_next = OnnxIdentity('next_in', output_names=['next_out'], - op_version=op_version) - flat = OnnxReduceSumSquareApi18(diff, axes=[1], op_version=op_version, - output_names=['scan_out'], keepdims=0) - flat.set_onnx_name_prefix('cflat_%d' % unique) - id_next.set_onnx_name_prefix('pdistsqe_%d' % unique) + diff = OnnxSub("next_in", "next", op_version=op_version) + id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=op_version) + flat = OnnxReduceSumSquareApi18( + diff, axes=[1], op_version=op_version, output_names=["scan_out"], keepdims=0 + ) + flat.set_onnx_name_prefix("cflat_%d" % unique) + id_next.set_onnx_name_prefix("pdistsqe_%d" % unique) tensor_type = FloatTensorType if dtype == np.float32 else DoubleTensorType scan_body = id_next.to_onnx( - OrderedDict([('next_in', tensor_type([None, None])), - ('next', tensor_type([None]))]), - outputs=[('next_out', tensor_type([None, None])), - ('scan_out', tensor_type([None]))], + OrderedDict( + [("next_in", tensor_type([None, None])), ("next", tensor_type([None]))] + ), + outputs=[ + ("next_out", tensor_type([None, None])), + ("scan_out", tensor_type([None])), + ], other_outputs=[flat], - target_opset=op_version) - - node = OnnxScan(X, X, output_names=['u(scan0)', 'u(scan1)'], - num_scan_inputs=1, - body=(scan_body.graph, [id_next, flat]), - op_version=op_version, **kwargs) - logger.debug('[_onnx_squareform_pdist_sqeuclidean] +Scan dtype=%r', - dtype) + target_opset=op_version, + ) + + node = OnnxScan( + X, + X, + output_names=["u(scan0)", "u(scan1)"], + num_scan_inputs=1, + body=(scan_body.graph, [id_next, flat]), + op_version=op_version, + **kwargs + ) + logger.debug("[_onnx_squareform_pdist_sqeuclidean] +Scan dtype=%r", dtype) return node[1] -def onnx_cdist(XA, XB, metric='sqeuclidean', dtype=None, - op_version=None, dim_in=None, dim_out=None, - **kwargs): +def onnx_cdist( + XA, + XB, + metric="sqeuclidean", + dtype=None, + op_version=None, + dim_in=None, + dim_out=None, + **kwargs +): """ Returns the ONNX graph which computes ``cdist(XA, XB, metric=metric)``. 
@@ -82,107 +102,161 @@ def onnx_cdist(XA, XB, metric='sqeuclidean', dtype=None, :param kwargs: addition parameter :return: OnnxOperatorMixin """ - if metric == 'sqeuclidean': + if metric == "sqeuclidean": return _onnx_cdist_sqeuclidean( - XA, XB, dtype=dtype, op_version=op_version, - dim_in=dim_in, dim_out=dim_out, **kwargs) - elif metric == 'euclidean': + XA, + XB, + dtype=dtype, + op_version=op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) + elif metric == "euclidean": res = _onnx_cdist_sqeuclidean( - XA, XB, dtype=dtype, op_version=op_version, - dim_in=dim_in, dim_out=dim_out) + XA, XB, dtype=dtype, op_version=op_version, dim_in=dim_in, dim_out=dim_out + ) return OnnxSqrt(res, op_version=op_version, **kwargs) - elif metric == 'minkowski': - p = kwargs.pop('p') + elif metric == "minkowski": + p = kwargs.pop("p") res = _onnx_cdist_minkowski( - XA, XB, dtype=dtype, op_version=op_version, p=p, - dim_in=dim_in, dim_out=dim_out) - return OnnxPow(res, np.array([1. / p], dtype=dtype), - op_version=op_version, **kwargs) - elif metric in ('manhattan', 'cityblock'): + XA, + XB, + dtype=dtype, + op_version=op_version, + p=p, + dim_in=dim_in, + dim_out=dim_out, + ) + return OnnxPow( + res, np.array([1.0 / p], dtype=dtype), op_version=op_version, **kwargs + ) + elif metric in ("manhattan", "cityblock"): return _onnx_cdist_manhattan( - XA, XB, dtype=dtype, op_version=op_version, - dim_in=dim_in, dim_out=dim_out, **kwargs) + XA, + XB, + dtype=dtype, + op_version=op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) else: - raise NotImplementedError("metric='{}' is not implemented.".format( - metric)) + raise NotImplementedError("metric='{}' is not implemented.".format(metric)) def _onnx_cdist_begin(op_version): - diff = OnnxSub('next_in', 'next', - op_version=op_version) - id_next = OnnxIdentity('next_in', output_names=['next_out'], - op_version=op_version) + diff = OnnxSub("next_in", "next", op_version=op_version) + id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=op_version) return diff, id_next -def _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version, - dim_in=None, dim_out=None, **kwargs): +def _onnx_cdist_end( + XA, XB, id_next, flat, dtype, op_version, dim_in=None, dim_out=None, **kwargs +): unique = get_unique_subgraph() tensor_type = FloatTensorType if dtype == np.float32 else DoubleTensorType - id_next.set_onnx_name_prefix('cdistd_%d' % unique) - flat.set_onnx_name_prefix('cdistdf_%d' % unique) - shape_in = (tensor_type([None, None]) if dim_in is None - else tensor_type([None, dim_in])) + id_next.set_onnx_name_prefix("cdistd_%d" % unique) + flat.set_onnx_name_prefix("cdistdf_%d" % unique) + shape_in = ( + tensor_type([None, None]) if dim_in is None else tensor_type([None, dim_in]) + ) scan_body = id_next.to_onnx( - OrderedDict([('next_in', shape_in), - ('next', tensor_type([None]))]), - outputs=[('next_out', tensor_type([None, None])), - ('scan_out', tensor_type([None]))], + OrderedDict([("next_in", shape_in), ("next", tensor_type([None]))]), + outputs=[ + ("next_out", tensor_type([None, None])), + ("scan_out", tensor_type([None])), + ], other_outputs=[flat], - target_opset=op_version) - logger.debug('[_onnx_cdist_end] + Scan dim_in=%r dim_out=%r dtype=%r', - dim_in, dim_out, dtype) + target_opset=op_version, + ) + logger.debug( + "[_onnx_cdist_end] + Scan dim_in=%r dim_out=%r dtype=%r", dim_in, dim_out, dtype + ) - node = OnnxScan(XA, XB, output_names=['u(scan0)', 'u(scan1)'], - num_scan_inputs=1, - body=(scan_body.graph, [id_next, flat]), - 
op_version=op_version) - return OnnxTranspose(node[1], perm=[1, 0], op_version=op_version, - **kwargs) + node = OnnxScan( + XA, + XB, + output_names=["u(scan0)", "u(scan1)"], + num_scan_inputs=1, + body=(scan_body.graph, [id_next, flat]), + op_version=op_version, + ) + return OnnxTranspose(node[1], perm=[1, 0], op_version=op_version, **kwargs) -def _onnx_cdist_sqeuclidean(XA, XB, dtype=None, op_version=None, - dim_in=None, dim_out=None, **kwargs): +def _onnx_cdist_sqeuclidean( + XA, XB, dtype=None, op_version=None, dim_in=None, dim_out=None, **kwargs +): """ Returns the ONNX graph which computes ``cdist(X, metric='sqeuclidean')``. """ diff, id_next = _onnx_cdist_begin(op_version) - norm = OnnxReduceSumSquareApi18( - diff, axes=[1], keepdims=0, op_version=op_version) - flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version) - return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version, - dim_in=dim_in, dim_out=dim_out, **kwargs) + norm = OnnxReduceSumSquareApi18(diff, axes=[1], keepdims=0, op_version=op_version) + flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version) + return _onnx_cdist_end( + XA, + XB, + id_next, + flat, + dtype, + op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) -def _onnx_cdist_minkowski(XA, XB, dtype=None, op_version=None, p=2, - dim_in=None, dim_out=None, **kwargs): +def _onnx_cdist_minkowski( + XA, XB, dtype=None, op_version=None, p=2, dim_in=None, dim_out=None, **kwargs +): """ Returns the ONNX graph which computes the Minkowski distance or ``minkowski(XA, XB, p)``. """ diff, id_next = _onnx_cdist_begin(op_version) - diff_pow = OnnxPow(OnnxAbs(diff, op_version=op_version), - np.array([p], dtype=dtype), op_version=op_version) - norm = OnnxReduceSumApi11( - diff_pow, axes=[1], keepdims=0, op_version=op_version) + diff_pow = OnnxPow( + OnnxAbs(diff, op_version=op_version), + np.array([p], dtype=dtype), + op_version=op_version, + ) + norm = OnnxReduceSumApi11(diff_pow, axes=[1], keepdims=0, op_version=op_version) norm.set_onnx_name_prefix("norm_%d" % id(norm)) - flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version) - return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version, - dim_in=dim_in, dim_out=dim_out, **kwargs) + flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version) + return _onnx_cdist_end( + XA, + XB, + id_next, + flat, + dtype, + op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) -def _onnx_cdist_manhattan(XA, XB, dtype=None, op_version=None, - dim_in=None, dim_out=None, **kwargs): +def _onnx_cdist_manhattan( + XA, XB, dtype=None, op_version=None, dim_in=None, dim_out=None, **kwargs +): """ Returns the ONNX graph which computes the Manhattan distance or ``Manhattan(X, Y)``. 
""" diff, id_next = _onnx_cdist_begin(op_version) diff_pow = OnnxAbs(diff, op_version=op_version) - norm = OnnxReduceSumApi11( - diff_pow, axes=[1], keepdims=0, op_version=op_version) + norm = OnnxReduceSumApi11(diff_pow, axes=[1], keepdims=0, op_version=op_version) norm.set_onnx_name_prefix("norm_%d" % id(norm)) - flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version) - return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version, - dim_in=dim_in, dim_out=dim_out, **kwargs) + flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version) + return _onnx_cdist_end( + XA, + XB, + id_next, + flat, + dtype, + op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) diff --git a/skl2onnx/algebra/custom_ops.py b/skl2onnx/algebra/custom_ops.py index f870fd9ca..9ca39a9f7 100644 --- a/skl2onnx/algebra/custom_ops.py +++ b/skl2onnx/algebra/custom_ops.py @@ -10,17 +10,16 @@ class OnnxCDist(OnnxOperator): """ since_version = 1 - expected_inputs = [('X', 'T'), ('Y', 'T')] - expected_outputs = [('dist', 'T')] + expected_inputs = [("X", "T"), ("Y", "T")] + expected_outputs = [("dist", "T")] input_range = [2, 2] output_range = [1, 1] is_deprecated = False - domain = 'com.microsoft' - operator_name = 'CDist' + domain = "com.microsoft" + operator_name = "CDist" past_version = {} - def __init__(self, X, Y, metric='sqeuclidean', op_version=None, - **kwargs): + def __init__(self, X, Y, metric="sqeuclidean", op_version=None, **kwargs): """ :param X: array or OnnxOperatorMixin :param Y: array or OnnxOperatorMixin @@ -29,8 +28,9 @@ def __init__(self, X, Y, metric='sqeuclidean', op_version=None, :param op_version: opset version :param kwargs: addition parameter """ - OnnxOperator.__init__(self, X, Y, metric=metric, - op_version=op_version, **kwargs) + OnnxOperator.__init__( + self, X, Y, metric=metric, op_version=op_version, **kwargs + ) class OnnxSolve(OnnxOperator): @@ -40,17 +40,16 @@ class OnnxSolve(OnnxOperator): """ since_version = 1 - expected_inputs = [('A', 'T'), ('Y', 'T')] - expected_outputs = [('X', 'T')] + expected_inputs = [("A", "T"), ("Y", "T")] + expected_outputs = [("X", "T")] input_range = [2, 2] output_range = [1, 1] is_deprecated = False - domain = 'com.microsoft' - operator_name = 'Solve' + domain = "com.microsoft" + operator_name = "Solve" past_version = {} - def __init__(self, A, Y, lower=False, transposed=False, - op_version=None, **kwargs): + def __init__(self, A, Y, lower=False, transposed=False, op_version=None, **kwargs): """ :param A: array or OnnxOperatorMixin :param Y: array or OnnxOperatorMixin @@ -60,6 +59,12 @@ def __init__(self, A, Y, lower=False, transposed=False, :param op_version: opset version :param kwargs: additional parameters """ - OnnxOperator.__init__(self, A, Y, - lower=lower, transposed=transposed, - op_version=op_version, **kwargs) + OnnxOperator.__init__( + self, + A, + Y, + lower=lower, + transposed=transposed, + op_version=op_version, + **kwargs + ) diff --git a/skl2onnx/algebra/graph_state.py b/skl2onnx/algebra/graph_state.py index 8bcec801b..b96cd4f8d 100644 --- a/skl2onnx/algebra/graph_state.py +++ b/skl2onnx/algebra/graph_state.py @@ -6,13 +6,20 @@ from onnx import GraphProto from ..proto import onnx_proto, TensorProto from ..common.data_types import ( - guess_proto_type, _guess_numpy_type, _guess_type_proto_str, - _guess_type_proto, FloatType, DoubleType, Int64Type, copy_type) + guess_proto_type, + _guess_numpy_type, + _guess_type_proto_str, + _guess_type_proto, + FloatType, + DoubleType, + Int64Type, + copy_type, +) 
from ..common._topology import Variable from ..common._registration import get_shape_calculator, get_converter -logger = getLogger('skl2onnx') +logger = getLogger("skl2onnx") class GraphStateVar: @@ -20,18 +27,31 @@ class GraphStateVar: class GraphState: - - def __init__(self, inputs, output_names, operator_name, scope, - container, converter, onnx_prefix_name=None, - options=None, expected_inputs=None, - expected_outputs=None, input_range=None, - output_range=None, operator=None, - run_converters=False, input_types=None, **attrs): - + def __init__( + self, + inputs, + output_names, + operator_name, + scope, + container, + converter, + onnx_prefix_name=None, + options=None, + expected_inputs=None, + expected_outputs=None, + input_range=None, + output_range=None, + operator=None, + run_converters=False, + input_types=None, + **attrs + ): logger.debug( "[State] +%s n_inputs=%r n_outputs=%r", - operator_name, -1 if inputs is None else len(inputs), - -1 if output_names is None else len(output_names)) + operator_name, + -1 if inputs is None else len(inputs), + -1 if output_names is None else len(output_names), + ) self.inputs = inputs self._output_names = output_names self._input_range = input_range.copy() if input_range else [1, 1e9] @@ -39,8 +59,9 @@ def __init__(self, inputs, output_names, operator_name, scope, self.scope = scope self.run_converters = run_converters self.operator = operator - if hasattr(operator_name, 'fit'): + if hasattr(operator_name, "fit"): from .. import get_model_alias + self.operator_instance = operator_name self.is_model = True self.operator_name = get_model_alias(type(operator_name)) @@ -54,9 +75,11 @@ def __init__(self, inputs, output_names, operator_name, scope, self.container = container self.converter = converter self._expected_inputs = ( - None if expected_inputs is None else expected_inputs.copy()) + None if expected_inputs is None else expected_inputs.copy() + ) self._expected_outputs = ( - None if expected_outputs is None else expected_outputs.copy()) + None if expected_outputs is None else expected_outputs.copy() + ) self.computed_inputs_ = None self.computed_outputs_ = None self.sub_op_ = None @@ -65,27 +88,34 @@ def __init__(self, inputs, output_names, operator_name, scope, self.options = options self.input_types = input_types - for att in ['inputs', '_expected_inputs', - '_expected_outputs', 'computed_inputs_', - 'computed_outputs_', '_outputs']: + for att in [ + "inputs", + "_expected_inputs", + "_expected_outputs", + "computed_inputs_", + "computed_outputs_", + "_outputs", + ]: v = getattr(self, att, None) if v is None: continue if not isinstance(v, list): raise TypeError( - "Attribute %r must be a list not %r." - "" % (att, type(v))) + "Attribute %r must be a list not %r." "" % (att, type(v)) + ) for i, vi in enumerate(v): - if hasattr(vi, 'state') or hasattr(vi, 'onx_op'): + if hasattr(vi, "state") or hasattr(vi, "onx_op"): continue if not isinstance(vi, (tuple, str, Variable, GraphStateVar)): raise TypeError( "Unexpected type %r for element %d of attribute %r " - "in %r." % (type(vi), i, att, v)) + "in %r." % (type(vi), i, att, v) + ) if isinstance(vi, tuple) and len(vi) != 2: raise ValueError( "Unexpected value %r for element %d of attribute %r." 
- "" % (vi, i, att)) + "" % (vi, i, att) + ) change = [] for vi in v: change.append((vi, None) if isinstance(vi, str) else vi) @@ -96,8 +126,8 @@ def __init__(self, inputs, output_names, operator_name, scope, for i in range(0, len(self._expected_outputs)): if i < len(self._output_names): res.append( - (self._output_names[i], - self._expected_outputs[i][1])) + (self._output_names[i], self._expected_outputs[i][1]) + ) else: res.append(self._expected_outputs[i]) for i in range(len(res), len(self._output_names)): @@ -107,7 +137,7 @@ def __init__(self, inputs, output_names, operator_name, scope, if self._expected_outputs is not None: res = [] for p in self._expected_outputs: - if isinstance(p[1], str) and p[1].startswith('tensor('): + if isinstance(p[1], str) and p[1].startswith("tensor("): res.append((p[0], _guess_type_proto_str(p[1], None))) else: res.append(p) @@ -116,7 +146,7 @@ def __init__(self, inputs, output_names, operator_name, scope, if self._expected_inputs is not None: res = [] for p in self._expected_inputs: - if isinstance(p[1], str) and p[1].startswith('tensor('): + if isinstance(p[1], str) and p[1].startswith("tensor("): res.append((p[0], _guess_type_proto_str(p[1], None))) else: res.append(p) @@ -135,9 +165,13 @@ def outputs(self): def _get_var_name(self, var, in_out, operator=None, index=None): "input: True for output, False for input" - if hasattr(var, 'add_to'): - var.add_to(self.scope, self.container, operator=operator, - run_converters=self.run_converters) + if hasattr(var, "add_to"): + var.add_to( + self.scope, + self.container, + operator=operator, + run_converters=self.run_converters, + ) outputs = var.outputs if isinstance(outputs, list): vars = [] @@ -149,20 +183,29 @@ def _get_var_name(self, var, in_out, operator=None, index=None): if len(vars) == 0: raise RuntimeError( "Empty inputs outputs=%s var=%s in_out=%s " - "operator=%r." % (outputs, var, in_out, operator)) + "operator=%r." % (outputs, var, in_out, operator) + ) return vars raise RuntimeError("Unexpected output type {}".format(outputs)) def __fct__(var, operator): if isinstance(var, Variable): return [var] - if isinstance(var, (np.ndarray, np.bool_, np.int64, - np.float32, np.float64, - np.int8, np.uint8)): + if isinstance( + var, + ( + np.ndarray, + np.bool_, + np.int64, + np.float32, + np.float64, + np.int8, + np.uint8, + ), + ): return [self._add_constant(var)] - if hasattr(var, 'ConstantValue'): - return [ - self._add_constant(var.ConstantValue, scope=self.scope)] + if hasattr(var, "ConstantValue"): + return [self._add_constant(var.ConstantValue, scope=self.scope)] if isinstance(var, str): return [(var, None)] if isinstance(var, tuple) and len(var) == 2: @@ -172,23 +215,25 @@ def __fct__(var, operator): return [(a, b)] except ValueError: pass - raise RuntimeError("Unexpected type for parameter 'var': {0}." - "".format(type(var))) + raise RuntimeError( + "Unexpected type for parameter 'var': {0}." "".format(type(var)) + ) try: v = __fct__(var, operator) except TypeError as e: raise RuntimeError( "Unable to process one variable %s and operator=%s " - "(name=%r)." % (var, operator, self.operator_name)) from e + "(name=%r)." % (var, operator, self.operator_name) + ) from e if v is None or not isinstance(v, list) or len(v) == 0: - raise TypeError( - "Unexpected type or empty value %r - %s." % (type(v), v)) + raise TypeError("Unexpected type or empty value %r - %s." 
% (type(v), v)) if in_out and self._output_names is not None and index is not None: if len(v) != 1: raise RuntimeError( - "Mismatch number of outputs between %s and %s." % ( - v, self._output_names[index])) + "Mismatch number of outputs between %s and %s." + % (v, self._output_names[index]) + ) v2 = self.scope.get(var[0], None) if v2 is not None: v = [v2] @@ -197,91 +242,81 @@ def __fct__(var, operator): except IndexError as e: raise ValueError( "Unexpected output %s in operator name %r." - "" % (vn, self.operator_name)) from e - if (index >= len(self._output_names) and - index >= self._output_range[0]): + "" % (vn, self.operator_name) + ) from e + if index >= len(self._output_names) and index >= self._output_range[0]: return None try: vin = self._output_names[index] except IndexError as e: raise ValueError( "Unexpected index %s in operator name %r with ." - "output names %s." % ( - index, self.operator_name, - self._output_names)) from e + "output names %s." % (index, self.operator_name, self._output_names) + ) from e if vn != vin: raise RuntimeError( - "Mismatched output name %r between %s and %s." % ( - vn, v, vin)) + "Mismatched output name %r between %s and %s." % (vn, v, vin) + ) return v def _add_constant(self, cst, scope): - def _ty_astype(cst): astype = cst.dtype try: ty = guess_proto_type(_guess_numpy_type(cst.dtype, cst.shape)) except NotImplementedError as e: st = str(astype).lower() - if st.startswith('u') or st.startswith("= len(new_inputs) and - j >= input_range[0]): + if j >= len(new_inputs) and j >= input_range[0]: continue if new_inputs[j].type is not None: - new_inputs[i].set_type( - new_inputs[j].type.__class__()) + new_inputs[i].set_type(new_inputs[j].type.__class__()) break # Overwrite types if input_types is specified. @@ -390,8 +421,9 @@ def _update_inputs(inputs, names, scope, expected_inputs, if i >= len(input_types): raise RuntimeError( "Mismatch between computed inputs[%d]=%r and " - "overwritten input_types[%d]=%r." % ( - i, new_inputs, i, input_types)) + "overwritten input_types[%d]=%r." + % (i, new_inputs, i, input_types) + ) if input_types[i] is not None: new_inputs[i].type = input_types[i] return new_inputs @@ -403,15 +435,13 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None): if va is None or ex is None: continue for v, ct in zip(va, ex): - if (isinstance(v, str) or ( - hasattr(v, 'type') and v.type is None)): + if isinstance(v, str) or (hasattr(v, "type") and v.type is None): continue - vt = (copy_type(v.type) - if hasattr(v, 'type') else copy_type(v[1])) + vt = copy_type(v.type) if hasattr(v, "type") else copy_type(v[1]) if isinstance(vt, str): continue key = ct[1] - if isinstance(key, str) and key[0] in ('T', 'I', 'V'): + if isinstance(key, str) and key[0] in ("T", "I", "V"): if not isinstance(vt, str) and key not in memo: memo[key] = [] memo[key].append(vt) @@ -420,12 +450,13 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None): if len(set(_.__class__ for _ in v)) != 1: raise RuntimeError( "Conflicted constraint %r, got types %r operator=%s" - "." % (k, v, debug)) + "." 
% (k, v, debug) + ) for i in range(0, len(vars1)): inp = vars1[i] if isinstance(inp, str): continue - if hasattr(inp, 'type') and inp.type is None: + if hasattr(inp, "type") and inp.type is None: ct = expected1[i][1] if ct in memo: vars1[i].set_type(copy_type(memo[ct][0])) @@ -436,7 +467,6 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None): def run(self): if self.computed_outputs_ is None: - # We need to register all names in subgraphs and raise # an exception if the names are already taken. for k, v in self.attrs.items(): @@ -448,8 +478,8 @@ def run(self): "A name exists both in the subgraph and " "in the main graph. Use set_onnx_name_prefix to " "to rename one of them, attribute=%r, " - "op_type=%r." % ( - k, self.operator_name)) from e + "op_type=%r." % (k, self.operator_name) + ) from e if self.operator is not None: expected_outputs = self.operator.outputs @@ -466,9 +496,12 @@ def run(self): expected_outputs = None logger.debug( - "[State.run] id=%d op_name=%r is_model=%r " - "expected_outputs=%r", - id(self), self.operator_name, self.is_model, expected_outputs) + "[State.run] id=%d op_name=%r is_model=%r " "expected_outputs=%r", + id(self), + self.operator_name, + self.is_model, + expected_outputs, + ) inputs = [] for i in self.inputs: @@ -476,20 +509,25 @@ def run(self): inputs.extend(v) self.computed_inputs_ = GraphState._update_inputs( - self.inputs, inputs, scope=self.scope, + self.inputs, + inputs, + scope=self.scope, expected_inputs=self._expected_inputs, input_range=self._input_range, - input_types=self.input_types) + input_types=self.input_types, + ) logger.debug( "[State.run] id=%d op_name=%r computed_inputs_=%r", - id(self), self.operator_name, self.computed_inputs_) + id(self), + self.operator_name, + self.computed_inputs_, + ) name = self.scope.get_unique_operator_name(self.onnx_prefix) if self.is_model: if self.sub_op_ is not None: - raise NotImplementedError( - "Attribute 'sub_op_' is not empty.") + raise NotImplementedError("Attribute 'sub_op_' is not empty.") # a model is converted into a subgraph sub_op_inputs = self.computed_inputs_ @@ -497,56 +535,70 @@ def run(self): if not isinstance(v, Variable): raise TypeError( "Every input variable must be a Variable not %r," - " v=%r." % (type(v), v)) + " v=%r." % (type(v), v) + ) scope = v.scope - if hasattr(scope, 'variables'): + if hasattr(scope, "variables"): if v.onnx_name not in scope.variables: raise RuntimeError( "Variable %r missing from scope " - "(operator=%r, model=%r), list=%r." % ( - v, self.operator, + "(operator=%r, model=%r), list=%r." + % ( + v, + self.operator, type(self.operator_instance), - list(sorted(self.scope.variables)))) + list(sorted(self.scope.variables)), + ) + ) # output are not defined, we need to call a parser. from .._parse import _parse_sklearn - self.scope.add_options( - id(self.operator_instance), self.options) + + self.scope.add_options(id(self.operator_instance), self.options) try: sub_outputs = _parse_sklearn( - self.scope, self.operator_instance, sub_op_inputs, - alias=self.operator_name) + self.scope, + self.operator_instance, + sub_op_inputs, + alias=self.operator_name, + ) except RuntimeError as e: raise RuntimeError( "Unable to run parser for model type %r, inputs=%r " - "(input_types=%r)." % ( - type(self.operator_instance), sub_op_inputs, - self.input_types)) from e + "(input_types=%r)." 
+ % ( + type(self.operator_instance), + sub_op_inputs, + self.input_types, + ) + ) from e set_input_names = set(v.onnx_name for v in sub_op_inputs) sub_op = None for op in self.scope.operators.values(): for inp in op.inputs: if inp.onnx_name in set_input_names: sub_op = op - if (sub_outputs is None or - None in sub_outputs): + if sub_outputs is None or None in sub_outputs: raise RuntimeError( "Wrong result when parsing model {}.".format( - type(self.operator_instance))) + type(self.operator_instance) + ) + ) # Checks operator outputs for out in sub_outputs: if not isinstance(out, Variable): - raise TypeError( - "Output %s must be of type Variable." % out) + raise TypeError("Output %s must be of type Variable." % out) self.sub_op_ = sub_op sub_op.outputs = sub_outputs shape_calc = get_shape_calculator(self.operator_name) logger.debug( - "[StateShape] call %r fed %r - %r", sub_op, + "[StateShape] call %r fed %r - %r", + sub_op, "".join(str(i.is_fed) for i in sub_op.inputs), - "".join(str(i.is_fed) for i in sub_op.outputs)) + "".join(str(i.is_fed) for i in sub_op.outputs), + ) shape_calc(sub_op) logger.debug("[StateShape] end - %r", sub_op) @@ -554,91 +606,107 @@ def run(self): # in Topology. if sub_op.outputs is not None and len(sub_op.outputs) > 0: outputs = [ - self.scope.declare_local_variable( - o.onnx_name, type=o.type) - for o in sub_op.outputs] - elif (expected_outputs is not None and - len(expected_outputs) > 0): + self.scope.declare_local_variable(o.onnx_name, type=o.type) + for o in sub_op.outputs + ] + elif expected_outputs is not None and len(expected_outputs) > 0: outputs = [ - self._get_output_name( - self._output_names, o, self.scope) - for o in expected_outputs] + self._get_output_name(self._output_names, o, self.scope) + for o in expected_outputs + ] else: raise RuntimeError( "sub_op.outputs is None as well as expected_outputs " - "for operator %r." % sub_op) + "for operator %r." % sub_op + ) if len(outputs) != len(sub_op.outputs): raise RuntimeError( - "Mismatched number of outputs %s and %s." % ( - outputs, sub_op.outputs)) + "Mismatched number of outputs %s and %s." + % (outputs, sub_op.outputs) + ) for i, out in enumerate(sub_op.outputs): var = outputs[i] self.container.add_node( - 'Identity', [out.onnx_name], [var[0]], - name=self.scope.get_unique_operator_name("SubOpId")) + "Identity", + [out.onnx_name], + [var[0]], + name=self.scope.get_unique_operator_name("SubOpId"), + ) self.computed_outputs_ = outputs self.computed_inputs2_ = sub_op.inputs - self.computed_outputs2_ = [ - (v[0], v[1]) for v in self.computed_outputs_] + self.computed_outputs2_ = [(v[0], v[1]) for v in self.computed_outputs_] if self.run_converters: # The parser was run on sub-operators but not the # converter. 
conv = get_converter(self.operator_name) logger.debug( - "[StateConv] %r fed %r - %r", sub_op, + "[StateConv] %r fed %r - %r", + sub_op, "".join(str(i.is_fed) for i in sub_op.inputs), - "".join(str(i.is_fed) for i in sub_op.outputs)) + "".join(str(i.is_fed) for i in sub_op.outputs), + ) conv(self.scope, sub_op, self.container) logger.debug("[StateConv] %r - end.", sub_op) else: - if (expected_outputs is not None and - len(sub_op.outputs) == len(expected_outputs)): + if expected_outputs is not None and len(sub_op.outputs) == len( + expected_outputs + ): for v1, v2 in zip(sub_op.outputs, expected_outputs): if isinstance(v2, tuple): v2 = v2[0] - if (hasattr(v1, 'onnx_name') and - hasattr(v2, 'onnx_name')): + if hasattr(v1, "onnx_name") and hasattr(v2, "onnx_name"): if v1.onnx_name != v2.onnx_name: # One identity is missing - n = self.scope.get_unique_operator_name( - 'idgstate') + n = self.scope.get_unique_operator_name("idgstate") self.container.add_node( - 'Identity', [v1.onnx_name], - [v2.onnx_name], name=n) + "Identity", + [v1.onnx_name], + [v2.onnx_name], + name=n, + ) else: def _name_(obj): if isinstance(obj, tuple) and len(obj) == 2: return obj[0] - if hasattr(obj, 'onnx_name'): + if hasattr(obj, "onnx_name"): return obj.onnx_name - raise TypeError( - "Unable to extract variable name from %r." % obj) + raise TypeError("Unable to extract variable name from %r." % obj) # only one node is added if self.options is not None: raise RuntimeError( - "Options must be empty for node %r but is it %r." % ( - self.operator_name, self.options)) + "Options must be empty for node %r but is it %r." + % (self.operator_name, self.options) + ) outputs = [ self._get_output_name(self._output_names, o, self.scope) - for o in expected_outputs] + for o in expected_outputs + ] input_names = [_name_(i) for i in inputs] output_names = [_name_(i) for i in outputs] self.container.add_node( - self.operator_name, input_names, output_names, - name=name, **self.attrs) + self.operator_name, + input_names, + output_names, + name=name, + **self.attrs + ) computed_outputs = [ - (name, ct[1]) for name, ct in zip( - output_names, self._expected_outputs)] + (name, ct[1]) + for name, ct in zip(output_names, self._expected_outputs) + ] self._update_contraints( - computed_outputs, self._expected_outputs, - self.computed_inputs_, self._expected_inputs, - debug=self.operator_name) + computed_outputs, + self._expected_outputs, + self.computed_inputs_, + self._expected_inputs, + debug=self.operator_name, + ) # Registers the variables into scope. 
self.computed_outputs_ = [] @@ -647,11 +715,12 @@ def _name_(obj): self.computed_outputs_.append((name, kind)) else: var = self.scope.declare_local_variable( - name, kind, missing_type=True) + name, kind, missing_type=True + ) # name already comes from # scope.get_unique_variable_name var.set_onnx_name(name) var.init_status(is_fed=True) self.computed_outputs_.append(var) - logger.debug('[State.run] end id=%d', id(self)) + logger.debug("[State.run] end id=%d", id(self)) diff --git a/skl2onnx/algebra/onnx_operator.py b/skl2onnx/algebra/onnx_operator.py index 2e27099f5..2dca4485c 100644 --- a/skl2onnx/algebra/onnx_operator.py +++ b/skl2onnx/algebra/onnx_operator.py @@ -8,11 +8,16 @@ from onnx.numpy_helper import from_array from scipy.sparse import coo_matrix from ..proto import TensorProto -from ..common.data_types import ( - _guess_type_proto_str, _guess_type_proto_str_inv) +from ..common.data_types import _guess_type_proto_str, _guess_type_proto_str_inv from ..common._topology import ( - Variable, VariableStr, Scope, _update_domain_version, Operator, - _get_main_opset_version, OPSET_TO_IR_VERSION) + Variable, + VariableStr, + Scope, + _update_domain_version, + Operator, + _get_main_opset_version, + OPSET_TO_IR_VERSION, +) from ..common._container import ModelComponentContainer from ..common import utils from ..common.data_types import guess_proto_type, _guess_numpy_type @@ -24,7 +29,7 @@ from .type_helper import _guess_type -logger = getLogger('skl2onnx') +logger = getLogger("skl2onnx") class OnnxOperatorItem: @@ -65,8 +70,9 @@ def add_to(self, scope, container, operator=None, run_converters=False): :param operator: overwrite inputs :param run_converters: must be True if called from method `to_onnx` """ - self.onx_op.add_to(scope, container, operator=operator, - run_converters=run_converters) + self.onx_op.add_to( + scope, container, operator=operator, run_converters=run_converters + ) def get_output_name(self, i=0): """ @@ -91,20 +97,22 @@ def outputs(self): """ if self.onx_op is None: raise RuntimeError( - "self.onx_op cannot be None, type(self)={}".format( - type(self))) + "self.onx_op cannot be None, type(self)={}".format(type(self)) + ) if self.index is None: raise RuntimeError( - "self.index cannot be None, type(self)={}".format( - type(self))) + "self.index cannot be None, type(self)={}".format(type(self)) + ) outputs = self.onx_op.outputs if outputs is None: raise RuntimeError( "self.onx_op.outputs cannot be None, " "type(self)={}, type(self.onx_op)={}, " "type(self.onx_op.state)={}".format( - type(self), type(self.onx_op), type(self.onx_op.state))) - return outputs[self.index:self.index + 1] + type(self), type(self.onx_op), type(self.onx_op.state) + ) + ) + return outputs[self.index : self.index + 1] def get_output_type_inference(self, input_shapes=None): """ @@ -112,20 +120,22 @@ def get_output_type_inference(self, input_shapes=None): """ if self.onx_op is None: raise RuntimeError( - "self.onx_op cannot be None, type(self)={}".format( - type(self))) + "self.onx_op cannot be None, type(self)={}".format(type(self)) + ) if self.index is None: raise RuntimeError( - "self.index cannot be None, type(self)={}".format( - type(self))) + "self.index cannot be None, type(self)={}".format(type(self)) + ) outputs = self.onx_op.get_output_type_inference(input_shapes) if outputs is None: raise RuntimeError( "self.onx_op.outputs cannot be None, " "type(self)={}, type(self.onx_op)={}, " "type(self.onx_op.state)={}".format( - type(self), type(self.onx_op), type(self.onx_op.state))) - return 
outputs[self.index:self.index + 1] + type(self), type(self.onx_op), type(self.onx_op.state) + ) + ) + return outputs[self.index : self.index + 1] class OnnxOperator: @@ -157,16 +167,15 @@ class OnnxOperator: Parameter *global_context*, *clear_subgraph_inputs* were added. """ - class OnnxOperatorVariable(GraphStateVar): + class OnnxOperatorVariable(GraphStateVar): def __init__(self, index, name=None): self.index = index self.name = name def as_variable(self, scope): name = "ov%s" % self.name - if (hasattr(self, "variable_") and - self.variable_.onnx_name == name): + if hasattr(self, "variable_") and self.variable_.onnx_name == name: return self.variable_ var = Variable(name, name, scope=scope, type=None) if scope is not None: @@ -187,8 +196,7 @@ def __init__(self, name): def as_variable(self, scope): name = self.name - if (hasattr(self, "variable_") and - self.variable_.onnx_name == name): + if hasattr(self, "variable_") and self.variable_.onnx_name == name: return self.variable_ if scope is not None: if name in scope.variables: @@ -208,8 +216,7 @@ def __eq__(self, name): elif isinstance(name, OnnxOperator.UnscopedVariable): return self.name == name.name else: - raise TypeError('Unsupported type for comparison {}'.format( - type(name))) + raise TypeError("Unsupported type for comparison {}".format(type(name))) def __repr__(self): return "UnscopedVariable('%s')" % self.name @@ -225,15 +232,12 @@ def __init__(self, value): def as_variable(self, scope): ha = utils.hash_array(self.value) name = "CST%s" % ha - if (hasattr(self, "variable_") and - self.variable_.onnx_name == name): + if hasattr(self, "variable_") and self.variable_.onnx_name == name: return self.variable_ if scope is not None: - var = scope.declare_local_variable( - name, type=_guess_type(self.value)) + var = scope.declare_local_variable(name, type=_guess_type(self.value)) else: - var = Variable(name, name, scope=scope, - type=_guess_type(self.value)) + var = Variable(name, name, scope=scope, type=_guess_type(self.value)) self.variable_ = var return var @@ -256,9 +260,11 @@ def find_schema(self, op_version): :param op_version: requested version :return: schema """ - if not hasattr(self.__class__, 'past_version'): - raise RuntimeError("Missing attribute 'past_version', there is " - "no other available schema.") + if not hasattr(self.__class__, "past_version"): + raise RuntimeError( + "Missing attribute 'past_version', there is " + "no other available schema." + ) found = None for v in self.past_version.values(): if v.since_version > op_version: @@ -269,20 +275,27 @@ def find_schema(self, op_version): raise RuntimeError( "Operator '{}': requested version {} < " "{} schema version.".format( - self.__class__.__name__, - op_version, self.since_version)) + self.__class__.__name__, op_version, self.since_version + ) + ) return found - def __init__(self, *inputs, op_version=None, output_names=None, - domain=None, global_context=None, - clear_subgraph_inputs=False, **kwargs): - - if (output_names is None and - self.__class__.__name__.startswith("OnnxScan")): + def __init__( + self, + *inputs, + op_version=None, + output_names=None, + domain=None, + global_context=None, + clear_subgraph_inputs=False, + **kwargs + ): + if output_names is None and self.__class__.__name__.startswith("OnnxScan"): raise NotImplementedError( "The class cannot infer the number of variables " "for node '{}' yet. 
output_names must be specified" - ".".format(self.__class__.__name__)) + ".".format(self.__class__.__name__) + ) if isinstance(output_names, (str, Variable)): output_names = [output_names] if isinstance(output_names[0], str): @@ -290,20 +303,22 @@ def __init__(self, *inputs, op_version=None, output_names=None, elif isinstance(output_names, Operator): if len(output_names.outputs) == 0: raise ValueError( - "output_names cannot be empty (operator %r)." - "" % output_names) + "output_names cannot be empty (operator %r)." "" % output_names + ) output_names = output_names.outputs.copy() elif isinstance(output_names, Operator.OperatorList): if len(output_names) == 0: raise ValueError( "output_names cannot be empty (operator %r)." - "" % self.__class__.__name__) + "" % self.__class__.__name__ + ) output_names = output_names.copy() elif isinstance(output_names, list): if len(output_names) == 0: raise ValueError( "output_names cannot be empty (operator %r)." - "" % self.__class__.__name__) + "" % self.__class__.__name__ + ) output_names = output_names.copy() for i in range(len(output_names)): if isinstance(output_names[i], str): @@ -311,10 +326,11 @@ def __init__(self, *inputs, op_version=None, output_names=None, elif output_names is not None: raise TypeError( "output_names must be a string or a list not %r." - "" % type(output_names)) + "" % type(output_names) + ) if op_version is None: - if domain == '': + if domain == "": self.op_version = get_latest_tested_opset_version() else: self.op_version = None @@ -322,8 +338,7 @@ def __init__(self, *inputs, op_version=None, output_names=None, self.op_version = op_version self.since_version = self.__class__.since_version - if (self.op_version is not None and - self.op_version < self.since_version): + if self.op_version is not None and self.op_version < self.since_version: schema = self.find_schema(self.op_version) self.since_version = schema.since_version self.expected_inputs = schema.expected_inputs.copy() @@ -332,15 +347,18 @@ def __init__(self, *inputs, op_version=None, output_names=None, self.output_range = schema.output_range else: self.expected_inputs = ( - None if self.__class__.expected_inputs is None - else self.__class__.expected_inputs.copy()) + None + if self.__class__.expected_inputs is None + else self.__class__.expected_inputs.copy() + ) self.expected_outputs = ( - None if self.__class__.expected_outputs is None - else self.__class__.expected_outputs.copy()) + None + if self.__class__.expected_outputs is None + else self.__class__.expected_outputs.copy() + ) self.input_range = self.__class__.input_range self.output_range = self.__class__.output_range - if self.__class__.__name__ not in { - 'OnnxScan', 'OnnxLoop', 'OnnxIf'}: + if self.__class__.__name__ not in {"OnnxScan", "OnnxLoop", "OnnxIf"}: # TODO: the minimum opset depends on embedded graph # by default, it takes the given op_version but the # optimal value could be lower. 
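In practice the generated classes from `skl2onnx.algebra.onnx_ops` exercise the constructor logic above; a small hedged example, with the opset and the input/output names chosen arbitrarily:

import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxMatMul
from skl2onnx.common.data_types import FloatTensorType

opv = 18  # assumed target opset, checked against each schema's since_version
weights = np.random.randn(2, 3).astype(np.float32)
bias = np.random.randn(1, 3).astype(np.float32)
node = OnnxAdd(OnnxMatMul("X", weights, op_version=opv), bias,
               op_version=opv, output_names=["Y"])
onx = node.to_onnx({"X": FloatTensorType([None, 2])}, target_opset=opv)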
@@ -348,13 +366,13 @@ def __init__(self, *inputs, op_version=None, output_names=None, if self.op_version is None: self.op_version = self.since_version - if (self.op_version is not None and - self.op_version < self.since_version): + if self.op_version is not None and self.op_version < self.since_version: raise RuntimeError( "Operator '{}': requested version {} < " "{} schema version.".format( - self.__class__.__name__, - self.op_version, self.since_version)) + self.__class__.__name__, self.op_version, self.since_version + ) + ) self.state = None self.domain = domain @@ -364,8 +382,9 @@ def __init__(self, *inputs, op_version=None, output_names=None, # check inputs if len(inputs) == 0: if self.input_range[0] == self.input_range[1]: - self.inputs = [OnnxOperator.UnscopedVariable(_[0]) - for _ in self.expected_inputs] + self.inputs = [ + OnnxOperator.UnscopedVariable(_[0]) for _ in self.expected_inputs + ] else: # The number of inputs may vary. self.inputs = None @@ -374,42 +393,53 @@ def __init__(self, *inputs, op_version=None, output_names=None, for inp in inputs: if isinstance(inp, str): self.inputs.append(OnnxOperator.UnscopedVariable(inp)) - elif isinstance(inp, (OnnxOperator, Variable, - OnnxOperatorItem, OnnxSubEstimator)): + elif isinstance( + inp, (OnnxOperator, Variable, OnnxOperatorItem, OnnxSubEstimator) + ): self.inputs.append(inp) elif isinstance(inp, tuple) and len(inp) == 2: self.inputs.append(inp) elif isinstance(inp, (np.ndarray, coo_matrix)): - self.inputs.append( - OnnxOperator.ConstantVariable(inp)) + self.inputs.append(OnnxOperator.ConstantVariable(inp)) elif isinstance(inp, TensorProto): self.inputs.append(OnnxOperator.ConstantVariable(inp)) - elif isinstance(inp, (OnnxOperator.OnnxOperatorVariable, - OnnxOperator.ConstantVariable)): + elif isinstance( + inp, + (OnnxOperator.OnnxOperatorVariable, OnnxOperator.ConstantVariable), + ): self.inputs.append(inp) - elif isinstance(inp, (np.int64, np.float32, - np.float64, np.bool_, - np.int8, np.uint8)): + elif isinstance( + inp, (np.int64, np.float32, np.float64, np.bool_, np.int8, np.uint8) + ): self.inputs.append(OnnxOperator.ConstantVariable(inp)) - elif isinstance(inp, (float, )): + elif isinstance(inp, (float,)): self.inputs.append(np.float64(inp)) - elif isinstance(inp, (int, )): + elif isinstance(inp, (int,)): self.inputs.append(np.int64(inp)) else: raise TypeError( "Unable to interpret the input name for type {} in " "operator '{}' (value={}).".format( - type(inp), self.__class__.__name__, inp)) + type(inp), self.__class__.__name__, inp + ) + ) if self.inputs is not None: - if (len(self.inputs) < self.input_range[0] or - len(self.inputs) > self.input_range[1]): + if ( + len(self.inputs) < self.input_range[0] + or len(self.inputs) > self.input_range[1] + ): raise RuntimeError( "Operator '{}' expects a number of inputs " "in [{}, {}] not {} (expected opset={}, " "class opset={})".format( - self.operator_name, *self.input_range, - len(self.inputs), op_version, self.op_version)) + self.operator_name, + *self.input_range, + len(self.inputs), + op_version, + self.op_version + ) + ) # global context if global_context is None: self.global_context = None @@ -417,12 +447,14 @@ def __init__(self, *inputs, op_version=None, output_names=None, if not isinstance(global_context, dict): raise TypeError( "global_context must be a dictionary not %r." 
- "" % type(global_context)) + "" % type(global_context) + ) for k, v in global_context.items(): if not isinstance(v, (OnnxOperator, OnnxOperatorItem)): raise TypeError( "Value %r in must be an OnnxOperator or an " - "OnnxOperatorItem not %r." % (k, type(v))) + "OnnxOperatorItem not %r." % (k, type(v)) + ) self.global_context = global_context # check output @@ -433,7 +465,8 @@ def __init__(self, *inputs, op_version=None, output_names=None, if len(self.output_names) == 0: raise ValueError( "output_names can be None but cannot be empty for " - "operator %r." % self) + "operator %r." % self + ) if self.output_variables is None: self.output_variables = [None for o in self.output_names] for i in range(len(self.output_names)): @@ -441,33 +474,32 @@ def __init__(self, *inputs, op_version=None, output_names=None, if isinstance(name, Variable): self.output_variables[i] = name else: - raise TypeError("output_names must be a list of strings " - "and element %r is %r (%r)" % ( - i, type(name), name)) + raise TypeError( + "output_names must be a list of strings " + "and element %r is %r (%r)" % (i, type(name), name) + ) if all(map(lambda x: x is None, self.output_variables)): self.output_variables = None - if (self.output_names is not None and ( - self.expected_outputs is None or - len(self.output_names) > len(self.expected_outputs))): + if self.output_names is not None and ( + self.expected_outputs is None + or len(self.output_names) > len(self.expected_outputs) + ): if self.expected_outputs is None: self.expected_outputs = [] - for i in range(len(self.expected_outputs), - len(self.output_names)): + for i in range(len(self.expected_outputs), len(self.output_names)): self.expected_outputs.append((self.output_names[i], None)) - if (self.expected_inputs is None or - len(self.inputs) > len(self.expected_inputs)): + if self.expected_inputs is None or len(self.inputs) > len(self.expected_inputs): if self.expected_inputs is None: self.expected_inputs = [] - for i in range(len(self.expected_inputs), - len(self.inputs)): + for i in range(len(self.expected_inputs), len(self.inputs)): inp = self.inputs[i] if isinstance(inp, GraphStateVar): inp = tuple(inp) elif isinstance(inp, str): inp = (inp, None) - elif hasattr(inp, 'add_to'): + elif hasattr(inp, "add_to"): # OnnxOperator existing = set(_[0] for _ in self.expected_inputs) i = 10 @@ -479,11 +511,14 @@ def __init__(self, *inputs, op_version=None, output_names=None, self.expected_inputs.append(inp) self.output_names_ = None - self._post_process_attributes( - clear_subgraph_inputs=clear_subgraph_inputs) + self._post_process_attributes(clear_subgraph_inputs=clear_subgraph_inputs) logger.debug( - '[Ops] +%s-%d (%s) id=%d', - self.__class__.__name__, self.op_version, self.domain, id(self)) + "[Ops] +%s-%d (%s) id=%d", + self.__class__.__name__, + self.op_version, + self.domain, + id(self), + ) def _post_process_attributes(self, clear_subgraph_inputs=False): """ @@ -512,33 +547,36 @@ def _post_process_attributes(self, clear_subgraph_inputs=False): if self.__class__.__name__ == "OnnxConstantOfShape": if "value" in self.kwargs: - value = self.kwargs['value'] + value = self.kwargs["value"] if isinstance(value, TensorProto): return if isinstance(value, np.ndarray): - if value.shape == (1, ): + if value.shape == (1,): val = value[0] elif len(value.shape) == 0: val = value else: raise RuntimeError( "Unexpected shape %r for value, it must be " - "an array of one element." 
% value.shape) - self.kwargs['value'] = from_array( - np.array([val], dtype=value.dtype)) + "an array of one element." % value.shape + ) + self.kwargs["value"] = from_array( + np.array([val], dtype=value.dtype) + ) return raise TypeError( "Unexpected type %r for value. It should be an array " - "of one element." % type(value)) + "of one element." % type(value) + ) return if self.__class__.__name__ == "OnnxCast": if "to" in self.kwargs: - value = self.kwargs['to'] + value = self.kwargs["to"] if isinstance(value, int): return to = guess_proto_type(_guess_numpy_type(value, None)) - self.kwargs['to'] = to + self.kwargs["to"] = to return def __str__(self): @@ -549,7 +587,9 @@ def __str__(self): self.__class__.__name__, len(self.inputs) if self.inputs is not None else 0, [str(o) for o in self.output_names] - if self.output_names is not None else "?") + if self.output_names is not None + else "?", + ) def set_onnx_name_prefix(self, onnx_prefix_name): """ @@ -562,7 +602,7 @@ def set_onnx_name_prefix(self, onnx_prefix_name): if self.onnx_prefix_name is None: self.onnx_prefix_name = onnx_prefix_name for inp in self.inputs: - if hasattr(inp, 'onnx_prefix_name'): + if hasattr(inp, "onnx_prefix_name"): inp.set_onnx_name_prefix(onnx_prefix_name) return self @@ -588,7 +628,7 @@ def get_output_name(self, i, scope=None): return self.state.computed_outputs_[i][0] if self.output_names_ is not None: return self.output_names_[i] - self._set_output_names_(getattr(self, 'scope', None) or scope, None) + self._set_output_names_(getattr(self, "scope", None) or scope, None) return self.output_names_[i] def get_output(self, i, scope=None): @@ -599,8 +639,8 @@ def get_output(self, i, scope=None): res = self.output_names_[i] if not isinstance(res, (tuple, Variable)): raise RuntimeError( - "Unable to retrieve output %r from %r." - "" % (i, self)) + "Unable to retrieve output %r from %r." 
"" % (i, self) + ) return res def _set_output_names_(self, scope, operator): @@ -608,8 +648,7 @@ def _set_output_names_(self, scope, operator): if operator is not None: self.operator_ = operator if self.output_names_ is not None: - raise RuntimeError( - "output_names_ is already set.") + raise RuntimeError("output_names_ is already set.") elif self.output_variables is not None: outputs = [o.onnx_name for o in self.output_variables] self.output_names_ = outputs @@ -621,15 +660,16 @@ def _set_output_names_(self, scope, operator): if operator is not None and len(louts) != len(operator.outputs): raise RuntimeError( "Output mismatch for '{}'\n{}\n{}".format( - type(operator.raw_operator), - louts, operator.outputs)) + type(operator.raw_operator), louts, operator.outputs + ) + ) outputs = [] for iname, name in enumerate(louts): if name is None: raise AssertionError( - "Issue for operator '{}'.".format( - type(operator.raw_operator))) - if name.startswith('u(') and name[-1] == ')': + "Issue for operator '{}'.".format(type(operator.raw_operator)) + ) + if name.startswith("u(") and name[-1] == ")": name = scope.get_unique_variable_name(name[2:-1]) elif operator is not None: oout = operator.outputs[iname] @@ -639,15 +679,15 @@ def _set_output_names_(self, scope, operator): elif self.expected_outputs is None: raise AttributeError( "expected_outputs is None for operator=%r, output_names=%r, " - "output_variables=%r, operator=%r" % ( - self, self.output_names, self.output_variables, operator)) + "output_variables=%r, operator=%r" + % (self, self.output_names, self.output_variables, operator) + ) else: if scope is None: raise RuntimeError("scope must not be None.") outputs = [] for name in self.expected_outputs: - name = scope.get_unique_variable_name( - self.onnx_prefix + "_" + name[0]) + name = scope.get_unique_variable_name(self.onnx_prefix + "_" + name[0]) outputs.append(name) self.output_names_ = outputs return outputs @@ -657,21 +697,24 @@ def _add_to_inputs(self, operator): for input in self.inputs: if isinstance(input, OnnxOperator.OnnxOperatorVariable): if operator is None: - raise RuntimeError("A placeholder cannot be replaced " - "as an operator is not specified.") + raise RuntimeError( + "A placeholder cannot be replaced " + "as an operator is not specified." + ) if len(operator.inputs) == 0: - raise RuntimeError("No input variable in {}.".format( - operator)) + raise RuntimeError("No input variable in {}.".format(operator)) # The inputs must be looked into the graph. for i in operator.inputs: if i.onnx_name == input.name: inputs.append(i) break else: - vars = ', '.join(map(lambda o: "'%s'" % o.onnx_name, - operator.inputs)) - raise RuntimeError("Unable to find variable " - "{} in {}.".format(input, vars)) + vars = ", ".join( + map(lambda o: "'%s'" % o.onnx_name, operator.inputs) + ) + raise RuntimeError( + "Unable to find variable " "{} in {}.".format(input, vars) + ) else: inputs.append(input) return inputs @@ -693,17 +736,18 @@ def add_to(self, scope, container, operator=None, run_converters=False): if self.is_deprecated: raise RuntimeError( "Node '{}' is deprecated. 
This API cannot deprecated " - "nodes.".format(self.__class__.__name__)) - if (self.op_version is not None and - self.op_version < self.since_version): + "nodes.".format(self.__class__.__name__) + ) + if self.op_version is not None and self.op_version < self.since_version: raise RuntimeError( "Incompatible versions for node '{}' op_version {} " "< since_version {}.".format( - self.__class__.__name__, self.op_version, - self.since_version)) - if self.kwargs.get('op_version', '') is None: + self.__class__.__name__, self.op_version, self.since_version + ) + ) + if self.kwargs.get("op_version", "") is None: kwargs = self.kwargs.copy() - del kwargs['op_version'] + del kwargs["op_version"] else: kwargs = self.kwargs @@ -715,14 +759,23 @@ def add_to(self, scope, container, operator=None, run_converters=False): logger.debug("[Ops.add_to] state id=%d", id(self)) self.state = GraphState( - inputs, self.output_names_, self.operator_name, - scope, container, None, op_version=self.op_version, - op_domain=domain, onnx_prefix_name=self.onnx_prefix, + inputs, + self.output_names_, + self.operator_name, + scope, + container, + None, + op_version=self.op_version, + op_domain=domain, + onnx_prefix_name=self.onnx_prefix, expected_inputs=self.expected_inputs, expected_outputs=self.expected_outputs, input_range=self.input_range, output_range=self.output_range, - operator=operator, run_converters=run_converters, **kwargs) + operator=operator, + run_converters=run_converters, + **kwargs + ) self.state.run() self._verify_add_to_() @@ -730,9 +783,10 @@ def _verify_add_to_(self): if self.state is None: raise RuntimeError( "Graph was not produced for operator '{}': {}." - "".format(self.__class__.__name__, self)) + "".format(self.__class__.__name__, self) + ) for i in self.inputs: - if hasattr(i, '_verify_add_to_'): + if hasattr(i, "_verify_add_to_"): i._verify_add_to_() @property @@ -748,25 +802,30 @@ def get_output_type_inference(self, input_shapes=None): """ Returns the expected output types in a list. """ - if (self.state is not None and - self.state.computed_outputs_ is not None): + if self.state is not None and self.state.computed_outputs_ is not None: return self.state.computed_outputs_ expected_inputs = ( self.state.computed_inputs_ - if self.expected_inputs is None else self.expected_inputs) + if self.expected_inputs is None + else self.expected_inputs + ) if expected_inputs is None: raise RuntimeError( "Attribute 'expected_inputs' is empty for %r, " - "input_shapes=%r." % (self, input_shapes)) + "input_shapes=%r." % (self, input_shapes) + ) expected_outputs = ( self.state.computed_outputs_ - if self.expected_outputs is None else self.expected_outputs) + if self.expected_outputs is None + else self.expected_outputs + ) if expected_outputs is None: raise RuntimeError( "Attribute 'expected_outputs' is empty for %r, " - "input_shapes=%r." % (self, input_shapes)) + "input_shapes=%r." % (self, input_shapes) + ) # Shape inference only work on a full graph. if input_shapes is None: @@ -790,15 +849,14 @@ def get_output_type_inference(self, input_shapes=None): res = [] for name, ct in expected_outputs: - if isinstance(ct, str) and ct[0] in ('T', 'V', 'I'): - if (ct[0] not in rev or - all(map(lambda k: k not in given, rev[ct]))): + if isinstance(ct, str) and ct[0] in ("T", "V", "I"): + if ct[0] not in rev or all(map(lambda k: k not in given, rev[ct])): raise NotImplementedError( "Unable to guess output type for (%r, %r) - " "given=%r - rev=%r input_shapes=%r expected_inputs" - "=%r." 
% ( - name, ct, given, rev, input_shapes, - self.expected_inputs)) + "=%r." + % (name, ct, given, rev, input_shapes, self.expected_inputs) + ) found = False for ind in rev[ct]: if ind in given: @@ -809,9 +867,9 @@ def get_output_type_inference(self, input_shapes=None): raise NotImplementedError( "Unable to guess output type for (%r, %r) - " "given=%r - rev=%r input_shapes=%r expected_inputs" - "=%r." % ( - name, ct, given, rev, input_shapes, - self.expected_inputs)) + "=%r." + % (name, ct, given, rev, input_shapes, self.expected_inputs) + ) continue if isinstance(ct, str): try: @@ -819,16 +877,17 @@ def get_output_type_inference(self, input_shapes=None): except NotImplementedError as e: raise NotImplementedError( "Unable to guess output type for (%r, %r) - " - "given=%r - rev=%r." % (name, ct, given, rev)) from e + "given=%r - rev=%r." % (name, ct, given, rev) + ) from e res.append((name, dt)) continue try: - dt = _guess_type_proto_str( - _guess_type_proto_str_inv(ct), None) + dt = _guess_type_proto_str(_guess_type_proto_str_inv(ct), None) except NotImplementedError as e: raise NotImplementedError( "Unable to guess output type for (%r, %r) - given=%r - " - "rev=%r." % (name, ct, given, rev)) from e + "rev=%r." % (name, ct, given, rev) + ) from e res.append((name, dt)) return res @@ -838,7 +897,7 @@ def _clean_attributes(self, *args, recursive=True): Removes attributes in this node and its parents. """ for arg in args: - if arg in ('state', 'output_names_'): + if arg in ("state", "output_names_"): setattr(self, arg, None) elif hasattr(self, arg): delattr(self, arg) @@ -847,8 +906,15 @@ def _clean_attributes(self, *args, recursive=True): if isinstance(obj, OnnxOperator): obj._clean_attributes(*args, recursive=True) - def to_onnx(self, inputs=None, outputs=None, other_outputs=None, - target_opset=None, domain=None, verbose=0): + def to_onnx( + self, + inputs=None, + outputs=None, + other_outputs=None, + target_opset=None, + domain=None, + verbose=0, + ): """ Converts this operator into an ONNX graph. @@ -864,26 +930,32 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None, :param verbose: prints information """ if isinstance(target_opset, dict): - dom = self.domain or '' + dom = self.domain or "" target_opset = target_opset.get(dom, None) elif isinstance(target_opset, int): - if self.domain not in ('', None): + if self.domain not in ("", None): # The target_opset is for the domain '' # We ignore it. target_opset = None elif target_opset is not None: raise TypeError( "target_opset must be a dictionary {domain: " - "target_opset} not %r for operator %r." % ( - target_opset, self.__class__.__name__)) - if self.domain in ('', None) and target_opset == 1: + "target_opset} not %r for operator %r." + % (target_opset, self.__class__.__name__) + ) + if self.domain in ("", None) and target_opset == 1: raise RuntimeError("target_opset cannot be 1.") - if (self.op_version is not None and target_opset is not None and - self.op_version > target_opset): + if ( + self.op_version is not None + and target_opset is not None + and self.op_version > target_opset + ): raise RuntimeError( "target_opset={} is lower than the version={} requested " "for this node '{}'.".format( - target_opset, self.op_version, self.__class__.__name__)) + target_opset, self.op_version, self.__class__.__name__ + ) + ) if self.state is not None: # The conversion already happened and needs to be cleaned. 
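# ----- Editor's illustrative sketch, not part of the patch ---------------------
# A minimal end-to-end use of the to_onnx method reformatted in this hunk,
# assuming skl2onnx, onnx and numpy are installed; the input name "X", the
# output name "Y" and opset 15 are arbitrary choices.
import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxMul
from skl2onnx.common.data_types import FloatTensorType

# Build (X + 1) * 2 with the operator algebra, then convert the graph.
node = OnnxMul(
    OnnxAdd("X", np.array([1.0], dtype=np.float32), op_version=15),
    np.array([2.0], dtype=np.float32),
    op_version=15,
    output_names=["Y"],
)
onx = node.to_onnx(inputs=[("X", FloatTensorType([None, 2]))], target_opset=15)
# --------------------------------------------------------------------------------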
self._clean_attributes("output_names_", "state") @@ -899,31 +971,36 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None, ty = _guess_type(obj[1]) new_inputs.append((obj[0], ty)) else: - raise TypeError("Inputs must be Variable or " - "tuple(name, type) not {}." - "".format(type(obj))) + raise TypeError( + "Inputs must be Variable or " + "tuple(name, type) not {}." + "".format(type(obj)) + ) inputs = new_inputs for name, typ in inputs: if typ is None: - raise RuntimeError("Type input '{}' for operator '{}' " - "is unknown. You should specify " - "input types.".format( - name, self.__class__.__name__)) + raise RuntimeError( + "Type input '{}' for operator '{}' " + "is unknown. You should specify " + "input types.".format(name, self.__class__.__name__) + ) registered_models = dict( - conv=_converter_pool, shape=_shape_calculator_pool, - aliases=sklearn_operator_name_map) + conv=_converter_pool, + shape=_shape_calculator_pool, + aliases=sklearn_operator_name_map, + ) target_opset = self.get_latest_tested_opset_version(target_opset) container = ModelComponentContainer( - target_opset, registered_models=registered_models) + target_opset, registered_models=registered_models + ) model_name = self.__class__.__name__ - logger.debug( - "[Ops.to_onnx] %s id=%d", - self.__class__.__name__, id(self)) - scope = Scope(model_name, target_opset=target_opset, - registered_models=registered_models) + logger.debug("[Ops.to_onnx] %s id=%d", self.__class__.__name__, id(self)) + scope = Scope( + model_name, target_opset=target_opset, registered_models=registered_models + ) for inp in inputs: var = Variable(inp[0], inp[0], scope=scope, type=inp[1]) container.add_input(var) @@ -938,20 +1015,23 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None, if var.output_names is None: # The variable name is likely to be different. 
from .onnx_ops import OnnxIdentity + var2 = OnnxIdentity( - var, op_version=var.op_version, - output_names=[name]) + var, op_version=var.op_version, output_names=[name] + ) else: var2 = var extra_outputs.append(var2) for out in extra_outputs: - if not hasattr(out, 'add_to'): - raise RuntimeError( - "Extra outputs must have method 'add_to'.") + if not hasattr(out, "add_to"): + raise RuntimeError("Extra outputs must have method 'add_to'.") out.add_to(scope, container, run_converters=True) logger.debug( "[Ops.to_onnx] %s id=%d extra_outputs=%r", - self.__class__.__name__, id(self), extra_outputs) + self.__class__.__name__, + id(self), + extra_outputs, + ) # infer shapes if outputs: @@ -968,26 +1048,35 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None, type_shape = o[1] shapes.append(Variable(o[0], o[0], None, type_shape)) else: - raise TypeError("Outputs must be Variable or " - "tuple(name, type).") + raise TypeError("Outputs must be Variable or " "tuple(name, type).") logger.debug( "[Ops.to_onnx] %s id=%d outputs=%r", - self.__class__.__name__, id(self), outputs) + self.__class__.__name__, + id(self), + outputs, + ) else: if verbose > 0: print("[op.to_onnx] infer outputs") - shapes = infer_outputs(container, container.inputs, - initializer=container.initializers, - target_opset=target_opset) + shapes = infer_outputs( + container, + container.inputs, + initializer=container.initializers, + target_opset=target_opset, + ) if self.output_names: - set_names = set(v.onnx_name if hasattr(v, 'onnx_name') else v - for v in self.output_names) - shapes = [shape for shape in shapes - if shape.onnx_name in set_names] + set_names = set( + v.onnx_name if hasattr(v, "onnx_name") else v + for v in self.output_names + ) + shapes = [shape for shape in shapes if shape.onnx_name in set_names] logger.debug( "[Ops.to_onnx] %s id=%d shapes=%r", - self.__class__.__name__, id(self), shapes) + self.__class__.__name__, + id(self), + shapes, + ) if verbose > 0: print("[op.to_onnx] shapes=%r" % shapes) @@ -999,21 +1088,26 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None, if verbose >= 2: print("---NODES---") for node in container.nodes: - print(" %s - %s: %r -> %r" % ( - node.op_type, node.name, node.input, node.output)) + print( + " %s - %s: %r -> %r" + % (node.op_type, node.name, node.input, node.output) + ) # convert the graph graph = make_graph( - container.nodes, model_name, container.inputs, - container.outputs, container.initializers) + container.nodes, + model_name, + container.inputs, + container.outputs, + container.initializers, + ) onnx_model = make_model(graph) # domains _update_domain_version(container, onnx_model) # metadata - opv = min(target_opset, - _get_main_opset_version(onnx_model) or target_opset) + opv = min(target_opset, _get_main_opset_version(onnx_model) or target_opset) irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION) onnx_model.ir_version = irv onnx_model.producer_name = utils.get_producer() @@ -1041,8 +1135,7 @@ def enumerate_variables(self): for node in self.enumerate_nodes(): if self.inputs: for i, input in enumerate(self.inputs): - if isinstance(input, (OnnxOperator.UnscopedVariable, - Variable)): + if isinstance(input, (OnnxOperator.UnscopedVariable, Variable)): yield (node, i) def enumerate_initial_types(self): @@ -1086,24 +1179,41 @@ class OnnxSubEstimator(OnnxOperator): input_range = [1, 1e9] output_range = [1, 1e9] - def __init__(self, skl_op, *inputs, op_version=None, - output_names=None, domain=None, options=None, - input_types=None, 
**kwargs): + def __init__( + self, + skl_op, + *inputs, + op_version=None, + output_names=None, + domain=None, + options=None, + input_types=None, + **kwargs + ): OnnxOperator.__init__( - self, *inputs, op_version=op_version, - output_names=output_names, domain=domain, **kwargs) + self, + *inputs, + op_version=op_version, + output_names=output_names, + domain=domain, + **kwargs + ) self.operator_instance = skl_op self.options = options if skl_op is None and input_types is not None: raise RuntimeError( - "input_types is only used when a sub-operator is defined.") + "input_types is only used when a sub-operator is defined." + ) self.input_types = input_types def __repr__(self): return "%s(%r, %s, op_version=%r, output_names=%r)" % ( - self.__class__.__name__, self.operator_instance, + self.__class__.__name__, + self.operator_instance, ", ".join("%r" % i for i in self.inputs), - self.op_version, self.output_names) + self.op_version, + self.output_names, + ) def add_to(self, scope, container, operator=None, run_converters=False): """ @@ -1116,9 +1226,9 @@ def add_to(self, scope, container, operator=None, run_converters=False): :param run_converters: must be True if called from method `to_onnx` """ if self.state is None: - if self.kwargs.get('op_version', '') is None: + if self.kwargs.get("op_version", "") is None: kwargs = self.kwargs.copy() - del kwargs['op_version'] + del kwargs["op_version"] else: kwargs = self.kwargs @@ -1133,18 +1243,24 @@ def add_to(self, scope, container, operator=None, run_converters=False): louts = self.output_names outputs = [] for name in louts: - if (isinstance(name, str) and name.startswith('u(') and - name[-1] == ')'): + if ( + isinstance(name, str) + and name.startswith("u(") + and name[-1] == ")" + ): name = VariableStr( - scope.get_unique_variable_name(name[2:-1]), - scope=scope) - if (isinstance(name, Variable) and - name.raw_name.startswith('u(') and - name.raw_name[-1] == ')'): + scope.get_unique_variable_name(name[2:-1]), scope=scope + ) + if ( + isinstance(name, Variable) + and name.raw_name.startswith("u(") + and name.raw_name[-1] == ")" + ): name = VariableStr( - scope.get_unique_variable_name( - name.raw_name[2:-1]), - scope=scope, type=name.type) + scope.get_unique_variable_name(name.raw_name[2:-1]), + scope=scope, + type=name.type, + ) outputs.append(name) self.output_names_ = outputs else: @@ -1154,27 +1270,29 @@ def add_to(self, scope, container, operator=None, run_converters=False): for input in self.inputs: if isinstance(input, OnnxOperator.OnnxOperatorVariable): if operator is None: - raise RuntimeError("A placeholder cannot be replaced " - "as an operator is not specified.") + raise RuntimeError( + "A placeholder cannot be replaced " + "as an operator is not specified." + ) if len(operator.inputs) == 0: - raise RuntimeError("No input variable in {}.".format( - operator)) + raise RuntimeError("No input variable in {}.".format(operator)) # The inputs must be looked into the graph. 
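# ----- Editor's illustrative sketch, not part of the patch ---------------------
# OnnxSubEstimator, whose add_to method is reformatted in this hunk, is commonly
# used inside a custom converter to delegate part of the graph to an already
# supported model. The attribute 'pca_' and the final subtraction are
# hypothetical; only the OnnxSubEstimator and add_to calls follow the real API.
import numpy as np
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
from skl2onnx.algebra.onnx_ops import OnnxSub

def example_converter(scope, operator, container):
    # Delegate the fitted sub-model, then post-process its output.
    raw = operator.raw_operator
    opv = container.target_opset
    X = operator.inputs[0]
    projected = OnnxSubEstimator(raw.pca_, X, op_version=opv)
    final = OnnxSub(
        projected,
        np.array([1.0], dtype=np.float32),
        op_version=opv,
        output_names=operator.outputs[:1],
    )
    final.add_to(scope, container)
# --------------------------------------------------------------------------------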
for i in operator.inputs: if i.onnx_name == input.name: inputs.append(i) break else: - vars = ', '.join(map(lambda o: "'%s'" % o.onnx_name, - operator.inputs)) - raise RuntimeError("Unable to find variable " - "{} in {}.".format(input, vars)) + vars = ", ".join( + map(lambda o: "'%s'" % o.onnx_name, operator.inputs) + ) + raise RuntimeError( + "Unable to find variable " "{} in {}.".format(input, vars) + ) elif isinstance(input, tuple) and len(input) == 2: if scope is not None and input[0] in scope.variables: var = scope.variables[input[0]] else: - var = Variable(input[0], input[0], scope=scope, - type=input[1]) + var = Variable(input[0], input[0], scope=scope, type=input[1]) if scope is not None: scope.register_variable(var) inputs.append(var) @@ -1183,16 +1301,24 @@ def add_to(self, scope, container, operator=None, run_converters=False): logger.debug("[SubOps.add_to] state id=%d", id(self)) self.state = GraphState( - inputs, self.output_names_, self.operator_instance, - scope, container, None, op_version=self.op_version, - op_domain=None, onnx_prefix_name=self.onnx_prefix, - options=self.options, run_converters=run_converters, - input_types=self.input_types, **kwargs) + inputs, + self.output_names_, + self.operator_instance, + scope, + container, + None, + op_version=self.op_version, + op_domain=None, + onnx_prefix_name=self.onnx_prefix, + options=self.options, + run_converters=run_converters, + input_types=self.input_types, + **kwargs + ) self.state.run() class WrappedModelAlias: - def __init__(self, model, alias): self.model = model self.alias = alias @@ -1207,6 +1333,10 @@ class OnnxSubOperator(OnnxSubEstimator): def __init__(self, *args, **kwargs): OnnxSubEstimator.__init__(self, *args, **kwargs) - warnings.warn(("Class OnnxSubOperator will be removed in 1.10. " - "It should be replaced by OnnxSubEstimator."), - DeprecationWarning) + warnings.warn( + ( + "Class OnnxSubOperator will be removed in 1.10. " + "It should be replaced by OnnxSubEstimator." + ), + DeprecationWarning, + ) diff --git a/skl2onnx/algebra/onnx_operator_mixin.py b/skl2onnx/algebra/onnx_operator_mixin.py index 59235b397..8fab57d66 100644 --- a/skl2onnx/algebra/onnx_operator_mixin.py +++ b/skl2onnx/algebra/onnx_operator_mixin.py @@ -18,9 +18,17 @@ class OnnxOperatorMixin: sharing an API to convert object to *ONNX*. """ - def to_onnx(self, X=None, name=None, - options=None, white_op=None, black_op=None, - final_types=None, target_opset=None, verbose=0): + def to_onnx( + self, + X=None, + name=None, + options=None, + white_op=None, + black_op=None, + final_types=None, + target_opset=None, + verbose=0, + ): """ Converts the model in *ONNX* format. It calls method *_to_onnx* which must be @@ -43,42 +51,52 @@ def to_onnx(self, X=None, name=None, :param verbose: displays information while converting """ from .. 
import convert_sklearn + if X is None: initial_types = self.infer_initial_types() else: initial_types = guess_initial_types(X, None) - if not hasattr(self, 'op_version'): + if not hasattr(self, "op_version"): if name is None: name = self.__class__.__name__ raise AttributeError( "Attribute 'op_version' is missing for '{}' " - "(model: '{}').".format( - self.__class__.__name__, name)) + "(model: '{}').".format(self.__class__.__name__, name) + ) return convert_sklearn( - self, initial_types=initial_types, - target_opset=target_opset or self.op_version, options=options, - white_op=white_op, black_op=black_op, final_types=final_types, - verbose=verbose) + self, + initial_types=initial_types, + target_opset=target_opset or self.op_version, + options=options, + white_op=white_op, + black_op=black_op, + final_types=final_types, + verbose=verbose, + ) def infer_initial_types(self): """ Infers initial types. """ - if hasattr(self, 'enumerate_initial_types'): + if hasattr(self, "enumerate_initial_types"): return list(self.enumerate_initial_types()) - raise RuntimeError("Method enumerate_initial_types is missing " - "and initial_types are not defined.") + raise RuntimeError( + "Method enumerate_initial_types is missing " + "and initial_types are not defined." + ) def _find_sklearn_parent(self): for cl in self.__class__.__bases__: if issubclass(cl, BaseEstimator): return cl - raise RuntimeError("Unable to find any parent inherited from " - "BaseEstimator: {}.".format( - ", ".join(map(str, self.__class__.__bases__)))) - - def to_onnx_operator(self, inputs=None, outputs=None, - target_opset=None, options=None): + raise RuntimeError( + "Unable to find any parent inherited from " + "BaseEstimator: {}.".format(", ".join(map(str, self.__class__.__bases__))) + ) + + def to_onnx_operator( + self, inputs=None, outputs=None, target_opset=None, options=None + ): """ This function must be overloaded. """ @@ -92,6 +110,7 @@ def onnx_parser(self): mapped to the first *scikit-learn* parent it can find. """ + def parser(scope=None, inputs=None): try: op = self.to_onnx_operator(inputs=inputs, outputs=None) @@ -111,6 +130,7 @@ def parser(scope=None, inputs=None): except IndexError: break return names + return parser def get_inputs(self, inputs, i): @@ -130,32 +150,36 @@ def onnx_shape_calculator(self): mapped to the first *scikit-learn* parent it can find. """ - if not hasattr(self, 'op_version'): + if not hasattr(self, "op_version"): raise AttributeError( "Class '{}' should have an attribute 'op_version'.".format( - self.__class__.__name__)) + self.__class__.__name__ + ) + ) try: op = self.to_onnx_operator() except NotImplementedError: parent = self._find_sklearn_parent() - name = sklearn_operator_name_map.get( - parent, "Sklearn" + parent.__name__) + name = sklearn_operator_name_map.get(parent, "Sklearn" + parent.__name__) return get_shape_calculator(name) def shape_calculator(operator): - onx = op.to_onnx(operator.inputs, operator.outputs, - target_opset=self.op_version) + onx = op.to_onnx( + operator.inputs, operator.outputs, target_opset=self.op_version + ) inferred_model = shape_inference.infer_shapes(onx) shapes = Variable.from_pb(inferred_model.graph.value_info) shapes = {shape.onnx_name: shape for shape in shapes} for o in operator.outputs: name = o.onnx_name if name not in shapes: - raise RuntimeError("Shape of output '{}' cannot be " - "infered. onnx_shape_calculator " - "must be overriden and return " - "a shape calculator.".format(name)) + raise RuntimeError( + "Shape of output '{}' cannot be " + "infered. 
onnx_shape_calculator " + "must be overriden and return " + "a shape calculator.".format(name) + ) o.set_type(shapes[name].type) return shape_calculator @@ -167,8 +191,10 @@ def onnx_converter(self): mapped to the first *scikit-learn* parent it can find. """ - def converter(scope: Scope, operator: Operator, - container: ModelComponentContainer): + + def converter( + scope: Scope, operator: Operator, container: ModelComponentContainer + ): inputs = operator.inputs # getattr(self, "parsed_inputs_", None) outputs = operator.outputs # kwargs.get('outputs', None) op_version = container.target_opset @@ -176,23 +202,26 @@ def converter(scope: Scope, operator: Operator, try: if inputs: op = self.to_onnx_operator( - inputs=inputs, outputs=outputs, - target_opset=op_version, options=options) + inputs=inputs, + outputs=outputs, + target_opset=op_version, + options=options, + ) else: op = self.to_onnx_operator( - target_opset=op_version, - outputs=outputs, options=options) + target_opset=op_version, outputs=outputs, options=options + ) except TypeError: warnings.warn( "Signature should be to_onnx_operator(self, inputs=None, " "outputs=None, target_opset=None, **kwargs). " "This will be the case in version 1.11, class=%r." "" % type(self), - DeprecationWarning) + DeprecationWarning, + ) try: if inputs: - op = self.to_onnx_operator( - inputs=inputs, outputs=outputs) + op = self.to_onnx_operator(inputs=inputs, outputs=outputs) else: op = self.to_onnx_operator() except NotImplementedError: diff --git a/skl2onnx/algebra/onnx_ops.py b/skl2onnx/algebra/onnx_ops.py index 26104c5c9..0eb8db69d 100644 --- a/skl2onnx/algebra/onnx_ops.py +++ b/skl2onnx/algebra/onnx_ops.py @@ -6,6 +6,7 @@ import sys import os import numpy as np + try: from scipy.sparse import coo_matrix except ImportError: @@ -17,16 +18,24 @@ from ._cache import cache_folder -def ClassFactory(class_name, op_name, inputs, outputs, - input_range, output_range, - domain, attr_names, doc, - deprecated, since_version, - past_version): +def ClassFactory( + class_name, + op_name, + inputs, + outputs, + input_range, + output_range, + domain, + attr_names, + doc, + deprecated, + since_version, + past_version, +): from .onnx_operator import OnnxOperator, OnnxOperatorItem def __init__(self, *args, **kwargs): - - op_version = kwargs.pop('op_version', None) + op_version = kwargs.pop("op_version", None) if isinstance(op_version, dict): op_version = op_version.get(domain, None) @@ -34,14 +43,15 @@ def __init__(self, *args, **kwargs): if len(args) == 0 and input_range[0] == input_range[1]: args = [_[0] for _ in self.__class__.expected_inputs] if not (input_range[0] <= len(args) <= input_range[1]): - raise RuntimeError("Unexpected number of inputs, " - "got {}, expecting {} for operator " - "'{}'.".format( - len(args), len(inputs), op_name)) + raise RuntimeError( + "Unexpected number of inputs, " + "got {}, expecting {} for operator " + "'{}'.".format(len(args), len(inputs), op_name) + ) attr_names = self.attr_names - if '_' in self.__class__.__name__: - op_version_class = int(self.__class__.__name__.split('_')[-1]) + if "_" in self.__class__.__name__: + op_version_class = int(self.__class__.__name__.split("_")[-1]) if op_version is None: op_version = op_version_class try: @@ -52,8 +62,9 @@ def __init__(self, *args, **kwargs): "class '{}' since_version {}. 
Parameter 'op_version' " "is probably missing when the class " "is instantiated.".format( - op_version, op_version_class, class_name, - since_version)) + op_version, op_version_class, class_name, since_version + ) + ) else: op_version_class = None @@ -67,26 +78,38 @@ def __init__(self, *args, **kwargs): # attr_names refers to the most recent version of # this operator. We may need an older one. for op in range(op_version, 0, -1): - name = '{}_{}'.format(self.__class__.__name__, op) + name = "{}_{}".format(self.__class__.__name__, op) if name in self.past_version: found = (name, op) attr_names = self.past_version[name].attr_names break - if (op_version_class is not None and found is not None and - found[-1] != op_version_class): + if ( + op_version_class is not None + and found is not None + and found[-1] != op_version_class + ): raise RuntimeError( "op_version={} does not refer to the same opset as the class " - "name ('{}').".format(op_version, self.__class__.__name__)) + "name ('{}').".format(op_version, self.__class__.__name__) + ) for key in kwargs: - if key in {'output_names', 'op_version', 'domain', 'ir_version', - 'global_context', 'clear_subgraph_inputs'}: + if key in { + "output_names", + "op_version", + "domain", + "ir_version", + "global_context", + "clear_subgraph_inputs", + }: continue if key not in attr_names: - raise TypeError("Argument '%s' not valid for '%s' opset=%s." - % (key, op_name, op_version)) + raise TypeError( + "Argument '%s' not valid for '%s' opset=%s." + % (key, op_name, op_version) + ) if op_version is not None: - kwargs['op_version'] = op_version + kwargs["op_version"] = op_version # This class can only be created by a user. Let's check # types are either a variable, an operator or an array. for i, a in enumerate(args): @@ -94,37 +117,45 @@ def __init__(self, *args, **kwargs): if len(a) != 2: raise TypeError( "Input %r is a tuple or class %r, it must have two " - "elements (name, type) not %r." % (i, class_name, a)) - if (not isinstance(a[0], str) or - not isinstance(a[1], DataType)): + "elements (name, type) not %r." % (i, class_name, a) + ) + if not isinstance(a[0], str) or not isinstance(a[1], DataType): raise TypeError( "Input %r is a tuple or class %r, it must be a tuple " - "(name, type) not %r." % (i, class_name, a)) + "(name, type) not %r." % (i, class_name, a) + ) continue - if not isinstance(a, ( - Variable, OnnxOperator, np.ndarray, str, - OnnxOperatorItem, coo_matrix)): + if not isinstance( + a, + (Variable, OnnxOperator, np.ndarray, str, OnnxOperatorItem, coo_matrix), + ): raise TypeError( "Unexpected type %r for input %r of operator %r. " "It must be an instance of Variable (or a string), " "OnnxOperator, OnnxOperatorItem, numpy.ndarray, " - "coo_matrix)." % ( - type(a), i, class_name)) + "coo_matrix)." 
% (type(a), i, class_name) + ) OnnxOperator.__init__(self, *args, **kwargs) - newclass = type(class_name, (OnnxOperator,), - {"__init__": __init__, '__doc__': doc, - 'expected_inputs': inputs, - 'expected_outputs': outputs, - 'operator_name': op_name, - 'input_range': input_range, - 'output_range': output_range, - 'domain': domain, - 'is_deprecated': deprecated, - 'since_version': since_version, - 'past_version': past_version, - 'attr_names': attr_names, - '__module__': __name__}) + newclass = type( + class_name, + (OnnxOperator,), + { + "__init__": __init__, + "__doc__": doc, + "expected_inputs": inputs, + "expected_outputs": outputs, + "operator_name": op_name, + "input_range": input_range, + "output_range": output_range, + "domain": domain, + "is_deprecated": deprecated, + "since_version": since_version, + "past_version": past_version, + "attr_names": attr_names, + "__module__": __name__, + }, + ) return newclass @@ -151,54 +182,61 @@ def dynamic_class_creation(cache=False): res[schema.name] = schema else: res[schema.name] = schema - res[schema.name + '_' + str(schema.since_version)] = schema + res[schema.name + "_" + str(schema.since_version)] = schema cls = {} def _c(obj, label, i): - name = '%s%d' % (obj.name or label, i) + name = "%s%d" % (obj.name or label, i) try: - tys = obj.type_str or '' + tys = obj.type_str or "" except AttributeError: - tys = obj.typeStr or '' + tys = obj.typeStr or "" return (name, tys) for name in sorted(res): schema = res[name] - inputs = [_c(o, 'I', i) for i, o in enumerate(schema.inputs)] - outputs = [_c(o, 'O', i) for i, o in enumerate(schema.outputs)] + inputs = [_c(o, "I", i) for i, o in enumerate(schema.inputs)] + outputs = [_c(o, "O", i) for i, o in enumerate(schema.outputs)] args = [p for p in schema.attributes] - if '_' in name: + if "_" in name: class_name = "Onnx" + name else: class_name = "Onnx" + schema.name filename = os.path.join( - cache_dir, - schema.name + '_' + str(schema.since_version) + ".rst") + cache_dir, schema.name + "_" + str(schema.since_version) + ".rst" + ) if not cache and os.path.exists(filename): with open(filename, "r", encoding="utf-8") as f: doc = f.read() else: doc = get_rst_doc(schema) if cache: - with open(filename, 'w', encoding='utf-8') as f: + with open(filename, "w", encoding="utf-8") as f: f.write(doc) - cl = ClassFactory(class_name, schema.name, inputs, outputs, - [schema.min_input, schema.max_input], - [schema.min_output, schema.max_output], - schema.domain, args, - "**Version**" + doc.split('**Version**')[-1], - getattr(schema, 'deprecated', False), - schema.since_version, {}) + cl = ClassFactory( + class_name, + schema.name, + inputs, + outputs, + [schema.min_input, schema.max_input], + [schema.min_output, schema.max_output], + schema.domain, + args, + "**Version**" + doc.split("**Version**")[-1], + getattr(schema, "deprecated", False), + schema.since_version, + {}, + ) cls[class_name] = cl # Retrieves past classes. for name in cls: - if '_' not in name: + if "_" not in name: continue - main, version = name.split('_') + main, version = name.split("_") last = cls[main] last.past_version[name] = cls[name] @@ -219,8 +257,7 @@ def _update_module(): _update_module() -def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None, - output_names=None): +def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None, output_names=None): """ Adds operator ReduceSum with opset>=13 following API from opset 12. 
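# ----- Editor's illustrative sketch, not part of the patch ---------------------
# OnnxReduceSumApi11, whose definition starts just above, hides the opset-13
# change where ReduceSum's 'axes' moved from an attribute to an input; a hedged
# usage sketch with arbitrary names and opset.
from skl2onnx.algebra.onnx_ops import OnnxReduceSumApi11
from skl2onnx.common.data_types import FloatTensorType

red = OnnxReduceSumApi11(
    "X", axes=[1], keepdims=0, op_version=15, output_names=["Y"]
)
onx = red.to_onnx(inputs=[("X", FloatTensorType([None, 3]))], target_opset=15)
# --------------------------------------------------------------------------------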
""" @@ -229,30 +266,46 @@ def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None, if op_version is None or op_version >= 13: if axes is None: return OnnxReduceSum( # noqa - *x, keepdims=keepdims, op_version=op_version, - output_names=output_names) + *x, keepdims=keepdims, op_version=op_version, output_names=output_names + ) return OnnxReduceSum( # noqa - *x, np.array(axes, dtype=np.int64), - keepdims=keepdims, op_version=op_version, - output_names=output_names) + *x, + np.array(axes, dtype=np.int64), + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) if op_version >= 11: if axes is None: return OnnxReduceSum_11( # noqa - *x, keepdims=keepdims, - op_version=op_version, output_names=output_names) + *x, keepdims=keepdims, op_version=op_version, output_names=output_names + ) return OnnxReduceSum_11( # noqa - *x, axes=axes, keepdims=keepdims, - op_version=op_version, output_names=output_names) + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) if axes is None: - return OnnxReduceSum_1(*x, keepdims=keepdims, # noqa - op_version=op_version, - output_names=output_names) - return OnnxReduceSum_1(*x, axes=axes, keepdims=keepdims, # noqa - op_version=op_version, output_names=output_names) - - -def OnnxReduceAnyApi18(cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1, - op_version=None, output_names=None): + return OnnxReduceSum_1( + *x, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + return OnnxReduceSum_1( + *x, + axes=axes, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceAnyApi18( + cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1, op_version=None, output_names=None +): """ Adds operator Reduce* with opset>=18 following API from opset 17. 
""" @@ -261,98 +314,155 @@ def OnnxReduceAnyApi18(cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1, if op_version is None or op_version >= 18: if axes is None: return cl18( # noqa - *x, keepdims=keepdims, op_version=op_version, - output_names=output_names) + *x, keepdims=keepdims, op_version=op_version, output_names=output_names + ) return cl18( # noqa - *x, np.array(axes, dtype=np.int64), - keepdims=keepdims, op_version=op_version, - output_names=output_names) + *x, + np.array(axes, dtype=np.int64), + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) if op_version >= 13: if axes is None: - return cl13(*x, keepdims=keepdims, # noqa - op_version=op_version, - output_names=output_names) - return cl13(*x, axes=axes, keepdims=keepdims, # noqa - op_version=op_version, output_names=output_names) + return cl13( + *x, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + return cl13( + *x, + axes=axes, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) if op_version >= 11: if axes is None: - return cl11(*x, keepdims=keepdims, # noqa - op_version=op_version, - output_names=output_names) - return cl11(*x, axes=axes, keepdims=keepdims, # noqa - op_version=op_version, output_names=output_names) + return cl11( + *x, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + return cl11( + *x, + axes=axes, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) if axes is None: - return cl1(*x, keepdims=keepdims, # noqa - op_version=op_version, - output_names=output_names) - return cl1(*x, axes=axes, keepdims=keepdims, # noqa - op_version=op_version, output_names=output_names) - - -def OnnxReduceSumSquareApi18(*x, axes=None, keepdims=1, op_version=None, - output_names=None): + return cl1( + *x, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + return cl1( + *x, + axes=axes, + keepdims=keepdims, # noqa + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceSumSquareApi18( + *x, axes=None, keepdims=1, op_version=None, output_names=None +): """ Adds operator ReduceSumSquare with opset>=18 following API from opset 17. """ if axes is None or not isinstance(axes, (list, np.ndarray)): raise TypeError(f"axes must be a list or an array not {type(axes)}.") return OnnxReduceAnyApi18( - OnnxReduceSumSquare, OnnxReduceSumSquare_13, # noqa - OnnxReduceSumSquare_11, OnnxReduceSumSquare_1, # noqa - *x, axes=axes, keepdims=keepdims, op_version=op_version, - output_names=output_names) - - -def OnnxReduceMeanApi18(*x, axes=None, keepdims=1, op_version=None, - output_names=None): + OnnxReduceSumSquare, + OnnxReduceSumSquare_13, # noqa + OnnxReduceSumSquare_11, + OnnxReduceSumSquare_1, # noqa + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceMeanApi18(*x, axes=None, keepdims=1, op_version=None, output_names=None): """ Adds operator ReduceMean with opset>=18 following API from opset 17. 
""" return OnnxReduceAnyApi18( - OnnxReduceMean, OnnxReduceMean_13, # noqa - OnnxReduceMean_11, OnnxReduceMean_1, # noqa - *x, axes=axes, keepdims=keepdims, op_version=op_version, - output_names=output_names) - - -def OnnxReduceMaxApi18(*x, axes=None, keepdims=1, op_version=None, - output_names=None): + OnnxReduceMean, + OnnxReduceMean_13, # noqa + OnnxReduceMean_11, + OnnxReduceMean_1, # noqa + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceMaxApi18(*x, axes=None, keepdims=1, op_version=None, output_names=None): """ Adds operator ReduceMean with opset>=18 following API from opset 17. """ return OnnxReduceAnyApi18( - OnnxReduceMax, OnnxReduceMax_13, # noqa - OnnxReduceMax_11, OnnxReduceMax_1, # noqa - *x, axes=axes, keepdims=keepdims, op_version=op_version, - output_names=output_names) - - -def OnnxReduceLogSumExpApi18(*x, axes=None, keepdims=1, op_version=None, - output_names=None): + OnnxReduceMax, + OnnxReduceMax_13, # noqa + OnnxReduceMax_11, + OnnxReduceMax_1, # noqa + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceLogSumExpApi18( + *x, axes=None, keepdims=1, op_version=None, output_names=None +): """ Adds operator ReduceMean with opset>=18 following API from opset 17. """ return OnnxReduceAnyApi18( - OnnxReduceLogSumExp, OnnxReduceLogSumExp_13, # noqa - OnnxReduceLogSumExp_11, OnnxReduceLogSumExp_1, # noqa - *x, axes=axes, keepdims=keepdims, op_version=op_version, - output_names=output_names) - - -def OnnxReduceL2Api18(*x, axes=None, keepdims=1, op_version=None, - output_names=None): + OnnxReduceLogSumExp, + OnnxReduceLogSumExp_13, # noqa + OnnxReduceLogSumExp_11, + OnnxReduceLogSumExp_1, # noqa + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) + + +def OnnxReduceL2Api18(*x, axes=None, keepdims=1, op_version=None, output_names=None): """ Adds operator ReduceMean with opset>=18 following API from opset 17. """ return OnnxReduceAnyApi18( - OnnxReduceL2, OnnxReduceL2_13, # noqa - OnnxReduceL2_11, OnnxReduceL2_1, # noqa - *x, axes=axes, keepdims=keepdims, op_version=op_version, - output_names=output_names) - - -def OnnxSplitApi18(*x, axis=0, split=None, num_outputs=None, - op_version=None, output_names=None): + OnnxReduceL2, + OnnxReduceL2_13, # noqa + OnnxReduceL2_11, + OnnxReduceL2_1, # noqa + *x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) + + +def OnnxSplitApi18( + *x, axis=0, split=None, num_outputs=None, op_version=None, output_names=None +): """ Adds operator Split with opset>=13 following API from opset 11. """ @@ -364,47 +474,68 @@ def OnnxSplitApi18(*x, axis=0, split=None, num_outputs=None, if output_names is None: raise RuntimeError( "split or num_outputs or output_names " - "must be specified since opset 18.") + "must be specified since opset 18." 
+ ) num_outputs = len(output_names) if num_outputs is None: - raise AttributeError( - "num_outputs cannot be None for Split-18.") + raise AttributeError("num_outputs cannot be None for Split-18.") return OnnxSplit_18( # noqa - *x, axis=axis, op_version=op_version, - num_outputs=num_outputs, output_names=output_names) + *x, + axis=axis, + op_version=op_version, + num_outputs=num_outputs, + output_names=output_names, + ) if num_outputs is None: return OnnxSplit_18( # noqa - *x, np.array(split, dtype=np.int64), axis=axis, - op_version=op_version, output_names=output_names) + *x, + np.array(split, dtype=np.int64), + axis=axis, + op_version=op_version, + output_names=output_names, + ) return OnnxSplit_18( # noqa - *x, np.array(split, dtype=np.int64), axis=axis, - num_outputs=num_outputs, op_version=op_version, - output_names=output_names) + *x, + np.array(split, dtype=np.int64), + axis=axis, + num_outputs=num_outputs, + op_version=op_version, + output_names=output_names, + ) if op_version >= 13: if split is None: return OnnxSplit_13( # noqa - *x, axis=axis, op_version=op_version, - output_names=output_names) + *x, axis=axis, op_version=op_version, output_names=output_names + ) return OnnxSplit_13( # noqa - *x, np.array(split, dtype=np.int64), axis=axis, - op_version=op_version, output_names=output_names) + *x, + np.array(split, dtype=np.int64), + axis=axis, + op_version=op_version, + output_names=output_names, + ) if op_version >= 11: if split is None: return OnnxSplit_11( # noqa - *x, axis=axis, op_version=op_version, - output_names=output_names) + *x, axis=axis, op_version=op_version, output_names=output_names + ) return OnnxSplit_11( # noqa - *x, split=split, axis=axis, op_version=op_version, - output_names=output_names) + *x, split=split, axis=axis, op_version=op_version, output_names=output_names + ) if split is None: return OnnxSplit_2( # noqa - *x, axis=axis, op_version=op_version, output_names=output_names) - return OnnxSplit_2(*x, split=split, axis=axis, # noqa - op_version=op_version, output_names=output_names) - - -def OnnxSqueezeApi11(*x, axes=None, op_version=None, - output_names=None): + *x, axis=axis, op_version=op_version, output_names=output_names + ) + return OnnxSplit_2( + *x, + split=split, + axis=axis, # noqa + op_version=op_version, + output_names=output_names, + ) + + +def OnnxSqueezeApi11(*x, axes=None, op_version=None, output_names=None): """ Adds operator Squeeze with opset>=13 following API from opset 11. """ @@ -412,18 +543,21 @@ def OnnxSqueezeApi11(*x, axes=None, op_version=None, raise RuntimeError("op_version must be specified.") if op_version is None or op_version >= 13: return OnnxSqueeze( # noqa - *x, np.array(axes, dtype=np.int64), - op_version=op_version, output_names=output_names) + *x, + np.array(axes, dtype=np.int64), + op_version=op_version, + output_names=output_names, + ) if op_version >= 11: return OnnxSqueeze_11( # noqa - *x, axes=axes, op_version=op_version, - output_names=output_names) - return OnnxSqueeze_1(*x, axes=axes, # noqa - op_version=op_version, output_names=output_names) + *x, axes=axes, op_version=op_version, output_names=output_names + ) + return OnnxSqueeze_1( + *x, axes=axes, op_version=op_version, output_names=output_names # noqa + ) -def OnnxUnsqueezeApi11(*x, axes=None, op_version=None, - output_names=None): +def OnnxUnsqueezeApi11(*x, axes=None, op_version=None, output_names=None): """ Adds operator Unsqueeze with opset>=13 following API from opset 11. 
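# ----- Editor's illustrative sketch, not part of the patch ---------------------
# OnnxSplitApi18 above selects the right Split class for the requested opset
# (split as an attribute before 13, as an input from 13, num_outputs at 18);
# a hedged usage sketch with arbitrary names and opset.
from skl2onnx.algebra.onnx_ops import OnnxSplitApi18
from skl2onnx.common.data_types import FloatTensorType

sp = OnnxSplitApi18(
    "X", axis=1, split=[1, 2], op_version=15, output_names=["A", "B"]
)
onx = sp.to_onnx(inputs=[("X", FloatTensorType([None, 3]))], target_opset=15)
# --------------------------------------------------------------------------------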
""" @@ -431,34 +565,40 @@ def OnnxUnsqueezeApi11(*x, axes=None, op_version=None, raise RuntimeError("op_version must be specified.") if op_version is None or op_version >= 13: return OnnxUnsqueeze( # noqa - *x, np.array(axes, dtype=np.int64), - op_version=op_version, output_names=output_names) + *x, + np.array(axes, dtype=np.int64), + op_version=op_version, + output_names=output_names, + ) if op_version >= 11: return OnnxUnsqueeze_11( # noqa - *x, axes=axes, op_version=op_version, - output_names=output_names) - return OnnxUnsqueeze_1(*x, axes=axes, # noqa - op_version=op_version, output_names=output_names) + *x, axes=axes, op_version=op_version, output_names=output_names + ) + return OnnxUnsqueeze_1( + *x, axes=axes, op_version=op_version, output_names=output_names # noqa + ) -def OnnxReduceL2_typed(dtype, x, axes=None, keepdims=1, op_version=None, - output_names=None): +def OnnxReduceL2_typed( + dtype, x, axes=None, keepdims=1, op_version=None, output_names=None +): """ Adds operator ReduceL2 for float or double. """ if dtype == np.float32: return OnnxReduceL2Api18( # noqa - x, axes=axes, keepdims=keepdims, - op_version=op_version, output_names=output_names) + x, + axes=axes, + keepdims=keepdims, + op_version=op_version, + output_names=output_names, + ) x2 = OnnxMul(x, x, op_version=op_version) # noqa - red = OnnxReduceSumApi11( - x2, axes=[1], keepdims=1, op_version=op_version) - return OnnxSqrt( # noqa - red, op_version=op_version, output_names=output_names) + red = OnnxReduceSumApi11(x2, axes=[1], keepdims=1, op_version=op_version) + return OnnxSqrt(red, op_version=op_version, output_names=output_names) # noqa -def OnnxReshapeApi13(*x, allowzero=0, op_version=None, - output_names=None): +def OnnxReshapeApi13(*x, allowzero=0, op_version=None, output_names=None): """ Adds operator Reshape with opset>=14 following API from opset 13. """ @@ -466,12 +606,10 @@ def OnnxReshapeApi13(*x, allowzero=0, op_version=None, raise RuntimeError("op_version must be specified.") if op_version is None or op_version >= 14: return OnnxReshape( # noqa - *x, allowzero=allowzero, - op_version=op_version, output_names=output_names) + *x, allowzero=allowzero, op_version=op_version, output_names=output_names + ) if op_version >= 13: return OnnxReshape_13( # noqa - *x, op_version=op_version, - output_names=output_names) - return OnnxReshape_5( # noqa - *x, op_version=op_version, - output_names=output_names) + *x, op_version=op_version, output_names=output_names + ) + return OnnxReshape_5(*x, op_version=op_version, output_names=output_names) # noqa diff --git a/skl2onnx/algebra/onnx_subgraph_operator_mixin.py b/skl2onnx/algebra/onnx_subgraph_operator_mixin.py index 0f7de9c8d..e265b7198 100644 --- a/skl2onnx/algebra/onnx_subgraph_operator_mixin.py +++ b/skl2onnx/algebra/onnx_subgraph_operator_mixin.py @@ -7,4 +7,5 @@ class OnnxSubGraphOperatorMixin(OnnxOperatorMixin): """ :class:`OnnxOperatorMixin` for converters. 
""" + pass diff --git a/skl2onnx/algebra/sklearn_ops.py b/skl2onnx/algebra/sklearn_ops.py index d0dec729b..320053c18 100644 --- a/skl2onnx/algebra/sklearn_ops.py +++ b/skl2onnx/algebra/sklearn_ops.py @@ -6,6 +6,7 @@ import sys import textwrap from sklearn.pipeline import Pipeline, FeatureUnion + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -17,16 +18,21 @@ def ClassFactorySklearn(skl_obj, class_name, doc, conv, shape_calc, alias): from .onnx_subgraph_operator_mixin import OnnxSubGraphOperatorMixin - newclass = type(class_name, (OnnxSubGraphOperatorMixin, skl_obj), - {'__doc__': doc, - 'operator_name': skl_obj.__name__, - '_fct_converter': conv, - '_fct_shape_calc': shape_calc, - 'input_range': [1, 1e9], - 'output_range': [1, 1e9], - 'op_version': None, - 'alias': alias, - '__module__': __name__}) + newclass = type( + class_name, + (OnnxSubGraphOperatorMixin, skl_obj), + { + "__doc__": doc, + "operator_name": skl_obj.__name__, + "_fct_converter": conv, + "_fct_shape_calc": shape_calc, + "input_range": [1, 1e9], + "output_range": [1, 1e9], + "op_version": None, + "alias": alias, + "__module__": __name__, + }, + ) return newclass @@ -52,9 +58,7 @@ def dynamic_class_creation_sklearn(): prefix = "Sklearn" if "sklearn" in str(skl_obj) else "" class_name = "Onnx" + prefix + skl_name try: - cl = ClassFactorySklearn(skl_obj, class_name, - doc, conv, shape_calc, - name) + cl = ClassFactorySklearn(skl_obj, class_name, doc, conv, shape_calc, name) except TypeError: continue cls[class_name] = cl @@ -85,11 +89,15 @@ class to *skl_cl*. available = sorted(filter(lambda n: prefix in n, sys.modules)) raise RuntimeError( "Unable to find a class for '{}' in\n{}".format( - skl_cl.__name__, "\n".join(available))) + skl_cl.__name__, "\n".join(available) + ) + ) cl = getattr(this, full_name) if "automation" in str(cl): - raise RuntimeError("Dynamic operation issue with class " - "name '{}' from '{}'.".format(cl, __name__)) + raise RuntimeError( + "Dynamic operation issue with class " + "name '{}' from '{}'.".format(cl, __name__) + ) return cl @@ -109,8 +117,7 @@ def __init__(self, steps, memory=None, verbose=False, op_version=None): if ColumnTransformer is not None: - class OnnxSklearnColumnTransformer(ColumnTransformer, - OnnxSubGraphOperatorMixin): + class OnnxSklearnColumnTransformer(ColumnTransformer, OnnxSubGraphOperatorMixin): """ Combines `ColumnTransformer `. """ - def __init__(self, sklearn_model, white_op=None, black_op=None, - verbose=0): + def __init__(self, sklearn_model, white_op=None, black_op=None, verbose=0): super(SklearnModelContainerNode, self).__init__( - sklearn_model, white_op=white_op, black_op=black_op, - verbose=verbose) + sklearn_model, white_op=white_op, black_op=black_op, verbose=verbose + ) # Scikit-learn models have no input and output specified, # so we create them and store them in this container. self._inputs = [] @@ -217,8 +218,15 @@ class ModelComponentContainer(_WhiteBlackContainer): *ONNX* *ModelProto*. """ - def __init__(self, target_opset, options=None, registered_models=None, - white_op=None, black_op=None, verbose=0): + def __init__( + self, + target_opset, + options=None, + registered_models=None, + white_op=None, + black_op=None, + verbose=0, + ): """ :param target_opset: number, for example, 7 for *ONNX 1.2*, and 8 for *ONNX 1.3*. 
@@ -231,7 +239,8 @@ def __init__(self, target_opset, options=None, registered_models=None, :param verbose: display information while converting """ _WhiteBlackContainer.__init__( - self, white_op=white_op, black_op=black_op, verbose=verbose) + self, white_op=white_op, black_op=black_op, verbose=verbose + ) # Inputs of ONNX graph. They are ValueInfoProto in ONNX. self.inputs = [] # Outputs of ONNX graph. They are ValueInfoProto in ONNX. @@ -253,10 +262,10 @@ def __init__(self, target_opset, options=None, registered_models=None, # matches the ONNX version. if isinstance(target_opset, dict): self.target_opset_all = target_opset - self.target_opset = target_opset.get('', None) + self.target_opset = target_opset.get("", None) else: self.target_opset = target_opset - self.target_opset_all = {'': target_opset} + self.target_opset_all = {"": target_opset} # Additional options given to converters. self.options = options # All registered models. @@ -270,40 +279,45 @@ def swap_names(self, old_name, new_name): :param new_name: new name :return: list of impacted objects """ - exc_list = {'Scan', 'Loop', 'If'} + exc_list = {"Scan", "Loop", "If"} for node in self.nodes: if node.op_type not in exc_list: continue - if (old_name in node.input or old_name in node.output or - new_name in node.input or new_name in node.output): + if ( + old_name in node.input + or old_name in node.output + or new_name in node.input + or new_name in node.output + ): raise NotImplementedError( "Unable to handle subgraphs for node type %r." - "(%r, %r)" % (node.op_type, old_name, new_name)) + "(%r, %r)" % (node.op_type, old_name, new_name) + ) res = [] for inp in self.inputs: if inp.name == old_name: inp.name = new_name - res.append(('Io', inp)) + res.append(("Io", inp)) elif inp.name == new_name: inp.name = old_name - res.append(('In', inp)) + res.append(("In", inp)) for inp in self.outputs: if inp.name == old_name: inp.name = new_name - res.append(('Oo', inp)) + res.append(("Oo", inp)) elif inp.name == new_name: inp.name = old_name - res.append(('On', inp)) + res.append(("On", inp)) for inp in self.initializers: if inp.name == old_name: inp.name = new_name - res.append(('-o', inp)) + res.append(("-o", inp)) elif inp.name == new_name: inp.name = old_name - res.append(('-n', inp)) + res.append(("-n", inp)) for node in self.nodes: modified = False @@ -328,8 +342,8 @@ def swap_names(self, old_name, new_name): if modified: if node.op_type in exc_list: raise NotImplementedError( - "Unable to handle subgraphs for node type %r." - "" % node.op_type) + "Unable to handle subgraphs for node type %r." 
"" % node.op_type + ) node.input[:] = new_input[:] node.output[:] = new_output[:] res.append(("n-", node)) @@ -343,28 +357,23 @@ def __str__(self): if self.inputs: rows.append("INPUTS") for inp in self.inputs: - rows.append( - " " + str(inp).replace(" ", "").replace("\n", " ")) + rows.append(" " + str(inp).replace(" ", "").replace("\n", " ")) if self.outputs: rows.append("OUTPUTS") for out in self.outputs: - rows.append( - " " + str(out).replace(" ", "").replace("\n", " ")) + rows.append(" " + str(out).replace(" ", "").replace("\n", " ")) if self.initializers: rows.append("INITIALIZERS") for ini in self.initializers: - rows.append( - " " + str(ini).replace(" ", "").replace("\n", " ")) + rows.append(" " + str(ini).replace(" ", "").replace("\n", " ")) if self.value_info: rows.append("NODES") for val in self.value_info: - rows.append( - " " + str(val).replace(" ", "").replace("\n", " ")) + rows.append(" " + str(val).replace(" ", "").replace("\n", " ")) if self.nodes: rows.append("PROTO") for nod in self.nodes: - rows.append( - " " + str(nod).replace(" ", "").replace("\n", " ")) + rows.append(" " + str(nod).replace(" ", "").replace("\n", " ")) return "\n".join(rows) def _make_value_info(self, variable): @@ -438,54 +447,77 @@ def add_initializer(self, name, onnx_type, shape, content): tensor.raw_data = content.raw_data tensor.dims.extend(content.dims) elif shape is None and isinstance( - content, (np.float32, np.float64, np.int32, - np.int64, float, np.int8, np.uint8, - np.bool_, np.str_, str)): + content, + ( + np.float32, + np.float64, + np.int32, + np.int64, + float, + np.int8, + np.uint8, + np.bool_, + np.str_, + str, + ), + ): tensor = make_tensor(name, onnx_type, [], [content]) - elif (SparseTensorProto is not None and - isinstance(content, SparseTensorProto)): + elif SparseTensorProto is not None and isinstance(content, SparseTensorProto): raise NotImplementedError("Not implemented yet.") elif shape is None: tensor = make_attribute(name, content) elif isinstance(content, coo_matrix): if SparseTensorProto is None: raise RuntimeError( - "Sparse matrices require SparseTensorProto. Update onnx.") + "Sparse matrices require SparseTensorProto. Update onnx." 
+ ) values_tensor = make_tensor( - name + "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) - indices = [i * content.shape[1] + j - for i, j in zip(content.row, content.col)] + name + "_v", + data_type=onnx_type, + dims=(len(content.data),), + vals=content.data, + ) + indices = [ + i * content.shape[1] + j for i, j in zip(content.row, content.col) + ] indices_tensor = make_tensor( - name=name + "_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) + name=name + "_i", + data_type=TensorProto.INT64, + dims=(len(indices),), + vals=indices, + ) dense_shape = list(content.shape) sparse_tensor = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) + values_tensor, indices_tensor, dense_shape + ) # cached value: same without names values_tensor = make_tensor( - "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) + "_v", data_type=onnx_type, dims=(len(content.data),), vals=content.data + ) indices_tensor = make_tensor( - name="_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) + name="_i", + data_type=TensorProto.INT64, + dims=(len(indices),), + vals=indices, + ) cached_value = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) + values_tensor, indices_tensor, dense_shape + ) else: if any(d is None for d in shape): - raise ValueError('Shape of initializer cannot contain None.') - if (hasattr(content, 'dtype') and - content.dtype in (bool, np.bool_)): + raise ValueError("Shape of initializer cannot contain None.") + if hasattr(content, "dtype") and content.dtype in (bool, np.bool_): content = content.astype(np.int32) try: tensor = make_tensor(name, onnx_type, shape, content) except TypeError as e: raise TypeError( "Unable to make a tensor name=%r " - "onnx_type=%r shape=%r content-type=%r." % ( - name, onnx_type, shape, type(content))) from e + "onnx_type=%r shape=%r content-type=%r." + % (name, onnx_type, shape, type(content)) + ) from e if tensor is not None: if cached_value is None: @@ -502,8 +534,12 @@ def add_initializer(self, name, onnx_type, shape, content): return tensor self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') + "Identity", + cached_name, + name, + op_version=self.target_opset, + name=name + "_op", + ) return name if sparse_tensor is not None: @@ -512,17 +548,25 @@ def add_initializer(self, name, onnx_type, shape, content): if cached_name is None: self.initializers_strings[content] = name self.add_node( - 'Constant', [], [name], sparse_value=sparse_tensor, - op_version=self.target_opset, name=name + '_op') + "Constant", + [], + [name], + sparse_value=sparse_tensor, + op_version=self.target_opset, + name=name + "_op", + ) return sparse_tensor self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') + "Identity", + cached_name, + name, + op_version=self.target_opset, + name=name + "_op", + ) return name - raise RuntimeError( - "Either tensor or sparse_tensor should be defined.") + raise RuntimeError("Either tensor or sparse_tensor should be defined.") def add_value_info(self, variable): self.value_info.append(self._make_value_info(variable)) @@ -534,8 +578,7 @@ def _check_operator(self, op_type): from a function defined in this submodule by looking into the callstack. The test is enabled for *python >= 3.6*. 
""" - if (op_type in _apply_operation_specific and - sys.version_info[:2] >= (3, 6)): + if op_type in _apply_operation_specific and sys.version_info[:2] >= (3, 6): tb = traceback.extract_stack() operation = [] fct = _apply_operation_specific[op_type] @@ -548,12 +591,20 @@ def _check_operator(self, op_type): if skl2 and len(operation) == 0: raise RuntimeError( "Operator '{0}' should be added with function " - "'{1}' in submodule _apply_operation.".format( - op_type, fct.__name__)) + "'{1}' in submodule _apply_operation.".format(op_type, fct.__name__) + ) self.check_white_black_list(op_type) - def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, - name=None, **attrs): + def add_node( + self, + op_type, + inputs, + outputs, + op_domain="", + op_version=None, + name=None, + **attrs, + ): """ Adds a *NodeProto* into the node list of the final ONNX model. If the input operator's domain-version information cannot be @@ -574,14 +625,13 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, attributes' names and attributes' values, respectively. """ - if ("axes" in attrs and - (attrs["axes"] is None or - not isinstance(attrs["axes"], (list, np.ndarray)))): + if "axes" in attrs and ( + attrs["axes"] is None or not isinstance(attrs["axes"], (list, np.ndarray)) + ): raise TypeError( - f"axes must be a list or an array not " - f"{type(attrs['axes'])}.") - if name is None or not isinstance( - name, str) or name == '': + f"axes must be a list or an array not " f"{type(attrs['axes'])}." + ) + if name is None or not isinstance(name, str) or name == "": name = f"N{len(self.nodes)}" existing_names = set(n.name for n in self.nodes) if name in existing_names: @@ -599,44 +649,50 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, outputs = [outputs] logger.debug( "[Node] %r - %r -> %r (name=%r)", - op_type, ",".join(inputs), ",".join(outputs), name) + op_type, + ",".join(inputs), + ",".join(outputs), + name, + ) try: common = set(inputs) & set(outputs) except TypeError as e: raise TypeError( "inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r." - "" % (inputs, outputs, op_type)) from e + "" % (inputs, outputs, op_type) + ) from e if common: raise RuntimeError( "inputs and outputs cannot have " "variables in common {} in node '{}' " - "with name '{}'.".format(common, op_type, name)) - if not isinstance(inputs, list) or not all( - isinstance(s, str) for s in inputs): - type_list = ','.join(list(str(type(s)) for s in inputs)) - raise ValueError('Inputs must be a list of string but get [%s]' - % type_list) - if (not isinstance(outputs, list) or - not all(isinstance(s, str) for s in outputs)): - type_list = ','.join(list(str(type(s)) for s in outputs)) - raise ValueError('Outputs must be a list of string but get [%s]' - % type_list) + "with name '{}'.".format(common, op_type, name) + ) + if not isinstance(inputs, list) or not all(isinstance(s, str) for s in inputs): + type_list = ",".join(list(str(type(s)) for s in inputs)) + raise ValueError("Inputs must be a list of string but get [%s]" % type_list) + if not isinstance(outputs, list) or not all( + isinstance(s, str) for s in outputs + ): + type_list = ",".join(list(str(type(s)) for s in outputs)) + raise ValueError( + "Outputs must be a list of string but get [%s]" % type_list + ) upd = {} dtypes = set() for k, v in attrs.items(): if v is None: raise ValueError( - 'Failed to create ONNX node. 
Undefined ' - 'attribute pair (%s, %s) found for type %r and ' - 'version %r' % ( - k, v, op_type, op_version)) + "Failed to create ONNX node. Undefined " + "attribute pair (%s, %s) found for type %r and " + "version %r" % (k, v, op_type, op_version) + ) if isinstance(v, np.ndarray): upd[k] = v dtypes.add(v.dtype) if upd: attrs.update(upd) - if 'dtype' in attrs and op_type != 'EyeLike': + if "dtype" in attrs and op_type != "EyeLike": raise RuntimeError("dtype should not be a parameter.") new_attrs = {} @@ -652,21 +708,30 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, except (ValueError, TypeError) as e: raise ValueError( f"Unable to create node {op_type!r} with name={name!r} and " - f"attributes={pprint.pformat(new_attrs)}.") from e + f"attributes={pprint.pformat(new_attrs)}." + ) from e node.domain = op_domain self.node_domain_version_pair_sets.add((op_domain, op_version)) self.nodes.append(node) - if (self.target_opset is not None and - op_version is not None and - op_version > self.target_opset_any_domain(op_domain)): + if ( + self.target_opset is not None + and op_version is not None + and op_version > self.target_opset_any_domain(op_domain) + ): raise RuntimeError( "Opset number {} is higher than targeted opsets {} for " "node type '{}' name='{}' input={} " "output={} (domain='{}').".format( - op_version, self.target_opset_all, - node.op_type, node.name, - node.input, node.output, op_domain)) + op_version, + self.target_opset_all, + node.op_type, + node.name, + node.input, + node.output, + op_domain, + ) + ) def target_opset_any_domain(self, domain): target_opset = self.target_opset_all @@ -675,7 +740,7 @@ def target_opset_any_domain(self, domain): to = target_opset[domain] else: to = None - if to is None and domain == '': + if to is None and domain == "": to = onnx_opset_version() if to is None: smap = C.schema_version_map() @@ -690,14 +755,14 @@ def target_opset_any_domain(self, domain): @property def target_opset_onnx(self): - return self.target_opset_any_domain('') + return self.target_opset_any_domain("") def _get_op_version(self, domain, op_type): """ Determines the highest version of operator *op_type* below or equal to *target_opset*. """ - if not hasattr(self, '_op_versions'): + if not hasattr(self, "_op_versions"): self._build_op_version() key = domain, op_type vers = self._op_versions.get(key, None) @@ -708,8 +773,8 @@ def _get_op_version(self, domain, op_type): else: warnings.warn( "Unable to find operator '{}' in domain '{}' in ONNX, " - "op_version is forced to 1.".format( - op_type, domain)) + "op_version is forced to 1.".format(op_type, domain) + ) vers = [1] highest = self.target_opset_any_domain(domain) pos = len(vers) - 1 @@ -719,8 +784,8 @@ def _get_op_version(self, domain, op_type): pos -= 1 raise RuntimeError( "Unable to find a suitable version for operator '{}' " - "in domain '{}'. Available versions: {}.".format( - op_type, domain, vers)) + "in domain '{}'. 
Available versions: {}.".format(op_type, domain, vers) + ) def _build_op_version(self): res = {} @@ -738,25 +803,25 @@ def _build_op_version(self): def _get_allowed_options(self, model): if self.registered_models is not None: if inspect.isfunction(model): - if model not in self.registered_models['aliases']: + if model not in self.registered_models["aliases"]: return None - alias = self.registered_models['aliases'][model] - elif hasattr(model, 'alias'): + alias = self.registered_models["aliases"][model] + elif hasattr(model, "alias"): alias = model.alias else: - if type(model) not in self.registered_models['aliases']: + if type(model) not in self.registered_models["aliases"]: return {} - alias = self.registered_models['aliases'][type(model)] - conv = self.registered_models['conv'][alias] + alias = self.registered_models["aliases"][type(model)] + conv = self.registered_models["conv"][alias] allowed = conv.get_allowed_options() if allowed is None: return {} return allowed - clname = (str(model) if inspect.isfunction(model) - else model.__class__.__name__) + clname = str(model) if inspect.isfunction(model) else model.__class__.__name__ raise NotImplementedError( "No registered models, no known allowed options " - "for model '{}'.".format(clname)) + "for model '{}'.".format(clname) + ) def validate_options(self, operator): """ @@ -778,8 +843,12 @@ def get_options(self, model, default_values=None, fail=True): :return: dictionary """ return _build_options( - model, self.options, default_values, - self._get_allowed_options(model), fail=fail) + model, + self.options, + default_values, + self._get_allowed_options(model), + fail=fail, + ) def has_options(self, model, option_name): """ @@ -838,40 +907,55 @@ def ensure_topological_order(self): raise RuntimeError( "Unable to sort a node (cycle). An output was " "already ordered with name %r (iteration=%r)." - "" % (name, n_iter)) + "" % (name, n_iter) + ) order[name] = maxi if len(missing_names) == 0: continue if len(missing_ops) > 0: + def nstr(name): if name in order: return "%s#%d" % (name, order[name]) return name - rows = ["%s(%s) -> [%s]" % ( - n.name or n.op_type, - ', '.join(map(nstr, n.input)), - ', '.join(n.output)) - for n in missing_ops] + + rows = [ + "%s(%s) -> [%s]" + % ( + n.name or n.op_type, + ", ".join(map(nstr, n.input)), + ", ".join(n.output), + ) + for n in missing_ops + ] rows.insert(0, "") rows.append("--") rows.append("--all-nodes--") rows.append("--") - rows.extend("%s|%s(%s) -> [%s]" % ( - n.op_type, n.name or n.op_type, - ', '.join(map(nstr, n.input)), - ', '.join(n.output)) - for n in self.nodes) + rows.extend( + "%s|%s(%s) -> [%s]" + % ( + n.op_type, + n.name or n.op_type, + ", ".join(map(nstr, n.input)), + ", ".join(n.output), + ) + for n in self.nodes + ) raise RuntimeError( "After %d iterations for %d nodes, still unable " "to sort names %r. The graph may be disconnected. 
" - "List of operators: %s" % ( - n_iter, len(self.nodes), missing_names, - "\n".join(rows))) + "List of operators: %s" + % (n_iter, len(self.nodes), missing_names, "\n".join(rows)) + ) # Update order - topo = sorted([(order[id(node)], node.op_type, - node.name, str(id(node))) - for node in self.nodes]) + topo = sorted( + [ + (order[id(node)], node.op_type, node.name, str(id(node))) + for node in self.nodes + ] + ) map_nodes = {str(id(node)): node for node in self.nodes} self.nodes = [map_nodes[_[-1]] for _ in topo] diff --git a/skl2onnx/common/_onnx_optimisation_common.py b/skl2onnx/common/_onnx_optimisation_common.py index 00486c2aa..e45753f9c 100644 --- a/skl2onnx/common/_onnx_optimisation_common.py +++ b/skl2onnx/common/_onnx_optimisation_common.py @@ -9,8 +9,9 @@ from onnx.helper import make_attribute -def _apply_optimisation_on_graph(fct, onnx_model, recursive=True, - debug_info=None, **kwargs): +def _apply_optimisation_on_graph( + fct, onnx_model, recursive=True, debug_info=None, **kwargs +): """ Applies an optimisation function *fct* on a graph and not on the model. @@ -22,10 +23,8 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True, :param kwargs: additional parameters return: new onnx model """ - if hasattr(onnx_model, 'graph'): - graph = fct( - onnx_model.graph, debug_info=debug_info + ['GRAPH'], - **kwargs) + if hasattr(onnx_model, "graph"): + graph = fct(onnx_model.graph, debug_info=debug_info + ["GRAPH"], **kwargs) new_model = make_model(graph) new_model.ir_version = onnx_model.ir_version new_model.producer_name = onnx_model.producer_name @@ -33,7 +32,7 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True, new_model.domain = onnx_model.domain new_model.model_version = onnx_model.model_version new_model.doc_string = onnx_model.doc_string - if hasattr(onnx_model, 'value_info'): + if hasattr(onnx_model, "value_info"): graph.value_info.extend(onnx_model.value_info) while len(new_model.opset_import) > 0: new_model.opset_import.pop() @@ -42,8 +41,10 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True, op_set.domain = oimp.domain op_set.version = oimp.version return new_model - raise TypeError("This function only works on 'ModelProto' anod not not on" - " {}.".format(type(onnx_model))) + raise TypeError( + "This function only works on 'ModelProto' anod not not on" + " {}.".format(type(onnx_model)) + ) def _apply_remove_node_fct_node(fct, node, recursive, debug_info): @@ -54,29 +55,30 @@ def _apply_remove_node_fct_node(fct, node, recursive, debug_info): :param recursive: does it in subgraphs as well :return: new node """ - if not hasattr(node, 'attribute'): + if not hasattr(node, "attribute"): return node modified = 0 new_atts = [] for att in node.attribute: - if att.name == 'body': + if att.name == "body": new_body = fct( - att.g, recursive=recursive, - debug_info=debug_info + [att.name]) + att.g, recursive=recursive, debug_info=debug_info + [att.name] + ) new_atts.append(_make_att_graph(att.name, new_body)) modified += 1 else: new_atts.append(att) if modified > 0: - new_node = _make_node(node.op_type, node.input, - node.output, name=node.name, - attributes=new_atts) + new_node = _make_node( + node.op_type, node.input, node.output, name=node.name, attributes=new_atts + ) return new_node return node -def _make_node(op_type, inputs, outputs, name=None, doc_string=None, - domain=None, attributes=None): +def _make_node( + op_type, inputs, outputs, name=None, doc_string=None, domain=None, attributes=None +): """ Constructs a NodeProto. 
@@ -105,8 +107,8 @@ def _make_node(op_type, inputs, outputs, name=None, doc_string=None, if isinstance(attributes, dict): if len(attributes) > 0: node.attribute.extend( - make_attribute(key, value) - for key, value in sorted(attributes.items())) + make_attribute(key, value) for key, value in sorted(attributes.items()) + ) elif attributes: for att in attributes: node.attribute.extend([att]) @@ -132,10 +134,10 @@ def _rename_node_input(onnx_node, old_name, new_name=None): """ inputs = [_replace(name, old_name, new_name) for name in onnx_node.input] outputs = list(onnx_node.output) - if hasattr(onnx_node, 'attribute'): + if hasattr(onnx_node, "attribute"): new_atts = [] for att in onnx_node.attribute: - if att.name == 'body': + if att.name == "body": new_body = _rename_graph_input(att.g, old_name, new_name) attr = AttributeProto() attr.name = att.name @@ -148,8 +150,13 @@ def _rename_node_input(onnx_node, old_name, new_name=None): else: atts = onnx_node.attribute node = _make_node( - onnx_node.op_type, inputs, outputs, name=onnx_node.name, - domain=onnx_node.domain, attributes=atts) + onnx_node.op_type, + inputs, + outputs, + name=onnx_node.name, + domain=onnx_node.domain, + attributes=atts, + ) return node @@ -173,9 +180,8 @@ def _rename_graph_output(graph, old_name, new_name): value_info.doc_string = o.type.doc_string outputs.append(value_info) nodes = list(graph.node) - nodes.append(_make_node('Identity', [old_name], [new_name])) - new_graph = make_graph(nodes, graph.name, graph.input, outputs, - graph.initializer) + nodes.append(_make_node("Identity", [old_name], [new_name])) + new_graph = make_graph(nodes, graph.name, graph.input, outputs, graph.initializer) new_graph.value_info.extend(graph.value_info) return new_graph @@ -200,9 +206,8 @@ def _rename_graph_input(graph, old_name, new_name): value_info.doc_string = i.type.doc_string inputs.append(value_info) nodes = list(graph.node) - nodes.append(_make_node('Identity', [new_name], [old_name])) - new_graph = make_graph(nodes, graph.name, inputs, graph.output, - graph.initializer) + nodes.append(_make_node("Identity", [new_name], [old_name])) + new_graph = make_graph(nodes, graph.name, inputs, graph.output, graph.initializer) new_graph.value_info.extend(graph.value_info) return new_graph @@ -226,10 +231,10 @@ def _rename_node_output(onnx_node, old_name, new_name): """ inputs = list(onnx_node.input) outputs = [_replace(name, old_name, new_name) for name in onnx_node.output] - if hasattr(onnx_node, 'attribute'): + if hasattr(onnx_node, "attribute"): new_atts = [] for att in onnx_node.attribute: - if att.name == 'body': + if att.name == "body": new_body = _rename_graph_output(att.g, old_name, new_name) new_atts.append(_make_att_graph(att.name, new_body)) else: @@ -238,6 +243,11 @@ def _rename_node_output(onnx_node, old_name, new_name): else: atts = onnx_node.attribute node = _make_node( - onnx_node.op_type, inputs, outputs, name=onnx_node.name, - domain=onnx_node.domain, attributes=atts) + onnx_node.op_type, + inputs, + outputs, + name=onnx_node.name, + domain=onnx_node.domain, + attributes=atts, + ) return node diff --git a/skl2onnx/common/_registration.py b/skl2onnx/common/_registration.py index 0e2aa1bef..08070450e 100644 --- a/skl2onnx/common/_registration.py +++ b/skl2onnx/common/_registration.py @@ -11,15 +11,16 @@ class RegisteredConverter: - def __init__(self, fct, options): self._fct = fct self._options = options def __call__(self, *args): - if (len(args) == 3 and - hasattr(args[2], '_get_allowed_options') and - hasattr(args[1], 
'raw_operator')): + if ( + len(args) == 3 + and hasattr(args[2], "_get_allowed_options") + and hasattr(args[1], "raw_operator") + ): # Checks that the user did not specify a wrong option. if args[1].raw_operator is not None: args[2]._get_allowed_options(args[1].raw_operator) @@ -37,8 +38,9 @@ def get_allowed_options(self): _shape_calculator_pool = {} -def register_converter(operator_name, conversion_function, overwrite=False, - options=None): +def register_converter( + operator_name, conversion_function, overwrite=False, options=None +): """ :param operator_name: A unique operator ID. It is usually a string but you can use a type as well @@ -52,29 +54,28 @@ def register_converter(operator_name, conversion_function, overwrite=False, (dictionary {name: supported values or None}) """ if conversion_function is None: - raise ValueError( - "A converter cannot be None for %r." % operator_name) + raise ValueError("A converter cannot be None for %r." % operator_name) if not overwrite and operator_name in _converter_pool: - raise ValueError('We do not overwrite registered converter ' - 'by default') + raise ValueError("We do not overwrite registered converter " "by default") if len(_converter_pool) > 0: key = next(iter(_converter_pool)) - check_signature(conversion_function, _converter_pool[key]._fct, - skip=('operator', )) - _converter_pool[operator_name] = RegisteredConverter( - conversion_function, options) + check_signature( + conversion_function, _converter_pool[key]._fct, skip=("operator",) + ) + _converter_pool[operator_name] = RegisteredConverter(conversion_function, options) def get_converter(operator_name): if operator_name not in _converter_pool: - msg = 'Unsupported conversion for operator %s (%d registered)' % ( - operator_name, len(_converter_pool)) + msg = "Unsupported conversion for operator %s (%d registered)" % ( + operator_name, + len(_converter_pool), + ) raise ValueError(msg) return _converter_pool[operator_name] -def register_shape_calculator(operator_name, calculator_function, - overwrite=False): +def register_shape_calculator(operator_name, calculator_function, overwrite=False): """ :param operator_name: A unique operator ID. It is usually a string but you can use a type as well @@ -86,21 +87,21 @@ def register_shape_calculator(operator_name, calculator_function, to enable overwriting. """ if calculator_function is None: - raise ValueError( - "A shape calculator cannot be None for %r." % operator_name) + raise ValueError("A shape calculator cannot be None for %r." % operator_name) if not overwrite and operator_name in _shape_calculator_pool: - raise ValueError('We do not overwrite registrated shape calculator ' - 'by default') + raise ValueError( + "We do not overwrite registrated shape calculator " "by default" + ) if calculator_function is not None and len(_shape_calculator_pool) > 0: key = next(iter(_shape_calculator_pool)) - check_signature(calculator_function, _shape_calculator_pool[key], - skip=('operator', )) + check_signature( + calculator_function, _shape_calculator_pool[key], skip=("operator",) + ) _shape_calculator_pool[operator_name] = calculator_function def get_shape_calculator(operator_name): if operator_name not in _shape_calculator_pool: - msg = ("Unsupported shape calculator for operator " - "'%s'." % operator_name) + msg = "Unsupported shape calculator for operator " "'%s'." 
% operator_name raise ValueError(msg) return _shape_calculator_pool[operator_name] diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py index b87da0f3a..a5f5139a9 100644 --- a/skl2onnx/common/_topology.py +++ b/skl2onnx/common/_topology.py @@ -10,23 +10,28 @@ from onnx import onnx_pb as onnx_proto from onnx.helper import make_graph, make_model, make_tensor_value_info from onnxconverter_common.data_types import ( # noqa - DataType, TensorType, - FloatType, Int64Type, StringType, - DictionaryType, FloatTensorType, # noqa - Int64TensorType, SequenceType, # noqa - StringTensorType, DoubleTensorType, - Int32TensorType, BooleanTensorType, - DoubleTensorType) + DataType, + TensorType, + FloatType, + Int64Type, + StringType, + DictionaryType, + FloatTensorType, # noqa + Int64TensorType, + SequenceType, # noqa + StringTensorType, + DoubleTensorType, + Int32TensorType, + BooleanTensorType, + DoubleTensorType, +) + try: - from onnxconverter_common.data_types import ( - Int8TensorType, UInt8TensorType) + from onnxconverter_common.data_types import Int8TensorType, UInt8TensorType except ImportError: Int8TensorType = None UInt8TensorType = None -from ..proto import ( - get_opset_number_from_onnx, - get_latest_tested_opset_version -) +from ..proto import get_opset_number_from_onnx, get_latest_tested_opset_version from . import _registration from . import utils from .exceptions import MissingShapeCalculator, MissingConverter @@ -38,22 +43,38 @@ def _default_OPSET_TO_IR_VERSION(): return { - 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3, - 7: 3, 8: 4, 9: 4, 10: 5, 11: 6, 12: 7, - 13: 7, 14: 7, 15: 8, 16: 8, 17: 8, 18: 8, - 19: 9 + 1: 3, + 2: 3, + 3: 3, + 4: 3, + 5: 3, + 6: 3, + 7: 3, + 8: 4, + 9: 4, + 10: 5, + 11: 6, + 12: 7, + 13: 7, + 14: 7, + 15: 8, + 16: 8, + 17: 8, + 18: 8, + 19: 9, } try: from onnxconverter_common.topology import OPSET_TO_IR_VERSION + assert OPSET_TO_IR_VERSION[18] is not None except (ImportError, KeyError): OPSET_TO_IR_VERSION = _default_OPSET_TO_IR_VERSION() OPSET_ML_TO_OPSET = {1: 11, 2: 15, 3: 18} -logger = getLogger('skl2onnx') +logger = getLogger("skl2onnx") def get_default_opset_for_domain(domain): @@ -61,15 +82,16 @@ def get_default_opset_for_domain(domain): Returns the associated for a domain given the main opset. """ from .. import __max_supported_opset__ as main_opset - if domain == '': + + if domain == "": return main_opset - if domain == 'ai.onnx.ml': + if domain == "ai.onnx.ml": if main_opset >= 16: return 3 if main_opset < 6: return 1 return 2 - if domain == 'ai.onnx.training': + if domain == "ai.onnx.training": return 1 return None @@ -79,6 +101,7 @@ class Variable: Defines a variable which holds any data defined from *ONNX* types. """ + _UNIQUE_NUMBER_ = 0 def __init__(self, raw_name, onnx_name, scope, type=None): @@ -95,14 +118,13 @@ def __init__(self, raw_name, onnx_name, scope, type=None): e.g., FloatTensorType """ if not isinstance(raw_name, str): + raise TypeError("raw_name must be a string not '%s'." % raw_name.__class__) + if type is not None and not hasattr(type, "shape"): raise TypeError( - "raw_name must be a string not '%s'." % raw_name.__class__) - if type is not None and not hasattr(type, 'shape'): - raise TypeError( - "Unexpected type for variable raw_name=%r, type=%r." % ( - raw_name, type)) - if not isinstance(onnx_name, str) or '(' in onnx_name: - if onnx_name.startswith('u(') and onnx_name[-1] == ')': + "Unexpected type for variable raw_name=%r, type=%r." 
% (raw_name, type) + ) + if not isinstance(onnx_name, str) or "(" in onnx_name: + if onnx_name.startswith("u(") and onnx_name[-1] == ")": onnx_name0 = onnx_name if scope is None: onnx_name = "UU%03dUU" % Variable._UNIQUE_NUMBER_ @@ -110,11 +132,13 @@ def __init__(self, raw_name, onnx_name, scope, type=None): else: onnx_name = scope.get_unique_variable_name("U") logger.debug( - '[Var] rename raw_name=%r, onnx_name=%r into %r', - raw_name, onnx_name0, onnx_name) + "[Var] rename raw_name=%r, onnx_name=%r into %r", + raw_name, + onnx_name0, + onnx_name, + ) else: - raise TypeError( - "onnx_name must be a string not %r." % onnx_name) + raise TypeError("onnx_name must be a string not %r." % onnx_name) if type is not None: shape = type.shape @@ -123,8 +147,9 @@ def __init__(self, raw_name, onnx_name, scope, type=None): if len(not_none) and min(not_none) == 0: raise RuntimeError( "A variable cannot be empty, raw_name=%r, " - "onnx_name=%r, shape=%r, type=%r." % ( - raw_name, onnx_name, shape, type)) + "onnx_name=%r, shape=%r, type=%r." + % (raw_name, onnx_name, shape, type) + ) self._raw_name = raw_name self._onnx_name = onnx_name @@ -138,24 +163,26 @@ def __init__(self, raw_name, onnx_name, scope, type=None): self._is_root = None self._is_leaf = None if self.type is not None and not isinstance(self.type, DataType): - raise TypeError( - "shape must be a DataType not {}.".format(self.type)) + raise TypeError("shape must be a DataType not {}.".format(self.type)) if isinstance(self.type, TensorType): shape = self.type.shape if not isinstance(shape, (list, tuple)): try: shape = list(shape) except TypeError: - raise TypeError("shape must be a tuple or a list not " - "{}.".format(type_fct(shape))) + raise TypeError( + "shape must be a tuple or a list not " + "{}.".format(type_fct(shape)) + ) for dim in shape: if dim is None: continue if not isinstance(dim, (int, np.int32, np.int64, np.intc)): raise TypeError( "shape must contains integers not %r (type=%r)." - "" % (dim, dim.__class__)) - logger.debug('[Var] +%s', self) + "" % (dim, dim.__class__) + ) + logger.debug("[Var] +%s", self) # links to operators using those variables self.operators_outputs_ = [] @@ -169,8 +196,8 @@ def _check(self): continue if not isinstance(k, (int, np.integer)): raise ValueError( - "Unexpected type %r for shape %r." - "" % (type(k), self)) + "Unexpected type %r for shape %r." 
"" % (type(k), self) + ) @property def raw_name(self): @@ -203,14 +230,14 @@ def is_leaf(self): def init_status(self, is_fed=None, is_root=None, is_leaf=None): if is_fed is not None and is_fed != self.is_fed: logger.debug( - '[Var] update is_fed=%r for %r, parent=%r', - is_fed, self, self._parent) + "[Var] update is_fed=%r for %r, parent=%r", is_fed, self, self._parent + ) self._is_fed = is_fed if is_root is not None and is_root != self.is_root: - logger.debug('[Var] update is_root=%r for %r', is_root, self) + logger.debug("[Var] update is_root=%r for %r", is_root, self) self._is_root = is_root if is_leaf is not None and is_leaf != self.is_leaf: - logger.debug('[Var] update is_leaf=%r for %r', is_leaf, self) + logger.debug("[Var] update is_leaf=%r for %r", is_leaf, self) self._is_leaf = is_leaf def __setattr__(self, name, value): @@ -220,25 +247,31 @@ def __setattr__(self, name, value): raise AttributeError("You must use method set_onnx_name.") elif name in {"is_fed", "is_root", "is_leaf"}: raise AttributeError("You must use method init_status.") - elif name in {'scope', 'raw_name'}: + elif name in {"scope", "raw_name"}: raise AttributeError("scope or raw_name cannot be changed.") self.__dict__[name] = value def set_type(self, new_type): - if (new_type is None or isinstance(new_type, (str, Variable)) or - not hasattr(new_type, 'shape')): + if ( + new_type is None + or isinstance(new_type, (str, Variable)) + or not hasattr(new_type, "shape") + ): raise TypeError( - "Unexpected new type for variable %r, new_type=%r." % ( - self, new_type)) - logger.debug('[Var] update type for %r', self) + "Unexpected new type for variable %r, new_type=%r." % (self, new_type) + ) + logger.debug("[Var] update type for %r", self) self._type = new_type self._check() def set_onnx_name(self, onnx_name): if onnx_name != self._onnx_name: logger.debug( - '[Var] update onnx_name, from %r to %r in %r', - self.onnx_name, onnx_name, self) + "[Var] update onnx_name, from %r to %r in %r", + self.onnx_name, + onnx_name, + self, + ) if self.scope is not None and not isinstance(self.scope, str): self.scope.rename_onnx_name(self._onnx_name, onnx_name) self._onnx_name = onnx_name @@ -247,9 +280,9 @@ def set_parent(self, operator): if self._parent is not None: raise RuntimeError( "This variable is already the output of operator %r. " - "It cannot be the output of %r." % (self._parent, operator)) - logger.debug( - '[Var] set parent for %r, parent=%r', self, operator) + "It cannot be the output of %r." % (self._parent, operator) + ) + logger.debug("[Var] set parent for %r, parent=%r", self, operator) self._parent = operator def get_first_dimension(self): @@ -257,14 +290,12 @@ def get_first_dimension(self): Returns the first dimension (batch dimension) or None if not specified (shape is empty). 
""" - if (self.type is None or self.type.shape is None or - len(self.type.shape) == 0): + if self.type is None or self.type.shape is None or len(self.type.shape) == 0: return None return self.type.shape[0] def get_second_dimension(self): - if (self.type is None or self.type.shape is None or - len(self.type.shape) < 2): + if self.type is None or self.type.shape is None or len(self.type.shape) < 2: return None return self.type.shape[1] @@ -276,14 +307,16 @@ def full_name(self): return self.onnx_name def __repr__(self): - return ("Variable('{0}', '{1}', type={2})".format( - self.raw_name, self.onnx_name, self.type)) + return "Variable('{0}', '{1}', type={2})".format( + self.raw_name, self.onnx_name, self.type + ) @staticmethod def from_pb(obj): """ Creates a data type from a protobuf object. """ + def get_dim(d): r = d.dim_value if "dim_param" in str(d): @@ -294,10 +327,9 @@ def get_dim(d): return r def get_shape(tt): - return [get_dim(tt.shape.dim[i]) - for i in range(len(tt.shape.dim))] + return [get_dim(tt.shape.dim[i]) for i in range(len(tt.shape.dim))] - if hasattr(obj, 'extend'): + if hasattr(obj, "extend"): return [Variable.from_pb(o) for o in obj] name = obj.name @@ -317,22 +349,22 @@ def get_shape(tt): ty = Int64TensorType(shape) elif elem == onnx_proto.TensorProto.INT32: ty = Int32TensorType(shape) - elif (UInt8TensorType is not None and - elem == onnx_proto.TensorProto.UINT8): + elif UInt8TensorType is not None and elem == onnx_proto.TensorProto.UINT8: ty = UInt8TensorType(shape) - elif (Int8TensorType is not None and - elem == onnx_proto.TensorProto.INT8): + elif Int8TensorType is not None and elem == onnx_proto.TensorProto.INT8: ty = Int8TensorType(shape) elif elem == 0: ty = FloatTensorType(shape) else: raise NotImplementedError( "Unsupported type '{}' (elem_type={}).".format( - type(obj.type.tensor_type), elem)) + type(obj.type.tensor_type), elem + ) + ) else: - raise NotImplementedError("Unsupported type '{}' as " - "a string ({}).".format( - type(obj), obj)) + raise NotImplementedError( + "Unsupported type '{}' as " "a string ({}).".format(type(obj), obj) + ) return Variable(name, name, None, ty) @@ -356,7 +388,6 @@ def add_operator(self, op, in_or_out): self.operators_inputs_.append(op) def check_compatible_type(self, other_type): - def empty_shape(shape): return shape is None or len(shape) == 0 @@ -370,8 +401,8 @@ def empty_shape(shape): if empty_shape(other_type.shape): return raise TypeError( - "Incompatible type for variable %r and type %r." % ( - self, other_type)) + "Incompatible type for variable %r and type %r." % (self, other_type) + ) class VariableStr(Variable): @@ -390,8 +421,8 @@ def raw_name(self): def onnx_name(self): if self._onnx_name.startswith("u("): raise RuntimeError( - "Variable should be renamed as onnx_name=%r." - "" % self._onnx_name) + "Variable should be renamed as onnx_name=%r." "" % self._onnx_name + ) return self._onnx_name @@ -399,6 +430,7 @@ class Operator: """ Defines an operator available in *ONNX*. """ + class OperatorList(list): def __init__(self, parent, kind): super(Operator.OperatorList, self).__init__() @@ -406,25 +438,23 @@ def __init__(self, parent, kind): self.kind = kind def __eq__(self, second): - raise NotImplementedError( - "Operator equal not implemented and not needed.") + raise NotImplementedError("Operator equal not implemented and not needed.") def append(self, v): if not isinstance(v, Variable): raise TypeError( - "Input and output must be of type Variable not %r." 
- "" % type(v)) - if self.kind == 'Out': + "Input and output must be of type Variable not %r." "" % type(v) + ) + if self.kind == "Out": v.set_parent(self.parent) super(Operator.OperatorList, self).append(v) logger.debug("[Op] add %s %r to %r", self.kind, v, self.parent) - if self.kind == 'In': + if self.kind == "In": v.add_operator(self.parent, False) elif self.kind == "Out": v.add_operator(self.parent, True) else: - raise RuntimeError( - "Unexpected value for kind=%r." % self.kind) + raise RuntimeError("Unexpected value for kind=%r." % self.kind) def extend(self, vs): for v in vs: @@ -433,35 +463,36 @@ def extend(self, vs): def __getitem__(self, i): v = list.__getitem__(self, i) if isinstance(i, int) and not isinstance(v, Variable): - raise TypeError("Element %d must be a Variable not %r." % ( - i, type(v))) + raise TypeError("Element %d must be a Variable not %r." % (i, type(v))) return v def __setitem__(self, i, v): - raise LookupError( - "Setter should not be used to modify an element.") + raise LookupError("Setter should not be used to modify an element.") def set_element(self, i, v): "Updates element i." if not isinstance(v, Variable): - raise TypeError( - "Value v must be a Variable not %r." % type(v)) + raise TypeError("Value v must be a Variable not %r." % type(v)) logger.debug( "[Op] %s-change element %d from %r to %r in %r", - self.kind, i, self[i], v, self.parent) + self.kind, + i, + self[i], + v, + self.parent, + ) list.__setitem__(self, i, v) def to_string(self): names = [] for o in self: - if hasattr(o, 'onnx_name'): + if hasattr(o, "onnx_name"): names.append(o.onnx_name) else: names.append('"%s"' % str(o)) return ",".join(names) - def __init__(self, onnx_name, scope, type, raw_operator, - target_opset, scope_inst): + def __init__(self, onnx_name, scope, type, raw_operator, target_opset, scope_inst): """ :param onnx_name: A unique ID, which is a string :param scope: The name of the scope where this operator is @@ -477,8 +508,10 @@ def __init__(self, onnx_name, scope, type, raw_operator, :param scope_inst: :class:`Scope` instance the operator belongs to """ if isinstance(raw_operator, str): - raise RuntimeError("Parameter raw_operator must be an object not " - "a string '{0}'.".format(raw_operator)) + raise RuntimeError( + "Parameter raw_operator must be an object not " + "a string '{0}'.".format(raw_operator) + ) # operator name in the converted model, if raw_operator # is not None, output_shapes can be guessed # from the raw model. Otherwise, it can be guessed @@ -487,12 +520,12 @@ def __init__(self, onnx_name, scope, type, raw_operator, self.scope = scope self.type = type self.raw_operator = raw_operator - self.inputs = Operator.OperatorList(self, 'In') - self.outputs = Operator.OperatorList(self, 'Out') + self.inputs = Operator.OperatorList(self, "In") + self.outputs = Operator.OperatorList(self, "Out") self._is_evaluated = None self.target_opset = target_opset self.scope_inst = scope_inst - logger.debug('[Op] +%r', self) + logger.debug("[Op] +%r", self) def new_raw_operator(self, raw_operator, alias): """ @@ -500,8 +533,14 @@ def new_raw_operator(self, raw_operator, alias): changes the raw_operator but keeps the same inputs and outputs. 
""" - op = Operator(self.onnx_name, self.scope, alias, raw_operator, - self.target_opset, self.scope_inst) + op = Operator( + self.onnx_name, + self.scope, + alias, + raw_operator, + self.target_opset, + self.scope_inst, + ) op.inputs = self.inputs op.outputs = self.outputs return op @@ -515,30 +554,34 @@ def __repr__(self): # The line above fails for python 3.7 textop = type(self.raw_operator) if isinstance(textop, str) and "\n" in textop: - textop = textop.replace('\n', '').replace(' ', '') - return ("Operator(type='{0}', onnx_name='{1}', inputs='{2}', " - "outputs='{3}', raw_operator={4})".format( - self.type, self.onnx_name, - self.inputs.to_string(), - self.outputs.to_string(), - textop)) + textop = textop.replace("\n", "").replace(" ", "") + return ( + "Operator(type='{0}', onnx_name='{1}', inputs='{2}', " + "outputs='{3}', raw_operator={4})".format( + self.type, + self.onnx_name, + self.inputs.to_string(), + self.outputs.to_string(), + textop, + ) + ) def __setattr__(self, name, value): - if name in ('inputs', 'outputs'): - if (isinstance(value, list) and - not isinstance(value, Operator.OperatorList)): - if name == 'inputs': - self.inputs = Operator.OperatorList(self, 'In') + if name in ("inputs", "outputs"): + if isinstance(value, list) and not isinstance(value, Operator.OperatorList): + if name == "inputs": + self.inputs = Operator.OperatorList(self, "In") self.inputs.extend(value) return - if name == 'outputs': - self.outputs = Operator.OperatorList(self, 'Out') + if name == "outputs": + self.outputs = Operator.OperatorList(self, "Out") self.outputs.extend(value) return if not isinstance(value, Operator.OperatorList): raise TypeError( - "inputs or outputs must be of type Operator.OperatorList.") - ioo = name == 'outputs' + "inputs or outputs must be of type Operator.OperatorList." + ) + ioo = name == "outputs" for v in value: v.add_operator(self, ioo) self.__dict__[name] = value @@ -549,9 +592,7 @@ def is_evaluated(self): def init_status(self, is_evaluated=None): if is_evaluated is not None and is_evaluated != self.is_evaluated: - logger.debug( - '[Op] update is_evaluated=%r for %r', - is_evaluated, self) + logger.debug("[Op] update is_evaluated=%r for %r", is_evaluated, self) self._is_evaluated = is_evaluated @property @@ -587,25 +628,31 @@ def infer_types(self): if self.type is None: raise MissingShapeCalculator( "Unable to find a shape calculator for type '{}'.".format( - type(self.raw_operator))) + type(self.raw_operator) + ) + ) try: shape_calc = _registration.get_shape_calculator(self.type) except ValueError: raise MissingShapeCalculator( "Unable to find a shape calculator for alias '{}' " - "and type '{}'.".format(self.type, type(self.raw_operator))) + "and type '{}'.".format(self.type, type(self.raw_operator)) + ) if shape_calc is None: raise MissingShapeCalculator( "Unexpected shape calculator for alias '{}' " - "and type '{}'.".format(self.type, type(self.raw_operator))) + "and type '{}'.".format(self.type, type(self.raw_operator)) + ) logger.debug( - "[Shape-a] %r fed %r - %r", self, + "[Shape-a] %r fed %r - %r", + self, "".join(str(i.is_fed) for i in self.inputs), - "".join(str(i.is_fed) for i in self.outputs)) + "".join(str(i.is_fed) for i in self.outputs), + ) shape_calc(self) logger.debug( - "[Shape-b] %r inputs=%r - outputs=%r", - self, self.inputs, self.outputs) + "[Shape-b] %r inputs=%r - outputs=%r", self, self.inputs, self.outputs + ) class Scope: @@ -615,9 +662,15 @@ class Scope: provides functions to create a unique unused name. 
""" - def __init__(self, name, target_opset=None, - custom_shape_calculators=None, options=None, - registered_models=None, naming=None): + def __init__( + self, + name, + target_opset=None, + custom_shape_calculators=None, + options=None, + registered_models=None, + naming=None, + ): """ :param name: A string, the unique ID of this scope in a Topology object @@ -670,16 +723,15 @@ def __init__(self, name, target_opset=None, if naming is None: self._naming = Topology._generate_unique_name elif isinstance(naming, str): - self._naming = ( - lambda seed, names: Topology._generate_unique_name( - self.naming + seed, names)) + self._naming = lambda seed, names: Topology._generate_unique_name( + self.naming + seed, names + ) elif callable(self.naming): - self._naming = ( - lambda seed, names: Topology._generate_unique_name( - self.naming(seed, names), names)) + self._naming = lambda seed, names: Topology._generate_unique_name( + self.naming(seed, names), names + ) else: - raise TypeError( - "Unexpected type for parameter naming: %r." % type(naming)) + raise TypeError("Unexpected type for parameter naming: %r." % type(naming)) def get(self, var_name, default_value): "Returns variable with 'name' or default value is not found." @@ -705,13 +757,13 @@ def get_unique_variable_name(self, seed, rename=True): Creates a unique variable ID based on the given seed. """ if not isinstance(seed, str): - raise TypeError("Parameter seed must be a string not {}." - "".format(type(seed))) + raise TypeError( + "Parameter seed must be a string not {}." "".format(type(seed)) + ) if rename: name = self._naming(seed, self.onnx_variable_names) else: - name = Topology._generate_unique_name( - seed, self.onnx_variable_names) + name = Topology._generate_unique_name(seed, self.onnx_variable_names) return name def get_unique_operator_name(self, seed): @@ -720,16 +772,16 @@ def get_unique_operator_name(self, seed): """ return self._naming(seed, self.onnx_operator_names) - def declare_local_variable(self, raw_name, type=None, prepend=False, - missing_type=False, rename=True): + def declare_local_variable( + self, raw_name, type=None, prepend=False, missing_type=False, rename=True + ): """ This function may create a new variable in this scope. If *raw_name* has been used to create other variables, the new variable will hide all other variables created using *raw_name*. """ if type is None and not missing_type: - raise RuntimeError( - "Unknown type for %r (type=%r)." % (raw_name, type)) + raise RuntimeError("Unknown type for %r (type=%r)." % (raw_name, type)) # Get unique ID for the new variable onnx_name = self.get_unique_variable_name(raw_name, rename=rename) @@ -742,16 +794,16 @@ def register_variable(self, var, prepend=False): "Adds a variable to the scope." if var.onnx_name in self.variables: raise RuntimeError( - "Variable %r already registered (other=%r)." % ( - var, self.variables[var.onnx_name])) + "Variable %r already registered (other=%r)." 
+ % (var, self.variables[var.onnx_name]) + ) if var.raw_name in self.variable_name_mapping: # Hide existing variables with the same raw_name if not prepend: self.variable_name_mapping[var.raw_name].append(var.onnx_name) else: - self.variable_name_mapping[var.raw_name].insert( - 0, var.onnx_name) + self.variable_name_mapping[var.raw_name].insert(0, var.onnx_name) else: self.variable_name_mapping[var.raw_name] = [var.onnx_name] @@ -769,50 +821,49 @@ def declare_existing_subgraph_name(self, graph_proto): if self.has_variable_name(name): raise NameError( "Result name %r is already taken (outputs=%r) " - "(node=%r)." % ( - name, output_name, node)) + "(node=%r)." % (name, output_name, node) + ) self.onnx_variable_names.add(name) if node.name in self.onnx_operator_names: raise NameError( "Operator name %r is already taken " - "(node=%r)." % ( - node.name, node)) + "(node=%r)." % (node.name, node) + ) self.onnx_operator_names.add(node.name) def rename_onnx_name(self, old_name, new_name): if new_name in self.variables: raise RuntimeError( - "Name %r already in variables (%r)." % ( - new_name, self.variables[new_name])) + "Name %r already in variables (%r)." + % (new_name, self.variables[new_name]) + ) if old_name not in self.variables: - raise RuntimeError( - "Unable to find name %r in variables." % old_name) - logger.debug( - '[Scope] update onnx_name, from %r to %r', - old_name, new_name) + raise RuntimeError("Unable to find name %r in variables." % old_name) + logger.debug("[Scope] update onnx_name, from %r to %r", old_name, new_name) self.variables[new_name] = self.variables[old_name] del self.variables[old_name] - def declare_local_input(self, raw_name, type=None, prepend=False, - rename=True): + def declare_local_input(self, raw_name, type=None, prepend=False, rename=True): """ Calls `declare_local_variable`. Registers this variable as an input. """ var = self.declare_local_variable( - raw_name, type=type, prepend=prepend, rename=rename) + raw_name, type=type, prepend=prepend, rename=rename + ) self.input_variables.append(var) return var - def declare_local_output(self, raw_name, type=None, prepend=False, - missing_type=False): + def declare_local_output( + self, raw_name, type=None, prepend=False, missing_type=False + ): """ Calls `declare_local_variable`. Registers this variable as an output. """ var = self.declare_local_variable( - raw_name, type=type, prepend=prepend, - missing_type=missing_type) + raw_name, type=type, prepend=prepend, missing_type=missing_type + ) self.output_variables.append(var) return var @@ -821,26 +872,29 @@ def declare_local_operator(self, type, raw_model=None): This function is used to declare new local operator. 
""" onnx_name = self.get_unique_operator_name(str(type)) - operator = Operator(onnx_name, self.name, type, raw_model, - self.target_opset, scope_inst=self) + operator = Operator( + onnx_name, self.name, type, raw_model, self.target_opset, scope_inst=self + ) self.operators[onnx_name] = operator return operator def _get_allowed_options(self, model, fail=True): if self.registered_models is not None: - if type(model) not in self.registered_models['aliases']: + if type(model) not in self.registered_models["aliases"]: if fail: raise NotImplementedError( "No registered models, no known allowed options " - "for model '{}'.".format(model.__class__.__name__)) + "for model '{}'.".format(model.__class__.__name__) + ) return {} - alias = self.registered_models['aliases'][type(model)] - conv = self.registered_models['conv'][alias] + alias = self.registered_models["aliases"][type(model)] + conv = self.registered_models["conv"][alias] allowed = conv.get_allowed_options() return allowed raise NotImplementedError( "No registered models, no known allowed options " - "for model '{}'.".format(model.__class__.__name__)) + "for model '{}'.".format(model.__class__.__name__) + ) def add_options(self, model_id, options): """ @@ -873,9 +927,12 @@ def get_options(self, model, default_values=None, fail=True): :return: dictionary """ return _build_options( - model, self.options, default_values, + model, + self.options, + default_values, self._get_allowed_options(model, fail=fail), - fail=fail) + fail=fail, + ) def replace_raw_operator(self, op1, op2, alias): """ @@ -885,8 +942,8 @@ def replace_raw_operator(self, op1, op2, alias): for v in self.operators.values(): if id(v.raw_operator) == id(op1): logger.debug( - '[Scope] replace %d by %d in %r.', - id(v.raw_operator), id(op1), v) + "[Scope] replace %d by %d in %r.", id(v.raw_operator), id(op1), v + ) v.raw_operator = op2 v.type = alias @@ -899,9 +956,16 @@ class Topology: These are filled by the converters while a pipeline is being converted. """ - def __init__(self, model, default_batch_size=1, initial_types=None, - target_opset=None, custom_conversion_functions=None, - custom_shape_calculators=None, registered_models=None): + def __init__( + self, + model, + default_batch_size=1, + initial_types=None, + target_opset=None, + custom_conversion_functions=None, + custom_shape_calculators=None, + registered_models=None, + ): """ Initializes a *Topology* object, which is an intermediate representation of a computational graph. @@ -928,23 +992,28 @@ def __init__(self, model, default_batch_size=1, initial_types=None, self.default_batch_size = default_batch_size self.target_opset = target_opset self.custom_conversion_functions = ( - custom_conversion_functions if custom_conversion_functions else {}) + custom_conversion_functions if custom_conversion_functions else {} + ) self.custom_shape_calculators = ( - custom_shape_calculators if custom_shape_calculators else {}) + custom_shape_calculators if custom_shape_calculators else {} + ) for k in self.custom_conversion_functions: if not callable(k): - raise TypeError("Keys in custom_conversion_functions must be " - "types not strings.") + raise TypeError( + "Keys in custom_conversion_functions must be " "types not strings." + ) for k in self.custom_shape_calculators: if not callable(k): - raise TypeError("Keys in custom_shape_calculators must be " - "types not strings.") + raise TypeError( + "Keys in custom_shape_calculators must be " "types not strings." + ) # A map of local overwritten model aliases. 
self.model_aliases = {} - all_model_types = (set(self.custom_conversion_functions) - | set(self.custom_shape_calculators)) + all_model_types = set(self.custom_conversion_functions) | set( + self.custom_shape_calculators + ) for mtype in all_model_types: alias = "{}_{}".format(mtype.__name__, id(self)) self.model_aliases[mtype] = alias @@ -957,8 +1026,7 @@ def __init__(self, model, default_batch_size=1, initial_types=None, @property def scope(self): if len(self.scopes) != 1: - raise RuntimeError( - "Only one scope is allowed not %d." % len(self.scopes)) + raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes)) return self.scopes[0] @staticmethod @@ -970,15 +1038,15 @@ def _generate_unique_name(seed, existing_names): produced :return: a string similar to the seed """ - if seed == '': - raise ValueError('Name seed must be a non-empty string.') + if seed == "": + raise ValueError("Name seed must be a non-empty string.") # Make the seed meet C-style naming convention # Only alphabets and numbers are allowed - seed = re.sub('[^\\w+]', '_', seed) + seed = re.sub("[^\\w+]", "_", seed) # The first symbol cannot be a number - if re.match('^[0-9]', seed): - seed = '_' + seed + if re.match("^[0-9]", seed): + seed = "_" + seed # If seed has never been seen, we return it as it is. Otherwise, # we will append an number to make it unique. @@ -996,20 +1064,21 @@ def _generate_unique_name(seed, existing_names): def get_unique_scope_name(self, seed): return Topology._generate_unique_name(seed, self.scope_names) - def declare_scope(self, seed, parent_scopes=None, options=None, - naming=None): + def declare_scope(self, seed, parent_scopes=None, options=None, naming=None): """ Creates a new :class:`Scope ` and appends it to the list of existing scopes. """ if len(self.scopes) != 0: - raise RuntimeError( - "Only one scope can be created.") + raise RuntimeError("Only one scope can be created.") scope = Scope( - self.get_unique_scope_name(seed), target_opset=self.target_opset, + self.get_unique_scope_name(seed), + target_opset=self.target_opset, custom_shape_calculators=self.custom_shape_calculators, - options=options, registered_models=self.registered_models, - naming=naming) + options=options, + registered_models=self.registered_models, + naming=naming, + ) # Declare input variables. # They should be the inputs of the scikit-learn @@ -1048,16 +1117,18 @@ def call_converter(self, operator, container, verbose=0): "Unable to find converter for alias '{}' type " "'{}'. You may raise an issue at " "https://github.com/onnx/sklearn-onnx/issues." - "".format(operator.type, - type(getattr(operator, 'raw_model', None)))) + "".format(operator.type, type(getattr(operator, "raw_model", None))) + ) container.validate_options(operator) if verbose > 0: print("[call_converter] call converter for %r." % operator.type) logger.debug( - "[Conv] call %r fed %r - %r", operator, + "[Conv] call %r fed %r - %r", + operator, "".join(str(i.is_fed) for i in operator.inputs), - "".join(str(i.is_fed) for i in operator.outputs)) + "".join(str(i.is_fed) for i in operator.outputs), + ) conv(self.scopes[0], operator, container) logger.debug("[Conv] end - %r", operator) @@ -1066,13 +1137,13 @@ def call_shape_calculator(self, operator): mtype = type(operator.raw_operator) if mtype in self.custom_shape_calculators: # overwritten operator. 
- source = 'custom' + source = "custom" shape_calc = self.custom_shape_calculators[mtype] elif operator.type in self.custom_shape_calculators: - source = 'custom' + source = "custom" shape_calc = self.custom_shape_calculators[operator.type] elif hasattr(operator.raw_operator, "onnx_shape_calculator"): - source = 'onnx_shape_calculator' + source = "onnx_shape_calculator" shape_calc = operator.raw_operator.onnx_shape_calculator() else: source = "" @@ -1080,13 +1151,15 @@ def call_shape_calculator(self, operator): if shape_calc is not None: logger.debug( - "[Shape1] %r fed %r - %r (source=%r)", operator, + "[Shape1] %r fed %r - %r (source=%r)", + operator, ",".join(str(i.is_fed) for i in operator.inputs), ",".join(str(i.is_fed) for i in operator.outputs), - source) + source, + ) shape_calc(operator) else: - logger.debug('[Shape2] call infer_types for %r', operator) + logger.debug("[Shape2] call infer_types for %r", operator) operator.infer_types() def _initialize_graph_status_for_traversing(self): @@ -1095,8 +1168,7 @@ def _initialize_graph_status_for_traversing(self): traversing the graph. Only used by convert_operators. """ if len(self.scopes) != 1: - raise RuntimeError( - "Only one scope is allowed not %d." % len(self.scopes)) + raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes)) input_names = set(v.onnx_name for v in self.scopes[0].input_variables) if len(input_names) == 0: raise RuntimeError("No detected inputs.") @@ -1107,8 +1179,7 @@ def _initialize_graph_status_for_traversing(self): for operator in self.unordered_operator_iterator(): operator.init_status(is_evaluated=False) - def _propagate_status(self, operator, container, fed_variables, - verbose=0): + def _propagate_status(self, operator, container, fed_variables, verbose=0): """ Propagates status *is_fed* based on output variable and node added in the container. @@ -1123,8 +1194,10 @@ def _propagate_status(self, operator, container, fed_variables, vars[i].append(node) if verbose > 1: - print("[_propagate_status] newly fed=%r" % list( - v.onnx_name for v in operator.outputs if v.is_fed)) + print( + "[_propagate_status] newly fed=%r" + % list(v.onnx_name for v in operator.outputs if v.is_fed) + ) stack = list(fed_variables) scope = self.scopes[0] while len(stack) > 0: @@ -1155,23 +1228,28 @@ def convert_operators(self, container=None, verbose=0): operators. It also processes new operators created by converters. """ + def _check_operator_(operator): if not isinstance(operator.inputs, Operator.OperatorList): raise TypeError( "operator.inputs must be a Operator.OperatorList " - "not %r." % type(operator.inputs)) + "not %r." % type(operator.inputs) + ) if not isinstance(operator.outputs, Operator.OperatorList): raise TypeError( "operator.outputs must be a Operator.OperatorList " - "not %r." % type(operator.outputs)) + "not %r." % type(operator.outputs) + ) if any(not isinstance(i, Variable) for i in operator.inputs): raise TypeError( "One input is not a Variable for operator %r - %r." - "" % (type(operator.raw_operator), operator)) + "" % (type(operator.raw_operator), operator) + ) if any(not isinstance(i, Variable) for i in operator.outputs): raise TypeError( "One output is not a Variable for operator %r - %r." 
- "" % (type(operator.raw_operator), operator)) + "" % (type(operator.raw_operator), operator) + ) def _check_variable_in_(variable, operator): idop = id(operator) @@ -1179,33 +1257,43 @@ def _check_variable_in_(variable, operator): if idop not in ids: raise RuntimeError( "Operator %r not registered in the list of operators " - "of %r taking it as an input [\n%s]." % ( - operator, variable, - "\n".join(map(str, variable.operators_inputs_)))) + "of %r taking it as an input [\n%s]." + % ( + operator, + variable, + "\n".join(map(str, variable.operators_inputs_)), + ) + ) def _check_variable_out_(variable, operator): if variable.is_fed: add = ["", "--DEBUG-INFO--"] for scope in self.scopes: - add.append('---') - add.append(pprint.pformat( - scope.variable_name_mapping)) - add.append('---') + add.append("---") + add.append(pprint.pformat(scope.variable_name_mapping)) + add.append("---") for var in scope.variables.values(): - add.append(" is_fed=%s %s - n_in=%d n_out=%d" % ( - getattr(var, 'is_fed', '?'), var, - len(var.operators_inputs_), - len(var.operators_outputs_))) - add.append('---') + add.append( + " is_fed=%s %s - n_in=%d n_out=%d" + % ( + getattr(var, "is_fed", "?"), + var, + len(var.operators_inputs_), + len(var.operators_outputs_), + ) + ) + add.append("---") for op in scope.operators.values(): - add.append(" is_evaluated=%s %s" % ( - getattr(op, 'is_evaluated', '?'), op)) - add.append('---') + add.append( + " is_evaluated=%s %s" + % (getattr(op, "is_evaluated", "?"), op) + ) + add.append("---") for v in operator.inputs: add.append(" inputs={}".format(v)) for v in operator.outputs: add.append(" outputs={}".format(v)) - add.append('--- operator producing this variable--') + add.append("--- operator producing this variable--") for op in variable.operators_outputs_: add.append(str(op)) raise RuntimeError( @@ -1218,11 +1306,15 @@ def _check_variable_out_(variable, operator): "of them is producing this output. " "In that case, an identity node must be " "added.{}".format( - variable, operator.type, - operator.onnx_name, operator.is_evaluated, + variable, + operator.type, + operator.onnx_name, + operator.is_evaluated, [v.is_fed for v in operator.inputs], [v.is_fed for v in operator.outputs], - "\n".join(add))) + "\n".join(add), + ) + ) if verbose > 0: print("[convert_operators] begin") @@ -1235,17 +1327,19 @@ def _check_variable_out_(variable, operator): changes = 0 ops = list(self.unordered_operator_iterator()) if verbose > 0: - print("[convert_operators] iteration %d - n_vars=%d " - "n_ops=%d" % ( - n_iter, len(fed_variables), len(ops))) + print( + "[convert_operators] iteration %d - n_vars=%d " + "n_ops=%d" % (n_iter, len(fed_variables), len(ops)) + ) for operator in ops: _check_operator_(operator) for var in operator.inputs: if var.is_fed: fed_variables[var.onnx_name] = var - if (all(variable.is_fed for variable in operator.inputs) and - not operator.is_evaluated): - + if ( + all(variable.is_fed for variable in operator.inputs) + and not operator.is_evaluated + ): for variable in operator.inputs: _check_variable_in_(variable, operator) for variable in operator.outputs: @@ -1258,25 +1352,28 @@ def _check_variable_out_(variable, operator): # output variables are not necessarily known at this stage. 
operator.init_status(is_evaluated=True) for variable in operator.outputs: - if all(op.is_evaluated - for op in variable.operators_outputs_): + if all(op.is_evaluated for op in variable.operators_outputs_): variable.init_status(is_fed=True) fed_variables[variable.onnx_name] = variable fed_variables.update( - {i.name: i for i in container.initializers - if i.name not in fed_variables}) - self._propagate_status(operator, container, fed_variables, - verbose=verbose) + { + i.name: i + for i in container.initializers + if i.name not in fed_variables + } + ) + self._propagate_status( + operator, container, fed_variables, verbose=verbose + ) # unfed some variables (it happens when a node # shares an output with another node) rem = [] for n, var in fed_variables.items(): - if not hasattr(var, 'operators_outputs_'): + if not hasattr(var, "operators_outputs_"): # initializer continue - if any(not o.is_evaluated - for o in var.operators_outputs_): + if any(not o.is_evaluated for o in var.operators_outputs_): rem.append(n) for r in rem: v = fed_variables[r] @@ -1285,8 +1382,10 @@ def _check_variable_out_(variable, operator): changes += 1 if verbose > 0: - print("[convert_operators] end iter: %d - n_vars=%d" % ( - n_iter, len(fed_variables))) + print( + "[convert_operators] end iter: %d - n_vars=%d" + % (n_iter, len(fed_variables)) + ) if verbose > 0: print("[convert_operators] end.") @@ -1300,31 +1399,36 @@ def _check_variable_out_(variable, operator): for var in self.unordered_variable_iterator(): rows.append( "is_fed=%r is_leaf=%r is_root=%r - %r - n_in=%d n_out=%d" - "" % (var.is_fed, var.is_leaf, var.is_root, var, - len(var.operators_inputs_), - len(var.operators_outputs_))) + "" + % ( + var.is_fed, + var.is_leaf, + var.is_root, + var, + len(var.operators_inputs_), + len(var.operators_outputs_), + ) + ) rows.append("---OPERATORS---") for op in self.unordered_operator_iterator(): rows.append("is_eval=%r - %r" % (op.is_evaluated, op)) rows.append("---NODES---") for node in container.nodes: - rows.append("%s: %r -> %r" % ( - node.op_type, node.input, node.output)) + rows.append("%s: %r -> %r" % (node.op_type, node.input, node.output)) raise RuntimeError( "Not all operators have been evaluated. A variable name " "is probably misspelled.\n%s" - "" % "\n".join(rows)) + "" % "\n".join(rows) + ) # Input and output if len(self.scopes[0].input_variables) > 0: inputs = self.scopes[0].input_variables else: - inputs = [v for v in self.unordered_variable_iterator() - if v.is_root] + inputs = [v for v in self.unordered_variable_iterator() if v.is_root] for i in inputs: container.add_input(i) - outputs = [v for v in self.unordered_variable_iterator() - if v.is_leaf] + outputs = [v for v in self.unordered_variable_iterator() if v.is_leaf] # The function checks that for output variable, # raw_name equal onnx_name. It swaps names if it is not the case. @@ -1339,8 +1443,9 @@ def _check_variable_out_(variable, operator): continue swaped.add(var.raw_name) if verbose > 1: - print("[convert_operators] %r <-> %r." % ( - var.raw_name, var.onnx_name)) + print( + "[convert_operators] %r <-> %r." 
% (var.raw_name, var.onnx_name) + ) old_name = var.onnx_name new_name = var.raw_name @@ -1348,8 +1453,8 @@ def _check_variable_out_(variable, operator): container.swap_names(old_name, new_name) except NotImplementedError as e: logger.debug( - '[Topo] unable to swap %r and %r (%r).', - old_name, new_name, e) + "[Topo] unable to swap %r and %r (%r).", old_name, new_name, e + ) continue for v in self.unordered_variable_iterator(): @@ -1362,10 +1467,16 @@ def _check_variable_out_(variable, operator): container.add_output(o) -def convert_topology(topology, model_name, doc_string, target_opset, - channel_first_inputs=None, - options=None, remove_identity=True, - verbose=0): +def convert_topology( + topology, + model_name, + doc_string, + target_opset, + channel_first_inputs=None, + options=None, + remove_identity=True, + verbose=0, +): """ This function is used to convert our Topology object defined in _parser.py into a ONNX model (type: ModelProto). @@ -1388,8 +1499,7 @@ def convert_topology(topology, model_name, doc_string, target_opset, if target_opset is None: target_opset = get_latest_tested_opset_version() if isinstance(target_opset, dict): - onnx_target_opset = target_opset.get( - '', get_latest_tested_opset_version()) + onnx_target_opset = target_opset.get("", get_latest_tested_opset_version()) else: onnx_target_opset = target_opset if onnx_target_opset > get_opset_number_from_onnx(): @@ -1399,21 +1509,23 @@ def convert_topology(topology, model_name, doc_string, target_opset, "version of the installed onnx package. See " "https://github.com/onnx/onnx/blob/master/docs/" "Versioning.md#released-versions" - ".".format(onnx_target_opset, found)) + ".".format(onnx_target_opset, found) + ) if onnx_target_opset > get_latest_tested_opset_version(): warnings.warn( "Parameter target_opset {} > {} is higher than the " "the latest tested version" - ".".format( - onnx_target_opset, - get_latest_tested_opset_version())) + ".".format(onnx_target_opset, get_latest_tested_opset_version()) + ) container = ModelComponentContainer( - target_opset, options=options, + target_opset, + options=options, registered_models=topology.registered_models, white_op=topology.raw_model._white_op, black_op=topology.raw_model._black_op, - verbose=verbose) + verbose=verbose, + ) # Traverse the graph from roots to leaves # This loop could eventually be parallelized. @@ -1427,8 +1539,10 @@ def convert_topology(topology, model_name, doc_string, target_opset, if verbose >= 2: print("---NODES---") for node in container.nodes: - print(" %s - %s: %r -> %r" % ( - node.op_type, node.name, node.input, node.output)) + print( + " %s - %s: %r -> %r" + % (node.op_type, node.name, node.input, node.output) + ) # Create a graph from its main components if container.target_opset_onnx < 9: @@ -1445,28 +1559,36 @@ def convert_topology(topology, model_name, doc_string, target_opset, # one of the original model's input, so it has been added into # the container's input list. If this is the case, we need to # skip one iteration to avoid duplicated inputs. - if tensor.name in [value_info.name for value_info in - container.inputs]: + if tensor.name in [value_info.name for value_info in container.inputs]: continue # Initializers are always tensors so we can just call # make_tensor_value_info(...). value_info = make_tensor_value_info( - tensor.name, tensor.data_type, tensor.dims) + tensor.name, tensor.data_type, tensor.dims + ) extra_inputs.append(value_info) # Before ONNX opset 9, initializers were needed to be passed in # with inputs. 
- graph = make_graph(container.nodes, model_name, - container.inputs + extra_inputs, - container.outputs, container.initializers) + graph = make_graph( + container.nodes, + model_name, + container.inputs + extra_inputs, + container.outputs, + container.initializers, + ) else: # In ONNX opset 9 and above, initializers are included as # operator inputs and therefore do not need to be passed as # extra_inputs. graph = make_graph( - container.nodes, model_name, container.inputs, - container.outputs, container.initializers) + container.nodes, + model_name, + container.inputs, + container.outputs, + container.initializers, + ) # Add extra information related to the graph graph.value_info.extend(container.value_info) @@ -1475,16 +1597,16 @@ def convert_topology(topology, model_name, doc_string, target_opset, onnx_model = make_model(graph) # Update domain version - opv = min(onnx_target_opset, - _get_main_opset_version(onnx_model) or onnx_target_opset) + opv = min( + onnx_target_opset, _get_main_opset_version(onnx_model) or onnx_target_opset + ) if not _update_domain_version(container, onnx_model, verbose=verbose): # Main opset was not added. Doing it here. op_set = onnx_model.opset_import.add() - op_set.domain = '' + op_set.domain = "" op_set.version = opv if verbose > 0: - print('[convert_topology] +opset: name=%r, version=%s' % ( - '', opv)) + print("[convert_topology] +opset: name=%r, version=%s" % ("", opv)) # Add extra information irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION) @@ -1515,7 +1637,8 @@ def _update_domain_version(container, onnx_model, verbose=0): purified_operator_set[op_domain] = op_version else: purified_operator_set[op_domain] = max( - purified_operator_set[op_domain], op_version) + purified_operator_set[op_domain], op_version + ) # Fill operator sets i = 0 @@ -1530,28 +1653,30 @@ def _update_domain_version(container, onnx_model, verbose=0): # Just create one ONNX element in opset_import op_set = onnx_model.opset_import.add() if verbose > 0: - print('[_update_domain_version] +opset %d: name=%r, version=%s' % ( - i, op_domain, op_version)) + print( + "[_update_domain_version] +opset %d: name=%r, version=%s" + % (i, op_domain, op_version) + ) op_set.domain = op_domain - if op_set != '': + if op_set != "": max_supported = get_default_opset_for_domain(op_domain) if max_supported is not None and max_supported < op_version: raise RuntimeError( "The model is using version %d of domain %r not supported " "yet by this library. You need to specify " - "target_opset={%r: %r}." % ( - op_version, op_domain, op_domain, max_supported)) + "target_opset={%r: %r}." + % (op_version, op_domain, op_domain, max_supported) + ) op_set.version = op_version i += 1 if container.target_opset_any_domain(op_domain) < op_version: raise RuntimeError( - 'The specified opset %d is too low to convert ' - 'this model, which requires at least opset ' - '%d.' % ( - container.target_opset_any_domain(op_domain), - op_version)) - return '' in purified_operator_set + "The specified opset %d is too low to convert " + "this model, which requires at least opset " + "%d." 
% (container.target_opset_any_domain(op_domain), op_version) + ) + return "" in purified_operator_set def _get_main_opset_version(model): @@ -1560,7 +1685,7 @@ def _get_main_opset_version(model): """ mld = None for op in model.opset_import: - if op.domain == '': + if op.domain == "": return op.version if op.domain == "ai.onnx.ml": mld = op.version diff --git a/skl2onnx/common/data_types.py b/skl2onnx/common/data_types.py index 6a0d64cff..a09971a6f 100644 --- a/skl2onnx/common/data_types.py +++ b/skl2onnx/common/data_types.py @@ -2,18 +2,29 @@ import numpy as np from onnxconverter_common.data_types import ( # noqa - DataType, Int64Type, FloatType, # noqa - StringType, TensorType, # noqa - Int64TensorType, Int32TensorType, BooleanTensorType, # noqa - FloatTensorType, StringTensorType, DoubleTensorType, # noqa - DictionaryType, SequenceType) # noqa + DataType, + Int64Type, + FloatType, # noqa + StringType, + TensorType, # noqa + Int64TensorType, + Int32TensorType, + BooleanTensorType, # noqa + FloatTensorType, + StringTensorType, + DoubleTensorType, # noqa + DictionaryType, + SequenceType, +) # noqa + try: from onnxconverter_common.data_types import ( # noqa - Complex64TensorType, Complex128TensorType) + Complex64TensorType, + Complex128TensorType, + ) except ImportError: Complex64TensorType = None Complex128TensorType = None -from onnxconverter_common.data_types import find_type_conversion, onnx_built_with_ml # noqa from ..proto import TensorProto, onnx_proto @@ -22,7 +33,7 @@ except ImportError: class DoubleType(DataType): - def __init__(self, doc_string=''): + def __init__(self, doc_string=""): super(DoubleType, self).__init__([1, 1], doc_string) def to_onnx_type(self): @@ -41,7 +52,7 @@ def __repr__(self): except ImportError: class Float16TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(Float16TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -53,7 +64,7 @@ def _get_element_onnx_type(self): except ImportError: class Int8TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(Int8TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -65,7 +76,7 @@ def _get_element_onnx_type(self): except ImportError: class Int16TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(Int16TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -77,7 +88,7 @@ def _get_element_onnx_type(self): except ImportError: class UInt16TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(UInt16TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -89,7 +100,7 @@ def _get_element_onnx_type(self): except ImportError: class UInt32TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(UInt32TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -101,7 +112,7 @@ def _get_element_onnx_type(self): except ImportError: class UInt64TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(UInt64TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -113,7 +124,7 @@ def _get_element_onnx_type(self): except ImportError: class 
UInt8TensorType(TensorType): - def __init__(self, shape=None, doc_string=''): + def __init__(self, shape=None, doc_string=""): super(UInt8TensorType, self).__init__(shape, doc_string) def _get_element_onnx_type(self): @@ -125,7 +136,7 @@ def _get_element_onnx_type(self): except ImportError: class UInt8Type(DataType): - def __init__(self, doc_string=''): + def __init__(self, doc_string=""): super(UInt8Type, self).__init__([1, 1], doc_string) def to_onnx_type(self): @@ -144,7 +155,7 @@ def __repr__(self): except ImportError: class Int8Type(DataType): - def __init__(self, doc_string=''): + def __init__(self, doc_string=""): super(Int8Type, self).__init__([1, 1], doc_string) def to_onnx_type(self): @@ -162,8 +173,7 @@ def copy_type(vtype, empty=True): if isinstance(vtype, SequenceType): return vtype.__class__(copy_type(vtype.element_type)) if isinstance(vtype, DictionaryType): - return vtype.__class__(copy_type(vtype.key_type), - copy_type(vtype.value_type)) + return vtype.__class__(copy_type(vtype.key_type), copy_type(vtype.value_type)) return vtype.__class__() @@ -171,9 +181,7 @@ def _guess_type_proto(data_type, dims): # This could be moved to onnxconverter_common. for d in dims: if d == 0: - raise RuntimeError( - "Dimension should not be null: {}.".format( - list(dims))) + raise RuntimeError("Dimension should not be null: {}.".format(list(dims))) if data_type == onnx_proto.TensorProto.FLOAT: return FloatTensorType(dims) if data_type == onnx_proto.TensorProto.DOUBLE: @@ -198,7 +206,8 @@ def _guess_type_proto(data_type, dims): raise NotImplementedError( "Unsupported data_type '{}'. You may raise an issue " "at https://github.com/onnx/sklearn-onnx/issues." - "".format(data_type)) + "".format(data_type) + ) def _guess_type_proto_str(data_type, dims): @@ -227,7 +236,8 @@ def _guess_type_proto_str(data_type, dims): raise NotImplementedError( "Unsupported data_type '{}'. You may raise an issue " "at https://github.com/onnx/sklearn-onnx/issues." - "".format(data_type)) + "".format(data_type) + ) def _guess_type_proto_str_inv(data_type): @@ -247,7 +257,8 @@ def _guess_type_proto_str_inv(data_type): raise NotImplementedError( "Unsupported data_type '{}'. You may raise an issue " "at https://github.com/onnx/sklearn-onnx/issues." 
- "".format(data_type)) + "".format(data_type) + ) def _guess_numpy_type(data_type, dims): @@ -256,13 +267,15 @@ def _guess_numpy_type(data_type, dims): return FloatTensorType(dims) if data_type == np.float64: return DoubleTensorType(dims) - if data_type in (np.str_, str, object) or str(data_type) in ('")] + debug_info = [str(type(onnx_model)).split(".")[-1].strip("'>")] else: - debug_info = debug_info + \ - [str(type(onnx_model)).split('.')[-1].strip("'>")] + debug_info = debug_info + [str(type(onnx_model)).split(".")[-1].strip("'>")] - if hasattr(onnx_model, 'graph'): + if hasattr(onnx_model, "graph"): return _apply_optimisation_on_graph( - onnx_remove_node_identity, onnx_model, - recursive=recursive, debug_info=debug_info) + onnx_remove_node_identity, + onnx_model, + recursive=recursive, + debug_info=debug_info, + ) graph = onnx_model @@ -50,7 +55,7 @@ def retrieve_idnodes(graph, existing_nodes): for i, exnode in enumerate(existing_nodes): if exnode is None: continue - if exnode.op_type == 'Identity': + if exnode.op_type == "Identity": input = exnode.input[0] output = exnode.output[0] idnodes.append((i, exnode, input, output)) @@ -106,38 +111,40 @@ def retrieve_local_variables_nodes(nodes): if out in nodes[j].input: nodes[j] = _rename_node_input(nodes[j], out, inp) logger.debug( - '[VarId-a] rename node input %r into %r' % ( - out, inp)) + "[VarId-a] rename node input %r into %r" % (out, inp) + ) rem += 1 - if nodes[j].op_type == 'Identity': + if nodes[j].op_type == "Identity": restart = True - logger.debug('[NodeId-a] remove %r' % nodes[i]) + logger.debug("[NodeId-a] remove %r" % nodes[i]) nodes[i] = None rem += 1 continue - if (not restart and inp not in inputs and inp not in outputs and - out not in outputs): + if ( + not restart + and inp not in inputs + and inp not in outputs + and out not in outputs + ): # We cannot change an input name or an output name. for j in range(len(nodes)): if nodes[j] is None: continue if inp in nodes[j].output: nodes[j] = _rename_node_output(nodes[j], inp, out) - logger.debug( - '[Var] rename node output %r into %r' % ( - out, inp)) + logger.debug("[Var] rename node output %r into %r" % (out, inp)) rem += 1 - if nodes[j].op_type == 'Identity': + if nodes[j].op_type == "Identity": restart = True if inp in nodes[j].input: nodes[j] = _rename_node_input(nodes[j], inp, out) logger.debug( - '[VarId-b] rename node input %r into %r' % ( - out, inp)) + "[VarId-b] rename node input %r into %r" % (out, inp) + ) rem += 1 - if nodes[j].op_type == 'Identity': + if nodes[j].op_type == "Identity": restart = True - logger.debug('[NodeId-b] remove %r' % nodes[i]) + logger.debug("[NodeId-b] remove %r" % nodes[i]) nodes[i] = None rem += 1 @@ -149,13 +156,20 @@ def retrieve_local_variables_nodes(nodes): continue nodes[i] = _apply_remove_node_fct_node( onnx_remove_node_identity, - node, recursive=True, debug_info=debug_info + [node.name]) + node, + recursive=True, + debug_info=debug_info + [node.name], + ) # Finally create the new graph. 
nodes = list(filter(lambda n: n is not None, nodes)) - graph = make_graph(nodes, onnx_model.name, - onnx_model.input, onnx_model.output, - onnx_model.initializer) + graph = make_graph( + nodes, + onnx_model.name, + onnx_model.input, + onnx_model.output, + onnx_model.initializer, + ) graph.value_info.extend(onnx_model.value_info) return graph diff --git a/skl2onnx/common/shape_calculator.py b/skl2onnx/common/shape_calculator.py index f62167cd2..50d3b600e 100644 --- a/skl2onnx/common/shape_calculator.py +++ b/skl2onnx/common/shape_calculator.py @@ -32,20 +32,27 @@ def calculate_linear_classifier_output_shapes(operator): def _calculate_linear_classifier_output_shapes( - operator, decision_path=False, decision_leaf=False, - enable_type_checking=True): + operator, decision_path=False, decision_leaf=False, enable_type_checking=True +): n_out = 0 if decision_path: n_out += 1 if decision_leaf: n_out += 1 out_range = [2, 2 + n_out] - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=out_range) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=out_range + ) if enable_type_checking: - check_input_and_output_types(operator, good_input_types=[ - BooleanTensorType, DoubleTensorType, - FloatTensorType, Int64TensorType]) + check_input_and_output_types( + operator, + good_input_types=[ + BooleanTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, + ], + ) N = operator.inputs[0].get_first_dimension() op = operator.raw_operator @@ -55,40 +62,57 @@ def _calculate_linear_classifier_output_shapes( if all(isinstance(i, np.ndarray) for i in class_labels): class_labels = np.concatenate(class_labels) if all(isinstance(i, str) for i in class_labels): - shape = ([N, len(op.classes_)] - if (getattr(op, 'multilabel_', False) or ( - isinstance(op.classes_, list) and - isinstance(op.classes_[0], np.ndarray))) else [N]) + shape = ( + [N, len(op.classes_)] + if ( + getattr(op, "multilabel_", False) + or ( + isinstance(op.classes_, list) + and isinstance(op.classes_[0], np.ndarray) + ) + ) + else [N] + ) operator.outputs[0].set_type(StringTensorType(shape=shape)) - if number_of_classes > 2 or operator.type != 'SklearnLinearSVC': - shape = ([len(op.classes_), N, max([len(x) for x in op.classes_])] - if isinstance(op.classes_, list) - and isinstance(op.classes_[0], np.ndarray) - else [N, number_of_classes]) + if number_of_classes > 2 or operator.type != "SklearnLinearSVC": + shape = ( + [len(op.classes_), N, max([len(x) for x in op.classes_])] + if isinstance(op.classes_, list) + and isinstance(op.classes_[0], np.ndarray) + else [N, number_of_classes] + ) operator.outputs[1].type.shape = shape else: # For binary LinearSVC, we produce probability of # the positive class operator.outputs[1].type.shape = [N, 1] - elif all(isinstance(i, (numbers.Real, bool, np.bool_)) - for i in class_labels): - shape = ([N, len(op.classes_)] - if (getattr(op, 'multilabel_', False) or ( - isinstance(op.classes_, list) and - isinstance(op.classes_[0], np.ndarray))) else [N]) + elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in class_labels): + shape = ( + [N, len(op.classes_)] + if ( + getattr(op, "multilabel_", False) + or ( + isinstance(op.classes_, list) + and isinstance(op.classes_[0], np.ndarray) + ) + ) + else [N] + ) operator.outputs[0].set_type(Int64TensorType(shape=shape)) - if number_of_classes > 2 or operator.type != 'SklearnLinearSVC': - shape = ([len(op.classes_), N, max([len(x) for x in op.classes_])] - if isinstance(op.classes_, list) 
- and isinstance(op.classes_[0], np.ndarray) - else [N, number_of_classes]) + if number_of_classes > 2 or operator.type != "SklearnLinearSVC": + shape = ( + [len(op.classes_), N, max([len(x) for x in op.classes_])] + if isinstance(op.classes_, list) + and isinstance(op.classes_[0], np.ndarray) + else [N, number_of_classes] + ) operator.outputs[1].type.shape = shape else: # For binary LinearSVC, we produce probability of # the positive class operator.outputs[1].type.shape = [N, 1] else: - raise ValueError('Label types must be all integers or all strings.') + raise ValueError("Label types must be all integers or all strings.") # decision_path, decision_leaf for n in range(2, len(operator.outputs)): @@ -107,14 +131,18 @@ def calculate_linear_regressor_output_shapes(operator): _calculate_linear_regressor_output_shapes(operator) -def _calculate_linear_regressor_output_shapes( - operator, enable_type_checking=True): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) +def _calculate_linear_regressor_output_shapes(operator, enable_type_checking=True): + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) if enable_type_checking: - check_input_and_output_types(operator, good_input_types=[ - BooleanTensorType, DoubleTensorType, - FloatTensorType, Int64TensorType]) + check_input_and_output_types( + operator, + good_input_types=[ + BooleanTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, + ], + ) inp0 = operator.inputs[0].type if isinstance(inp0, (FloatTensorType, DoubleTensorType)): @@ -123,10 +151,13 @@ def _calculate_linear_regressor_output_shapes( cls_type = FloatTensorType N = operator.inputs[0].get_first_dimension() - if (hasattr(operator.raw_operator, 'coef_') and - len(operator.raw_operator.coef_.shape) > 1): - operator.outputs[0].set_type(cls_type([ - N, operator.raw_operator.coef_.shape[0]])) + if ( + hasattr(operator.raw_operator, "coef_") + and len(operator.raw_operator.coef_.shape) > 1 + ): + operator.outputs[0].set_type( + cls_type([N, operator.raw_operator.coef_.shape[0]]) + ) else: operator.outputs[0].set_type(cls_type([N, 1])) diff --git a/skl2onnx/common/tree_ensemble.py b/skl2onnx/common/tree_ensemble.py index 2f44fc5d0..5e8b3893a 100644 --- a/skl2onnx/common/tree_ensemble.py +++ b/skl2onnx/common/tree_ensemble.py @@ -8,40 +8,40 @@ def get_default_tree_classifier_attribute_pairs(): attrs = {} - attrs['post_transform'] = 'NONE' - attrs['nodes_treeids'] = [] - attrs['nodes_nodeids'] = [] - attrs['nodes_featureids'] = [] - attrs['nodes_modes'] = [] - attrs['nodes_values'] = [] - attrs['nodes_truenodeids'] = [] - attrs['nodes_falsenodeids'] = [] - attrs['nodes_missing_value_tracks_true'] = [] - attrs['nodes_hitrates'] = [] - attrs['class_treeids'] = [] - attrs['class_nodeids'] = [] - attrs['class_ids'] = [] - attrs['class_weights'] = [] + attrs["post_transform"] = "NONE" + attrs["nodes_treeids"] = [] + attrs["nodes_nodeids"] = [] + attrs["nodes_featureids"] = [] + attrs["nodes_modes"] = [] + attrs["nodes_values"] = [] + attrs["nodes_truenodeids"] = [] + attrs["nodes_falsenodeids"] = [] + attrs["nodes_missing_value_tracks_true"] = [] + attrs["nodes_hitrates"] = [] + attrs["class_treeids"] = [] + attrs["class_nodeids"] = [] + attrs["class_ids"] = [] + attrs["class_weights"] = [] return attrs def get_default_tree_regressor_attribute_pairs(): attrs = {} - attrs['post_transform'] = 'NONE' - attrs['n_targets'] = 0 - attrs['nodes_treeids'] = [] - attrs['nodes_nodeids'] = [] - attrs['nodes_featureids'] = [] 
- attrs['nodes_modes'] = [] - attrs['nodes_values'] = [] - attrs['nodes_truenodeids'] = [] - attrs['nodes_falsenodeids'] = [] - attrs['nodes_missing_value_tracks_true'] = [] - attrs['nodes_hitrates'] = [] - attrs['target_treeids'] = [] - attrs['target_nodeids'] = [] - attrs['target_ids'] = [] - attrs['target_weights'] = [] + attrs["post_transform"] = "NONE" + attrs["n_targets"] = 0 + attrs["nodes_treeids"] = [] + attrs["nodes_nodeids"] = [] + attrs["nodes_featureids"] = [] + attrs["nodes_modes"] = [] + attrs["nodes_values"] = [] + attrs["nodes_truenodeids"] = [] + attrs["nodes_falsenodeids"] = [] + attrs["nodes_missing_value_tracks_true"] = [] + attrs["nodes_hitrates"] = [] + attrs["target_treeids"] = [] + attrs["target_nodeids"] = [] + attrs["target_ids"] = [] + attrs["target_weights"] = [] return attrs @@ -103,39 +103,54 @@ def sklearn_threshold(dy, dtype, mode): return bfy2 return np.float64(fy) raise TypeError("Unexpected dtype {}.".format(dtype)) - raise RuntimeError("Threshold is not changed for other mode and " - "'BRANCH_LEQ' (actually '{}').".format(mode)) - - -def add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id, - feature_id, mode, value, true_child_id, false_child_id, - weights, weight_id_bias, leaf_weights_are_counts, - adjust_threshold_for_sklearn, dtype, - nodes_missing_value_tracks_true=False): - attr_pairs['nodes_treeids'].append(tree_id) - attr_pairs['nodes_nodeids'].append(node_id) - attr_pairs['nodes_featureids'].append(feature_id) - attr_pairs['nodes_modes'].append(mode) - if adjust_threshold_for_sklearn and mode != 'LEAF': - attr_pairs['nodes_values'].append( - sklearn_threshold(value, dtype, mode)) + raise RuntimeError( + "Threshold is not changed for other mode and " + "'BRANCH_LEQ' (actually '{}').".format(mode) + ) + + +def add_node( + attr_pairs, + is_classifier, + tree_id, + tree_weight, + node_id, + feature_id, + mode, + value, + true_child_id, + false_child_id, + weights, + weight_id_bias, + leaf_weights_are_counts, + adjust_threshold_for_sklearn, + dtype, + nodes_missing_value_tracks_true=False, +): + attr_pairs["nodes_treeids"].append(tree_id) + attr_pairs["nodes_nodeids"].append(node_id) + attr_pairs["nodes_featureids"].append(feature_id) + attr_pairs["nodes_modes"].append(mode) + if adjust_threshold_for_sklearn and mode != "LEAF": + attr_pairs["nodes_values"].append(sklearn_threshold(value, dtype, mode)) else: - attr_pairs['nodes_values'].append(value) - attr_pairs['nodes_truenodeids'].append(true_child_id) - attr_pairs['nodes_falsenodeids'].append(false_child_id) - attr_pairs['nodes_missing_value_tracks_true'].append( - nodes_missing_value_tracks_true) - attr_pairs['nodes_hitrates'].append(1.) + attr_pairs["nodes_values"].append(value) + attr_pairs["nodes_truenodeids"].append(true_child_id) + attr_pairs["nodes_falsenodeids"].append(false_child_id) + attr_pairs["nodes_missing_value_tracks_true"].append( + nodes_missing_value_tracks_true + ) + attr_pairs["nodes_hitrates"].append(1.0) # Add leaf information for making prediction - if mode == 'LEAF': + if mode == "LEAF": flattened_weights = weights.flatten() factor = tree_weight # If the values stored at leaves are counts of possible classes, we # need convert them to probabilities by doing a normalization. if leaf_weights_are_counts: s = sum(flattened_weights) - factor /= float(s) if s != 0. else 1. 
+ factor /= float(s) if s != 0.0 else 1.0 flattened_weights = [w * factor for w in flattened_weights] if len(flattened_weights) == 2 and is_classifier: flattened_weights = [flattened_weights[1]] @@ -144,77 +159,113 @@ def add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id, # classifiers and regressors if is_classifier: for i, w in enumerate(flattened_weights): - attr_pairs['class_treeids'].append(tree_id) - attr_pairs['class_nodeids'].append(node_id) - attr_pairs['class_ids'].append(i + weight_id_bias) - attr_pairs['class_weights'].append(w) + attr_pairs["class_treeids"].append(tree_id) + attr_pairs["class_nodeids"].append(node_id) + attr_pairs["class_ids"].append(i + weight_id_bias) + attr_pairs["class_weights"].append(w) else: for i, w in enumerate(flattened_weights): - attr_pairs['target_treeids'].append(tree_id) - attr_pairs['target_nodeids'].append(node_id) - attr_pairs['target_ids'].append(i + weight_id_bias) - attr_pairs['target_weights'].append(w) + attr_pairs["target_treeids"].append(tree_id) + attr_pairs["target_nodeids"].append(node_id) + attr_pairs["target_ids"].append(i + weight_id_bias) + attr_pairs["target_weights"].append(w) -def add_tree_to_attribute_pairs(attr_pairs, is_classifier, tree, tree_id, - tree_weight, weight_id_bias, - leaf_weights_are_counts, - adjust_threshold_for_sklearn=False, - dtype=None): +def add_tree_to_attribute_pairs( + attr_pairs, + is_classifier, + tree, + tree_id, + tree_weight, + weight_id_bias, + leaf_weights_are_counts, + adjust_threshold_for_sklearn=False, + dtype=None, +): for i in range(tree.node_count): node_id = i weight = tree.value[i] if tree.children_left[i] > i or tree.children_right[i] > i: - mode = 'BRANCH_LEQ' + mode = "BRANCH_LEQ" feat_id = tree.feature[i] threshold = tree.threshold[i] left_child_id = int(tree.children_left[i]) right_child_id = int(tree.children_right[i]) else: - mode = 'LEAF' + mode = "LEAF" feat_id = 0 - threshold = 0. + threshold = 0.0 left_child_id = 0 right_child_id = 0 - add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id, - feat_id, mode, threshold, left_child_id, right_child_id, - weight, weight_id_bias, leaf_weights_are_counts, - adjust_threshold_for_sklearn=adjust_threshold_for_sklearn, - dtype=dtype) + add_node( + attr_pairs, + is_classifier, + tree_id, + tree_weight, + node_id, + feat_id, + mode, + threshold, + left_child_id, + right_child_id, + weight, + weight_id_bias, + leaf_weights_are_counts, + adjust_threshold_for_sklearn=adjust_threshold_for_sklearn, + dtype=dtype, + ) def add_tree_to_attribute_pairs_hist_gradient_boosting( - attr_pairs, is_classifier, tree, tree_id, - tree_weight, weight_id_bias, - leaf_weights_are_counts, - adjust_threshold_for_sklearn=False, - dtype=None): + attr_pairs, + is_classifier, + tree, + tree_id, + tree_weight, + weight_id_bias, + leaf_weights_are_counts, + adjust_threshold_for_sklearn=False, + dtype=None, +): for i, node in enumerate(tree.nodes): node_id = i - weight = node['value'] + weight = node["value"] - if node['is_leaf']: - mode = 'LEAF' + if node["is_leaf"]: + mode = "LEAF" feat_id = 0 - threshold = 0. 
+ threshold = 0.0 left_child_id = 0 right_child_id = 0 missing = False else: - mode = 'BRANCH_LEQ' - feat_id = node['feature_idx'] + mode = "BRANCH_LEQ" + feat_id = node["feature_idx"] try: - threshold = node['threshold'] + threshold = node["threshold"] except ValueError: - threshold = node['num_threshold'] - left_child_id = node['left'] - right_child_id = node['right'] - missing = node['missing_go_to_left'] - - add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id, - feat_id, mode, threshold, left_child_id, right_child_id, - weight, weight_id_bias, leaf_weights_are_counts, - adjust_threshold_for_sklearn=adjust_threshold_for_sklearn, - dtype=dtype, nodes_missing_value_tracks_true=missing) + threshold = node["num_threshold"] + left_child_id = node["left"] + right_child_id = node["right"] + missing = node["missing_go_to_left"] + + add_node( + attr_pairs, + is_classifier, + tree_id, + tree_weight, + node_id, + feat_id, + mode, + threshold, + left_child_id, + right_child_id, + weight, + weight_id_bias, + leaf_weights_are_counts, + adjust_threshold_for_sklearn=adjust_threshold_for_sklearn, + dtype=dtype, + nodes_missing_value_tracks_true=missing, + ) diff --git a/skl2onnx/common/utils.py b/skl2onnx/common/utils.py index a391fc356..b2a98c11c 100644 --- a/skl2onnx/common/utils.py +++ b/skl2onnx/common/utils.py @@ -14,14 +14,14 @@ from onnxconverter_common.utils import check_input_and_output_types # noqa from .data_types import TensorType -_unique_index = {'subgraph': 0} +_unique_index = {"subgraph": 0} def get_unique_subgraph(): "Returns a unique identifier integer for subgraph." global _unique_index - _unique_index['subgraph'] += 1 - return _unique_index['subgraph'] + _unique_index["subgraph"] += 1 + return _unique_index["subgraph"] def get_producer(): @@ -29,6 +29,7 @@ def get_producer(): Internal helper function to return the producer """ from .. import __producer__ + return __producer__ @@ -37,6 +38,7 @@ def get_producer_version(): Internal helper function to return the producer version """ from .. import __producer_version__ + return __producer_version__ @@ -45,6 +47,7 @@ def get_domain(): Internal helper function to return the model domain """ from .. import __domain__ + return __domain__ @@ -53,6 +56,7 @@ def get_model_version(): Internal helper function to return the model version """ from .. 
import __model_version__ + return __model_version__ @@ -82,12 +86,13 @@ def get_column_index(i, inputs): return 0, 0 vi = 0 pos = 0 - end = (inputs[0].type.shape[1] - if isinstance(inputs[0].type, TensorType) else 1) + end = inputs[0].type.shape[1] if isinstance(inputs[0].type, TensorType) else 1 if end is None: - raise RuntimeError("Cannot extract a specific column {0} when " - "one input ('{1}') has unknown " - "dimension.".format(i, inputs[0])) + raise RuntimeError( + "Cannot extract a specific column {0} when " + "one input ('{1}') has unknown " + "dimension.".format(i, inputs[0]) + ) while True: if pos <= i < end: return (vi, i - pos) @@ -96,13 +101,20 @@ def get_column_index(i, inputs): if vi >= len(inputs): raise RuntimeError( "Input {} (i={}, end={}) is not available in\n{}".format( - vi, i, end, pprint.pformat(inputs))) - rel_end = (inputs[vi].type.shape[1] - if isinstance(inputs[vi].type, TensorType) else 1) + vi, i, end, pprint.pformat(inputs) + ) + ) + rel_end = ( + inputs[vi].type.shape[1] + if isinstance(inputs[vi].type, TensorType) + else 1 + ) if rel_end is None: - raise RuntimeError("Cannot extract a specific column {0} when " - "one input ('{1}') has unknown " - "dimension.".format(i, inputs[vi])) + raise RuntimeError( + "Cannot extract a specific column {0} when " + "one input ('{1}') has unknown " + "dimension.".format(i, inputs[vi]) + ) end += rel_end else: for ind, inp in enumerate(inputs): @@ -114,8 +126,8 @@ def get_column_index(i, inputs): "initial_types fits the column names specified in the " "pipeline to convert. This may happen because a " "ColumnTransformer follows a transformer without " - "any mapped converter in a pipeline." % ( - i, [n.raw_name for n in inputs])) + "any mapped converter in a pipeline." % (i, [n.raw_name for n in inputs]) + ) def get_column_indices(indices, inputs, multiple): @@ -152,7 +164,8 @@ def get_column_indices(indices, inputs, multiple): raise NotImplementedError( "sklearn-onnx is not able to merge multiple columns from " "multiple variables ({0}). You should think about merging " - "initial types.".format(cols)) + "initial types.".format(cols) + ) return onnx_var, onnx_is @@ -162,7 +175,7 @@ def hash_array(value, length=15): onx = from_array(value) except AttributeError as e: # sparse matrix for example - if hasattr(value, 'tocoo'): + if hasattr(value, "tocoo"): coo = value.tocoo() arrs = [coo.data, coo.row, coo.col, np.array(coo.shape)] m = hashlib.sha256() @@ -171,18 +184,18 @@ def hash_array(value, length=15): return m.hexdigest()[:length] raise ValueError( - "Unable to compute hash for type %r (value=%r)." % ( - type(value), value)) from e + "Unable to compute hash for type %r (value=%r)." % (type(value), value) + ) from e except RuntimeError as ee: # cannot be serialized if isinstance(value, (np.ndarray, list)): - b = str(value).encode('utf-8') + b = str(value).encode("utf-8") m = hashlib.sha256() m.update(b) return m.hexdigest()[:length] raise RuntimeError( - "Unable to convert value type %r, (value=%r)." % ( - type(value), value)) from ee + "Unable to convert value type %r, (value=%r)." % (type(value), value) + ) from ee m = hashlib.sha256() m.update(onx.SerializeToString()) diff --git a/skl2onnx/common/utils_checking.py b/skl2onnx/common/utils_checking.py index a4356a433..34086f438 100644 --- a/skl2onnx/common/utils_checking.py +++ b/skl2onnx/common/utils_checking.py @@ -11,10 +11,11 @@ def check_signature(fct, reference, skip=None): (same parameter names). Raises an exception otherwise. 
""" + def select_parameters(pars): new_pars = OrderedDict() for i, (name, p) in enumerate(pars.items()): - if i >= 3 and name in ('op_type', 'op_domain', 'op_version'): + if i >= 3 and name in ("op_type", "op_domain", "op_version"): if p.default is not None: # Parameters op_type and op_domain are skipped. continue @@ -28,11 +29,12 @@ def select_parameters(pars): if len(fct_pars) != len(ref_pars): raise TypeError( "Function '{}' must have {} parameters but has {}." - "".format(fct.__name__, len(ref_pars), - len(fct_pars))) + "".format(fct.__name__, len(ref_pars), len(fct_pars)) + ) for i, (a, b) in enumerate(zip(fct_pars, ref_pars)): if a != b and skip is not None and b not in skip and a not in skip: raise NameError( "Parameter name mismatch at position {}." "Function '{}' has '{}' but '{}' is expected." - "".format(i + 1, fct.__name__, a, b)) + "".format(i + 1, fct.__name__, a, b) + ) diff --git a/skl2onnx/common/utils_classifier.py b/skl2onnx/common/utils_classifier.py index dceb59753..29d10934b 100644 --- a/skl2onnx/common/utils_classifier.py +++ b/skl2onnx/common/utils_classifier.py @@ -12,30 +12,30 @@ def get_label_classes(scope, op, node_names=False): handles option ``nocl`` and ``zipmap=='columns'`` """ options = scope.get_options(op, dict(nocl=False)) - if options['nocl']: + if options["nocl"]: if len(op.classes_.shape) > 1 and op.classes_.shape[1] > 1: raise RuntimeError( "Options 'nocl=True' is not implemented for multi-label " - "classification (class: {}).".format(op.__class__.__name__)) + "classification (class: {}).".format(op.__class__.__name__) + ) classes = np.arange(0, len(op.classes_)) elif node_names: try: options = scope.get_options(op, dict(zipmap=False)) - zipcol = options['zipmap'] == 'columns' + zipcol = options["zipmap"] == "columns" except NameError: zipcol = False if zipcol: clnames = op.classes_.ravel() - if (np.issubdtype(clnames.dtype, np.integer) or - clnames.dtype == np.bool_): - classes = np.array(['i%d' % c for c in clnames]) + if np.issubdtype(clnames.dtype, np.integer) or clnames.dtype == np.bool_: + classes = np.array(["i%d" % c for c in clnames]) else: - classes = np.array(['s%s' % c for c in clnames]) + classes = np.array(["s%s" % c for c in clnames]) else: classes = op.classes_ - elif hasattr(op, 'classes_'): + elif hasattr(op, "classes_"): classes = op.classes_ - elif hasattr(op, 'intercept_'): + elif hasattr(op, "intercept_"): classes = len(op.intercept_) elif hasattr(op, "y_"): # _ConstantPredictor @@ -43,47 +43,70 @@ def get_label_classes(scope, op, node_names=False): else: raise RuntimeError( "No known ways to retrieve the number of classes for class %r." - "" % type(op)) + "" % type(op) + ) return classes -def _finalize_converter_classes(scope, argmax_output_name, output_full_name, - container, classes, proto_dtype): +def _finalize_converter_classes( + scope, argmax_output_name, output_full_name, container, classes, proto_dtype +): """ See :func:`convert_voting_classifier`. 
""" - if (np.issubdtype(classes.dtype, np.floating) or - classes.dtype == np.bool_): + if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_: class_type = onnx_proto.TensorProto.INT32 classes = np.array(list(map(lambda x: int(x), classes))) elif np.issubdtype(classes.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) class_type = onnx_proto.TensorProto.STRING - classes_name = scope.get_unique_variable_name('classes') + classes_name = scope.get_unique_variable_name("classes") container.add_initializer(classes_name, class_type, classes.shape, classes) array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) output_shape = (-1,) if class_type == onnx_proto.TensorProto.INT32: - cast2_result_name = scope.get_unique_variable_name('cast2_result') - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - apply_cast(scope, array_feature_extractor_result_name, - cast2_result_name, container, - to=proto_dtype) - apply_reshape(scope, cast2_result_name, reshaped_result_name, - container, desired_shape=output_shape) - apply_cast(scope, reshaped_result_name, output_full_name, container, - to=onnx_proto.TensorProto.INT64) + cast2_result_name = scope.get_unique_variable_name("cast2_result") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + apply_cast( + scope, + array_feature_extractor_result_name, + cast2_result_name, + container, + to=proto_dtype, + ) + apply_reshape( + scope, + cast2_result_name, + reshaped_result_name, + container, + desired_shape=output_shape, + ) + apply_cast( + scope, + reshaped_result_name, + output_full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: # string labels - apply_reshape(scope, array_feature_extractor_result_name, - output_full_name, container, desired_shape=output_shape) + apply_reshape( + scope, + array_feature_extractor_result_name, + output_full_name, + container, + desired_shape=output_shape, + ) diff --git a/skl2onnx/common/utils_sklearn.py b/skl2onnx/common/utils_sklearn.py index ab9022d30..4c86573b9 100644 --- a/skl2onnx/common/utils_sklearn.py +++ b/skl2onnx/common/utils_sklearn.py @@ -13,56 +13,63 @@ def enumerate_model_names(model, prefix="", short=True): to the model itself. 
""" if isinstance(model, (list, tuple)): - if all(map(lambda x: isinstance(x, tuple) and len(x) in (2, 3), - model)): + if all(map(lambda x: isinstance(x, tuple) and len(x) in (2, 3), model)): for i, named_mod in enumerate(model): name, mod = named_mod[:2] - p = (name if short and prefix == "" - else "{}__{}".format(prefix, name)) + p = name if short and prefix == "" else "{}__{}".format(prefix, name) for t in enumerate_model_names(mod, p, short=short): yield t else: for i, mod in enumerate(model): - p = (i if short and prefix == "" - else "{}__{}".format(prefix, i)) + p = i if short and prefix == "" else "{}__{}".format(prefix, i) for t in enumerate_model_names(mod, p, short=short): yield t elif isinstance(model, (dict, OrderedDict)): for name, mod in model.items(): - p = (name if short and prefix == "" - else "{}__{}".format(prefix, name)) + p = name if short and prefix == "" else "{}__{}".format(prefix, name) for t in enumerate_model_names(mod, p, short=short): yield t else: yield (prefix, model) - reserved_atts = {'transformers', 'steps', 'transformer_list', - 'named_estimators_', 'named_transformers_', - 'transformer_', 'estimator_'} + reserved_atts = { + "transformers", + "steps", + "transformer_list", + "named_estimators_", + "named_transformers_", + "transformer_", + "estimator_", + } for key in dir(model): - if (key in ('estimators_', 'estimator') and - hasattr(model, 'named_estimators_')): + if key in ("estimators_", "estimator") and hasattr( + model, "named_estimators_" + ): continue - if (key in ('transformers_', 'transformers') and - hasattr(model, 'named_transformers_')): + if key in ("transformers_", "transformers") and hasattr( + model, "named_transformers_" + ): continue - if (key in reserved_atts or - (key.endswith("_") and not key.endswith("__") and - not key.startswith('_'))): + if key in reserved_atts or ( + key.endswith("_") and not key.endswith("__") and not key.startswith("_") + ): try: with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) obj = getattr(model, key) except AttributeError: continue - if (hasattr(obj, 'get_params') and - isinstance(obj, BaseEstimator)): - prefix = (key if short and prefix == "" - else "{}__{}".format(prefix, key)) + if hasattr(obj, "get_params") and isinstance(obj, BaseEstimator): + prefix = ( + key if short and prefix == "" else "{}__{}".format(prefix, key) + ) yield (prefix, obj) elif isinstance(obj, (list, tuple, dict, OrderedDict)): if not short or key not in reserved_atts: - prefix = (key if short and prefix == "" - else "{}__{}".format(prefix, key)) + prefix = ( + key + if short and prefix == "" + else "{}__{}".format(prefix, key) + ) for t in enumerate_model_names(obj, prefix, short=short): yield t @@ -71,8 +78,7 @@ def has_pipeline(model): """ Tells if a model contains a pipeline. 
""" - return any(map(lambda x: isinstance(x[1], Pipeline), - enumerate_model_names(model))) + return any(map(lambda x: isinstance(x[1], Pipeline), enumerate_model_names(model))) def _process_options(model, options): @@ -94,8 +100,8 @@ def _process_options(model, options): new_options[id(names[k])] = v continue try: - ri = k.rindex('__') - m2, k2 = k[:ri], k[ri + 2:] + ri = k.rindex("__") + m2, k2 = k[:ri], k[ri + 2 :] except ValueError: key = id(model) if key not in new_options: @@ -110,7 +116,9 @@ def _process_options(model, options): continue raise RuntimeError( "Unable to find model name '{}' or '{}' in \n{}".format( - k, m2, list(sorted(names)))) + k, m2, list(sorted(names)) + ) + ) return _process_pipeline_options(model, new_options) @@ -129,7 +137,7 @@ def _process_pipeline_options(model, options): last = v.steps[-1][1] key = id(last) for opt, val in opts.items(): - if opt not in {'zipmap', 'nocl', 'output_class_labels'}: + if opt not in {"zipmap", "nocl", "output_class_labels"}: continue if new_options is None: new_options = copy.deepcopy(options) diff --git a/skl2onnx/convert.py b/skl2onnx/convert.py index 62fae01ad..db8948589 100644 --- a/skl2onnx/convert.py +++ b/skl2onnx/convert.py @@ -12,14 +12,25 @@ from . import operator_converters # noqa -def convert_sklearn(model, name=None, initial_types=None, doc_string='', - target_opset=None, custom_conversion_functions=None, - custom_shape_calculators=None, - custom_parsers=None, options=None, - intermediate=False, - white_op=None, black_op=None, final_types=None, - dtype=None, naming=None, model_optim=True, - verbose=0): +def convert_sklearn( + model, + name=None, + initial_types=None, + doc_string="", + target_opset=None, + custom_conversion_functions=None, + custom_shape_calculators=None, + custom_parsers=None, + options=None, + intermediate=False, + white_op=None, + black_op=None, + final_types=None, + dtype=None, + naming=None, + model_optim=True, + verbose=0, +): """ This function produces an equivalent ONNX model of the given scikit-learn model. @@ -155,39 +166,54 @@ def convert_sklearn(model, name=None, initial_types=None, doc_string='', Parameter *naming* was added. """ if initial_types is None: - if hasattr(model, 'infer_initial_types'): + if hasattr(model, "infer_initial_types"): initial_types = model.infer_initial_types() else: - raise ValueError('Initial types are required. See usage of ' - 'convert(...) in skl2onnx.convert for details') + raise ValueError( + "Initial types are required. See usage of " + "convert(...) in skl2onnx.convert for details" + ) if name is None: name = str(uuid4().hex) if dtype is not None: warnings.warn( - "Parameter dtype is no longer supported. " - "It will be removed in 1.9.0.", - DeprecationWarning) + "Parameter dtype is no longer supported. 
" "It will be removed in 1.9.0.", + DeprecationWarning, + ) - target_opset = (target_opset - if target_opset else get_latest_tested_opset_version()) + target_opset = target_opset if target_opset else get_latest_tested_opset_version() # Parse scikit-learn model as our internal data structure # (i.e., Topology) if verbose >= 1: print("[convert_sklearn] parse_sklearn_model") topology = parse_sklearn_model( - model, initial_types, target_opset, custom_conversion_functions, - custom_shape_calculators, custom_parsers, options=options, - white_op=white_op, black_op=black_op, - final_types=final_types, naming=naming) + model, + initial_types, + target_opset, + custom_conversion_functions, + custom_shape_calculators, + custom_parsers, + options=options, + white_op=white_op, + black_op=black_op, + final_types=final_types, + naming=naming, + ) # Convert our Topology object into ONNX. The outcome is an ONNX model. options = _process_options(model, options) if verbose >= 1: print("[convert_sklearn] convert_topology") onnx_model = convert_topology( - topology, name, doc_string, target_opset, options=options, - remove_identity=model_optim and not intermediate, verbose=verbose) + topology, + name, + doc_string, + target_opset, + options=options, + remove_identity=model_optim and not intermediate, + verbose=verbose, + ) if verbose >= 1: print("[convert_sklearn] end") if verbose >= 2: @@ -200,20 +226,29 @@ def convert_sklearn(model, name=None, initial_types=None, doc_string='', print(" %r" % inp) print("---VARIABLES---") for k, v in sorted(scope.variables.items()): - print(" %r: is.fed=%r is_leaf=%r - %r" % ( - k, v.is_fed, v.is_leaf, v)) + print(" %r: is.fed=%r is_leaf=%r - %r" % (k, v.is_fed, v.is_leaf, v)) print("---OPERATORS---") for k, v in sorted(scope.operators.items()): - print(" %r: is.evaluated=%r - %r" % ( - k, v.is_evaluated, v)) + print(" %r: is.evaluated=%r - %r" % (k, v.is_evaluated, v)) return (onnx_model, topology) if intermediate else onnx_model -def to_onnx(model, X=None, name=None, initial_types=None, - target_opset=None, options=None, - white_op=None, black_op=None, final_types=None, - dtype=None, naming=None, model_optim=True, verbose=0): +def to_onnx( + model, + X=None, + name=None, + initial_types=None, + target_opset=None, + options=None, + white_op=None, + black_op=None, + final_types=None, + dtype=None, + naming=None, + model_optim=True, + verbose=0, +): """ Calls :func:`convert_sklearn` with simplified parameters. @@ -260,20 +295,28 @@ def to_onnx(model, X=None, name=None, initial_types=None, if isinstance(model, OnnxOperatorMixin): if options is not None: raise NotImplementedError( - "options not yet implemented for OnnxOperatorMixin.") + "options not yet implemented for OnnxOperatorMixin." 
+ ) return model.to_onnx(X=X, name=name, target_opset=target_opset) if name is None: name = "ONNX(%s)" % model.__class__.__name__ initial_types = guess_initial_types(X, initial_types) if verbose >= 1: print("[to_onnx] initial_types=%r" % initial_types) - return convert_sklearn(model, initial_types=initial_types, - target_opset=target_opset, - name=name, options=options, - white_op=white_op, black_op=black_op, - final_types=final_types, dtype=dtype, - verbose=verbose, naming=naming, - model_optim=model_optim) + return convert_sklearn( + model, + initial_types=initial_types, + target_opset=target_opset, + name=name, + options=options, + white_op=white_op, + black_op=black_op, + final_types=final_types, + dtype=dtype, + verbose=verbose, + naming=naming, + model_optim=model_optim, + ) def wrap_as_onnx_mixin(model, target_opset=None): @@ -283,6 +326,7 @@ def wrap_as_onnx_mixin(model, target_opset=None): and *OnnxOperatorMixin* API. """ from .algebra.sklearn_ops import find_class + cl = find_class(model.__class__) if "automation" in str(cl): raise RuntimeError("Wrong class name '{}'.".format(cl)) diff --git a/skl2onnx/helpers/investigate.py b/skl2onnx/helpers/investigate.py index a146eb5d8..c3ecbe4db 100644 --- a/skl2onnx/helpers/investigate.py +++ b/skl2onnx/helpers/investigate.py @@ -5,6 +5,7 @@ from types import MethodType import numpy from numpy.testing import assert_almost_equal + try: from scipy.sparse import csr_matrix except ImportError: @@ -12,6 +13,7 @@ from sklearn.base import TransformerMixin, ClassifierMixin from sklearn.base import RegressorMixin, BaseEstimator from sklearn.pipeline import Pipeline, FeatureUnion + try: from sklearn.compose import ColumnTransformer, TransformedTargetRegressor except ImportError: @@ -27,15 +29,16 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): if coor is None: coor = (0,) yield coor, pipe, vs - if hasattr(pipe, 'transformer_and_mapper_list') and len( - pipe.transformer_and_mapper_list): + if hasattr(pipe, "transformer_and_mapper_list") and len( + pipe.transformer_and_mapper_list + ): # azureml DataTransformer raise NotImplementedError("Unable to handle this specific case.") - elif hasattr(pipe, 'mapper') and pipe.mapper: + elif hasattr(pipe, "mapper") and pipe.mapper: # azureml DataTransformer for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)): yield couple - elif hasattr(pipe, 'built_features'): + elif hasattr(pipe, "built_features"): # sklearn_pandas.dataframe_mapper.DataFrameMapper for i, (columns, transformers, _) in enumerate(pipe.built_features): if isinstance(columns, str): @@ -43,9 +46,9 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): if transformers is None: yield (coor + (i,)), None, columns else: - for couple in enumerate_pipeline_models(transformers, - coor + (i,), - columns): + for couple in enumerate_pipeline_models( + transformers, coor + (i,), columns + ): yield couple elif isinstance(pipe, Pipeline): for i, (_, model) in enumerate(pipe.steps): @@ -54,16 +57,17 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer): for i, (_, fitted_transformer, column) in enumerate(pipe.transformers): for couple in enumerate_pipeline_models( - fitted_transformer, coor + (i,), column): + fitted_transformer, coor + (i,), column + ): yield couple elif isinstance(pipe, FeatureUnion): for i, (_, model) in enumerate(pipe.transformer_list): for couple in enumerate_pipeline_models(model, coor + (i,)): yield couple elif 
TransformedTargetRegressor is not None and isinstance( - pipe, TransformedTargetRegressor): - raise NotImplementedError( - "Not yet implemented for TransformedTargetRegressor.") + pipe, TransformedTargetRegressor + ): + raise NotImplementedError("Not yet implemented for TransformedTargetRegressor.") elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)): pass elif isinstance(pipe, BaseEstimator): @@ -71,7 +75,9 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): else: raise TypeError( "Parameter pipe is not a scikit-learn object: {}\n{}".format( - type(pipe), pipe)) + type(pipe), pipe + ) + ) class BaseEstimatorDebugInformation: @@ -88,20 +94,22 @@ def __init__(self, model): self.methods = {} if hasattr(model, "transform") and callable(model.transform): model._debug_transform = model.transform - self.methods["transform"] = \ - lambda model, X: model._debug_transform(X) + self.methods["transform"] = lambda model, X: model._debug_transform(X) if hasattr(model, "predict") and callable(model.predict): model._debug_predict = model.predict self.methods["predict"] = lambda model, X: model._debug_predict(X) if hasattr(model, "predict_proba") and callable(model.predict_proba): model._debug_predict_proba = model.predict_proba - self.methods["predict_proba"] = \ - lambda model, X: model._debug_predict_proba(X) + self.methods["predict_proba"] = lambda model, X: model._debug_predict_proba( + X + ) if hasattr(model, "decision_function") and callable( - model.decision_function): # noqa + model.decision_function + ): # noqa model._debug_decision_function = model.decision_function # noqa - self.methods["decision_function"] = \ - lambda model, X: model._debug_decision_function(X) + self.methods[ + "decision_function" + ] = lambda model, X: model._debug_decision_function(X) def __repr__(self): """ @@ -113,21 +121,21 @@ def to_str(self, nrows=5): """ Tries to produce a readable message. 
""" - rows = ['BaseEstimatorDebugInformation({})'.format( - self.model.__class__.__name__)] + rows = [ + "BaseEstimatorDebugInformation({})".format(self.model.__class__.__name__) + ] for k in sorted(self.inputs): if k in self.outputs: - rows.append(' ' + k + '(') + rows.append(" " + k + "(") self.display(self.inputs[k], nrows) - rows.append(textwrap.indent( - self.display(self.inputs[k], nrows), ' ')) - rows.append(' ) -> (') - rows.append(textwrap.indent( - self.display(self.outputs[k], nrows), ' ')) - rows.append(' )') + rows.append(textwrap.indent(self.display(self.inputs[k], nrows), " ")) + rows.append(" ) -> (") + rows.append( + textwrap.indent(self.display(self.outputs[k], nrows), " ") + ) + rows.append(" )") else: - raise KeyError( - "Unable to find output for method '{}'.".format(k)) + raise KeyError("Unable to find output for method '{}'.".format(k)) return "\n".join(rows) def display(self, data, nrows): @@ -135,11 +143,11 @@ def display(self, data, nrows): Displays the first """ text = str(data) - rows = text.split('\n') + rows = text.split("\n") if len(rows) > nrows: rows = rows[:nrows] - rows.append('...') - if hasattr(data, 'shape'): + rows.append("...") + if hasattr(data, "shape"): rows.insert(0, "shape={}".format(data.shape)) return "\n".join(rows) @@ -156,40 +164,42 @@ def _alter_model_for_debugging(skl_model, recursive=False): """ def transform(self, X, *args, **kwargs): - self._debug.inputs['transform'] = X - y = self._debug.methods['transform'](self, X, *args, **kwargs) - self._debug.outputs['transform'] = y + self._debug.inputs["transform"] = X + y = self._debug.methods["transform"](self, X, *args, **kwargs) + self._debug.outputs["transform"] = y return y def predict(self, X, *args, **kwargs): - self._debug.inputs['predict'] = X - y = self._debug.methods['predict'](self, X, *args, **kwargs) - self._debug.outputs['predict'] = y + self._debug.inputs["predict"] = X + y = self._debug.methods["predict"](self, X, *args, **kwargs) + self._debug.outputs["predict"] = y return y def predict_proba(self, X, *args, **kwargs): - self._debug.inputs['predict_proba'] = X - y = self._debug.methods['predict_proba'](self, X, *args, **kwargs) - self._debug.outputs['predict_proba'] = y + self._debug.inputs["predict_proba"] = X + y = self._debug.methods["predict_proba"](self, X, *args, **kwargs) + self._debug.outputs["predict_proba"] = y return y def decision_function(self, X, *args, **kwargs): - self._debug.inputs['decision_function'] = X - y = self._debug.methods['decision_function'](self, X, *args, **kwargs) - self._debug.outputs['decision_function'] = y + self._debug.inputs["decision_function"] = X + y = self._debug.methods["decision_function"](self, X, *args, **kwargs) + self._debug.outputs["decision_function"] = y return y new_methods = { - 'decision_function': decision_function, - 'transform': transform, - 'predict': predict, - 'predict_proba': predict_proba, + "decision_function": decision_function, + "transform": transform, + "predict": predict, + "predict_proba": predict_proba, } - if hasattr(skl_model, '_debug'): - raise RuntimeError("The same operator cannot be used twice in " - "the same pipeline or this method was called " - "a second time.") + if hasattr(skl_model, "_debug"): + raise RuntimeError( + "The same operator cannot be used twice in " + "the same pipeline or this method was called " + "a second time." 
+ ) if recursive: for model_ in enumerate_pipeline_models(skl_model): @@ -199,16 +209,20 @@ def decision_function(self, X, *args, **kwargs): try: setattr(model, k, MethodType(new_methods[k], model)) except AttributeError: - warnings.warn("Unable to overwrite method '{}' for class " - "{}.".format(k, type(model))) + warnings.warn( + "Unable to overwrite method '{}' for class " + "{}.".format(k, type(model)) + ) else: skl_model._debug = BaseEstimatorDebugInformation(skl_model) for k in skl_model._debug.methods: try: setattr(skl_model, k, MethodType(new_methods[k], skl_model)) except AttributeError: - warnings.warn("Unable to overwrite method '{}' for class " - "{}.".format(k, type(skl_model))) + warnings.warn( + "Unable to overwrite method '{}' for class " + "{}.".format(k, type(skl_model)) + ) def collect_intermediate_steps(model, *args, **kwargs): @@ -225,17 +239,19 @@ def collect_intermediate_steps(model, *args, **kwargs): This function is used to check every intermediate model in a pipeline. """ - if 'intermediate' in kwargs: - if not kwargs['intermediate']: + if "intermediate" in kwargs: + if not kwargs["intermediate"]: raise ValueError("Parameter intermediate must be true.") - del kwargs['intermediate'] + del kwargs["intermediate"] from .. import convert_sklearn from ..helpers.onnx_helper import select_model_inputs_outputs from ..common import MissingShapeCalculator, MissingConverter + try: model_onnx, topology = convert_sklearn( - model, *args, intermediate=True, **kwargs) + model, *args, intermediate=True, **kwargs + ) except (MissingShapeCalculator, MissingConverter): # The model cannot be converted. raise @@ -247,14 +263,15 @@ def collect_intermediate_steps(model, *args, **kwargs): _alter_model_for_debugging(operator.raw_operator) inputs = [i.full_name for i in operator.inputs] outputs = [o.full_name for o in operator.outputs] - steps.append({ - 'model': operator.raw_operator, - 'model_onnx': model_onnx, - 'inputs': inputs, - 'outputs': outputs, - 'onnx_step': select_model_inputs_outputs( - model_onnx, outputs=outputs) - }) + steps.append( + { + "model": operator.raw_operator, + "model_onnx": model_onnx, + "inputs": inputs, + "outputs": outputs, + "onnx_step": select_model_inputs_outputs(model_onnx, outputs=outputs), + } + ) return steps @@ -314,16 +331,16 @@ def to_string(c): if isinstance(c1, list) and isinstance(c2, list): try: res = c1 == c2 - reason = 'list-equal' + reason = "list-equal" except ValueError: # lgtm [py/unreachable-statement] res = False - reason = 'list' + reason = "list" elif isinstance(c1, numpy.ndarray) and isinstance(c2, numpy.ndarray): try: assert_almost_equal(c1, c2, decimal=decimal) res = True except (AssertionError, TypeError): - reason = 'array' + reason = "array" cc1 = c1.ravel() cc2 = c2.ravel() try: @@ -331,7 +348,7 @@ def to_string(c): res = True except (AssertionError, TypeError) as e: res = False - reason = 'array-ravel' + str(e) + reason = "array-ravel" + str(e) else: raise TypeError("Types {} and {}".format(type(c1), type(c2))) if not res: diff --git a/skl2onnx/helpers/onnx_helper.py b/skl2onnx/helpers/onnx_helper.py index aae1572e0..93f13461f 100644 --- a/skl2onnx/helpers/onnx_helper.py +++ b/skl2onnx/helpers/onnx_helper.py @@ -7,8 +7,12 @@ from onnx import shape_inference, TensorProto, ValueInfoProto from onnx.numpy_helper import from_array, to_array from onnx.helper import ( - make_tensor, make_node, make_tensor_value_info, make_graph, - make_model) + make_tensor, + make_node, + make_tensor_value_info, + make_graph, + make_model, +) from 
..proto import get_latest_tested_opset_version from onnx import onnx_pb as onnx_proto from ..common._topology import Variable @@ -24,7 +28,7 @@ def load_onnx_model(onnx_file_or_bytes): if isinstance(onnx_file_or_bytes, str): with open(onnx_file_or_bytes, "rb") as f: return onnx.load(f) - elif hasattr(onnx_file_or_bytes, 'read'): + elif hasattr(onnx_file_or_bytes, "read"): return onnx.load(onnx_file_or_bytes) else: b = BytesIO(onnx_file_or_bytes) @@ -41,7 +45,7 @@ def save_onnx_model(model, filename=None): """ content = model.SerializeToString() if filename is not None: - if hasattr(filename, 'write'): + if hasattr(filename, "write"): filename.write(content) else: with open(filename, "wb") as f: @@ -60,8 +64,9 @@ def enumerate_model_node_outputs(model, add_node=False): :return: enumerator """ if not hasattr(model, "graph"): - raise TypeError("Parameter model is not an ONNX model but " - "{}".format(type(model))) + raise TypeError( + "Parameter model is not an ONNX model but " "{}".format(type(model)) + ) for node in model.graph.node: for out in node.output: yield (out, node) if add_node else out @@ -145,8 +150,13 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None): value_info = ValueInfoProto() value_info.name = out var_out.append(value_info) - graph = make_graph(keep_nodes, model.graph.name, model.graph.input, - var_out, model.graph.initializer) + graph = make_graph( + keep_nodes, + model.graph.name, + model.graph.input, + var_out, + model.graph.initializer, + ) onnx_model = make_model(graph) onnx_model.ir_version = model.ir_version onnx_model.producer_name = model.producer_name @@ -159,8 +169,9 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None): onnx.helper.set_model_props(onnx_model, values) if len(onnx_model.graph.input) != len(model.graph.input): - raise RuntimeError("Input mismatch {} != {}".format( - len(onnx_model.input), len(model.input))) + raise RuntimeError( + "Input mismatch {} != {}".format(len(onnx_model.input), len(model.input)) + ) # fix opset import del onnx_model.opset_import[:] @@ -171,33 +182,39 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None): return onnx_model -def infer_outputs(op_type, inputs, outputs=None, initializer=None, - target_opset=None, **atts): +def infer_outputs( + op_type, inputs, outputs=None, initializer=None, target_opset=None, **atts +): """ Infers outputs type and shapes given an ONNX operator. 
""" - logger = getLogger('skl2onnx') + logger = getLogger("skl2onnx") logger.debug( - '[infer_outputs] op_type=%r inputs=%r outputs=%r', - op_type, [x.name for x in inputs], outputs) + "[infer_outputs] op_type=%r inputs=%r outputs=%r", + op_type, + [x.name for x in inputs], + outputs, + ) if isinstance(op_type, str): required_outputs = [] if outputs: for o in outputs: - if hasattr(o, 'onnx_name'): + if hasattr(o, "onnx_name"): required_outputs.append(o.onnx_name) elif isinstance(o, str): required_outputs.append(o) else: raise TypeError("Unable to require output {}.".format(o)) - node = make_node(op_type, [i.onnx_name for i in inputs], - required_outputs, **atts) + node = make_node( + op_type, [i.onnx_name for i in inputs], required_outputs, **atts + ) node = [node] - elif hasattr(op_type, 'nodes'): + elif hasattr(op_type, "nodes"): node = op_type.nodes else: - raise RuntimeError("Unable to build ONNX nodes from type {}.".format( - type(op_type))) + raise RuntimeError( + "Unable to build ONNX nodes from type {}.".format(type(op_type)) + ) input_init = inputs.copy() if initializer: @@ -207,16 +224,18 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None, if isinstance(input, Variable): onnx_type = input.type.to_onnx_type() tensor_type = onnx_type.tensor_type - shape = [tensor_type.shape.dim[i].dim_value - for i in range(len(tensor_type.shape.dim))] - inp = make_tensor_value_info(input.onnx_name, - tensor_type.elem_type, - tuple(shape)) + shape = [ + tensor_type.shape.dim[i].dim_value + for i in range(len(tensor_type.shape.dim)) + ] + inp = make_tensor_value_info( + input.onnx_name, tensor_type.elem_type, tuple(shape) + ) onnx_inputs.append(inp) elif isinstance(input, onnx.TensorProto): v = make_tensor_value_info( - input.name, input.data_type.real, - list(d for d in input.dims)) + input.name, input.data_type.real, list(d for d in input.dims) + ) onnx_inputs.append(v) elif isinstance(input, onnx.AttributeProto): value_info = ValueInfoProto() @@ -228,13 +247,11 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None, else: onnx_inputs.append(input) - graph = make_graph(node, 'infer_shapes', - onnx_inputs, []) - original_model = make_model(graph, producer_name='skl2onnx') + graph = make_graph(node, "infer_shapes", onnx_inputs, []) + original_model = make_model(graph, producer_name="skl2onnx") domains = {} for n in node: - domains[n.domain] = max(domains.get(n.domain, 1), - getattr(n, 'op_version', 1)) + domains[n.domain] = max(domains.get(n.domain, 1), getattr(n, "op_version", 1)) for i, (k, v) in enumerate(domains.items()): if i == 0 and len(original_model.opset_import) == 1: op_set = original_model.opset_import[0] @@ -243,8 +260,7 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None, op_set.domain = k if target_opset: if isinstance(target_opset, dict): - op_set.version = target_opset.get( - k, get_latest_tested_opset_version()) + op_set.version = target_opset.get(k, get_latest_tested_opset_version()) else: op_set.version = target_opset else: @@ -254,8 +270,8 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None, inferred_model = shape_inference.infer_shapes(original_model) except RuntimeError as e: raise RuntimeError( - "Unable to infer shape of node '{}'\n{}".format( - op_type, original_model)) from e + "Unable to infer shape of node '{}'\n{}".format(op_type, original_model) + ) from e all_shapes = Variable.from_pb(inferred_model.graph.value_info) used = set() for node in graph.node: @@ -266,8 +282,9 @@ def infer_outputs(op_type, 
inputs, outputs=None, initializer=None, raise RuntimeError( f"Shape inference fails.\n*Inputs*\n{onnx_inputs}\n" f"*all_shapes*\n{all_shapes}'\n" - f"*Model*\n{original_model}'") - logger.debug('[infer_outputs] shapes=%r', shapes) + f"*Model*\n{original_model}'" + ) + logger.debug("[infer_outputs] shapes=%r", shapes) return shapes @@ -289,8 +306,13 @@ def change_onnx_domain(model, ops): node.op_type = rep[0] node.domain = rep[1] - graph = make_graph(nodes, model.graph.name, model.graph.input, - model.graph.output, model.graph.initializer) + graph = make_graph( + nodes, + model.graph.name, + model.graph.input, + model.graph.output, + model.graph.initializer, + ) onnx_model = make_model(graph) onnx_model.ir_version = model.ir_version onnx_model.producer_name = model.producer_name @@ -303,8 +325,9 @@ def change_onnx_domain(model, ops): onnx.helper.set_model_props(onnx_model, values) if len(onnx_model.graph.input) != len(model.graph.input): - raise RuntimeError("Input mismatch {} != {}".format( - len(onnx_model.input), len(model.input))) + raise RuntimeError( + "Input mismatch {} != {}".format(len(onnx_model.input), len(model.input)) + ) # fix opset import domain_set = set() @@ -325,7 +348,7 @@ def change_onnx_domain(model, ops): return onnx_model -def add_output_initializer(model_onnx, name, value, suffix='_init'): +def add_output_initializer(model_onnx, name, value, suffix="_init"): """ Add a constant and link it to one output. It allows the user to store arrays into the graph @@ -352,7 +375,8 @@ def add_output_initializer(model_onnx, name, value, suffix='_init'): if len(name_list) != len(value_list): raise ValueError( "Mismatched names and values. There are %d names and %d values." - "" % (len(name_list), len(value_list))) + "" % (len(name_list), len(value_list)) + ) nodes = list(model_onnx.graph.node) inits = list(model_onnx.graph.initializer) @@ -364,41 +388,46 @@ def add_output_initializer(model_onnx, name, value, suffix='_init'): names = set(i.name for i in model_onnx.graph.initializer) if name_output in names or name_init in names: raise ValueError( - "Names %r or %r is already taken by an initializer: %r." % ( - name_output, name_init, ", ".join(sorted(names)))) + "Names %r or %r is already taken by an initializer: %r." + % (name_output, name_init, ", ".join(sorted(names))) + ) names = set(i.name for i in model_onnx.graph.output) if name_output in names or name_init in names: raise ValueError( - "Names %r or %r is already taken by an output: %r." % ( - name_output, name_init, ", ".join(sorted(names)))) + "Names %r or %r is already taken by an output: %r." + % (name_output, name_init, ", ".join(sorted(names))) + ) names = set(i.name for i in model_onnx.graph.input) if name_output in names or name_init in names: raise ValueError( - "Names %r or %r is already taken by an output: %r." % ( - name_output, name_init, ", ".join(sorted(names)))) + "Names %r or %r is already taken by an output: %r." + % (name_output, name_init, ", ".join(sorted(names))) + ) try: cst = from_array(value, name=name_init) except RuntimeError as e: st = str(value.dtype).lower() - if st.startswith('u') or st.startswith("`_. 
""" - if metric == 'cosine': + if metric == "cosine": if isinstance(Y, np.ndarray): - ny = np.sqrt(np.sum(Y ** 2, axis=1, keepdims=True)) + ny = np.sqrt(np.sum(Y**2, axis=1, keepdims=True)) norm_y = Y / ny norm_try = norm_y.T.astype(dtype) else: ny = OnnxReduceL2_typed(dtype, Y, axes=[1], op_version=op_version) norm_y = OnnxDiv(Y, ny, op_version=op_version) - norm_try = OnnxTranspose(norm_y, perm=[1, 0], - op_version=op_version) + norm_try = OnnxTranspose(norm_y, perm=[1, 0], op_version=op_version) nx = OnnxReduceL2_typed(dtype, X, axes=[1], op_version=op_version) norm_x = OnnxDiv(X, nx, op_version=op_version) @@ -200,9 +260,9 @@ def _convert_pairwise_kernel(X, Y, metric=None, raise NotImplementedError("Metric %r is not implemented." % metric) -def convert_kernel(kernel, X, output_names=None, - x_train=None, dtype=None, optim=None, - op_version=None): +def convert_kernel( + kernel, X, output_names=None, x_train=None, dtype=None, optim=None, op_version=None +): if op_version is None: raise RuntimeError("op_version must not be None.") if isinstance(kernel, Sum): @@ -213,72 +273,105 @@ def convert_kernel(kernel, X, output_names=None, clop = None if clop is not None: return clop( - convert_kernel(kernel.k1, X, x_train=x_train, dtype=dtype, - optim=optim, op_version=op_version), - convert_kernel(kernel.k2, X, x_train=x_train, dtype=dtype, - optim=optim, op_version=op_version), - output_names=output_names, op_version=op_version) + convert_kernel( + kernel.k1, + X, + x_train=x_train, + dtype=dtype, + optim=optim, + op_version=op_version, + ), + convert_kernel( + kernel.k2, + X, + x_train=x_train, + dtype=dtype, + optim=optim, + op_version=op_version, + ), + output_names=output_names, + op_version=op_version, + ) if isinstance(kernel, ConstantKernel): # X and x_train should have the same number of features. 
onnx_zeros_x = _zero_vector_of_size( - X, keepdims=1, dtype=dtype, op_version=op_version) + X, keepdims=1, dtype=dtype, op_version=op_version + ) if x_train is None: onnx_zeros_y = onnx_zeros_x else: onnx_zeros_y = _zero_vector_of_size( - x_train, keepdims=1, dtype=dtype, op_version=op_version) + x_train, keepdims=1, dtype=dtype, op_version=op_version + ) tr = OnnxTranspose(onnx_zeros_y, perm=[1, 0], op_version=op_version) mat = OnnxMatMul(onnx_zeros_x, tr, op_version=op_version) - return OnnxAdd(mat, - np.array([kernel.constant_value], - dtype=dtype), - output_names=output_names, - op_version=op_version) + return OnnxAdd( + mat, + np.array([kernel.constant_value], dtype=dtype), + output_names=output_names, + op_version=op_version, + ) if isinstance(kernel, RBF): # length_scale = np.squeeze(length_scale).astype(float) - zeroh = _zero_vector_of_size(X, axis=1, keepdims=0, dtype=dtype, - op_version=op_version) - zerov = _zero_vector_of_size(X, axis=0, keepdims=1, dtype=dtype, - op_version=op_version) - - if (isinstance(kernel.length_scale, np.ndarray) and - len(kernel.length_scale) > 0): + zeroh = _zero_vector_of_size( + X, axis=1, keepdims=0, dtype=dtype, op_version=op_version + ) + zerov = _zero_vector_of_size( + X, axis=0, keepdims=1, dtype=dtype, op_version=op_version + ) + + if isinstance(kernel.length_scale, np.ndarray) and len(kernel.length_scale) > 0: const = kernel.length_scale.astype(dtype) else: tensor_value = py_make_float_array( - kernel.length_scale, dtype=dtype, as_tensor=True) + kernel.length_scale, dtype=dtype, as_tensor=True + ) const = OnnxConstantOfShape( OnnxShape(zeroh, op_version=op_version), - value=tensor_value, op_version=op_version) + value=tensor_value, + op_version=op_version, + ) X_scaled = OnnxDiv(X, const, op_version=op_version) if x_train is None: dist = onnx_squareform_pdist( - X_scaled, metric='sqeuclidean', dtype=dtype, - op_version=op_version) + X_scaled, metric="sqeuclidean", dtype=dtype, op_version=op_version + ) else: x_train_scaled = OnnxDiv(x_train, const, op_version=op_version) if optim is None: - dist = onnx_cdist(X_scaled, x_train_scaled, - metric='sqeuclidean', - dtype=dtype, op_version=op_version) - elif optim == 'cdist': - dist = OnnxCDist(X_scaled, x_train_scaled, - metric='sqeuclidean', - op_version=op_version) + dist = onnx_cdist( + X_scaled, + x_train_scaled, + metric="sqeuclidean", + dtype=dtype, + op_version=op_version, + ) + elif optim == "cdist": + dist = OnnxCDist( + X_scaled, + x_train_scaled, + metric="sqeuclidean", + op_version=op_version, + ) else: raise ValueError("Unknown optimization '{}'.".format(optim)) tensor_value = py_make_float_array(-0.5, dtype=dtype, as_tensor=True) cst5 = OnnxConstantOfShape( OnnxShape(zerov, op_version=op_version), - value=tensor_value, op_version=op_version) + value=tensor_value, + op_version=op_version, + ) # K = np.exp(-.5 * dists) - exp = OnnxExp(OnnxMul(dist, cst5, op_version=op_version), - output_names=output_names, op_version=op_version) + exp = OnnxExp( + OnnxMul(dist, cst5, op_version=op_version), + output_names=output_names, + op_version=op_version, + ) # This should not be needed. 
# K = squareform(K) @@ -288,92 +381,129 @@ def convert_kernel(kernel, X, output_names=None, if isinstance(kernel, ExpSineSquared): if not isinstance(kernel.length_scale, (float, int)): raise NotImplementedError( - "length_scale should be float not {}.".format( - type(kernel.length_scale))) + "length_scale should be float not {}.".format(type(kernel.length_scale)) + ) return _convert_exp_sine_squared( - X, Y=X if x_train is None else x_train, + X, + Y=X if x_train is None else x_train, length_scale=kernel.length_scale, - periodicity=kernel.periodicity, dtype=dtype, - output_names=output_names, optim=optim, - op_version=op_version) + periodicity=kernel.periodicity, + dtype=dtype, + output_names=output_names, + optim=optim, + op_version=op_version, + ) if isinstance(kernel, DotProduct): if not isinstance(kernel.sigma_0, (float, int)): raise NotImplementedError( - "sigma_0 should be float not {}.".format( - type(kernel.sigma_0))) + "sigma_0 should be float not {}.".format(type(kernel.sigma_0)) + ) if x_train is None: - return _convert_dot_product(X, X, sigma_0=kernel.sigma_0, - dtype=dtype, - output_names=output_names, - op_version=op_version) + return _convert_dot_product( + X, + X, + sigma_0=kernel.sigma_0, + dtype=dtype, + output_names=output_names, + op_version=op_version, + ) else: if len(x_train.shape) != 2: raise NotImplementedError( - "Only DotProduct for two dimension train set is " - "implemented.") + "Only DotProduct for two dimension train set is " "implemented." + ) return _convert_dot_product( - X, x_train, sigma_0=kernel.sigma_0, - dtype=dtype, output_names=output_names, - op_version=op_version) + X, + x_train, + sigma_0=kernel.sigma_0, + dtype=dtype, + output_names=output_names, + op_version=op_version, + ) if isinstance(kernel, RationalQuadratic): if x_train is None: return _convert_rational_quadratic( - X, X, length_scale=kernel.length_scale, - dtype=dtype, alpha=kernel.alpha, + X, + X, + length_scale=kernel.length_scale, + dtype=dtype, + alpha=kernel.alpha, output_names=output_names, - optim=optim, op_version=op_version) + optim=optim, + op_version=op_version, + ) else: return _convert_rational_quadratic( - X, x_train, length_scale=kernel.length_scale, - dtype=dtype, alpha=kernel.alpha, + X, + x_train, + length_scale=kernel.length_scale, + dtype=dtype, + alpha=kernel.alpha, output_names=output_names, - optim=optim, op_version=op_version) + optim=optim, + op_version=op_version, + ) if isinstance(kernel, PairwiseKernel): if x_train is None: return _convert_pairwise_kernel( - X, X, metric=kernel.metric, - dtype=dtype, output_names=output_names, - optim=optim, op_version=op_version) + X, + X, + metric=kernel.metric, + dtype=dtype, + output_names=output_names, + optim=optim, + op_version=op_version, + ) else: return _convert_pairwise_kernel( - X, x_train, metric=kernel.metric, - dtype=dtype, output_names=output_names, - optim=optim, op_version=op_version) + X, + x_train, + metric=kernel.metric, + dtype=dtype, + output_names=output_names, + optim=optim, + op_version=op_version, + ) if isinstance(kernel, WhiteKernel): # X and x_train should have the same number of features. 
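# [editor's note - illustrative sketch, not part of this patch] The WhiteKernel branch
# below yields noise_level * I when converting the training-time kernel (x_train is
# None) and an all-zero matrix when x_train is given, which matches
# sklearn.gaussian_process.kernels.WhiteKernel. A rough NumPy equivalent (the function
# name is an assumption for illustration only):
import numpy as np

def _white_kernel_reference_sketch(X, x_train, noise_level, dtype=np.float32):
    if x_train is not None:
        # K(X, x_train) is zero: white noise does not correlate distinct sample sets
        return np.zeros((X.shape[0], x_train.shape[0]), dtype=dtype)
    # K(X, X) is noise_level on the diagonal, zero elsewhere (EyeLike * noise_level)
    return np.eye(X.shape[0], dtype=dtype) * dtype(noise_level)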
onnx_zeros_x = _zero_vector_of_size( - X, keepdims=1, dtype=dtype, op_version=op_version) + X, keepdims=1, dtype=dtype, op_version=op_version + ) if x_train is None: onnx_zeros_y = onnx_zeros_x else: onnx_zeros_y = _zero_vector_of_size( - x_train, keepdims=1, dtype=dtype, op_version=op_version) + x_train, keepdims=1, dtype=dtype, op_version=op_version + ) tr = OnnxTranspose(onnx_zeros_y, perm=[1, 0], op_version=op_version) mat = OnnxMatMul(onnx_zeros_x, tr, op_version=op_version) if x_train is not None: - return OnnxIdentity(mat, op_version=op_version, - output_names=output_names) + return OnnxIdentity(mat, op_version=op_version, output_names=output_names) return OnnxMul( OnnxEyeLike(mat, op_version=op_version), - OnnxIdentity(np.array([kernel.noise_level], dtype=dtype), - op_version=op_version), + OnnxIdentity( + np.array([kernel.noise_level], dtype=dtype), op_version=op_version + ), op_version=op_version, - output_names=output_names) + output_names=output_names, + ) - raise RuntimeError("Unable to convert __call__ method for " - "class {}.".format(type(kernel))) + raise RuntimeError( + "Unable to convert __call__ method for " "class {}.".format(type(kernel)) + ) -def _zero_vector_of_size(X, output_names=None, axis=0, - keepdims=None, dtype=None, op_version=None): +def _zero_vector_of_size( + X, output_names=None, axis=0, keepdims=None, dtype=None, op_version=None +): if op_version is None: raise RuntimeError("op_version must not be None.") if keepdims is None: @@ -381,18 +511,27 @@ def _zero_vector_of_size(X, output_names=None, axis=0, if dtype == np.float32: res = OnnxReduceSumApi11( OnnxConstantOfShape( - OnnxShape(X, op_version=op_version), - op_version=op_version), - axes=[1 - axis], keepdims=keepdims, - output_names=output_names, op_version=op_version) + OnnxShape(X, op_version=op_version), op_version=op_version + ), + axes=[1 - axis], + keepdims=keepdims, + output_names=output_names, + op_version=op_version, + ) elif dtype in (np.float64, np.int32, np.int64): res = OnnxReduceSumApi11( OnnxConstantOfShape( - OnnxShape(X, op_version=op_version), value=py_make_float_array( - 0, dtype=dtype, as_tensor=True), op_version=op_version), - axes=[1 - axis], keepdims=keepdims, - output_names=output_names, op_version=op_version) + OnnxShape(X, op_version=op_version), + value=py_make_float_array(0, dtype=dtype, as_tensor=True), + op_version=op_version, + ), + axes=[1 - axis], + keepdims=keepdims, + output_names=output_names, + op_version=op_version, + ) else: raise NotImplementedError( - "Unable to create zero vector of type {}".format(dtype)) + "Unable to create zero vector of type {}".format(dtype) + ) return res diff --git a/skl2onnx/operator_converters/ada_boost.py b/skl2onnx/operator_converters/ada_boost.py index 87ea3d963..3e883043f 100644 --- a/skl2onnx/operator_converters/ada_boost.py +++ b/skl2onnx/operator_converters/ada_boost.py @@ -8,236 +8,350 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common._apply_operation import ( - apply_add, apply_cast, apply_clip, apply_concat, apply_div, apply_exp, - apply_mul, apply_reshape, apply_sub, apply_topk, apply_transpose + apply_add, + apply_cast, + apply_clip, + apply_concat, + apply_div, + apply_exp, + apply_mul, + apply_reshape, + apply_sub, + apply_topk, + apply_transpose, ) from ..common.data_types import ( - FloatTensorType, DoubleTensorType, guess_proto_type, guess_numpy_type, - Int64TensorType) + FloatTensorType, + DoubleTensorType, + guess_proto_type, + guess_numpy_type, 
+ Int64TensorType, +) from ..common._registration import register_converter from .._supported_operators import sklearn_operator_name_map def _scikit_learn_before_022(): - if '.dev' in __version__: - return pv.Version( - __version__.split(".dev")[0]) < pv.Version("0.22") - if '.post' in __version__: - return pv.Version( - __version__.split(".post")[0]) < pv.Version("0.22") + if ".dev" in __version__: + return pv.Version(__version__.split(".dev")[0]) < pv.Version("0.22") + if ".post" in __version__: + return pv.Version(__version__.split(".post")[0]) < pv.Version("0.22") return pv.Version(__version__) < pv.Version("0.22") -def _samme_proba(scope, container, proba_name, weight, - zero_name, classes_ind_name, one_name): - weight_name = scope.get_unique_variable_name('weight') - container.add_initializer( - weight_name, onnx_proto.TensorProto.FLOAT, [], [weight]) - - argmax_output_name = scope.get_unique_variable_name('argmax_output') - container.add_node('ArgMax', proba_name, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), - axis=1) - equal_name = scope.get_unique_variable_name('equal') - container.add_node('Equal', [argmax_output_name, classes_ind_name], - equal_name, - name=scope.get_unique_operator_name('Equal')) - - max_proba_name = scope.get_unique_variable_name('probsmax') - container.add_node('Where', [equal_name, one_name, zero_name], - max_proba_name, - name=scope.get_unique_operator_name('Where')) - - samme_proba_name = scope.get_unique_variable_name('samme_proba') - apply_mul(scope, [max_proba_name, weight_name], - samme_proba_name, container, broadcast=1) +def _samme_proba( + scope, container, proba_name, weight, zero_name, classes_ind_name, one_name +): + weight_name = scope.get_unique_variable_name("weight") + container.add_initializer(weight_name, onnx_proto.TensorProto.FLOAT, [], [weight]) + + argmax_output_name = scope.get_unique_variable_name("argmax_output") + container.add_node( + "ArgMax", + proba_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + equal_name = scope.get_unique_variable_name("equal") + container.add_node( + "Equal", + [argmax_output_name, classes_ind_name], + equal_name, + name=scope.get_unique_operator_name("Equal"), + ) + + max_proba_name = scope.get_unique_variable_name("probsmax") + container.add_node( + "Where", + [equal_name, one_name, zero_name], + max_proba_name, + name=scope.get_unique_operator_name("Where"), + ) + + samme_proba_name = scope.get_unique_variable_name("samme_proba") + apply_mul( + scope, [max_proba_name, weight_name], samme_proba_name, container, broadcast=1 + ) return samme_proba_name def _samme_r_proba(scope, container, proba_name, n_classes, dtype, pdtype): - clipped_proba_name = scope.get_unique_variable_name('clipped_proba') - log_proba_name = scope.get_unique_variable_name('log_proba') - reduced_proba_name = scope.get_unique_variable_name('reduced_proba') - reshaped_result_name = scope.get_unique_variable_name('reshaped_result') - inverted_n_classes_name = scope.get_unique_variable_name( - 'inverted_n_classes') - n_classes_minus_one_name = scope.get_unique_variable_name( - 'n_classes_minus_one') - prod_result_name = scope.get_unique_variable_name('prod_result') - sub_result_name = scope.get_unique_variable_name('sub_result') - samme_proba_name = scope.get_unique_variable_name('samme_proba') - - container.add_initializer( - inverted_n_classes_name, pdtype, [], [1. 
/ n_classes]) - container.add_initializer( - n_classes_minus_one_name, pdtype, [], [n_classes - 1]) + clipped_proba_name = scope.get_unique_variable_name("clipped_proba") + log_proba_name = scope.get_unique_variable_name("log_proba") + reduced_proba_name = scope.get_unique_variable_name("reduced_proba") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + inverted_n_classes_name = scope.get_unique_variable_name("inverted_n_classes") + n_classes_minus_one_name = scope.get_unique_variable_name("n_classes_minus_one") + prod_result_name = scope.get_unique_variable_name("prod_result") + sub_result_name = scope.get_unique_variable_name("sub_result") + samme_proba_name = scope.get_unique_variable_name("samme_proba") + + container.add_initializer(inverted_n_classes_name, pdtype, [], [1.0 / n_classes]) + container.add_initializer(n_classes_minus_one_name, pdtype, [], [n_classes - 1]) try: cst_min = np.finfo(np.float64).eps.astype(dtype) except TypeError: - raise TypeError("Unable to convert {} (type {}) into {}.".format( - np.finfo(float).eps, type(np.finfo(float).eps), dtype)) + raise TypeError( + "Unable to convert {} (type {}) into {}.".format( + np.finfo(float).eps, type(np.finfo(float).eps), dtype + ) + ) apply_clip( - scope, proba_name, clipped_proba_name, container, - operator_name=scope.get_unique_operator_name('ClipAda'), - min=dtype(cst_min)) + scope, + proba_name, + clipped_proba_name, + container, + operator_name=scope.get_unique_operator_name("ClipAda"), + min=dtype(cst_min), + ) container.add_node( - 'Log', clipped_proba_name, log_proba_name, - name=scope.get_unique_operator_name('Log')) + "Log", + clipped_proba_name, + log_proba_name, + name=scope.get_unique_operator_name("Log"), + ) if container.target_opset < 13: container.add_node( - 'ReduceSum', log_proba_name, reduced_proba_name, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + log_proba_name, + reduced_proba_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [log_proba_name, axis_name], reduced_proba_name, - name=scope.get_unique_operator_name('ReduceSum')) - apply_reshape(scope, reduced_proba_name, - reshaped_result_name, container, - desired_shape=(-1, 1)) - apply_mul(scope, [reshaped_result_name, inverted_n_classes_name], - prod_result_name, container, broadcast=1) - apply_sub(scope, [log_proba_name, prod_result_name], - sub_result_name, container, broadcast=1) - apply_mul(scope, [sub_result_name, n_classes_minus_one_name], - samme_proba_name, container, broadcast=1) + "ReduceSum", + [log_proba_name, axis_name], + reduced_proba_name, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_reshape( + scope, + reduced_proba_name, + reshaped_result_name, + container, + desired_shape=(-1, 1), + ) + apply_mul( + scope, + [reshaped_result_name, inverted_n_classes_name], + prod_result_name, + container, + broadcast=1, + ) + apply_sub( + scope, + [log_proba_name, prod_result_name], + sub_result_name, + container, + broadcast=1, + ) + apply_mul( + scope, + [sub_result_name, n_classes_minus_one_name], + samme_proba_name, + container, + broadcast=1, + ) return samme_proba_name -def _normalise_probability(scope, container, operator, proba_names_list, 
- model): - est_weights_sum_name = scope.get_unique_variable_name('est_weights_sum') - summation_prob_name = scope.get_unique_variable_name('summation_prob') - div_result_name = scope.get_unique_variable_name('div_result') - exp_operand_name = scope.get_unique_variable_name('exp_operand') - exp_result_name = scope.get_unique_variable_name('exp_result') - reduced_exp_result_name = scope.get_unique_variable_name( - 'reduced_exp_result') - normaliser_name = scope.get_unique_variable_name('normaliser') - zero_scalar_name = scope.get_unique_variable_name('zero_scalar') - comparison_result_name = scope.get_unique_variable_name( - 'comparison_result') - cast_output_name = scope.get_unique_variable_name('cast_output') +def _normalise_probability(scope, container, operator, proba_names_list, model): + est_weights_sum_name = scope.get_unique_variable_name("est_weights_sum") + summation_prob_name = scope.get_unique_variable_name("summation_prob") + div_result_name = scope.get_unique_variable_name("div_result") + exp_operand_name = scope.get_unique_variable_name("exp_operand") + exp_result_name = scope.get_unique_variable_name("exp_result") + reduced_exp_result_name = scope.get_unique_variable_name("reduced_exp_result") + normaliser_name = scope.get_unique_variable_name("normaliser") + zero_scalar_name = scope.get_unique_variable_name("zero_scalar") + comparison_result_name = scope.get_unique_variable_name("comparison_result") + cast_output_name = scope.get_unique_variable_name("cast_output") zero_filtered_normaliser_name = scope.get_unique_variable_name( - 'zero_filtered_normaliser') - mul_operand_name = scope.get_unique_variable_name('mul_operand') - cast_normaliser_name = scope.get_unique_variable_name('cast_normaliser') + "zero_filtered_normaliser" + ) + mul_operand_name = scope.get_unique_variable_name("mul_operand") + cast_normaliser_name = scope.get_unique_variable_name("cast_normaliser") proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT container.add_initializer( - est_weights_sum_name, proto_dtype, - [], [model.estimator_weights_.sum()]) + est_weights_sum_name, proto_dtype, [], [model.estimator_weights_.sum()] + ) container.add_initializer( - mul_operand_name, proto_dtype, - [], [1. 
/ (model.n_classes_ - 1)]) - container.add_initializer(zero_scalar_name, - onnx_proto.TensorProto.INT32, [], [0]) - - container.add_node('Sum', proba_names_list, - summation_prob_name, - name=scope.get_unique_operator_name('Sum')) - apply_div(scope, [summation_prob_name, est_weights_sum_name], - div_result_name, container, broadcast=1) - apply_mul(scope, [div_result_name, mul_operand_name], - exp_operand_name, container, broadcast=1) + mul_operand_name, proto_dtype, [], [1.0 / (model.n_classes_ - 1)] + ) + container.add_initializer(zero_scalar_name, onnx_proto.TensorProto.INT32, [], [0]) + + container.add_node( + "Sum", + proba_names_list, + summation_prob_name, + name=scope.get_unique_operator_name("Sum"), + ) + apply_div( + scope, + [summation_prob_name, est_weights_sum_name], + div_result_name, + container, + broadcast=1, + ) + apply_mul( + scope, + [div_result_name, mul_operand_name], + exp_operand_name, + container, + broadcast=1, + ) apply_exp(scope, exp_operand_name, exp_result_name, container) if container.target_opset < 13: container.add_node( - 'ReduceSum', exp_result_name, reduced_exp_result_name, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + exp_result_name, + reduced_exp_result_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [exp_result_name, axis_name], reduced_exp_result_name, - name=scope.get_unique_operator_name('ReduceSum')) - apply_reshape(scope, reduced_exp_result_name, - normaliser_name, container, - desired_shape=(-1, 1)) - apply_cast(scope, normaliser_name, cast_normaliser_name, - container, to=onnx_proto.TensorProto.INT32) - container.add_node('Equal', [cast_normaliser_name, zero_scalar_name], - comparison_result_name, - name=scope.get_unique_operator_name('Equal')) - apply_cast(scope, comparison_result_name, cast_output_name, - container, to=proto_dtype) - apply_add(scope, [normaliser_name, cast_output_name], - zero_filtered_normaliser_name, - container, broadcast=0) - apply_div(scope, [exp_result_name, zero_filtered_normaliser_name], - operator.outputs[1].full_name, container, broadcast=1) + "ReduceSum", + [exp_result_name, axis_name], + reduced_exp_result_name, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_reshape( + scope, + reduced_exp_result_name, + normaliser_name, + container, + desired_shape=(-1, 1), + ) + apply_cast( + scope, + normaliser_name, + cast_normaliser_name, + container, + to=onnx_proto.TensorProto.INT32, + ) + container.add_node( + "Equal", + [cast_normaliser_name, zero_scalar_name], + comparison_result_name, + name=scope.get_unique_operator_name("Equal"), + ) + apply_cast( + scope, comparison_result_name, cast_output_name, container, to=proto_dtype + ) + apply_add( + scope, + [normaliser_name, cast_output_name], + zero_filtered_normaliser_name, + container, + broadcast=0, + ) + apply_div( + scope, + [exp_result_name, zero_filtered_normaliser_name], + operator.outputs[1].full_name, + container, + broadcast=1, + ) return operator.outputs[1].full_name def _generate_raw_scores(scope, container, operator, proba_names_list, model): - summation_prob_name = scope.get_unique_variable_name('summation_proba') - est_weights_sum_name = 
scope.get_unique_variable_name('est_weights') + summation_prob_name = scope.get_unique_variable_name("summation_proba") + est_weights_sum_name = scope.get_unique_variable_name("est_weights") proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT container.add_initializer( - est_weights_sum_name, proto_dtype, - [], [model.estimator_weights_.sum()]) + est_weights_sum_name, proto_dtype, [], [model.estimator_weights_.sum()] + ) container.add_node( - 'Sum', proba_names_list, summation_prob_name, - name=scope.get_unique_operator_name('Sum')) + "Sum", + proba_names_list, + summation_prob_name, + name=scope.get_unique_operator_name("Sum"), + ) if len(model.classes_) == 2: - div_res_name = scope.get_unique_variable_name('div_res') - operand_name = scope.get_unique_variable_name('operand') - neg_name = scope.get_unique_variable_name('neg') - mul_res_name = scope.get_unique_variable_name('mul_res') - pos_class_scores_name = scope.get_unique_variable_name( - 'pos_class_scores') - neg_class_scores_name = scope.get_unique_variable_name( - 'neg_class_scores') - container.add_initializer( - operand_name, proto_dtype, - [2], [-1, 1]) - container.add_initializer( - neg_name, proto_dtype, - [], [-1]) - - apply_div(scope, [summation_prob_name, est_weights_sum_name], - div_res_name, container, broadcast=1) - apply_mul(scope, [div_res_name, operand_name], - mul_res_name, container, broadcast=1) + div_res_name = scope.get_unique_variable_name("div_res") + operand_name = scope.get_unique_variable_name("operand") + neg_name = scope.get_unique_variable_name("neg") + mul_res_name = scope.get_unique_variable_name("mul_res") + pos_class_scores_name = scope.get_unique_variable_name("pos_class_scores") + neg_class_scores_name = scope.get_unique_variable_name("neg_class_scores") + container.add_initializer(operand_name, proto_dtype, [2], [-1, 1]) + container.add_initializer(neg_name, proto_dtype, [], [-1]) + + apply_div( + scope, + [summation_prob_name, est_weights_sum_name], + div_res_name, + container, + broadcast=1, + ) + apply_mul( + scope, [div_res_name, operand_name], mul_res_name, container, broadcast=1 + ) if container.target_opset < 13: container.add_node( - 'ReduceSum', mul_res_name, pos_class_scores_name, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + mul_res_name, + pos_class_scores_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [mul_res_name, axis_name], pos_class_scores_name, - name=scope.get_unique_operator_name('ReduceSum')) - apply_mul(scope, [pos_class_scores_name, neg_name], - neg_class_scores_name, container, broadcast=1) + "ReduceSum", + [mul_res_name, axis_name], + pos_class_scores_name, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_mul( + scope, + [pos_class_scores_name, neg_name], + neg_class_scores_name, + container, + broadcast=1, + ) apply_concat( - scope, [neg_class_scores_name, pos_class_scores_name], - operator.outputs[1].full_name, container, axis=1) + scope, + [neg_class_scores_name, pos_class_scores_name], + operator.outputs[1].full_name, + container, + axis=1, + ) else: - apply_div(scope, [summation_prob_name, 
est_weights_sum_name], - operator.outputs[1].full_name, container, broadcast=1) + apply_div( + scope, + [summation_prob_name, est_weights_sum_name], + operator.outputs[1].full_name, + container, + broadcast=1, + ) return operator.outputs[1].full_name -def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_ada_boost_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for AdaBoost classifier. This function goes through the list of estimators and uses @@ -248,28 +362,31 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator, the probability score for the final result. Label is calculated by simply doing an argmax of the probability scores. """ - if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']: + if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]: raise RuntimeError( "Option 'nocl' is not implemented for operator '{}'.".format( - operator.raw_operator.__class__.__name__)) + operator.raw_operator.__class__.__name__ + ) + ) op = operator.raw_operator options = container.get_options(op, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] + use_raw_scores = options["raw_scores"] classes = op.classes_ class_type = onnx_proto.TensorProto.STRING if np.issubdtype(classes.dtype, np.floating): class_type = onnx_proto.TensorProto.INT32 - classes = classes.astype('int') + classes = classes.astype("int") elif np.issubdtype(classes.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) - argmax_output_name = scope.get_unique_variable_name('argmax_output') + argmax_output_name = scope.get_unique_variable_name("argmax_output") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) - classes_name = scope.get_unique_variable_name('classes') + classes_name = scope.get_unique_variable_name("classes") container.add_initializer(classes_name, class_type, classes.shape, classes) proba_names_list = [] @@ -289,9 +406,9 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator, for i_est, estimator in enumerate(op.estimators_): label_name = scope.declare_local_variable( - 'elab_name_%d' % i_est, Int64TensorType()) - proba_name = scope.declare_local_variable( - 'eprob_name_%d' % i_est, proba_type()) + "elab_name_%d" % i_est, Int64TensorType() + ) + proba_name = scope.declare_local_variable("eprob_name_%d" % i_est, proba_type()) op_type = sklearn_operator_name_map[type(estimator)] @@ -300,90 +417,130 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator, this_operator.outputs.extend([label_name, proba_name]) if add_cast: - this_operator = scope.declare_local_operator('SklearnCast') + this_operator = scope.declare_local_operator("SklearnCast") this_operator.inputs.append(proba_name) - var_name = scope.declare_local_variable('cast', FloatTensorType()) + var_name = scope.declare_local_variable("cast", FloatTensorType()) this_operator.outputs.append(var_name) proba_name = var_name - if op.algorithm == 'SAMME.R': + if op.algorithm == "SAMME.R": cur_proba_name = _samme_r_proba( - scope, container, proba_name.onnx_name, len(classes), - dtype, proto_dtype) + scope, container, proba_name.onnx_name, len(classes), dtype, proto_dtype + ) else: # SAMME if 
_scikit_learn_before_022() and not use_raw_scores: - weight_name = scope.get_unique_variable_name('weight') - samme_proba_name = scope.get_unique_variable_name( - 'samme_proba') + weight_name = scope.get_unique_variable_name("weight") + samme_proba_name = scope.get_unique_variable_name("samme_proba") container.add_initializer( - weight_name, onnx_proto.TensorProto.FLOAT, - [], [op.estimator_weights_[i_est]]) - apply_mul(scope, [proba_name.onnx_name, weight_name], - samme_proba_name, container, broadcast=1) + weight_name, + onnx_proto.TensorProto.FLOAT, + [], + [op.estimator_weights_[i_est]], + ) + apply_mul( + scope, + [proba_name.onnx_name, weight_name], + samme_proba_name, + container, + broadcast=1, + ) cur_proba_name = samme_proba_name else: if classes_ind_name is None: - classes_ind_name = scope.get_unique_variable_name( - 'classes_ind3') + classes_ind_name = scope.get_unique_variable_name("classes_ind3") container.add_initializer( - classes_ind_name, onnx_proto.TensorProto.INT64, - (1, len(classes)), list(range(len(classes)))) + classes_ind_name, + onnx_proto.TensorProto.INT64, + (1, len(classes)), + list(range(len(classes))), + ) if zero_name is None: - shape_name = scope.get_unique_variable_name('shape') + shape_name = scope.get_unique_variable_name("shape") container.add_node( - 'Shape', proba_name.onnx_name, shape_name, - name=scope.get_unique_operator_name('Shape')) + "Shape", + proba_name.onnx_name, + shape_name, + name=scope.get_unique_operator_name("Shape"), + ) - zero_name = scope.get_unique_variable_name('zero') + zero_name = scope.get_unique_variable_name("zero") container.add_node( - 'ConstantOfShape', shape_name, zero_name, - name=scope.get_unique_operator_name('CoSA'), + "ConstantOfShape", + shape_name, + zero_name, + name=scope.get_unique_operator_name("CoSA"), value=make_tensor( - "value", onnx_proto.TensorProto.FLOAT, - (1, ), [0])) + "value", onnx_proto.TensorProto.FLOAT, (1,), [0] + ), + ) - one_name = scope.get_unique_variable_name('one') + one_name = scope.get_unique_variable_name("one") container.add_node( - 'ConstantOfShape', shape_name, one_name, - name=scope.get_unique_operator_name('CoSB'), + "ConstantOfShape", + shape_name, + one_name, + name=scope.get_unique_operator_name("CoSB"), value=make_tensor( - "value", onnx_proto.TensorProto.FLOAT, - (1, ), [1.])) + "value", onnx_proto.TensorProto.FLOAT, (1,), [1.0] + ), + ) cur_proba_name = _samme_proba( - scope, container, proba_name.onnx_name, - op.estimator_weights_[i_est], zero_name, - classes_ind_name, one_name) + scope, + container, + proba_name.onnx_name, + op.estimator_weights_[i_est], + zero_name, + classes_ind_name, + one_name, + ) proba_names_list.append(cur_proba_name) - function = (_generate_raw_scores if use_raw_scores - else _normalise_probability) - class_prob_name = function(scope, container, operator, - proba_names_list, op) - container.add_node('ArgMax', class_prob_name, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) + function = _generate_raw_scores if use_raw_scores else _normalise_probability + class_prob_name = function(scope, container, operator, proba_names_list, op) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArgMax", + class_prob_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + container.add_node( + "ArrayFeatureExtractor", + [classes_name, 
argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == onnx_proto.TensorProto.INT32: - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - - apply_reshape(scope, array_feature_extractor_result_name, - reshaped_result_name, container, - desired_shape=(-1,)) - apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + + apply_reshape( + scope, + array_feature_extractor_result_name, + reshaped_result_name, + container, + desired_shape=(-1,), + ) + apply_cast( + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=(-1,)) + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=(-1,), + ) def _get_estimators_label(scope, operator, container, model): @@ -395,14 +552,14 @@ def _get_estimators_label(scope, operator, container, model): var_type = DoubleTensorType else: var_type = FloatTensorType - concatenated_labels_name = scope.get_unique_variable_name( - 'concatenated_labels') + concatenated_labels_name = scope.get_unique_variable_name("concatenated_labels") input_name = operator.inputs estimators_results_list = [] for i, estimator in enumerate(model.estimators_): estimator_label_name = scope.declare_local_variable( - 'est_label_%d' % i, var_type([None, 1])) + "est_label_%d" % i, var_type([None, 1]) + ) op_type = sklearn_operator_name_map[type(estimator)] @@ -412,104 +569,139 @@ def _get_estimators_label(scope, operator, container, model): estimators_results_list.append(estimator_label_name.onnx_name) - apply_concat(scope, estimators_results_list, concatenated_labels_name, - container, axis=1) + apply_concat( + scope, estimators_results_list, concatenated_labels_name, container, axis=1 + ) return concatenated_labels_name def cum_sum(scope, container, rnn_input_name, sequence_length, proto_dtype): opv = container.target_opset - weights_cdf_name = scope.get_unique_variable_name('weights_cdf') + weights_cdf_name = scope.get_unique_variable_name("weights_cdf") if opv < 11: - transposed_input_name = scope.get_unique_variable_name( - 'transposed_input') - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - weights_name = scope.get_unique_variable_name('weights') - rec_weights_name = scope.get_unique_variable_name('rec_weights') - rnn_output_name = scope.get_unique_variable_name('rnn_output') - permuted_rnn_y_name = scope.get_unique_variable_name('permuted_rnn_y') - - container.add_initializer(weights_name, - proto_dtype, [1, 1, 1], [1]) - container.add_initializer(rec_weights_name, - proto_dtype, [1, 1, 1], [1]) - - apply_transpose(scope, rnn_input_name, transposed_input_name, - container, perm=(1, 0)) - apply_reshape(scope, transposed_input_name, reshaped_result_name, - container, desired_shape=(sequence_length, -1, 1)) + transposed_input_name = scope.get_unique_variable_name("transposed_input") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + weights_name = scope.get_unique_variable_name("weights") + rec_weights_name = scope.get_unique_variable_name("rec_weights") + rnn_output_name = 
scope.get_unique_variable_name("rnn_output") + permuted_rnn_y_name = scope.get_unique_variable_name("permuted_rnn_y") + + container.add_initializer(weights_name, proto_dtype, [1, 1, 1], [1]) + container.add_initializer(rec_weights_name, proto_dtype, [1, 1, 1], [1]) + + apply_transpose( + scope, rnn_input_name, transposed_input_name, container, perm=(1, 0) + ) + apply_reshape( + scope, + transposed_input_name, + reshaped_result_name, + container, + desired_shape=(sequence_length, -1, 1), + ) container.add_node( - 'RNN', inputs=[reshaped_result_name, - weights_name, rec_weights_name], - outputs=[rnn_output_name], activations=['Affine'], - name=scope.get_unique_operator_name('RNN'), - activation_alpha=[1.0], activation_beta=[0.0], hidden_size=1) - apply_transpose(scope, rnn_output_name, permuted_rnn_y_name, container, - perm=(2, 0, 1, 3)) + "RNN", + inputs=[reshaped_result_name, weights_name, rec_weights_name], + outputs=[rnn_output_name], + activations=["Affine"], + name=scope.get_unique_operator_name("RNN"), + activation_alpha=[1.0], + activation_beta=[0.0], + hidden_size=1, + ) + apply_transpose( + scope, rnn_output_name, permuted_rnn_y_name, container, perm=(2, 0, 1, 3) + ) apply_reshape( - scope, permuted_rnn_y_name, weights_cdf_name, container, - desired_shape=(-1, sequence_length)) + scope, + permuted_rnn_y_name, + weights_cdf_name, + container, + desired_shape=(-1, sequence_length), + ) else: - axis_name = scope.get_unique_variable_name('axis_name') - container.add_initializer(axis_name, onnx_proto.TensorProto.INT32, - [], [1]) + axis_name = scope.get_unique_variable_name("axis_name") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT32, [], [1]) container.add_node( - 'CumSum', [rnn_input_name, axis_name], [weights_cdf_name], - name=scope.get_unique_operator_name('CumSum'), - op_version=11) + "CumSum", + [rnn_input_name, axis_name], + [weights_cdf_name], + name=scope.get_unique_operator_name("CumSum"), + op_version=11, + ) return weights_cdf_name -def _apply_gather_elements(scope, container, inputs, output, axis, - dim, zero_type, suffix): +def _apply_gather_elements( + scope, container, inputs, output, axis, dim, zero_type, suffix +): if container.target_opset >= 11: container.add_node( - 'GatherElements', inputs, output, op_version=11, axis=axis, - name=scope.get_unique_operator_name('GatEls' + suffix)) + "GatherElements", + inputs, + output, + op_version=11, + axis=axis, + name=scope.get_unique_operator_name("GatEls" + suffix), + ) else: - classes_ind_name = scope.get_unique_variable_name('classes_ind2') + classes_ind_name = scope.get_unique_variable_name("classes_ind2") container.add_initializer( - classes_ind_name, onnx_proto.TensorProto.INT64, - (1, dim), list(range(dim))) + classes_ind_name, onnx_proto.TensorProto.INT64, (1, dim), list(range(dim)) + ) - shape_name = scope.get_unique_variable_name('shape') + shape_name = scope.get_unique_variable_name("shape") + container.add_node( + "Shape", inputs[0], shape_name, name=scope.get_unique_operator_name("Shape") + ) + zero_name = scope.get_unique_variable_name("zero") + zero_val = 0 if zero_type == onnx_proto.TensorProto.INT64 else 0.0 container.add_node( - 'Shape', inputs[0], shape_name, - name=scope.get_unique_operator_name('Shape')) - zero_name = scope.get_unique_variable_name('zero') - zero_val = (0 if zero_type == onnx_proto.TensorProto.INT64 - else 0.) 
+ "ConstantOfShape", + shape_name, + zero_name, + name=scope.get_unique_operator_name("CoSA"), + value=make_tensor("value", zero_type, (1,), [zero_val]), + op_version=9, + ) + + equal_name = scope.get_unique_variable_name("equal") + container.add_node( + "Equal", + [inputs[1], classes_ind_name], + equal_name, + name=scope.get_unique_operator_name("Equal"), + ) + + selected = scope.get_unique_variable_name("selected") container.add_node( - 'ConstantOfShape', shape_name, zero_name, - name=scope.get_unique_operator_name('CoSA'), - value=make_tensor("value", zero_type, - (1, ), [zero_val]), op_version=9) - - equal_name = scope.get_unique_variable_name('equal') - container.add_node('Equal', [inputs[1], classes_ind_name], - equal_name, - name=scope.get_unique_operator_name('Equal')) - - selected = scope.get_unique_variable_name('selected') - container.add_node('Where', [equal_name, inputs[0], zero_name], - selected, - name=scope.get_unique_operator_name('Where')) + "Where", + [equal_name, inputs[0], zero_name], + selected, + name=scope.get_unique_operator_name("Where"), + ) if container.target_opset < 13: container.add_node( - 'ReduceSum', selected, output, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + selected, + output, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [selected, axis_name], output, - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + [selected, axis_name], + output, + name=scope.get_unique_operator_name("ReduceSum"), + ) -def convert_sklearn_ada_boost_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_ada_boost_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for AdaBoost regressor. 
This function first calls _get_estimators_label() which returns a @@ -527,91 +719,137 @@ def convert_sklearn_ada_boost_regressor(scope: Scope, operator: Operator, op = operator.raw_operator - negate_name = scope.get_unique_variable_name('negate') - estimators_weights_name = scope.get_unique_variable_name( - 'estimators_weights') - half_scalar_name = scope.get_unique_variable_name('half_scalar') - last_index_name = scope.get_unique_variable_name('last_index') - negated_labels_name = scope.get_unique_variable_name('negated_labels') - sorted_values_name = scope.get_unique_variable_name('sorted_values') - sorted_indices_name = scope.get_unique_variable_name('sorted_indices') + negate_name = scope.get_unique_variable_name("negate") + estimators_weights_name = scope.get_unique_variable_name("estimators_weights") + half_scalar_name = scope.get_unique_variable_name("half_scalar") + last_index_name = scope.get_unique_variable_name("last_index") + negated_labels_name = scope.get_unique_variable_name("negated_labels") + sorted_values_name = scope.get_unique_variable_name("sorted_values") + sorted_indices_name = scope.get_unique_variable_name("sorted_indices") array_feat_extractor_output_name = scope.get_unique_variable_name( - 'array_feat_extractor_output') - median_value_name = scope.get_unique_variable_name('median_value') - comp_value_name = scope.get_unique_variable_name('comp_value') - median_or_above_name = scope.get_unique_variable_name('median_or_above') - median_idx_name = scope.get_unique_variable_name('median_idx') - cast_result_name = scope.get_unique_variable_name('cast_result') - reshaped_weights_name = scope.get_unique_variable_name('reshaped_weights') - median_estimators_name = scope.get_unique_variable_name( - 'median_estimators') - - container.add_initializer(negate_name, proto_dtype, - [], [-1]) - container.add_initializer(estimators_weights_name, - proto_dtype, - [len(op.estimator_weights_)], - op.estimator_weights_) - container.add_initializer(half_scalar_name, proto_dtype, - [], [0.5]) - container.add_initializer(last_index_name, onnx_proto.TensorProto.INT64, - [], [len(op.estimators_) - 1]) - - concatenated_labels = _get_estimators_label(scope, operator, - container, op) - apply_mul(scope, [concatenated_labels, negate_name], - negated_labels_name, container, broadcast=1) + "array_feat_extractor_output" + ) + median_value_name = scope.get_unique_variable_name("median_value") + comp_value_name = scope.get_unique_variable_name("comp_value") + median_or_above_name = scope.get_unique_variable_name("median_or_above") + median_idx_name = scope.get_unique_variable_name("median_idx") + cast_result_name = scope.get_unique_variable_name("cast_result") + reshaped_weights_name = scope.get_unique_variable_name("reshaped_weights") + median_estimators_name = scope.get_unique_variable_name("median_estimators") + + container.add_initializer(negate_name, proto_dtype, [], [-1]) + container.add_initializer( + estimators_weights_name, + proto_dtype, + [len(op.estimator_weights_)], + op.estimator_weights_, + ) + container.add_initializer(half_scalar_name, proto_dtype, [], [0.5]) + container.add_initializer( + last_index_name, onnx_proto.TensorProto.INT64, [], [len(op.estimators_) - 1] + ) + + concatenated_labels = _get_estimators_label(scope, operator, container, op) + apply_mul( + scope, + [concatenated_labels, negate_name], + negated_labels_name, + container, + broadcast=1, + ) try: - apply_topk(scope, negated_labels_name, - [sorted_values_name, sorted_indices_name], - container, 
k=len(op.estimators_)) + apply_topk( + scope, + negated_labels_name, + [sorted_values_name, sorted_indices_name], + container, + k=len(op.estimators_), + ) except TypeError: # onnxconverter-common < 1.7.0 - apply_topk(scope, [negated_labels_name], - [sorted_values_name, sorted_indices_name], - container, k=len(op.estimators_)) + apply_topk( + scope, + [negated_labels_name], + [sorted_values_name, sorted_indices_name], + container, + k=len(op.estimators_), + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [estimators_weights_name, sorted_indices_name], - array_feat_extractor_output_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + array_feat_extractor_output_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) apply_reshape( - scope, array_feat_extractor_output_name, reshaped_weights_name, - container, desired_shape=(-1, len(op.estimators_))) + scope, + array_feat_extractor_output_name, + reshaped_weights_name, + container, + desired_shape=(-1, len(op.estimators_)), + ) weights_cdf_name = cum_sum( - scope, container, reshaped_weights_name, - len(op.estimators_), proto_dtype) + scope, container, reshaped_weights_name, len(op.estimators_), proto_dtype + ) container.add_node( - 'ArrayFeatureExtractor', [weights_cdf_name, last_index_name], - median_value_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_mul(scope, [median_value_name, half_scalar_name], - comp_value_name, container, broadcast=1) + "ArrayFeatureExtractor", + [weights_cdf_name, last_index_name], + median_value_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_mul( + scope, + [median_value_name, half_scalar_name], + comp_value_name, + container, + broadcast=1, + ) container.add_node( - 'Less', [weights_cdf_name, comp_value_name], + "Less", + [weights_cdf_name, comp_value_name], median_or_above_name, - name=scope.get_unique_operator_name('Less')) - apply_cast(scope, median_or_above_name, cast_result_name, - container, to=proto_dtype) - container.add_node('ArgMin', cast_result_name, - median_idx_name, - name=scope.get_unique_operator_name('ArgMin'), axis=1) + name=scope.get_unique_operator_name("Less"), + ) + apply_cast(scope, median_or_above_name, cast_result_name, container, to=proto_dtype) + container.add_node( + "ArgMin", + cast_result_name, + median_idx_name, + name=scope.get_unique_operator_name("ArgMin"), + axis=1, + ) _apply_gather_elements( - scope, container, [sorted_indices_name, median_idx_name], - median_estimators_name, axis=1, dim=len(op.estimators_), - zero_type=onnx_proto.TensorProto.INT64, suffix="A") + scope, + container, + [sorted_indices_name, median_idx_name], + median_estimators_name, + axis=1, + dim=len(op.estimators_), + zero_type=onnx_proto.TensorProto.INT64, + suffix="A", + ) output_name = operator.output_full_names[0] _apply_gather_elements( - scope, container, [concatenated_labels, median_estimators_name], - output_name, axis=1, dim=len(op.estimators_), - zero_type=proto_dtype, suffix="B") - - -register_converter('SklearnAdaBoostClassifier', - convert_sklearn_ada_boost_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) -register_converter('SklearnAdaBoostRegressor', - convert_sklearn_ada_boost_regressor) + scope, + container, + [concatenated_labels, median_estimators_name], + 
output_name, + axis=1, + dim=len(op.estimators_), + zero_type=proto_dtype, + suffix="B", + ) + + +register_converter( + "SklearnAdaBoostClassifier", + convert_sklearn_ada_boost_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) +register_converter("SklearnAdaBoostRegressor", convert_sklearn_ada_boost_regressor) diff --git a/skl2onnx/operator_converters/array_feature_extractor.py b/skl2onnx/operator_converters/array_feature_extractor.py index e26fc86bf..99b7639e6 100644 --- a/skl2onnx/operator_converters/array_feature_extractor.py +++ b/skl2onnx/operator_converters/array_feature_extractor.py @@ -8,32 +8,41 @@ def convert_sklearn_array_feature_extractor( - scope: Scope, operator: Operator, container: ModelComponentContainer): + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Extracts a subset of columns. This is used by *ColumnTransformer*. """ - column_indices_name = scope.get_unique_variable_name('column_indices') + column_indices_name = scope.get_unique_variable_name("column_indices") for i, ind in enumerate(operator.column_indices): if not isinstance(ind, int): - raise RuntimeError(("Column {0}:'{1}' indices must be specified " - "as integers. This error may happen when " - "column names are used to define a " - "ColumnTransformer. Column name in input data " - "do not necessarily match input variables " - "defined for the ONNX model.").format(i, ind)) - container.add_initializer(column_indices_name, - onnx_proto.TensorProto.INT64, - [len(operator.column_indices)], - operator.column_indices) + raise RuntimeError( + ( + "Column {0}:'{1}' indices must be specified " + "as integers. This error may happen when " + "column names are used to define a " + "ColumnTransformer. Column name in input data " + "do not necessarily match input variables " + "defined for the ONNX model." 
+ ).format(i, ind) + ) + container.add_initializer( + column_indices_name, + onnx_proto.TensorProto.INT64, + [len(operator.column_indices)], + operator.column_indices, + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [operator.inputs[0].full_name, column_indices_name], operator.outputs[0].full_name, - name=scope.get_unique_operator_name('ArrayFeatureExtractor'), - op_domain='ai.onnx.ml') + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + op_domain="ai.onnx.ml", + ) -register_converter('SklearnArrayFeatureExtractor', - convert_sklearn_array_feature_extractor) +register_converter( + "SklearnArrayFeatureExtractor", convert_sklearn_array_feature_extractor +) diff --git a/skl2onnx/operator_converters/bagging.py b/skl2onnx/operator_converters/bagging.py index afdd5f973..2146ae972 100644 --- a/skl2onnx/operator_converters/bagging.py +++ b/skl2onnx/operator_converters/bagging.py @@ -4,9 +4,7 @@ import numpy as np from .._supported_operators import sklearn_operator_name_map from ..common.data_types import Int64TensorType -from ..common._apply_operation import ( - apply_cast, apply_concat, - apply_div, apply_reshape) +from ..common._apply_operation import apply_cast, apply_concat, apply_div, apply_reshape from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -21,169 +19,238 @@ def _calculate_proba(scope, operator, container, model): final_proba_name = operator.outputs[1].full_name proba_list = [] options = container.get_options(model, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] - has_proba = (hasattr(model.estimators_[0], 'predict_proba') - or (use_raw_scores and hasattr( - model.estimators_[0], 'decision_function'))) + use_raw_scores = options["raw_scores"] + has_proba = hasattr(model.estimators_[0], "predict_proba") or ( + use_raw_scores and hasattr(model.estimators_[0], "decision_function") + ) for index, estimator in enumerate(model.estimators_): op_type = sklearn_operator_name_map[type(estimator)] this_operator = scope.declare_local_operator(op_type, estimator) - if container.has_options(estimator, 'raw_scores'): - container.add_options( - id(estimator), {'raw_scores': use_raw_scores}) - scope.add_options(id(estimator), {'raw_scores': use_raw_scores}) + if container.has_options(estimator, "raw_scores"): + container.add_options(id(estimator), {"raw_scores": use_raw_scores}) + scope.add_options(id(estimator), {"raw_scores": use_raw_scores}) - label_name = scope.declare_local_variable( - 'label_%d' % index, Int64TensorType()) + label_name = scope.declare_local_variable("label_%d" % index, Int64TensorType()) proba_name = scope.declare_local_variable( - 'proba_%d' % index, operator.inputs[0].type.__class__()) + "proba_%d" % index, operator.inputs[0].type.__class__() + ) features = model.estimators_features_[index] - n_features = (model.n_features_in_ if hasattr(model, 'n_features_in_') - else model.n_features_) - if (len(features) == n_features and - list(features) == list(range(n_features))): + n_features = ( + model.n_features_in_ + if hasattr(model, "n_features_in_") + else model.n_features_ + ) + if len(features) == n_features and list(features) == list(range(n_features)): this_operator.inputs = operator.inputs else: # subset of features feat_name = scope.declare_local_variable( - 'fsel_%d' % index, operator.inputs[0].type.__class__()) - index_name = scope.get_unique_variable_name( - 'index_name_%d' % index) + "fsel_%d" % 
index, operator.inputs[0].type.__class__() + ) + index_name = scope.get_unique_variable_name("index_name_%d" % index) container.add_initializer( - index_name, onnx_proto.TensorProto.INT64, - (len(features), ), list(features)) + index_name, + onnx_proto.TensorProto.INT64, + (len(features),), + list(features), + ) container.add_node( - 'Gather', [operator.inputs[0].full_name, index_name], + "Gather", + [operator.inputs[0].full_name, index_name], [feat_name.full_name], - name=scope.get_unique_operator_name('GatherBG'), axis=1) + name=scope.get_unique_operator_name("GatherBG"), + axis=1, + ) this_operator.inputs.append(feat_name) this_operator.outputs.append(label_name) this_operator.outputs.append(proba_name) - proba_output_name = (proba_name.onnx_name if has_proba - else label_name.onnx_name) + proba_output_name = proba_name.onnx_name if has_proba else label_name.onnx_name reshape_dim_val = len(model.classes_) if has_proba else 1 - reshaped_proba_name = scope.get_unique_variable_name('reshaped_proba') - apply_reshape(scope, proba_output_name, reshaped_proba_name, - container, desired_shape=(1, -1, reshape_dim_val)) + reshaped_proba_name = scope.get_unique_variable_name("reshaped_proba") + apply_reshape( + scope, + proba_output_name, + reshaped_proba_name, + container, + desired_shape=(1, -1, reshape_dim_val), + ) proba_list.append(reshaped_proba_name) - merged_proba_name = scope.get_unique_variable_name('merged_proba') - apply_concat(scope, proba_list, - merged_proba_name, container, axis=0) + merged_proba_name = scope.get_unique_variable_name("merged_proba") + apply_concat(scope, proba_list, merged_proba_name, container, axis=0) if has_proba: if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [0]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0]) container.add_node( - 'ReduceMean', [merged_proba_name, axis_name], + "ReduceMean", + [merged_proba_name, axis_name], final_proba_name, - name=scope.get_unique_operator_name('ReduceMean'), - keepdims=0) + name=scope.get_unique_operator_name("ReduceMean"), + keepdims=0, + ) else: container.add_node( - 'ReduceMean', merged_proba_name, + "ReduceMean", + merged_proba_name, final_proba_name, - name=scope.get_unique_operator_name('ReduceMean'), - axes=[0], keepdims=0) + name=scope.get_unique_operator_name("ReduceMean"), + axes=[0], + keepdims=0, + ) else: - n_estimators_name = scope.get_unique_variable_name('n_estimators') - class_labels_name = scope.get_unique_variable_name('class_labels') - equal_result_name = scope.get_unique_variable_name('equal_result') - cast_output_name = scope.get_unique_variable_name('cast_output') - reduced_proba_name = scope.get_unique_variable_name('reduced_proba') + n_estimators_name = scope.get_unique_variable_name("n_estimators") + class_labels_name = scope.get_unique_variable_name("class_labels") + equal_result_name = scope.get_unique_variable_name("equal_result") + cast_output_name = scope.get_unique_variable_name("cast_output") + reduced_proba_name = scope.get_unique_variable_name("reduced_proba") container.add_initializer( - n_estimators_name, onnx_proto.TensorProto.FLOAT, [], - [len(model.estimators_)]) + n_estimators_name, + onnx_proto.TensorProto.FLOAT, + [], + [len(model.estimators_)], + ) container.add_initializer( - class_labels_name, onnx_proto.TensorProto.INT64, + class_labels_name, + onnx_proto.TensorProto.INT64, [1, 1, 
len(model.estimators_[0].classes_)], - model.estimators_[0].classes_) + model.estimators_[0].classes_, + ) - container.add_node('Equal', [class_labels_name, merged_proba_name], - equal_result_name, - name=scope.get_unique_operator_name('Equal')) - apply_cast(scope, equal_result_name, cast_output_name, - container, to=onnx_proto.TensorProto.FLOAT) + container.add_node( + "Equal", + [class_labels_name, merged_proba_name], + equal_result_name, + name=scope.get_unique_operator_name("Equal"), + ) + apply_cast( + scope, + equal_result_name, + cast_output_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) if container.target_opset < 13: container.add_node( - 'ReduceSum', cast_output_name, reduced_proba_name, - name=scope.get_unique_operator_name('ReduceSum'), - axes=[0], keepdims=0) + "ReduceSum", + cast_output_name, + reduced_proba_name, + name=scope.get_unique_operator_name("ReduceSum"), + axes=[0], + keepdims=0, + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [cast_output_name, axis_name], - reduced_proba_name, keepdims=0, - name=scope.get_unique_operator_name('ReduceSum')) - apply_div(scope, [reduced_proba_name, n_estimators_name], - final_proba_name, container, broadcast=1) + "ReduceSum", + [cast_output_name, axis_name], + reduced_proba_name, + keepdims=0, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_div( + scope, + [reduced_proba_name, n_estimators_name], + final_proba_name, + container, + broadcast=1, + ) return final_proba_name -def convert_sklearn_bagging_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_bagging_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for BaggingClassifier. 
""" - if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']: + if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]: raise RuntimeError( "Option 'nocl' is not implemented for operator '{}'.".format( - operator.raw_operator.__class__.__name__)) + operator.raw_operator.__class__.__name__ + ) + ) bagging_op = operator.raw_operator classes = bagging_op.classes_ output_shape = (-1,) - classes_name = scope.get_unique_variable_name('classes') - argmax_output_name = scope.get_unique_variable_name('argmax_output') + classes_name = scope.get_unique_variable_name("classes") + argmax_output_name = scope.get_unique_variable_name("argmax_output") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) class_type = onnx_proto.TensorProto.STRING - if (np.issubdtype(bagging_op.classes_.dtype, np.floating) or - bagging_op.classes_.dtype == np.bool_): + if ( + np.issubdtype(bagging_op.classes_.dtype, np.floating) + or bagging_op.classes_.dtype == np.bool_ + ): class_type = onnx_proto.TensorProto.INT32 classes = classes.astype(np.int32) elif np.issubdtype(bagging_op.classes_.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) container.add_initializer(classes_name, class_type, classes.shape, classes) proba_name = _calculate_proba(scope, operator, container, bagging_op) container.add_node( - 'ArgMax', proba_name, argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) + "ArgMax", + proba_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == onnx_proto.TensorProto.INT32: - cast_result_name = scope.get_unique_variable_name('cast_result') - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - apply_cast(scope, array_feature_extractor_result_name, - cast_result_name, container, - to=onnx_proto.TensorProto.INT64) - apply_reshape(scope, cast_result_name, reshaped_result_name, - container, desired_shape=output_shape) - apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + cast_result_name = scope.get_unique_variable_name("cast_result") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + apply_cast( + scope, + array_feature_extractor_result_name, + cast_result_name, + container, + to=onnx_proto.TensorProto.INT64, + ) + apply_reshape( + scope, + cast_result_name, + reshaped_result_name, + container, + desired_shape=output_shape, + ) + apply_cast( + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: # string labels - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=output_shape) + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=output_shape, + ) -def 
convert_sklearn_bagging_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_bagging_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for BaggingRegressor. """ @@ -194,59 +261,79 @@ def convert_sklearn_bagging_regressor(scope: Scope, operator: Operator, this_operator = scope.declare_local_operator(op_type, estimator) features = bagging_op.estimators_features_[index] - n_features = (bagging_op.n_features_in_ - if hasattr(bagging_op, 'n_features_in_') - else bagging_op.n_features_) - if (len(features) == n_features and - list(features) == list(range(n_features))): + n_features = ( + bagging_op.n_features_in_ + if hasattr(bagging_op, "n_features_in_") + else bagging_op.n_features_ + ) + if len(features) == n_features and list(features) == list(range(n_features)): this_operator.inputs = operator.inputs else: # subset of features feat_name = scope.declare_local_variable( - 'fsel_%d' % index, operator.inputs[0].type.__class__()) - index_name = scope.get_unique_variable_name('index_name') + "fsel_%d" % index, operator.inputs[0].type.__class__() + ) + index_name = scope.get_unique_variable_name("index_name") container.add_initializer( - index_name, onnx_proto.TensorProto.INT64, - (len(features), ), list(features)) + index_name, + onnx_proto.TensorProto.INT64, + (len(features),), + list(features), + ) container.add_node( - 'Gather', [operator.inputs[0].full_name, index_name], + "Gather", + [operator.inputs[0].full_name, index_name], [feat_name.full_name], - name=scope.get_unique_operator_name('GatherBG'), axis=1) + name=scope.get_unique_operator_name("GatherBG"), + axis=1, + ) this_operator.inputs.append(feat_name) label_name = scope.declare_local_variable( - 'variable_%d' % index, this_operator.inputs[0].type.__class__()) + "variable_%d" % index, this_operator.inputs[0].type.__class__() + ) this_operator.outputs.append(label_name) - reshaped_proba_name = scope.get_unique_variable_name('reshaped_proba') - apply_reshape(scope, label_name.onnx_name, reshaped_proba_name, - container, desired_shape=(1, -1, 1)) + reshaped_proba_name = scope.get_unique_variable_name("reshaped_proba") + apply_reshape( + scope, + label_name.onnx_name, + reshaped_proba_name, + container, + desired_shape=(1, -1, 1), + ) proba_list.append(reshaped_proba_name) - merged_proba_name = scope.get_unique_variable_name('merged_proba') - apply_concat(scope, proba_list, - merged_proba_name, container, axis=0) + merged_proba_name = scope.get_unique_variable_name("merged_proba") + apply_concat(scope, proba_list, merged_proba_name, container, axis=0) if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [0]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0]) container.add_node( - 'ReduceMean', [merged_proba_name, axis_name], + "ReduceMean", + [merged_proba_name, axis_name], operator.outputs[0].full_name, - name=scope.get_unique_operator_name('ReduceMean'), - keepdims=0) + name=scope.get_unique_operator_name("ReduceMean"), + keepdims=0, + ) else: container.add_node( - 'ReduceMean', merged_proba_name, + "ReduceMean", + merged_proba_name, operator.outputs[0].full_name, - name=scope.get_unique_operator_name('ReduceMean'), - axes=[0], keepdims=0) - - -register_converter('SklearnBaggingClassifier', - convert_sklearn_bagging_classifier, - options={'zipmap': 
[True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) -register_converter('SklearnBaggingRegressor', - convert_sklearn_bagging_regressor) + name=scope.get_unique_operator_name("ReduceMean"), + axes=[0], + keepdims=0, + ) + + +register_converter( + "SklearnBaggingClassifier", + convert_sklearn_bagging_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) +register_converter("SklearnBaggingRegressor", convert_sklearn_bagging_regressor) diff --git a/skl2onnx/operator_converters/binariser.py b/skl2onnx/operator_converters/binariser.py index d22687321..478d4c601 100644 --- a/skl2onnx/operator_converters/binariser.py +++ b/skl2onnx/operator_converters/binariser.py @@ -9,37 +9,47 @@ from .common import concatenate_variables -def convert_sklearn_binarizer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_binarizer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): feature_name = concatenate_variables(scope, operator.inputs, container) if isinstance(operator.inputs[0].type, DoubleTensorType): - name0 = scope.get_unique_variable_name('cst0') - name1 = scope.get_unique_variable_name('cst1') - thres = scope.get_unique_variable_name('th') + name0 = scope.get_unique_variable_name("cst0") + name1 = scope.get_unique_variable_name("cst1") + thres = scope.get_unique_variable_name("th") + container.add_initializer(name0, onnx_proto.TensorProto.DOUBLE, [], [0.0]) + container.add_initializer(name1, onnx_proto.TensorProto.DOUBLE, [], [1.0]) container.add_initializer( - name0, onnx_proto.TensorProto.DOUBLE, [], [0.]) - container.add_initializer( - name1, onnx_proto.TensorProto.DOUBLE, [], [1.]) - container.add_initializer( - thres, onnx_proto.TensorProto.DOUBLE, [], - [float(operator.raw_operator.threshold)]) - binbool = scope.get_unique_variable_name('binbool') + thres, + onnx_proto.TensorProto.DOUBLE, + [], + [float(operator.raw_operator.threshold)], + ) + binbool = scope.get_unique_variable_name("binbool") container.add_node( - 'Less', [feature_name, thres], binbool, - name=scope.get_unique_operator_name('Less')) + "Less", + [feature_name, thres], + binbool, + name=scope.get_unique_operator_name("Less"), + ) container.add_node( - 'Where', [binbool, name0, name1], operator.output_full_names, - name='Where') + "Where", [binbool, name0, name1], operator.output_full_names, name="Where" + ) return - op_type = 'Binarizer' + op_type = "Binarizer" attrs = { - 'name': scope.get_unique_operator_name(op_type), - 'threshold': float(operator.raw_operator.threshold) + "name": scope.get_unique_operator_name(op_type), + "threshold": float(operator.raw_operator.threshold), } - container.add_node(op_type, feature_name, operator.output_full_names, - op_domain='ai.onnx.ml', **attrs) + container.add_node( + op_type, + feature_name, + operator.output_full_names, + op_domain="ai.onnx.ml", + **attrs + ) -register_converter('SklearnBinarizer', convert_sklearn_binarizer) +register_converter("SklearnBinarizer", convert_sklearn_binarizer) diff --git a/skl2onnx/operator_converters/calibrated_classifier_cv.py b/skl2onnx/operator_converters/calibrated_classifier_cv.py index ccab3eb83..808958b95 100644 --- a/skl2onnx/operator_converters/calibrated_classifier_cv.py +++ b/skl2onnx/operator_converters/calibrated_classifier_cv.py @@ -4,55 +4,77 @@ import numpy as np from onnx import TensorProto from 
..common._apply_operation import ( - apply_abs, apply_add, apply_cast, apply_concat, apply_clip, - apply_div, apply_exp, apply_mul, apply_reshape, apply_sub) + apply_abs, + apply_add, + apply_cast, + apply_concat, + apply_clip, + apply_div, + apply_exp, + apply_mul, + apply_reshape, + apply_sub, +) from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -from ..common.data_types import ( - guess_numpy_type, Int64TensorType, guess_proto_type) +from ..common.data_types import guess_numpy_type, Int64TensorType, guess_proto_type from ..common._registration import register_converter from .._supported_operators import sklearn_operator_name_map from sklearn.ensemble import RandomForestClassifier -def _handle_zeros(scope, container, concatenated_prob_name, - reduced_prob_name, n_classes, proto_type): +def _handle_zeros( + scope, container, concatenated_prob_name, reduced_prob_name, n_classes, proto_type +): """ This function replaces 0s in concatenated_prob_name with 1s and 0s in reduced_prob_name with n_classes. """ - cast_prob_name = scope.get_unique_variable_name('cast_prob') - bool_not_cast_prob_name = scope.get_unique_variable_name( - 'bool_not_cast_prob') - mask_name = scope.get_unique_variable_name('mask') + cast_prob_name = scope.get_unique_variable_name("cast_prob") + bool_not_cast_prob_name = scope.get_unique_variable_name("bool_not_cast_prob") + mask_name = scope.get_unique_variable_name("mask") masked_concatenated_prob_name = scope.get_unique_variable_name( - 'masked_concatenated_prob') - n_classes_name = scope.get_unique_variable_name('n_classes') - reduced_prob_mask_name = scope.get_unique_variable_name( - 'reduced_prob_mask') - masked_reduced_prob_name = scope.get_unique_variable_name( - 'masked_reduced_prob') + "masked_concatenated_prob" + ) + n_classes_name = scope.get_unique_variable_name("n_classes") + reduced_prob_mask_name = scope.get_unique_variable_name("reduced_prob_mask") + masked_reduced_prob_name = scope.get_unique_variable_name("masked_reduced_prob") proto_type2 = proto_type if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE): proto_type2 = TensorProto.FLOAT - container.add_initializer(n_classes_name, proto_type2, - [], [n_classes]) - - apply_cast(scope, reduced_prob_name, cast_prob_name, container, - to=TensorProto.BOOL) - container.add_node('Not', cast_prob_name, - bool_not_cast_prob_name, - name=scope.get_unique_operator_name('Not')) - apply_cast(scope, bool_not_cast_prob_name, mask_name, container, - to=proto_type2) - apply_add(scope, [concatenated_prob_name, mask_name], - masked_concatenated_prob_name, container, broadcast=1) - apply_mul(scope, [mask_name, n_classes_name], reduced_prob_mask_name, - container, broadcast=1) - apply_add(scope, [reduced_prob_name, reduced_prob_mask_name], - masked_reduced_prob_name, container, broadcast=0) + container.add_initializer(n_classes_name, proto_type2, [], [n_classes]) + + apply_cast(scope, reduced_prob_name, cast_prob_name, container, to=TensorProto.BOOL) + container.add_node( + "Not", + cast_prob_name, + bool_not_cast_prob_name, + name=scope.get_unique_operator_name("Not"), + ) + apply_cast(scope, bool_not_cast_prob_name, mask_name, container, to=proto_type2) + apply_add( + scope, + [concatenated_prob_name, mask_name], + masked_concatenated_prob_name, + container, + broadcast=1, + ) + apply_mul( + scope, + [mask_name, n_classes_name], + reduced_prob_mask_name, + container, + broadcast=1, + ) + apply_add( + scope, + [reduced_prob_name, reduced_prob_mask_name], + 
masked_reduced_prob_name, + container, + broadcast=0, + ) return masked_concatenated_prob_name, masked_reduced_prob_name @@ -60,48 +82,53 @@ def _transform_sigmoid(scope, container, model, df_col_name, k, proto_type): """ Sigmoid Calibration method """ - a_name = scope.get_unique_variable_name('a') - b_name = scope.get_unique_variable_name('b') - a_df_prod_name = scope.get_unique_variable_name('a_df_prod') - exp_parameter_name = scope.get_unique_variable_name( - 'exp_parameter') - exp_result_name = scope.get_unique_variable_name('exp_result') - unity_name = scope.get_unique_variable_name('unity') - denominator_name = scope.get_unique_variable_name('denominator') + a_name = scope.get_unique_variable_name("a") + b_name = scope.get_unique_variable_name("b") + a_df_prod_name = scope.get_unique_variable_name("a_df_prod") + exp_parameter_name = scope.get_unique_variable_name("exp_parameter") + exp_result_name = scope.get_unique_variable_name("exp_result") + unity_name = scope.get_unique_variable_name("unity") + denominator_name = scope.get_unique_variable_name("denominator") sigmoid_predict_result_name = scope.get_unique_variable_name( - 'sigmoid_predict_result') + "sigmoid_predict_result" + ) proto_type2 = proto_type if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE): proto_type2 = TensorProto.FLOAT - if hasattr(model, 'calibrators_'): + if hasattr(model, "calibrators_"): # scikit-learn<1.1 calibrators = model.calibrators_ - elif hasattr(model, 'calibrators'): + elif hasattr(model, "calibrators"): # scikit-learn>=1.1 calibrators = model.calibrators else: raise AttributeError( "Unable to find attribute calibrators_ or " "calibrators, check the model was trained, " - "type=%r." % type(model)) + "type=%r." % type(model) + ) - container.add_initializer(a_name, proto_type2, - [], [calibrators[k].a_]) - container.add_initializer(b_name, proto_type2, - [], [calibrators[k].b_]) + container.add_initializer(a_name, proto_type2, [], [calibrators[k].a_]) + container.add_initializer(b_name, proto_type2, [], [calibrators[k].b_]) container.add_initializer(unity_name, proto_type2, [], [1]) - apply_mul(scope, [a_name, df_col_name], a_df_prod_name, container, - broadcast=0) - apply_add(scope, [a_df_prod_name, b_name], exp_parameter_name, - container, broadcast=0) + apply_mul(scope, [a_name, df_col_name], a_df_prod_name, container, broadcast=0) + apply_add( + scope, [a_df_prod_name, b_name], exp_parameter_name, container, broadcast=0 + ) apply_exp(scope, exp_parameter_name, exp_result_name, container) - apply_add(scope, [unity_name, exp_result_name], denominator_name, - container, broadcast=0) - apply_div(scope, [unity_name, denominator_name], - sigmoid_predict_result_name, container, broadcast=0) + apply_add( + scope, [unity_name, exp_result_name], denominator_name, container, broadcast=0 + ) + apply_div( + scope, + [unity_name, denominator_name], + sigmoid_predict_result_name, + container, + broadcast=0, + ) return sigmoid_predict_result_name @@ -112,86 +139,104 @@ def _transform_isotonic(scope, container, model, T, k, dtype, proto_type): ArrayFeatureExtractor can only extract based on the last axis, so we can't fetch different columns for different rows. 
""" - if hasattr(model, 'calibrators_'): + if hasattr(model, "calibrators_"): # scikit-learn<1.1 calibrators = model.calibrators_ - elif hasattr(model, 'calibrators'): + elif hasattr(model, "calibrators"): # scikit-learn>=1.1 calibrators = model.calibrators else: raise AttributeError( "Unable to find attribute calibrators_ or " "calibrators, check the model was trained, " - "type=%r." % type(model)) - - if calibrators[k].out_of_bounds == 'clip': - clipped_df_name = scope.get_unique_variable_name('clipped_df') - apply_clip(scope, T, clipped_df_name, container, - operator_name=scope.get_unique_operator_name('Clip'), - max=np.array(calibrators[k].X_max_, dtype=dtype), - min=np.array(calibrators[k].X_min_, dtype=dtype)) + "type=%r." % type(model) + ) + + if calibrators[k].out_of_bounds == "clip": + clipped_df_name = scope.get_unique_variable_name("clipped_df") + apply_clip( + scope, + T, + clipped_df_name, + container, + operator_name=scope.get_unique_operator_name("Clip"), + max=np.array(calibrators[k].X_max_, dtype=dtype), + min=np.array(calibrators[k].X_min_, dtype=dtype), + ) T = clipped_df_name - reshaped_df_name = scope.get_unique_variable_name('reshaped_df') - calibrator_x_name = scope.get_unique_variable_name('calibrator_x') - calibrator_y_name = scope.get_unique_variable_name('calibrator_y') - distance_name = scope.get_unique_variable_name('distance') - absolute_distance_name = scope.get_unique_variable_name( - 'absolute_distance') - nearest_x_index_name = scope.get_unique_variable_name( - 'nearest_x_index') - nearest_y_name = scope.get_unique_variable_name('nearest_y') - - if hasattr(calibrators[k], '_X_'): - atX, atY = '_X_', '_y_' - elif hasattr(calibrators[k], '_necessary_X_'): - atX, atY = '_necessary_X_', '_necessary_y_' - elif hasattr(calibrators[k], 'X_thresholds_'): - atX, atY = 'X_thresholds_', 'y_thresholds_' + reshaped_df_name = scope.get_unique_variable_name("reshaped_df") + calibrator_x_name = scope.get_unique_variable_name("calibrator_x") + calibrator_y_name = scope.get_unique_variable_name("calibrator_y") + distance_name = scope.get_unique_variable_name("distance") + absolute_distance_name = scope.get_unique_variable_name("absolute_distance") + nearest_x_index_name = scope.get_unique_variable_name("nearest_x_index") + nearest_y_name = scope.get_unique_variable_name("nearest_y") + + if hasattr(calibrators[k], "_X_"): + atX, atY = "_X_", "_y_" + elif hasattr(calibrators[k], "_necessary_X_"): + atX, atY = "_necessary_X_", "_necessary_y_" + elif hasattr(calibrators[k], "X_thresholds_"): + atX, atY = "X_thresholds_", "y_thresholds_" else: raise AttributeError( "Unable to find attribute '_X_' or '_necessary_X_' " "for type {}\n{}." 
- "".format(type(calibrators[k]), - pprint.pformat(dir(calibrators[k])))) + "".format(type(calibrators[k]), pprint.pformat(dir(calibrators[k]))) + ) proto_type2 = proto_type if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE): proto_type2 = TensorProto.FLOAT container.add_initializer( - calibrator_x_name, proto_type2, + calibrator_x_name, + proto_type2, [len(getattr(calibrators[k], atX))], - getattr(calibrators[k], atX)) + getattr(calibrators[k], atX), + ) container.add_initializer( - calibrator_y_name, proto_type2, + calibrator_y_name, + proto_type2, [len(getattr(calibrators[k], atY))], - getattr(calibrators[k], atY)) - - apply_reshape(scope, T, reshaped_df_name, container, - desired_shape=(-1, 1)) - apply_sub(scope, [reshaped_df_name, calibrator_x_name], - distance_name, container, broadcast=1) + getattr(calibrators[k], atY), + ) + + apply_reshape(scope, T, reshaped_df_name, container, desired_shape=(-1, 1)) + apply_sub( + scope, + [reshaped_df_name, calibrator_x_name], + distance_name, + container, + broadcast=1, + ) apply_abs(scope, distance_name, absolute_distance_name, container) - container.add_node('ArgMin', absolute_distance_name, - nearest_x_index_name, axis=1, - name=scope.get_unique_operator_name('ArgMin')) container.add_node( - 'ArrayFeatureExtractor', + "ArgMin", + absolute_distance_name, + nearest_x_index_name, + axis=1, + name=scope.get_unique_operator_name("ArgMin"), + ) + container.add_node( + "ArrayFeatureExtractor", [calibrator_y_name, nearest_x_index_name], - nearest_y_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - - nearest_y_name_reshaped = scope.get_unique_variable_name( - 'nearest_y_name_reshaped') - apply_reshape(scope, nearest_y_name, - nearest_y_name_reshaped, container, - desired_shape=(-1, 1)) + nearest_y_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + + nearest_y_name_reshaped = scope.get_unique_variable_name("nearest_y_name_reshaped") + apply_reshape( + scope, nearest_y_name, nearest_y_name_reshaped, container, desired_shape=(-1, 1) + ) return nearest_y_name_reshaped -def convert_calibrated_classifier_base_estimator(scope, operator, container, - model, model_index): +def convert_calibrated_classifier_base_estimator( + scope, operator, container, model, model_index +): # Computational graph: # # In the following graph, variable names are in lower case characters only @@ -281,10 +326,12 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container, # class_prob_tensor [M, C] <--' model_proba = {RandomForestClassifier} - if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']: + if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]: raise RuntimeError( "Option 'nocl' is not implemented for operator '{}'.".format( - operator.raw_operator.__class__.__name__)) + operator.raw_operator.__class__.__name__ + ) + ) proto_type = guess_proto_type(operator.inputs[0].type) proto_type2 = proto_type if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE): @@ -293,22 +340,27 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container, if dtype != np.float64: dtype = np.float32 - base_model = (model.estimator if hasattr(model, 'estimator') - else model.base_estimator) + base_model = ( + model.estimator if hasattr(model, "estimator") else model.base_estimator + ) op_type = sklearn_operator_name_map[type(base_model)] - n_classes = (len(model.classes_) if hasattr(model, 'classes_') else - 
len(base_model.classes_)) + n_classes = ( + len(model.classes_) if hasattr(model, "classes_") else len(base_model.classes_) + ) prob_name = [None] * n_classes this_operator = scope.declare_local_operator(op_type, base_model) - if (container.has_options(base_model, 'raw_scores') and - not type(base_model) in model_proba): - container.add_options(id(base_model), {'raw_scores': True}) - scope.add_options(id(base_model), {'raw_scores': True}) + if ( + container.has_options(base_model, "raw_scores") + and type(base_model) not in model_proba + ): + container.add_options(id(base_model), {"raw_scores": True}) + scope.add_options(id(base_model), {"raw_scores": True}) this_operator.inputs = operator.inputs - label_name = scope.declare_local_variable('label', Int64TensorType()) + label_name = scope.declare_local_variable("label", Int64TensorType()) df_name = scope.declare_local_variable( - 'uncal_probability', operator.inputs[0].type.__class__()) + "uncal_probability", operator.inputs[0].type.__class__() + ) this_operator.outputs.append(label_name) this_operator.outputs.append(df_name) df_inp = df_name.full_name @@ -317,80 +369,105 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container, cur_k = k if n_classes == 2: cur_k += 1 - k_name = scope.get_unique_variable_name('k') + k_name = scope.get_unique_variable_name("k") df_col_name = scope.get_unique_variable_name( - 'tdf_col_%d_c%d' % (model_index, k)) + "tdf_col_%d_c%d" % (model_index, k) + ) prob_name[k] = scope.get_unique_variable_name( - 'prob_{}_c{}'.format(model_index, k)) + "prob_{}_c{}".format(model_index, k) + ) container.add_initializer(k_name, TensorProto.INT64, [], [cur_k]) container.add_node( - 'ArrayFeatureExtractor', [df_inp, k_name], df_col_name, - name=scope.get_unique_operator_name( - 'CaliAFE_%d_c%d' % (model_index, k)), - op_domain='ai.onnx.ml') - if model.method == 'sigmoid': - T = _transform_sigmoid(scope, container, model, df_col_name, k, - proto_type) + "ArrayFeatureExtractor", + [df_inp, k_name], + df_col_name, + name=scope.get_unique_operator_name("CaliAFE_%d_c%d" % (model_index, k)), + op_domain="ai.onnx.ml", + ) + if model.method == "sigmoid": + T = _transform_sigmoid(scope, container, model, df_col_name, k, proto_type) else: - T = _transform_isotonic(scope, container, model, df_col_name, - k, dtype, proto_type) + T = _transform_isotonic( + scope, container, model, df_col_name, k, dtype, proto_type + ) prob_name[k] = T if n_classes == 2: break if n_classes == 2: - zeroth_col_name = scope.get_unique_variable_name( - 'zeroth_col%d' % model_index) - merged_prob_name = scope.get_unique_variable_name( - 'merged_prob%d' % model_index) + zeroth_col_name = scope.get_unique_variable_name("zeroth_col%d" % model_index) + merged_prob_name = scope.get_unique_variable_name("merged_prob%d" % model_index) unit_float_tensor_name = scope.get_unique_variable_name( - 'unit_float_tensor%d' % model_index) - - container.add_initializer(unit_float_tensor_name, - proto_type2, [], [1.0]) - - apply_sub(scope, [unit_float_tensor_name, prob_name[0]], - zeroth_col_name, container, broadcast=1) - apply_concat(scope, [zeroth_col_name, prob_name[0]], - merged_prob_name, container, axis=1, - operator_name=scope.get_unique_variable_name( - 'CaliConc%d' % model_index)) + "unit_float_tensor%d" % model_index + ) + + container.add_initializer(unit_float_tensor_name, proto_type2, [], [1.0]) + + apply_sub( + scope, + [unit_float_tensor_name, prob_name[0]], + zeroth_col_name, + container, + broadcast=1, + ) + apply_concat( + scope, + 
[zeroth_col_name, prob_name[0]], + merged_prob_name, + container, + axis=1, + operator_name=scope.get_unique_variable_name("CaliConc%d" % model_index), + ) class_prob_tensor_name = merged_prob_name else: - concatenated_prob_name = scope.get_unique_variable_name( - 'concatenated_prob') - reduced_prob_name = scope.get_unique_variable_name('reduced_prob') - calc_prob_name = scope.get_unique_variable_name('calc_prob') + concatenated_prob_name = scope.get_unique_variable_name("concatenated_prob") + reduced_prob_name = scope.get_unique_variable_name("reduced_prob") + calc_prob_name = scope.get_unique_variable_name("calc_prob") - apply_concat(scope, prob_name, concatenated_prob_name, - container, axis=1) + apply_concat(scope, prob_name, concatenated_prob_name, container, axis=1) if container.target_opset < 13: container.add_node( - 'ReduceSum', concatenated_prob_name, - reduced_prob_name, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + concatenated_prob_name, + reduced_prob_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') + axis_name = scope.get_unique_variable_name("axis") container.add_initializer(axis_name, TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [concatenated_prob_name, axis_name], + "ReduceSum", + [concatenated_prob_name, axis_name], reduced_prob_name, - name=scope.get_unique_operator_name('ReduceSum')) - num, deno = _handle_zeros(scope, container, concatenated_prob_name, - reduced_prob_name, n_classes, proto_type) - apply_div(scope, [num, deno], - calc_prob_name, container, broadcast=1, - operator_name=scope.get_unique_variable_name( - 'CaliDiv%d' % model_index)) + name=scope.get_unique_operator_name("ReduceSum"), + ) + num, deno = _handle_zeros( + scope, + container, + concatenated_prob_name, + reduced_prob_name, + n_classes, + proto_type, + ) + apply_div( + scope, + [num, deno], + calc_prob_name, + container, + broadcast=1, + operator_name=scope.get_unique_variable_name("CaliDiv%d" % model_index), + ) class_prob_tensor_name = calc_prob_name return class_prob_tensor_name def convert_sklearn_calibrated_classifier_cv( - scope: Scope, operator: Operator, container: ModelComponentContainer): + scope: Scope, operator: Operator, container: ModelComponentContainer +): # Computational graph: # # In the following graph, variable names are in lower case characters only @@ -447,59 +524,97 @@ def convert_sklearn_calibrated_classifier_cv( if np.issubdtype(op.classes_.dtype, np.floating): class_type = TensorProto.INT32 classes = classes.astype(np.int32) - elif (np.issubdtype(op.classes_.dtype, np.signedinteger) or - op.classes_.dtype == np.bool_): + elif ( + np.issubdtype(op.classes_.dtype, np.signedinteger) + or op.classes_.dtype == np.bool_ + ): class_type = TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) clf_length = len(op.calibrated_classifiers_) prob_scores_name = [] - clf_length_name = scope.get_unique_variable_name('clf_length') - classes_name = scope.get_unique_variable_name('classes') - reshaped_result_name = scope.get_unique_variable_name('reshaped_result') - argmax_output_name = scope.get_unique_variable_name('argmax_output') + clf_length_name = scope.get_unique_variable_name("clf_length") + classes_name = scope.get_unique_variable_name("classes") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + argmax_output_name = 
scope.get_unique_variable_name("argmax_output") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') - add_result_name = scope.get_unique_variable_name('add_result') + "array_feature_extractor_result" + ) + add_result_name = scope.get_unique_variable_name("add_result") container.add_initializer(classes_name, class_type, classes.shape, classes) - container.add_initializer(clf_length_name, proto_type2, - [], [clf_length]) + container.add_initializer(clf_length_name, proto_type2, [], [clf_length]) for clf_index, clf in enumerate(op.calibrated_classifiers_): - prob_scores_name.append(convert_calibrated_classifier_base_estimator( - scope, operator, container, clf, clf_index)) - - container.add_node('Sum', [s for s in prob_scores_name], - add_result_name, op_version=7, - name=scope.get_unique_operator_name('Sum')) - apply_div(scope, [add_result_name, clf_length_name], - operator.outputs[1].full_name, container, broadcast=1) + prob_scores_name.append( + convert_calibrated_classifier_base_estimator( + scope, operator, container, clf, clf_index + ) + ) + + container.add_node( + "Sum", + [s for s in prob_scores_name], + add_result_name, + op_version=7, + name=scope.get_unique_operator_name("Sum"), + ) + apply_div( + scope, + [add_result_name, clf_length_name], + operator.outputs[1].full_name, + container, + broadcast=1, + ) class_prob_name = operator.outputs[1].full_name - container.add_node('ArgMax', class_prob_name, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArgMax", + class_prob_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + container.add_node( + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == TensorProto.INT32: - apply_reshape(scope, array_feature_extractor_result_name, - reshaped_result_name, container, - desired_shape=output_shape) - apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=TensorProto.INT64) + apply_reshape( + scope, + array_feature_extractor_result_name, + reshaped_result_name, + container, + desired_shape=output_shape, + ) + apply_cast( + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=TensorProto.INT64, + ) else: - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=output_shape) - - -register_converter('SklearnCalibratedClassifierCV', - convert_sklearn_calibrated_classifier_cv, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=output_shape, + ) + + +register_converter( + "SklearnCalibratedClassifierCV", + convert_sklearn_calibrated_classifier_cv, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/cast_op.py b/skl2onnx/operator_converters/cast_op.py index 1a3438a14..7f3db2360 100644 --- a/skl2onnx/operator_converters/cast_op.py +++ 
b/skl2onnx/operator_converters/cast_op.py @@ -8,19 +8,19 @@ from .._supported_operators import sklearn_operator_name_map -def convert_sklearn_cast(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_cast( + scope: Scope, operator: Operator, container: ModelComponentContainer +): inp = operator.inputs[0] exptype = operator.outputs[0] res = exptype.type.to_onnx_type() et = res.tensor_type.elem_type - apply_cast(scope, inp.full_name, exptype.full_name, - container, to=et) + apply_cast(scope, inp.full_name, exptype.full_name, container, to=et) -def convert_sklearn_cast_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): - +def convert_sklearn_cast_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator estimator = op.estimator @@ -29,17 +29,16 @@ def convert_sklearn_cast_regressor(scope: Scope, operator: Operator, this_operator.inputs = operator.inputs cls = operator.inputs[0].type.__class__ - var_name = scope.declare_local_variable('cast_est', cls()) + var_name = scope.declare_local_variable("cast_est", cls()) this_operator.outputs.append(var_name) var_name = var_name.onnx_name exptype = operator.outputs[0] res = exptype.type.to_onnx_type() et = res.tensor_type.elem_type - apply_cast(scope, var_name, exptype.full_name, - container, to=et) + apply_cast(scope, var_name, exptype.full_name, container, to=et) -register_converter('SklearnCastTransformer', convert_sklearn_cast) -register_converter('SklearnCastRegressor', convert_sklearn_cast_regressor) -register_converter('SklearnCast', convert_sklearn_cast) +register_converter("SklearnCastTransformer", convert_sklearn_cast) +register_converter("SklearnCastRegressor", convert_sklearn_cast_regressor) +register_converter("SklearnCast", convert_sklearn_cast) diff --git a/skl2onnx/operator_converters/class_labels.py b/skl2onnx/operator_converters/class_labels.py index 3be5fd018..3ef9a427f 100644 --- a/skl2onnx/operator_converters/class_labels.py +++ b/skl2onnx/operator_converters/class_labels.py @@ -6,71 +6,93 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_class_labels(scope: Scope, operator: Operator, - container: ModelComponentContainer): - if getattr(operator, 'is_multi_output', False): +def convert_sklearn_class_labels( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + if getattr(operator, "is_multi_output", False): classes = operator.classes if not isinstance(classes, list): raise RuntimeError( - "classes must be a list of numpy arrays but is %r." - "" % type(classes)) + "classes must be a list of numpy arrays but is %r." 
"" % type(classes) + ) names = [] if classes[0].dtype in (np.int64, np.int32): for i, cl in enumerate(classes): cla = np.array(cl) name = scope.get_unique_variable_name( - operator.outputs[0].full_name + '_cst_%d' % i) + operator.outputs[0].full_name + "_cst_%d" % i + ) container.add_initializer( - name, onnx_proto.TensorProto.INT64, list(cla.shape), - cla.tolist()) + name, onnx_proto.TensorProto.INT64, list(cla.shape), cla.tolist() + ) names.append(name) else: for i, cl in enumerate(classes): name = scope.get_unique_variable_name( - operator.outputs[0].full_name + '_cst_%d' % i) + operator.outputs[0].full_name + "_cst_%d" % i + ) clids = np.arange(len(cl), dtype=np.int64) container.add_initializer( - name, onnx_proto.TensorProto.INT64, list(clids.shape), - clids.tolist()) + name, + onnx_proto.TensorProto.INT64, + list(clids.shape), + clids.tolist(), + ) namele = scope.get_unique_variable_name( - operator.outputs[0].full_name + '_le_%d' % i) + operator.outputs[0].full_name + "_le_%d" % i + ) container.add_node( - 'LabelEncoder', name, namele, op_domain='ai.onnx.ml', - op_version=2, default_string='0', keys_int64s=clids, + "LabelEncoder", + name, + namele, + op_domain="ai.onnx.ml", + op_version=2, + default_string="0", + keys_int64s=clids, values_strings=cl.tolist(), - name=scope.get_unique_operator_name( - 'class_labels_le_%d' % i)) + name=scope.get_unique_operator_name("class_labels_le_%d" % i), + ) names.append(namele) container.add_node( - 'SequenceConstruct', names, operator.outputs[0].full_name, - name=scope.get_unique_operator_name('class_labels_seq')) + "SequenceConstruct", + names, + operator.outputs[0].full_name, + name=scope.get_unique_operator_name("class_labels_seq"), + ) else: classes = np.array(operator.classes) - name = scope.get_unique_variable_name( - operator.outputs[0].full_name + '_cst') + name = scope.get_unique_variable_name(operator.outputs[0].full_name + "_cst") if classes.dtype in (np.int64, np.int32): container.add_initializer( - name, onnx_proto.TensorProto.INT64, list(classes.shape), - classes.tolist()) + name, + onnx_proto.TensorProto.INT64, + list(classes.shape), + classes.tolist(), + ) else: clids = np.arange(len(classes), dtype=np.int64) container.add_initializer( - name, onnx_proto.TensorProto.INT64, list(clids.shape), - clids.tolist()) + name, onnx_proto.TensorProto.INT64, list(clids.shape), clids.tolist() + ) namele = scope.get_unique_variable_name( - operator.outputs[0].full_name + '_le') + operator.outputs[0].full_name + "_le" + ) container.add_node( - 'LabelEncoder', name, namele, op_domain='ai.onnx.ml', - op_version=2, default_string='0', keys_int64s=clids, + "LabelEncoder", + name, + namele, + op_domain="ai.onnx.ml", + op_version=2, + default_string="0", + keys_int64s=clids, values_strings=classes.tolist(), - name=scope.get_unique_operator_name('class_labels_le')) + name=scope.get_unique_operator_name("class_labels_le"), + ) name = namele - container.add_node( - 'Identity', name, operator.outputs[0].full_name) + container.add_node("Identity", name, operator.outputs[0].full_name) -register_converter( - 'SklearnClassLabels', convert_sklearn_class_labels) +register_converter("SklearnClassLabels", convert_sklearn_class_labels) diff --git a/skl2onnx/operator_converters/common.py b/skl2onnx/operator_converters/common.py index 2cf29adee..57633fad9 100644 --- a/skl2onnx/operator_converters/common.py +++ b/skl2onnx/operator_converters/common.py @@ -3,8 +3,12 @@ from ..common._apply_operation import apply_cast from ..common.data_types import ( - 
Int64TensorType, FloatTensorType, DoubleTensorType, - StringTensorType, guess_proto_type) + Int64TensorType, + FloatTensorType, + DoubleTensorType, + StringTensorType, + guess_proto_type, +) def concatenate_variables(scope, variables, container, main_type=None): @@ -18,11 +22,16 @@ def concatenate_variables(scope, variables, container, main_type=None): # Check if it's possible to concatenate those inputs. type_set = set(type(variable.type) for variable in variables) - number_type_set = {FloatTensorType, Int64TensorType, DoubleTensorType, - StringTensorType} + number_type_set = { + FloatTensorType, + Int64TensorType, + DoubleTensorType, + StringTensorType, + } if any(itype not in number_type_set for itype in type_set): - raise RuntimeError('Numerical tensor(s) and string tensor(s) ' - 'cannot be concatenated.') + raise RuntimeError( + "Numerical tensor(s) and string tensor(s) " "cannot be concatenated." + ) # input variables' names we want to concatenate input_names = [] # dimensions of the variables that is going to be concatenated @@ -32,9 +41,8 @@ def concatenate_variables(scope, variables, container, main_type=None): for variable in variables: if not isinstance(variable.type, main_type): proto_type = guess_proto_type(main_type()) - new_name = scope.get_unique_variable_name('cast') - apply_cast(scope, variable.full_name, new_name, - container, to=proto_type) + new_name = scope.get_unique_variable_name("cast") + apply_cast(scope, variable.full_name, new_name, container, to=proto_type) input_names.append(new_name) else: input_names.append(variable.full_name) @@ -47,20 +55,23 @@ def concatenate_variables(scope, variables, container, main_type=None): return input_names[0] # To combine all inputs, we need a FeatureVectorizer - op_type = 'FeatureVectorizer' - attrs = {'name': scope.get_unique_operator_name(op_type), - 'inputdimensions': input_dims} + op_type = "FeatureVectorizer" + attrs = { + "name": scope.get_unique_operator_name(op_type), + "inputdimensions": input_dims, + } # Create a variable name to capture feature vectorizer's output # Set up our FeatureVectorizer - concatenated_name = scope.get_unique_variable_name('concatenated') - container.add_node(op_type, input_names, concatenated_name, - op_domain='ai.onnx.ml', **attrs) + concatenated_name = scope.get_unique_variable_name("concatenated") + container.add_node( + op_type, input_names, concatenated_name, op_domain="ai.onnx.ml", **attrs + ) if main_type == FloatTensorType: return concatenated_name # Cast output as FeatureVectorizer always produces float32. 
- concatenated_name_cast = scope.get_unique_variable_name( - 'concatenated_cast') - container.add_node('CastLike', [concatenated_name, input_names[0]], - concatenated_name_cast) + concatenated_name_cast = scope.get_unique_variable_name("concatenated_cast") + container.add_node( + "CastLike", [concatenated_name, input_names[0]], concatenated_name_cast + ) return concatenated_name_cast diff --git a/skl2onnx/operator_converters/concat_op.py b/skl2onnx/operator_converters/concat_op.py index 214f3be9f..9f600d26f 100644 --- a/skl2onnx/operator_converters/concat_op.py +++ b/skl2onnx/operator_converters/concat_op.py @@ -7,8 +7,9 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_concat(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_concat( + scope: Scope, operator: Operator, container: ModelComponentContainer +): exptype = operator.outputs[0].type new_inputs = [] for inp in operator.inputs: @@ -21,8 +22,7 @@ def convert_sklearn_concat(scope: Scope, operator: Operator, apply_cast(scope, inp.full_name, name, container, to=et) new_inputs.append(name) - apply_concat(scope, new_inputs, operator.outputs[0].full_name, - container, axis=1) + apply_concat(scope, new_inputs, operator.outputs[0].full_name, container, axis=1) -register_converter('SklearnConcat', convert_sklearn_concat) +register_converter("SklearnConcat", convert_sklearn_concat) diff --git a/skl2onnx/operator_converters/cross_decomposition.py b/skl2onnx/operator_converters/cross_decomposition.py index 1e4947f04..c1a91519f 100644 --- a/skl2onnx/operator_converters/cross_decomposition.py +++ b/skl2onnx/operator_converters/cross_decomposition.py @@ -5,14 +5,13 @@ from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -from ..common.data_types import ( - Int64TensorType, guess_numpy_type, guess_proto_type) -from ..algebra.onnx_ops import ( - OnnxAdd, OnnxCast, OnnxDiv, OnnxMatMul, OnnxSub) +from ..common.data_types import Int64TensorType, guess_numpy_type, guess_proto_type +from ..algebra.onnx_ops import OnnxAdd, OnnxCast, OnnxDiv, OnnxMatMul, OnnxSub -def convert_pls_regression(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_pls_regression( + scope: Scope, operator: Operator, container: ModelComponentContainer +): X = operator.inputs[0] op = operator.raw_operator opv = container.target_opset @@ -26,22 +25,23 @@ def convert_pls_regression(scope: Scope, operator: Operator, if isinstance(X.type, Int64TensorType): X = OnnxCast(X, to=proto_dtype, op_version=opv) - coefs = op.x_mean_ if hasattr(op, 'x_mean_') else op._x_mean - std = op.x_std_ if hasattr(op, 'x_std_') else op._x_std - ym = op.y_mean_ if hasattr(op, 'x_mean_') else op._y_mean + coefs = op.x_mean_ if hasattr(op, "x_mean_") else op._x_mean + std = op.x_std_ if hasattr(op, "x_std_") else op._x_std + ym = op.y_mean_ if hasattr(op, "x_mean_") else op._y_mean norm_x = OnnxDiv( OnnxSub(X, coefs.astype(dtype), op_version=opv), - std.astype(dtype), op_version=opv) + std.astype(dtype), + op_version=opv, + ) if hasattr(op, "set_predict_request"): # new in 1.3 coefs = op.coef_.T.astype(dtype) else: coefs = op.coef_.astype(dtype) dot = OnnxMatMul(norm_x, coefs, op_version=opv) - pred = OnnxAdd(dot, ym.astype(dtype), - op_version=opv, output_names=operator.outputs) + pred = OnnxAdd(dot, ym.astype(dtype), op_version=opv, output_names=operator.outputs) pred.add_to(scope, container) 
-register_converter('SklearnPLSRegression', convert_pls_regression) +register_converter("SklearnPLSRegression", convert_pls_regression) diff --git a/skl2onnx/operator_converters/decision_tree.py b/skl2onnx/operator_converters/decision_tree.py index 8a57a1e4c..e92fe285c 100644 --- a/skl2onnx/operator_converters/decision_tree.py +++ b/skl2onnx/operator_converters/decision_tree.py @@ -14,8 +14,11 @@ ) from ..common._registration import register_converter from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type, - guess_proto_type) + BooleanTensorType, + Int64TensorType, + guess_numpy_type, + guess_proto_type, +) from ..common.tree_ensemble import ( add_tree_to_attribute_pairs, get_default_tree_classifier_attribute_pairs, @@ -30,69 +33,68 @@ def populate_tree_attributes(model, name, dtype): while adding a node with TreeEnsembleClassifier ONNX op. """ attrs = {} - attrs['name'] = name - attrs['post_transform'] = 'NONE' - attrs['nodes_treeids'] = [] - attrs['nodes_nodeids'] = [] - attrs['nodes_featureids'] = [] - attrs['nodes_modes'] = [] - attrs['nodes_values'] = [] - attrs['nodes_truenodeids'] = [] - attrs['nodes_falsenodeids'] = [] - attrs['nodes_missing_value_tracks_true'] = [] - attrs['nodes_hitrates'] = [] - attrs['class_treeids'] = [] - attrs['class_nodeids'] = [] - attrs['class_ids'] = [] - attrs['class_weights'] = [] - attrs['classlabels_int64s'] = list(range(model.tree_.node_count)) + attrs["name"] = name + attrs["post_transform"] = "NONE" + attrs["nodes_treeids"] = [] + attrs["nodes_nodeids"] = [] + attrs["nodes_featureids"] = [] + attrs["nodes_modes"] = [] + attrs["nodes_values"] = [] + attrs["nodes_truenodeids"] = [] + attrs["nodes_falsenodeids"] = [] + attrs["nodes_missing_value_tracks_true"] = [] + attrs["nodes_hitrates"] = [] + attrs["class_treeids"] = [] + attrs["class_nodeids"] = [] + attrs["class_ids"] = [] + attrs["class_weights"] = [] + attrs["classlabels_int64s"] = list(range(model.tree_.node_count)) for i in range(model.tree_.node_count): node_id = i - if (model.tree_.children_left[i] > i and - model.tree_.children_right[i] > i): + if model.tree_.children_left[i] > i and model.tree_.children_right[i] > i: feat = model.tree_.feature[i] thresh = model.tree_.threshold[i] left = model.tree_.children_left[i] right = model.tree_.children_right[i] - mode = 'BRANCH_LEQ' + mode = "BRANCH_LEQ" else: - feat, thresh, left, right = 0, 0., 0, 0 - mode = 'LEAF' - attrs['nodes_nodeids'].append(node_id) - attrs['nodes_treeids'].append(0) - attrs['nodes_featureids'].append(feat) - attrs['nodes_modes'].append(mode) - attrs['nodes_truenodeids'].append(left) - attrs['nodes_falsenodeids'].append(right) - attrs['nodes_missing_value_tracks_true'].append(False) - attrs['nodes_hitrates'].append(1.) - attrs['nodes_values'].append(thresh) - if mode == 'LEAF': - attrs['class_ids'].append(node_id) - attrs['class_weights'].append(1.) 
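As an illustrative aside, the PLS converter above reproduces ((X - x_mean) / x_std) @ coef + y_mean; a minimal round-trip sketch, assuming skl2onnx and onnxruntime are available (data, shapes and the printed tolerance are arbitrary):

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from skl2onnx import to_onnx
import onnxruntime as rt

rng = np.random.RandomState(0)
X = rng.randn(100, 4).astype(np.float32)
y = X @ np.array([1.0, -2.0, 0.5, 0.0], dtype=np.float32)
pls = PLSRegression(n_components=2).fit(X, y)

onx = to_onnx(pls, X)  # default input name is "X"
sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
got = sess.run(None, {"X": X})[0]
print(np.abs(got.ravel() - pls.predict(X).ravel()).max())  # small float32 discrepancy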
- attrs['class_treeids'].append(0) - attrs['class_nodeids'].append(node_id) + feat, thresh, left, right = 0, 0.0, 0, 0 + mode = "LEAF" + attrs["nodes_nodeids"].append(node_id) + attrs["nodes_treeids"].append(0) + attrs["nodes_featureids"].append(feat) + attrs["nodes_modes"].append(mode) + attrs["nodes_truenodeids"].append(left) + attrs["nodes_falsenodeids"].append(right) + attrs["nodes_missing_value_tracks_true"].append(False) + attrs["nodes_hitrates"].append(1.0) + attrs["nodes_values"].append(thresh) + if mode == "LEAF": + attrs["class_ids"].append(node_id) + attrs["class_weights"].append(1.0) + attrs["class_treeids"].append(0) + attrs["class_nodeids"].append(node_id) if dtype is not None: for k in attrs: - if k in ('node_values', 'class_weights', 'target_weights'): + if k in ("node_values", "class_weights", "target_weights"): attrs[k] = np.array(attrs[k], dtype=dtype) return attrs -def predict(model, scope, operator, container, - op_type, op_domain, op_version, is_ensemble=False): +def predict( + model, scope, operator, container, op_type, op_domain, op_version, is_ensemble=False +): """Predict target and calculate probability scores.""" - indices_name = scope.get_unique_variable_name('indices') - dummy_proba_name = scope.get_unique_variable_name('dummy_proba') - values_name = scope.get_unique_variable_name('values') - out_values_name = scope.get_unique_variable_name('out_indices') - transposed_result_name = scope.get_unique_variable_name( - 'transposed_result') - proba_output_name = scope.get_unique_variable_name('proba_output') - cast_result_name = scope.get_unique_variable_name('cast_result') - reshaped_indices_name = scope.get_unique_variable_name('reshaped_indices') - sum_output_name = scope.get_unique_variable_name('sum_proba') + indices_name = scope.get_unique_variable_name("indices") + dummy_proba_name = scope.get_unique_variable_name("dummy_proba") + values_name = scope.get_unique_variable_name("values") + out_values_name = scope.get_unique_variable_name("out_indices") + transposed_result_name = scope.get_unique_variable_name("transposed_result") + proba_output_name = scope.get_unique_variable_name("proba_output") + cast_result_name = scope.get_unique_variable_name("cast_result") + reshaped_indices_name = scope.get_unique_variable_name("reshaped_indices") + sum_output_name = scope.get_unique_variable_name("sum_proba") value = model.tree_.value.transpose(1, 2, 0) proto_dtype = guess_proto_type(operator.inputs[0].type) @@ -103,102 +105,146 @@ def predict(model, scope, operator, container, if dtype != np.float64: dtype = np.float32 - container.add_initializer( - values_name, proto_dtype, value.shape, value.ravel()) + container.add_initializer(values_name, proto_dtype, value.shape, value.ravel()) input_name = operator.input_full_names if isinstance(operator.inputs[0].type, BooleanTensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, input_name, cast_input_name, - container, to=proto_dtype) + apply_cast(scope, input_name, cast_input_name, container, to=proto_dtype) input_name = cast_input_name if model.tree_.node_count > 1: attrs = populate_tree_attributes( - model, scope.get_unique_operator_name(op_type), dtype) + model, scope.get_unique_operator_name(op_type), dtype + ) container.add_node( - op_type, input_name, + op_type, + input_name, [indices_name, dummy_proba_name], - op_domain=op_domain, op_version=op_version, **attrs) + op_domain=op_domain, + op_version=op_version, + 
**attrs + ) else: - zero_name = scope.get_unique_variable_name('zero') - zero_matrix_name = scope.get_unique_variable_name('zero_matrix') - reduced_zero_matrix_name = scope.get_unique_variable_name( - 'reduced_zero_matrix') - - container.add_initializer( - zero_name, proto_dtype, [], [0]) - apply_mul(scope, [input_name[0], zero_name], - zero_matrix_name, container, broadcast=1) + zero_name = scope.get_unique_variable_name("zero") + zero_matrix_name = scope.get_unique_variable_name("zero_matrix") + reduced_zero_matrix_name = scope.get_unique_variable_name("reduced_zero_matrix") + + container.add_initializer(zero_name, proto_dtype, [], [0]) + apply_mul( + scope, [input_name[0], zero_name], zero_matrix_name, container, broadcast=1 + ) if container.target_opset < 13: container.add_node( - 'ReduceSum', zero_matrix_name, reduced_zero_matrix_name, - axes=[1], name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + zero_matrix_name, + reduced_zero_matrix_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [zero_matrix_name, axis_name], + "ReduceSum", + [zero_matrix_name, axis_name], reduced_zero_matrix_name, - name=scope.get_unique_operator_name('ReduceSum')) - apply_cast(scope, reduced_zero_matrix_name, indices_name, - container, to=onnx_proto.TensorProto.INT64) - apply_reshape(scope, indices_name, reshaped_indices_name, - container, desired_shape=[1, -1]) + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_cast( + scope, + reduced_zero_matrix_name, + indices_name, + container, + to=onnx_proto.TensorProto.INT64, + ) + apply_reshape( + scope, indices_name, reshaped_indices_name, container, desired_shape=[1, -1] + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [values_name, reshaped_indices_name], - out_values_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_transpose(scope, out_values_name, proba_output_name, - container, perm=(0, 2, 1)) + out_values_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_transpose( + scope, out_values_name, proba_output_name, container, perm=(0, 2, 1) + ) if is_ensemble: - proba_result_name = scope.get_unique_variable_name('proba_result') - apply_reducesum(scope, proba_output_name, sum_output_name, - container, keepdims=1, axes=[2]) - apply_div(scope, [proba_output_name, sum_output_name], - proba_result_name, container) + proba_result_name = scope.get_unique_variable_name("proba_result") + apply_reducesum( + scope, proba_output_name, sum_output_name, container, keepdims=1, axes=[2] + ) + apply_div( + scope, [proba_output_name, sum_output_name], proba_result_name, container + ) return proba_result_name else: - apply_cast(scope, proba_output_name, cast_result_name, - container, to=onnx_proto.TensorProto.BOOL) - apply_cast(scope, cast_result_name, operator.outputs[1].full_name, - container, to=proto_dtype) - apply_transpose(scope, out_values_name, transposed_result_name, - container, perm=(2, 1, 0)) + apply_cast( + scope, + proba_output_name, + cast_result_name, + container, + to=onnx_proto.TensorProto.BOOL, + ) + apply_cast( + scope, + cast_result_name, + operator.outputs[1].full_name, + 
container, + to=proto_dtype, + ) + apply_transpose( + scope, out_values_name, transposed_result_name, container, perm=(2, 1, 0) + ) return transposed_result_name def _append_decision_output( - input_name, attrs, fct_label, n_out, scope, operator, container, - op_type='TreeEnsembleClassifier', - op_domain='ai.onnx.ml', op_version=1, - cast_encode=False, regression=False, dtype=np.float32, - overwrite_tree=None): - + input_name, + attrs, + fct_label, + n_out, + scope, + operator, + container, + op_type="TreeEnsembleClassifier", + op_domain="ai.onnx.ml", + op_version=1, + cast_encode=False, + regression=False, + dtype=np.float32, + overwrite_tree=None, +): attrs = attrs.copy() - attrs['name'] = scope.get_unique_operator_name(op_type) - attrs['n_targets'] = 1 - attrs['post_transform'] = 'NONE' + attrs["name"] = scope.get_unique_operator_name(op_type) + attrs["n_targets"] = 1 + attrs["post_transform"] = "NONE" if regression: - attrs['target_weights'] = np.array( - [float(_) for _ in attrs['target_nodeids']], dtype=dtype) + attrs["target_weights"] = np.array( + [float(_) for _ in attrs["target_nodeids"]], dtype=dtype + ) else: - attrs['target_ids'] = [0 for _ in attrs['class_ids']] - attrs['target_weights'] = [float(_) for _ in attrs['class_nodeids']] - attrs['target_nodeids'] = attrs['class_nodeids'] - attrs['target_treeids'] = attrs['class_treeids'] + attrs["target_ids"] = [0 for _ in attrs["class_ids"]] + attrs["target_weights"] = [float(_) for _ in attrs["class_nodeids"]] + attrs["target_nodeids"] = attrs["class_nodeids"] + attrs["target_treeids"] = attrs["class_treeids"] - rem = [k for k in attrs if k.startswith('class')] + rem = [k for k in attrs if k.startswith("class")] for k in rem: del attrs[k] dpath = scope.get_unique_variable_name("dpath") container.add_node( - op_type.replace("Classifier", "Regressor"), input_name, dpath, - op_domain=op_domain, op_version=op_version, **attrs) + op_type.replace("Classifier", "Regressor"), + input_name, + dpath, + op_domain=op_domain, + op_version=op_version, + **attrs + ) if n_out is None: final_name = scope.get_unique_variable_name("dpatho") @@ -207,204 +253,302 @@ def _append_decision_output( if cast_encode: apply_cast( - scope, dpath, final_name, - container, to=onnx_proto.TensorProto.INT64, - operator_name=scope.get_unique_operator_name('TreePathType')) + scope, + dpath, + final_name, + container, + to=onnx_proto.TensorProto.INT64, + operator_name=scope.get_unique_operator_name("TreePathType"), + ) else: op = operator.raw_operator - labels = fct_label( - overwrite_tree if overwrite_tree is not None else op.tree_) + labels = fct_label(overwrite_tree if overwrite_tree is not None else op.tree_) ordered = list(sorted(labels.items())) keys = [float(_[0]) for _ in ordered] values = [_[1] for _ in ordered] name = scope.get_unique_variable_name("spath") container.add_node( - 'LabelEncoder', dpath, name, - op_domain=op_domain, op_version=2, - default_string='0', keys_floats=keys, values_strings=values, - name=scope.get_unique_operator_name('TreePath')) + "LabelEncoder", + dpath, + name, + op_domain=op_domain, + op_version=2, + default_string="0", + keys_floats=keys, + values_strings=values, + name=scope.get_unique_operator_name("TreePath"), + ) apply_reshape( - scope, name, final_name, - container, desired_shape=(-1, 1), - operator_name=scope.get_unique_operator_name('TreePathShape')) + scope, + name, + final_name, + container, + desired_shape=(-1, 1), + operator_name=scope.get_unique_operator_name("TreePathShape"), + ) return final_name def 
convert_sklearn_decision_tree_classifier( - scope, operator, container, op_type='TreeEnsembleClassifier', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleClassifier", + op_domain="ai.onnx.ml", + op_version=1, +): try: dtype = guess_numpy_type(operator.inputs[0].type) except NotImplementedError as e: - raise RuntimeError( - "Unknown variable {}.".format(operator.inputs[0])) from e + raise RuntimeError("Unknown variable {}.".format(operator.inputs[0])) from e if dtype != np.float64: dtype = np.float32 op = operator.raw_operator - options = scope.get_options( - op, dict(decision_path=False, decision_leaf=False)) + options = scope.get_options(op, dict(decision_path=False, decision_leaf=False)) if op.n_outputs_ == 1: attrs = get_default_tree_classifier_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name(op_type) + attrs["name"] = scope.get_unique_operator_name(op_type) classes = get_label_classes(scope, op) if all(isinstance(i, np.ndarray) for i in classes): classes = np.concatenate(classes) if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes): class_labels = [int(i) for i in classes] - attrs['classlabels_int64s'] = class_labels + attrs["classlabels_int64s"] = class_labels elif all(isinstance(i, str) for i in classes): class_labels = [str(i) for i in classes] - attrs['classlabels_strings'] = class_labels + attrs["classlabels_strings"] = class_labels else: - raise ValueError('Labels must be all integers or all strings.') + raise ValueError("Labels must be all integers or all strings.") - add_tree_to_attribute_pairs(attrs, True, op.tree_, 0, 1., 0, True, - True, dtype=dtype) + add_tree_to_attribute_pairs( + attrs, True, op.tree_, 0, 1.0, 0, True, True, dtype=dtype + ) input_name = operator.input_full_names if isinstance(operator.inputs[0].type, BooleanTensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - - apply_cast(scope, input_name, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_input_name = scope.get_unique_variable_name("cast_input") + + apply_cast( + scope, + input_name, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype) container.add_node( - op_type, input_name, + op_type, + input_name, [operator.outputs[0].full_name, operator.outputs[1].full_name], - op_domain=op_domain, op_version=op_version, **attrs) + op_domain=op_domain, + op_version=op_version, + **attrs + ) n_out = 2 - if options['decision_path']: + if options["decision_path"]: # decision_path _append_decision_output( - input_name, attrs, _build_labels_path, n_out, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, dtype=dtype) + input_name, + attrs, + _build_labels_path, + n_out, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + dtype=dtype, + ) n_out += 1 - if options['decision_leaf']: + if options["decision_leaf"]: # decision_path _append_decision_output( - input_name, attrs, _build_labels_leaf, n_out, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, cast_encode=True, - dtype=dtype) + input_name, + attrs, + 
_build_labels_leaf, + n_out, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + cast_encode=True, + dtype=dtype, + ) n_out += 1 else: transposed_result_name = predict( - op, scope, operator, container, op_type, op_domain, op_version) + op, scope, operator, container, op_type, op_domain, op_version + ) predictions = [] for k in range(op.n_outputs_): - preds_name = scope.get_unique_variable_name('preds') - reshaped_preds_name = scope.get_unique_variable_name( - 'reshaped_preds') - k_name = scope.get_unique_variable_name('k_column') - out_k_name = scope.get_unique_variable_name('out_k_column') - argmax_output_name = scope.get_unique_variable_name( - 'argmax_output') - classes_name = scope.get_unique_variable_name('classes') - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - - container.add_initializer( - k_name, onnx_proto.TensorProto.INT64, - [], [k]) + preds_name = scope.get_unique_variable_name("preds") + reshaped_preds_name = scope.get_unique_variable_name("reshaped_preds") + k_name = scope.get_unique_variable_name("k_column") + out_k_name = scope.get_unique_variable_name("out_k_column") + argmax_output_name = scope.get_unique_variable_name("argmax_output") + classes_name = scope.get_unique_variable_name("classes") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + + container.add_initializer(k_name, onnx_proto.TensorProto.INT64, [], [k]) container.add_initializer( - classes_name, onnx_proto.TensorProto.INT64, - op.classes_[k].shape, [int(i) for i in op.classes_[k]]) + classes_name, + onnx_proto.TensorProto.INT64, + op.classes_[k].shape, + [int(i) for i in op.classes_[k]], + ) container.add_node( - 'ArrayFeatureExtractor', [transposed_result_name, k_name], - out_k_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [transposed_result_name, k_name], + out_k_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) container.add_node( - 'ArgMax', out_k_name, argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) - apply_reshape(scope, argmax_output_name, reshaped_result_name, - container, desired_shape=(1, -1)) + "ArgMax", + out_k_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + apply_reshape( + scope, + argmax_output_name, + reshaped_result_name, + container, + desired_shape=(1, -1), + ) container.add_node( - 'ArrayFeatureExtractor', [classes_name, reshaped_result_name], - preds_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_reshape(scope, preds_name, reshaped_preds_name, - container, desired_shape=(-1, 1)) + "ArrayFeatureExtractor", + [classes_name, reshaped_result_name], + preds_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_reshape( + scope, preds_name, reshaped_preds_name, container, desired_shape=(-1, 1) + ) predictions.append(reshaped_preds_name) - apply_concat(scope, predictions, operator.outputs[0].full_name, - container, axis=1) + apply_concat( + scope, predictions, operator.outputs[0].full_name, container, axis=1 + ) - if options['decision_path']: + if options["decision_path"]: raise RuntimeError( - "Option decision_path for multi-outputs " - "is not implemented yet.") - if options['decision_leaf']: + "Option decision_path for multi-outputs " "is not implemented yet." 
+ ) + if options["decision_leaf"]: raise RuntimeError( - "Option decision_leaf for multi-outputs " - "is not implemented yet.") + "Option decision_leaf for multi-outputs " "is not implemented yet." + ) def convert_sklearn_decision_tree_regressor( - scope, operator, container, op_type='TreeEnsembleRegressor', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 op = operator.raw_operator attrs = get_default_tree_regressor_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name(op_type) - attrs['n_targets'] = int(op.n_outputs_) - add_tree_to_attribute_pairs(attrs, False, op.tree_, 0, 1., 0, False, - True, dtype=dtype) + attrs["name"] = scope.get_unique_operator_name(op_type) + attrs["n_targets"] = int(op.n_outputs_) + add_tree_to_attribute_pairs( + attrs, False, op.tree_, 0, 1.0, 0, False, True, dtype=dtype + ) if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype) input_name = operator.input_full_names if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = [cast_input_name] container.add_node( - op_type, input_name, operator.outputs[0].full_name, - op_domain=op_domain, op_version=op_version, **attrs) + op_type, + input_name, + operator.outputs[0].full_name, + op_domain=op_domain, + op_version=op_version, + **attrs + ) - options = scope.get_options( - op, dict(decision_path=False, decision_leaf=False)) + options = scope.get_options(op, dict(decision_path=False, decision_leaf=False)) # decision_path n_out = 1 - if options['decision_path']: + if options["decision_path"]: # decision_path _append_decision_output( - input_name, attrs, _build_labels_path, n_out, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True) + input_name, + attrs, + _build_labels_path, + n_out, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + ) n_out += 1 - if options['decision_leaf']: + if options["decision_leaf"]: # decision_path _append_decision_output( - input_name, attrs, _build_labels_leaf, n_out, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True, cast_encode=True) + input_name, + attrs, + _build_labels_leaf, + n_out, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + cast_encode=True, + ) n_out += 1 @@ -413,11 +557,9 @@ def _recursive_build_labels(tree, index, current): if tree.children_left[index] == -1: yield (index, current.copy()) else: - for it in _recursive_build_labels( - tree, tree.children_left[index], current): + for it in _recursive_build_labels(tree, tree.children_left[index], current): yield it - for it in 
_recursive_build_labels( - tree, tree.children_right[index], current): + for it in _recursive_build_labels(tree, tree.children_right[index], current): yield it current[index] = False @@ -431,7 +573,7 @@ def _build_labels_path(tree): for nodeid, b in path.items(): if b: spath[nodeid] = "1" - paths[leave_index] = ''.join(spath) + paths[leave_index] = "".join(spath) return paths @@ -444,25 +586,35 @@ def _build_labels_leaf(tree): return paths -register_converter('SklearnDecisionTreeClassifier', - convert_sklearn_decision_tree_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnDecisionTreeRegressor', - convert_sklearn_decision_tree_regressor, - options={'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnExtraTreeClassifier', - convert_sklearn_decision_tree_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnExtraTreeRegressor', - convert_sklearn_decision_tree_regressor, - options={'decision_path': [True, False], - 'decision_leaf': [True, False]}) +register_converter( + "SklearnDecisionTreeClassifier", + convert_sklearn_decision_tree_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "decision_path": [True, False], + "decision_leaf": [True, False], + }, +) +register_converter( + "SklearnDecisionTreeRegressor", + convert_sklearn_decision_tree_regressor, + options={"decision_path": [True, False], "decision_leaf": [True, False]}, +) +register_converter( + "SklearnExtraTreeClassifier", + convert_sklearn_decision_tree_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "decision_path": [True, False], + "decision_leaf": [True, False], + }, +) +register_converter( + "SklearnExtraTreeRegressor", + convert_sklearn_decision_tree_regressor, + options={"decision_path": [True, False], "decision_leaf": [True, False]}, +) diff --git a/skl2onnx/operator_converters/decomposition.py b/skl2onnx/operator_converters/decomposition.py index 5d53e8b22..7ddd3f622 100644 --- a/skl2onnx/operator_converters/decomposition.py +++ b/skl2onnx/operator_converters/decomposition.py @@ -2,17 +2,21 @@ from ..proto import onnx_proto -from ..common._apply_operation import ( - apply_cast, apply_div, apply_sqrt, apply_sub) +from ..common._apply_operation import apply_cast, apply_div, apply_sqrt, apply_sub from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common.data_types import ( - Int64TensorType, DoubleTensorType, FloatTensorType, guess_proto_type) + Int64TensorType, + DoubleTensorType, + FloatTensorType, + guess_proto_type, +) -def convert_truncated_svd(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_truncated_svd( + scope: Scope, operator: Operator, container: ModelComponentContainer +): # Create alias for the scikit-learn truncated SVD model we # are going to convert svd = operator.raw_operator @@ -23,66 +27,92 @@ def convert_truncated_svd(scope: Scope, operator: Operator, # Transpose [K, C] matrix to [C, K], where C/K is the # input/transformed 
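As an illustrative aside, the decision_path and decision_leaf options registered just above add extra outputs to the converted tree model; a minimal sketch, assuming skl2onnx and onnxruntime are installed and combining them with zipmap=False (data and names are only for demonstration):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from skl2onnx import to_onnx
import onnxruntime as rt

X, y = load_iris(return_X_y=True)
X = X.astype(np.float32)
tree = DecisionTreeClassifier(max_depth=3).fit(X, y)

onx = to_onnx(
    tree,
    X,
    options={id(tree): {"zipmap": False, "decision_path": True, "decision_leaf": True}},
)
sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
results = sess.run(None, {"X": X[:2]})
for out, res in zip(sess.get_outputs(), results):
    # expected outputs: label, probabilities, encoded decision path, leaf index
    print(out.name, getattr(res, "shape", None))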
feature dimension transform_matrix = svd.components_.transpose() - transform_matrix_name = scope.get_unique_variable_name('transform_matrix') + transform_matrix_name = scope.get_unique_variable_name("transform_matrix") # Put the transformation into an ONNX tensor container.add_initializer( - transform_matrix_name, proto_dtype, - transform_matrix.shape, transform_matrix.flatten()) + transform_matrix_name, + proto_dtype, + transform_matrix.shape, + transform_matrix.flatten(), + ) input_name = operator.inputs[0].full_name if isinstance(operator.inputs[0].type, Int64TensorType): - cast_output_name = scope.get_unique_variable_name('cast_output') + cast_output_name = scope.get_unique_variable_name("cast_output") - apply_cast(scope, input_name, cast_output_name, container, - to=onnx_proto.TensorProto.FLOAT) + apply_cast( + scope, + input_name, + cast_output_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_output_name - if operator.type == 'SklearnTruncatedSVD': + if operator.type == "SklearnTruncatedSVD": # Create the major operator, a matrix multiplication. container.add_node( - 'MatMul', [input_name, transform_matrix_name], - operator.outputs[0].full_name, name=operator.full_name) + "MatMul", + [input_name, transform_matrix_name], + operator.outputs[0].full_name, + name=operator.full_name, + ) else: # PCA if svd.mean_ is not None: - mean_name = scope.get_unique_variable_name('mean') - sub_result_name = scope.get_unique_variable_name('sub_result') + mean_name = scope.get_unique_variable_name("mean") + sub_result_name = scope.get_unique_variable_name("sub_result") - container.add_initializer(mean_name, proto_dtype, - svd.mean_.shape, svd.mean_) + container.add_initializer( + mean_name, proto_dtype, svd.mean_.shape, svd.mean_ + ) # Subtract mean from input tensor - apply_sub(scope, [input_name, mean_name], - sub_result_name, container, broadcast=1) + apply_sub( + scope, [input_name, mean_name], sub_result_name, container, broadcast=1 + ) else: sub_result_name = input_name if svd.whiten: explained_variance_name = scope.get_unique_variable_name( - 'explained_variance') + "explained_variance" + ) explained_variance_root_name = scope.get_unique_variable_name( - 'explained_variance_root') - matmul_result_name = scope.get_unique_variable_name( - 'matmul_result') + "explained_variance_root" + ) + matmul_result_name = scope.get_unique_variable_name("matmul_result") container.add_initializer( - explained_variance_name, proto_dtype, - svd.explained_variance_.shape, svd.explained_variance_) + explained_variance_name, + proto_dtype, + svd.explained_variance_.shape, + svd.explained_variance_, + ) container.add_node( - 'MatMul', [sub_result_name, transform_matrix_name], + "MatMul", + [sub_result_name, transform_matrix_name], matmul_result_name, - name=scope.get_unique_operator_name('MatMul')) - apply_sqrt(scope, explained_variance_name, - explained_variance_root_name, container) - apply_div(scope, - [matmul_result_name, explained_variance_root_name], - operator.outputs[0].full_name, container, broadcast=1) + name=scope.get_unique_operator_name("MatMul"), + ) + apply_sqrt( + scope, explained_variance_name, explained_variance_root_name, container + ) + apply_div( + scope, + [matmul_result_name, explained_variance_root_name], + operator.outputs[0].full_name, + container, + broadcast=1, + ) else: container.add_node( - 'MatMul', [sub_result_name, transform_matrix_name], + "MatMul", + [sub_result_name, transform_matrix_name], operator.outputs[0].full_name, - 
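As an illustrative aside, the whitening branch above mirrors scikit-learn's own computation, (X - mean_) @ components_.T / sqrt(explained_variance_); a small numpy check on synthetic data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
pca = PCA(n_components=3, whiten=True).fit(X)

manual = (X - pca.mean_) @ pca.components_.T / np.sqrt(pca.explained_variance_)
print(np.allclose(manual, pca.transform(X)))  # True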
name=scope.get_unique_operator_name('MatMul')) + name=scope.get_unique_operator_name("MatMul"), + ) -register_converter('SklearnIncrementalPCA', convert_truncated_svd) -register_converter('SklearnPCA', convert_truncated_svd) -register_converter('SklearnTruncatedSVD', convert_truncated_svd) +register_converter("SklearnIncrementalPCA", convert_truncated_svd) +register_converter("SklearnPCA", convert_truncated_svd) +register_converter("SklearnTruncatedSVD", convert_truncated_svd) diff --git a/skl2onnx/operator_converters/dict_vectoriser.py b/skl2onnx/operator_converters/dict_vectoriser.py index 57f3fceb1..cd5ce638b 100644 --- a/skl2onnx/operator_converters/dict_vectoriser.py +++ b/skl2onnx/operator_converters/dict_vectoriser.py @@ -7,8 +7,9 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_dict_vectorizer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ When a *DictVectorizer* converts numbers into strings, scikit-learn adds a separator to disambiguate strings @@ -24,13 +25,10 @@ def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator, This cannot be implemented in ONNX. The converter raises an exception in that case. """ - op_type = 'DictVectorizer' + op_type = "DictVectorizer" op = operator.raw_operator - attrs = { - 'name': scope.get_unique_operator_name(op_type) - } - if all(isinstance(feature_name, str) - for feature_name in op.feature_names_): + attrs = {"name": scope.get_unique_operator_name(op_type)} + if all(isinstance(feature_name, str) for feature_name in op.feature_names_): # all strings, scikit-learn does the following: new_cats = [] unique_cats = set() @@ -39,26 +37,30 @@ def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator, if op.separator in i: nbsep += 1 if i in unique_cats: - raise RuntimeError( - "Duplicated category '{}'.".format(i)) + raise RuntimeError("Duplicated category '{}'.".format(i)) unique_cats.add(i) new_cats.append(i) if nbsep >= len(new_cats): raise RuntimeError( "All categories contain a separator '{}'. " "This case is not supported by the converter. " - "The mapping must map to numbers not string.". 
format( - op.separator)) - attrs['string_vocabulary'] = new_cats - elif all(isinstance(feature_name, numbers.Integral) - for feature_name in op.feature_names_): - attrs['int64_vocabulary'] = list(int(i) for i in op.feature_names_) + "The mapping must map to numbers not string.".format(op.separator) + ) + attrs["string_vocabulary"] = new_cats + elif all( + isinstance(feature_name, numbers.Integral) for feature_name in op.feature_names_ + ): + attrs["int64_vocabulary"] = list(int(i) for i in op.feature_names_) else: - raise ValueError('Keys must be all integers or all strings.') + raise ValueError("Keys must be all integers or all strings.") - container.add_node(op_type, operator.input_full_names, - operator.output_full_names, op_domain='ai.onnx.ml', - **attrs) + container.add_node( + op_type, + operator.input_full_names, + operator.output_full_names, + op_domain="ai.onnx.ml", + **attrs + ) -register_converter('SklearnDictVectorizer', convert_sklearn_dict_vectorizer) +register_converter("SklearnDictVectorizer", convert_sklearn_dict_vectorizer) diff --git a/skl2onnx/operator_converters/feature_hasher.py b/skl2onnx/operator_converters/feature_hasher.py index 9f37228f5..4183422f3 100644 --- a/skl2onnx/operator_converters/feature_hasher.py +++ b/skl2onnx/operator_converters/feature_hasher.py @@ -8,115 +8,125 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_feature_hasher(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_feature_hasher( + scope: Scope, operator: Operator, container: ModelComponentContainer +): X = operator.inputs[0] out = operator.outputs op = operator.raw_operator if op.input_type != "string": raise RuntimeError( f"The converter for FeatureHasher only supports " - f"input_type='string' not {op.input_type!r}.") - - hashed_ = scope.get_unique_variable_name('hashed_') - container.add_node('MurmurHash3', X.full_name, hashed_, - positive=0, seed=0, op_domain="com.microsoft", - op_version=1) - hashed = scope.get_unique_variable_name('hashed') - container.add_node('Cast', hashed_, hashed, to=TensorProto.INT64) + f"input_type='string' not {op.input_type!r}." 
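As an illustrative aside, the separator the DictVectorizer converter checks for is the one scikit-learn inserts between a feature name and a string value; a short sketch of the scikit-learn side of that behaviour:

from sklearn.feature_extraction import DictVectorizer

dv = DictVectorizer(sparse=False)
dv.fit([{"city": "Paris", "temp": 12.0}, {"city": "Berlin", "temp": 9.0}])
print(dv.separator)       # '='
print(dv.feature_names_)  # ['city=Berlin', 'city=Paris', 'temp']
# A vocabulary in which every key embeds the separator cannot be mapped back to
# numbers, which is the case the converter rejects with a RuntimeError.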
+ ) + + hashed_ = scope.get_unique_variable_name("hashed_") + container.add_node( + "MurmurHash3", + X.full_name, + hashed_, + positive=0, + seed=0, + op_domain="com.microsoft", + op_version=1, + ) + hashed = scope.get_unique_variable_name("hashed") + container.add_node("Cast", hashed_, hashed, to=TensorProto.INT64) if op.dtype in (np.float32, np.float64, np.int64): cst_neg = -1 else: cst_neg = 4294967295 - infinite = scope.get_unique_variable_name('infinite') - container.add_initializer(infinite, TensorProto.INT64, [1], - [-2147483648]) + infinite = scope.get_unique_variable_name("infinite") + container.add_initializer(infinite, TensorProto.INT64, [1], [-2147483648]) - infinite2 = scope.get_unique_variable_name('infinite2') - container.add_initializer(infinite2, TensorProto.INT64, [1], - [cst_neg]) + infinite2 = scope.get_unique_variable_name("infinite2") + container.add_initializer(infinite2, TensorProto.INT64, [1], [cst_neg]) - infinite_n = scope.get_unique_variable_name('infinite_n') - container.add_initializer(infinite_n, TensorProto.INT64, [1], - [2147483647 - (op.n_features - 1)]) + infinite_n = scope.get_unique_variable_name("infinite_n") + container.add_initializer( + infinite_n, TensorProto.INT64, [1], [2147483647 - (op.n_features - 1)] + ) - zero = scope.get_unique_variable_name('zero') + zero = scope.get_unique_variable_name("zero") container.add_initializer(zero, TensorProto.INT64, [1], [0]) - one = scope.get_unique_variable_name('one') + one = scope.get_unique_variable_name("one") container.add_initializer(one, TensorProto.INT64, [1], [1]) - mone = scope.get_unique_variable_name('mone') + mone = scope.get_unique_variable_name("mone") container.add_initializer(mone, TensorProto.INT64, [1], [-1]) - mtwo = scope.get_unique_variable_name('mtwo') + mtwo = scope.get_unique_variable_name("mtwo") container.add_initializer(mtwo, TensorProto.INT64, [1], [-2]) - nf = scope.get_unique_variable_name('nf') + nf = scope.get_unique_variable_name("nf") container.add_initializer(nf, TensorProto.INT64, [1], [op.n_features]) - new_shape = scope.get_unique_variable_name('new_shape') + new_shape = scope.get_unique_variable_name("new_shape") container.add_initializer(new_shape, TensorProto.INT64, [2], [-1, 1]) - new_shape2 = scope.get_unique_variable_name('new_shape2') + new_shape2 = scope.get_unique_variable_name("new_shape2") container.add_initializer(new_shape2, TensorProto.INT64, [2], [1, -1]) # values if op.alternate_sign: - cmp = scope.get_unique_variable_name('cmp') - container.add_node('GreaterOrEqual', [hashed, zero], cmp) - values = scope.get_unique_variable_name('values') - container.add_node('Where', [cmp, one, infinite2], values) + cmp = scope.get_unique_variable_name("cmp") + container.add_node("GreaterOrEqual", [hashed, zero], cmp) + values = scope.get_unique_variable_name("values") + container.add_node("Where", [cmp, one, infinite2], values) else: - mul = scope.get_unique_variable_name('mul') - container.add_node('Mul', [hashed, zero], mul) - values = scope.get_unique_variable_name('values') - container.add_node('Add', [mul, one], values) + mul = scope.get_unique_variable_name("mul") + container.add_node("Mul", [hashed, zero], mul) + values = scope.get_unique_variable_name("values") + container.add_node("Add", [mul, one], values) - values_reshaped = scope.get_unique_variable_name('values_reshaped') - container.add_node('Reshape', [values, new_shape], values_reshaped) + values_reshaped = scope.get_unique_variable_name("values_reshaped") + container.add_node("Reshape", [values, 
new_shape], values_reshaped) # indices - cmp = scope.get_unique_variable_name('cmp_ind') - container.add_node('Equal', [hashed, infinite], cmp) - values_abs = scope.get_unique_variable_name('values_abs') - container.add_node('Abs', hashed, values_abs) - values_ind = scope.get_unique_variable_name('values_ind') - container.add_node('Where', [cmp, infinite_n, values_abs], values_ind) - indices = scope.get_unique_variable_name('indices') - container.add_node('Mod', [values_ind, nf], indices) - indices_reshaped = scope.get_unique_variable_name('indices_reshaped') - container.add_node('Reshape', [indices, new_shape], indices_reshaped) + cmp = scope.get_unique_variable_name("cmp_ind") + container.add_node("Equal", [hashed, infinite], cmp) + values_abs = scope.get_unique_variable_name("values_abs") + container.add_node("Abs", hashed, values_abs) + values_ind = scope.get_unique_variable_name("values_ind") + container.add_node("Where", [cmp, infinite_n, values_abs], values_ind) + indices = scope.get_unique_variable_name("indices") + container.add_node("Mod", [values_ind, nf], indices) + indices_reshaped = scope.get_unique_variable_name("indices_reshaped") + container.add_node("Reshape", [indices, new_shape], indices_reshaped) # scatter - zerot_ = scope.get_unique_variable_name('zerot_') - container.add_node('ConstantOfShape', [nf], zerot_, - value=make_tensor("value", - TensorProto.INT64, [1], [0])) - zerot = scope.get_unique_variable_name('zerot') - container.add_node('Mul', [indices_reshaped, zerot_], zerot) - - final = scope.get_unique_variable_name('final') - container.add_node('ScatterElements', - [zerot, indices_reshaped, values_reshaped], - final, axis=1) + zerot_ = scope.get_unique_variable_name("zerot_") + container.add_node( + "ConstantOfShape", + [nf], + zerot_, + value=make_tensor("value", TensorProto.INT64, [1], [0]), + ) + zerot = scope.get_unique_variable_name("zerot") + container.add_node("Mul", [indices_reshaped, zerot_], zerot) + + final = scope.get_unique_variable_name("final") + container.add_node( + "ScatterElements", [zerot, indices_reshaped, values_reshaped], final, axis=1 + ) # at this point, every string has been processed as if it was in # in a single columns. 
# in case there is more than one column, we need to reduce over # the last dimension - input_shape = scope.get_unique_variable_name('input_shape') - container.add_node('Shape', X.full_name, input_shape) - shape_not_last = scope.get_unique_variable_name('shape_not_last') - container.add_node('Slice', [input_shape, zero, mone], shape_not_last) - final_shape = scope.get_unique_variable_name('final_last') - container.add_node('Concat', [shape_not_last, mone, nf], - final_shape, axis=0) - final_reshaped = scope.get_unique_variable_name('final_reshaped') - container.add_node('Reshape', [final, final_shape], final_reshaped) - container.add_node('ReduceSum', [final_reshaped, mtwo], - out[0].full_name, keepdims=0) - - -register_converter('SklearnFeatureHasher', convert_sklearn_feature_hasher) + input_shape = scope.get_unique_variable_name("input_shape") + container.add_node("Shape", X.full_name, input_shape) + shape_not_last = scope.get_unique_variable_name("shape_not_last") + container.add_node("Slice", [input_shape, zero, mone], shape_not_last) + final_shape = scope.get_unique_variable_name("final_last") + container.add_node("Concat", [shape_not_last, mone, nf], final_shape, axis=0) + final_reshaped = scope.get_unique_variable_name("final_reshaped") + container.add_node("Reshape", [final, final_shape], final_reshaped) + container.add_node( + "ReduceSum", [final_reshaped, mtwo], out[0].full_name, keepdims=0 + ) + + +register_converter("SklearnFeatureHasher", convert_sklearn_feature_hasher) diff --git a/skl2onnx/operator_converters/feature_selection.py b/skl2onnx/operator_converters/feature_selection.py index 4511c781f..5ceccd2c7 100644 --- a/skl2onnx/operator_converters/feature_selection.py +++ b/skl2onnx/operator_converters/feature_selection.py @@ -7,8 +7,9 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_feature_selection(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_feature_selection( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator # Get indices of the features selected index = op.get_support(indices=True) @@ -16,35 +17,36 @@ def convert_sklearn_feature_selection(scope: Scope, operator: Operator, raise RuntimeError( "Model '{}' did not select any feature. " "This model cannot be converted into ONNX." 
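As an illustrative aside, the converter above only handles FeatureHasher(input_type="string"); a quick look at the scikit-learn behaviour it has to reproduce (hash each string to a column index modulo n_features, with the sign taken from the hash when alternate_sign=True):

from sklearn.feature_extraction import FeatureHasher

h = FeatureHasher(n_features=8, input_type="string", alternate_sign=True)
dense = h.transform([["cat", "dog"], ["cat", "cat"]]).toarray()
print(dense.shape)  # (2, 8)
print(dense)        # signed counts scattered into the 8 hashed columns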
- "".format(op.__class__.__name__)) + "".format(op.__class__.__name__) + ) output_name = operator.outputs[0].full_name if index.any(): - column_indices_name = scope.get_unique_variable_name('column_indices') + column_indices_name = scope.get_unique_variable_name("column_indices") - container.add_initializer(column_indices_name, - onnx_proto.TensorProto.INT64, - [len(index)], index) + container.add_initializer( + column_indices_name, onnx_proto.TensorProto.INT64, [len(index)], index + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [operator.inputs[0].full_name, column_indices_name], - output_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + output_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) else: - container.add_node('ConstantOfShape', operator.inputs[0].full_name, - output_name, op_version=9) - - -register_converter('SklearnGenericUnivariateSelect', - convert_sklearn_feature_selection) -register_converter('SklearnRFE', convert_sklearn_feature_selection) -register_converter('SklearnRFECV', convert_sklearn_feature_selection) -register_converter('SklearnSelectFdr', convert_sklearn_feature_selection) -register_converter('SklearnSelectFpr', convert_sklearn_feature_selection) -register_converter('SklearnSelectFromModel', convert_sklearn_feature_selection) -register_converter('SklearnSelectFwe', convert_sklearn_feature_selection) -register_converter('SklearnSelectKBest', convert_sklearn_feature_selection) -register_converter('SklearnSelectPercentile', - convert_sklearn_feature_selection) -register_converter('SklearnVarianceThreshold', - convert_sklearn_feature_selection) + container.add_node( + "ConstantOfShape", operator.inputs[0].full_name, output_name, op_version=9 + ) + + +register_converter("SklearnGenericUnivariateSelect", convert_sklearn_feature_selection) +register_converter("SklearnRFE", convert_sklearn_feature_selection) +register_converter("SklearnRFECV", convert_sklearn_feature_selection) +register_converter("SklearnSelectFdr", convert_sklearn_feature_selection) +register_converter("SklearnSelectFpr", convert_sklearn_feature_selection) +register_converter("SklearnSelectFromModel", convert_sklearn_feature_selection) +register_converter("SklearnSelectFwe", convert_sklearn_feature_selection) +register_converter("SklearnSelectKBest", convert_sklearn_feature_selection) +register_converter("SklearnSelectPercentile", convert_sklearn_feature_selection) +register_converter("SklearnVarianceThreshold", convert_sklearn_feature_selection) diff --git a/skl2onnx/operator_converters/flatten_op.py b/skl2onnx/operator_converters/flatten_op.py index 861ab7d39..d0ea4494a 100644 --- a/skl2onnx/operator_converters/flatten_op.py +++ b/skl2onnx/operator_converters/flatten_op.py @@ -5,12 +5,17 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_flatten(scope: Scope, operator: Operator, - container: ModelComponentContainer): - name = scope.get_unique_operator_name('Flatten') - container.add_node('Flatten', operator.inputs[0].full_name, - operator.outputs[0].full_name, name=name, - axis=1) +def convert_sklearn_flatten( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + name = scope.get_unique_operator_name("Flatten") + container.add_node( + "Flatten", + operator.inputs[0].full_name, + operator.outputs[0].full_name, + name=name, + axis=1, + ) -register_converter('SklearnFlatten', convert_sklearn_flatten) 
+register_converter("SklearnFlatten", convert_sklearn_flatten) diff --git a/skl2onnx/operator_converters/function_transformer.py b/skl2onnx/operator_converters/function_transformer.py index 9fd4ca28b..7cd5b2962 100644 --- a/skl2onnx/operator_converters/function_transformer.py +++ b/skl2onnx/operator_converters/function_transformer.py @@ -7,21 +7,31 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_function_transformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_function_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator if op.func is not None: - raise RuntimeError("FunctionTransformer is not supported unless the " - "transform function is None (= identity). " - "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + raise RuntimeError( + "FunctionTransformer is not supported unless the " + "transform function is None (= identity). " + "You may raise an issue at " + "https://github.com/onnx/sklearn-onnx/issues." + ) if len(operator.inputs) == 1: - apply_identity(scope, operator.inputs[0].full_name, - operator.outputs[0].full_name, container) + apply_identity( + scope, + operator.inputs[0].full_name, + operator.outputs[0].full_name, + container, + ) else: - apply_concat(scope, [i.full_name for i in operator.inputs], - operator.outputs[0].full_name, container) + apply_concat( + scope, + [i.full_name for i in operator.inputs], + operator.outputs[0].full_name, + container, + ) -register_converter('SklearnFunctionTransformer', - convert_sklearn_function_transformer) +register_converter("SklearnFunctionTransformer", convert_sklearn_function_transformer) diff --git a/skl2onnx/operator_converters/gamma_regressor.py b/skl2onnx/operator_converters/gamma_regressor.py index 0e5fd90b9..030eaa47b 100644 --- a/skl2onnx/operator_converters/gamma_regressor.py +++ b/skl2onnx/operator_converters/gamma_regressor.py @@ -1,18 +1,24 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from ..common.data_types import (Int64TensorType, guess_numpy_type) +from ..common.data_types import Int64TensorType, guess_numpy_type from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..algebra.onnx_ops import ( - OnnxAdd, OnnxCast, OnnxExp, OnnxIdentity, OnnxMatMul, - OnnxReshape, OnnxSigmoid) + OnnxAdd, + OnnxCast, + OnnxExp, + OnnxIdentity, + OnnxMatMul, + OnnxReshape, + OnnxSigmoid, +) -def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): - +def convert_sklearn_gamma_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): X = operator.inputs[0] out = operator.outputs op = operator.raw_operator @@ -26,16 +32,21 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator, else: input_var = X - intercept = (op.intercept_.astype(dtype) if len(op.intercept_.shape) > 0 - else np.array([op.intercept_], dtype=dtype)) + intercept = ( + op.intercept_.astype(dtype) + if len(op.intercept_.shape) > 0 + else np.array([op.intercept_], dtype=dtype) + ) eta = OnnxAdd( OnnxMatMul(input_var, op.coef_.astype(dtype), op_version=opv), - intercept, op_version=opv) + intercept, + op_version=opv, + ) if hasattr(op, "_link_instance"): # scikit-learn < 1.1 - from sklearn.linear_model._glm.link import ( - IdentityLink, LogLink, LogitLink) + from sklearn.linear_model._glm.link 
import IdentityLink, LogLink, LogitLink + if isinstance(op._link_instance, IdentityLink): Y = OnnxIdentity(eta, op_version=opv) elif isinstance(op._link_instance, LogLink): @@ -45,8 +56,8 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator, else: raise RuntimeError( "Unexpected type %r for _link_instance " - "in operator type %r." % ( - type(op._link_instance), type(op))) + "in operator type %r." % (type(op._link_instance), type(op)) + ) else: # scikit-learn >= 1.1 from sklearn._loss.loss import ( @@ -57,29 +68,32 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator, HalfSquaredError, HalfTweedieLoss, HalfTweedieLossIdentity, - PinballLoss + PinballLoss, ) loss = op._get_loss() if isinstance( loss, - (AbsoluteError, HalfSquaredError, - HalfTweedieLossIdentity, PinballLoss)): + (AbsoluteError, HalfSquaredError, HalfTweedieLossIdentity, PinballLoss), + ): Y = OnnxIdentity(eta, op_version=opv) - elif isinstance( - loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)): + elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)): Y = OnnxExp(eta, op_version=opv) elif isinstance(loss, HalfBinomialLoss): Y = OnnxSigmoid(eta, op_version=opv) else: raise RuntimeError( - f"Unexpected type of link for {loss!r} loss " - "in operator type {op!r}.") + f"Unexpected type of link for {loss!r} loss " "in operator type {op!r}." + ) last_dim = 1 if len(op.coef_.shape) == 1 else op.coef_.shape[-1] - final = OnnxReshape(Y, np.array([-1, last_dim], dtype=np.int64), - op_version=opv, output_names=out[:1]) + final = OnnxReshape( + Y, + np.array([-1, last_dim], dtype=np.int64), + op_version=opv, + output_names=out[:1], + ) final.add_to(scope, container) -register_converter('SklearnGammaRegressor', convert_sklearn_gamma_regressor) +register_converter("SklearnGammaRegressor", convert_sklearn_gamma_regressor) diff --git a/skl2onnx/operator_converters/gaussian_mixture.py b/skl2onnx/operator_converters/gaussian_mixture.py index 9ace21021..81d63b76c 100644 --- a/skl2onnx/operator_converters/gaussian_mixture.py +++ b/skl2onnx/operator_converters/gaussian_mixture.py @@ -4,6 +4,7 @@ import numpy as np from scipy.special import digamma from sklearn.mixture import BayesianGaussianMixture, GaussianMixture + try: from sklearn.mixture._gaussian_mixture import _compute_log_det_cholesky except ImportError: @@ -14,16 +15,27 @@ from ..common._container import ModelComponentContainer from ..common.data_types import guess_numpy_type from ..algebra.onnx_ops import ( - OnnxAdd, OnnxSub, OnnxMul, OnnxGemm, OnnxReduceSumSquareApi18, - OnnxReduceLogSumExpApi18, OnnxExp, OnnxArgMax, OnnxConcat, - OnnxReduceSumApi11, OnnxLog, OnnxReduceMaxApi18, OnnxEqual, OnnxCast + OnnxAdd, + OnnxSub, + OnnxMul, + OnnxGemm, + OnnxReduceSumSquareApi18, + OnnxReduceLogSumExpApi18, + OnnxExp, + OnnxArgMax, + OnnxConcat, + OnnxReduceSumApi11, + OnnxLog, + OnnxReduceMaxApi18, + OnnxEqual, + OnnxCast, ) from ..proto import onnx_proto -def _estimate_log_gaussian_prob(X, means, precisions_chol, - covariance_type, dtype, op_version, - combined_reducesum): +def _estimate_log_gaussian_prob( + X, means, precisions_chol, covariance_type, dtype, op_version, combined_reducesum +): """ Converts the same function into ONNX. Returns log probabilities. 
@@ -34,10 +46,10 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, # self._estimate_log_prob(X) log_det = _compute_log_det_cholesky( - precisions_chol, covariance_type, n_features).astype( - dtype) + precisions_chol, covariance_type, n_features + ).astype(dtype) - if covariance_type == 'full': + if covariance_type == "full": # shape(op.means_) = (n_components, n_features) # shape(op.precisions_cholesky_) = # (n_components, n_features, n_features) @@ -50,19 +62,25 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, ys = [] for c in range(n_components): prec_chol = precisions_chol[c, :, :] - cst = - np.dot(means[c, :], prec_chol) - y = OnnxGemm(X, prec_chol.astype(dtype), - cst.astype(dtype), alpha=1., - beta=1., op_version=opv) + cst = -np.dot(means[c, :], prec_chol) + y = OnnxGemm( + X, + prec_chol.astype(dtype), + cst.astype(dtype), + alpha=1.0, + beta=1.0, + op_version=opv, + ) if combined_reducesum: - y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv), - axes=[1], op_version=opv) + y2s = OnnxReduceSumApi11( + OnnxMul(y, y, op_version=opv), axes=[1], op_version=opv + ) else: y2s = OnnxReduceSumSquareApi18(y, axes=[1], op_version=opv) ys.append(y2s) log_prob = OnnxConcat(*ys, axis=1, op_version=opv) - elif covariance_type == 'tied': + elif covariance_type == "tied": # shape(op.means_) = (n_components, n_features) # shape(op.precisions_cholesky_) = # (n_features, n_features) @@ -74,19 +92,25 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, ys = [] for f in range(n_components): - cst = - np.dot(means[f, :], precisions_chol) - y = OnnxGemm(X, precisions_chol.astype(dtype), - cst.astype(dtype), - alpha=1., beta=1., op_version=opv) + cst = -np.dot(means[f, :], precisions_chol) + y = OnnxGemm( + X, + precisions_chol.astype(dtype), + cst.astype(dtype), + alpha=1.0, + beta=1.0, + op_version=opv, + ) if combined_reducesum: - y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv), - axes=[1], op_version=opv) + y2s = OnnxReduceSumApi11( + OnnxMul(y, y, op_version=opv), axes=[1], op_version=opv + ) else: y2s = OnnxReduceSumSquareApi18(y, axes=[1], op_version=opv) ys.append(y2s) log_prob = OnnxConcat(*ys, axis=1, op_version=opv) - elif covariance_type == 'diag': + elif covariance_type == "diag": # shape(op.means_) = (n_components, n_features) # shape(op.precisions_cholesky_) = # (n_components, n_features) @@ -96,20 +120,30 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, # 2. 
* np.dot(X, (means * precisions).T) + # np.dot(X ** 2, precisions.T)) - precisions = (precisions_chol ** 2).astype(dtype) - mp = np.sum((means ** 2 * precisions), 1).astype(dtype) - zeros = np.zeros((n_components, ), dtype=dtype) + precisions = (precisions_chol**2).astype(dtype) + mp = np.sum((means**2 * precisions), 1).astype(dtype) + zeros = np.zeros((n_components,), dtype=dtype) xmp = OnnxGemm( - X, (means * precisions).T.astype(dtype), - zeros, alpha=-2., beta=0., op_version=opv) - term = OnnxGemm(OnnxMul(X, X, op_version=opv), - precisions.T.astype(dtype), - zeros, alpha=1., beta=0., op_version=opv) + X, + (means * precisions).T.astype(dtype), + zeros, + alpha=-2.0, + beta=0.0, + op_version=opv, + ) + term = OnnxGemm( + OnnxMul(X, X, op_version=opv), + precisions.T.astype(dtype), + zeros, + alpha=1.0, + beta=0.0, + op_version=opv, + ) log_prob = OnnxAdd( - OnnxAdd(mp.astype(dtype), xmp, op_version=opv), - term, op_version=opv) + OnnxAdd(mp.astype(dtype), xmp, op_version=opv), term, op_version=opv + ) - elif covariance_type == 'spherical': + elif covariance_type == "spherical": # shape(op.means_) = (n_components, n_features) # shape(op.precisions_cholesky_) = (n_components, ) @@ -118,39 +152,51 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol, # 2 * np.dot(X, means.T * precisions) + # np.outer(row_norms(X, squared=True), precisions)) - zeros = np.zeros((n_components, ), dtype=dtype) - precisions = (precisions_chol ** 2).astype(dtype) + zeros = np.zeros((n_components,), dtype=dtype) + precisions = (precisions_chol**2).astype(dtype) if combined_reducesum: - normX = OnnxReduceSumApi11(OnnxMul(X, X, op_version=opv), - axes=[1], op_version=opv) + normX = OnnxReduceSumApi11( + OnnxMul(X, X, op_version=opv), axes=[1], op_version=opv + ) else: normX = OnnxReduceSumSquareApi18(X, axes=[1], op_version=opv) outer = OnnxGemm( - normX, precisions[np.newaxis, :].astype(dtype), - zeros.astype(dtype), alpha=1., beta=1., op_version=opv) + normX, + precisions[np.newaxis, :].astype(dtype), + zeros.astype(dtype), + alpha=1.0, + beta=1.0, + op_version=opv, + ) xmp = OnnxGemm( - X, (means.T * precisions).astype(dtype), - zeros, alpha=-2., beta=0., op_version=opv) - mp = (np.sum(means ** 2, 1) * precisions).astype(dtype) - log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer, op_version=opv), - op_version=opv) + X, + (means.T * precisions).astype(dtype), + zeros, + alpha=-2.0, + beta=0.0, + op_version=opv, + ) + mp = (np.sum(means**2, 1) * precisions).astype(dtype) + log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer, op_version=opv), op_version=opv) else: - raise RuntimeError("Unknown op.covariance_type='{}'. Upgrade " - "to a more recent version of skearn-onnx " - "or raise an issue.".format(covariance_type)) + raise RuntimeError( + "Unknown op.covariance_type='{}'. 
Upgrade " + "to a more recent version of skearn-onnx " + "or raise an issue.".format(covariance_type) + ) # -.5 * (cst + log_prob) + log_det cst = np.array([n_features * np.log(2 * np.pi)]).astype(dtype) add = OnnxAdd(cst, log_prob, op_version=opv) - mul = OnnxMul(add, np.array([-0.5], dtype=dtype), - op_version=opv) + mul = OnnxMul(add, np.array([-0.5], dtype=dtype), op_version=opv) if isinstance(log_det, (np.float32, np.float64, float)): log_det = np.array([log_det], dtype=dtype) return OnnxAdd(mul, log_det.astype(dtype), op_version=opv) -def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_gaussian_mixture( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for *GaussianMixture*, *BayesianGaussianMixture*. @@ -162,15 +208,17 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, elif operator.target_opset < 11: raise RuntimeError( "Some needed operators are not available below opset 11" - " to convert model %r" % type(operator.raw_operator)) + " to convert model %r" % type(operator.raw_operator) + ) out = operator.outputs op = operator.raw_operator n_components = op.means_.shape[0] opv = container.target_opset options = container.get_options(op, dict(score_samples=None)) - add_score = options.get('score_samples', False) + add_score = options.get("score_samples", False) combined_reducesum = not container.is_allowed( - {'ReduceLogSumExp', 'ReduceSumSquare'}) + {"ReduceLogSumExp", "ReduceSumSquare"} + ) if add_score and len(out) != 3: raise RuntimeError("3 outputs are expected.") @@ -179,7 +227,9 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, raise RuntimeError( "Dimension mismath between expected number of features {} " "and ONNX graphs expectations {}.".format( - op.means_.shape[1], X.type.shape[1])) + op.means_.shape[1], X.type.shape[1] + ) + ) n_features = op.means_.shape[1] # All comments come from scikit-learn code and tells @@ -188,50 +238,58 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, log_weights = op._estimate_log_weights().astype(dtype) log_gauss = _estimate_log_gaussian_prob( - X, op.means_, op.precisions_cholesky_, op.covariance_type, - dtype, opv, combined_reducesum) + X, + op.means_, + op.precisions_cholesky_, + op.covariance_type, + dtype, + opv, + combined_reducesum, + ) if isinstance(op, BayesianGaussianMixture): # log_gauss = (_estimate_log_gaussian_prob( # X, self.means_, self.precisions_cholesky_, self.covariance_type) - # .5 * n_features * np.log(self.degrees_of_freedom_)) - log_lambda = n_features * np.log(2.) 
+ np.sum(digamma( - .5 * (op.degrees_of_freedom_ - - np.arange(0, n_features)[:, np.newaxis])), 0) - cst_log_lambda = .5 * (log_lambda - n_features / op.mean_precision_) - cst = cst_log_lambda - .5 * n_features * np.log(op.degrees_of_freedom_) + log_lambda = n_features * np.log(2.0) + np.sum( + digamma( + 0.5 * (op.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis]) + ), + 0, + ) + cst_log_lambda = 0.5 * (log_lambda - n_features / op.mean_precision_) + cst = cst_log_lambda - 0.5 * n_features * np.log(op.degrees_of_freedom_) if isinstance(cst, np.ndarray): cst_array = cst.astype(dtype) else: cst_array = np.array([cst], dtype=dtype) log_gauss = OnnxAdd(log_gauss, cst_array, op_version=opv) elif not isinstance(op, GaussianMixture): - raise RuntimeError( - "The converter does not support type {}.".format( - type(op))) + raise RuntimeError("The converter does not support type {}.".format(type(op))) # self._estimate_log_prob(X) + self._estimate_log_weights() weighted_log_prob = OnnxAdd(log_gauss, log_weights, op_version=opv) # labels - if container.is_allowed('ArgMax'): - labels = OnnxArgMax(weighted_log_prob, axis=1, - output_names=out[:1], op_version=opv) + if container.is_allowed("ArgMax"): + labels = OnnxArgMax( + weighted_log_prob, axis=1, output_names=out[:1], op_version=opv + ) else: - mxlabels = OnnxReduceMaxApi18( - weighted_log_prob, axes=[1], op_version=opv) + mxlabels = OnnxReduceMaxApi18(weighted_log_prob, axes=[1], op_version=opv) zeros = OnnxEqual( OnnxSub(weighted_log_prob, mxlabels, op_version=opv), np.array([0], dtype=dtype), - op_version=opv) - toint = OnnxCast(zeros, to=onnx_proto.TensorProto.INT64, - op_version=opv) - mulind = OnnxMul(toint, - np.arange(n_components).astype(np.int64), - op_version=opv) + op_version=opv, + ) + toint = OnnxCast(zeros, to=onnx_proto.TensorProto.INT64, op_version=opv) + mulind = OnnxMul( + toint, np.arange(n_components).astype(np.int64), op_version=opv + ) labels = OnnxReduceMaxApi18( - mulind, axes=[1], output_names=out[:1], op_version=opv) + mulind, axes=[1], output_names=out[:1], op_version=opv + ) # def _estimate_log_prob_resp(): # np.exp(log_resp) @@ -245,21 +303,25 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, outnames = None if combined_reducesum: - max_weight = OnnxReduceMaxApi18( - weighted_log_prob, axes=[1], op_version=opv) + max_weight = OnnxReduceMaxApi18(weighted_log_prob, axes=[1], op_version=opv) log_prob_norm_demax = OnnxLog( OnnxReduceSumApi11( OnnxExp( OnnxSub(weighted_log_prob, max_weight, op_version=opv), - op_version=opv), - axes=[1], op_version=opv), - op_version=opv) - log_prob_norm = OnnxAdd(log_prob_norm_demax, max_weight, - op_version=opv, output_names=outnames) + op_version=opv, + ), + axes=[1], + op_version=opv, + ), + op_version=opv, + ) + log_prob_norm = OnnxAdd( + log_prob_norm_demax, max_weight, op_version=opv, output_names=outnames + ) else: log_prob_norm = OnnxReduceLogSumExpApi18( - weighted_log_prob, axes=[1], op_version=opv, - output_names=outnames) + weighted_log_prob, axes=[1], op_version=opv, output_names=outnames + ) log_resp = OnnxSub(weighted_log_prob, log_prob_norm, op_version=opv) # probabilities @@ -272,8 +334,13 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator, log_prob_norm.add_to(scope, container) -register_converter('SklearnGaussianMixture', convert_sklearn_gaussian_mixture, - options={'score_samples': [True, False]}) -register_converter('SklearnBayesianGaussianMixture', - convert_sklearn_gaussian_mixture, - options={'score_samples': 
[True, False]}) +register_converter( + "SklearnGaussianMixture", + convert_sklearn_gaussian_mixture, + options={"score_samples": [True, False]}, +) +register_converter( + "SklearnBayesianGaussianMixture", + convert_sklearn_gaussian_mixture, + options={"score_samples": [True, False]}, +) diff --git a/skl2onnx/operator_converters/gaussian_process.py b/skl2onnx/operator_converters/gaussian_process.py index d7335db3c..886eac706 100644 --- a/skl2onnx/operator_converters/gaussian_process.py +++ b/skl2onnx/operator_converters/gaussian_process.py @@ -3,6 +3,7 @@ import numpy as np from scipy.linalg import solve_triangular from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF + try: from sklearn.gaussian_process._gpc import LAMBDAS, COEFS except ImportError: @@ -13,13 +14,27 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..algebra.onnx_ops import ( - OnnxAdd, OnnxSqrt, OnnxMatMul, OnnxSub, OnnxReduceSumApi11, - OnnxMul, OnnxMax, OnnxReshapeApi13, OnnxDiv, OnnxNot, - OnnxReciprocal, OnnxCast, OnnxLess, - OnnxPow, OnnxNeg, OnnxConcat, OnnxArrayFeatureExtractor, + OnnxAdd, + OnnxSqrt, + OnnxMatMul, + OnnxSub, + OnnxReduceSumApi11, + OnnxMul, + OnnxMax, + OnnxReshapeApi13, + OnnxDiv, + OnnxNot, + OnnxReciprocal, + OnnxCast, + OnnxLess, + OnnxPow, + OnnxNeg, + OnnxConcat, + OnnxArrayFeatureExtractor, OnnxTranspose, ) from ..algebra.custom_ops import OnnxSolve + try: from ..algebra.onnx_ops import OnnxConstantOfShape except ImportError: @@ -32,15 +47,12 @@ from ..algebra.onnx_ops import OnnxEinsum except ImportError: OnnxEinsum = None -from ._gp_kernels import ( - convert_kernel_diag, - convert_kernel, - _zero_vector_of_size -) +from ._gp_kernels import convert_kernel_diag, convert_kernel, _zero_vector_of_size -def convert_gaussian_process_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_gaussian_process_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ The method *predict* from class *GaussianProcessRegressor* may cache some results if it is called with parameter @@ -62,30 +74,37 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator, dtype = np.float32 options = container.get_options( - op, dict(return_cov=False, return_std=False, optim=None)) - if hasattr(op, 'kernel_') and op.kernel_ is not None: + op, dict(return_cov=False, return_std=False, optim=None) + ) + if hasattr(op, "kernel_") and op.kernel_ is not None: kernel = op.kernel_ elif op.kernel is None: - kernel = (C(1.0, constant_value_bounds="fixed") * - RBF(1.0, length_scale_bounds="fixed")) + kernel = C(1.0, constant_value_bounds="fixed") * RBF( + 1.0, length_scale_bounds="fixed" + ) else: kernel = op.kernel if not hasattr(op, "X_train_") or op.X_train_ is None: - out0 = _zero_vector_of_size(X, keepdims=1, output_names=out[:1], - dtype=dtype, op_version=opv) + out0 = _zero_vector_of_size( + X, keepdims=1, output_names=out[:1], dtype=dtype, op_version=opv + ) outputs = [out0] - if options['return_cov']: - outputs.append(convert_kernel( - kernel, X, output_names=out[1:], - dtype=dtype, op_version=opv)) - if options['return_std']: + if options["return_cov"]: + outputs.append( + convert_kernel( + kernel, X, output_names=out[1:], dtype=dtype, op_version=opv + ) + ) + if options["return_std"]: outputs.append( OnnxSqrt( - convert_kernel_diag( - kernel, X, dtype=dtype, op_version=opv), - output_names=out[1:], op_version=opv)) + convert_kernel_diag(kernel, X, 
dtype=dtype, op_version=opv), + output_names=out[1:], + op_version=opv, + ) + ) else: # Code scikit-learn # K_trans = self.kernel_(X, self.X_train_) @@ -93,17 +112,21 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator, # y_mean = self._y_train_mean + y_mean * self._y_train_std k_trans = convert_kernel( - kernel, X, x_train=op.X_train_.astype(dtype), - dtype=dtype, optim=options.get('optim', None), - op_version=opv) - k_trans.set_onnx_name_prefix('kgpd') + kernel, + X, + x_train=op.X_train_.astype(dtype), + dtype=dtype, + optim=options.get("optim", None), + op_version=opv, + ) + k_trans.set_onnx_name_prefix("kgpd") y_mean_b = OnnxMatMul(k_trans, op.alpha_.astype(dtype), op_version=opv) mean_y = op._y_train_mean.astype(dtype) if len(mean_y.shape) == 1: mean_y = mean_y.reshape(mean_y.shape + (1,)) - if not hasattr(op, '_y_train_std') or op._y_train_std == 1: + if not hasattr(op, "_y_train_std") or op._y_train_std == 1: if isinstance(y_mean_b, (np.float32, np.float64)): y_mean_b = np.array([y_mean_b]) if isinstance(mean_y, (np.float32, np.float64)): @@ -122,19 +145,22 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator, if isinstance(mean_y, (np.float32, np.float64)): mean_y = np.array([mean_y]) y_mean = OnnxAdd( - OnnxMul(y_mean_b, var_y, op_version=opv), - mean_y, op_version=opv) + OnnxMul(y_mean_b, var_y, op_version=opv), mean_y, op_version=opv + ) - y_mean.set_onnx_name_prefix('gpr') + y_mean.set_onnx_name_prefix("gpr") y_mean_reshaped = OnnxReshapeApi13( - y_mean, np.array([-1, 1], dtype=np.int64), - op_version=opv, output_names=out[:1]) + y_mean, + np.array([-1, 1], dtype=np.int64), + op_version=opv, + output_names=out[:1], + ) outputs = [y_mean_reshaped] - if options['return_cov']: + if options["return_cov"]: raise NotImplementedError() - if options['return_std']: - if hasattr(op, '_K_inv') and op._K_inv is not None: + if options["return_std"]: + if hasattr(op, "_K_inv") and op._K_inv is not None: # scikit-learn < 0.24.2 _K_inv = op._K_inv else: @@ -143,40 +169,45 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator, _K_inv = L_inv.dot(L_inv.T) # y_var = self.kernel_.diag(X) - y_var = convert_kernel_diag(kernel, X, dtype=dtype, - optim=options.get('optim', None), - op_version=opv) + y_var = convert_kernel_diag( + kernel, X, dtype=dtype, optim=options.get("optim", None), op_version=opv + ) # y_var -= np.einsum("ij,ij->i", # np.dot(K_trans, self._K_inv), K_trans) k_dot = OnnxMatMul(k_trans, _K_inv.astype(dtype), op_version=opv) ys_var = OnnxSub( - y_var, OnnxReduceSumApi11( + y_var, + OnnxReduceSumApi11( OnnxMul(k_dot, k_trans, op_version=opv), - axes=[1], keepdims=0, op_version=opv), - op_version=opv) + axes=[1], + keepdims=0, + op_version=opv, + ), + op_version=opv, + ) # y_var_negative = y_var < 0 # if np.any(y_var_negative): # y_var[y_var_negative] = 0.0 - ys0_var = OnnxMax(ys_var, np.array([0], dtype=dtype), - op_version=opv) + ys0_var = OnnxMax(ys_var, np.array([0], dtype=dtype), op_version=opv) - if hasattr(op, '_y_train_std') and op._y_train_std != 1: + if hasattr(op, "_y_train_std") and op._y_train_std != 1: # y_var = y_var * self._y_train_std**2 - ys0_var = OnnxMul(ys0_var, var_y ** 2, op_version=opv) + ys0_var = OnnxMul(ys0_var, var_y**2, op_version=opv) # var = np.sqrt(ys0_var) var = OnnxSqrt(ys0_var, output_names=out[1:], op_version=opv) - var.set_onnx_name_prefix('gprv') + var.set_onnx_name_prefix("gprv") outputs.append(var) for o in outputs: o.add_to(scope, container) -def 
convert_gaussian_process_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_gaussian_process_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ The method *predict* from class *GaussianProcessClassifier* may cache some results if it is called with parameter @@ -196,140 +227,165 @@ def convert_gaussian_process_classifier(scope: Scope, operator: Operator, raise RuntimeError("container.target_opset must not be None") if OnnxEinsum is None or OnnxErf is None: raise RuntimeError( - "target opset must be >= 12 for operator 'einsum' and 'erf'.") + "target opset must be >= 12 for operator 'einsum' and 'erf'." + ) if LAMBDAS is None: raise RuntimeError("Only scikit-learn>=0.22 is supported.") outputs = [] options = container.get_options(op, dict(optim=None)) - if hasattr(op, 'kernel_') and op.kernel_ is not None: + if hasattr(op, "kernel_") and op.kernel_ is not None: kernel = op.kernel_ elif op.kernel is None: - kernel = (C(1.0, constant_value_bounds="fixed") * - RBF(1.0, length_scale_bounds="fixed")) + kernel = C(1.0, constant_value_bounds="fixed") * RBF( + 1.0, length_scale_bounds="fixed" + ) else: kernel = op.kernel - if not hasattr(op_est, 'X_train_'): + if not hasattr(op_est, "X_train_"): raise NotImplementedError("Only binary classification is iplemented.") dtype = guess_numpy_type(X.type) if dtype != np.float64: dtype = np.float32 K_starT = convert_kernel( - kernel, X, x_train=op_est.X_train_.astype(dtype), dtype=dtype, - optim=options.get('optim', None), op_version=opv) + kernel, + X, + x_train=op_est.X_train_.astype(dtype), + dtype=dtype, + optim=options.get("optim", None), + op_version=opv, + ) K_star = OnnxTranspose(K_starT, op_version=opv) - K_star.set_onnx_name_prefix('kstar') + K_star.set_onnx_name_prefix("kstar") # common # f_star = K_star.T.dot(self.y_train_ - self.pi_) - f_star_right = (op_est.y_train_ - op_est.pi_).astype( - dtype).reshape((-1, 1)) + f_star_right = (op_est.y_train_ - op_est.pi_).astype(dtype).reshape((-1, 1)) f_star = OnnxMatMul(K_starT, f_star_right, op_version=opv) - f_star.set_onnx_name_prefix('f_star') + f_star.set_onnx_name_prefix("f_star") best = OnnxCast( OnnxNot( - OnnxLess(f_star, np.array([0], dtype=dtype), op_version=opv), - op_version=opv), - to=onnx_proto.TensorProto.INT64, op_version=opv) + OnnxLess(f_star, np.array([0], dtype=dtype), op_version=opv), op_version=opv + ), + to=onnx_proto.TensorProto.INT64, + op_version=opv, + ) classes = OnnxArrayFeatureExtractor(op.classes_.astype(np.int64), best) labels = OnnxTranspose(classes, op_version=opv, output_names=out[:1]) - labels.set_onnx_name_prefix('labels') + labels.set_onnx_name_prefix("labels") outputs.append(labels) # predict_proba # a x = b, x = a^-1 b # v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star) # Line 5 - v = OnnxSolve(op_est.L_.astype(dtype), - OnnxMul(op_est.W_sr_[:, np.newaxis].astype(dtype), - K_star, op_version=opv), - op_version=opv) - v.set_onnx_name_prefix('solve') + v = OnnxSolve( + op_est.L_.astype(dtype), + OnnxMul(op_est.W_sr_[:, np.newaxis].astype(dtype), K_star, op_version=opv), + op_version=opv, + ) + v.set_onnx_name_prefix("solve") # var_f_star = self.kernel_.diag(X) - np.einsum("ij,ij->j", v, v) var_f_star_kernel = convert_kernel_diag( - kernel, X, dtype=dtype, - optim=options.get('optim', None), op_version=opv) - var_f_star_kernel.set_onnx_name_prefix('diag') - var_f_star = OnnxSub(var_f_star_kernel, - OnnxEinsum(v, v, equation="ij,ij->j", - op_version=opv), - op_version=opv) - 
var_f_star.set_onnx_name_prefix('var_f_star') + kernel, X, dtype=dtype, optim=options.get("optim", None), op_version=opv + ) + var_f_star_kernel.set_onnx_name_prefix("diag") + var_f_star = OnnxSub( + var_f_star_kernel, + OnnxEinsum(v, v, equation="ij,ij->j", op_version=opv), + op_version=opv, + ) + var_f_star.set_onnx_name_prefix("var_f_star") # alpha = 1 / (2 * var_f_star) - alpha = OnnxReciprocal(OnnxMul(var_f_star, np.array([2], dtype=dtype), - op_version=opv), - op_version=opv) - alpha.set_onnx_name_prefix('alpha') + alpha = OnnxReciprocal( + OnnxMul(var_f_star, np.array([2], dtype=dtype), op_version=opv), op_version=opv + ) + alpha.set_onnx_name_prefix("alpha") # gamma = LAMBDAS * f_star - gamma = OnnxMul(LAMBDAS.astype(dtype), - OnnxReshapeApi13( - f_star, np.array([1, -1], dtype=np.int64), - op_version=opv), - op_version=opv) - gamma.set_onnx_name_prefix('gamma') + gamma = OnnxMul( + LAMBDAS.astype(dtype), + OnnxReshapeApi13(f_star, np.array([1, -1], dtype=np.int64), op_version=opv), + op_version=opv, + ) + gamma.set_onnx_name_prefix("gamma") # integrals = np.sqrt(np.pi / alpha) * # erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2))) / # (2 * np.sqrt(var_f_star * 2 * np.pi)) - integrals_1 = OnnxSqrt(OnnxDiv(np.array([np.pi], dtype=dtype), - alpha, op_version=opv), - op_version=opv) - integrals_1.set_onnx_name_prefix('int1') - - integrals_2_1 = OnnxAdd(alpha, OnnxPow(LAMBDAS.astype(dtype), - np.array([2], dtype=dtype), - op_version=opv), - op_version=opv) - integrals_2_1.set_onnx_name_prefix('int21') - - integrals_2_2 = OnnxSqrt(OnnxDiv(alpha, integrals_2_1, op_version=opv), - op_version=opv) - integrals_2_2.set_onnx_name_prefix('int22') + integrals_1 = OnnxSqrt( + OnnxDiv(np.array([np.pi], dtype=dtype), alpha, op_version=opv), op_version=opv + ) + integrals_1.set_onnx_name_prefix("int1") + + integrals_2_1 = OnnxAdd( + alpha, + OnnxPow(LAMBDAS.astype(dtype), np.array([2], dtype=dtype), op_version=opv), + op_version=opv, + ) + integrals_2_1.set_onnx_name_prefix("int21") + + integrals_2_2 = OnnxSqrt( + OnnxDiv(alpha, integrals_2_1, op_version=opv), op_version=opv + ) + integrals_2_2.set_onnx_name_prefix("int22") integrals_div = OnnxMul( np.array([2], dtype=dtype), OnnxSqrt( OnnxMul( - OnnxMul(var_f_star, np.array([2], dtype=dtype), - op_version=opv), - np.array([np.pi], dtype=dtype), op_version=opv), - op_version=opv), - op_version=opv) - integrals_div.set_onnx_name_prefix('intdiv') + OnnxMul(var_f_star, np.array([2], dtype=dtype), op_version=opv), + np.array([np.pi], dtype=dtype), + op_version=opv, + ), + op_version=opv, + ), + op_version=opv, + ) + integrals_div.set_onnx_name_prefix("intdiv") integrals = OnnxMul( integrals_1, - OnnxDiv(OnnxErf(OnnxMul(gamma, integrals_2_2, op_version=opv), - op_version=opv), - integrals_div, op_version=opv), - op_version=opv) - integrals.set_onnx_name_prefix('integrals') + OnnxDiv( + OnnxErf(OnnxMul(gamma, integrals_2_2, op_version=opv), op_version=opv), + integrals_div, + op_version=opv, + ), + op_version=opv, + ) + integrals.set_onnx_name_prefix("integrals") # pi_star = (COEFS * integrals).sum(axis=0) + .5 * COEFS.sum() - coef_sum = (.5 * COEFS.sum()).astype(dtype) + coef_sum = (0.5 * COEFS.sum()).astype(dtype) if not isinstance(coef_sum, np.ndarray): coef_sum = np.array([coef_sum]) pi_star = OnnxAdd( OnnxReduceSumApi11( OnnxMul(COEFS.astype(dtype), integrals, op_version=opv), - op_version=opv, axes=[0]), - coef_sum, op_version=opv) - pi_star.set_onnx_name_prefix('pi_star') + op_version=opv, + axes=[0], + ), + coef_sum, + op_version=opv, + ) + 
pi_star.set_onnx_name_prefix("pi_star") pi_star = OnnxReshapeApi13( - pi_star, np.array([-1, 1], dtype=np.int64), - op_version=opv) - pi_star.set_onnx_name_prefix('pi_star2') + pi_star, np.array([-1, 1], dtype=np.int64), op_version=opv + ) + pi_star.set_onnx_name_prefix("pi_star2") final = OnnxConcat( - OnnxAdd(OnnxNeg(pi_star, op_version=opv), - np.array([1], dtype=dtype), - op_version=opv), - pi_star, op_version=opv, axis=1, - output_names=out[1:2]) + OnnxAdd( + OnnxNeg(pi_star, op_version=opv), np.array([1], dtype=dtype), op_version=opv + ), + pi_star, + op_version=opv, + axis=1, + output_names=out[1:2], + ) outputs.append(final) for o in outputs: @@ -337,16 +393,24 @@ def convert_gaussian_process_classifier(scope: Scope, operator: Operator, if OnnxConstantOfShape is not None: - register_converter('SklearnGaussianProcessRegressor', - convert_gaussian_process_regressor, - options={'return_cov': [False, True], - 'return_std': [False, True], - 'optim': [None, 'cdist']}) + register_converter( + "SklearnGaussianProcessRegressor", + convert_gaussian_process_regressor, + options={ + "return_cov": [False, True], + "return_std": [False, True], + "optim": [None, "cdist"], + }, + ) if OnnxEinsum is not None and OnnxErf is not None: - register_converter('SklearnGaussianProcessClassifier', - convert_gaussian_process_classifier, - options={'optim': [None, 'cdist'], - 'nocl': [False, True], - 'output_class_labels': [False, True], - 'zipmap': [False, True]}) + register_converter( + "SklearnGaussianProcessClassifier", + convert_gaussian_process_classifier, + options={ + "optim": [None, "cdist"], + "nocl": [False, True], + "output_class_labels": [False, True], + "zipmap": [False, True], + }, + ) diff --git a/skl2onnx/operator_converters/gradient_boosting.py b/skl2onnx/operator_converters/gradient_boosting.py index 1d6165849..49354a411 100644 --- a/skl2onnx/operator_converters/gradient_boosting.py +++ b/skl2onnx/operator_converters/gradient_boosting.py @@ -4,172 +4,217 @@ import numbers import numpy as np from ..common._apply_operation import apply_cast -from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type) +from ..common.data_types import BooleanTensorType, Int64TensorType, guess_numpy_type from ..common._registration import register_converter from ..common.tree_ensemble import ( - add_tree_to_attribute_pairs, get_default_tree_classifier_attribute_pairs, - get_default_tree_regressor_attribute_pairs) + add_tree_to_attribute_pairs, + get_default_tree_classifier_attribute_pairs, + get_default_tree_regressor_attribute_pairs, +) from ..proto import onnx_proto def convert_sklearn_gradient_boosting_classifier( - scope, operator, container, op_type='TreeEnsembleClassifier', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleClassifier", + op_domain="ai.onnx.ml", + op_version=1, +): dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 op = operator.raw_operator - if op.loss not in ('deviance', 'log_loss'): + if op.loss not in ("deviance", "log_loss"): raise NotImplementedError( "Loss '{0}' is not supported yet. 
You " "may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.".format(op.loss)) + "https://github.com/onnx/sklearn-onnx/issues.".format(op.loss) + ) attrs = get_default_tree_classifier_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name(op_type) + attrs["name"] = scope.get_unique_operator_name(op_type) - transform = 'LOGISTIC' if op.n_classes_ == 2 else 'SOFTMAX' - if op.init == 'zero': + transform = "LOGISTIC" if op.n_classes_ == 2 else "SOFTMAX" + if op.init == "zero": loss = op._loss if hasattr(op, "_loss") else op.loss_ base_values = np.zeros(loss.K) elif op.init is None: - if hasattr(op.estimators_[0, 0], 'n_features_in_'): + if hasattr(op.estimators_[0, 0], "n_features_in_"): # sklearn >= 1.2 n_features = op.estimators_[0, 0].n_features_in_ else: # sklearn < 1.2 n_features = op.estimators_[0, 0].n_features_ x0 = np.zeros((1, n_features)) - if hasattr(op, '_raw_predict_init'): + if hasattr(op, "_raw_predict_init"): # sklearn >= 0.21 base_values = op._raw_predict_init(x0).ravel() - elif hasattr(op, '_init_decision_function'): + elif hasattr(op, "_init_decision_function"): # sklearn >= 0.20 and sklearn < 0.21 base_values = op._init_decision_function(x0).ravel() else: raise RuntimeError("scikit-learn < 0.19 is not supported.") else: raise NotImplementedError( - 'Setting init to an estimator is not supported, you may raise an ' - 'issue at https://github.com/onnx/sklearn-onnx/issues.') + "Setting init to an estimator is not supported, you may raise an " + "issue at https://github.com/onnx/sklearn-onnx/issues." + ) - attrs['base_values'] = [float(v) for v in base_values] + attrs["base_values"] = [float(v) for v in base_values] options = container.get_options(op, dict(raw_scores=False)) - if not options['raw_scores']: - attrs['post_transform'] = transform + if not options["raw_scores"]: + attrs["post_transform"] = transform classes = op.classes_ if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes): class_labels = [int(i) for i in classes] - attrs['classlabels_int64s'] = class_labels + attrs["classlabels_int64s"] = class_labels elif all(isinstance(i, str) for i in classes): class_labels = [str(i) for i in classes] - attrs['classlabels_strings'] = class_labels + attrs["classlabels_strings"] = class_labels else: - raise ValueError('Labels must be all integer or all strings.') + raise ValueError("Labels must be all integer or all strings.") tree_weight = op.learning_rate - n_est = (op.n_estimators_ if hasattr(op, 'n_estimators_') else - op.n_estimators) + n_est = op.n_estimators_ if hasattr(op, "n_estimators_") else op.n_estimators if op.n_classes_ == 2: for tree_id in range(n_est): tree = op.estimators_[tree_id][0].tree_ - add_tree_to_attribute_pairs(attrs, True, tree, tree_id, - tree_weight, 0, False, True, - dtype=dtype) + add_tree_to_attribute_pairs( + attrs, True, tree, tree_id, tree_weight, 0, False, True, dtype=dtype + ) else: for i in range(n_est): for c in range(op.n_classes_): tree_id = i * op.n_classes_ + c tree = op.estimators_[i][c].tree_ - add_tree_to_attribute_pairs(attrs, True, tree, tree_id, - tree_weight, c, False, True, - dtype=dtype) + add_tree_to_attribute_pairs( + attrs, True, tree, tree_id, tree_weight, c, False, True, dtype=dtype + ) if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype) 
input_name = operator.input_full_names if isinstance(operator.inputs[0].type, BooleanTensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - - apply_cast(scope, input_name, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_input_name = scope.get_unique_variable_name("cast_input") + + apply_cast( + scope, + input_name, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name container.add_node( - op_type, input_name, + op_type, + input_name, [operator.outputs[0].full_name, operator.outputs[1].full_name], - op_domain=op_domain, op_version=op_version, **attrs) + op_domain=op_domain, + op_version=op_version, + **attrs + ) def convert_sklearn_gradient_boosting_regressor( - scope, operator, container, op_type='TreeEnsembleRegressor', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): op = operator.raw_operator attrs = get_default_tree_regressor_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name(op_type) - attrs['n_targets'] = 1 + attrs["name"] = scope.get_unique_operator_name(op_type) + attrs["n_targets"] = 1 - if op.init == 'zero': + if op.init == "zero": loss = op._loss if hasattr(op, "_loss") else op.loss_ cst = np.zeros(loss.K) elif op.init is None: # constant_ was introduced in scikit-learn 0.21. - if hasattr(op.init_, 'constant_'): + if hasattr(op.init_, "constant_"): cst = [float(x) for x in op.init_.constant_] - elif op.loss == 'ls': + elif op.loss == "ls": cst = [op.init_.mean] else: cst = [op.init_.quantile] else: raise NotImplementedError( - 'Setting init to an estimator is not supported, you may raise an ' - 'issue at https://github.com/onnx/sklearn-onnx/issues.') + "Setting init to an estimator is not supported, you may raise an " + "issue at https://github.com/onnx/sklearn-onnx/issues." 
+ ) - attrs['base_values'] = [float(x) for x in cst] + attrs["base_values"] = [float(x) for x in cst] tree_weight = op.learning_rate - n_est = (op.n_estimators_ if hasattr(op, 'n_estimators_') else - op.n_estimators) + n_est = op.n_estimators_ if hasattr(op, "n_estimators_") else op.n_estimators dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 for i in range(n_est): tree = op.estimators_[i][0].tree_ tree_id = i - add_tree_to_attribute_pairs(attrs, False, tree, tree_id, tree_weight, - 0, False, True, dtype=dtype) + add_tree_to_attribute_pairs( + attrs, False, tree, tree_id, tree_weight, 0, False, True, dtype=dtype + ) if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype) input_name = operator.input_full_names if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_input_name = scope.get_unique_variable_name("cast_input") + + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name container.add_node( - op_type, input_name, operator.output_full_names, - op_domain=op_domain, op_version=op_version, **attrs) - - -register_converter('SklearnGradientBoostingClassifier', - convert_sklearn_gradient_boosting_classifier, - options={'zipmap': [True, False, 'columns'], - 'raw_scores': [True, False], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnGradientBoostingRegressor', - convert_sklearn_gradient_boosting_regressor) + op_type, + input_name, + operator.output_full_names, + op_domain=op_domain, + op_version=op_version, + **attrs + ) + + +register_converter( + "SklearnGradientBoostingClassifier", + convert_sklearn_gradient_boosting_classifier, + options={ + "zipmap": [True, False, "columns"], + "raw_scores": [True, False], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnGradientBoostingRegressor", convert_sklearn_gradient_boosting_regressor +) diff --git a/skl2onnx/operator_converters/grid_search_cv.py b/skl2onnx/operator_converters/grid_search_cv.py index 6bebad905..f2c8b6135 100644 --- a/skl2onnx/operator_converters/grid_search_cv.py +++ b/skl2onnx/operator_converters/grid_search_cv.py @@ -7,8 +7,9 @@ from .._supported_operators import sklearn_operator_name_map -def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_grid_search_cv( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for scikit-learn's GridSearchCV. 
""" @@ -16,8 +17,7 @@ def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator, grid_search_op = operator.raw_operator best_estimator = grid_search_op.best_estimator_ op_type = sklearn_operator_name_map[type(best_estimator)] - grid_search_operator = scope.declare_local_operator( - op_type, best_estimator) + grid_search_operator = scope.declare_local_operator(op_type, best_estimator) container.add_options(id(best_estimator), opts) scope.add_options(id(best_estimator), opts) grid_search_operator.inputs = operator.inputs @@ -28,6 +28,6 @@ def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator, apply_identity(scope, v.full_name, o.full_name, container) -register_converter('SklearnGridSearchCV', - convert_sklearn_grid_search_cv, - options="passthrough") +register_converter( + "SklearnGridSearchCV", convert_sklearn_grid_search_cv, options="passthrough" +) diff --git a/skl2onnx/operator_converters/id_op.py b/skl2onnx/operator_converters/id_op.py index f7318d4e8..5f0afd91c 100644 --- a/skl2onnx/operator_converters/id_op.py +++ b/skl2onnx/operator_converters/id_op.py @@ -7,12 +7,16 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_identity(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_identity( + scope: Scope, operator: Operator, container: ModelComponentContainer +): apply_identity( - scope, operator.inputs[0].full_name, - operator.outputs[0].full_name, container, - operator_name=scope.get_unique_operator_name('CIdentity')) + scope, + operator.inputs[0].full_name, + operator.outputs[0].full_name, + container, + operator_name=scope.get_unique_operator_name("CIdentity"), + ) -register_converter('SklearnIdentity', convert_sklearn_identity) +register_converter("SklearnIdentity", convert_sklearn_identity) diff --git a/skl2onnx/operator_converters/imputer_op.py b/skl2onnx/operator_converters/imputer_op.py index 82fdfe413..0aee5f656 100644 --- a/skl2onnx/operator_converters/imputer_op.py +++ b/skl2onnx/operator_converters/imputer_op.py @@ -11,31 +11,36 @@ from .common import concatenate_variables -def convert_sklearn_imputer(scope: Scope, operator: Operator, - container: ModelComponentContainer): - op_type = 'Imputer' - attrs = {'name': scope.get_unique_operator_name(op_type)} +def convert_sklearn_imputer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + op_type = "Imputer" + attrs = {"name": scope.get_unique_operator_name(op_type)} op = operator.raw_operator - if (hasattr(op, 'fill_value') and isinstance(op.fill_value, str) and - op.fill_value.lower() != 'nan'): - raise RuntimeError("Imputer cannot fill missing values with a " - "string '%s'." % op.fill_value) - if not hasattr(op, 'statistics_'): - raise RuntimeError("Member statistics_ is not present, was the " - "model fitted?") + if ( + hasattr(op, "fill_value") + and isinstance(op.fill_value, str) + and op.fill_value.lower() != "nan" + ): + raise RuntimeError( + "Imputer cannot fill missing values with a " "string '%s'." % op.fill_value + ) + if not hasattr(op, "statistics_"): + raise RuntimeError( + "Member statistics_ is not present, was the " "model fitted?" + ) if isinstance(operator.inputs[0].type, StringTensorType): if not isinstance(op.missing_values, (str, np.str_)): raise NotImplementedError( "The converter is implemented when the missing values " - "are string not %r." % type(op.missing_values)) + "are string not %r." 
% type(op.missing_values) + ) zero = scope.get_unique_variable_name("zero") - container.add_initializer( - zero, onnx_proto.TensorProto.INT64, [1], [0]) + container.add_initializer(zero, onnx_proto.TensorProto.INT64, [1], [0]) - concatenated_feature = concatenate_variables( - scope, operator.inputs, container) + concatenated_feature = concatenate_variables(scope, operator.inputs, container) names = [] for i in range(op.statistics_.size): # loop on features @@ -45,68 +50,83 @@ def convert_sklearn_imputer(scope: Scope, operator: Operator, else: skl_fill_value = op.fill_value container.add_node( - "LabelEncoder", [zero], [fill_value], - keys_int64s=[0], values_strings=[op.statistics_[i]], - default_string=skl_fill_value, op_domain='ai.onnx.ml', - op_version=2) + "LabelEncoder", + [zero], + [fill_value], + keys_int64s=[0], + values_strings=[op.statistics_[i]], + default_string=skl_fill_value, + op_domain="ai.onnx.ml", + op_version=2, + ) init = scope.get_unique_variable_name("i%d" % i) - container.add_initializer( - init, onnx_proto.TensorProto.INT64, [1], [i]) + container.add_initializer(init, onnx_proto.TensorProto.INT64, [1], [i]) name = scope.get_unique_variable_name("impi%d" % i) container.add_node( - "ArrayFeatureExtractor", [concatenated_feature, init], [name], - op_domain='ai.onnx.ml') + "ArrayFeatureExtractor", + [concatenated_feature, init], + [name], + op_domain="ai.onnx.ml", + ) cond = scope.get_unique_variable_name("impc%d" % i) container.add_node( - "LabelEncoder", [name], [cond], + "LabelEncoder", + [name], + [cond], keys_strings=[str(op.missing_values)], - values_int64s=[1], default_int64=0, - op_domain='ai.onnx.ml', op_version=2) + values_int64s=[1], + default_int64=0, + op_domain="ai.onnx.ml", + op_version=2, + ) condb = scope.get_unique_variable_name("impc%d" % i) - container.add_node("Cast", [cond], [condb], - to=onnx_proto.TensorProto.BOOL) + container.add_node("Cast", [cond], [condb], to=onnx_proto.TensorProto.BOOL) repli = scope.get_unique_variable_name("nomiss%d" % i) container.add_node("Where", [condb, fill_value, name], [repli]) names.append(repli) - apply_concat( - scope, names, operator.outputs[0].full_name, container, axis=1) + apply_concat(scope, names, operator.outputs[0].full_name, container, axis=1) else: if isinstance(operator.inputs[0].type, Int64TensorType): - attrs['imputed_value_int64s'] = op.statistics_.astype(np.int64) + attrs["imputed_value_int64s"] = op.statistics_.astype(np.int64) use_int = True - delta = np.max( - np.abs(attrs['imputed_value_int64s'] - op.statistics_)) + delta = np.max(np.abs(attrs["imputed_value_int64s"] - op.statistics_)) if delta != 0: raise RuntimeError( "SimpleImputer takes integer as input but nan values are " "replaced by float {} != {}.".format( - attrs['imputed_value_int64s'], op.statistics_)) + attrs["imputed_value_int64s"], op.statistics_ + ) + ) else: - attrs['imputed_value_floats'] = op.statistics_.astype(np.float32) + attrs["imputed_value_floats"] = op.statistics_.astype(np.float32) use_int = False - if isinstance(op.missing_values, str) and op.missing_values == 'NaN': - attrs['replaced_value_float'] = np.NaN + if isinstance(op.missing_values, str) and op.missing_values == "NaN": + attrs["replaced_value_float"] = np.NaN elif isinstance(op.missing_values, float): if use_int: ar = np.array([op.missing_values]).astype(np.int64) - attrs['replaced_value_int64'] = ar[0] + attrs["replaced_value_int64"] = ar[0] else: - attrs['replaced_value_float'] = float(op.missing_values) + attrs["replaced_value_float"] = 
float(op.missing_values) else: raise RuntimeError( "Unsupported proposed value '{0}'. You may raise an issue at " "https://github.com/onnx/sklearn-onnx/issues." - "".format(op.missing_values)) + "".format(op.missing_values) + ) - concatenated_feature = concatenate_variables( - scope, operator.inputs, container) + concatenated_feature = concatenate_variables(scope, operator.inputs, container) container.add_node( - op_type, concatenated_feature, - operator.outputs[0].full_name, op_domain='ai.onnx.ml', **attrs) + op_type, + concatenated_feature, + operator.outputs[0].full_name, + op_domain="ai.onnx.ml", + **attrs + ) -register_converter('SklearnImputer', convert_sklearn_imputer) -register_converter('SklearnSimpleImputer', convert_sklearn_imputer) +register_converter("SklearnImputer", convert_sklearn_imputer) +register_converter("SklearnSimpleImputer", convert_sklearn_imputer) diff --git a/skl2onnx/operator_converters/isolation_forest.py b/skl2onnx/operator_converters/isolation_forest.py index dfb63fbb4..8a17a6032 100644 --- a/skl2onnx/operator_converters/isolation_forest.py +++ b/skl2onnx/operator_converters/isolation_forest.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np + try: from sklearn.ensemble._iforest import _average_path_length except ImportError: @@ -8,31 +9,53 @@ from sklearn.ensemble.iforest import _average_path_length from ..common._registration import register_converter from ..common.data_types import ( - BooleanTensorType, Int64TensorType, - guess_numpy_type, guess_proto_type) + BooleanTensorType, + Int64TensorType, + guess_numpy_type, + guess_proto_type, +) from ..common.tree_ensemble import ( add_tree_to_attribute_pairs, - get_default_tree_regressor_attribute_pairs) + get_default_tree_regressor_attribute_pairs, +) from ..proto import onnx_proto from ..algebra.onnx_ops import ( - OnnxTreeEnsembleRegressor_1, OnnxLog, - OnnxCast, OnnxLess, OnnxLabelEncoder, OnnxMul, - OnnxGreater, OnnxAdd, OnnxDiv, OnnxSum, OnnxNeg, - OnnxReshapeApi13, OnnxEqual, OnnxPow, OnnxGather, OnnxMax) + OnnxTreeEnsembleRegressor_1, + OnnxLog, + OnnxCast, + OnnxLess, + OnnxLabelEncoder, + OnnxMul, + OnnxGreater, + OnnxAdd, + OnnxDiv, + OnnxSum, + OnnxNeg, + OnnxReshapeApi13, + OnnxEqual, + OnnxPow, + OnnxGather, + OnnxMax, +) def convert_sklearn_isolation_forest( - scope, operator, container, op_type='TreeEnsembleRegressor', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): op = operator.raw_operator outputs = operator.outputs opv = container.target_opset - opvml = container.target_opset_any_domain('ai.onnx.ml') + opvml = container.target_opset_any_domain("ai.onnx.ml") options = container.get_options(op, dict(score_samples=None)) if opvml < 2: raise RuntimeError( - "This converter requires at least opset 2 for " - "domain 'ai.onnx.ml'.") + "This converter requires at least opset 2 for " "domain 'ai.onnx.ml'." 
+ ) input_name = operator.inputs[0] dtype = guess_numpy_type(operator.inputs[0].type) @@ -44,27 +67,29 @@ def convert_sklearn_isolation_forest( raise RuntimeError( "Converter for IsolationForest does not support the case when " "_max_features={} != number of given features {}.".format( - op._max_features, operator.inputs[0].type.shape[1])) + op._max_features, operator.inputs[0].type.shape[1] + ) + ) # decision_path scores = [] - for i, (tree, features) in enumerate( - zip(op.estimators_, op.estimators_features_)): - + for i, (tree, features) in enumerate(zip(op.estimators_, op.estimators_features_)): # X_subset = X[:, features] - gather = OnnxGather(input_name, features.astype(np.int64), - axis=1, op_version=opv) + gather = OnnxGather( + input_name, features.astype(np.int64), axis=1, op_version=opv + ) attrs = get_default_tree_regressor_attribute_pairs() - attrs['n_targets'] = 1 - add_tree_to_attribute_pairs(attrs, False, tree.tree_, 0, 1., 0, False, - True, dtype=dtype) + attrs["n_targets"] = 1 + add_tree_to_attribute_pairs( + attrs, False, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype + ) # tree leave - attrs['n_targets'] = 1 - attrs['post_transform'] = 'NONE' - attrs['target_ids'] = [0 for _ in attrs['target_ids']] - attrs['target_weights'] = [float(_) for _ in attrs['target_nodeids']] + attrs["n_targets"] = 1 + attrs["post_transform"] = "NONE" + attrs["target_ids"] = [0 for _ in attrs["target_ids"]] + attrs["target_weights"] = [float(_) for _ in attrs["target_nodeids"]] leave = OnnxTreeEnsembleRegressor_1(gather, op_version=1, **attrs) # tree - retrieve node_sample @@ -75,22 +100,25 @@ def convert_sklearn_isolation_forest( keys = [float(_[0]) for _ in ordered] node_sample = OnnxReshapeApi13( OnnxLabelEncoder( - leave, op_version=opvml, - keys_floats=keys, values_floats=values), + leave, op_version=opvml, keys_floats=keys, values_floats=values + ), np.array([-1, 1], dtype=np.int64), - op_version=opv) + op_version=opv, + ) else: keys = [int(_[0]) for _ in ordered] values = [float(_[1]) for _ in ordered] node_sample = OnnxReshapeApi13( OnnxLabelEncoder( - OnnxCast(leave, op_version=opv, - to=onnx_proto.TensorProto.INT64), + OnnxCast(leave, op_version=opv, to=onnx_proto.TensorProto.INT64), op_version=opvml, - keys_int64s=keys, values_floats=values), + keys_int64s=keys, + values_floats=values, + ), np.array([-1, 1], dtype=np.int64), - op_version=opv) - node_sample.set_onnx_name_prefix('node_sample%d' % i) + op_version=opv, + ) + node_sample.set_onnx_name_prefix("node_sample%d" % i) # tree - retrieve path_length labels = _build_labels(tree.tree_, output="path_length") @@ -101,121 +129,134 @@ def convert_sklearn_isolation_forest( values = [float(_[1]) for _ in ordered] path_length = OnnxReshapeApi13( OnnxLabelEncoder( - leave, op_version=opvml, - keys_floats=keys, values_floats=values), + leave, op_version=opvml, keys_floats=keys, values_floats=values + ), np.array([-1, 1], dtype=np.int64), - op_version=opv) + op_version=opv, + ) else: keys = [int(_[0]) for _ in ordered] path_length = OnnxReshapeApi13( OnnxLabelEncoder( - OnnxCast(leave, op_version=opv, - to=onnx_proto.TensorProto.INT64), + OnnxCast(leave, op_version=opv, to=onnx_proto.TensorProto.INT64), op_version=opvml, - keys_int64s=keys, values_floats=values), + keys_int64s=keys, + values_floats=values, + ), np.array([-1, 1], dtype=np.int64), - op_version=opv) - path_length.set_onnx_name_prefix('path_length%d' % i) + op_version=opv, + ) + path_length.set_onnx_name_prefix("path_length%d" % i) # score eq2 = OnnxCast( - 
OnnxEqual(node_sample, np.array([2], dtype=np.float32), - op_version=opv), - to=proto_dtype, op_version=opv) - eq2.set_onnx_name_prefix('eq2_%d' % i) + OnnxEqual(node_sample, np.array([2], dtype=np.float32), op_version=opv), + to=proto_dtype, + op_version=opv, + ) + eq2.set_onnx_name_prefix("eq2_%d" % i) # 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma) eqp2p = OnnxCast( - OnnxGreater( - node_sample, np.array([2], dtype=np.float32), - op_version=opv), - to=proto_dtype, op_version=opv) - eqp2p.set_onnx_name_prefix('plus2_%d' % i) + OnnxGreater(node_sample, np.array([2], dtype=np.float32), op_version=opv), + to=proto_dtype, + op_version=opv, + ) + eqp2p.set_onnx_name_prefix("plus2_%d" % i) eqp2ps = OnnxMul(eqp2p, node_sample, op_version=opv) - eqp2ps.set_onnx_name_prefix('eqp2ps%d' % i) + eqp2ps.set_onnx_name_prefix("eqp2ps%d" % i) - eqp2ps_1 = OnnxAdd(eqp2ps, np.array([-1], dtype=dtype), - op_version=opv) + eqp2ps_1 = OnnxAdd(eqp2ps, np.array([-1], dtype=dtype), op_version=opv) - eqp2p_m1 = OnnxMax(eqp2ps_1, np.array([1], dtype=dtype), - op_version=opv) - eqp2p_m1.set_onnx_name_prefix('eqp2p_m1_%d' % i) + eqp2p_m1 = OnnxMax(eqp2ps_1, np.array([1], dtype=dtype), op_version=opv) + eqp2p_m1.set_onnx_name_prefix("eqp2p_m1_%d" % i) eqp_log = OnnxMul( - OnnxAdd(OnnxLog(eqp2p_m1, op_version=opv), - np.array([np.euler_gamma], dtype=dtype), - op_version=opv), - np.array([2], dtype=dtype), op_version=opv) - eqp_log.set_onnx_name_prefix('eqp_log%d' % i) + OnnxAdd( + OnnxLog(eqp2p_m1, op_version=opv), + np.array([np.euler_gamma], dtype=dtype), + op_version=opv, + ), + np.array([2], dtype=dtype), + op_version=opv, + ) + eqp_log.set_onnx_name_prefix("eqp_log%d" % i) # - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask] - eqp2p_m0 = OnnxMax(eqp2ps_1, np.array([0], dtype=dtype), - op_version=opv) - eqp2p_m0.set_onnx_name_prefix('eqp2p_m1_%d' % i) + eqp2p_m0 = OnnxMax(eqp2ps_1, np.array([0], dtype=dtype), op_version=opv) + eqp2p_m0.set_onnx_name_prefix("eqp2p_m1_%d" % i) eqp_ns = OnnxMul( OnnxDiv( eqp2p_m0, - OnnxMax(eqp2ps, np.array([1], dtype=dtype), - op_version=opv), - op_version=opv), - np.array([-2], dtype=dtype), op_version=opv) - eqp_ns.set_onnx_name_prefix('eqp_ns%d' % i) + OnnxMax(eqp2ps, np.array([1], dtype=dtype), op_version=opv), + op_version=opv, + ), + np.array([-2], dtype=dtype), + op_version=opv, + ) + eqp_ns.set_onnx_name_prefix("eqp_ns%d" % i) # np.ravel(node_indicator.sum(axis=1)) # + _average_path_length(n_samples_leaf) # - 1.0 av_path_length_log = OnnxMul( - OnnxAdd(eqp_log, eqp_ns, op_version=opv), - eqp2p, op_version=opv) - av_path_length_log.set_onnx_name_prefix('avlog%d' % i) + OnnxAdd(eqp_log, eqp_ns, op_version=opv), eqp2p, op_version=opv + ) + av_path_length_log.set_onnx_name_prefix("avlog%d" % i) av_path_length = OnnxAdd(eq2, av_path_length_log, op_version=opv) - av_path_length.set_onnx_name_prefix('avpl%d' % i) + av_path_length.set_onnx_name_prefix("avpl%d" % i) depth = OnnxAdd( OnnxAdd(path_length, av_path_length, op_version=opv), np.array([-1], dtype=dtype), - op_version=opv) - depth.set_onnx_name_prefix('depth%d' % i) + op_version=opv, + ) + depth.set_onnx_name_prefix("depth%d" % i) scores.append(depth) cst = len(op.estimators_) * _average_path_length([op.max_samples_]) - depths = OnnxDiv(OnnxSum(*scores, op_version=opv), - np.array([cst], dtype=dtype), - op_version=opv) + depths = OnnxDiv( + OnnxSum(*scores, op_version=opv), np.array([cst], dtype=dtype), op_version=opv + ) # decision_function - output_names = outputs[2].full_name if 
options['score_samples'] else None + output_names = outputs[2].full_name if options["score_samples"] else None score_samples = OnnxNeg( - OnnxPow(np.array([2], dtype=dtype), - OnnxNeg(depths, op_version=opv), - op_version=opv), - op_version=opv, output_names=output_names) + OnnxPow( + np.array([2], dtype=dtype), OnnxNeg(depths, op_version=opv), op_version=opv + ), + op_version=opv, + output_names=output_names, + ) decision = OnnxAdd( - score_samples, np.array([-op.offset_], dtype=dtype), - op_version=opv, output_names=outputs[1].full_name) - decision.set_onnx_name_prefix('dec') + score_samples, + np.array([-op.offset_], dtype=dtype), + op_version=opv, + output_names=outputs[1].full_name, + ) + decision.set_onnx_name_prefix("dec") - less = OnnxLess(decision, np.array([0], dtype=dtype), - op_version=opv) + less = OnnxLess(decision, np.array([0], dtype=dtype), op_version=opv) predict = OnnxAdd( OnnxMul( - OnnxCast(less, op_version=opv, - to=onnx_proto.TensorProto.INT64), + OnnxCast(less, op_version=opv, to=onnx_proto.TensorProto.INT64), np.array([-2], dtype=np.int64), - op_version=opv), + op_version=opv, + ), np.array([1], dtype=np.int64), op_version=opv, - output_names=outputs[0].full_name) - predict.set_onnx_name_prefix('predict') + output_names=outputs[0].full_name, + ) + predict.set_onnx_name_prefix("predict") predict.add_to(scope, container) less.add_to(scope, container) - if options['score_samples']: + if options["score_samples"]: score_samples.add_to(scope, container) @@ -225,25 +266,23 @@ def _recursive_build_labels(index, current): if tree.children_left[index] == -1: yield (index, current.copy()) else: - for it in _recursive_build_labels( - tree.children_left[index], current): + for it in _recursive_build_labels(tree.children_left[index], current): yield it - for it in _recursive_build_labels( - tree.children_right[index], current): + for it in _recursive_build_labels(tree.children_right[index], current): yield it current[index] = False paths = {} current = {} - if output == 'path_length': + if output == "path_length": for leave_index, path in _recursive_build_labels(0, current): spath = {} for nodeid, b in path.items(): if b: spath[nodeid] = 1 paths[leave_index] = sum(spath.values()) - elif output == 'node_sample': + elif output == "node_sample": for leave_index, path in _recursive_build_labels(0, current): spath = {} for nodeid, b in path.items(): @@ -255,6 +294,8 @@ def _recursive_build_labels(index, current): return paths -register_converter('SklearnIsolationForest', - convert_sklearn_isolation_forest, - options={'score_samples': [True, False]}) +register_converter( + "SklearnIsolationForest", + convert_sklearn_isolation_forest, + options={"score_samples": [True, False]}, +) diff --git a/skl2onnx/operator_converters/k_bins_discretiser.py b/skl2onnx/operator_converters/k_bins_discretiser.py index 4d4f2f318..7f31c487e 100644 --- a/skl2onnx/operator_converters/k_bins_discretiser.py +++ b/skl2onnx/operator_converters/k_bins_discretiser.py @@ -5,101 +5,156 @@ from ..proto import onnx_proto from ..common._apply_operation import ( - apply_cast, apply_concat, apply_reshape, - apply_mul, apply_add + apply_cast, + apply_concat, + apply_reshape, + apply_mul, + apply_add, ) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -def convert_sklearn_k_bins_discretiser(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_k_bins_discretiser( + scope: 
Scope, operator: Operator, container: ModelComponentContainer
+):
     op = operator.raw_operator
-    if op.encode == 'onehot':
-        raise RuntimeError("onehot encoding not supported. "
-                           "ONNX does not support sparse tensors. "
-                           "with opset < 11. You may raise an isue at "
-                           "https://github.com/onnx/sklearn-onnx/issues.")
+    if op.encode == "onehot":
+        raise RuntimeError(
+            "onehot encoding not supported. "
+            "ONNX does not support sparse tensors "
+            "with opset < 11. You may raise an issue at "
+            "https://github.com/onnx/sklearn-onnx/issues."
+        )
 
-    ranges = list(map(lambda e: e[1:-1] if len(e) > 2
-                      else [np.finfo(np.float32).max], op.bin_edges_))
+    ranges = list(
+        map(
+            lambda e: e[1:-1] if len(e) > 2 else [np.finfo(np.float32).max],
+            op.bin_edges_,
+        )
+    )
     digitised_output_name = [None] * len(ranges)
     last_column_name = None
 
     for i, item in enumerate(ranges):
-        digitised_output_name[i] = (
-            scope.get_unique_variable_name('digitised_output_{}'.format(i)))
-        column_index_name = scope.get_unique_variable_name('column_index')
-        range_column_name = scope.get_unique_variable_name('range_column')
-        column_name = scope.get_unique_variable_name('column')
-        cast_column_name = scope.get_unique_variable_name('cast_column')
-        less_result_name = scope.get_unique_variable_name('less_result')
-        cast_result_name = scope.get_unique_variable_name('cast_result')
-        concatenated_array_name = scope.get_unique_variable_name(
-            'concatenated_array')
-        argmax_output_name = scope.get_unique_variable_name('argmax_output')
+        digitised_output_name[i] = scope.get_unique_variable_name(
+            "digitised_output_{}".format(i)
+        )
+        column_index_name = scope.get_unique_variable_name("column_index")
+        range_column_name = scope.get_unique_variable_name("range_column")
+        column_name = scope.get_unique_variable_name("column")
+        cast_column_name = scope.get_unique_variable_name("cast_column")
+        less_result_name = scope.get_unique_variable_name("less_result")
+        cast_result_name = scope.get_unique_variable_name("cast_result")
+        concatenated_array_name = scope.get_unique_variable_name("concatenated_array")
+        argmax_output_name = scope.get_unique_variable_name("argmax_output")
 
-        container.add_initializer(column_index_name,
-                                  onnx_proto.TensorProto.INT64, [], [i])
-        container.add_initializer(range_column_name,
-                                  onnx_proto.TensorProto.FLOAT,
-                                  [len(item)], item)
+        container.add_initializer(
+            column_index_name, onnx_proto.TensorProto.INT64, [], [i]
+        )
+        container.add_initializer(
+            range_column_name, onnx_proto.TensorProto.FLOAT, [len(item)], item
+        )
         container.add_node(
-            'ArrayFeatureExtractor',
-            [operator.inputs[0].full_name, column_index_name], column_name,
-            name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
-            op_domain='ai.onnx.ml')
-        apply_cast(scope, column_name, cast_column_name,
-                   container, to=onnx_proto.TensorProto.FLOAT)
+            "ArrayFeatureExtractor",
+            [operator.inputs[0].full_name, column_index_name],
+            column_name,
+            name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+            op_domain="ai.onnx.ml",
+        )
+        apply_cast(
+            scope,
+            column_name,
+            cast_column_name,
+            container,
+            to=onnx_proto.TensorProto.FLOAT,
+        )
         container.add_node(
-            'Less', [cast_column_name, range_column_name],
+            "Less",
+            [cast_column_name, range_column_name],
            less_result_name,
-            name=scope.get_unique_operator_name('Less'))
-        apply_cast(scope, less_result_name, cast_result_name,
-                   container, to=onnx_proto.TensorProto.FLOAT)
+            name=scope.get_unique_operator_name("Less"),
+        )
+        apply_cast(
+            scope,
+            less_result_name,
+            cast_result_name,
container, + to=onnx_proto.TensorProto.FLOAT, + ) if last_column_name is None: - last_column_name = scope.get_unique_variable_name('last_column') - zero_float = scope.get_unique_variable_name('zero_float') - one_float = scope.get_unique_variable_name('one_float') - zero_column = scope.get_unique_variable_name('zero_column') + last_column_name = scope.get_unique_variable_name("last_column") + zero_float = scope.get_unique_variable_name("zero_float") + one_float = scope.get_unique_variable_name("one_float") + zero_column = scope.get_unique_variable_name("zero_column") container.add_initializer( - one_float, onnx_proto.TensorProto.FLOAT, - [1], np.ones(1)) + one_float, onnx_proto.TensorProto.FLOAT, [1], np.ones(1) + ) container.add_initializer( - zero_float, onnx_proto.TensorProto.FLOAT, - [1], np.zeros(1)) - apply_mul(scope, [cast_column_name, zero_float], zero_column, - container, broadcast=1) - apply_add(scope, [zero_column, one_float], last_column_name, - container, broadcast=1) + zero_float, onnx_proto.TensorProto.FLOAT, [1], np.zeros(1) + ) + apply_mul( + scope, + [cast_column_name, zero_float], + zero_column, + container, + broadcast=1, + ) + apply_add( + scope, + [zero_column, one_float], + last_column_name, + container, + broadcast=1, + ) - apply_concat(scope, [cast_result_name, last_column_name], - concatenated_array_name, container, axis=1) - container.add_node('ArgMax', concatenated_array_name, - argmax_output_name, axis=1, - name=scope.get_unique_operator_name('ArgMax')) - if op.encode == 'onehot-dense': - onehot_result_name = scope.get_unique_variable_name( - 'onehot_result') + apply_concat( + scope, + [cast_result_name, last_column_name], + concatenated_array_name, + container, + axis=1, + ) + container.add_node( + "ArgMax", + concatenated_array_name, + argmax_output_name, + axis=1, + name=scope.get_unique_operator_name("ArgMax"), + ) + if op.encode == "onehot-dense": + onehot_result_name = scope.get_unique_variable_name("onehot_result") container.add_node( - 'OneHotEncoder', argmax_output_name, + "OneHotEncoder", + argmax_output_name, onehot_result_name, - name=scope.get_unique_operator_name('OneHotEncoder'), + name=scope.get_unique_operator_name("OneHotEncoder"), cats_int64s=list(range(op.n_bins_[i])), - op_domain='ai.onnx.ml') - apply_reshape(scope, onehot_result_name, digitised_output_name[i], - container, desired_shape=(-1, op.n_bins_[i])) + op_domain="ai.onnx.ml", + ) + apply_reshape( + scope, + onehot_result_name, + digitised_output_name[i], + container, + desired_shape=(-1, op.n_bins_[i]), + ) else: - apply_cast(scope, argmax_output_name, digitised_output_name[i], - container, to=onnx_proto.TensorProto.FLOAT) - apply_concat(scope, digitised_output_name, - operator.outputs[0].full_name, container, axis=1) + apply_cast( + scope, + argmax_output_name, + digitised_output_name[i], + container, + to=onnx_proto.TensorProto.FLOAT, + ) + apply_concat( + scope, digitised_output_name, operator.outputs[0].full_name, container, axis=1 + ) -register_converter('SklearnKBinsDiscretizer', - convert_sklearn_k_bins_discretiser) +register_converter("SklearnKBinsDiscretizer", convert_sklearn_k_bins_discretiser) diff --git a/skl2onnx/operator_converters/k_means.py b/skl2onnx/operator_converters/k_means.py index 10b9cabdf..901fade96 100644 --- a/skl2onnx/operator_converters/k_means.py +++ b/skl2onnx/operator_converters/k_means.py @@ -7,12 +7,20 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..algebra.onnx_ops import ( - 
OnnxReduceSumSquareApi18, OnnxGemm, OnnxMatMul, - OnnxAdd, OnnxArgMin, OnnxCast, OnnxSqrt, OnnxMul) - - -def convert_sklearn_kmeans(scope: Scope, operator: Operator, - container: ModelComponentContainer): + OnnxReduceSumSquareApi18, + OnnxGemm, + OnnxMatMul, + OnnxAdd, + OnnxArgMin, + OnnxCast, + OnnxSqrt, + OnnxMul, +) + + +def convert_sklearn_kmeans( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Computation graph of distances to all centroids for a batch of examples. Note that a centriod is just the center of a cluster. We use ``[]`` to @@ -81,32 +89,29 @@ def convert_sklearn_kmeans(scope: Scope, operator: Operator, C2 = row_norms(C, squared=True).astype(dtype) C = C.astype(dtype) - rs = OnnxReduceSumSquareApi18( - input_name, axes=[1], keepdims=1, op_version=opv) + rs = OnnxReduceSumSquareApi18(input_name, axes=[1], keepdims=1, op_version=opv) - if options['gemm']: + if options["gemm"]: N = X.get_first_dimension() if isinstance(N, int): - zeros = np.zeros((N, ), dtype=dtype) + zeros = np.zeros((N,), dtype=dtype) else: - zeros = OnnxMul(rs, np.array([0], dtype=dtype), - op_version=opv) - gemm_out = OnnxGemm(input_name, C, zeros, alpha=-2., - transB=1, op_version=opv) + zeros = OnnxMul(rs, np.array([0], dtype=dtype), op_version=opv) + gemm_out = OnnxGemm(input_name, C, zeros, alpha=-2.0, transB=1, op_version=opv) else: - gemm_out = OnnxMatMul( - input_name, (C.T * (-2)).astype(dtype), op_version=opv) + gemm_out = OnnxMatMul(input_name, (C.T * (-2)).astype(dtype), op_version=opv) z = OnnxAdd(rs, gemm_out, op_version=opv) y2 = OnnxAdd(C2, z, op_version=opv) - ll = OnnxArgMin(y2, axis=1, keepdims=0, output_names=out[:1], - op_version=opv) + ll = OnnxArgMin(y2, axis=1, keepdims=0, output_names=out[:1], op_version=opv) y2s = OnnxSqrt(y2, output_names=out[1:], op_version=opv) ll.add_to(scope, container) y2s.add_to(scope, container) -register_converter('SklearnKMeans', convert_sklearn_kmeans, - options={'gemm': [True, False]}) -register_converter('SklearnMiniBatchKMeans', convert_sklearn_kmeans, - options={'gemm': [True, False]}) +register_converter( + "SklearnKMeans", convert_sklearn_kmeans, options={"gemm": [True, False]} +) +register_converter( + "SklearnMiniBatchKMeans", convert_sklearn_kmeans, options={"gemm": [True, False]} +) diff --git a/skl2onnx/operator_converters/kernel_pca.py b/skl2onnx/operator_converters/kernel_pca.py index 6ffeab0ae..5f853b99e 100644 --- a/skl2onnx/operator_converters/kernel_pca.py +++ b/skl2onnx/operator_converters/kernel_pca.py @@ -4,9 +4,18 @@ from sklearn.preprocessing import normalize from ..algebra.complex_functions import onnx_cdist from ..algebra.onnx_ops import ( - OnnxMatMul, OnnxTranspose, OnnxDiv, OnnxSub, OnnxAdd, - OnnxMul, OnnxPow, OnnxTanh, OnnxSqrt, OnnxExp, - OnnxReduceSumApi11) + OnnxMatMul, + OnnxTranspose, + OnnxDiv, + OnnxSub, + OnnxAdd, + OnnxMul, + OnnxPow, + OnnxTanh, + OnnxSqrt, + OnnxExp, + OnnxReduceSumApi11, +) from ..algebra.onnx_operator import OnnxSubEstimator from ..common._registration import register_converter from ..common._topology import Scope, Operator @@ -14,8 +23,9 @@ from ..common.data_types import guess_numpy_type -def kernel_centerer_converter(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def kernel_centerer_converter( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator op_version = container.target_opset X = operator.inputs[0] @@ -23,98 +33,121 @@ def kernel_centerer_converter(scope: Scope, operator: Operator, 
    N = np.array([op.K_fit_rows_.shape[0]], dtype=dtype)
     K_pred_cols = OnnxDiv(
-        OnnxReduceSumApi11(X, axes=[1], op_version=op_version),
-        N, op_version=op_version)
+        OnnxReduceSumApi11(X, axes=[1], op_version=op_version), N, op_version=op_version
+    )
     # K -= self.K_fit_rows_
     # K -= K_pred_cols
     # K += self.K_fit_all_
     K1 = OnnxSub(X, op.K_fit_rows_.astype(dtype), op_version=op_version)
     K2 = OnnxSub(K1, K_pred_cols, op_version=op_version)
-    final = OnnxAdd(K2, np.array([op.K_fit_all_], dtype=dtype),
-                    op_version=op_version,
-                    output_names=operator.outputs[:1])
+    final = OnnxAdd(
+        K2,
+        np.array([op.K_fit_all_], dtype=dtype),
+        op_version=op_version,
+        output_names=operator.outputs[:1],
+    )
     final.add_to(scope, container)
 
 
-def kernel_pca_converter(scope: Scope, operator: Operator,
-                         container: ModelComponentContainer):
+def kernel_pca_converter(
+    scope: Scope, operator: Operator, container: ModelComponentContainer
+):
     op = operator.raw_operator
     op_version = container.target_opset
     X = operator.inputs[0]
     dtype = guess_numpy_type(X.type)
     options = container.get_options(op, dict(optim=None))
-    optim = options['optim']
+    optim = options["optim"]
 
     # def _get_kernel(self, X, Y=None):
     #     return pairwise_kernels(
     #         X, Y, metric=self.kernel, filter_params=True, **params)
     if callable(op.kernel):
         raise RuntimeError(
-            "Unable to converter KernelPCA with a custom kernel %r."
-            "" % op.kernel)
+            "Unable to convert KernelPCA with a custom kernel %r." "" % op.kernel
+        )
 
-    if op.kernel == 'precomputed':
+    if op.kernel == "precomputed":
         raise RuntimeError(
             "The converter is not implemented when kernel=%r for "
-            "type=%r." % (op.kernel, type(op)))
+            "type=%r." % (op.kernel, type(op))
+        )
     kernel = op.kernel
     params = {"gamma": op.gamma, "degree": op.degree, "coef0": op.coef0}
 
-    if kernel == 'linear':
+    if kernel == "linear":
         Y = op.X_fit_.astype(dtype)
         dist = OnnxMatMul(
-            X, OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
-            op_version=op_version)
-    elif kernel == 'cosine':
+            X,
+            OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
+            op_version=op_version,
+        )
+    elif kernel == "cosine":
         yn = normalize(op.X_fit_, copy=True)
         ynt = yn.astype(dtype)
         norm = OnnxSqrt(
             OnnxReduceSumApi11(
-                OnnxPow(X, np.array([2], dtype=np.int64),
-                        op_version=op_version),
-                axes=[1], op_version=op_version, keepdims=1),
-            op_version=op_version)
+                OnnxPow(X, np.array([2], dtype=np.int64), op_version=op_version),
+                axes=[1],
+                op_version=op_version,
+                keepdims=1,
+            ),
+            op_version=op_version,
+        )
         dist = OnnxMatMul(
             OnnxDiv(X, norm, op_version=op_version),
             OnnxTranspose(ynt, perm=[1, 0], op_version=op_version),
-            op_version=op_version)
-    elif kernel in ('poly', 'sigmoid'):
+            op_version=op_version,
+        )
+    elif kernel in ("poly", "sigmoid"):
         Y = op.X_fit_.astype(dtype)
         dot = OnnxMatMul(
-            X, OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
-            op_version=op_version)
-        if params['gamma'] is None:
-            gamma = np.array([1.
/ Y.shape[1]], dtype=dtype) + X, + OnnxTranspose(Y, perm=[1, 0], op_version=op_version), + op_version=op_version, + ) + if params["gamma"] is None: + gamma = np.array([1.0 / Y.shape[1]], dtype=dtype) else: - gamma = np.array([params['gamma']], dtype=dtype) + gamma = np.array([params["gamma"]], dtype=dtype) dot_g = OnnxMul(dot, gamma, op_version=op_version) - dot_c = OnnxAdd(dot_g, np.array([params['coef0']], dtype=dtype), - op_version=op_version) - if kernel == 'poly': - dist = OnnxPow(dot_c, - np.array([params['degree']], dtype=np.int64), - op_version=op_version) + dot_c = OnnxAdd( + dot_g, np.array([params["coef0"]], dtype=dtype), op_version=op_version + ) + if kernel == "poly": + dist = OnnxPow( + dot_c, + np.array([params["degree"]], dtype=np.int64), + op_version=op_version, + ) else: dist = OnnxTanh(dot_c, op_version=op_version) - elif kernel == 'rbf': - if optim == 'cdist': + elif kernel == "rbf": + if optim == "cdist": from skl2onnx.algebra.custom_ops import OnnxCDist + Y = op.X_fit_.astype(dtype) - pair = OnnxCDist(X, Y, metric='sqeuclidean', op_version=op_version) + pair = OnnxCDist(X, Y, metric="sqeuclidean", op_version=op_version) elif optim is None: Y = op.X_fit_.astype(dtype) - dim_in = Y.shape[1] if hasattr(Y, 'shape') else None - dim_out = Y.shape[0] if hasattr(Y, 'shape') else None - pair = onnx_cdist(X, Y, metric='sqeuclidean', dtype=dtype, - op_version=op_version, - dim_in=dim_in, dim_out=dim_out) + dim_in = Y.shape[1] if hasattr(Y, "shape") else None + dim_out = Y.shape[0] if hasattr(Y, "shape") else None + pair = onnx_cdist( + X, + Y, + metric="sqeuclidean", + dtype=dtype, + op_version=op_version, + dim_in=dim_in, + dim_out=dim_out, + ) else: raise ValueError("Unknown optimisation '{}'.".format(optim)) - if params['gamma'] is None: - gamma = np.array([-1. 
/ Y.shape[1]], dtype=dtype) + if params["gamma"] is None: + gamma = np.array([-1.0 / Y.shape[1]], dtype=dtype) else: - gamma = np.array([-params['gamma']], dtype=dtype) + gamma = np.array([-params["gamma"]], dtype=dtype) pair_g = OnnxMul(pair, gamma, op_version=op_version) dist = OnnxExp(pair_g, op_version=op_version) else: @@ -123,29 +156,34 @@ def kernel_pca_converter(scope: Scope, operator: Operator, # K = self._centerer.transform(self._get_kernel(X, self.X_fit_)) K = OnnxSubEstimator(op._centerer, dist, op_version=op_version) - if hasattr(op, 'eigenvalues_'): + if hasattr(op, "eigenvalues_"): # scikit-learn>=1.0 non_zeros = np.flatnonzero(op.eigenvalues_) scaled_alphas = np.zeros_like(op.eigenvectors_) - scaled_alphas[:, non_zeros] = ( - op.eigenvectors_[:, non_zeros] / - np.sqrt(op.eigenvalues_[non_zeros])) + scaled_alphas[:, non_zeros] = op.eigenvectors_[:, non_zeros] / np.sqrt( + op.eigenvalues_[non_zeros] + ) else: # scikit-learn<1.0 non_zeros = np.flatnonzero(op.lambdas_) scaled_alphas = np.zeros_like(op.alphas_) - scaled_alphas[:, non_zeros] = ( - op.alphas_[:, non_zeros] / np.sqrt(op.lambdas_[non_zeros])) + scaled_alphas[:, non_zeros] = op.alphas_[:, non_zeros] / np.sqrt( + op.lambdas_[non_zeros] + ) # np.dot(K, scaled_alphas) - output = OnnxMatMul(K, scaled_alphas.astype(dtype), - op_version=op_version, - output_names=operator.outputs[:1]) + output = OnnxMatMul( + K, + scaled_alphas.astype(dtype), + op_version=op_version, + output_names=operator.outputs[:1], + ) # register the output output.add_to(scope, container) -register_converter('SklearnKernelCenterer', kernel_centerer_converter) -register_converter('SklearnKernelPCA', kernel_pca_converter, - options={'optim': [None, 'cdist']}) +register_converter("SklearnKernelCenterer", kernel_centerer_converter) +register_converter( + "SklearnKernelPCA", kernel_pca_converter, options={"optim": [None, "cdist"]} +) diff --git a/skl2onnx/operator_converters/label_binariser.py b/skl2onnx/operator_converters/label_binariser.py index 7bbe840b6..35f35b9d0 100644 --- a/skl2onnx/operator_converters/label_binariser.py +++ b/skl2onnx/operator_converters/label_binariser.py @@ -9,87 +9,122 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_label_binariser(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_label_binariser( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """Converts Scikit Label Binariser model to onnx format.""" binariser_op = operator.raw_operator classes = binariser_op.classes_ - if (hasattr(binariser_op, 'sparse_input_') and - binariser_op.sparse_input_): + if hasattr(binariser_op, "sparse_input_") and binariser_op.sparse_input_: raise RuntimeError("sparse is not supported for LabelBinarizer.") - if (hasattr(binariser_op, 'y_type_') and - binariser_op.y_type_ == "multilabel-indicator"): + if ( + hasattr(binariser_op, "y_type_") + and binariser_op.y_type_ == "multilabel-indicator" + ): if binariser_op.pos_label != 1: - raise RuntimeError("pos_label != 1 is not supported " - "for LabelBinarizer.") + raise RuntimeError("pos_label != 1 is not supported " "for LabelBinarizer.") if list(classes) != list(range(len(classes))): - raise RuntimeError("classes != [0, 1, ..., n_classes] is not " - "supported for LabelBinarizer.") - container.add_node('Identity', operator.inputs[0].full_name, - operator.output_full_names, - name=scope.get_unique_operator_name('identity')) + raise RuntimeError( + "classes != [0, 1, ..., n_classes] is not " + 
"supported for LabelBinarizer." + ) + container.add_node( + "Identity", + operator.inputs[0].full_name, + operator.output_full_names, + name=scope.get_unique_operator_name("identity"), + ) else: - zeros_tensor = np.full((1, len(classes)), - binariser_op.neg_label, dtype=np.float32) - unit_tensor = np.full((1, len(classes)), - binariser_op.pos_label, dtype=np.float32) + zeros_tensor = np.full( + (1, len(classes)), binariser_op.neg_label, dtype=np.float32 + ) + unit_tensor = np.full( + (1, len(classes)), binariser_op.pos_label, dtype=np.float32 + ) - classes_tensor_name = scope.get_unique_variable_name('classes_tensor') + classes_tensor_name = scope.get_unique_variable_name("classes_tensor") equal_condition_tensor_name = scope.get_unique_variable_name( - 'equal_condition_tensor') - zeros_tensor_name = scope.get_unique_variable_name('zero_tensor') - unit_tensor_name = scope.get_unique_variable_name('unit_tensor') - where_result_name = scope.get_unique_variable_name('where_result') + "equal_condition_tensor" + ) + zeros_tensor_name = scope.get_unique_variable_name("zero_tensor") + unit_tensor_name = scope.get_unique_variable_name("unit_tensor") + where_result_name = scope.get_unique_variable_name("where_result") class_dtype = onnx_proto.TensorProto.STRING - if (np.issubdtype(binariser_op.classes_.dtype, np.signedinteger) or - binariser_op.classes_.dtype == np.bool_): + if ( + np.issubdtype(binariser_op.classes_.dtype, np.signedinteger) + or binariser_op.classes_.dtype == np.bool_ + ): class_dtype = onnx_proto.TensorProto.INT64 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) - container.add_initializer(classes_tensor_name, class_dtype, - [len(classes)], classes) container.add_initializer( - zeros_tensor_name, onnx_proto.TensorProto.FLOAT, - zeros_tensor.shape, zeros_tensor.ravel()) + classes_tensor_name, class_dtype, [len(classes)], classes + ) + container.add_initializer( + zeros_tensor_name, + onnx_proto.TensorProto.FLOAT, + zeros_tensor.shape, + zeros_tensor.ravel(), + ) container.add_initializer( - unit_tensor_name, onnx_proto.TensorProto.FLOAT, - unit_tensor.shape, unit_tensor.ravel()) + unit_tensor_name, + onnx_proto.TensorProto.FLOAT, + unit_tensor.shape, + unit_tensor.ravel(), + ) - reshaped_input_name = scope.get_unique_variable_name('reshaped_input') - apply_reshape(scope, operator.inputs[0].full_name, reshaped_input_name, - container, desired_shape=[-1, 1]) + reshaped_input_name = scope.get_unique_variable_name("reshaped_input") + apply_reshape( + scope, + operator.inputs[0].full_name, + reshaped_input_name, + container, + desired_shape=[-1, 1], + ) # Models with classes_/inputs of string type would fail in the # following step as Equal op does not support string comparison. 
- container.add_node('Equal', [classes_tensor_name, reshaped_input_name], - equal_condition_tensor_name, - name=scope.get_unique_operator_name('equal')) container.add_node( - 'Where', + "Equal", + [classes_tensor_name, reshaped_input_name], + equal_condition_tensor_name, + name=scope.get_unique_operator_name("equal"), + ) + container.add_node( + "Where", [equal_condition_tensor_name, unit_tensor_name, zeros_tensor_name], where_result_name, - name=scope.get_unique_operator_name('where')) + name=scope.get_unique_operator_name("where"), + ) where_res = where_result_name if len(binariser_op.classes_) == 2: array_f_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') - pos_class_index_name = scope.get_unique_variable_name( - 'pos_class_index') + "array_feature_extractor_result" + ) + pos_class_index_name = scope.get_unique_variable_name("pos_class_index") container.add_initializer( - pos_class_index_name, onnx_proto.TensorProto.INT64, [], [1]) + pos_class_index_name, onnx_proto.TensorProto.INT64, [], [1] + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [where_result_name, pos_class_index_name], - array_f_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + array_f_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) where_res = array_f_name - apply_cast(scope, where_res, operator.output_full_names, container, - to=onnx_proto.TensorProto.INT64) + apply_cast( + scope, + where_res, + operator.output_full_names, + container, + to=onnx_proto.TensorProto.INT64, + ) -register_converter('SklearnLabelBinarizer', convert_sklearn_label_binariser) +register_converter("SklearnLabelBinarizer", convert_sklearn_label_binariser) diff --git a/skl2onnx/operator_converters/label_encoder.py b/skl2onnx/operator_converters/label_encoder.py index 378090033..d6a807ea2 100644 --- a/skl2onnx/operator_converters/label_encoder.py +++ b/skl2onnx/operator_converters/label_encoder.py @@ -7,30 +7,36 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_label_encoder(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_label_encoder( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator - op_type = 'LabelEncoder' - attrs = {'name': scope.get_unique_operator_name(op_type)} + op_type = "LabelEncoder" + attrs = {"name": scope.get_unique_operator_name(op_type)} classes = op.classes_ if np.issubdtype(classes.dtype, np.floating): - attrs['keys_floats'] = classes - elif (np.issubdtype(classes.dtype, np.signedinteger) or - classes.dtype == np.bool_): - attrs['keys_int64s'] = [int(i) for i in classes] + attrs["keys_floats"] = classes + elif np.issubdtype(classes.dtype, np.signedinteger) or classes.dtype == np.bool_: + attrs["keys_int64s"] = [int(i) for i in classes] else: - attrs['keys_strings'] = np.array([s.encode('utf-8') for s in classes]) - attrs['values_int64s'] = np.arange(len(classes)) + attrs["keys_strings"] = np.array([s.encode("utf-8") for s in classes]) + attrs["values_int64s"] = np.arange(len(classes)) - cop = container.target_opset_any_domain('ai.onnx.ml') + cop = container.target_opset_any_domain("ai.onnx.ml") if cop is not None and cop < 2: raise RuntimeError( "LabelEncoder requires at least opset 2 for domain 'ai.onnx.ml' " - "not {}".format(cop)) + "not {}".format(cop) + ) - container.add_node(op_type, operator.input_full_names, - operator.output_full_names, 
op_domain='ai.onnx.ml', - op_version=2, **attrs) + container.add_node( + op_type, + operator.input_full_names, + operator.output_full_names, + op_domain="ai.onnx.ml", + op_version=2, + **attrs + ) -register_converter('SklearnLabelEncoder', convert_sklearn_label_encoder) +register_converter("SklearnLabelEncoder", convert_sklearn_label_encoder) diff --git a/skl2onnx/operator_converters/linear_classifier.py b/skl2onnx/operator_converters/linear_classifier.py index 058a284e1..ce8d022e8 100644 --- a/skl2onnx/operator_converters/linear_classifier.py +++ b/skl2onnx/operator_converters/linear_classifier.py @@ -10,42 +10,48 @@ ) from sklearn.svm import LinearSVC from ..common._apply_operation import ( - apply_cast, apply_add, apply_sigmoid, apply_softmax, - apply_normalizer) + apply_cast, + apply_add, + apply_sigmoid, + apply_softmax, + apply_normalizer, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -from ..common.data_types import ( - BooleanTensorType, DoubleTensorType, guess_proto_type) -from ..common.utils_classifier import ( - get_label_classes, _finalize_converter_classes) +from ..common.data_types import BooleanTensorType, DoubleTensorType, guess_proto_type +from ..common.utils_classifier import get_label_classes, _finalize_converter_classes from ..proto import onnx_proto -def apply_logistic(scope, input_name, output_name, container, - proto_dtype): +def apply_logistic(scope, input_name, output_name, container, proto_dtype): sig_name = scope.get_unique_variable_name(input_name + "sig") apply_sigmoid(scope, input_name, sig_name, container) apply_normalizer( - scope, sig_name, output_name, container, norm='L1', - use_float=proto_dtype == onnx_proto.TensorProto.FLOAT) + scope, + sig_name, + output_name, + container, + norm="L1", + use_float=proto_dtype == onnx_proto.TensorProto.FLOAT, + ) -def convert_sklearn_linear_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_linear_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator coefficients = op.coef_.flatten().astype(float).tolist() classes = get_label_classes(scope, op) number_of_classes = len(classes) - use_linear_op = container.is_allowed({'LinearClassifier'}) + use_linear_op = container.is_allowed({"LinearClassifier"}) options = container.get_options(op, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] + use_raw_scores = options["raw_scores"] if isinstance(op.intercept_, (float, np.float32)) and op.intercept_ == 0: # fit_intercept = False - intercepts = ([0.0] * number_of_classes if number_of_classes != 2 else - [0.0]) + intercepts = [0.0] * number_of_classes if number_of_classes != 2 else [0.0] else: intercepts = op.intercept_.tolist() @@ -54,187 +60,269 @@ def convert_sklearn_linear_classifier(scope: Scope, operator: Operator, intercepts = list(map(lambda x: -1 * x, intercepts)) + intercepts multi_class = 0 - if hasattr(op, 'multi_class'): - if op.multi_class == 'ovr': + if hasattr(op, "multi_class"): + if op.multi_class == "ovr": multi_class = 1 else: multi_class = 2 - classifier_type = 'LinearClassifier' - classifier_attrs = { - 'name': scope.get_unique_operator_name(classifier_type) - } - - classifier_attrs['coefficients'] = coefficients - classifier_attrs['intercepts'] = intercepts - classifier_attrs['multi_class'] = 1 if multi_class == 2 else 0 - if (use_raw_scores or - isinstance(op, 
(LinearSVC, RidgeClassifier, RidgeClassifierCV))): - classifier_attrs['post_transform'] = 'NONE' + classifier_type = "LinearClassifier" + classifier_attrs = {"name": scope.get_unique_operator_name(classifier_type)} + + classifier_attrs["coefficients"] = coefficients + classifier_attrs["intercepts"] = intercepts + classifier_attrs["multi_class"] = 1 if multi_class == 2 else 0 + if use_raw_scores or isinstance( + op, (LinearSVC, RidgeClassifier, RidgeClassifierCV) + ): + classifier_attrs["post_transform"] = "NONE" elif isinstance(op, LogisticRegression): - ovr = (op.multi_class in ["ovr", "warn"] or - (op.multi_class == 'auto' and (op.classes_.size <= 2 or - op.solver == 'liblinear'))) - classifier_attrs['post_transform'] = ( - 'LOGISTIC' if ovr else 'SOFTMAX') + ovr = op.multi_class in ["ovr", "warn"] or ( + op.multi_class == "auto" + and (op.classes_.size <= 2 or op.solver == "liblinear") + ) + classifier_attrs["post_transform"] = "LOGISTIC" if ovr else "SOFTMAX" else: - classifier_attrs['post_transform'] = ( - 'LOGISTIC' if multi_class > 2 else 'SOFTMAX') + classifier_attrs["post_transform"] = ( + "LOGISTIC" if multi_class > 2 else "SOFTMAX" + ) if all(isinstance(i, str) for i in classes): class_labels = [str(i) for i in classes] - classifier_attrs['classlabels_strings'] = class_labels + classifier_attrs["classlabels_strings"] = class_labels elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes): class_labels = [int(i) for i in classes] - classifier_attrs['classlabels_ints'] = class_labels + classifier_attrs["classlabels_ints"] = class_labels else: - raise RuntimeError('Label vector must be a string or a integer ' - 'tensor.') + raise RuntimeError("Label vector must be a string or a integer " "tensor.") - if (not use_linear_op or - type(operator.inputs[0].type) in (DoubleTensorType, )): + if not use_linear_op or type(operator.inputs[0].type) in (DoubleTensorType,): # Double -> double parameters not supported in ONNX LinearClassifier proto_dtype = guess_proto_type(operator.inputs[0].type) - coef = scope.get_unique_variable_name('coef') - model_coef = np.array( - classifier_attrs['coefficients'], dtype=np.float64) + coef = scope.get_unique_variable_name("coef") + model_coef = np.array(classifier_attrs["coefficients"], dtype=np.float64) model_coef = model_coef.reshape((number_of_classes, -1)).T container.add_initializer( - coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist()) - intercept = scope.get_unique_variable_name('intercept') - model_intercept = np.array( - classifier_attrs['intercepts'], dtype=np.float64) + coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist() + ) + intercept = scope.get_unique_variable_name("intercept") + model_intercept = np.array(classifier_attrs["intercepts"], dtype=np.float64) model_intercept = model_intercept.reshape((number_of_classes, -1)).T container.add_initializer( - intercept, proto_dtype, model_intercept.shape, - model_intercept.ravel().tolist()) - multiplied = scope.get_unique_variable_name('multiplied') + intercept, + proto_dtype, + model_intercept.shape, + model_intercept.ravel().tolist(), + ) + multiplied = scope.get_unique_variable_name("multiplied") container.add_node( - 'MatMul', [operator.inputs[0].full_name, coef], multiplied, - name=scope.get_unique_operator_name('MatMul')) + "MatMul", + [operator.inputs[0].full_name, coef], + multiplied, + name=scope.get_unique_operator_name("MatMul"), + ) if use_raw_scores: raw_score_name = operator.outputs[1].full_name else: - raw_score_name = 
scope.get_unique_variable_name('raw_scores') + raw_score_name = scope.get_unique_variable_name("raw_scores") apply_add(scope, [multiplied, intercept], raw_score_name, container) - argmax_output_name = scope.get_unique_variable_name('label') - container.add_node('ArgMax', raw_score_name, argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), - axis=1) + argmax_output_name = scope.get_unique_variable_name("label") + container.add_node( + "ArgMax", + raw_score_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) _finalize_converter_classes( - scope, argmax_output_name, operator.outputs[0].full_name, - container, np.array(class_labels), - onnx_proto.TensorProto.DOUBLE) + scope, + argmax_output_name, + operator.outputs[0].full_name, + container, + np.array(class_labels), + onnx_proto.TensorProto.DOUBLE, + ) if use_raw_scores: return - if classifier_attrs['post_transform'] == 'LOGISTIC': - apply_logistic(scope, raw_score_name, - operator.outputs[1].full_name, container, - proto_dtype=onnx_proto.TensorProto.DOUBLE) + if classifier_attrs["post_transform"] == "LOGISTIC": + apply_logistic( + scope, + raw_score_name, + operator.outputs[1].full_name, + container, + proto_dtype=onnx_proto.TensorProto.DOUBLE, + ) return - elif classifier_attrs['post_transform'] == 'SOFTMAX': - apply_softmax(scope, raw_score_name, - operator.outputs[1].full_name, container) + elif classifier_attrs["post_transform"] == "SOFTMAX": + apply_softmax( + scope, raw_score_name, operator.outputs[1].full_name, container + ) return raise NotImplementedError( "post_transform '{}' is not supported with double.".format( - classifier_attrs['post_transform'])) + classifier_attrs["post_transform"] + ) + ) label_name = operator.outputs[0].full_name input_name = operator.inputs[0].full_name if isinstance(operator.inputs[0].type, BooleanTensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, input_name, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + apply_cast( + scope, + input_name, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name if use_raw_scores: - container.add_node(classifier_type, input_name, - [label_name, operator.outputs[1].full_name], - op_domain='ai.onnx.ml', **classifier_attrs) - elif (isinstance(op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)) - and op.classes_.shape[0] <= 2): - raw_scores_tensor_name = scope.get_unique_variable_name( - 'raw_scores_tensor') + container.add_node( + classifier_type, + input_name, + [label_name, operator.outputs[1].full_name], + op_domain="ai.onnx.ml", + **classifier_attrs + ) + elif ( + isinstance(op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)) + and op.classes_.shape[0] <= 2 + ): + raw_scores_tensor_name = scope.get_unique_variable_name("raw_scores_tensor") positive_class_index_name = scope.get_unique_variable_name( - 'positive_class_index') + "positive_class_index" + ) - container.add_initializer(positive_class_index_name, - onnx_proto.TensorProto.INT64, [], [1]) + container.add_initializer( + positive_class_index_name, onnx_proto.TensorProto.INT64, [], [1] + ) - if (hasattr(op, '_label_binarizer') and - op._label_binarizer.y_type_ == 'multilabel-indicator'): - y_pred_name = scope.get_unique_variable_name('y_pred') - binarised_label_name = scope.get_unique_variable_name( - 'binarised_label') + if ( + hasattr(op, "_label_binarizer") + and 
op._label_binarizer.y_type_ == "multilabel-indicator" + ): + y_pred_name = scope.get_unique_variable_name("y_pred") + binarised_label_name = scope.get_unique_variable_name("binarised_label") - container.add_node(classifier_type, input_name, - [y_pred_name, raw_scores_tensor_name], - op_domain='ai.onnx.ml', **classifier_attrs) container.add_node( - 'Binarizer', raw_scores_tensor_name, binarised_label_name, - op_domain='ai.onnx.ml') + classifier_type, + input_name, + [y_pred_name, raw_scores_tensor_name], + op_domain="ai.onnx.ml", + **classifier_attrs + ) + container.add_node( + "Binarizer", + raw_scores_tensor_name, + binarised_label_name, + op_domain="ai.onnx.ml", + ) apply_cast( - scope, binarised_label_name, label_name, - container, to=onnx_proto.TensorProto.INT64) + scope, + binarised_label_name, + label_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - container.add_node(classifier_type, input_name, - [label_name, raw_scores_tensor_name], - op_domain='ai.onnx.ml', **classifier_attrs) + container.add_node( + classifier_type, + input_name, + [label_name, raw_scores_tensor_name], + op_domain="ai.onnx.ml", + **classifier_attrs + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [raw_scores_tensor_name, positive_class_index_name], - operator.outputs[1].full_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + operator.outputs[1].full_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) else: # Make sure the probability sum is 1 over all classes if multi_class > 0 and not isinstance( - op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)): + op, (LinearSVC, RidgeClassifier, RidgeClassifierCV) + ): probability_tensor_name = scope.get_unique_variable_name( - 'probability_tensor') - container.add_node(classifier_type, input_name, - [label_name, probability_tensor_name], - op_domain='ai.onnx.ml', **classifier_attrs) - use_float = type(operator.inputs[0].type) not in ( - DoubleTensorType, ) + "probability_tensor" + ) + container.add_node( + classifier_type, + input_name, + [label_name, probability_tensor_name], + op_domain="ai.onnx.ml", + **classifier_attrs + ) + use_float = type(operator.inputs[0].type) not in (DoubleTensorType,) apply_normalizer( - scope, probability_tensor_name, operator.outputs[1].full_name, - container, norm='L1', use_float=use_float) - elif (hasattr(op, '_label_binarizer') and - op._label_binarizer.y_type_ == 'multilabel-indicator'): - y_pred_name = scope.get_unique_variable_name('y_pred') - binarised_label_name = scope.get_unique_variable_name( - 'binarised_label') + scope, + probability_tensor_name, + operator.outputs[1].full_name, + container, + norm="L1", + use_float=use_float, + ) + elif ( + hasattr(op, "_label_binarizer") + and op._label_binarizer.y_type_ == "multilabel-indicator" + ): + y_pred_name = scope.get_unique_variable_name("y_pred") + binarised_label_name = scope.get_unique_variable_name("binarised_label") container.add_node( - classifier_type, input_name, + classifier_type, + input_name, [y_pred_name, operator.outputs[1].full_name], - op_domain='ai.onnx.ml', **classifier_attrs) + op_domain="ai.onnx.ml", + **classifier_attrs + ) container.add_node( - 'Binarizer', operator.outputs[1].full_name, - binarised_label_name, op_domain='ai.onnx.ml') + "Binarizer", + operator.outputs[1].full_name, + binarised_label_name, + op_domain="ai.onnx.ml", + ) apply_cast( - scope, binarised_label_name, label_name, - container, 
to=onnx_proto.TensorProto.INT64) + scope, + binarised_label_name, + label_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - container.add_node(classifier_type, input_name, - [label_name, operator.outputs[1].full_name], - op_domain='ai.onnx.ml', **classifier_attrs) - - -register_converter('SklearnLinearClassifier', - convert_sklearn_linear_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) -register_converter('SklearnLinearSVC', convert_sklearn_linear_classifier, - options={'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) + container.add_node( + classifier_type, + input_name, + [label_name, operator.outputs[1].full_name], + op_domain="ai.onnx.ml", + **classifier_attrs + ) + + +register_converter( + "SklearnLinearClassifier", + convert_sklearn_linear_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) +register_converter( + "SklearnLinearSVC", + convert_sklearn_linear_classifier, + options={ + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/linear_regressor.py b/skl2onnx/operator_converters/linear_regressor.py index 8f210c5de..06055bfc2 100644 --- a/skl2onnx/operator_converters/linear_regressor.py +++ b/skl2onnx/operator_converters/linear_regressor.py @@ -2,140 +2,187 @@ import numpy as np from ..common._apply_operation import ( - apply_cast, apply_add, apply_sqrt, apply_div, apply_sub, - apply_reshape) + apply_cast, + apply_add, + apply_sqrt, + apply_div, + apply_sub, + apply_reshape, +) from ..common.data_types import ( - BooleanTensorType, Int64TensorType, DoubleTensorType, - guess_numpy_type, guess_proto_type) + BooleanTensorType, + Int64TensorType, + DoubleTensorType, + guess_numpy_type, + guess_proto_type, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..proto import onnx_proto from ..algebra.onnx_ops import ( - OnnxAdd, OnnxCast, OnnxExp, OnnxIdentity, OnnxMatMul, - OnnxReshape, OnnxSigmoid) + OnnxAdd, + OnnxCast, + OnnxExp, + OnnxIdentity, + OnnxMatMul, + OnnxReshape, + OnnxSigmoid, +) -def convert_sklearn_linear_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_linear_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator - use_linear_op = container.is_allowed({'LinearRegressor'}) + use_linear_op = container.is_allowed({"LinearRegressor"}) - if (not use_linear_op or - type(operator.inputs[0].type) in (DoubleTensorType, )): + if not use_linear_op or type(operator.inputs[0].type) in (DoubleTensorType,): proto_dtype = guess_proto_type(operator.inputs[0].type) - coef = scope.get_unique_variable_name('coef') + coef = scope.get_unique_variable_name("coef") if len(op.coef_.shape) == 1: model_coef = op.coef_.reshape((-1, 1)) else: model_coef = op.coef_.T container.add_initializer( - coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist()) - intercept = scope.get_unique_variable_name('intercept') - value_intercept = op.intercept_.reshape((-1, )) + coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist() + ) + intercept = scope.get_unique_variable_name("intercept") + 
value_intercept = op.intercept_.reshape((-1,)) container.add_initializer( - intercept, proto_dtype, value_intercept.shape, - value_intercept.ravel().tolist()) - multiplied = scope.get_unique_variable_name('multiplied') + intercept, + proto_dtype, + value_intercept.shape, + value_intercept.ravel().tolist(), + ) + multiplied = scope.get_unique_variable_name("multiplied") container.add_node( - 'MatMul', [operator.inputs[0].full_name, coef], multiplied, - name=scope.get_unique_operator_name('MatMul')) - resh = scope.get_unique_variable_name('resh') - apply_add(scope, [multiplied, intercept], - resh, container) + "MatMul", + [operator.inputs[0].full_name, coef], + multiplied, + name=scope.get_unique_operator_name("MatMul"), + ) + resh = scope.get_unique_variable_name("resh") + apply_add(scope, [multiplied, intercept], resh, container) last_dim = 1 if len(model_coef.shape) == 1 else model_coef.shape[-1] - apply_reshape(scope, resh, operator.outputs[0].full_name, - container, desired_shape=(-1, last_dim)) + apply_reshape( + scope, + resh, + operator.outputs[0].full_name, + container, + desired_shape=(-1, last_dim), + ) return - op_type = 'LinearRegressor' + op_type = "LinearRegressor" dtype = guess_numpy_type(operator.inputs[0].type) if dtype not in (np.float32, np.float64): dtype = np.float32 - attrs = {'name': scope.get_unique_operator_name(op_type)} - attrs['coefficients'] = op.coef_.astype(dtype).ravel() - attrs['intercepts'] = np.array([op.intercept_], dtype=dtype).ravel() + attrs = {"name": scope.get_unique_operator_name(op_type)} + attrs["coefficients"] = op.coef_.astype(dtype).ravel() + attrs["intercepts"] = np.array([op.intercept_], dtype=dtype).ravel() if len(op.coef_.shape) == 2: - attrs['targets'] = op.coef_.shape[0] + attrs["targets"] = op.coef_.shape[0] input_name = operator.input_full_names if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, operator.input_full_names, cast_input_name, - container, - to=(onnx_proto.TensorProto.DOUBLE - if dtype == np.float64 - else onnx_proto.TensorProto.FLOAT)) + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=( + onnx_proto.TensorProto.DOUBLE + if dtype == np.float64 + else onnx_proto.TensorProto.FLOAT + ), + ) input_name = cast_input_name - container.add_node(op_type, input_name, - operator.outputs[0].full_name, op_domain='ai.onnx.ml', - **attrs) + container.add_node( + op_type, + input_name, + operator.outputs[0].full_name, + op_domain="ai.onnx.ml", + **attrs, + ) -def convert_sklearn_bayesian_ridge(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_bayesian_ridge( + scope: Scope, operator: Operator, container: ModelComponentContainer +): convert_sklearn_linear_regressor(scope, operator, container) op = operator.raw_operator options = container.get_options(op, dict(return_std=False)) - return_std = options['return_std'] + return_std = options["return_std"] if not return_std: return proto_dtype = guess_proto_type(operator.inputs[0].type) - if hasattr(op, 'normalize') and op.normalize: + if hasattr(op, "normalize") and op.normalize: # if self.normalize: # X = (X - self.X_offset_) / self.X_scale_ - offset = scope.get_unique_variable_name('offset') + offset = scope.get_unique_variable_name("offset") container.add_initializer( - offset, proto_dtype, op.X_offset_.shape, - op.X_offset_.ravel().tolist()) 
- scale = scope.get_unique_variable_name('scale') + offset, proto_dtype, op.X_offset_.shape, op.X_offset_.ravel().tolist() + ) + scale = scope.get_unique_variable_name("scale") container.add_initializer( - scale, proto_dtype, op.X_scale_.shape, - op.X_scale_.ravel().tolist()) - centered = scope.get_unique_variable_name('centered') - apply_sub(scope, [operator.inputs[0].full_name, offset], - centered, container) - scaled = scope.get_unique_variable_name('scaled') + scale, proto_dtype, op.X_scale_.shape, op.X_scale_.ravel().tolist() + ) + centered = scope.get_unique_variable_name("centered") + apply_sub(scope, [operator.inputs[0].full_name, offset], centered, container) + scaled = scope.get_unique_variable_name("scaled") apply_div(scope, [centered, scale], scaled, container) input_name = scaled else: input_name = operator.inputs[0].full_name # sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1) - sigma = scope.get_unique_variable_name('sigma') + sigma = scope.get_unique_variable_name("sigma") container.add_initializer( - sigma, proto_dtype, op.sigma_.shape, op.sigma_.ravel().tolist()) - sigmaed0 = scope.get_unique_variable_name('sigma0') + sigma, proto_dtype, op.sigma_.shape, op.sigma_.ravel().tolist() + ) + sigmaed0 = scope.get_unique_variable_name("sigma0") container.add_node( - 'MatMul', [input_name, sigma], sigmaed0, - name=scope.get_unique_operator_name('MatMul')) - sigmaed = scope.get_unique_variable_name('sigma') + "MatMul", + [input_name, sigma], + sigmaed0, + name=scope.get_unique_operator_name("MatMul"), + ) + sigmaed = scope.get_unique_variable_name("sigma") if container.target_opset < 13: container.add_node( - 'ReduceSum', sigmaed0, sigmaed, axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + sigmaed0, + sigmaed, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [sigmaed0, axis_name], sigmaed, - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + [sigmaed0, axis_name], + sigmaed, + name=scope.get_unique_operator_name("ReduceSum"), + ) # y_std = np.sqrt(sigmas_squared_data + (1. 
/ self.alpha_)) # return y_mean, y_std - std0 = scope.get_unique_variable_name('std0') - alphainv = scope.get_unique_variable_name('alphainv') + std0 = scope.get_unique_variable_name("std0") + alphainv = scope.get_unique_variable_name("alphainv") container.add_initializer(alphainv, proto_dtype, [1], [1 / op.alpha_]) apply_add(scope, [sigmaed, alphainv], std0, container) apply_sqrt(scope, std0, operator.outputs[1].full_name, container) -def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_poisson_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): X = operator.inputs[0] out = operator.outputs op = operator.raw_operator @@ -149,16 +196,21 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator, else: input_var = X - intercept = (op.intercept_.astype(dtype) if len(op.intercept_.shape) > 0 - else np.array([op.intercept_], dtype=dtype)) + intercept = ( + op.intercept_.astype(dtype) + if len(op.intercept_.shape) > 0 + else np.array([op.intercept_], dtype=dtype) + ) eta = OnnxAdd( OnnxMatMul(input_var, op.coef_.astype(dtype), op_version=opv), - intercept, op_version=opv) + intercept, + op_version=opv, + ) if hasattr(op, "_link_instance"): # scikit-learn < 1.1 - from sklearn.linear_model._glm.link import ( - IdentityLink, LogLink, LogitLink) + from sklearn.linear_model._glm.link import IdentityLink, LogLink, LogitLink + if isinstance(op._link_instance, IdentityLink): Y = OnnxIdentity(eta, op_version=opv) elif isinstance(op._link_instance, LogLink): @@ -168,8 +220,8 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator, else: raise RuntimeError( "Unexpected type %r for _link_instance " - "in operator type %r." % ( - type(op._link_instance), type(op))) + "in operator type %r." % (type(op._link_instance), type(op)) + ) else: # scikit-learn >= 1.1 from sklearn._loss.loss import ( @@ -180,35 +232,40 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator, HalfSquaredError, HalfTweedieLoss, HalfTweedieLossIdentity, - PinballLoss + PinballLoss, ) + loss = op._get_loss() if isinstance( loss, - (AbsoluteError, HalfSquaredError, - HalfTweedieLossIdentity, PinballLoss)): + (AbsoluteError, HalfSquaredError, HalfTweedieLossIdentity, PinballLoss), + ): Y = OnnxIdentity(eta, op_version=opv) - elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss, - HalfTweedieLoss)): + elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)): Y = OnnxExp(eta, op_version=opv) elif isinstance(loss, HalfBinomialLoss): Y = OnnxSigmoid(eta, op_version=opv) else: raise RuntimeError( - f"Unexpected type of link for {loss!r} loss " - "in operator type {op!r}.") + f"Unexpected type of link for {loss!r} loss " "in operator type {op!r}." 
+ ) last_dim = 1 if len(op.coef_.shape) == 1 else op.coef_.shape[-1] - final = OnnxReshape(Y, np.array([-1, last_dim], dtype=np.int64), - op_version=opv, output_names=out[:1]) + final = OnnxReshape( + Y, + np.array([-1, last_dim], dtype=np.int64), + op_version=opv, + output_names=out[:1], + ) final.add_to(scope, container) -register_converter('SklearnLinearRegressor', convert_sklearn_linear_regressor) -register_converter('SklearnLinearSVR', convert_sklearn_linear_regressor) -register_converter('SklearnBayesianRidge', convert_sklearn_bayesian_ridge, - options={'return_std': [True, False]}) -register_converter('SklearnPoissonRegressor', - convert_sklearn_poisson_regressor) -register_converter('SklearnTweedieRegressor', - convert_sklearn_poisson_regressor) +register_converter("SklearnLinearRegressor", convert_sklearn_linear_regressor) +register_converter("SklearnLinearSVR", convert_sklearn_linear_regressor) +register_converter( + "SklearnBayesianRidge", + convert_sklearn_bayesian_ridge, + options={"return_std": [True, False]}, +) +register_converter("SklearnPoissonRegressor", convert_sklearn_poisson_regressor) +register_converter("SklearnTweedieRegressor", convert_sklearn_poisson_regressor) diff --git a/skl2onnx/operator_converters/local_outlier_factor.py b/skl2onnx/operator_converters/local_outlier_factor.py index bc83cc1f3..2dcd0398a 100644 --- a/skl2onnx/operator_converters/local_outlier_factor.py +++ b/skl2onnx/operator_converters/local_outlier_factor.py @@ -5,26 +5,41 @@ from onnx import TensorProto from ..common._registration import register_converter from ..common.data_types import ( - BooleanTensorType, Int64TensorType, - guess_numpy_type, guess_proto_type) + BooleanTensorType, + Int64TensorType, + guess_numpy_type, + guess_proto_type, +) from ..algebra.onnx_ops import ( - OnnxCast, OnnxLess, OnnxMul, OnnxAdd, OnnxDiv, - OnnxGather, OnnxReduceMeanApi18, OnnxMax, OnnxSqueezeApi11) + OnnxCast, + OnnxLess, + OnnxMul, + OnnxAdd, + OnnxDiv, + OnnxGather, + OnnxReduceMeanApi18, + OnnxMax, + OnnxSqueezeApi11, +) from .nearest_neighbours import onnx_nearest_neighbors_indices_k def convert_sklearn_local_outlier_factor( - scope, operator, container, op_type='TreeEnsembleRegressor', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): op = operator.raw_operator if not op.novelty: raise RuntimeError( - "The converter only converts the model %r is novelty is True." - "" % type(op)) + "The converter only converts the model %r is novelty is True." "" % type(op) + ) outputs = operator.outputs opv = container.target_opset - options = container.get_options( - op, dict(score_samples=None, optim=None)) + options = container.get_options(op, dict(score_samples=None, optim=None)) X = operator.inputs[0] dtype = guess_numpy_type(operator.inputs[0].type) @@ -32,68 +47,88 @@ def convert_sklearn_local_outlier_factor( if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): X = OnnxCast(X, to=proto_dtype, op_version=opv) - metric = (op.effective_metric_ if hasattr(op, 'effective_metric_') else - op.metric) + metric = op.effective_metric_ if hasattr(op, "effective_metric_") else op.metric neighb = op._fit_X.astype(dtype) k = op.n_neighbors_ kwargs = {} if op.p != 2: - if options['optim'] == 'cdist': + if options["optim"] == "cdist": warnings.warn( "Option p=%r may not be compatible with the runtime. 
" "See https://github.com/microsoft/onnxruntime/blob/master/" - "docs/ContribOperators.md#com.microsoft.CDist.") - kwargs['p'] = op.p + "docs/ContribOperators.md#com.microsoft.CDist." + ) + kwargs["p"] = op.p top_k, dist = onnx_nearest_neighbors_indices_k( - X, neighb, k, metric, dtype=dtype, - op_version=opv, keep_distances=True, - optim=options.get('optim', None), - **kwargs) + X, + neighb, + k, + metric, + dtype=dtype, + op_version=opv, + keep_distances=True, + optim=options.get("optim", None), + **kwargs + ) # dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1] # reach_dist_array = np.maximum(distances_X, dist_k) - dist_k_ = OnnxGather(op._distances_fit_X_.astype(dtype), - top_k, op_version=opv) + dist_k_ = OnnxGather(op._distances_fit_X_.astype(dtype), top_k, op_version=opv) dist_k = OnnxSqueezeApi11( - OnnxGather(dist_k_, np.array([op.n_neighbors_ - 1], - dtype=np.int64), - axis=2, op_version=opv), - axes=[2], op_version=opv) - dist_k.set_onnx_name_prefix('dist_k') + OnnxGather( + dist_k_, + np.array([op.n_neighbors_ - 1], dtype=np.int64), + axis=2, + op_version=opv, + ), + axes=[2], + op_version=opv, + ) + dist_k.set_onnx_name_prefix("dist_k") reach_dist_array = OnnxMax( OnnxMul(dist, np.array([-1], dtype=dtype), op_version=opv), - dist_k, op_version=opv) + dist_k, + op_version=opv, + ) # X_lrd= return 1.0 / (np.mean(reach_dist_array, axis=1) + 1e-10) X_lrd = OnnxDiv( np.array([1], dtype=dtype), OnnxAdd( - OnnxReduceMeanApi18(reach_dist_array, axes=[1], - op_version=opv, keepdims=1), - np.array([1e-10], dtype=dtype), op_version=opv), - op_version=opv) - X_lrd.set_onnx_name_prefix('X_lrd') + OnnxReduceMeanApi18(reach_dist_array, axes=[1], op_version=opv, keepdims=1), + np.array([1e-10], dtype=dtype), + op_version=opv, + ), + op_version=opv, + ) + X_lrd.set_onnx_name_prefix("X_lrd") # lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis] lrd_ratios_array = OnnxDiv( - OnnxGather(op._lrd.astype(dtype), top_k, op_version=opv), - X_lrd, op_version=opv) - lrd_ratios_array.set_onnx_name_prefix('lrd_ratios_array') + OnnxGather(op._lrd.astype(dtype), top_k, op_version=opv), X_lrd, op_version=opv + ) + lrd_ratios_array.set_onnx_name_prefix("lrd_ratios_array") # -np.mean(lrd_ratios_array, axis=1) - if options['score_samples']: + if options["score_samples"]: output_names_score_samples = [outputs[2]] else: output_names_score_samples = None - score_samples = OnnxReduceMeanApi18( - lrd_ratios_array, axes=[1], op_version=opv) - score_samples.set_onnx_name_prefix('score_samples') + score_samples = OnnxReduceMeanApi18(lrd_ratios_array, axes=[1], op_version=opv) + score_samples.set_onnx_name_prefix("score_samples") score_samples_neg = OnnxMul( - score_samples, np.array([-1], dtype=dtype), op_version=opv, - output_names=output_names_score_samples) - final = OnnxAdd(score_samples_neg, np.array([-op.offset_], dtype=dtype), - op_version=opv, output_names=[outputs[1]]) + score_samples, + np.array([-1], dtype=dtype), + op_version=opv, + output_names=output_names_score_samples, + ) + final = OnnxAdd( + score_samples_neg, + np.array([-op.offset_], dtype=dtype), + op_version=opv, + output_names=[outputs[1]], + ) # labels # is_inlier = np.ones(X.shape[0], dtype=int) @@ -103,19 +138,26 @@ def convert_sklearn_local_outlier_factor( OnnxMul( OnnxCast( OnnxLess(final, np.array([0], dtype=dtype), op_version=opv), - to=TensorProto.INT64, op_version=opv), - np.array([-2], dtype=np.int64), op_version=opv), - np.array([1], dtype=np.int64), op_version=opv, - 
output_names=outputs[0].full_name) - predict.set_onnx_name_prefix('predict') + to=TensorProto.INT64, + op_version=opv, + ), + np.array([-2], dtype=np.int64), + op_version=opv, + ), + np.array([1], dtype=np.int64), + op_version=opv, + output_names=outputs[0].full_name, + ) + predict.set_onnx_name_prefix("predict") predict.add_to(scope, container) final.add_to(scope, container) - if options['score_samples']: + if options["score_samples"]: score_samples_neg.add_to(scope, container) -register_converter('SklearnLocalOutlierFactor', - convert_sklearn_local_outlier_factor, - options={'score_samples': [True, False], - 'optim': [None, 'cdist']}) +register_converter( + "SklearnLocalOutlierFactor", + convert_sklearn_local_outlier_factor, + options={"score_samples": [True, False], "optim": [None, "cdist"]}, +) diff --git a/skl2onnx/operator_converters/multilayer_perceptron.py b/skl2onnx/operator_converters/multilayer_perceptron.py index 8decac8d0..dc209b660 100644 --- a/skl2onnx/operator_converters/multilayer_perceptron.py +++ b/skl2onnx/operator_converters/multilayer_perceptron.py @@ -4,8 +4,13 @@ import numpy as np from ..common.data_types import guess_proto_type from ..common._apply_operation import ( - apply_add, apply_cast, apply_concat, apply_identity, - apply_reshape, apply_sub) + apply_add, + apply_cast, + apply_concat, + apply_identity, + apply_reshape, + apply_sub, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -18,50 +23,70 @@ def _forward_pass(scope, container, model, activations, proto_dtype): the neurons in the hidden layers and the output layer. """ activations_map = { - 'identity': 'Identity', 'tanh': 'Tanh', 'logistic': 'Sigmoid', - 'relu': 'Relu', 'softmax': 'Softmax' + "identity": "Identity", + "tanh": "Tanh", + "logistic": "Sigmoid", + "relu": "Relu", + "softmax": "Softmax", } out_activation_result_name = scope.get_unique_variable_name( - 'out_activations_result') + "out_activations_result" + ) # Iterate over the hidden layers for i in range(model.n_layers_ - 1): - coefficient_name = scope.get_unique_variable_name('coefficient') - intercepts_name = scope.get_unique_variable_name('intercepts') - mul_result_name = scope.get_unique_variable_name('mul_result') - add_result_name = scope.get_unique_variable_name('add_result') + coefficient_name = scope.get_unique_variable_name("coefficient") + intercepts_name = scope.get_unique_variable_name("intercepts") + mul_result_name = scope.get_unique_variable_name("mul_result") + add_result_name = scope.get_unique_variable_name("add_result") container.add_initializer( - coefficient_name, proto_dtype, - model.coefs_[i].shape, model.coefs_[i].ravel()) + coefficient_name, + proto_dtype, + model.coefs_[i].shape, + model.coefs_[i].ravel(), + ) container.add_initializer( - intercepts_name, proto_dtype, - [1, len(model.intercepts_[i])], model.intercepts_[i]) + intercepts_name, + proto_dtype, + [1, len(model.intercepts_[i])], + model.intercepts_[i], + ) container.add_node( - 'MatMul', [activations[i], coefficient_name], - mul_result_name, name=scope.get_unique_operator_name('MatMul')) - apply_add(scope, [mul_result_name, intercepts_name], - add_result_name, container, broadcast=1) + "MatMul", + [activations[i], coefficient_name], + mul_result_name, + name=scope.get_unique_operator_name("MatMul"), + ) + apply_add( + scope, + [mul_result_name, intercepts_name], + add_result_name, + container, + broadcast=1, + ) # For the hidden layers if (i + 
1) != (model.n_layers_ - 1): - activations_result_name = scope.get_unique_variable_name( - 'next_activations') + activations_result_name = scope.get_unique_variable_name("next_activations") container.add_node( - activations_map[model.activation], add_result_name, + activations_map[model.activation], + add_result_name, activations_result_name, - name=scope.get_unique_operator_name( - activations_map[model.activation])) + name=scope.get_unique_operator_name(activations_map[model.activation]), + ) activations.append(activations_result_name) # For the last layer container.add_node( - activations_map[model.out_activation_], add_result_name, + activations_map[model.out_activation_], + add_result_name, out_activation_result_name, - name=scope.get_unique_operator_name(activations_map[model.activation])) + name=scope.get_unique_operator_name(activations_map[model.activation]), + ) activations.append(out_activation_result_name) return activations @@ -72,19 +97,18 @@ def _predict(scope, input_name, container, model, proto_dtype): This function initialises the input layer, calls _forward_pass() and returns the final layer. """ - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, input_name, cast_input_name, - container, to=proto_dtype) + apply_cast(scope, input_name, cast_input_name, container, to=proto_dtype) # forward propagate - activations = _forward_pass(scope, container, model, [cast_input_name], - proto_dtype) + activations = _forward_pass(scope, container, model, [cast_input_name], proto_dtype) return activations[-1] -def convert_sklearn_mlp_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_mlp_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for MLPClassifier. 
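For orientation, the graph assembled by _forward_pass above is the standard MLP recurrence; a minimal numpy sketch (illustrative names, assuming coefs_ and intercepts_ come from a fitted scikit-learn MLP) is:

import numpy as np

def mlp_forward(X, coefs, intercepts, hidden_activation=np.tanh, out_activation=lambda z: z):
    # one MatMul + Add + activation per hidden layer, as in _forward_pass
    a = X
    for W, b in zip(coefs[:-1], intercepts[:-1]):
        a = hidden_activation(a @ W + b)
    # the last layer applies out_activation_ (identity, logistic or softmax)
    return out_activation(a @ coefs[-1] + intercepts[-1])
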
This function calls _predict() which returns the probability scores @@ -97,79 +121,119 @@ def convert_sklearn_mlp_classifier(scope: Scope, operator: Operator, classes = mlp_op.classes_ class_type = onnx_proto.TensorProto.STRING - argmax_output_name = scope.get_unique_variable_name('argmax_output') + argmax_output_name = scope.get_unique_variable_name("argmax_output") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT - y_pred = _predict(scope, operator.inputs[0].full_name, container, mlp_op, - proto_dtype) + y_pred = _predict( + scope, operator.inputs[0].full_name, container, mlp_op, proto_dtype + ) - if (np.issubdtype(mlp_op.classes_.dtype, np.floating) or - mlp_op.classes_.dtype == np.bool_): + if ( + np.issubdtype(mlp_op.classes_.dtype, np.floating) + or mlp_op.classes_.dtype == np.bool_ + ): class_type = onnx_proto.TensorProto.INT32 classes = classes.astype(np.int32) elif np.issubdtype(mlp_op.classes_.dtype, np.integer): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) if len(classes) == 2: - unity_name = scope.get_unique_variable_name('unity') + unity_name = scope.get_unique_variable_name("unity") negative_class_proba_name = scope.get_unique_variable_name( - 'negative_class_proba') + "negative_class_proba" + ) container.add_initializer(unity_name, proto_dtype, [], [1]) - apply_sub(scope, [unity_name, y_pred], - negative_class_proba_name, container, broadcast=1) - apply_concat(scope, [negative_class_proba_name, y_pred], - operator.outputs[1].full_name, container, axis=1) + apply_sub( + scope, + [unity_name, y_pred], + negative_class_proba_name, + container, + broadcast=1, + ) + apply_concat( + scope, + [negative_class_proba_name, y_pred], + operator.outputs[1].full_name, + container, + axis=1, + ) else: - apply_identity(scope, y_pred, - operator.outputs[1].full_name, container) + apply_identity(scope, y_pred, operator.outputs[1].full_name, container) - if mlp_op._label_binarizer.y_type_ == 'multilabel-indicator': - binariser_output_name = scope.get_unique_variable_name( - 'binariser_output') + if mlp_op._label_binarizer.y_type_ == "multilabel-indicator": + binariser_output_name = scope.get_unique_variable_name("binariser_output") - container.add_node('Binarizer', y_pred, binariser_output_name, - threshold=0.5, op_domain='ai.onnx.ml') + container.add_node( + "Binarizer", + y_pred, + binariser_output_name, + threshold=0.5, + op_domain="ai.onnx.ml", + ) apply_cast( - scope, binariser_output_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + scope, + binariser_output_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - classes_name = scope.get_unique_variable_name('classes') - container.add_initializer(classes_name, class_type, - classes.shape, classes) + classes_name = scope.get_unique_variable_name("classes") + container.add_initializer(classes_name, class_type, classes.shape, classes) - container.add_node('ArgMax', operator.outputs[1].full_name, - argmax_output_name, axis=1, - name=scope.get_unique_operator_name('ArgMax')) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, 
op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArgMax", + operator.outputs[1].full_name, + argmax_output_name, + axis=1, + name=scope.get_unique_operator_name("ArgMax"), + ) + container.add_node( + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == onnx_proto.TensorProto.INT32: - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - - apply_reshape(scope, array_feature_extractor_result_name, - reshaped_result_name, container, - desired_shape=(-1,)) + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + + apply_reshape( + scope, + array_feature_extractor_result_name, + reshaped_result_name, + container, + desired_shape=(-1,), + ) apply_cast( - scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=(-1,)) - - -def convert_sklearn_mlp_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=(-1,), + ) + + +def convert_sklearn_mlp_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for MLPRegressor. This function calls _predict() which returns the scores. @@ -180,16 +244,21 @@ def convert_sklearn_mlp_regressor(scope: Scope, operator: Operator, if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT - y_pred = _predict(scope, operator.inputs[0].full_name, container, mlp_op, - proto_dtype=proto_dtype) - apply_reshape(scope, y_pred, operator.output_full_names, - container, desired_shape=(-1, 1)) - - -register_converter('SklearnMLPClassifier', - convert_sklearn_mlp_classifier, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnMLPRegressor', - convert_sklearn_mlp_regressor) + y_pred = _predict( + scope, operator.inputs[0].full_name, container, mlp_op, proto_dtype=proto_dtype + ) + apply_reshape( + scope, y_pred, operator.output_full_names, container, desired_shape=(-1, 1) + ) + + +register_converter( + "SklearnMLPClassifier", + convert_sklearn_mlp_classifier, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter("SklearnMLPRegressor", convert_sklearn_mlp_regressor) diff --git a/skl2onnx/operator_converters/multioutput.py b/skl2onnx/operator_converters/multioutput.py index f28e67ed0..41c5b7124 100644 --- a/skl2onnx/operator_converters/multioutput.py +++ b/skl2onnx/operator_converters/multioutput.py @@ -4,8 +4,8 @@ from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -from ..algebra.onnx_ops import ( - OnnxConcat, OnnxReshapeApi13, OnnxIdentity) +from ..algebra.onnx_ops import OnnxConcat, OnnxReshapeApi13, OnnxIdentity + try: from ..algebra.onnx_ops import OnnxSequenceConstruct except ImportError: @@ -15,7 +15,8 @@ def 
convert_multi_output_regressor_converter( - scope: Scope, operator: Operator, container: ModelComponentContainer): + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *MultiOutputRegressor* into *ONNX* format. """ @@ -26,59 +27,69 @@ def convert_multi_output_regressor_converter( OnnxReshapeApi13( OnnxSubEstimator(sub, inp, op_version=op_version), np.array([-1, 1], dtype=np.int64), - op_version=op_version) - for sub in op.estimators_] + op_version=op_version, + ) + for sub in op.estimators_ + ] - output = OnnxConcat(*y_list, axis=1, op_version=op_version, - output_names=[operator.outputs[0]]) + output = OnnxConcat( + *y_list, axis=1, op_version=op_version, output_names=[operator.outputs[0]] + ) output.add_to(scope=scope, container=container) def convert_multi_output_classifier_converter( - scope: Scope, operator: Operator, container: ModelComponentContainer): + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *MultiOutputClassifier* into *ONNX* format. """ if OnnxSequenceConstruct is None: - raise RuntimeError( - "This converter requires opset>=11.") + raise RuntimeError("This converter requires opset>=11.") op_version = container.target_opset op_version = container.target_opset op = operator.raw_operator inp = operator.inputs[0] options = scope.get_options(op) - if options.get('nocl', True): + if options.get("nocl", True): options = options.copy() else: options = {} - options.update({'zipmap': False}) - y_list = [OnnxSubEstimator(sub, inp, op_version=op_version, - options=options) - for sub in op.estimators_] + options.update({"zipmap": False}) + y_list = [ + OnnxSubEstimator(sub, inp, op_version=op_version, options=options) + for sub in op.estimators_ + ] # labels - label_list = [OnnxReshapeApi13(y[0], np.array([-1, 1], dtype=np.int64), - op_version=op_version) - for y in y_list] + label_list = [ + OnnxReshapeApi13(y[0], np.array([-1, 1], dtype=np.int64), op_version=op_version) + for y in y_list + ] # probabilities - proba_list = [OnnxIdentity(y[1], op_version=op_version) - for y in y_list] - label = OnnxConcat(*label_list, axis=1, op_version=op_version, - output_names=[operator.outputs[0]]) + proba_list = [OnnxIdentity(y[1], op_version=op_version) for y in y_list] + label = OnnxConcat( + *label_list, axis=1, op_version=op_version, output_names=[operator.outputs[0]] + ) label.add_to(scope=scope, container=container) proba = OnnxSequenceConstruct( - *proba_list, op_version=op_version, - output_names=[operator.outputs[1]]) + *proba_list, op_version=op_version, output_names=[operator.outputs[1]] + ) proba.add_to(scope=scope, container=container) -register_converter('SklearnMultiOutputRegressor', - convert_multi_output_regressor_converter) -register_converter('SklearnMultiOutputClassifier', - convert_multi_output_classifier_converter, - options={'nocl': [False, True], - 'output_class_labels': [False, True], - 'zipmap': [False, True]}) +register_converter( + "SklearnMultiOutputRegressor", convert_multi_output_regressor_converter +) +register_converter( + "SklearnMultiOutputClassifier", + convert_multi_output_classifier_converter, + options={ + "nocl": [False, True], + "output_class_labels": [False, True], + "zipmap": [False, True], + }, +) diff --git a/skl2onnx/operator_converters/multiply_op.py b/skl2onnx/operator_converters/multiply_op.py index 733e7c8d1..725e07f9c 100644 --- a/skl2onnx/operator_converters/multiply_op.py +++ b/skl2onnx/operator_converters/multiply_op.py @@ -8,16 +8,21 @@ from ..proto import 
onnx_proto -def convert_sklearn_multiply(scope: Scope, operator: Operator, - container: ModelComponentContainer): - operand_name = scope.get_unique_variable_name( - 'operand') +def convert_sklearn_multiply( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + operand_name = scope.get_unique_variable_name("operand") - container.add_initializer(operand_name, onnx_proto.TensorProto.FLOAT, - [], [operator.operand]) + container.add_initializer( + operand_name, onnx_proto.TensorProto.FLOAT, [], [operator.operand] + ) - apply_mul(scope, [operator.inputs[0].full_name, operand_name], - operator.outputs[0].full_name, container) + apply_mul( + scope, + [operator.inputs[0].full_name, operand_name], + operator.outputs[0].full_name, + container, + ) -register_converter('SklearnMultiply', convert_sklearn_multiply) +register_converter("SklearnMultiply", convert_sklearn_multiply) diff --git a/skl2onnx/operator_converters/naive_bayes.py b/skl2onnx/operator_converters/naive_bayes.py index 08b7716dc..e1a63d3ae 100644 --- a/skl2onnx/operator_converters/naive_bayes.py +++ b/skl2onnx/operator_converters/naive_bayes.py @@ -4,13 +4,23 @@ import numpy as np from ..proto import onnx_proto from ..common._apply_operation import ( - apply_add, apply_cast, apply_div, apply_exp, - apply_log, apply_mul, apply_pow, apply_sub, apply_reshape, + apply_add, + apply_cast, + apply_div, + apply_exp, + apply_log, + apply_mul, + apply_pow, + apply_sub, + apply_reshape, apply_transpose, ) from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type, - guess_proto_type) + BooleanTensorType, + Int64TensorType, + guess_numpy_type, + guess_proto_type, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -18,205 +28,292 @@ def _joint_log_likelihood_bernoulli( - scope, container, input_name, feature_log_prob_name, - class_log_prior_name, binarize, feature_count, proto_dtype, - sum_result_name): + scope, + container, + input_name, + feature_log_prob_name, + class_log_prior_name, + binarize, + feature_count, + proto_dtype, + sum_result_name, +): """ Calculate joint log likelihood for Bernoulli Naive Bayes model. 
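For reference, the node sequence built in this function follows scikit-learn's Bernoulli joint log likelihood; a rough numpy sketch (not converter code, array layouts as in scikit-learn, same 1.000000001 guard for log(1 - x)) is:

import numpy as np

def bernoulli_jll(X, feature_log_prob, class_log_prior, binarize=0.0):
    # feature_log_prob: (n_classes, n_features), class_log_prior: (n_classes,)
    if binarize is not None:
        X = (X > binarize).astype(float)          # the Greater + Cast + Add nodes
    neg_prob = np.log(1.000000001 - np.exp(feature_log_prob))
    # MatMul with (log p - log(1 - p)), then add the summed negative term and the prior
    return X @ (feature_log_prob - neg_prob).T + neg_prob.sum(axis=1) + class_log_prior
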
""" - constant_name = scope.get_unique_variable_name('constant') - exp_result_name = scope.get_unique_variable_name('exp_result') - sub_result_name = scope.get_unique_variable_name('sub_result') - neg_prob_name = scope.get_unique_variable_name('neg_prob') - sum_neg_prob_name = scope.get_unique_variable_name('sum_neg_prob') - difference_matrix_name = scope.get_unique_variable_name( - 'difference_matrix') - dot_prod_name = scope.get_unique_variable_name('dot_prod') - partial_sum_result_name = scope.get_unique_variable_name( - 'partial_sum_result') + constant_name = scope.get_unique_variable_name("constant") + exp_result_name = scope.get_unique_variable_name("exp_result") + sub_result_name = scope.get_unique_variable_name("sub_result") + neg_prob_name = scope.get_unique_variable_name("neg_prob") + sum_neg_prob_name = scope.get_unique_variable_name("sum_neg_prob") + difference_matrix_name = scope.get_unique_variable_name("difference_matrix") + dot_prod_name = scope.get_unique_variable_name("dot_prod") + partial_sum_result_name = scope.get_unique_variable_name("partial_sum_result") # Define constant slightly greater than 1 to avoid log 0 # scenarios when calculating log (1 - x) and x=1 in line 70 container.add_initializer(constant_name, proto_dtype, [], [1.000000001]) if binarize is not None: - threshold_name = scope.get_unique_variable_name('threshold') - condition_name = scope.get_unique_variable_name('condition') - cast_values_name = scope.get_unique_variable_name('cast_values') - zero_tensor_name = scope.get_unique_variable_name('zero_tensor') - binarised_input_name = scope.get_unique_variable_name( - 'binarised_input') + threshold_name = scope.get_unique_variable_name("threshold") + condition_name = scope.get_unique_variable_name("condition") + cast_values_name = scope.get_unique_variable_name("cast_values") + zero_tensor_name = scope.get_unique_variable_name("zero_tensor") + binarised_input_name = scope.get_unique_variable_name("binarised_input") num_features = feature_count.shape[1] - container.add_initializer(threshold_name, proto_dtype, - [1], [binarize]) + container.add_initializer(threshold_name, proto_dtype, [1], [binarize]) container.add_initializer( zero_tensor_name, - proto_dtype, [1, num_features], - np.zeros((1, num_features)).ravel()) + proto_dtype, + [1, num_features], + np.zeros((1, num_features)).ravel(), + ) container.add_node( - 'Greater', [input_name, threshold_name], - condition_name, name=scope.get_unique_operator_name('Greater'), - op_version=9) - apply_cast(scope, condition_name, cast_values_name, container, - to=proto_dtype) - apply_add(scope, [zero_tensor_name, cast_values_name], - binarised_input_name, container, broadcast=1) + "Greater", + [input_name, threshold_name], + condition_name, + name=scope.get_unique_operator_name("Greater"), + op_version=9, + ) + apply_cast(scope, condition_name, cast_values_name, container, to=proto_dtype) + apply_add( + scope, + [zero_tensor_name, cast_values_name], + binarised_input_name, + container, + broadcast=1, + ) input_name = binarised_input_name apply_exp(scope, feature_log_prob_name, exp_result_name, container) - apply_sub(scope, [constant_name, exp_result_name], sub_result_name, - container, broadcast=1) + apply_sub( + scope, [constant_name, exp_result_name], sub_result_name, container, broadcast=1 + ) apply_log(scope, sub_result_name, neg_prob_name, container) if container.target_opset < 13: - container.add_node('ReduceSum', neg_prob_name, - sum_neg_prob_name, axes=[0], - name=scope.get_unique_operator_name('ReduceSum')) 
+ container.add_node( + "ReduceSum", + neg_prob_name, + sum_neg_prob_name, + axes=[0], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [0]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0]) container.add_node( - 'ReduceSum', [neg_prob_name, axis_name], sum_neg_prob_name, - name=scope.get_unique_operator_name('ReduceSum')) - apply_sub(scope, [feature_log_prob_name, neg_prob_name], - difference_matrix_name, container) + "ReduceSum", + [neg_prob_name, axis_name], + sum_neg_prob_name, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_sub( + scope, [feature_log_prob_name, neg_prob_name], difference_matrix_name, container + ) container.add_node( - 'MatMul', [input_name, difference_matrix_name], - dot_prod_name, name=scope.get_unique_operator_name('MatMul')) - - apply_add(scope, [dot_prod_name, sum_neg_prob_name], - partial_sum_result_name, container) - apply_add(scope, [partial_sum_result_name, class_log_prior_name], - sum_result_name, container) + "MatMul", + [input_name, difference_matrix_name], + dot_prod_name, + name=scope.get_unique_operator_name("MatMul"), + ) + + apply_add( + scope, [dot_prod_name, sum_neg_prob_name], partial_sum_result_name, container + ) + apply_add( + scope, + [partial_sum_result_name, class_log_prior_name], + sum_result_name, + container, + ) return sum_result_name def _joint_log_likelihood_gaussian( - scope, container, input_name, model, proto_dtype, sum_result_name): + scope, container, input_name, model, proto_dtype, sum_result_name +): """ Calculate joint log likelihood for Gaussian Naive Bayes model. """ features = model.theta_.shape[1] jointi = np.log(model.class_prior_) - var_sigma = model.var_ if hasattr(model, 'var_') else model.sigma_ - sigma_sum_log = - 0.5 * np.sum(np.log(2. 
* np.pi * var_sigma), axis=1) - theta_name = scope.get_unique_variable_name('theta') - sigma_name = scope.get_unique_variable_name('sigma') - sigma_sum_log_name = scope.get_unique_variable_name('sigma_sum_log') - jointi_name = scope.get_unique_variable_name('jointi') - exponent_name = scope.get_unique_variable_name('exponent') - prod_operand_name = scope.get_unique_variable_name('prod_operand') - reshaped_input_name = scope.get_unique_variable_name('reshaped_input') - subtracted_input_name = scope.get_unique_variable_name('subtracted_input') - pow_result_name = scope.get_unique_variable_name('pow_result') - div_result_name = scope.get_unique_variable_name('div_result') - reduced_sum_name = scope.get_unique_variable_name('reduced_sum') - mul_result_name = scope.get_unique_variable_name('mul_result') - part_log_likelihood_name = scope.get_unique_variable_name( - 'part_log_likelihood') + var_sigma = model.var_ if hasattr(model, "var_") else model.sigma_ + sigma_sum_log = -0.5 * np.sum(np.log(2.0 * np.pi * var_sigma), axis=1) + theta_name = scope.get_unique_variable_name("theta") + sigma_name = scope.get_unique_variable_name("sigma") + sigma_sum_log_name = scope.get_unique_variable_name("sigma_sum_log") + jointi_name = scope.get_unique_variable_name("jointi") + exponent_name = scope.get_unique_variable_name("exponent") + prod_operand_name = scope.get_unique_variable_name("prod_operand") + reshaped_input_name = scope.get_unique_variable_name("reshaped_input") + subtracted_input_name = scope.get_unique_variable_name("subtracted_input") + pow_result_name = scope.get_unique_variable_name("pow_result") + div_result_name = scope.get_unique_variable_name("div_result") + reduced_sum_name = scope.get_unique_variable_name("reduced_sum") + mul_result_name = scope.get_unique_variable_name("mul_result") + part_log_likelihood_name = scope.get_unique_variable_name("part_log_likelihood") theta = model.theta_.reshape((1, -1, features)) sigma = var_sigma.reshape((1, -1, features)) - container.add_initializer(theta_name, proto_dtype, theta.shape, - theta.ravel()) - container.add_initializer(sigma_name, proto_dtype, sigma.shape, - sigma.ravel()) - container.add_initializer(jointi_name, proto_dtype, [1, jointi.shape[0]], - jointi) + container.add_initializer(theta_name, proto_dtype, theta.shape, theta.ravel()) + container.add_initializer(sigma_name, proto_dtype, sigma.shape, sigma.ravel()) + container.add_initializer(jointi_name, proto_dtype, [1, jointi.shape[0]], jointi) container.add_initializer( - sigma_sum_log_name, proto_dtype, - [1, sigma_sum_log.shape[0]], sigma_sum_log.ravel()) + sigma_sum_log_name, + proto_dtype, + [1, sigma_sum_log.shape[0]], + sigma_sum_log.ravel(), + ) container.add_initializer(exponent_name, proto_dtype, [], [2]) container.add_initializer(prod_operand_name, proto_dtype, [], [0.5]) - apply_reshape(scope, input_name, reshaped_input_name, container, - desired_shape=[-1, 1, features]) - apply_sub(scope, [reshaped_input_name, theta_name], subtracted_input_name, - container, broadcast=1) - apply_pow(scope, [subtracted_input_name, exponent_name], pow_result_name, - container, broadcast=1) - apply_div(scope, [pow_result_name, sigma_name], div_result_name, - container, broadcast=1) + apply_reshape( + scope, + input_name, + reshaped_input_name, + container, + desired_shape=[-1, 1, features], + ) + apply_sub( + scope, + [reshaped_input_name, theta_name], + subtracted_input_name, + container, + broadcast=1, + ) + apply_pow( + scope, + [subtracted_input_name, exponent_name], + pow_result_name, + 
container, + broadcast=1, + ) + apply_div( + scope, [pow_result_name, sigma_name], div_result_name, container, broadcast=1 + ) if container.target_opset < 13: - container.add_node('ReduceSum', div_result_name, - reduced_sum_name, axes=[2], keepdims=0, - name=scope.get_unique_operator_name('ReduceSum')) + container.add_node( + "ReduceSum", + div_result_name, + reduced_sum_name, + axes=[2], + keepdims=0, + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [2]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [2]) container.add_node( - 'ReduceSum', [div_result_name, axis_name], reduced_sum_name, - keepdims=0, name=scope.get_unique_operator_name('ReduceSum')) - apply_mul(scope, [reduced_sum_name, prod_operand_name], mul_result_name, - container, broadcast=1) - apply_sub(scope, [sigma_sum_log_name, mul_result_name], - part_log_likelihood_name, - container, broadcast=1) - apply_add(scope, [jointi_name, part_log_likelihood_name], - sum_result_name, container, broadcast=1) + "ReduceSum", + [div_result_name, axis_name], + reduced_sum_name, + keepdims=0, + name=scope.get_unique_operator_name("ReduceSum"), + ) + apply_mul( + scope, + [reduced_sum_name, prod_operand_name], + mul_result_name, + container, + broadcast=1, + ) + apply_sub( + scope, + [sigma_sum_log_name, mul_result_name], + part_log_likelihood_name, + container, + broadcast=1, + ) + apply_add( + scope, + [jointi_name, part_log_likelihood_name], + sum_result_name, + container, + broadcast=1, + ) return sum_result_name def _joint_log_likelihood_categorical( - scope, container, input_name, model, sum_result_name): + scope, container, input_name, model, sum_result_name +): """ Calculate joint log likelihood for Categorical Naive Bayes model. 
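A numpy rendering of what the per-feature ArrayFeatureExtractor/Sum graph below computes (sketch only; feature_log_prob_ and class_log_prior_ are the fitted CategoricalNB attributes):

import numpy as np

def categorical_jll(X, feature_log_prob, class_log_prior):
    # X: (n_samples, n_features) integer category indices,
    # feature_log_prob[i]: (n_classes, n_categories_i)
    jll = np.zeros((X.shape[0], class_log_prior.shape[0]))
    for i, flp in enumerate(feature_log_prob):
        jll += flp[:, X[:, i]].T              # per-feature gather, then transpose
    return jll + class_log_prior
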
""" jll_list = [] - class_log_prior_name = scope.get_unique_variable_name('class_log_prior') - summation_jll_name = scope.get_unique_variable_name('summation_jll') + class_log_prior_name = scope.get_unique_variable_name("class_log_prior") + summation_jll_name = scope.get_unique_variable_name("summation_jll") container.add_initializer( - class_log_prior_name, onnx_proto.TensorProto.FLOAT, - model.class_log_prior_.shape, model.class_log_prior_) + class_log_prior_name, + onnx_proto.TensorProto.FLOAT, + model.class_log_prior_.shape, + model.class_log_prior_, + ) - n_features = (model.n_features_in_ - if hasattr(model, 'n_features_in_') - else model.n_features_) + n_features = ( + model.n_features_in_ if hasattr(model, "n_features_in_") else model.n_features_ + ) for i in range(n_features): - feature_index_name = scope.get_unique_variable_name('feature_index') - indices_name = scope.get_unique_variable_name('indices') - cast_indices_name = scope.get_unique_variable_name('cast_indices') - feature_log_proba_name = scope.get_unique_variable_name( - 'feature_log_proba') - jll_name = scope.get_unique_variable_name('jll') - transposed_jll_name = scope.get_unique_variable_name('transposed_jll') + feature_index_name = scope.get_unique_variable_name("feature_index") + indices_name = scope.get_unique_variable_name("indices") + cast_indices_name = scope.get_unique_variable_name("cast_indices") + feature_log_proba_name = scope.get_unique_variable_name("feature_log_proba") + jll_name = scope.get_unique_variable_name("jll") + transposed_jll_name = scope.get_unique_variable_name("transposed_jll") container.add_initializer( - feature_index_name, onnx_proto.TensorProto.INT64, [], [i]) + feature_index_name, onnx_proto.TensorProto.INT64, [], [i] + ) container.add_initializer( - feature_log_proba_name, onnx_proto.TensorProto.FLOAT, + feature_log_proba_name, + onnx_proto.TensorProto.FLOAT, model.feature_log_prob_[i].shape, - model.feature_log_prob_[i].ravel()) + model.feature_log_prob_[i].ravel(), + ) container.add_node( - 'ArrayFeatureExtractor', [input_name, feature_index_name], - indices_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_cast(scope, indices_name, cast_indices_name, - container, to=onnx_proto.TensorProto.INT64) + "ArrayFeatureExtractor", + [input_name, feature_index_name], + indices_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_cast( + scope, + indices_name, + cast_indices_name, + container, + to=onnx_proto.TensorProto.INT64, + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [feature_log_proba_name, cast_indices_name], - jll_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_transpose(scope, jll_name, transposed_jll_name, - container, perm=(1, 0)) + jll_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_transpose(scope, jll_name, transposed_jll_name, container, perm=(1, 0)) jll_list.append(transposed_jll_name) - container.add_node('Sum', jll_list, - summation_jll_name, - name=scope.get_unique_operator_name('Sum')) - apply_add(scope, [summation_jll_name, class_log_prior_name], - sum_result_name, container, broadcast=1) + container.add_node( + "Sum", jll_list, summation_jll_name, name=scope.get_unique_operator_name("Sum") + ) + apply_add( + scope, + [summation_jll_name, class_log_prior_name], + sum_result_name, + container, + broadcast=1, + ) return 
sum_result_name -def convert_sklearn_naive_bayes(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_naive_bayes( + scope: Scope, operator: Operator, container: ModelComponentContainer +): # Computational graph: # # Note: In the following graph, variable names are in lower case @@ -380,149 +477,233 @@ def convert_sklearn_naive_bayes(scope: Scope, operator: Operator, classes = get_label_classes(scope, nb_op) output_shape = (-1,) - sum_result_name = scope.get_unique_variable_name('sum_result') - argmax_output_name = scope.get_unique_variable_name('argmax_output') - cast2_result_name = scope.get_unique_variable_name('cast2_result') - reshaped_result_name = scope.get_unique_variable_name('reshaped_result') - classes_name = scope.get_unique_variable_name('classes') + sum_result_name = scope.get_unique_variable_name("sum_result") + argmax_output_name = scope.get_unique_variable_name("argmax_output") + cast2_result_name = scope.get_unique_variable_name("cast2_result") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + classes_name = scope.get_unique_variable_name("classes") reduce_log_sum_exp_result_name = scope.get_unique_variable_name( - 'reduce_log_sum_exp_result') - log_prob_name = scope.get_unique_variable_name('log_prob') + "reduce_log_sum_exp_result" + ) + log_prob_name = scope.get_unique_variable_name("log_prob") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) class_type = onnx_proto.TensorProto.STRING - if (np.issubdtype(classes.dtype, np.floating) or - classes.dtype == np.bool_): + if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_: class_type = onnx_proto.TensorProto.INT32 classes = classes.astype(np.int32) elif np.issubdtype(classes.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) container.add_initializer(classes_name, class_type, classes.shape, classes) - if operator.type not in ('SklearnCategoricalNB', 'SklearnGaussianNB'): - class_log_prior_name = scope.get_unique_variable_name( - 'class_log_prior') - feature_log_prob_name = scope.get_unique_variable_name( - 'feature_log_prob') + if operator.type not in ("SklearnCategoricalNB", "SklearnGaussianNB"): + class_log_prior_name = scope.get_unique_variable_name("class_log_prior") + feature_log_prob_name = scope.get_unique_variable_name("feature_log_prob") - class_log_prior = nb_op.class_log_prior_.astype( - float_dtype).reshape((1, -1)) + class_log_prior = nb_op.class_log_prior_.astype(float_dtype).reshape((1, -1)) feature_log_prob = nb_op.feature_log_prob_.T.astype(float_dtype) container.add_initializer( - feature_log_prob_name, proto_dtype, - feature_log_prob.shape, feature_log_prob.flatten()) + feature_log_prob_name, + proto_dtype, + feature_log_prob.shape, + feature_log_prob.flatten(), + ) container.add_initializer( - class_log_prior_name, proto_dtype, - class_log_prior.shape, class_log_prior.flatten()) + class_log_prior_name, + proto_dtype, + class_log_prior.shape, + class_log_prior.flatten(), + ) input_name = operator.inputs[0].full_name if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, operator.input_full_names, cast_input_name, - 
container, to=proto_dtype) + apply_cast( + scope, operator.input_full_names, cast_input_name, container, to=proto_dtype + ) input_name = cast_input_name - if operator.type == 'SklearnBernoulliNB': + if operator.type == "SklearnBernoulliNB": sum_result_name = _joint_log_likelihood_bernoulli( - scope, container, input_name, feature_log_prob_name, - class_log_prior_name, nb_op.binarize, nb_op.feature_count_, - proto_dtype, sum_result_name) - elif operator.type == 'SklearnGaussianNB': + scope, + container, + input_name, + feature_log_prob_name, + class_log_prior_name, + nb_op.binarize, + nb_op.feature_count_, + proto_dtype, + sum_result_name, + ) + elif operator.type == "SklearnGaussianNB": sum_result_name = _joint_log_likelihood_gaussian( - scope, container, input_name, nb_op, - proto_dtype, sum_result_name) - elif operator.type == 'SklearnCategoricalNB': + scope, container, input_name, nb_op, proto_dtype, sum_result_name + ) + elif operator.type == "SklearnCategoricalNB": sum_result_name = _joint_log_likelihood_categorical( - scope, container, input_name, nb_op, sum_result_name) + scope, container, input_name, nb_op, sum_result_name + ) else: # MultinomialNB or ComplementNB matmul_result_name = ( - scope.get_unique_variable_name('matmul_result') - if operator.type == 'SklearnMultinomialNB' or len(classes) == 1 - else sum_result_name) + scope.get_unique_variable_name("matmul_result") + if operator.type == "SklearnMultinomialNB" or len(classes) == 1 + else sum_result_name + ) container.add_node( - 'MatMul', [input_name, feature_log_prob_name], - matmul_result_name, name=scope.get_unique_operator_name('MatMul')) - if operator.type == 'SklearnMultinomialNB' or len(classes) == 1: - apply_add(scope, [matmul_result_name, class_log_prior_name], - sum_result_name, container, broadcast=1) + "MatMul", + [input_name, feature_log_prob_name], + matmul_result_name, + name=scope.get_unique_operator_name("MatMul"), + ) + if operator.type == "SklearnMultinomialNB" or len(classes) == 1: + apply_add( + scope, + [matmul_result_name, class_log_prior_name], + sum_result_name, + container, + broadcast=1, + ) - container.add_node('ArgMax', sum_result_name, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) + container.add_node( + "ArgMax", + sum_result_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) # Calculation of class probability log_prob_shape = [-1, 1] - reshaped_log_prob_name = scope.get_unique_variable_name( - 'reshaped_log_prob') + reshaped_log_prob_name = scope.get_unique_variable_name("reshaped_log_prob") if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceLogSumExp', [sum_result_name, axis_name], + "ReduceLogSumExp", + [sum_result_name, axis_name], reduce_log_sum_exp_result_name, - name=scope.get_unique_operator_name('ReduceLogSumExp'), - keepdims=0) + name=scope.get_unique_operator_name("ReduceLogSumExp"), + keepdims=0, + ) else: container.add_node( - 'ReduceLogSumExp', sum_result_name, + "ReduceLogSumExp", + sum_result_name, reduce_log_sum_exp_result_name, - name=scope.get_unique_operator_name('ReduceLogSumExp'), - axes=[1], keepdims=0) - apply_reshape(scope, reduce_log_sum_exp_result_name, - reshaped_log_prob_name, container, - desired_shape=log_prob_shape) - 
apply_sub(scope, [sum_result_name, reshaped_log_prob_name], log_prob_name, - container, broadcast=1) + name=scope.get_unique_operator_name("ReduceLogSumExp"), + axes=[1], + keepdims=0, + ) + apply_reshape( + scope, + reduce_log_sum_exp_result_name, + reshaped_log_prob_name, + container, + desired_shape=log_prob_shape, + ) + apply_sub( + scope, + [sum_result_name, reshaped_log_prob_name], + log_prob_name, + container, + broadcast=1, + ) apply_exp(scope, log_prob_name, operator.outputs[1].full_name, container) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) # Reshape op does not seem to handle INT64 tensor even though it is # listed as one of the supported types in the doc, so Cast was # required here. if class_type == onnx_proto.TensorProto.INT32: - apply_cast(scope, array_feature_extractor_result_name, - cast2_result_name, container, - to=proto_dtype) - apply_reshape(scope, cast2_result_name, reshaped_result_name, - container, desired_shape=output_shape) - apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + apply_cast( + scope, + array_feature_extractor_result_name, + cast2_result_name, + container, + to=proto_dtype, + ) + apply_reshape( + scope, + cast2_result_name, + reshaped_result_name, + container, + desired_shape=output_shape, + ) + apply_cast( + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: # string labels - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=output_shape) - - -register_converter('SklearnBernoulliNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnCategoricalNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnComplementNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnGaussianNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnMultinomialNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=output_shape, + ) + + +register_converter( + "SklearnBernoulliNB", + convert_sklearn_naive_bayes, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnCategoricalNB", + convert_sklearn_naive_bayes, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnComplementNB", + convert_sklearn_naive_bayes, + options={ + "zipmap": 
[True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnGaussianNB", + convert_sklearn_naive_bayes, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnMultinomialNB", + convert_sklearn_naive_bayes, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/nearest_neighbours.py b/skl2onnx/operator_converters/nearest_neighbours.py index f74a56e41..2bc79ad78 100644 --- a/skl2onnx/operator_converters/nearest_neighbours.py +++ b/skl2onnx/operator_converters/nearest_neighbours.py @@ -29,6 +29,7 @@ OnnxTopK_1, OnnxTranspose, ) + try: from ..algebra.onnx_ops import ( OnnxConstantOfShape, @@ -54,16 +55,27 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common.data_types import ( - Int64TensorType, DoubleTensorType, - guess_numpy_type, guess_proto_type) + Int64TensorType, + DoubleTensorType, + guess_numpy_type, + guess_proto_type, +) from ..common.utils_classifier import get_label_classes from ..proto import onnx_proto from ._gp_kernels import py_make_float_array -def onnx_nearest_neighbors_indices_k(X, Y, k, metric='euclidean', dtype=None, - op_version=None, keep_distances=False, - optim=None, **kwargs): +def onnx_nearest_neighbors_indices_k( + X, + Y, + k, + metric="euclidean", + dtype=None, + op_version=None, + keep_distances=False, + optim=None, + **kwargs +): """ Retrieves the nearest neigbours *ONNX*. :param X: features or *OnnxOperatorMixin* @@ -78,45 +90,66 @@ def onnx_nearest_neighbors_indices_k(X, Y, k, metric='euclidean', dtype=None, :param kwargs: additional parameters for function @see fn onnx_cdist :return: top indices, top distances """ - kwargs_dist = {k: v for k, v in kwargs.items() if k == 'p'} - kwargs_topk = {k: v for k, v in kwargs.items() if k != 'p'} - if optim == 'cdist': + kwargs_dist = {k: v for k, v in kwargs.items() if k == "p"} + kwargs_topk = {k: v for k, v in kwargs.items() if k != "p"} + if optim == "cdist": from skl2onnx.algebra.custom_ops import OnnxCDist - dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, - **kwargs_dist) + + dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, **kwargs_dist) elif optim is None: - dim_in = Y.shape[1] if hasattr(Y, 'shape') else None - dim_out = Y.shape[0] if hasattr(Y, 'shape') else None - dist = onnx_cdist(X, Y, metric=metric, dtype=dtype, - op_version=op_version, - dim_in=dim_in, dim_out=dim_out, - **kwargs_dist) + dim_in = Y.shape[1] if hasattr(Y, "shape") else None + dim_out = Y.shape[0] if hasattr(Y, "shape") else None + dist = onnx_cdist( + X, + Y, + metric=metric, + dtype=dtype, + op_version=op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs_dist + ) else: raise ValueError("Unknown optimisation '{}'.".format(optim)) if op_version < 10: - neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), - op_version=op_version) + neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version) node = OnnxTopK_1(neg_dist, k=k, op_version=1, **kwargs_topk) elif op_version < 11: - neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), - op_version=op_version) - node = OnnxTopK_10(neg_dist, np.array([k], dtype=np.int64), - op_version=10, **kwargs_topk) + neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version) + node = OnnxTopK_10( + 
neg_dist, np.array([k], dtype=np.int64), op_version=10, **kwargs_topk + ) else: - node = OnnxTopK_11(dist, np.array([k], dtype=np.int64), - largest=0, sorted=1, - op_version=11, **kwargs_topk) + node = OnnxTopK_11( + dist, + np.array([k], dtype=np.int64), + largest=0, + sorted=1, + op_version=11, + **kwargs_topk + ) if keep_distances: - return (node[1], OnnxMul( - node[0], np.array([-1], dtype=dtype), op_version=op_version)) + return ( + node[1], + OnnxMul(node[0], np.array([-1], dtype=dtype), op_version=op_version), + ) if keep_distances: return (node[1], node[0]) return node[1] def onnx_nearest_neighbors_indices_radius( - X, Y, radius, metric='euclidean', dtype=None, op_version=None, - keep_distances=False, optim=None, proto_dtype=None, **kwargs): + X, + Y, + radius, + metric="euclidean", + dtype=None, + op_version=None, + keep_distances=False, + optim=None, + proto_dtype=None, + **kwargs +): """ Retrieves the nearest neigbours *ONNX*. :param X: features or *OnnxOperatorMixin* @@ -134,44 +167,54 @@ def onnx_nearest_neighbors_indices_radius( binary weights """ opv = op_version - if optim == 'cdist': + if optim == "cdist": from skl2onnx.algebra.custom_ops import OnnxCDist - dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, - **kwargs) + + dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, **kwargs) elif optim is None: - dim_in = Y.shape[1] if hasattr(Y, 'shape') else None - dim_out = Y.shape[0] if hasattr(Y, 'shape') else None - dist = onnx_cdist(X, Y, metric=metric, dtype=dtype, - op_version=op_version, - dim_in=dim_in, dim_out=dim_out, - **kwargs) + dim_in = Y.shape[1] if hasattr(Y, "shape") else None + dim_out = Y.shape[0] if hasattr(Y, "shape") else None + dist = onnx_cdist( + X, + Y, + metric=metric, + dtype=dtype, + op_version=op_version, + dim_in=dim_in, + dim_out=dim_out, + **kwargs + ) else: raise ValueError("Unknown optimisation '{}'.".format(optim)) less = OnnxLess(dist, np.array([radius], dtype=dtype), op_version=opv) - less.set_onnx_name_prefix('cond') + less.set_onnx_name_prefix("cond") shape = OnnxShape(dist, op_version=opv) zero = OnnxCast( - OnnxConstantOfShape(shape, op_version=opv), - op_version=opv, to=proto_dtype) + OnnxConstantOfShape(shape, op_version=opv), op_version=opv, to=proto_dtype + ) tensor_value = py_make_float_array(-1, dtype=np.float32, as_tensor=True) minus = OnnxCast( - OnnxConstantOfShape( - shape, op_version=opv, value=tensor_value), - op_version=opv, to=onnx_proto.TensorProto.INT64) + OnnxConstantOfShape(shape, op_version=opv, value=tensor_value), + op_version=opv, + to=onnx_proto.TensorProto.INT64, + ) minus_range = OnnxAdd( OnnxNeg( OnnxCumSum(minus, np.array([1], dtype=np.int64), op_version=opv), - op_version=opv), - minus, op_version=opv) - minus_range.set_onnx_name_prefix('arange') + op_version=opv, + ), + minus, + op_version=opv, + ) + minus_range.set_onnx_name_prefix("arange") dist_only = OnnxWhere(less, dist, zero, op_version=opv) - dist_only.set_onnx_name_prefix('nndist') + dist_only.set_onnx_name_prefix("nndist") indices = OnnxWhere(less, minus_range, minus, op_version=opv) - indices.set_onnx_name_prefix('nnind') + indices.set_onnx_name_prefix("nnind") binary = OnnxCast(less, to=proto_dtype, op_version=opv) - binary.set_onnx_name_prefix('nnbin') + binary.set_onnx_name_prefix("nnbin") return indices, dist_only, binary @@ -211,65 +254,90 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None): options = container.get_options(op, dict(optim=None)) - single_reg = (not hasattr(op, '_y') or len(op._y.shape) 
== 1 or - len(op._y.shape) == 2 and op._y.shape[1] == 1) + single_reg = ( + not hasattr(op, "_y") + or len(op._y.shape) == 1 + or len(op._y.shape) == 2 + and op._y.shape[1] == 1 + ) ndim = 1 if single_reg else op._y.shape[1] - metric = (op.effective_metric_ if hasattr(op, 'effective_metric_') else - op.metric) + metric = op.effective_metric_ if hasattr(op, "effective_metric_") else op.metric neighb = op._fit_X.astype(dtype) - if (hasattr(op, 'n_neighbors') and op.n_neighbors is not None and - hasattr(op, 'radius') and op.radius is not None): + if ( + hasattr(op, "n_neighbors") + and op.n_neighbors is not None + and hasattr(op, "radius") + and op.radius is not None + ): raise RuntimeError( "The model defines radius and n_neighbors at the " "same time ({} and {}). " - "This case is not supported.".format( - op.radius, op.n_neighbors)) + "This case is not supported.".format(op.radius, op.n_neighbors) + ) - if hasattr(op, 'n_neighbors') and op.n_neighbors is not None: + if hasattr(op, "n_neighbors") and op.n_neighbors is not None: k = op.n_neighbors if k is None else k radius = None - elif hasattr(op, 'radius') and op.radius is not None: + elif hasattr(op, "radius") and op.radius is not None: k = None radius = op.radius if radius is None else radius else: - raise RuntimeError( - "Cannot convert class '{}'.".format(op.__class__.__name__)) + raise RuntimeError("Cannot convert class '{}'.".format(op.__class__.__name__)) - training_labels = op._y if hasattr(op, '_y') else None + training_labels = op._y if hasattr(op, "_y") else None distance_kwargs = {} - if metric == 'minkowski': + if metric == "minkowski": if op.p != 2: - distance_kwargs['p'] = op.p + distance_kwargs["p"] = op.p else: metric = "euclidean" - weights = op.weights if hasattr(op, 'weights') else 'distance' + weights = op.weights if hasattr(op, "weights") else "distance" binary = None - if weights == 'uniform' and radius is None: + if weights == "uniform" and radius is None: top_indices = onnx_nearest_neighbors_indices_k( - X, neighb, k, metric=metric, dtype=dtype, - op_version=opv, optim=options.get('optim', None), - **distance_kwargs) + X, + neighb, + k, + metric=metric, + dtype=dtype, + op_version=opv, + optim=options.get("optim", None), + **distance_kwargs + ) top_distances = None elif radius is not None: three = onnx_nearest_neighbors_indices_radius( - X, neighb, radius, metric=metric, dtype=dtype, - op_version=opv, keep_distances=True, + X, + neighb, + radius, + metric=metric, + dtype=dtype, + op_version=opv, + keep_distances=True, proto_dtype=proto_type, - optim=options.get('optim', None), - **distance_kwargs) + optim=options.get("optim", None), + **distance_kwargs + ) top_indices, top_distances, binary = three - elif weights == 'distance': + elif weights == "distance": top_indices, top_distances = onnx_nearest_neighbors_indices_k( - X, neighb, k, metric=metric, dtype=dtype, - op_version=opv, keep_distances=True, - optim=options.get('optim', None), - **distance_kwargs) + X, + neighb, + k, + metric=metric, + dtype=dtype, + op_version=opv, + keep_distances=True, + optim=options.get("optim", None), + **distance_kwargs + ) else: raise RuntimeError( - "Unable to convert KNeighborsRegressor when weights is callable.") + "Unable to convert KNeighborsRegressor when weights is callable." 
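# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code. The branch above
# only supports weights="uniform" and weights="distance"; a callable weight is
# rejected at conversion time. Assumes skl2onnx and scikit-learn are installed;
# variable names are arbitrary.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from skl2onnx import to_onnx

X = np.random.RandomState(0).randn(40, 3).astype(np.float32)
y = np.random.RandomState(1).randn(40)

ok = KNeighborsRegressor(n_neighbors=3, weights="distance").fit(X, y)
onx = to_onnx(ok, X)  # converts

bad = KNeighborsRegressor(n_neighbors=3, weights=lambda d: 1.0 / (1.0 + d)).fit(X, y)
try:
    to_onnx(bad, X)
except RuntimeError as exc:
    # prints "Unable to convert KNeighborsRegressor when weights is callable."
    print(exc)
# ---------------------------------------------------------------------------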
+ ) if training_labels is not None: if ndim > 1: @@ -287,35 +355,38 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None): else: raise RuntimeError( "Conversion of a KNeighborsRegressor for multi regression " - "requires opset >= 9.") + "requires opset >= 9." + ) if training_labels.dtype == np.int32: training_labels = training_labels.astype(np.int64) flattened = OnnxFlatten(top_indices, op_version=opv) extracted = OnnxArrayFeatureExtractor( - training_labels, flattened, op_version=opv) + training_labels, flattened, op_version=opv + ) reshaped = OnnxReshapeApi13(extracted, shape, op_version=opv) if ndim > 1: reshaped = OnnxTranspose(reshaped, op_version=opv, perm=[1, 0, 2]) - reshaped.set_onnx_name_prefix('knny') + reshaped.set_onnx_name_prefix("knny") else: reshaped = None axis = 1 if binary is not None: - if op.weights == 'uniform': + if op.weights == "uniform": wei = binary else: - modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype), - op_version=opv) - wei = OnnxMul(binary, OnnxReciprocal(modified, op_version=opv), - op_version=opv) + modified = OnnxMax( + top_distances, np.array([1e-6], dtype=dtype), op_version=opv + ) + wei = OnnxMul( + binary, OnnxReciprocal(modified, op_version=opv), op_version=opv + ) norm = OnnxReduceSumApi11(wei, op_version=opv, axes=[1], keepdims=0) elif top_distances is not None: - modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype), - op_version=opv) + modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype), op_version=opv) wei = OnnxReciprocal(modified, op_version=opv) norm = OnnxReduceSumApi11(wei, op_version=opv, axes=[1], keepdims=0) else: @@ -323,14 +394,15 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None): wei = None if wei is not None: - wei.set_onnx_name_prefix('wei') + wei.set_onnx_name_prefix("wei") if norm is not None: - norm.set_onnx_name_prefix('norm') + norm.set_onnx_name_prefix("norm") return top_indices, top_distances, reshaped, wei, norm, axis -def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_nearest_neighbors_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *KNeighborsRegressor* into *ONNX*. The converted model may return different predictions depending @@ -349,18 +421,17 @@ def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator, opv = container.target_opset out = operator.outputs - reshaped_cast = OnnxCast( - reshaped, to=proto_type, op_version=opv) + reshaped_cast = OnnxCast(reshaped, to=proto_type, op_version=opv) if top_distances is not None: # Multi-target - if (hasattr(operator.raw_operator, '_y') and - len(operator.raw_operator._y.shape) > 1 and - operator.raw_operator._y.shape[1] > 1): - rs = OnnxTranspose(reshaped_cast, perm=[1, 0, 2], - op_version=opv) + if ( + hasattr(operator.raw_operator, "_y") + and len(operator.raw_operator._y.shape) > 1 + and operator.raw_operator._y.shape[1] > 1 + ): + rs = OnnxTranspose(reshaped_cast, perm=[1, 0, 2], op_version=opv) weighted_rs = OnnxMul(rs, wei, op_version=opv) - weighted = OnnxTranspose(weighted_rs, perm=[1, 0, 2], - op_version=opv) + weighted = OnnxTranspose(weighted_rs, perm=[1, 0, 2], op_version=opv) if OnnxIsNaN is not None: # This steps sometimes produces nan (bug in onnxuntime) @@ -371,38 +442,46 @@ def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator, weighted = OnnxWhere(isnan, csts0, weighted, op_version=opv) # Back to original plan. 
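# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code: a quick way to
# compare the converted KNeighborsRegressor with scikit-learn. As the docstring
# above warns, small discrepancies are possible when several neighbours sit at
# exactly the same distance. Assumes onnxruntime is installed.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.neighbors import KNeighborsRegressor
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X, y = make_regression(n_samples=100, n_features=4, random_state=0)
X = X.astype(np.float32)
model = KNeighborsRegressor(n_neighbors=5, weights="distance").fit(X, y)

onx = to_onnx(model, X)
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
got = sess.run(None, {"X": X[:5]})[0]
print(np.abs(got.ravel() - model.predict(X[:5])).max())  # maximum discrepancy
# ---------------------------------------------------------------------------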
- res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, - keepdims=0) - norm2 = OnnxReshapeApi13(norm, np.array([-1, 1], dtype=np.int64), - op_version=opv) + res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, keepdims=0) + norm2 = OnnxReshapeApi13( + norm, np.array([-1, 1], dtype=np.int64), op_version=opv + ) res = OnnxDiv(res, norm2, op_version=opv, output_names=out) else: weighted = OnnxMul(reshaped_cast, wei, op_version=opv) - res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, - keepdims=0) - res.set_onnx_name_prefix('final') + res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, keepdims=0) + res.set_onnx_name_prefix("final") if opv >= 12: shape = OnnxShape(res, op_version=opv) norm = OnnxReshapeApi13(norm, shape, op_version=opv) - norm.set_onnx_name_prefix('normr') + norm.set_onnx_name_prefix("normr") res = OnnxDiv(res, norm, op_version=opv) - res = OnnxReshapeApi13(res, np.array([-1, 1], dtype=np.int64), - output_names=out, op_version=opv) + res = OnnxReshapeApi13( + res, np.array([-1, 1], dtype=np.int64), output_names=out, op_version=opv + ) else: - if (hasattr(operator.raw_operator, '_y') and - len(np.squeeze(operator.raw_operator._y).shape) == 1): + if ( + hasattr(operator.raw_operator, "_y") + and len(np.squeeze(operator.raw_operator._y).shape) == 1 + ): keepdims = 1 elif operator.raw_operator.n_neighbors == 1: keepdims = 0 else: keepdims = 0 - res = OnnxReduceMeanApi18(reshaped_cast, axes=[axis], op_version=opv, - keepdims=keepdims, output_names=out) + res = OnnxReduceMeanApi18( + reshaped_cast, + axes=[axis], + op_version=opv, + keepdims=keepdims, + output_names=out, + ) res.add_to(scope, container) -def get_proba_and_label(container, nb_classes, reshaped, - wei, axis, opv, proto_type, keep_axis=True): +def get_proba_and_label( + container, nb_classes, reshaped, wei, axis, opv, proto_type, keep_axis=True +): """ This function calculates the label by choosing majority label amongst the nearest neighbours. @@ -411,25 +490,23 @@ def get_proba_and_label(container, nb_classes, reshaped, for cl in range(nb_classes): cst = np.array([cl], dtype=np.int64) mat_cast = OnnxCast( - OnnxEqual(reshaped, cst, op_version=opv), - op_version=opv, to=proto_type) + OnnxEqual(reshaped, cst, op_version=opv), op_version=opv, to=proto_type + ) if wei is not None: if not keep_axis: - mat_cast = OnnxSqueezeApi11(mat_cast, axes=[-1], - op_version=opv) + mat_cast = OnnxSqueezeApi11(mat_cast, axes=[-1], op_version=opv) mat_cast = OnnxMul(mat_cast, wei, op_version=opv) wh = OnnxReduceSumApi11(mat_cast, axes=[1], op_version=opv) conc.append(wh) all_together = OnnxConcat(*conc, axis=1, op_version=opv) - sum_prob = OnnxReduceSumApi11( - all_together, axes=[1], op_version=opv, keepdims=1) - res = OnnxArgMax(all_together, axis=axis, op_version=opv, - keepdims=0) + sum_prob = OnnxReduceSumApi11(all_together, axes=[1], op_version=opv, keepdims=1) + res = OnnxArgMax(all_together, axis=axis, op_version=opv, keepdims=0) return all_together, sum_prob, res -def convert_nearest_neighbors_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_nearest_neighbors_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *KNeighborsClassifier* into *ONNX*. 
The converted model may return different predictions depending @@ -453,70 +530,87 @@ def convert_nearest_neighbors_classifier(scope: Scope, operator: Operator, if axis == 0: raise RuntimeError( "Binary classification not implemented in scikit-learn. " - "Check this code is not reused for other libraries.") + "Check this code is not reused for other libraries." + ) classes = get_label_classes(scope, op) - if hasattr(classes, 'dtype') and ( - np.issubdtype(classes.dtype, np.floating) or - classes.dtype == np.bool_): + if hasattr(classes, "dtype") and ( + np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_ + ): classes = classes.astype(np.int32) is_integer = True elif isinstance(classes[0], (int, np.int32, np.int64)): is_integer = True else: is_integer = False - if (isinstance(op.classes_, list) - and isinstance(op.classes_[0], np.ndarray)): + if isinstance(op.classes_, list) and isinstance(op.classes_[0], np.ndarray): # Multi-label out_labels, out_probas = [], [] for index, cur_class in enumerate(op.classes_): - transpose_result = OnnxTranspose( - reshaped, op_version=opv, perm=[0, 2, 1]) + transpose_result = OnnxTranspose(reshaped, op_version=opv, perm=[0, 2, 1]) extracted_name = OnnxArrayFeatureExtractor( - transpose_result, np.array([index], dtype=np.int64), - op_version=opv) - extracted_name.set_onnx_name_prefix('tr%d' % index) + transpose_result, np.array([index], dtype=np.int64), op_version=opv + ) + extracted_name.set_onnx_name_prefix("tr%d" % index) all_together, sum_prob, res = get_proba_and_label( - container, len(cur_class), extracted_name, - wei, 1, opv, proto_type, keep_axis=False) + container, + len(cur_class), + extracted_name, + wei, + 1, + opv, + proto_type, + keep_axis=False, + ) probas = OnnxDiv(all_together, sum_prob, op_version=opv) - res_name = OnnxArrayFeatureExtractor( - cur_class, res, op_version=opv) - res_name.set_onnx_name_prefix('div%d' % index) + res_name = OnnxArrayFeatureExtractor(cur_class, res, op_version=opv) + res_name.set_onnx_name_prefix("div%d" % index) reshaped_labels = OnnxReshapeApi13( - res_name, np.array([-1, 1], dtype=np.int64), op_version=opv) + res_name, np.array([-1, 1], dtype=np.int64), op_version=opv + ) reshaped_probas = OnnxReshapeApi13( - probas, np.array([1, -1, len(cur_class)], dtype=np.int64), - op_version=opv) + probas, + np.array([1, -1, len(cur_class)], dtype=np.int64), + op_version=opv, + ) out_labels.append(reshaped_labels) out_probas.append(reshaped_probas) - concatenated_labels = OnnxConcat( - *out_labels, axis=1, op_version=opv) + concatenated_labels = OnnxConcat(*out_labels, axis=1, op_version=opv) final_proba = OnnxConcat( - *out_probas, axis=0, output_names=out[1:], op_version=opv) + *out_probas, axis=0, output_names=out[1:], op_version=opv + ) final_label = OnnxCast( - concatenated_labels, to=onnx_proto.TensorProto.INT64, - output_names=out[:1], op_version=opv) + concatenated_labels, + to=onnx_proto.TensorProto.INT64, + output_names=out[:1], + op_version=opv, + ) final_label.add_to(scope, container) final_proba.add_to(scope, container) else: all_together, sum_prob, res = get_proba_and_label( - container, nb_classes, reshaped, wei, axis, opv, proto_type) - probas = OnnxDiv(all_together, sum_prob, op_version=opv, - output_names=out[1:]) - probas.set_onnx_name_prefix('bprob') + container, nb_classes, reshaped, wei, axis, opv, proto_type + ) + probas = OnnxDiv(all_together, sum_prob, op_version=opv, output_names=out[1:]) + probas.set_onnx_name_prefix("bprob") res_name = OnnxArrayFeatureExtractor(classes, res, 
op_version=opv) if is_integer: res_name = OnnxCast( - res_name, to=onnx_proto.TensorProto.INT64, op_version=opv) - out_labels = OnnxReshapeApi13(res_name, np.array([-1], dtype=np.int64), - output_names=out[:1], op_version=opv) - out_labels.set_onnx_name_prefix('blab') + res_name, to=onnx_proto.TensorProto.INT64, op_version=opv + ) + out_labels = OnnxReshapeApi13( + res_name, + np.array([-1], dtype=np.int64), + output_names=out[:1], + op_version=opv, + ) + out_labels.set_onnx_name_prefix("blab") out_labels.add_to(scope, container) probas.add_to(scope, container) -def convert_nearest_neighbors_transform(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_nearest_neighbors_transform( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *NearestNeighbors* into *ONNX*. """ @@ -528,18 +622,23 @@ def convert_nearest_neighbors_transform(scope: Scope, operator: Operator, out = operator.outputs - ind = OnnxIdentity(top_indices, output_names=out[:1], - op_version=container.target_opset) + ind = OnnxIdentity( + top_indices, output_names=out[:1], op_version=container.target_opset + ) dist = OnnxMul( - top_distances, np.array([-1], dtype=dtype), - output_names=out[1:], op_version=container.target_opset) + top_distances, + np.array([-1], dtype=dtype), + output_names=out[1:], + op_version=container.target_opset, + ) dist.add_to(scope, container) ind.add_to(scope, container) -def convert_k_neighbours_transformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_k_neighbours_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *KNeighborsTransformer* into *ONNX*. """ @@ -551,115 +650,148 @@ def convert_k_neighbours_transformer(scope: Scope, operator: Operator, dtype = np.float32 transformer_op = operator.raw_operator op_version = container.target_opset - k = (transformer_op.n_neighbors + 1 if transformer_op.mode == 'distance' - else transformer_op.n_neighbors) + k = ( + transformer_op.n_neighbors + 1 + if transformer_op.mode == "distance" + else transformer_op.n_neighbors + ) out = operator.outputs - many = _convert_nearest_neighbors( - operator, container, k=k) + many = _convert_nearest_neighbors(operator, container, k=k) top_indices, top_dist = many[:2] top_dist = ( OnnxReshapeApi13( - OnnxMul(top_dist, np.array([-1], dtype=dtype), - op_version=op_version), + OnnxMul(top_dist, np.array([-1], dtype=dtype), op_version=op_version), np.array([-1, 1, k], dtype=np.int64), - op_version=op_version) - if transformer_op.mode == 'distance' else None) + op_version=op_version, + ) + if transformer_op.mode == "distance" + else None + ) fit_samples_indices = np.array( - np.arange(transformer_op.n_samples_fit_).reshape((1, -1, 1)), - dtype=np.int64) + np.arange(transformer_op.n_samples_fit_).reshape((1, -1, 1)), dtype=np.int64 + ) reshaped_ind = OnnxReshapeApi13( - top_indices, np.array([-1, 1, k], dtype=np.int64), - op_version=op_version) + top_indices, np.array([-1, 1, k], dtype=np.int64), op_version=op_version + ) comparison_res = OnnxCast( OnnxEqual(fit_samples_indices, reshaped_ind, op_version=op_version), - op_version=op_version, to=proto_type) + op_version=op_version, + to=proto_type, + ) if top_dist: - comparison_res = OnnxMul( - comparison_res, top_dist, op_version=op_version) + comparison_res = OnnxMul(comparison_res, top_dist, op_version=op_version) res = OnnxReduceSumApi11( - comparison_res, op_version=op_version, axes=[2], - keepdims=0, output_names=out[:1]) 
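# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code. Once converted,
# KNeighborsTransformer in mode="distance" produces a dense
# (n_queries, n_samples_fit_) matrix that can be compared against
# .transform(...).toarray(). Assumes onnxruntime is installed.
import numpy as np
from sklearn.neighbors import KNeighborsTransformer
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X = np.random.RandomState(0).randn(60, 4).astype(np.float32)
tr = KNeighborsTransformer(n_neighbors=4, mode="distance").fit(X)

onx = to_onnx(tr, X)
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
onnx_res = sess.run(None, {"X": X[:3]})[0]
skl_res = tr.transform(X[:3]).toarray()
print(onnx_res.shape, skl_res.shape)
# ---------------------------------------------------------------------------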
+ comparison_res, + op_version=op_version, + axes=[2], + keepdims=0, + output_names=out[:1], + ) res.add_to(scope, container) def _nan_euclidean_distance( - container, model, input_name, op_version, optim, dtype, proto_type): + container, model, input_name, op_version, optim, dtype, proto_type +): training_data = model._fit_X.astype(dtype) shape = OnnxShape(input_name, op_version=op_version) zero = OnnxConstantOfShape( - shape, value=make_tensor("value", proto_type, (1, ), [0]), - op_version=op_version) + shape, value=make_tensor("value", proto_type, (1,), [0]), op_version=op_version + ) missing_input_name = OnnxIsNaN(input_name, op_version=op_version) - masked_input_name = OnnxWhere(missing_input_name, zero, input_name, - op_version=op_version) + masked_input_name = OnnxWhere( + missing_input_name, zero, input_name, op_version=op_version + ) missing_y = np.isnan(training_data) training_data[missing_y] = 0 - d_in = training_data.shape[1] if hasattr(training_data, 'shape') else None - d_out = training_data.shape[0] if hasattr(training_data, 'shape') else None + d_in = training_data.shape[1] if hasattr(training_data, "shape") else None + d_out = training_data.shape[0] if hasattr(training_data, "shape") else None if optim is None: dist = _onnx_cdist_sqeuclidean( - masked_input_name, training_data, dtype=dtype, - op_version=container.target_opset, dim_in=d_in, dim_out=d_out) - elif optim == 'cdist': + masked_input_name, + training_data, + dtype=dtype, + op_version=container.target_opset, + dim_in=d_in, + dim_out=d_out, + ) + elif optim == "cdist": from skl2onnx.algebra.custom_ops import OnnxCDist + dist = OnnxCDist( - masked_input_name, training_data, metric='sqeuclidean', - op_version=container.target_opset) + masked_input_name, + training_data, + metric="sqeuclidean", + op_version=container.target_opset, + ) else: raise RuntimeError("Unexpected optimization '{}'.".format(optim)) dist1 = OnnxMatMul( OnnxMul(masked_input_name, masked_input_name, op_version=op_version), - missing_y.T.astype(dtype), op_version=op_version) + missing_y.T.astype(dtype), + op_version=op_version, + ) dist2 = OnnxMatMul( - OnnxCast(missing_input_name, to=proto_type, - op_version=op_version), + OnnxCast(missing_input_name, to=proto_type, op_version=op_version), (training_data * training_data).T.astype(dtype), - op_version=op_version) - distances = OnnxSub(dist, OnnxAdd(dist1, dist2, op_version=op_version), - op_version=op_version) + op_version=op_version, + ) + distances = OnnxSub( + dist, OnnxAdd(dist1, dist2, op_version=op_version), op_version=op_version + ) present_x = OnnxSub( np.array([1], dtype=dtype), - OnnxCast(missing_input_name, to=proto_type, - op_version=op_version), - op_version=op_version) - present_y = (1. 
- missing_y).astype(dtype) + OnnxCast(missing_input_name, to=proto_type, op_version=op_version), + op_version=op_version, + ) + present_y = (1.0 - missing_y).astype(dtype) present_count = OnnxMatMul( - present_x, present_y.T.astype(dtype), op_version=op_version) - present_count = OnnxMax(np.array([1], dtype=dtype), - present_count, op_version=op_version) + present_x, present_y.T.astype(dtype), op_version=op_version + ) + present_count = OnnxMax( + np.array([1], dtype=dtype), present_count, op_version=op_version + ) dist = OnnxDiv(distances, present_count, op_version=op_version) - return OnnxMul( - dist, np.array([d_in], dtype=dtype), - op_version=op_version), missing_input_name + return ( + OnnxMul(dist, np.array([d_in], dtype=dtype), op_version=op_version), + missing_input_name, + ) -def _nearest_neighbours(container, model, input_name, - op_version, optim, dtype, proto_type, **kwargs): +def _nearest_neighbours( + container, model, input_name, op_version, optim, dtype, proto_type, **kwargs +): dist, missing_input_name = _nan_euclidean_distance( - container, model, input_name, op_version, optim, dtype, - proto_type) + container, model, input_name, op_version, optim, dtype, proto_type + ) if op_version < 10: - neg_dist = OnnxMul(dist, np.array( - [-1], dtype=dtype), op_version=op_version) - node = OnnxTopK_1( - neg_dist, k=model.n_neighbors, op_version=1, **kwargs) + neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version) + node = OnnxTopK_1(neg_dist, k=model.n_neighbors, op_version=1, **kwargs) elif op_version < 11: - neg_dist = OnnxMul(dist, np.array( - [-1], dtype=dtype), op_version=op_version) + neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version) node = OnnxTopK_10( - neg_dist, np.array([model.n_neighbors], dtype=np.int64), - op_version=10, **kwargs) + neg_dist, + np.array([model.n_neighbors], dtype=np.int64), + op_version=10, + **kwargs + ) else: node = OnnxTopK_11( - dist, np.array([model.n_neighbors], dtype=np.int64), - largest=0, sorted=1, op_version=11, **kwargs) + dist, + np.array([model.n_neighbors], dtype=np.int64), + largest=0, + sorted=1, + op_version=11, + **kwargs + ) return node[1], missing_input_name -def convert_knn_imputer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_knn_imputer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *KNNImputer* into *ONNX*. """ @@ -670,17 +802,16 @@ def convert_knn_imputer(scope: Scope, operator: Operator, if proto_type != onnx_proto.TensorProto.DOUBLE: proto_type = onnx_proto.TensorProto.FLOAT knn_op = operator.raw_operator - if knn_op.metric != 'nan_euclidean': - raise RuntimeError( - "Unable to convert KNNImputer when metric is callable.") - if knn_op.weights not in ('uniform', 'distance'): - raise RuntimeError( - "Unable to convert KNNImputer when weights is callable.") - if knn_op.weights == 'distance': + if knn_op.metric != "nan_euclidean": + raise RuntimeError("Unable to convert KNNImputer when metric is callable.") + if knn_op.weights not in ("uniform", "distance"): + raise RuntimeError("Unable to convert KNNImputer when weights is callable.") + if knn_op.weights == "distance": raise NotImplementedError( - 'KNNImputer with distance as metric is not supported, ' - 'you may raise an issue at ' - 'https://github.com/onnx/sklearn-onnx/issues.') + "KNNImputer with distance as metric is not supported, " + "you may raise an issue at " + "https://github.com/onnx/sklearn-onnx/issues." 
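# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code. Only the default
# KNNImputer setting (metric="nan_euclidean", weights="uniform") can be
# converted, as enforced by the checks above. Assumes onnxruntime is installed.
import numpy as np
from sklearn.impute import KNNImputer
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X = np.array(
    [[1.0, 2.0], [3.0, np.nan], [np.nan, 6.0], [7.0, 8.0]], dtype=np.float32
)
imp = KNNImputer(n_neighbors=2, weights="uniform").fit(X)

onx = to_onnx(imp, X)
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X})[0])  # imputed array from the ONNX graph
print(imp.transform(X))             # reference from scikit-learn
# ---------------------------------------------------------------------------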
+ ) options = container.get_options(knn_op, dict(optim=None)) op_version = container.target_opset input_name = operator.inputs[0] @@ -688,40 +819,52 @@ def convert_knn_imputer(scope: Scope, operator: Operator, training_data[np.isnan(training_data)] = 0 out = operator.outputs top_indices, missing_input_name = _nearest_neighbours( - container, knn_op, input_name, op_version, options['optim'], - dtype, proto_type) + container, knn_op, input_name, op_version, options["optim"], dtype, proto_type + ) flattened = OnnxFlatten(top_indices, op_version=op_version) extracted = OnnxArrayFeatureExtractor( - training_data.T, flattened, op_version=op_version) + training_data.T, flattened, op_version=op_version + ) reshaped = OnnxReshapeApi13( - extracted, np.array([training_data.shape[1], -1, knn_op.n_neighbors], - dtype=np.int64), - op_version=op_version) - transpose_result = OnnxTranspose( - reshaped, op_version=op_version, perm=[1, 2, 0]) + extracted, + np.array([training_data.shape[1], -1, knn_op.n_neighbors], dtype=np.int64), + op_version=op_version, + ) + transpose_result = OnnxTranspose(reshaped, op_version=op_version, perm=[1, 2, 0]) reduced = OnnxReduceSumApi11( - transpose_result, op_version=op_version, axes=[1], keepdims=0) + transpose_result, op_version=op_version, axes=[1], keepdims=0 + ) cast_res = OnnxCast( - OnnxCast(transpose_result, to=onnx_proto.TensorProto.BOOL, - op_version=op_version), - to=proto_type, op_version=op_version) - deno = OnnxReduceSumApi11( - cast_res, op_version=op_version, axes=[1], keepdims=0) + OnnxCast( + transpose_result, to=onnx_proto.TensorProto.BOOL, op_version=op_version + ), + to=proto_type, + op_version=op_version, + ) + deno = OnnxReduceSumApi11(cast_res, op_version=op_version, axes=[1], keepdims=0) deno_updated = OnnxAdd( - deno, OnnxCast( - OnnxNot(OnnxCast(deno, to=onnx_proto.TensorProto.BOOL, - op_version=op_version), op_version=op_version), - to=proto_type, op_version=op_version), - op_version=op_version) + deno, + OnnxCast( + OnnxNot( + OnnxCast(deno, to=onnx_proto.TensorProto.BOOL, op_version=op_version), + op_version=op_version, + ), + to=proto_type, + op_version=op_version, + ), + op_version=op_version, + ) imputed_out = OnnxWhere( missing_input_name, - OnnxDiv(reduced, deno_updated, op_version=op_version), input_name, - output_names=out[:1], op_version=op_version) + OnnxDiv(reduced, deno_updated, op_version=op_version), + input_name, + output_names=out[:1], + op_version=op_version, + ) imputed_out.add_to(scope, container) -def convert_nca(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_nca(scope: Scope, operator: Operator, container: ModelComponentContainer): """ Converts *NeighborhoodComponentsAnalysis* into *ONNX*. 
""" @@ -738,44 +881,57 @@ def convert_nca(scope: Scope, operator: Operator, X = OnnxCast(X, to=onnx_proto.TensorProto.FLOAT, op_version=op_version) elif isinstance(X.type, DoubleTensorType): components = OnnxCast( - components, to=onnx_proto.TensorProto.DOUBLE, - op_version=op_version) + components, to=onnx_proto.TensorProto.DOUBLE, op_version=op_version + ) else: components = components.astype(dtype) - res = OnnxMatMul( - X, components, - output_names=out[:1], op_version=op_version) + res = OnnxMatMul(X, components, output_names=out[:1], op_version=op_version) res.add_to(scope, container) register_converter( - 'SklearnKNeighborsClassifier', convert_nearest_neighbors_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'raw_scores': [True, False], - 'output_class_labels': [False, True], - 'optim': [None, 'cdist']}) -register_converter( - 'SklearnRadiusNeighborsClassifier', convert_nearest_neighbors_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'raw_scores': [True, False], - 'output_class_labels': [False, True], - 'optim': [None, 'cdist']}) + "SklearnKNeighborsClassifier", + convert_nearest_neighbors_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "raw_scores": [True, False], + "output_class_labels": [False, True], + "optim": [None, "cdist"], + }, +) register_converter( - 'SklearnKNeighborsRegressor', convert_nearest_neighbors_regressor, - options={'optim': [None, 'cdist']}) + "SklearnRadiusNeighborsClassifier", + convert_nearest_neighbors_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "raw_scores": [True, False], + "output_class_labels": [False, True], + "optim": [None, "cdist"], + }, +) register_converter( - 'SklearnRadiusNeighborsRegressor', convert_nearest_neighbors_regressor, - options={'optim': [None, 'cdist']}) + "SklearnKNeighborsRegressor", + convert_nearest_neighbors_regressor, + options={"optim": [None, "cdist"]}, +) register_converter( - 'SklearnKNeighborsTransformer', convert_k_neighbours_transformer, - options={'optim': [None, 'cdist']}) + "SklearnRadiusNeighborsRegressor", + convert_nearest_neighbors_regressor, + options={"optim": [None, "cdist"]}, +) register_converter( - 'SklearnNearestNeighbors', convert_nearest_neighbors_transform, - options={'optim': [None, 'cdist']}) + "SklearnKNeighborsTransformer", + convert_k_neighbours_transformer, + options={"optim": [None, "cdist"]}, +) register_converter( - 'SklearnKNNImputer', convert_knn_imputer, - options={'optim': [None, 'cdist']}) + "SklearnNearestNeighbors", + convert_nearest_neighbors_transform, + options={"optim": [None, "cdist"]}, +) register_converter( - 'SklearnNeighborhoodComponentsAnalysis', convert_nca) + "SklearnKNNImputer", convert_knn_imputer, options={"optim": [None, "cdist"]} +) +register_converter("SklearnNeighborhoodComponentsAnalysis", convert_nca) diff --git a/skl2onnx/operator_converters/normaliser.py b/skl2onnx/operator_converters/normaliser.py index 4dafe01ac..6479acc34 100644 --- a/skl2onnx/operator_converters/normaliser.py +++ b/skl2onnx/operator_converters/normaliser.py @@ -9,8 +9,9 @@ from .common import concatenate_variables -def convert_sklearn_normalizer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_normalizer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): if len(operator.inputs) > 1: # If there are multiple input tensors, # we combine them using a FeatureVectorizer @@ 
-19,17 +20,24 @@ def convert_sklearn_normalizer(scope: Scope, operator: Operator, # No concatenation is needed, we just use the first variable's name feature_name = operator.inputs[0].full_name op = operator.raw_operator - norm_map = {'max': 'MAX', 'l1': 'L1', 'l2': 'L2'} + norm_map = {"max": "MAX", "l1": "L1", "l2": "L2"} if op.norm in norm_map: norm = norm_map[op.norm] else: - raise RuntimeError("Invalid norm '%s'. You may raise an issue" - "at https://github.com/onnx/sklearn-onnx/" - "issues." % op.norm) - use_float = type(operator.inputs[0].type) not in (DoubleTensorType, ) + raise RuntimeError( + "Invalid norm '%s'. You may raise an issue" + "at https://github.com/onnx/sklearn-onnx/" + "issues." % op.norm + ) + use_float = type(operator.inputs[0].type) not in (DoubleTensorType,) apply_normalizer( - scope, feature_name, operator.outputs[0].full_name, container, - norm=norm, use_float=use_float) + scope, + feature_name, + operator.outputs[0].full_name, + container, + norm=norm, + use_float=use_float, + ) -register_converter('SklearnNormalizer', convert_sklearn_normalizer) +register_converter("SklearnNormalizer", convert_sklearn_normalizer) diff --git a/skl2onnx/operator_converters/one_hot_encoder.py b/skl2onnx/operator_converters/one_hot_encoder.py index 9b4b0f356..815736139 100644 --- a/skl2onnx/operator_converters/one_hot_encoder.py +++ b/skl2onnx/operator_converters/one_hot_encoder.py @@ -4,8 +4,11 @@ import numpy as np from ..common._apply_operation import apply_cast, apply_concat, apply_reshape from ..common.data_types import ( - Int64TensorType, StringTensorType, Int32TensorType, - FloatTensorType, DoubleTensorType + Int64TensorType, + StringTensorType, + Int32TensorType, + FloatTensorType, + DoubleTensorType, ) from ..common._registration import register_converter from ..common._topology import Scope, Operator @@ -13,8 +16,9 @@ from ..proto import onnx_proto -def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_one_hot_encoder( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts *OneHotEncoder* into ONNX. It supports multiple inputs of types @@ -28,74 +32,92 @@ def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator, raise RuntimeError( "Shapes must be known when OneHotEncoder is converted. " "There are {} inputs with the following number of columns " - "{}.".format(len(operator.inputs), all_shapes)) + "{}.".format(len(operator.inputs), all_shapes) + ) total = sum(all_shapes) if total != len(ohe_op.categories_): raise RuntimeError( "Mismatch between the number of sets of categories {} and " "the total number of inputs columns {}.".format( - len(ohe_op.categories_), total)) + len(ohe_op.categories_), total + ) + ) enum_cats = [] index_inputs = 0 for index, cats in enumerate(ohe_op.categories_): - while sum(all_shapes[:index_inputs + 1]) <= index: + while sum(all_shapes[: index_inputs + 1]) <= index: index_inputs += 1 index_in_input = index - sum(all_shapes[:index_inputs]) inp = operator.inputs[index_inputs] if not isinstance( - inp.type, - (Int64TensorType, StringTensorType, Int32TensorType, - FloatTensorType, DoubleTensorType)): + inp.type, + ( + Int64TensorType, + StringTensorType, + Int32TensorType, + FloatTensorType, + DoubleTensorType, + ), + ): raise NotImplementedError( "{} input datatype not yet supported. 
" "You may raise an issue at " "https://github.com/onnx/sklearn-onnx/issues" - "".format(type(inp.type))) + "".format(type(inp.type)) + ) if all_shapes[index_inputs] == 1: assert index_in_input == 0 afeat = False else: afeat = True - enum_cats.append( - (afeat, index_in_input, inp.full_name, cats, inp.type)) + enum_cats.append((afeat, index_in_input, inp.full_name, cats, inp.type)) else: inp = operator.inputs[0] - enum_cats = [(True, i, inp.full_name, cats, inp.type) - for i, cats in enumerate(ohe_op.categories_)] + enum_cats = [ + (True, i, inp.full_name, cats, inp.type) + for i, cats in enumerate(ohe_op.categories_) + ] result, categories_len = [], 0 for index, enum_c in enumerate(enum_cats): afeat, index_in, name, categories, inp_type = enum_c container.debug( "[conv.OneHotEncoder] cat %r/%r name=%r type=%r", - index + 1, len(enum_cats), name, inp_type) + index + 1, + len(enum_cats), + name, + inp_type, + ) if len(categories) == 0: continue if afeat: - index_name = scope.get_unique_variable_name( - name + str(index_in)) + index_name = scope.get_unique_variable_name(name + str(index_in)) container.add_initializer( - index_name, onnx_proto.TensorProto.INT64, [], [index_in]) - out_name = scope.get_unique_variable_name( - name + str(index_in)) + index_name, onnx_proto.TensorProto.INT64, [], [index_in] + ) + out_name = scope.get_unique_variable_name(name + str(index_in)) container.add_node( - 'ArrayFeatureExtractor', [name, index_name], - out_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [name, index_name], + out_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) name = out_name - attrs = {'name': scope.get_unique_operator_name('OneHotEncoder')} - attrs['zeros'] = 1 if ohe_op.handle_unknown == 'ignore' else 0 + attrs = {"name": scope.get_unique_operator_name("OneHotEncoder")} + attrs["zeros"] = 1 if ohe_op.handle_unknown == "ignore" else 0 if isinstance(inp_type, (Int64TensorType, Int32TensorType)): - attrs['cats_int64s'] = categories.astype(np.int64) + attrs["cats_int64s"] = categories.astype(np.int64) elif isinstance(inp_type, StringTensorType): - attrs['cats_strings'] = np.array( - [str(s).encode('utf-8') for s in categories]) + attrs["cats_strings"] = np.array( + [str(s).encode("utf-8") for s in categories] + ) elif isinstance(inp_type, (FloatTensorType, DoubleTensorType)): # The converter checks that categories can be casted into # integers. String is not allowed here. @@ -105,60 +127,78 @@ def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator, ci = int(c) except TypeError: raise RuntimeError( - "Category '{}' cannot be casted into int.".format(c)) + "Category '{}' cannot be casted into int.".format(c) + ) if ci != c: raise RuntimeError( "Category %r is not an int64. " "The converter only supports string and int64 " - "categories not %r." % (c, type(c))) - attrs['cats_int64s'] = categories.astype(np.int64) + "categories not %r." % (c, type(c)) + ) + attrs["cats_int64s"] = categories.astype(np.int64) else: raise RuntimeError( "Input type {} is not supported for OneHotEncoder. " - "Ideally, it should either be integer or strings.".format( - inp_type)) + "Ideally, it should either be integer or strings.".format(inp_type) + ) - ohe_output = scope.get_unique_variable_name(name + 'out') + ohe_output = scope.get_unique_variable_name(name + "out") - if 'cats_int64s' in attrs: + if "cats_int64s" in attrs: # Let's cast this input in int64. 
- cast_feature = scope.get_unique_variable_name(name + 'cast') - apply_cast(scope, name, cast_feature, container, - to=onnx_proto.TensorProto.INT64) + cast_feature = scope.get_unique_variable_name(name + "cast") + apply_cast( + scope, name, cast_feature, container, to=onnx_proto.TensorProto.INT64 + ) name = cast_feature - container.add_node('OneHotEncoder', name, - ohe_output, op_domain='ai.onnx.ml', - **attrs) - if hasattr(ohe_op, 'drop_idx_') and ohe_op.drop_idx_ is not None: - extracted_outputs_name = scope.get_unique_variable_name( - 'extracted_outputs') - indices_to_keep_name = scope.get_unique_variable_name( - 'indices_to_keep') + container.add_node( + "OneHotEncoder", name, ohe_output, op_domain="ai.onnx.ml", **attrs + ) + if hasattr(ohe_op, "drop_idx_") and ohe_op.drop_idx_ is not None: + extracted_outputs_name = scope.get_unique_variable_name("extracted_outputs") + indices_to_keep_name = scope.get_unique_variable_name("indices_to_keep") indices_to_keep = np.delete( - np.arange(len(categories)), ohe_op.drop_idx_[index]) + np.arange(len(categories)), ohe_op.drop_idx_[index] + ) container.add_initializer( - indices_to_keep_name, onnx_proto.TensorProto.INT64, - indices_to_keep.shape, indices_to_keep) + indices_to_keep_name, + onnx_proto.TensorProto.INT64, + indices_to_keep.shape, + indices_to_keep, + ) container.add_node( - 'ArrayFeatureExtractor', [ohe_output, indices_to_keep_name], - extracted_outputs_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [ohe_output, indices_to_keep_name], + extracted_outputs_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) ohe_output, categories = extracted_outputs_name, indices_to_keep result.append(ohe_output) categories_len += len(categories) - concat_result_name = scope.get_unique_variable_name('concat_result') + concat_result_name = scope.get_unique_variable_name("concat_result") apply_concat(scope, result, concat_result_name, container, axis=2) reshape_input = concat_result_name if np.issubdtype(ohe_op.dtype, np.signedinteger): - reshape_input = scope.get_unique_variable_name('cast') - apply_cast(scope, concat_result_name, reshape_input, - container, to=onnx_proto.TensorProto.INT64) - apply_reshape(scope, reshape_input, operator.output_full_names, - container, desired_shape=(-1, categories_len)) - - -register_converter('SklearnOneHotEncoder', convert_sklearn_one_hot_encoder) + reshape_input = scope.get_unique_variable_name("cast") + apply_cast( + scope, + concat_result_name, + reshape_input, + container, + to=onnx_proto.TensorProto.INT64, + ) + apply_reshape( + scope, + reshape_input, + operator.output_full_names, + container, + desired_shape=(-1, categories_len), + ) + + +register_converter("SklearnOneHotEncoder", convert_sklearn_one_hot_encoder) diff --git a/skl2onnx/operator_converters/one_vs_one_classifier.py b/skl2onnx/operator_converters/one_vs_one_classifier.py index 2c716268b..80dcb3a04 100644 --- a/skl2onnx/operator_converters/one_vs_one_classifier.py +++ b/skl2onnx/operator_converters/one_vs_one_classifier.py @@ -10,101 +10,104 @@ from .._supported_operators import sklearn_operator_name_map -def _iteration_one_versus(scope, container, inputs, i, estimator, cl_type, - proto_dtype, use_raw_scores=True, prob_shape=None): +def _iteration_one_versus( + scope, + container, + inputs, + i, + estimator, + cl_type, + proto_dtype, + use_raw_scores=True, + prob_shape=None, +): op_type = 
sklearn_operator_name_map[type(estimator)] this_operator = scope.declare_local_operator(op_type, raw_model=estimator) this_operator.inputs = inputs if is_regressor(estimator): - score_name = scope.declare_local_variable('score_%d' % i, cl_type()) + score_name = scope.declare_local_variable("score_%d" % i, cl_type()) this_operator.outputs.append(score_name) - if hasattr(estimator, 'coef_') and len(estimator.coef_.shape) == 2: + if hasattr(estimator, "coef_") and len(estimator.coef_.shape) == 2: raise RuntimeError( "OneVsRestClassifier or OneVsOneClassifier accepts " - "regressor with only one target.") + "regressor with only one target." + ) p1 = score_name.onnx_name return None, None, p1 - if container.has_options(estimator, 'raw_scores'): - options = {'raw_scores': use_raw_scores} - elif container.has_options(estimator, 'zipmap'): - options = {'zipmap': False} + if container.has_options(estimator, "raw_scores"): + options = {"raw_scores": use_raw_scores} + elif container.has_options(estimator, "zipmap"): + options = {"zipmap": False} else: options = None if options is not None: container.add_options(id(estimator), options) scope.add_options(id(estimator), options) - label_name = scope.declare_local_variable( - 'label_%d' % i, Int64TensorType()) - prob_name = scope.declare_local_variable( - 'proba_%d' % i, inputs[0].type.__class__()) + label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType()) + prob_name = scope.declare_local_variable("proba_%d" % i, inputs[0].type.__class__()) this_operator.outputs.append(label_name) this_operator.outputs.append(prob_name) # gets the label for the class 1 - label = scope.get_unique_variable_name('lab_%d' % i) - apply_reshape(scope, label_name.onnx_name, label, container, - desired_shape=(-1, 1)) - cast_label = scope.get_unique_variable_name('cast_lab_%d' % i) - apply_cast(scope, label, cast_label, container, - to=proto_dtype) + label = scope.get_unique_variable_name("lab_%d" % i) + apply_reshape(scope, label_name.onnx_name, label, container, desired_shape=(-1, 1)) + cast_label = scope.get_unique_variable_name("cast_lab_%d" % i) + apply_cast(scope, label, cast_label, container, to=proto_dtype) # get the probability for the class 1 if prob_shape is None: # shape to use to reshape score - cst0 = scope.get_unique_variable_name('cst0') + cst0 = scope.get_unique_variable_name("cst0") container.add_initializer(cst0, onnx_proto.TensorProto.INT64, [1], [0]) - shape = scope.get_unique_variable_name('shape') - container.add_node('Shape', [inputs[0].full_name], [shape]) - first_dim = scope.get_unique_variable_name('dim') - container.add_node('Gather', [shape, cst0], [first_dim]) - cst_1 = scope.get_unique_variable_name('cst_1') - container.add_initializer( - cst_1, onnx_proto.TensorProto.INT64, [1], [-1]) - prob_shape = scope.get_unique_variable_name('shape') + shape = scope.get_unique_variable_name("shape") + container.add_node("Shape", [inputs[0].full_name], [shape]) + first_dim = scope.get_unique_variable_name("dim") + container.add_node("Gather", [shape, cst0], [first_dim]) + cst_1 = scope.get_unique_variable_name("cst_1") + container.add_initializer(cst_1, onnx_proto.TensorProto.INT64, [1], [-1]) + prob_shape = scope.get_unique_variable_name("shape") apply_concat(scope, [first_dim, cst_1], prob_shape, container, axis=0) - prob_reshaped = scope.get_unique_variable_name('prob_%d' % i) - container.add_node('Reshape', [prob_name.onnx_name, prob_shape], - [prob_reshaped]) + prob_reshaped = scope.get_unique_variable_name("prob_%d" % i) + 
container.add_node("Reshape", [prob_name.onnx_name, prob_shape], [prob_reshaped]) - cst1 = scope.get_unique_variable_name('cst1') + cst1 = scope.get_unique_variable_name("cst1") container.add_initializer(cst1, onnx_proto.TensorProto.INT64, [1], [1]) - cst2 = scope.get_unique_variable_name('cst2') + cst2 = scope.get_unique_variable_name("cst2") container.add_initializer(cst2, onnx_proto.TensorProto.INT64, [1], [2]) - prob1 = scope.get_unique_variable_name('prob1_%d' % i) - container.add_node( - 'Slice', [prob_reshaped, cst1, cst2, cst1], prob1) + prob1 = scope.get_unique_variable_name("prob1_%d" % i) + container.add_node("Slice", [prob_reshaped, cst1, cst2, cst1], prob1) return prob_shape, cast_label, prob1 -def convert_one_vs_one_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): - +def convert_one_vs_one_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator # shape to use to reshape score - cst0 = scope.get_unique_variable_name('cst0') + cst0 = scope.get_unique_variable_name("cst0") container.add_initializer(cst0, onnx_proto.TensorProto.INT64, [1], [0]) - cst1 = scope.get_unique_variable_name('cst1') + cst1 = scope.get_unique_variable_name("cst1") container.add_initializer(cst1, onnx_proto.TensorProto.INT64, [1], [1]) - cst2 = scope.get_unique_variable_name('cst2') + cst2 = scope.get_unique_variable_name("cst2") container.add_initializer(cst2, onnx_proto.TensorProto.INT64, [1], [2]) - shape = scope.get_unique_variable_name('shape') - container.add_node('Shape', [operator.inputs[0].full_name], [shape]) - first_dim = scope.get_unique_variable_name('dim') - container.add_node('Gather', [shape, cst0], [first_dim]) - cst_1 = scope.get_unique_variable_name('cst_1') + shape = scope.get_unique_variable_name("shape") + container.add_node("Shape", [operator.inputs[0].full_name], [shape]) + first_dim = scope.get_unique_variable_name("dim") + container.add_node("Gather", [shape, cst0], [first_dim]) + cst_1 = scope.get_unique_variable_name("cst_1") container.add_initializer(cst_1, onnx_proto.TensorProto.INT64, [1], [-1]) - prob_shape = scope.get_unique_variable_name('shape') + prob_shape = scope.get_unique_variable_name("shape") apply_concat(scope, [first_dim, cst_1], prob_shape, container, axis=0) label_names = [] @@ -113,42 +116,52 @@ def convert_one_vs_one_classifier(scope: Scope, operator: Operator, cl_type = operator.inputs[0].type.__class__ for i, estimator in enumerate(op.estimators_): prob_shape, cast_label, prob1 = _iteration_one_versus( - scope, container, operator.inputs, i, estimator, cl_type, - proto_dtype, True, prob_shape=prob_shape) + scope, + container, + operator.inputs, + i, + estimator, + cl_type, + proto_dtype, + True, + prob_shape=prob_shape, + ) label_names.append(cast_label) prob_names.append(prob1) - conc_lab_name = scope.get_unique_variable_name('concat_out_ovo_label') + conc_lab_name = scope.get_unique_variable_name("concat_out_ovo_label") apply_concat(scope, label_names, conc_lab_name, container, axis=1) - conc_prob_name = scope.get_unique_variable_name('concat_out_ovo_prob') + conc_prob_name = scope.get_unique_variable_name("concat_out_ovo_prob") apply_concat(scope, prob_names, conc_prob_name, container, axis=1) # calls _ovr_decision_function - this_operator = scope.declare_local_operator( - "SklearnOVRDecisionFunction", op) + 
this_operator = scope.declare_local_operator("SklearnOVRDecisionFunction", op) cl_type = operator.inputs[0].type.__class__ label = scope.declare_local_variable("label", cl_type()) - container.add_node('Identity', [conc_lab_name], [label.onnx_name]) + container.add_node("Identity", [conc_lab_name], [label.onnx_name]) prob_score = scope.declare_local_variable("prob_score", cl_type()) - container.add_node('Identity', [conc_prob_name], [prob_score.onnx_name]) + container.add_node("Identity", [conc_prob_name], [prob_score.onnx_name]) this_operator.inputs.append(label) this_operator.inputs.append(prob_score) - ovr_name = scope.declare_local_variable('ovr_output', cl_type()) + ovr_name = scope.declare_local_variable("ovr_output", cl_type()) this_operator.outputs.append(ovr_name) output_name = operator.outputs[1].full_name - container.add_node('Identity', [ovr_name.onnx_name], [output_name]) + container.add_node("Identity", [ovr_name.onnx_name], [output_name]) - container.add_node( - 'ArgMax', 'ovr_output', operator.outputs[0].full_name, axis=1) + container.add_node("ArgMax", "ovr_output", operator.outputs[0].full_name, axis=1) -register_converter('SklearnOneVsOneClassifier', - convert_one_vs_one_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True]}) +register_converter( + "SklearnOneVsOneClassifier", + convert_one_vs_one_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + }, +) diff --git a/skl2onnx/operator_converters/one_vs_rest_classifier.py b/skl2onnx/operator_converters/one_vs_rest_classifier.py index 7a5287cb3..3709460cf 100644 --- a/skl2onnx/operator_converters/one_vs_rest_classifier.py +++ b/skl2onnx/operator_converters/one_vs_rest_classifier.py @@ -5,81 +5,114 @@ from sklearn.svm import LinearSVC from ..proto import onnx_proto from ..common._apply_operation import ( - apply_concat, apply_identity, apply_mul, apply_reshape) + apply_concat, + apply_identity, + apply_mul, + apply_reshape, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common._apply_operation import apply_normalization from ..common._apply_operation import ( - apply_slice, apply_sub, apply_cast, apply_abs, apply_add, apply_div) + apply_slice, + apply_sub, + apply_cast, + apply_abs, + apply_add, + apply_div, +) from ..common.utils_classifier import _finalize_converter_classes from ..common.data_types import guess_proto_type, Int64TensorType -from ..algebra.onnx_ops import ( - OnnxReshape, OnnxShape, OnnxSlice, OnnxTile) +from ..algebra.onnx_ops import OnnxReshape, OnnxShape, OnnxSlice, OnnxTile from .._supported_operators import sklearn_operator_name_map -def _iteration_one_versus(scope, container, inputs, i, estimator, cl_type, - proto_dtype, use_raw_scores=True, prob_shape=None): +def _iteration_one_versus( + scope, + container, + inputs, + i, + estimator, + cl_type, + proto_dtype, + use_raw_scores=True, + prob_shape=None, +): op_type = sklearn_operator_name_map[type(estimator)] - this_operator = scope.declare_local_operator( - op_type, raw_model=estimator) + this_operator = scope.declare_local_operator(op_type, raw_model=estimator) this_operator.inputs = inputs if is_regressor(estimator): - score_name = scope.declare_local_variable('score_%d' % i, cl_type()) + score_name = scope.declare_local_variable("score_%d" % i, cl_type()) 
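# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code. Both
# OneVsOneClassifier and OneVsRestClassifier are converted by applying a
# per-estimator helper like this one to every element of op.estimators_.
# Example with zipmap disabled so the second output is a plain tensor rather
# than a list of dictionaries; assumes onnxruntime is installed.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsOneClassifier
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X, y = load_iris(return_X_y=True)
X = X.astype(np.float32)
ovo = OneVsOneClassifier(LogisticRegression(max_iter=500)).fit(X, y)

onx = to_onnx(ovo, X, options={id(ovo): {"zipmap": False}})
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
labels, scores = sess.run(None, {"X": X[:5]})
print(labels, scores.shape)
# ---------------------------------------------------------------------------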
this_operator.outputs.append(score_name) - if hasattr(estimator, 'coef_') and len(estimator.coef_.shape) == 2: + if hasattr(estimator, "coef_") and len(estimator.coef_.shape) == 2: raise RuntimeError( "OneVsRestClassifier or OneVsOneClassifier accepts " - "regressor with only one target.") + "regressor with only one target." + ) p1 = score_name.onnx_name else: - if container.has_options(estimator, 'raw_scores'): - container.add_options( - id(estimator), {'raw_scores': use_raw_scores}) - scope.add_options( - id(estimator), {'raw_scores': use_raw_scores}) - label_name = scope.declare_local_variable( - 'label_%d' % i, Int64TensorType()) - prob_name = scope.declare_local_variable('proba_%d' % i, cl_type()) + if container.has_options(estimator, "raw_scores"): + container.add_options(id(estimator), {"raw_scores": use_raw_scores}) + scope.add_options(id(estimator), {"raw_scores": use_raw_scores}) + label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType()) + prob_name = scope.declare_local_variable("proba_%d" % i, cl_type()) this_operator.outputs.append(label_name) this_operator.outputs.append(prob_name) # gets the probability for the class 1 - p1 = scope.get_unique_variable_name('probY_%d' % i) + p1 = scope.get_unique_variable_name("probY_%d" % i) if isinstance(estimator, LinearSVC): apply_identity(scope, prob_name.onnx_name, p1, container) else: - apply_slice(scope, prob_name.onnx_name, p1, container, starts=[1], - ends=[2], axes=[1]) + apply_slice( + scope, + prob_name.onnx_name, + p1, + container, + starts=[1], + ends=[2], + axes=[1], + ) return None, None, p1 -def convert_one_vs_rest_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_one_vs_rest_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *OneVsRestClassifier* into *ONNX* format. """ - if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']: + if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]: raise RuntimeError( "Option 'nocl' is not implemented for operator '{}'.".format( - operator.raw_operator.__class__.__name__)) + operator.raw_operator.__class__.__name__ + ) + ) proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator options = container.get_options(op, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] + use_raw_scores = options["raw_scores"] probs_names = [] cl_type = operator.inputs[0].type.__class__ prob_shape = None for i, estimator in enumerate(op.estimators_): prob_shape, _, p1 = _iteration_one_versus( - scope, container, operator.inputs, i, estimator, cl_type, - proto_dtype, use_raw_scores, prob_shape=prob_shape) + scope, + container, + operator.inputs, + i, + estimator, + cl_type, + proto_dtype, + use_raw_scores, + prob_shape=prob_shape, + ) probs_names.append(p1) if op.multilabel_: @@ -93,90 +126,122 @@ def convert_one_vs_rest_classifier(scope: Scope, operator: Operator, # https://github.com/scikit-learn/scikit-learn/sklearn/ # multiclass.py#L290 # Raw score would mean: scores = conc_name. 
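# ---------------------------------------------------------------------------
# Illustrative usage sketch, not taken from the patched code. The code below
# thresholds each per-class probability at 0.5 to rebuild the multilabel
# prediction. For the single-label case the converter also honours the
# raw_scores option; assumes onnxruntime is installed.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X, y = load_iris(return_X_y=True)
X = X.astype(np.float32)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=500)).fit(X, y)

onx = to_onnx(ovr, X, options={id(ovr): {"zipmap": False, "raw_scores": True}})
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
labels, raw = sess.run(None, {"X": X[:3]})
print(labels)
print(raw)  # unnormalised per-class scores rather than probabilities
# ---------------------------------------------------------------------------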
- thresh_name = scope.get_unique_variable_name('thresh') + thresh_name = scope.get_unique_variable_name("thresh") container.add_initializer( - thresh_name, proto_dtype, - [1, len(op.classes_)], [.5] * len(op.classes_)) - scores = scope.get_unique_variable_name('threshed') + thresh_name, proto_dtype, [1, len(op.classes_)], [0.5] * len(op.classes_) + ) + scores = scope.get_unique_variable_name("threshed") apply_sub(scope, [conc_name, thresh_name], scores, container) # sign - signed_input = scope.get_unique_variable_name('signed') - container.add_node('Sign', [scores], [signed_input], - name=scope.get_unique_operator_name('Sign')) + signed_input = scope.get_unique_variable_name("signed") + container.add_node( + "Sign", + [scores], + [signed_input], + name=scope.get_unique_operator_name("Sign"), + ) # clip - signed_input_cast = scope.get_unique_variable_name('signed_int64') - apply_cast(scope, signed_input, signed_input_cast, - container, to=onnx_proto.TensorProto.INT64) + signed_input_cast = scope.get_unique_variable_name("signed_int64") + apply_cast( + scope, + signed_input, + signed_input_cast, + container, + to=onnx_proto.TensorProto.INT64, + ) - label_name = scope.get_unique_variable_name('label') + label_name = scope.get_unique_variable_name("label") if container.target_opset <= 11: - abs_name = scope.get_unique_variable_name('abs') - add_name = scope.get_unique_variable_name('add') - cst_2 = scope.get_unique_variable_name('cst2') - container.add_initializer( - cst_2, onnx_proto.TensorProto.INT64, [1], [2]) + abs_name = scope.get_unique_variable_name("abs") + add_name = scope.get_unique_variable_name("add") + cst_2 = scope.get_unique_variable_name("cst2") + container.add_initializer(cst_2, onnx_proto.TensorProto.INT64, [1], [2]) apply_abs(scope, [signed_input_cast], [abs_name], container) - apply_add(scope, [signed_input_cast, abs_name], [add_name], - container) - apply_div( - scope, [add_name, cst_2], [label_name], - container) + apply_add(scope, [signed_input_cast, abs_name], [add_name], container) + apply_div(scope, [add_name, cst_2], [label_name], container) else: - zero_cst = scope.get_unique_variable_name('zero') - container.add_initializer( - zero_cst, onnx_proto.TensorProto.INT64, [], [0]) + zero_cst = scope.get_unique_variable_name("zero") + container.add_initializer(zero_cst, onnx_proto.TensorProto.INT64, [], [0]) container.add_node( - 'Clip', [signed_input_cast, zero_cst], + "Clip", + [signed_input_cast, zero_cst], [label_name], - name=scope.get_unique_operator_name('Clip')) - apply_reshape(scope, [label_name], [operator.outputs[0].full_name], - container, desired_shape=(-1, op.n_classes_)) + name=scope.get_unique_operator_name("Clip"), + ) + apply_reshape( + scope, + [label_name], + [operator.outputs[0].full_name], + container, + desired_shape=(-1, op.n_classes_), + ) else: # concatenates outputs - conc_name = scope.get_unique_variable_name('concatenated') + conc_name = scope.get_unique_variable_name("concatenated") apply_concat(scope, probs_names, conc_name, container, axis=1) if len(op.estimators_) == 1: - zeroth_col_name = scope.get_unique_variable_name('zeroth_col') - merged_prob_name = scope.get_unique_variable_name('merged_prob') - unit_float_tensor_name = scope.get_unique_variable_name( - 'unit_float_tensor') + zeroth_col_name = scope.get_unique_variable_name("zeroth_col") + merged_prob_name = scope.get_unique_variable_name("merged_prob") + unit_float_tensor_name = scope.get_unique_variable_name("unit_float_tensor") if use_raw_scores: container.add_initializer( - 
unit_float_tensor_name, proto_dtype, [], [-1.0]) - apply_mul(scope, [unit_float_tensor_name, conc_name], - zeroth_col_name, container, broadcast=1) + unit_float_tensor_name, proto_dtype, [], [-1.0] + ) + apply_mul( + scope, + [unit_float_tensor_name, conc_name], + zeroth_col_name, + container, + broadcast=1, + ) else: container.add_initializer( - unit_float_tensor_name, proto_dtype, [], [1.0]) - apply_sub(scope, [unit_float_tensor_name, conc_name], - zeroth_col_name, container, broadcast=1) - apply_concat(scope, [zeroth_col_name, conc_name], - merged_prob_name, container, axis=1) + unit_float_tensor_name, proto_dtype, [], [1.0] + ) + apply_sub( + scope, + [unit_float_tensor_name, conc_name], + zeroth_col_name, + container, + broadcast=1, + ) + apply_concat( + scope, [zeroth_col_name, conc_name], merged_prob_name, container, axis=1 + ) conc_name = merged_prob_name if use_raw_scores: - apply_identity(scope, conc_name, - operator.outputs[1].full_name, container) + apply_identity(scope, conc_name, operator.outputs[1].full_name, container) else: # normalizes the outputs apply_normalization( - scope, conc_name, operator.outputs[1].full_name, - container, axis=1, p=1) + scope, conc_name, operator.outputs[1].full_name, container, axis=1, p=1 + ) # extracts the labels - label_name = scope.get_unique_variable_name('label_name') - container.add_node('ArgMax', conc_name, label_name, - name=scope.get_unique_operator_name('ArgMax'), - axis=1) + label_name = scope.get_unique_variable_name("label_name") + container.add_node( + "ArgMax", + conc_name, + label_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) - _finalize_converter_classes(scope, label_name, - operator.outputs[0].full_name, container, - op.classes_, proto_dtype) + _finalize_converter_classes( + scope, + label_name, + operator.outputs[0].full_name, + container, + op.classes_, + proto_dtype, + ) -def convert_constant_predictor_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_constant_predictor_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *_ConstantPredictor* into *ONNX* format. 
""" @@ -185,35 +250,51 @@ def convert_constant_predictor_classifier(scope: Scope, operator: Operator, if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator - dtype = {onnx_proto.TensorProto.DOUBLE: np.float64, - onnx_proto.TensorProto.FLOAT: np.float32} + dtype = { + onnx_proto.TensorProto.DOUBLE: np.float64, + onnx_proto.TensorProto.FLOAT: np.float32, + } shape = OnnxShape(operator.inputs[0].full_name, op_version=op_version) - first = OnnxSlice(shape, np.array([0], dtype=np.int64), - np.array([1], dtype=np.int64), op_version=op_version) + first = OnnxSlice( + shape, + np.array([0], dtype=np.int64), + np.array([1], dtype=np.int64), + op_version=op_version, + ) y = op.y_.astype(dtype[proto_dtype]).ravel() - labels = OnnxTile(y.astype(np.int64), - first, op_version=op_version, - output_names=[operator.outputs[0].full_name]) + labels = OnnxTile( + y.astype(np.int64), + first, + op_version=op_version, + output_names=[operator.outputs[0].full_name], + ) cst = np.hstack([(1 - y).astype(y.dtype), y]) proba_flat = OnnxTile(cst, first, op_version=op_version) proba_reshape = OnnxReshape( - proba_flat, np.array([-1, 2], dtype=np.int64), + proba_flat, + np.array([-1, 2], dtype=np.int64), output_names=[operator.outputs[1].full_name], - op_version=op_version) + op_version=op_version, + ) labels.add_to(scope, container) proba_reshape.add_to(scope, container) -register_converter('SklearnOneVsRestClassifier', - convert_one_vs_rest_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) +register_converter( + "SklearnOneVsRestClassifier", + convert_one_vs_rest_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) -register_converter('Sklearn_ConstantPredictor', - convert_constant_predictor_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False]}) +register_converter( + "Sklearn_ConstantPredictor", + convert_constant_predictor_classifier, + options={"zipmap": [True, False, "columns"], "nocl": [True, False]}, +) diff --git a/skl2onnx/operator_converters/ordinal_encoder.py b/skl2onnx/operator_converters/ordinal_encoder.py index 00d80f056..6eec3c521 100644 --- a/skl2onnx/operator_converters/ordinal_encoder.py +++ b/skl2onnx/operator_converters/ordinal_encoder.py @@ -10,27 +10,33 @@ from ..proto import onnx_proto -def convert_sklearn_ordinal_encoder(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_ordinal_encoder( + scope: Scope, operator: Operator, container: ModelComponentContainer +): ordinal_op = operator.raw_operator result = [] concatenated_input_name = operator.inputs[0].full_name - concat_result_name = scope.get_unique_variable_name('concat_result') + concat_result_name = scope.get_unique_variable_name("concat_result") if len(operator.inputs) > 1: - concatenated_input_name = scope.get_unique_variable_name( - 'concatenated_input') - if all(isinstance(inp.type, type(operator.inputs[0].type)) - for inp in operator.inputs): + concatenated_input_name = scope.get_unique_variable_name("concatenated_input") + if all( + isinstance(inp.type, type(operator.inputs[0].type)) + for inp in operator.inputs + ): input_names = list(map(lambda x: x.full_name, operator.inputs)) else: input_names = [] for inp in operator.inputs: if isinstance(inp.type, Int64TensorType): - 
input_names.append(scope.get_unique_variable_name( - 'cast_input')) - apply_cast(scope, inp.full_name, input_names[-1], - container, to=onnx_proto.TensorProto.STRING) + input_names.append(scope.get_unique_variable_name("cast_input")) + apply_cast( + scope, + inp.full_name, + input_names[-1], + container, + to=onnx_proto.TensorProto.STRING, + ) elif isinstance(inp.type, StringTensorType): input_names.append(inp.full_name) else: @@ -38,57 +44,72 @@ def convert_sklearn_ordinal_encoder(scope: Scope, operator: Operator, "{} input datatype not yet supported. " "You may raise an issue at " "https://github.com/onnx/sklearn-onnx/issues" - "".format(type(inp.type))) + "".format(type(inp.type)) + ) - apply_concat(scope, input_names, - concatenated_input_name, container, axis=1) + apply_concat(scope, input_names, concatenated_input_name, container, axis=1) if len(ordinal_op.categories_) == 0: raise RuntimeError( - "No categories found in type=%r, encoder=%r." % ( - type(ordinal_op), ordinal_op)) + "No categories found in type=%r, encoder=%r." + % (type(ordinal_op), ordinal_op) + ) for index, categories in enumerate(ordinal_op.categories_): - attrs = {'name': scope.get_unique_operator_name('LabelEncoder')} + attrs = {"name": scope.get_unique_operator_name("LabelEncoder")} if len(categories) > 0: - if (np.issubdtype(categories.dtype, np.floating) or - categories.dtype == np.bool_): - attrs['keys_floats'] = categories + if ( + np.issubdtype(categories.dtype, np.floating) + or categories.dtype == np.bool_ + ): + attrs["keys_floats"] = categories elif np.issubdtype(categories.dtype, np.signedinteger): - attrs['keys_int64s'] = categories + attrs["keys_int64s"] = categories else: - attrs['keys_strings'] = np.array( - [str(s).encode('utf-8') for s in categories]) - attrs['values_int64s'] = np.arange( - len(categories)).astype(np.int64) + attrs["keys_strings"] = np.array( + [str(s).encode("utf-8") for s in categories] + ) + attrs["values_int64s"] = np.arange(len(categories)).astype(np.int64) - index_name = scope.get_unique_variable_name('index') - feature_column_name = scope.get_unique_variable_name( - 'feature_column') - result.append(scope.get_unique_variable_name('ordinal_output')) - label_encoder_output = scope.get_unique_variable_name( - 'label_encoder') + index_name = scope.get_unique_variable_name("index") + feature_column_name = scope.get_unique_variable_name("feature_column") + result.append(scope.get_unique_variable_name("ordinal_output")) + label_encoder_output = scope.get_unique_variable_name("label_encoder") container.add_initializer( - index_name, onnx_proto.TensorProto.INT64, [], [index]) + index_name, onnx_proto.TensorProto.INT64, [], [index] + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [concatenated_input_name, index_name], - feature_column_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + feature_column_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) - container.add_node('LabelEncoder', feature_column_name, - label_encoder_output, op_domain='ai.onnx.ml', - op_version=2, **attrs) - apply_reshape(scope, label_encoder_output, result[-1], - container, desired_shape=(-1, 1)) - apply_concat(scope, result, concat_result_name, - container, axis=1) + container.add_node( + "LabelEncoder", + feature_column_name, + label_encoder_output, + op_domain="ai.onnx.ml", + op_version=2, + **attrs + ) + apply_reshape( + scope, + label_encoder_output, + result[-1], + container, + 
desired_shape=(-1, 1), + ) + apply_concat(scope, result, concat_result_name, container, axis=1) cast_type = ( onnx_proto.TensorProto.FLOAT if np.issubdtype(ordinal_op.dtype, np.floating) - else onnx_proto.TensorProto.INT64) - apply_cast(scope, concat_result_name, operator.output_full_names, - container, to=cast_type) + else onnx_proto.TensorProto.INT64 + ) + apply_cast( + scope, concat_result_name, operator.output_full_names, container, to=cast_type + ) -register_converter('SklearnOrdinalEncoder', convert_sklearn_ordinal_encoder) +register_converter("SklearnOrdinalEncoder", convert_sklearn_ordinal_encoder) diff --git a/skl2onnx/operator_converters/ovr_decision_function.py b/skl2onnx/operator_converters/ovr_decision_function.py index 11e9ed0fd..a45834598 100644 --- a/skl2onnx/operator_converters/ovr_decision_function.py +++ b/skl2onnx/operator_converters/ovr_decision_function.py @@ -1,8 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 from ..common._apply_operation import ( - apply_concat, apply_abs, - apply_add, apply_mul, apply_div) + apply_concat, + apply_abs, + apply_add, + apply_mul, + apply_div, +) + try: from ..common._apply_operation import apply_less except ImportError: @@ -16,9 +21,8 @@ def convert_sklearn_ovr_decision_function( - scope: Scope, operator: Operator, - container: ModelComponentContainer): - + scope: Scope, operator: Operator, container: ModelComponentContainer +): # Applies _ovr_decision_function. # See https://github.com/scikit-learn/scikit-learn/blob/ # master/sklearn/utils/multiclass.py#L407: @@ -44,88 +48,102 @@ def convert_sklearn_ovr_decision_function( proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator - cst3 = scope.get_unique_variable_name('cst3') + cst3 = scope.get_unique_variable_name("cst3") container.add_initializer(cst3, proto_dtype, [], [3]) - cst1 = scope.get_unique_variable_name('cst1') + cst1 = scope.get_unique_variable_name("cst1") container.add_initializer(cst1, proto_dtype, [], [1]) iprediction = operator.inputs[0].full_name score_name = operator.inputs[1].full_name n_classes = len(op.classes_) - sumc_name = [scope.get_unique_variable_name('svcsumc_%d' % i) - for i in range(n_classes)] - vote_name = [scope.get_unique_variable_name('svcvote_%d' % i) - for i in range(n_classes)] + sumc_name = [ + scope.get_unique_variable_name("svcsumc_%d" % i) for i in range(n_classes) + ] + vote_name = [ + scope.get_unique_variable_name("svcvote_%d" % i) for i in range(n_classes) + ] sumc_add = {n: [] for n in sumc_name} vote_add = {n: [] for n in vote_name} k = 0 for i in range(n_classes): for j in range(i + 1, n_classes): - ind = scope.get_unique_variable_name('Cind_%d' % k) - container.add_initializer( - ind, onnx_proto.TensorProto.INT64, [], [k]) + ind = scope.get_unique_variable_name("Cind_%d" % k) + container.add_initializer(ind, onnx_proto.TensorProto.INT64, [], [k]) # confidences - ext = scope.get_unique_variable_name('Csvc_%d' % k) + ext = scope.get_unique_variable_name("Csvc_%d" % k) container.add_node( - 'ArrayFeatureExtractor', [score_name, ind], - ext, op_domain='ai.onnx.ml') + "ArrayFeatureExtractor", [score_name, ind], ext, op_domain="ai.onnx.ml" + ) sumc_add[sumc_name[j]].append(ext) - neg = scope.get_unique_variable_name('Cneg_%d' % k) - container.add_node('Neg', ext, neg, op_domain='', op_version=6) + neg = scope.get_unique_variable_name("Cneg_%d" % k) + container.add_node("Neg", ext, neg, op_domain="", op_version=6) sumc_add[sumc_name[i]].append(neg) # votes - ext = scope.get_unique_variable_name('Vsvcv_%d' % k) + ext = 
scope.get_unique_variable_name("Vsvcv_%d" % k) container.add_node( - 'ArrayFeatureExtractor', [iprediction, ind], - ext, op_domain='ai.onnx.ml') + "ArrayFeatureExtractor", [iprediction, ind], ext, op_domain="ai.onnx.ml" + ) vote_add[vote_name[j]].append(ext) - neg = scope.get_unique_variable_name('Vnegv_%d' % k) - container.add_node('Neg', ext, neg, op_domain='', op_version=6) - neg1 = scope.get_unique_variable_name('Vnegv1_%d' % k) - apply_add(scope, [neg, cst1], neg1, container, broadcast=1, - operator_name='AddCl_%d_%d' % (i, j)) + neg = scope.get_unique_variable_name("Vnegv_%d" % k) + container.add_node("Neg", ext, neg, op_domain="", op_version=6) + neg1 = scope.get_unique_variable_name("Vnegv1_%d" % k) + apply_add( + scope, + [neg, cst1], + neg1, + container, + broadcast=1, + operator_name="AddCl_%d_%d" % (i, j), + ) vote_add[vote_name[i]].append(neg1) # next k += 1 for k, v in sumc_add.items(): - name = scope.get_unique_operator_name('Sum') - container.add_node( - 'Sum', v, k, op_domain='', name=name, op_version=8) + name = scope.get_unique_operator_name("Sum") + container.add_node("Sum", v, k, op_domain="", name=name, op_version=8) for k, v in vote_add.items(): - name = scope.get_unique_operator_name('Sum') - container.add_node( - 'Sum', v, k, op_domain='', name=name, op_version=8) + name = scope.get_unique_operator_name("Sum") + container.add_node("Sum", v, k, op_domain="", name=name, op_version=8) - conc = scope.get_unique_variable_name('Csvcconc') + conc = scope.get_unique_variable_name("Csvcconc") apply_concat(scope, sumc_name, conc, container, axis=1) - conc_vote = scope.get_unique_variable_name('Vsvcconcv') + conc_vote = scope.get_unique_variable_name("Vsvcconcv") apply_concat(scope, vote_name, conc_vote, container, axis=1) - conc_abs = scope.get_unique_variable_name('Cabs') + conc_abs = scope.get_unique_variable_name("Cabs") apply_abs(scope, conc, conc_abs, container) - conc_abs1 = scope.get_unique_variable_name('Cconc_abs1') - apply_add(scope, [conc_abs, cst1], conc_abs1, container, broadcast=1, - operator_name='AddF0') - conc_abs3 = scope.get_unique_variable_name('Cconc_abs3') + conc_abs1 = scope.get_unique_variable_name("Cconc_abs1") + apply_add( + scope, + [conc_abs, cst1], + conc_abs1, + container, + broadcast=1, + operator_name="AddF0", + ) + conc_abs3 = scope.get_unique_variable_name("Cconc_abs3") apply_mul(scope, [conc_abs1, cst3], conc_abs3, container, broadcast=1) - final = scope.get_unique_variable_name('Csvcfinal') - apply_div( - scope, [conc, conc_abs3], final, container, broadcast=0) + final = scope.get_unique_variable_name("Csvcfinal") + apply_div(scope, [conc, conc_abs3], final, container, broadcast=0) output_name = operator.outputs[0].full_name apply_add( - scope, [conc_vote, final], output_name, container, broadcast=0, - operator_name='AddF1') + scope, + [conc_vote, final], + output_name, + container, + broadcast=0, + operator_name="AddF1", + ) -register_converter('SklearnOVRDecisionFunction', - convert_sklearn_ovr_decision_function) +register_converter("SklearnOVRDecisionFunction", convert_sklearn_ovr_decision_function) diff --git a/skl2onnx/operator_converters/pipelines.py b/skl2onnx/operator_converters/pipelines.py index c7b3ba34d..9c0ded09f 100644 --- a/skl2onnx/operator_converters/pipelines.py +++ b/skl2onnx/operator_converters/pipelines.py @@ -7,43 +7,48 @@ from .._parse import _parse_sklearn -def convert_pipeline(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_pipeline( + scope: Scope, operator: Operator, 
container: ModelComponentContainer +): model = operator.raw_operator inputs = operator.inputs for step in model.steps: step_model = step[1] if is_classifier(step_model): - scope.add_options(id(step_model), options={'zipmap': False}) - container.add_options(id(step_model), options={'zipmap': False}) - outputs = _parse_sklearn(scope, step_model, inputs, - custom_parsers=None) + scope.add_options(id(step_model), options={"zipmap": False}) + container.add_options(id(step_model), options={"zipmap": False}) + outputs = _parse_sklearn(scope, step_model, inputs, custom_parsers=None) inputs = outputs if len(outputs) != len(operator.outputs): raise RuntimeError( "Mismatch between pipeline output %d and " - "last step outputs %d." % ( - len(outputs), len(operator.outputs))) + "last step outputs %d." % (len(outputs), len(operator.outputs)) + ) for fr, to in zip(outputs, operator.outputs): container.add_node( - 'Identity', fr.full_name, to.full_name, - name=scope.get_unique_operator_name("Id" + operator.onnx_name)) + "Identity", + fr.full_name, + to.full_name, + name=scope.get_unique_operator_name("Id" + operator.onnx_name), + ) -def convert_feature_union(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_feature_union( + scope: Scope, operator: Operator, container: ModelComponentContainer +): raise NotImplementedError( - "This converter not needed so far. It is usually handled " - "during parsing.") + "This converter is not needed so far. It is usually handled during parsing." + ) -def convert_column_transformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_column_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): raise NotImplementedError( - "This converter not needed so far. It is usually handled " - "during parsing.") + "This converter is not needed so far. It is usually handled during parsing."
+ ) -register_converter('SklearnPipeline', convert_pipeline) -register_converter('SklearnFeatureUnion', convert_feature_union) -register_converter('SklearnColumnTransformer', convert_column_transformer) +register_converter("SklearnPipeline", convert_pipeline) +register_converter("SklearnFeatureUnion", convert_feature_union) +register_converter("SklearnColumnTransformer", convert_column_transformer) diff --git a/skl2onnx/operator_converters/polynomial_features.py b/skl2onnx/operator_converters/polynomial_features.py index 640310e20..27b0c9533 100644 --- a/skl2onnx/operator_converters/polynomial_features.py +++ b/skl2onnx/operator_converters/polynomial_features.py @@ -10,87 +10,123 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_polynomial_features(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_polynomial_features( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator transformed_columns = [None] * (op.n_output_features_) - n_features = (op.n_features_in_ - if hasattr(op, 'n_features_in_') - else op.n_features_) - if hasattr(op, '_min_degree'): + n_features = op.n_features_in_ if hasattr(op, "n_features_in_") else op.n_features_ + if hasattr(op, "_min_degree"): # scikit-learn >= 1.0 combinations = op._combinations( - n_features, op._min_degree, op._max_degree, op.interaction_only, - op.include_bias) + n_features, + op._min_degree, + op._max_degree, + op.interaction_only, + op.include_bias, + ) else: combinations = op._combinations( - n_features, op.degree, op.interaction_only, op.include_bias) + n_features, op.degree, op.interaction_only, op.include_bias + ) unit_name = None last_feat = None for i, comb in enumerate(combinations): if len(comb) == 0: - unit_name = scope.get_unique_variable_name('unit') + unit_name = scope.get_unique_variable_name("unit") transformed_columns[i] = unit_name else: - comb_name = scope.get_unique_variable_name('comb') - col_name = scope.get_unique_variable_name('col') - prod_name = scope.get_unique_variable_name('prod') + comb_name = scope.get_unique_variable_name("comb") + col_name = scope.get_unique_variable_name("col") + prod_name = scope.get_unique_variable_name("prod") - container.add_initializer(comb_name, onnx_proto.TensorProto.INT64, - [len(comb)], list(comb)) + container.add_initializer( + comb_name, onnx_proto.TensorProto.INT64, [len(comb)], list(comb) + ) container.add_node( - 'ArrayFeatureExtractor', - [operator.inputs[0].full_name, comb_name], col_name, - name=scope.get_unique_operator_name('ArrayFeatureExtractor'), - op_domain='ai.onnx.ml') + "ArrayFeatureExtractor", + [operator.inputs[0].full_name, comb_name], + col_name, + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + op_domain="ai.onnx.ml", + ) reduce_prod_input = col_name - if (operator.inputs[0].type._get_element_onnx_type() - == onnx_proto.TensorProto.INT64): - float_col_name = scope.get_unique_variable_name('col') + if ( + operator.inputs[0].type._get_element_onnx_type() + == onnx_proto.TensorProto.INT64 + ): + float_col_name = scope.get_unique_variable_name("col") - apply_cast(scope, col_name, float_col_name, container, - to=onnx_proto.TensorProto.FLOAT) + apply_cast( + scope, + col_name, + float_col_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) reduce_prod_input = float_col_name if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') + axis_name = scope.get_unique_variable_name("axis") 
container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name, onnx_proto.TensorProto.INT64, [1], [1] + ) container.add_node( - 'ReduceProd', [reduce_prod_input, axis_name], prod_name, - name=scope.get_unique_operator_name('ReduceProd')) + "ReduceProd", + [reduce_prod_input, axis_name], + prod_name, + name=scope.get_unique_operator_name("ReduceProd"), + ) else: container.add_node( - 'ReduceProd', reduce_prod_input, prod_name, + "ReduceProd", + reduce_prod_input, + prod_name, axes=[1], - name=scope.get_unique_operator_name('ReduceProd')) + name=scope.get_unique_operator_name("ReduceProd"), + ) transformed_columns[i] = prod_name last_feat = prod_name if unit_name is not None: - shape_name = scope.get_unique_variable_name('shape') - container.add_node('Shape', last_feat, shape_name) - container.add_node('ConstantOfShape', shape_name, unit_name, - value=make_tensor( - 'ONE', TensorProto.FLOAT, [1], [1.]), - op_version=9) + shape_name = scope.get_unique_variable_name("shape") + container.add_node("Shape", last_feat, shape_name) + container.add_node( + "ConstantOfShape", + shape_name, + unit_name, + value=make_tensor("ONE", TensorProto.FLOAT, [1], [1.0]), + op_version=9, + ) - if (operator.inputs[0].type._get_element_onnx_type() - == onnx_proto.TensorProto.INT64): - concat_result_name = scope.get_unique_variable_name('concat_result') + if operator.inputs[0].type._get_element_onnx_type() == onnx_proto.TensorProto.INT64: + concat_result_name = scope.get_unique_variable_name("concat_result") - apply_concat(scope, [t for t in transformed_columns], - concat_result_name, container, axis=1) - apply_cast(scope, concat_result_name, - operator.outputs[0].full_name, container, - to=onnx_proto.TensorProto.INT64) + apply_concat( + scope, + [t for t in transformed_columns], + concat_result_name, + container, + axis=1, + ) + apply_cast( + scope, + concat_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - apply_concat(scope, [t for t in transformed_columns], - operator.outputs[0].full_name, container, axis=1) + apply_concat( + scope, + [t for t in transformed_columns], + operator.outputs[0].full_name, + container, + axis=1, + ) -register_converter('SklearnPolynomialFeatures', - convert_sklearn_polynomial_features) +register_converter("SklearnPolynomialFeatures", convert_sklearn_polynomial_features) diff --git a/skl2onnx/operator_converters/power_transformer.py b/skl2onnx/operator_converters/power_transformer.py index d2c69baed..461900cbe 100644 --- a/skl2onnx/operator_converters/power_transformer.py +++ b/skl2onnx/operator_converters/power_transformer.py @@ -7,13 +7,25 @@ from ..common._container import ModelComponentContainer from ..common.data_types import guess_numpy_type from ..algebra.onnx_ops import ( - OnnxAdd, OnnxSub, OnnxPow, OnnxDiv, OnnxMul, - OnnxCast, OnnxNot, OnnxLess, OnnxLog, OnnxNeg, - OnnxImputer, OnnxIdentity, OnnxScaler) - - -def convert_powertransformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): + OnnxAdd, + OnnxSub, + OnnxPow, + OnnxDiv, + OnnxMul, + OnnxCast, + OnnxNot, + OnnxLess, + OnnxLog, + OnnxNeg, + OnnxImputer, + OnnxIdentity, + OnnxScaler, +) + + +def convert_powertransformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """Converter for PowerTransformer""" op_in = operator.inputs[0] op_out = operator.outputs[0].full_name @@ -30,14 +42,14 @@ def convert_powertransformer(scope: Scope, operator: Operator, # logical masks for input 
less_than_zero = OnnxLess(op_in, zeros_, op_version=opv) - less_mask = OnnxCast(less_than_zero, - to=getattr(TensorProto, 'FLOAT'), - op_version=opv) + less_mask = OnnxCast( + less_than_zero, to=getattr(TensorProto, "FLOAT"), op_version=opv + ) greater_than_zero = OnnxNot(less_than_zero, op_version=opv) - greater_mask = OnnxCast(greater_than_zero, - to=getattr(TensorProto, 'FLOAT'), - op_version=opv) + greater_mask = OnnxCast( + greater_than_zero, to=getattr(TensorProto, "FLOAT"), op_version=opv + ) # logical masks for lambdas lambda_zero_mask = np.float32(lambdas == 0) @@ -45,7 +57,7 @@ def convert_powertransformer(scope: Scope, operator: Operator, lambda_two_mask = np.float32(lambdas == 2) lambda_nontwo_mask = np.float32(lambdas != 2) - if 'yeo-johnson' in op.method: + if "yeo-johnson" in op.method: y0 = OnnxAdd(op_in, ones_, op_version=opv) # For positive input y1 = OnnxSub(ones_, op_in, op_version=opv) # For negative input @@ -54,31 +66,37 @@ def convert_powertransformer(scope: Scope, operator: Operator, y_gr0_l_ne0 = OnnxSub(y_gr0_l_ne0, ones_, op_version=opv) y_gr0_l_ne0 = OnnxDiv(y_gr0_l_ne0, lambdas, op_version=opv) y_gr0_l_ne0 = OnnxImputer( - y_gr0_l_ne0, imputed_value_floats=[0.0], - replaced_value_float=np.inf, op_version=opv) - y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, - op_version=opv) + y_gr0_l_ne0, + imputed_value_floats=[0.0], + replaced_value_float=np.inf, + op_version=opv, + ) + y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, op_version=opv) # positive input, lambda == 0 y_gr0_l_eq0 = OnnxLog(y0, op_version=opv) - y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask, - op_version=opv) + y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask, op_version=opv) # positive input, an arbitrary lambda y_gr0 = OnnxAdd(y_gr0_l_ne0, y_gr0_l_eq0, op_version=opv) - y_gr0 = OnnxImputer(y_gr0, imputed_value_floats=[0.0], - replaced_value_float=np.NAN, - op_version=opv) + y_gr0 = OnnxImputer( + y_gr0, + imputed_value_floats=[0.0], + replaced_value_float=np.NAN, + op_version=opv, + ) y_gr0 = OnnxMul(y_gr0, greater_mask, op_version=opv) # negative input, lambda != 2 y_le0_l_ne2 = OnnxPow(y1, 2 - lambdas, op_version=opv) y_le0_l_ne2 = OnnxSub(ones_, y_le0_l_ne2, op_version=opv) - y_le0_l_ne2 = OnnxDiv( - y_le0_l_ne2, (2 - lambdas).astype(dtype), op_version=opv) + y_le0_l_ne2 = OnnxDiv(y_le0_l_ne2, (2 - lambdas).astype(dtype), op_version=opv) y_le0_l_ne2 = OnnxImputer( - y_le0_l_ne2, imputed_value_floats=[0.0], - replaced_value_float=np.inf, op_version=opv) + y_le0_l_ne2, + imputed_value_floats=[0.0], + replaced_value_float=np.inf, + op_version=opv, + ) y_le0_l_ne2 = OnnxMul(y_le0_l_ne2, lambda_nontwo_mask, op_version=opv) # negative input, lambda == 2 @@ -87,32 +105,38 @@ def convert_powertransformer(scope: Scope, operator: Operator, # negative input, an arbitrary lambda y_le0 = OnnxAdd(y_le0_l_ne2, y_le0_l_eq2, op_version=opv) - y_le0 = OnnxImputer(y_le0, imputed_value_floats=[0.0], - replaced_value_float=np.NAN, - op_version=opv) + y_le0 = OnnxImputer( + y_le0, + imputed_value_floats=[0.0], + replaced_value_float=np.NAN, + op_version=opv, + ) y_le0 = OnnxMul(y_le0, less_mask, op_version=opv) # Arbitrary input and lambda y = OnnxAdd(y_gr0, y_le0, op_version=opv) - elif 'box-cox' in op.method: + elif "box-cox" in op.method: # positive input, lambda != 0 y_gr0_l_ne0 = OnnxPow(op_in, lambdas, op_version=opv) y_gr0_l_ne0 = OnnxSub(y_gr0_l_ne0, ones_, op_version=opv) y_gr0_l_ne0 = OnnxDiv(y_gr0_l_ne0, lambdas, op_version=opv) - y_gr0_l_ne0 = OnnxImputer(y_gr0_l_ne0, - 
imputed_value_floats=[0.0], - replaced_value_float=np.inf, - op_version=opv) - y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, - op_version=opv) + y_gr0_l_ne0 = OnnxImputer( + y_gr0_l_ne0, + imputed_value_floats=[0.0], + replaced_value_float=np.inf, + op_version=opv, + ) + y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, op_version=opv) # positive input, lambda == 0 y_gr0_l_eq0 = OnnxLog(op_in, op_version=opv) - y_gr0_l_eq0 = OnnxImputer(y_gr0_l_eq0, - imputed_value_floats=[0.0], - replaced_value_float=np.NAN, - op_version=opv) + y_gr0_l_eq0 = OnnxImputer( + y_gr0_l_eq0, + imputed_value_floats=[0.0], + replaced_value_float=np.NAN, + op_version=opv, + ) y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask, op_version=opv) # positive input, arbitrary lambda @@ -123,26 +147,32 @@ def convert_powertransformer(scope: Scope, operator: Operator, # raises ValueError. # Therefore we cannot use convert_sklearn() for that model else: - raise NotImplementedError( - 'Method {} is not supported'.format(op.method)) + raise NotImplementedError("Method {} is not supported".format(op.method)) - y.set_onnx_name_prefix('pref') + y.set_onnx_name_prefix("pref") if op.standardize: - use_scaler_op = container.is_allowed({'Scaler'}) + use_scaler_op = container.is_allowed({"Scaler"}) if not use_scaler_op or dtype != np.float32: sub = OnnxSub(y, op._scaler.mean_.astype(dtype), op_version=opv) - final = OnnxDiv(sub, op._scaler.scale_.astype(dtype), - op_version=opv, output_names=[op_out]) + final = OnnxDiv( + sub, + op._scaler.scale_.astype(dtype), + op_version=opv, + output_names=[op_out], + ) else: final = OnnxScaler( - y, offset=op._scaler.mean_.astype(dtype), + y, + offset=op._scaler.mean_.astype(dtype), scale=(1.0 / op._scaler.scale_).astype(dtype), - op_version=opv, output_names=[op_out]) + op_version=opv, + output_names=[op_out], + ) else: final = OnnxIdentity(y, op_version=opv, output_names=[op_out]) final.add_to(scope, container) -register_converter('SklearnPowerTransformer', convert_powertransformer) +register_converter("SklearnPowerTransformer", convert_powertransformer) diff --git a/skl2onnx/operator_converters/quadratic_discriminant_analysis.py b/skl2onnx/operator_converters/quadratic_discriminant_analysis.py index 8615cee9c..d68bb9289 100644 --- a/skl2onnx/operator_converters/quadratic_discriminant_analysis.py +++ b/skl2onnx/operator_converters/quadratic_discriminant_analysis.py @@ -2,11 +2,22 @@ from ..common._apply_operation import ( - apply_add, apply_argmax, apply_cast, apply_concat, apply_div, apply_exp, - apply_log, apply_matmul, apply_mul, apply_pow, - apply_reducesum, apply_reshape, apply_sub, apply_transpose) -from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_proto_type) + apply_add, + apply_argmax, + apply_cast, + apply_concat, + apply_div, + apply_exp, + apply_log, + apply_matmul, + apply_mul, + apply_pow, + apply_reducesum, + apply_reshape, + apply_sub, + apply_transpose, +) +from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -14,8 +25,8 @@ def convert_quadratic_discriminant_analysis_classifier( - scope: Scope, operator: Operator, container: ModelComponentContainer): - + scope: Scope, operator: Operator, container: ModelComponentContainer +): input_name = operator.inputs[0].full_name model = operator.raw_operator @@ -25,132 +36,158 @@ def 
convert_quadratic_discriminant_analysis_classifier( if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT - if isinstance(operator.inputs[0].type, - (BooleanTensorType, Int64TensorType)): - cast_input_name = scope.get_unique_variable_name('cast_input') - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=proto_dtype) + if isinstance(operator.inputs[0].type, (BooleanTensorType, Int64TensorType)): + cast_input_name = scope.get_unique_variable_name("cast_input") + apply_cast( + scope, operator.input_full_names, cast_input_name, container, to=proto_dtype + ) input_name = cast_input_name norm_array_name = [] sum_array_name = [] - const_n05 = scope.get_unique_variable_name('const_n05') - const_p2 = scope.get_unique_variable_name('const_p2') + const_n05 = scope.get_unique_variable_name("const_n05") + const_p2 = scope.get_unique_variable_name("const_p2") container.add_initializer(const_n05, proto_dtype, [], [-0.5]) container.add_initializer(const_p2, proto_dtype, [], [2]) for i in range(n_classes): R = model.rotations_[i] - rotation_name = scope.get_unique_variable_name('rotations') - container.add_initializer(rotation_name, proto_dtype, - [R.shape[0], R.shape[1]], R) + rotation_name = scope.get_unique_variable_name("rotations") + container.add_initializer( + rotation_name, proto_dtype, [R.shape[0], R.shape[1]], R + ) S = model.scalings_[i] - scaling_name = scope.get_unique_variable_name('scalings') + scaling_name = scope.get_unique_variable_name("scalings") container.add_initializer( - scaling_name, proto_dtype, [S.shape[0], ], S) + scaling_name, + proto_dtype, + [ + S.shape[0], + ], + S, + ) mean = model.means_[i] - mean_name = scope.get_unique_variable_name('means') + mean_name = scope.get_unique_variable_name("means") container.add_initializer(mean_name, proto_dtype, mean.shape, mean) - Xm_name = scope.get_unique_variable_name('Xm') + Xm_name = scope.get_unique_variable_name("Xm") apply_sub(scope, [input_name, mean_name], [Xm_name], container) - s_pow_name = scope.get_unique_variable_name('s_pow_n05') + s_pow_name = scope.get_unique_variable_name("s_pow_n05") apply_pow(scope, [scaling_name, const_n05], [s_pow_name], container) - mul_name = scope.get_unique_variable_name('mul') + mul_name = scope.get_unique_variable_name("mul") apply_mul(scope, [rotation_name, s_pow_name], [mul_name], container) - x2_name = scope.get_unique_variable_name('matmul') + x2_name = scope.get_unique_variable_name("matmul") apply_matmul(scope, [Xm_name, mul_name], [x2_name], container) - pow_x2_name = scope.get_unique_variable_name('pow_x2') + pow_x2_name = scope.get_unique_variable_name("pow_x2") apply_pow(scope, [x2_name, const_p2], [pow_x2_name], container) - sum_name = scope.get_unique_variable_name('sum') - apply_reducesum(scope, [pow_x2_name], [sum_name], - container, axes=[1], keepdims=1) + sum_name = scope.get_unique_variable_name("sum") + apply_reducesum( + scope, [pow_x2_name], [sum_name], container, axes=[1], keepdims=1 + ) norm_array_name.append(sum_name) - log_name = scope.get_unique_variable_name('log') + log_name = scope.get_unique_variable_name("log") apply_log(scope, [scaling_name], [log_name], container) - sum_log_name = scope.get_unique_variable_name('sum_log') - apply_reducesum( - scope, [log_name], [sum_log_name], container, keepdims=1) + sum_log_name = scope.get_unique_variable_name("sum_log") + apply_reducesum(scope, [log_name], [sum_log_name], container, keepdims=1) sum_array_name.append(sum_log_name) - concat_norm_name = 
scope.get_unique_variable_name('concat_norm') + concat_norm_name = scope.get_unique_variable_name("concat_norm") apply_concat(scope, norm_array_name, [concat_norm_name], container) - reshape_norm_name = scope.get_unique_variable_name('reshape_concat_norm') - apply_reshape(scope, [concat_norm_name], [reshape_norm_name], - container, desired_shape=[n_classes, -1]) + reshape_norm_name = scope.get_unique_variable_name("reshape_concat_norm") + apply_reshape( + scope, + [concat_norm_name], + [reshape_norm_name], + container, + desired_shape=[n_classes, -1], + ) - transpose_norm_name = scope.get_unique_variable_name('transpose_norm') - apply_transpose(scope, [reshape_norm_name], [transpose_norm_name], - container, perm=(1, 0)) + transpose_norm_name = scope.get_unique_variable_name("transpose_norm") + apply_transpose( + scope, [reshape_norm_name], [transpose_norm_name], container, perm=(1, 0) + ) - concat_logsum = scope.get_unique_variable_name('concat_logsum') + concat_logsum = scope.get_unique_variable_name("concat_logsum") apply_concat(scope, sum_array_name, [concat_logsum], container) - add_norm2_u_name = scope.get_unique_variable_name('add_norm2_u') - apply_add(scope, [transpose_norm_name, concat_logsum], - [add_norm2_u_name], container) + add_norm2_u_name = scope.get_unique_variable_name("add_norm2_u") + apply_add( + scope, [transpose_norm_name, concat_logsum], [add_norm2_u_name], container + ) - norm2_u_n05_name = scope.get_unique_variable_name('norm2_u_n05') - apply_mul( - scope, [const_n05, add_norm2_u_name], [norm2_u_n05_name], container) + norm2_u_n05_name = scope.get_unique_variable_name("norm2_u_n05") + apply_mul(scope, [const_n05, add_norm2_u_name], [norm2_u_n05_name], container) - priors = scope.get_unique_variable_name('priors') + priors = scope.get_unique_variable_name("priors") container.add_initializer( - priors, proto_dtype, [n_classes, ], model.priors_) - log_p = scope.get_unique_variable_name('log_p') + priors, + proto_dtype, + [ + n_classes, + ], + model.priors_, + ) + log_p = scope.get_unique_variable_name("log_p") apply_log(scope, [priors], [log_p], container) - decision_fun = scope.get_unique_variable_name('decision_fun') + decision_fun = scope.get_unique_variable_name("decision_fun") apply_add(scope, [norm2_u_n05_name, log_p], [decision_fun], container) - argmax_out = scope.get_unique_variable_name('argmax_out') + argmax_out = scope.get_unique_variable_name("argmax_out") apply_argmax(scope, [decision_fun], [argmax_out], container, axis=1) - classes = scope.get_unique_variable_name('classes') + classes = scope.get_unique_variable_name("classes") container.add_initializer( - classes, onnx_proto.TensorProto.INT64, [n_classes], model.classes_) + classes, onnx_proto.TensorProto.INT64, [n_classes], model.classes_ + ) container.add_node( - 'ArrayFeatureExtractor', + "ArrayFeatureExtractor", [classes, argmax_out], [operator.outputs[0].full_name], - op_domain='ai.onnx.ml' + op_domain="ai.onnx.ml", ) - df_max = scope.get_unique_variable_name('df_max') + df_max = scope.get_unique_variable_name("df_max") if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) - container.add_node( - 'ReduceMax', [decision_fun, axis_name], [df_max]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + container.add_node("ReduceMax", [decision_fun, axis_name], [df_max]) else: - container.add_node( - 'ReduceMax', 
[decision_fun], [df_max], axes=[1]) - df_sub_max = scope.get_unique_variable_name('df_sub_max') + container.add_node("ReduceMax", [decision_fun], [df_max], axes=[1]) + df_sub_max = scope.get_unique_variable_name("df_sub_max") apply_sub(scope, [decision_fun, df_max], [df_sub_max], container) - likelihood = scope.get_unique_variable_name('likelihood') + likelihood = scope.get_unique_variable_name("likelihood") apply_exp(scope, [df_sub_max], [likelihood], container) - likelihood_sum = scope.get_unique_variable_name('likelihood_sum') - apply_reducesum(scope, [likelihood], [likelihood_sum], container, - axes=[1], keepdims=1) - apply_div(scope, [likelihood, likelihood_sum], - [operator.outputs[1].full_name], container, ) - - -register_converter('SklearnQuadraticDiscriminantAnalysis', - convert_quadratic_discriminant_analysis_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True]}) + likelihood_sum = scope.get_unique_variable_name("likelihood_sum") + apply_reducesum( + scope, [likelihood], [likelihood_sum], container, axes=[1], keepdims=1 + ) + apply_div( + scope, + [likelihood, likelihood_sum], + [operator.outputs[1].full_name], + container, + ) + + +register_converter( + "SklearnQuadraticDiscriminantAnalysis", + convert_quadratic_discriminant_analysis_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + }, +) diff --git a/skl2onnx/operator_converters/random_forest.py b/skl2onnx/operator_converters/random_forest.py index 4c873eb0f..d04c22ac4 100644 --- a/skl2onnx/operator_converters/random_forest.py +++ b/skl2onnx/operator_converters/random_forest.py @@ -10,19 +10,22 @@ apply_reshape, apply_transpose, ) -from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type) +from ..common.data_types import BooleanTensorType, Int64TensorType, guess_numpy_type from ..common._registration import register_converter from ..common.tree_ensemble import ( add_tree_to_attribute_pairs, add_tree_to_attribute_pairs_hist_gradient_boosting, get_default_tree_classifier_attribute_pairs, - get_default_tree_regressor_attribute_pairs + get_default_tree_regressor_attribute_pairs, ) from ..common.utils_classifier import get_label_classes from ..proto import onnx_proto from .decision_tree import ( - predict, _build_labels_path, _build_labels_leaf, _append_decision_output) + predict, + _build_labels_path, + _build_labels_leaf, + _append_decision_output, +) def _num_estimators(op): @@ -33,487 +36,655 @@ def _num_estimators(op): # ... # classifier.fit(X_tmp, y_tmp) # classifier.n_estimators += 30 - if hasattr(op, 'estimators_'): + if hasattr(op, "estimators_"): return len(op.estimators_) - elif hasattr(op, '_predictors'): + elif hasattr(op, "_predictors"): # HistGradientBoosting* return len(op._predictors) raise NotImplementedError( - "Model should have attribute 'estimators_' or '_predictors'.") + "Model should have attribute 'estimators_' or '_predictors'." 
+ ) def _calculate_labels(scope, container, model, proba): predictions = [] - transposed_result_name = scope.get_unique_variable_name( - 'transposed_result') - apply_transpose(scope, proba, transposed_result_name, - container, perm=(1, 2, 0)) + transposed_result_name = scope.get_unique_variable_name("transposed_result") + apply_transpose(scope, proba, transposed_result_name, container, perm=(1, 2, 0)) for k in range(model.n_outputs_): - preds_name = scope.get_unique_variable_name('preds') - reshaped_preds_name = scope.get_unique_variable_name( - 'reshaped_preds') - k_name = scope.get_unique_variable_name('k_column') - out_k_name = scope.get_unique_variable_name('out_k_column') - argmax_output_name = scope.get_unique_variable_name( - 'argmax_output') - classes_name = scope.get_unique_variable_name('classes') - reshaped_result_name = scope.get_unique_variable_name( - 'reshaped_result') - - container.add_initializer( - k_name, onnx_proto.TensorProto.INT64, - [], [k]) + preds_name = scope.get_unique_variable_name("preds") + reshaped_preds_name = scope.get_unique_variable_name("reshaped_preds") + k_name = scope.get_unique_variable_name("k_column") + out_k_name = scope.get_unique_variable_name("out_k_column") + argmax_output_name = scope.get_unique_variable_name("argmax_output") + classes_name = scope.get_unique_variable_name("classes") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") + + container.add_initializer(k_name, onnx_proto.TensorProto.INT64, [], [k]) container.add_initializer( - classes_name, onnx_proto.TensorProto.INT64, - model.classes_[k].shape, model.classes_[k]) + classes_name, + onnx_proto.TensorProto.INT64, + model.classes_[k].shape, + model.classes_[k], + ) container.add_node( - 'ArrayFeatureExtractor', [transposed_result_name, k_name], - out_k_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArrayFeatureExtractor", + [transposed_result_name, k_name], + out_k_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) container.add_node( - 'ArgMax', out_k_name, argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) - apply_reshape(scope, argmax_output_name, reshaped_result_name, - container, desired_shape=(1, -1)) + "ArgMax", + out_k_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + apply_reshape( + scope, + argmax_output_name, + reshaped_result_name, + container, + desired_shape=(1, -1), + ) container.add_node( - 'ArrayFeatureExtractor', [classes_name, reshaped_result_name], - preds_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) - apply_reshape(scope, preds_name, reshaped_preds_name, - container, desired_shape=(-1, 1)) + "ArrayFeatureExtractor", + [classes_name, reshaped_result_name], + preds_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) + apply_reshape( + scope, preds_name, reshaped_preds_name, container, desired_shape=(-1, 1) + ) predictions.append(reshaped_preds_name) return predictions def convert_sklearn_random_forest_classifier( - scope, operator, container, op_type='TreeEnsembleClassifier', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleClassifier", + op_domain="ai.onnx.ml", + op_version=1, +): dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 attr_dtype = dtype if op_version >= 3 else np.float32 op = 
operator.raw_operator - if hasattr(op, 'n_outputs_'): + if hasattr(op, "n_outputs_"): n_outputs = int(op.n_outputs_) options = container.get_options( - op, dict(raw_scores=False, decision_path=False, - decision_leaf=False)) - elif hasattr(op, 'n_trees_per_iteration_'): + op, dict(raw_scores=False, decision_path=False, decision_leaf=False) + ) + elif hasattr(op, "n_trees_per_iteration_"): # HistGradientBoostingClassifier n_outputs = op.n_trees_per_iteration_ options = container.get_options(op, dict(raw_scores=False)) else: raise NotImplementedError( - "Model should have attribute 'n_outputs_' or " - "'n_trees_per_iteration_'.") + "Model should have attribute 'n_outputs_' or " "'n_trees_per_iteration_'." + ) - use_raw_scores = options['raw_scores'] + use_raw_scores = options["raw_scores"] - if n_outputs == 1 or hasattr(op, 'loss_') or hasattr(op, '_loss'): + if n_outputs == 1 or hasattr(op, "loss_") or hasattr(op, "_loss"): classes = get_label_classes(scope, op) if all(isinstance(i, np.ndarray) for i in classes): classes = np.concatenate(classes) attr_pairs = get_default_tree_classifier_attribute_pairs() - attr_pairs['name'] = scope.get_unique_operator_name(op_type) + attr_pairs["name"] = scope.get_unique_operator_name(op_type) if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes): class_labels = [int(i) for i in classes] - attr_pairs['classlabels_int64s'] = class_labels + attr_pairs["classlabels_int64s"] = class_labels elif all(isinstance(i, str) for i in classes): class_labels = [str(i) for i in classes] - attr_pairs['classlabels_strings'] = class_labels + attr_pairs["classlabels_strings"] = class_labels else: - raise ValueError( - 'Only string and integer class labels are allowed.') + raise ValueError("Only string and integer class labels are allowed.") # random forest calculate the final score by averaging over all trees' # outcomes, so all trees' weights are identical. - if hasattr(op, 'estimators_'): + if hasattr(op, "estimators_"): estimator_count = len(op.estimators_) - tree_weight = 1. / estimator_count - elif hasattr(op, '_predictors'): + tree_weight = 1.0 / estimator_count + elif hasattr(op, "_predictors"): # HistGradientBoostingRegressor estimator_count = len(op._predictors) - tree_weight = 1. + tree_weight = 1.0 else: raise NotImplementedError( - "Model should have attribute 'estimators_' or '_predictors'.") + "Model should have attribute 'estimators_' or '_predictors'." 
+ ) for tree_id in range(estimator_count): - - if hasattr(op, 'estimators_'): + if hasattr(op, "estimators_"): tree = op.estimators_[tree_id].tree_ add_tree_to_attribute_pairs( - attr_pairs, True, tree, tree_id, - tree_weight, 0, True, True, - dtype=dtype) + attr_pairs, + True, + tree, + tree_id, + tree_weight, + 0, + True, + True, + dtype=dtype, + ) else: # HistGradientBoostClassifier if len(op._predictors[tree_id]) == 1: tree = op._predictors[tree_id][0] add_tree_to_attribute_pairs_hist_gradient_boosting( - attr_pairs, True, tree, tree_id, tree_weight, 0, - False, False, dtype=dtype) + attr_pairs, + True, + tree, + tree_id, + tree_weight, + 0, + False, + False, + dtype=dtype, + ) else: for cl, tree in enumerate(op._predictors[tree_id]): add_tree_to_attribute_pairs_hist_gradient_boosting( - attr_pairs, True, tree, tree_id * n_outputs + cl, - tree_weight, cl, False, False, - dtype=dtype) - - if hasattr(op, '_baseline_prediction'): + attr_pairs, + True, + tree, + tree_id * n_outputs + cl, + tree_weight, + cl, + False, + False, + dtype=dtype, + ) + + if hasattr(op, "_baseline_prediction"): if isinstance(op._baseline_prediction, np.ndarray): - attr_pairs['base_values'] = list( - op._baseline_prediction.ravel()) + attr_pairs["base_values"] = list(op._baseline_prediction.ravel()) else: - attr_pairs['base_values'] = [op._baseline_prediction] + attr_pairs["base_values"] = [op._baseline_prediction] - if hasattr(op, 'loss_'): + if hasattr(op, "loss_"): loss = op.loss_ - elif hasattr(op, '_loss'): + elif hasattr(op, "_loss"): # scikit-learn >= 0.24 loss = op._loss else: loss = None if loss is not None: if use_raw_scores: - attr_pairs['post_transform'] = "NONE" + attr_pairs["post_transform"] = "NONE" + elif loss.__class__.__name__ in ("BinaryCrossEntropy", "HalfBinomialLoss"): + attr_pairs["post_transform"] = "LOGISTIC" elif loss.__class__.__name__ in ( - "BinaryCrossEntropy", "HalfBinomialLoss"): - attr_pairs['post_transform'] = "LOGISTIC" - elif loss.__class__.__name__ in ( - "CategoricalCrossEntropy", "HalfMultinomialLoss"): - attr_pairs['post_transform'] = "SOFTMAX" + "CategoricalCrossEntropy", + "HalfMultinomialLoss", + ): + attr_pairs["post_transform"] = "SOFTMAX" else: raise NotImplementedError( "There is no corresponding post_transform for " - "'{}'.".format(loss.__class__.__name__)) + "'{}'.".format(loss.__class__.__name__) + ) elif use_raw_scores: raise RuntimeError( "The converter cannot implement decision_function for " - "'{}' and loss '{}'.".format(type(op), loss)) + "'{}' and loss '{}'.".format(type(op), loss) + ) input_name = operator.input_full_names if isinstance(operator.inputs[0].type, BooleanTensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - - apply_cast(scope, input_name, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_input_name = scope.get_unique_variable_name("cast_input") + + apply_cast( + scope, + input_name, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name if dtype is not None: for k in attr_pairs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): - attr_pairs[k] = np.array( - attr_pairs[k], dtype=attr_dtype).ravel() + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): + attr_pairs[k] = np.array(attr_pairs[k], dtype=attr_dtype).ravel() container.add_node( - op_type, input_name, + op_type, + input_name, [operator.outputs[0].full_name, operator.outputs[1].full_name], - 
op_domain=op_domain, op_version=op_version, **attr_pairs) - - if (not options.get('decision_path', False) and - not options.get('decision_leaf', False)): + op_domain=op_domain, + op_version=op_version, + **attr_pairs + ) + + if not options.get("decision_path", False) and not options.get( + "decision_leaf", False + ): return # decision_path tree_paths = [] tree_leaves = [] for i, tree in enumerate(op.estimators_): - attrs = get_default_tree_classifier_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name( - "%s_%d" % (op_type, i)) - attrs['n_targets'] = int(op.n_outputs_) + attrs["name"] = scope.get_unique_operator_name("%s_%d" % (op_type, i)) + attrs["n_targets"] = int(op.n_outputs_) add_tree_to_attribute_pairs( - attrs, True, tree.tree_, 0, 1., 0, False, - True, dtype=dtype) - - attrs['n_targets'] = 1 - attrs['post_transform'] = 'NONE' - attrs['target_ids'] = [0 for _ in attrs['class_ids']] - attrs['target_weights'] = [ - float(_) for _ in attrs['class_nodeids']] - attrs['target_nodeids'] = attrs['class_nodeids'] - attrs['target_treeids'] = attrs['class_treeids'] - rem = [k for k in attrs if k.startswith('class')] + attrs, True, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype + ) + + attrs["n_targets"] = 1 + attrs["post_transform"] = "NONE" + attrs["target_ids"] = [0 for _ in attrs["class_ids"]] + attrs["target_weights"] = [float(_) for _ in attrs["class_nodeids"]] + attrs["target_nodeids"] = attrs["class_nodeids"] + attrs["target_treeids"] = attrs["class_treeids"] + rem = [k for k in attrs if k.startswith("class")] for k in rem: del attrs[k] if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=attr_dtype).ravel() - if options['decision_path']: + if options["decision_path"]: # decision_path tree_paths.append( _append_decision_output( - input_name, attrs, _build_labels_path, None, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True, - overwrite_tree=tree.tree_)) - if options['decision_leaf']: + input_name, + attrs, + _build_labels_path, + None, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + overwrite_tree=tree.tree_, + ) + ) + if options["decision_leaf"]: # decision_path tree_leaves.append( _append_decision_output( - input_name, attrs, _build_labels_leaf, None, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True, - cast_encode=True)) + input_name, + attrs, + _build_labels_leaf, + None, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + cast_encode=True, + ) + ) # merges everything n_out = 2 - if options['decision_path']: + if options["decision_path"]: apply_concat( - scope, tree_paths, operator.outputs[n_out].full_name, - container, axis=1, - operator_name=scope.get_unique_operator_name('concat')) + scope, + tree_paths, + operator.outputs[n_out].full_name, + container, + axis=1, + operator_name=scope.get_unique_operator_name("concat"), + ) n_out += 1 - if options['decision_leaf']: + if options["decision_leaf"]: # decision_path apply_concat( - scope, tree_leaves, operator.outputs[n_out].full_name, - container, axis=1, - operator_name=scope.get_unique_operator_name('concat')) + scope, + 
tree_leaves, + operator.outputs[n_out].full_name, + container, + axis=1, + operator_name=scope.get_unique_operator_name("concat"), + ) n_out += 1 else: if use_raw_scores: raise RuntimeError( "The converter cannot implement decision_function for " - "'{}'.".format(type(op))) - concatenated_proba_name = scope.get_unique_variable_name( - 'concatenated_proba') + "'{}'.".format(type(op)) + ) + concatenated_proba_name = scope.get_unique_variable_name("concatenated_proba") proba = [] for est in op.estimators_: reshaped_est_proba_name = scope.get_unique_variable_name( - 'reshaped_est_proba') + "reshaped_est_proba" + ) est_proba = predict( - est, scope, operator, container, op_type, op_domain, - op_version, is_ensemble=True) + est, + scope, + operator, + container, + op_type, + op_domain, + op_version, + is_ensemble=True, + ) apply_reshape( - scope, est_proba, reshaped_est_proba_name, container, - desired_shape=( - 1, n_outputs, -1, max([len(x) for x in op.classes_]))) + scope, + est_proba, + reshaped_est_proba_name, + container, + desired_shape=(1, n_outputs, -1, max([len(x) for x in op.classes_])), + ) proba.append(reshaped_est_proba_name) - apply_concat(scope, proba, concatenated_proba_name, - container, axis=0) + apply_concat(scope, proba, concatenated_proba_name, container, axis=0) if container.target_opset >= 18: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [0]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0]) container.add_node( - 'ReduceMean', [concatenated_proba_name, axis_name], + "ReduceMean", + [concatenated_proba_name, axis_name], operator.outputs[1].full_name, - name=scope.get_unique_operator_name('ReduceMean'), - keepdims=0) + name=scope.get_unique_operator_name("ReduceMean"), + keepdims=0, + ) else: container.add_node( - 'ReduceMean', concatenated_proba_name, + "ReduceMean", + concatenated_proba_name, operator.outputs[1].full_name, - name=scope.get_unique_operator_name('ReduceMean'), - axes=[0], keepdims=0) + name=scope.get_unique_operator_name("ReduceMean"), + axes=[0], + keepdims=0, + ) predictions = _calculate_labels( - scope, container, op, operator.outputs[1].full_name) - apply_concat(scope, predictions, operator.outputs[0].full_name, - container, axis=1) + scope, container, op, operator.outputs[1].full_name + ) + apply_concat( + scope, predictions, operator.outputs[0].full_name, container, axis=1 + ) - if (options.get('decision_path', False) or - options.get('decision_leaf', False)): + if options.get("decision_path", False) or options.get("decision_leaf", False): raise RuntimeError( - "Decision output for multi-outputs is not implemented yet.") + "Decision output for multi-outputs is not implemented yet." 
+ ) def convert_sklearn_random_forest_regressor_converter( - scope, operator, container, op_type='TreeEnsembleRegressor', - op_domain='ai.onnx.ml', op_version=1): + scope, + operator, + container, + op_type="TreeEnsembleRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 op = operator.raw_operator attrs = get_default_tree_regressor_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name(op_type) + attrs["name"] = scope.get_unique_operator_name(op_type) - if hasattr(op, 'n_outputs_'): - attrs['n_targets'] = int(op.n_outputs_) - elif hasattr(op, 'n_trees_per_iteration_'): + if hasattr(op, "n_outputs_"): + attrs["n_targets"] = int(op.n_outputs_) + elif hasattr(op, "n_trees_per_iteration_"): # HistGradientBoostingRegressor - attrs['n_targets'] = op.n_trees_per_iteration_ + attrs["n_targets"] = op.n_trees_per_iteration_ else: raise NotImplementedError( - "Model should have attribute 'n_outputs_' or " - "'n_trees_per_iteration_'.") + "Model should have attribute 'n_outputs_' or " "'n_trees_per_iteration_'." + ) - if hasattr(op, 'estimators_'): + if hasattr(op, "estimators_"): estimator_count = len(op.estimators_) - tree_weight = 1. / estimator_count - elif hasattr(op, '_predictors'): + tree_weight = 1.0 / estimator_count + elif hasattr(op, "_predictors"): # HistGradientBoostingRegressor estimator_count = len(op._predictors) - tree_weight = 1. + tree_weight = 1.0 else: raise NotImplementedError( - "Model should have attribute 'estimators_' or '_predictors'.") + "Model should have attribute 'estimators_' or '_predictors'." + ) # random forest calculate the final score by averaging over all trees' # outcomes, so all trees' weights are identical. for tree_id in range(estimator_count): - if hasattr(op, 'estimators_'): + if hasattr(op, "estimators_"): tree = op.estimators_[tree_id].tree_ - add_tree_to_attribute_pairs(attrs, False, tree, tree_id, - tree_weight, 0, False, True, - dtype=dtype) + add_tree_to_attribute_pairs( + attrs, False, tree, tree_id, tree_weight, 0, False, True, dtype=dtype + ) else: # HistGradientBoostingRegressor if len(op._predictors[tree_id]) != 1: raise NotImplementedError( "The converter does not work when the number of trees " - "is not 1 but {}.".format(len(op._predictors[tree_id]))) + "is not 1 but {}.".format(len(op._predictors[tree_id])) + ) tree = op._predictors[tree_id][0] add_tree_to_attribute_pairs_hist_gradient_boosting( - attrs, False, tree, tree_id, tree_weight, 0, False, - False, dtype=dtype) + attrs, False, tree, tree_id, tree_weight, 0, False, False, dtype=dtype + ) - if hasattr(op, '_baseline_prediction'): + if hasattr(op, "_baseline_prediction"): if isinstance(op._baseline_prediction, np.ndarray): - attrs['base_values'] = list(op._baseline_prediction) + attrs["base_values"] = list(op._baseline_prediction) else: - attrs['base_values'] = [op._baseline_prediction] + attrs["base_values"] = [op._baseline_prediction] input_name = operator.input_full_names if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_input_name = scope.get_unique_variable_name("cast_input") + + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) input_name = cast_input_name if dtype is not None: for k in attrs: - if 
k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype).ravel() container.add_node( - op_type, input_name, - operator.outputs[0].full_name, op_domain=op_domain, - op_version=op_version, **attrs) - - if hasattr(op, 'n_trees_per_iteration_'): + op_type, + input_name, + operator.outputs[0].full_name, + op_domain=op_domain, + op_version=op_version, + **attrs + ) + + if hasattr(op, "n_trees_per_iteration_"): # HistGradientBoostingRegressor does not implement decision_path. return if isinstance(op, RandomTreesEmbedding): options = scope.get_options(op) else: - options = scope.get_options( - op, dict(decision_path=False, decision_leaf=False)) + options = scope.get_options(op, dict(decision_path=False, decision_leaf=False)) - if (not options.get('decision_path', False) and - not options.get('decision_leaf', False)): + if not options.get("decision_path", False) and not options.get( + "decision_leaf", False + ): return # decision_path tree_paths = [] tree_leaves = [] for i, tree in enumerate(op.estimators_): - attrs = get_default_tree_regressor_attribute_pairs() - attrs['name'] = scope.get_unique_operator_name("%s_%d" % (op_type, i)) - attrs['n_targets'] = int(op.n_outputs_) - add_tree_to_attribute_pairs(attrs, False, tree.tree_, 0, 1., 0, False, - True, dtype=dtype) + attrs["name"] = scope.get_unique_operator_name("%s_%d" % (op_type, i)) + attrs["n_targets"] = int(op.n_outputs_) + add_tree_to_attribute_pairs( + attrs, False, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype + ) - attrs['n_targets'] = 1 - attrs['post_transform'] = 'NONE' - attrs['target_ids'] = [0 for _ in attrs['target_ids']] - attrs['target_weights'] = [float(_) for _ in attrs['target_nodeids']] + attrs["n_targets"] = 1 + attrs["post_transform"] = "NONE" + attrs["target_ids"] = [0 for _ in attrs["target_ids"]] + attrs["target_weights"] = [float(_) for _ in attrs["target_nodeids"]] if dtype is not None: for k in attrs: - if k in ('nodes_values', 'class_weights', - 'target_weights', 'nodes_hitrates', - 'base_values'): + if k in ( + "nodes_values", + "class_weights", + "target_weights", + "nodes_hitrates", + "base_values", + ): attrs[k] = np.array(attrs[k], dtype=dtype).ravel() - if options.get('decision_path', False): + if options.get("decision_path", False): # decision_path tree_paths.append( _append_decision_output( - input_name, attrs, _build_labels_path, None, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True, - overwrite_tree=tree.tree_)) - if options.get('decision_leaf', False): + input_name, + attrs, + _build_labels_path, + None, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + overwrite_tree=tree.tree_, + ) + ) + if options.get("decision_leaf", False): # decision_path tree_leaves.append( _append_decision_output( - input_name, attrs, _build_labels_leaf, None, - scope, operator, container, - op_type=op_type, op_domain=op_domain, - op_version=op_version, regression=True, cast_encode=True)) + input_name, + attrs, + _build_labels_leaf, + None, + scope, + operator, + container, + op_type=op_type, + op_domain=op_domain, + op_version=op_version, + regression=True, + cast_encode=True, + ) + ) # merges everything n_out = 1 - if options.get('decision_path', False): + if options.get("decision_path", False): 
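# A minimal sketch (not from this diff): the per-tree "decision_leaf" output
# assembled here corresponds to the leaf index each sample reaches, which
# scikit-learn exposes as apply(); one column per tree, concatenated on
# axis=1 like the Concat below.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=50, n_features=3, random_state=0)
rf = RandomForestRegressor(n_estimators=3, random_state=0).fit(X, y)

leaves = np.column_stack([est.apply(X) for est in rf.estimators_])
assert np.array_equal(leaves, rf.apply(X))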
apply_concat( - scope, tree_paths, operator.outputs[n_out].full_name, container, - axis=1, operator_name=scope.get_unique_operator_name('concat')) + scope, + tree_paths, + operator.outputs[n_out].full_name, + container, + axis=1, + operator_name=scope.get_unique_operator_name("concat"), + ) n_out += 1 - if options.get('decision_leaf', False): + if options.get("decision_leaf", False): # decision_path apply_concat( - scope, tree_leaves, operator.outputs[n_out].full_name, container, - axis=1, operator_name=scope.get_unique_operator_name('concat')) + scope, + tree_leaves, + operator.outputs[n_out].full_name, + container, + axis=1, + operator_name=scope.get_unique_operator_name("concat"), + ) n_out += 1 -register_converter('SklearnRandomForestClassifier', - convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False, 'columns'], - 'raw_scores': [True, False], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnRandomForestRegressor', - convert_sklearn_random_forest_regressor_converter, - options={'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnExtraTreesClassifier', - convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False, 'columns'], - 'raw_scores': [True, False], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnExtraTreesRegressor', - convert_sklearn_random_forest_regressor_converter, - options={'decision_path': [True, False], - 'decision_leaf': [True, False]}) -register_converter('SklearnHistGradientBoostingClassifier', - convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False, 'columns'], - 'raw_scores': [True, False], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) -register_converter('SklearnHistGradientBoostingRegressor', - convert_sklearn_random_forest_regressor_converter, - options={'zipmap': [True, False, 'columns'], - 'raw_scores': [True, False], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) +register_converter( + "SklearnRandomForestClassifier", + convert_sklearn_random_forest_classifier, + options={ + "zipmap": [True, False, "columns"], + "raw_scores": [True, False], + "nocl": [True, False], + "output_class_labels": [False, True], + "decision_path": [True, False], + "decision_leaf": [True, False], + }, +) +register_converter( + "SklearnRandomForestRegressor", + convert_sklearn_random_forest_regressor_converter, + options={"decision_path": [True, False], "decision_leaf": [True, False]}, +) +register_converter( + "SklearnExtraTreesClassifier", + convert_sklearn_random_forest_classifier, + options={ + "zipmap": [True, False, "columns"], + "raw_scores": [True, False], + "nocl": [True, False], + "output_class_labels": [False, True], + "decision_path": [True, False], + "decision_leaf": [True, False], + }, +) +register_converter( + "SklearnExtraTreesRegressor", + convert_sklearn_random_forest_regressor_converter, + options={"decision_path": [True, False], "decision_leaf": [True, False]}, +) +register_converter( + "SklearnHistGradientBoostingClassifier", + convert_sklearn_random_forest_classifier, + options={ + "zipmap": [True, False, "columns"], + "raw_scores": [True, False], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) +register_converter( + "SklearnHistGradientBoostingRegressor", + 
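# Usage sketch (an assumption about how these options are requested, not part
# of this diff): the decision_path / decision_leaf options registered above
# are passed at conversion time; the names of the extra graph outputs are
# whatever the converter declares, so they are printed rather than hard-coded.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from skl2onnx import to_onnx

X, y = make_classification(n_samples=100, n_features=4, random_state=0)
X = X.astype(np.float32)
rf = RandomForestClassifier(n_estimators=3, random_state=0).fit(X, y)

onx = to_onnx(
    rf,
    X[:1],
    options={id(rf): {"zipmap": False, "decision_path": True, "decision_leaf": True}},
)
print([o.name for o in onx.graph.output])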
convert_sklearn_random_forest_regressor_converter, + options={ + "zipmap": [True, False, "columns"], + "raw_scores": [True, False], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/random_projection.py b/skl2onnx/operator_converters/random_projection.py index e9f4a2933..1f1db77f9 100644 --- a/skl2onnx/operator_converters/random_projection.py +++ b/skl2onnx/operator_converters/random_projection.py @@ -8,8 +8,9 @@ from ..algebra.onnx_ops import OnnxMatMul -def convert_random_projection(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_random_projection( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """Converter for PowerTransformer""" op_in = operator.inputs[0] op_out = operator.outputs[0].full_name @@ -19,10 +20,10 @@ def convert_random_projection(scope: Scope, operator: Operator, if dtype != np.float64: dtype = np.float32 - y = OnnxMatMul(op_in, op.components_.T.astype(dtype), - op_version=opv, output_names=[op_out]) + y = OnnxMatMul( + op_in, op.components_.T.astype(dtype), op_version=opv, output_names=[op_out] + ) y.add_to(scope, container) -register_converter( - 'SklearnGaussianRandomProjection', convert_random_projection) +register_converter("SklearnGaussianRandomProjection", convert_random_projection) diff --git a/skl2onnx/operator_converters/random_trees_embedding.py b/skl2onnx/operator_converters/random_trees_embedding.py index e082e014e..6e3305d63 100644 --- a/skl2onnx/operator_converters/random_trees_embedding.py +++ b/skl2onnx/operator_converters/random_trees_embedding.py @@ -9,8 +9,8 @@ def convert_sklearn_random_tree_embedding( - scope: Scope, operator: Operator, container: ModelComponentContainer): - + scope: Scope, operator: Operator, container: ModelComponentContainer +): X = operator.inputs[0] out = operator.outputs op = operator.raw_operator @@ -18,21 +18,21 @@ def convert_sklearn_random_tree_embedding( if op.sparse_output: raise RuntimeError( - "The converter cannot convert the model with sparse outputs.") + "The converter cannot convert the model with sparse outputs." 
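# A minimal check (not from this diff): a single MatMul is enough here because
# GaussianRandomProjection.transform is a plain projection onto components_.
import numpy as np
from sklearn.random_projection import GaussianRandomProjection

rng = np.random.RandomState(0)
X = rng.randn(20, 10)
rp = GaussianRandomProjection(n_components=4, random_state=0).fit(X)
assert np.allclose(rp.transform(X), X @ rp.components_.T)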
+ ) outputs = [] for est in op.estimators_: - leave = OnnxSubEstimator(est, X, op_version=opv, - options={'decision_leaf': True}) - outputs.append(OnnxReshape(leave[1], - np.array([-1, 1], dtype=np.int64), - op_version=opv)) + leave = OnnxSubEstimator( + est, X, op_version=opv, options={"decision_leaf": True} + ) + outputs.append( + OnnxReshape(leave[1], np.array([-1, 1], dtype=np.int64), op_version=opv) + ) merged = OnnxConcat(*outputs, axis=1, op_version=opv) ohe = OnnxSubEstimator(op.one_hot_encoder_, merged, op_version=opv) - y = OnnxIdentity(ohe, op_version=opv, - output_names=out) + y = OnnxIdentity(ohe, op_version=opv, output_names=out) y.add_to(scope, container) -register_converter('SklearnRandomTreesEmbedding', - convert_sklearn_random_tree_embedding) +register_converter("SklearnRandomTreesEmbedding", convert_sklearn_random_tree_embedding) diff --git a/skl2onnx/operator_converters/ransac_regressor.py b/skl2onnx/operator_converters/ransac_regressor.py index 096ac7f02..48ab2ab5a 100644 --- a/skl2onnx/operator_converters/ransac_regressor.py +++ b/skl2onnx/operator_converters/ransac_regressor.py @@ -8,8 +8,9 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_ransac_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_ransac_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for RANSACRegressor. """ @@ -18,11 +19,12 @@ def convert_sklearn_ransac_regressor(scope: Scope, operator: Operator, this_operator = scope.declare_local_operator(op_type, ransac_op.estimator_) this_operator.inputs = operator.inputs label_name = scope.declare_local_variable( - 'label', operator.inputs[0].type.__class__()) + "label", operator.inputs[0].type.__class__() + ) this_operator.outputs.append(label_name) - apply_identity(scope, label_name.full_name, - operator.outputs[0].full_name, container) + apply_identity( + scope, label_name.full_name, operator.outputs[0].full_name, container + ) -register_converter('SklearnRANSACRegressor', - convert_sklearn_ransac_regressor) +register_converter("SklearnRANSACRegressor", convert_sklearn_ransac_regressor) diff --git a/skl2onnx/operator_converters/replace_op.py b/skl2onnx/operator_converters/replace_op.py index 7b654e0ff..951b04c06 100644 --- a/skl2onnx/operator_converters/replace_op.py +++ b/skl2onnx/operator_converters/replace_op.py @@ -7,29 +7,34 @@ from ..common.data_types import guess_proto_type -def convert_sklearn_replace_transformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_replace_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): op = operator.raw_operator input_name = operator.inputs[0].full_name output_name = operator.outputs[0].full_name proto_dtype = guess_proto_type(operator.inputs[0].type) - cst_nan_name = scope.get_unique_variable_name('nan_name') + cst_nan_name = scope.get_unique_variable_name("nan_name") container.add_initializer(cst_nan_name, proto_dtype, [1], [op.to_value]) - cst_zero_name = scope.get_unique_variable_name('zero_name') - container.add_initializer( - cst_zero_name, proto_dtype, [1], [op.from_value]) - - mask_name = scope.get_unique_variable_name('mask_name') - container.add_node('Equal', [input_name, cst_zero_name], - mask_name, - name=scope.get_unique_operator_name('Equal')) - - container.add_node('Where', [mask_name, cst_nan_name, input_name], - output_name, - name=scope.get_unique_operator_name('Where')) - - 
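# A minimal check (not from this diff): the graph above reproduces
# RandomTreesEmbedding.transform, i.e. the one-hot encoding of the leaf index
# reached in every tree.
import numpy as np
from sklearn.ensemble import RandomTreesEmbedding

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
rte = RandomTreesEmbedding(n_estimators=3, random_state=0).fit(X)

expected = rte.one_hot_encoder_.transform(rte.apply(X)).toarray()
assert np.array_equal(expected, rte.transform(X).toarray())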
-register_converter( - 'SklearnReplaceTransformer', convert_sklearn_replace_transformer) + cst_zero_name = scope.get_unique_variable_name("zero_name") + container.add_initializer(cst_zero_name, proto_dtype, [1], [op.from_value]) + + mask_name = scope.get_unique_variable_name("mask_name") + container.add_node( + "Equal", + [input_name, cst_zero_name], + mask_name, + name=scope.get_unique_operator_name("Equal"), + ) + + container.add_node( + "Where", + [mask_name, cst_nan_name, input_name], + output_name, + name=scope.get_unique_operator_name("Where"), + ) + + +register_converter("SklearnReplaceTransformer", convert_sklearn_replace_transformer) diff --git a/skl2onnx/operator_converters/scaler_op.py b/skl2onnx/operator_converters/scaler_op.py index f219f35d9..14d1b7e4b 100644 --- a/skl2onnx/operator_converters/scaler_op.py +++ b/skl2onnx/operator_converters/scaler_op.py @@ -4,8 +4,7 @@ import numpy as np from sklearn.preprocessing import MaxAbsScaler from sklearn.preprocessing import RobustScaler, StandardScaler -from ..algebra.onnx_ops import ( - OnnxSub, OnnxDiv, OnnxCast, OnnxMul, OnnxClip, OnnxAdd) +from ..algebra.onnx_ops import OnnxSub, OnnxDiv, OnnxCast, OnnxMul, OnnxClip, OnnxAdd from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -14,8 +13,9 @@ from .common import concatenate_variables -def convert_sklearn_scaler(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_scaler( + scope: Scope, operator: Operator, container: ModelComponentContainer +): # If there are multiple input variables, we need to combine them as a # whole tensor. Integer(s) would be converted to float(s). # Options div use true division instead of Scaler operator @@ -28,8 +28,8 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, C = operator.outputs[0].get_second_dimension() op = operator.raw_operator - op_type = 'Scaler' - attrs = {'name': scope.get_unique_operator_name(op_type)} + op_type = "Scaler" + attrs = {"name": scope.get_unique_operator_name(op_type)} if isinstance(op, StandardScaler): model_C = None @@ -41,22 +41,20 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, model_C = op.var_.shape[0] if model_C is None: # Identity - container.add_node( - 'Identity', feature_name, - operator.outputs[0].full_name) + container.add_node("Identity", feature_name, operator.outputs[0].full_name) return if C is not None and C != model_C: raise RuntimeError( "Unable Mismatch between expected shape %r and model (., %r)" - " in operator %r." % ( - operator.outputs[0].type.shape, model_C, operator)) + " in operator %r." 
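# A minimal sketch (not from this diff): the Equal/Where pair above is a plain
# value substitution; from_value and to_value stand in for op.from_value and
# op.to_value.
import numpy as np

X = np.array([[0.0, 1.5], [2.0, 0.0]], dtype=np.float32)
from_value, to_value = 0.0, np.nan
print(np.where(X == from_value, to_value, X))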
% (operator.outputs[0].type.shape, model_C, operator) + ) C = model_C - attrs['offset'] = ( - op.mean_ if op.with_mean else - np.array([0.0] * C, dtype=np.float32)) - attrs['scale'] = ( - 1.0 / op.scale_ if op.with_std else - np.array([1.0] * C, dtype=np.float32)) + attrs["offset"] = ( + op.mean_ if op.with_mean else np.array([0.0] * C, dtype=np.float32) + ) + attrs["scale"] = ( + 1.0 / op.scale_ if op.with_std else np.array([1.0] * C, dtype=np.float32) + ) inv_scale = op.scale_ if op.with_std else None elif isinstance(op, RobustScaler): model_C = None @@ -66,22 +64,22 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, model_C = op.scale_.shape[0] if model_C is None: # Identity - container.add_node( - 'Identity', feature_name, - operator.outputs[0].full_name) + container.add_node("Identity", feature_name, operator.outputs[0].full_name) return if C is not None and C != model_C: raise RuntimeError( "Unable Mismatch between expected shape %r and model (., %r)" - " in operator %r." % ( - operator.outputs[0].type.shape, model_C, operator)) + " in operator %r." % (operator.outputs[0].type.shape, model_C, operator) + ) C = model_C - attrs['offset'] = ( - op.center_ if op.with_centering else - np.array([0.0] * C, dtype=np.float32)) - attrs['scale'] = ( - 1.0 / op.scale_ if op.with_scaling else - np.array([1.0] * C, dtype=np.float32)) + attrs["offset"] = ( + op.center_ if op.with_centering else np.array([0.0] * C, dtype=np.float32) + ) + attrs["scale"] = ( + 1.0 / op.scale_ + if op.with_scaling + else np.array([1.0] * C, dtype=np.float32) + ) inv_scale = op.scale_ if op.with_scaling else None elif isinstance(op, MaxAbsScaler): model_C = None @@ -91,25 +89,25 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, model_C = op.scale_.shape[0] if model_C is None: # Identity - container.add_node( - 'Identity', feature_name, - operator.outputs[0].full_name) + container.add_node("Identity", feature_name, operator.outputs[0].full_name) return if C is not None and C != model_C: raise RuntimeError( "Unable Mismatch between expected shape %r and model (., %r)" - " in operator %r." % ( - operator.outputs[0].type.shape, model_C, operator)) + " in operator %r." % (operator.outputs[0].type.shape, model_C, operator) + ) C = model_C - attrs['scale'] = 1.0 / op.scale_ - attrs['offset'] = np.array([0.] * C, dtype=np.float32) + attrs["scale"] = 1.0 / op.scale_ + attrs["offset"] = np.array([0.0] * C, dtype=np.float32) inv_scale = op.scale_ else: - raise ValueError('Only scikit-learn StandardScaler and RobustScaler ' - 'are supported but got %s. You may raise ' - 'an issue at ' - 'https://github.com/onnx/sklearn-onnx/issues.' - '' % type(op)) + raise ValueError( + "Only scikit-learn StandardScaler and RobustScaler " + "are supported but got %s. You may raise " + "an issue at " + "https://github.com/onnx/sklearn-onnx/issues." 
+ "" % type(op) + ) proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: @@ -123,65 +121,77 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, if isinstance(v, np.ndarray) and v.dtype != dtype: attrs[k] = v.astype(dtype) - use_scaler_op = container.is_allowed({'Scaler'}) + use_scaler_op = container.is_allowed({"Scaler"}) if not use_scaler_op or dtype == np.float64: opv = container.target_opset if inv_scale is None: sub = OnnxSub( - feature_name, attrs['offset'].astype(dtype), + feature_name, + attrs["offset"].astype(dtype), op_version=opv, - output_names=[operator.outputs[0].full_name]) + output_names=[operator.outputs[0].full_name], + ) sub.add_to(scope, container) else: - sub = OnnxSub( - feature_name, attrs['offset'].astype(dtype), - op_version=opv) - div = OnnxDiv(sub, inv_scale.astype(dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) + sub = OnnxSub(feature_name, attrs["offset"].astype(dtype), op_version=opv) + div = OnnxDiv( + sub, + inv_scale.astype(dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name], + ) div.add_to(scope, container) return if inv_scale is not None: - options = container.get_options(op, dict(div='std')) - div = options['div'] - if div == 'div': + options = container.get_options(op, dict(div="std")) + div = options["div"] + if div == "div": opv = container.target_opset - sub = OnnxSub( - feature_name, attrs['offset'].astype(dtype), - op_version=opv) - div = OnnxDiv(sub, inv_scale.astype(dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) + sub = OnnxSub(feature_name, attrs["offset"].astype(dtype), op_version=opv) + div = OnnxDiv( + sub, + inv_scale.astype(dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name], + ) div.add_to(scope, container) return - if div == 'div_cast': + if div == "div_cast": opv = container.target_opset - cast = OnnxCast(feature_name, to=onnx_proto.TensorProto.DOUBLE, - op_version=opv) - sub = OnnxSub(cast, attrs['offset'].astype(np.float64), - op_version=opv) + cast = OnnxCast( + feature_name, to=onnx_proto.TensorProto.DOUBLE, op_version=opv + ) + sub = OnnxSub(cast, attrs["offset"].astype(np.float64), op_version=opv) div = OnnxDiv(sub, inv_scale.astype(np.float64), op_version=opv) - cast = OnnxCast(div, to=proto_dtype, op_version=opv, - output_names=[operator.outputs[0].full_name]) + cast = OnnxCast( + div, + to=proto_dtype, + op_version=opv, + output_names=[operator.outputs[0].full_name], + ) cast.add_to(scope, container) return - if attrs['offset'].size != attrs['scale'].size: + if attrs["offset"].size != attrs["scale"].size: # Scaler does not accept different size for offset and scale. 
- size = max(attrs['offset'].size, attrs['scale'].size) - ones = np.ones(size, dtype=attrs['offset'].dtype) - attrs['offset'] = attrs['offset'] * ones - attrs['scale'] = attrs['scale'] * ones + size = max(attrs["offset"].size, attrs["scale"].size) + ones = np.ones(size, dtype=attrs["offset"].dtype) + attrs["offset"] = attrs["offset"] * ones + attrs["scale"] = attrs["scale"] * ones container.add_node( - op_type, feature_name, operator.outputs[0].full_name, - op_domain='ai.onnx.ml', **attrs) + op_type, + feature_name, + operator.outputs[0].full_name, + op_domain="ai.onnx.ml", + **attrs + ) def convert_sklearn_min_max_scaler( - scope: Scope, operator: Operator, - container: ModelComponentContainer): + scope: Scope, operator: Operator, container: ModelComponentContainer +): # If there are multiple input variables, we need to combine them as a # whole tensor. Integer(s) would be converted to float(s). # Options div use true division instead of Scaler operator @@ -208,30 +218,41 @@ def convert_sklearn_min_max_scaler( # if self.clip: # np.clip(X, self.feature_range[0], self.feature_range[1], out=X) casted = OnnxCast(feature_name, to=proto_dtype, op_version=opv) - scaled = OnnxMul(casted, op.scale_.astype(dtype), - op_version=opv) + scaled = OnnxMul(casted, op.scale_.astype(dtype), op_version=opv) - if getattr(op, 'clip', False): + if getattr(op, "clip", False): # parameter clip was introduced in scikit-learn 0.24 - offset = OnnxAdd(scaled, op.min_.astype(dtype), - op_version=opv) + offset = OnnxAdd(scaled, op.min_.astype(dtype), op_version=opv) - clipped = OnnxClip(offset, np.array(op.feature_range[0], dtype=dtype), - np.array(op.feature_range[1], dtype=dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) + clipped = OnnxClip( + offset, + np.array(op.feature_range[0], dtype=dtype), + np.array(op.feature_range[1], dtype=dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name], + ) clipped.add_to(scope, container) else: - offset = OnnxAdd(scaled, op.min_.astype(dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) + offset = OnnxAdd( + scaled, + op.min_.astype(dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name], + ) offset.add_to(scope, container) -register_converter('SklearnRobustScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) -register_converter('SklearnScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) -register_converter('SklearnMinMaxScaler', convert_sklearn_min_max_scaler) -register_converter('SklearnMaxAbsScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) +register_converter( + "SklearnRobustScaler", + convert_sklearn_scaler, + options={"div": ["std", "div", "div_cast"]}, +) +register_converter( + "SklearnScaler", convert_sklearn_scaler, options={"div": ["std", "div", "div_cast"]} +) +register_converter("SklearnMinMaxScaler", convert_sklearn_min_max_scaler) +register_converter( + "SklearnMaxAbsScaler", + convert_sklearn_scaler, + options={"div": ["std", "div", "div_cast"]}, +) diff --git a/skl2onnx/operator_converters/sequence.py b/skl2onnx/operator_converters/sequence.py index 77b937016..534bcfcee 100644 --- a/skl2onnx/operator_converters/sequence.py +++ b/skl2onnx/operator_converters/sequence.py @@ -6,26 +6,30 @@ from ..common._container import ModelComponentContainer -def convert_sklearn_sequence_at(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_sequence_at( + scope: Scope, 
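# A minimal check (not from this diff): the Cast/Mul/Add/Clip chain above
# follows MinMaxScaler.transform, i.e. X * scale_ + min_, clipped to
# feature_range when clip=True (scikit-learn >= 0.24).
import numpy as np
from sklearn.preprocessing import MinMaxScaler

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
mms = MinMaxScaler(clip=True).fit(X)
expected = np.clip(X * mms.scale_ + mms.min_, *mms.feature_range)
assert np.allclose(expected, mms.transform(X))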
operator: Operator, container: ModelComponentContainer +): i_index = operator.index index_name = scope.get_unique_variable_name("seq_at%d" % i_index) - container.add_initializer( - index_name, onnx_proto.TensorProto.INT64, [], [i_index]) + container.add_initializer(index_name, onnx_proto.TensorProto.INT64, [], [i_index]) container.add_node( - 'SequenceAt', [operator.inputs[0].full_name, index_name], + "SequenceAt", + [operator.inputs[0].full_name, index_name], operator.outputs[0].full_name, - name=scope.get_unique_operator_name('SequenceAt%d' % i_index)) + name=scope.get_unique_operator_name("SequenceAt%d" % i_index), + ) -def convert_sklearn_sequence_construct(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_sequence_construct( + scope: Scope, operator: Operator, container: ModelComponentContainer +): container.add_node( - 'SequenceConstruct', [i.full_name for i in operator.inputs], + "SequenceConstruct", + [i.full_name for i in operator.inputs], operator.outputs[0].full_name, - name=scope.get_unique_operator_name('SequenceConstruct')) + name=scope.get_unique_operator_name("SequenceConstruct"), + ) -register_converter('SklearnSequenceAt', convert_sklearn_sequence_at) -register_converter( - 'SklearnSequenceConstruct', convert_sklearn_sequence_construct) +register_converter("SklearnSequenceAt", convert_sklearn_sequence_at) +register_converter("SklearnSequenceConstruct", convert_sklearn_sequence_construct) diff --git a/skl2onnx/operator_converters/sgd_classifier.py b/skl2onnx/operator_converters/sgd_classifier.py index adeb023b5..485bc03a3 100644 --- a/skl2onnx/operator_converters/sgd_classifier.py +++ b/skl2onnx/operator_converters/sgd_classifier.py @@ -3,11 +3,24 @@ import numpy as np from ..common._apply_operation import ( - apply_add, apply_cast, apply_clip, apply_concat, apply_div, apply_exp, - apply_identity, apply_mul, apply_reciprocal, apply_reshape, apply_sub) + apply_add, + apply_cast, + apply_clip, + apply_concat, + apply_div, + apply_exp, + apply_identity, + apply_mul, + apply_reciprocal, + apply_reshape, + apply_sub, +) from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type, - guess_proto_type) + BooleanTensorType, + Int64TensorType, + guess_numpy_type, + guess_proto_type, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -19,106 +32,150 @@ def _decision_function(scope, operator, container, model, proto_type): """Predict for linear model. 
score = X * coefficient + intercept """ - coef_name = scope.get_unique_variable_name('coef') - intercept_name = scope.get_unique_variable_name('intercept') - matmul_result_name = scope.get_unique_variable_name( - 'matmul_result') - score_name = scope.get_unique_variable_name('score') + coef_name = scope.get_unique_variable_name("coef") + intercept_name = scope.get_unique_variable_name("intercept") + matmul_result_name = scope.get_unique_variable_name("matmul_result") + score_name = scope.get_unique_variable_name("score") coef = model.coef_.T - container.add_initializer(coef_name, proto_type, - coef.shape, coef.ravel()) - container.add_initializer(intercept_name, proto_type, - model.intercept_.shape, model.intercept_) + container.add_initializer(coef_name, proto_type, coef.shape, coef.ravel()) + container.add_initializer( + intercept_name, proto_type, model.intercept_.shape, model.intercept_ + ) input_name = operator.inputs[0].full_name if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') + cast_input_name = scope.get_unique_variable_name("cast_input") - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=proto_type) + apply_cast( + scope, operator.input_full_names, cast_input_name, container, to=proto_type + ) input_name = cast_input_name container.add_node( - 'MatMul', [input_name, coef_name], + "MatMul", + [input_name, coef_name], matmul_result_name, - name=scope.get_unique_operator_name('MatMul')) - apply_add(scope, [matmul_result_name, intercept_name], - score_name, container, broadcast=0) + name=scope.get_unique_operator_name("MatMul"), + ) + apply_add( + scope, [matmul_result_name, intercept_name], score_name, container, broadcast=0 + ) return score_name -def _handle_zeros(scope, container, scores, proba, reduced_proba, num_classes, - proto_type): +def _handle_zeros( + scope, container, scores, proba, reduced_proba, num_classes, proto_type +): """Handle cases where reduced_proba values are zeros to avoid NaNs in class probability scores because of divide by 0 when we calculate proba / reduced_proba in _normalise_proba(). This is done by replacing reduced_proba values of 0s with num_classes and corresponding proba values with 1. 
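# A minimal check (not from this diff): the MatMul/Add pair built by
# _decision_function above is the raw linear score, i.e.
# SGDClassifier.decision_function.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=100, n_features=5, random_state=0)
clf = SGDClassifier(random_state=0).fit(X, y)

scores = X @ clf.coef_.T + clf.intercept_  # shape (n_samples, 1) for binary
assert np.allclose(scores.ravel(), clf.decision_function(X))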
""" - num_classes_name = scope.get_unique_variable_name('num_classes') - bool_reduced_proba_name = scope.get_unique_variable_name( - 'bool_reduced_proba') + num_classes_name = scope.get_unique_variable_name("num_classes") + bool_reduced_proba_name = scope.get_unique_variable_name("bool_reduced_proba") bool_not_reduced_proba_name = scope.get_unique_variable_name( - 'bool_not_reduced_proba') - not_reduced_proba_name = scope.get_unique_variable_name( - 'not_reduced_proba') - proba_updated_name = scope.get_unique_variable_name('proba_updated') - mask_name = scope.get_unique_variable_name('mask') - reduced_proba_updated_name = scope.get_unique_variable_name( - 'reduced_proba_updated') - - container.add_initializer(num_classes_name, proto_type, - [], [num_classes]) - - apply_cast(scope, reduced_proba, bool_reduced_proba_name, container, - to=onnx_proto.TensorProto.BOOL) - container.add_node('Not', bool_reduced_proba_name, - bool_not_reduced_proba_name, - name=scope.get_unique_operator_name('Not')) - apply_cast(scope, bool_not_reduced_proba_name, not_reduced_proba_name, - container, to=proto_type) - apply_add(scope, [proba, not_reduced_proba_name], - proba_updated_name, container, broadcast=1) - apply_mul(scope, [not_reduced_proba_name, num_classes_name], - mask_name, container, broadcast=1) - apply_add(scope, [reduced_proba, mask_name], - reduced_proba_updated_name, container, broadcast=0) + "bool_not_reduced_proba" + ) + not_reduced_proba_name = scope.get_unique_variable_name("not_reduced_proba") + proba_updated_name = scope.get_unique_variable_name("proba_updated") + mask_name = scope.get_unique_variable_name("mask") + reduced_proba_updated_name = scope.get_unique_variable_name("reduced_proba_updated") + + container.add_initializer(num_classes_name, proto_type, [], [num_classes]) + + apply_cast( + scope, + reduced_proba, + bool_reduced_proba_name, + container, + to=onnx_proto.TensorProto.BOOL, + ) + container.add_node( + "Not", + bool_reduced_proba_name, + bool_not_reduced_proba_name, + name=scope.get_unique_operator_name("Not"), + ) + apply_cast( + scope, + bool_not_reduced_proba_name, + not_reduced_proba_name, + container, + to=proto_type, + ) + apply_add( + scope, + [proba, not_reduced_proba_name], + proba_updated_name, + container, + broadcast=1, + ) + apply_mul( + scope, + [not_reduced_proba_name, num_classes_name], + mask_name, + container, + broadcast=1, + ) + apply_add( + scope, + [reduced_proba, mask_name], + reduced_proba_updated_name, + container, + broadcast=0, + ) return proba_updated_name, reduced_proba_updated_name -def _normalise_proba(scope, operator, container, scores, proba, num_classes, - unity_name, proto_type): - reduced_proba_name = scope.get_unique_variable_name('reduced_proba') - sub_result_name = scope.get_unique_variable_name('sub_result') +def _normalise_proba( + scope, operator, container, scores, proba, num_classes, unity_name, proto_type +): + reduced_proba_name = scope.get_unique_variable_name("reduced_proba") + sub_result_name = scope.get_unique_variable_name("sub_result") if num_classes == 2: - apply_sub(scope, [unity_name, proba], - sub_result_name, container, broadcast=1) - apply_concat(scope, [sub_result_name, proba], - operator.outputs[1].full_name, container, axis=1) + apply_sub(scope, [unity_name, proba], sub_result_name, container, broadcast=1) + apply_concat( + scope, + [sub_result_name, proba], + operator.outputs[1].full_name, + container, + axis=1, + ) else: if container.target_opset < 13: container.add_node( - 'ReduceSum', proba, reduced_proba_name, 
axes=[1], - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + proba, + reduced_proba_name, + axes=[1], + name=scope.get_unique_operator_name("ReduceSum"), + ) else: - axis_name = scope.get_unique_variable_name('axis') - container.add_initializer( - axis_name, onnx_proto.TensorProto.INT64, [1], [1]) + axis_name = scope.get_unique_variable_name("axis") + container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1]) container.add_node( - 'ReduceSum', [proba, axis_name], reduced_proba_name, - name=scope.get_unique_operator_name('ReduceSum')) + "ReduceSum", + [proba, axis_name], + reduced_proba_name, + name=scope.get_unique_operator_name("ReduceSum"), + ) proba_updated, reduced_proba_updated = _handle_zeros( - scope, container, scores, proba, reduced_proba_name, num_classes, - proto_type) - apply_div(scope, [proba_updated, reduced_proba_updated], - operator.outputs[1].full_name, container, broadcast=1) + scope, container, scores, proba, reduced_proba_name, num_classes, proto_type + ) + apply_div( + scope, + [proba_updated, reduced_proba_updated], + operator.outputs[1].full_name, + container, + broadcast=1, + ) return operator.outputs[1].full_name -def _predict_proba_log(scope, operator, container, scores, num_classes, - proto_type): +def _predict_proba_log(scope, operator, container, scores, num_classes, proto_type): """Probability estimation for SGDClassifier with loss=log (or log_loss) and Logistic Regression. Positive class probabilities are computed as @@ -126,47 +183,68 @@ def _predict_proba_log(scope, operator, container, scores, num_classes, multiclass is handled by normalising that over all classes. """ if num_classes >= 3 or container.target_opset < 13: - negated_scores_name = scope.get_unique_variable_name('negated_scores') - negate_name = scope.get_unique_variable_name('negate') - exp_result_name = scope.get_unique_variable_name('exp_result') - unity_name = scope.get_unique_variable_name('unity') - add_result_name = scope.get_unique_variable_name('add_result') - proba_name = scope.get_unique_variable_name('proba') + negated_scores_name = scope.get_unique_variable_name("negated_scores") + negate_name = scope.get_unique_variable_name("negate") + exp_result_name = scope.get_unique_variable_name("exp_result") + unity_name = scope.get_unique_variable_name("unity") + add_result_name = scope.get_unique_variable_name("add_result") + proba_name = scope.get_unique_variable_name("proba") container.add_initializer(negate_name, proto_type, [], [-1]) container.add_initializer(unity_name, proto_type, [], [1]) - apply_mul(scope, [scores, negate_name], - negated_scores_name, container, broadcast=1) + apply_mul( + scope, [scores, negate_name], negated_scores_name, container, broadcast=1 + ) apply_exp(scope, negated_scores_name, exp_result_name, container) - apply_add(scope, [exp_result_name, unity_name], - add_result_name, container, broadcast=1) + apply_add( + scope, + [exp_result_name, unity_name], + add_result_name, + container, + broadcast=1, + ) apply_reciprocal(scope, add_result_name, proba_name, container) - return _normalise_proba(scope, operator, container, scores, proba_name, - num_classes, unity_name, proto_type) + return _normalise_proba( + scope, + operator, + container, + scores, + proba_name, + num_classes, + unity_name, + proto_type, + ) # Sigmoid cannot be used for num_classes > 2 because # onnxruntime has a different implementation than numpy. # It introduces discrepancies when x < 1e16. 
# Below that threshold, Sigmoid must be replaced by Exp # because Sigmoid is not an increasing function. - sigmo = scope.get_unique_variable_name('sigmoid') - container.add_node('Sigmoid', [scores], [sigmo], - name=scope.get_unique_operator_name('Sigmoid')) + sigmo = scope.get_unique_variable_name("sigmoid") + container.add_node( + "Sigmoid", [scores], [sigmo], name=scope.get_unique_operator_name("Sigmoid") + ) - unity_name = scope.get_unique_variable_name('unity') + unity_name = scope.get_unique_variable_name("unity") container.add_initializer(unity_name, proto_type, [1], [1]) - sigmo_0 = scope.get_unique_variable_name('sigmo_0') - container.add_node('Sub', [unity_name, sigmo], [sigmo_0], - name=scope.get_unique_operator_name('Sub')) - apply_concat(scope, [sigmo_0, sigmo], [operator.outputs[1].full_name], - container, axis=1) + sigmo_0 = scope.get_unique_variable_name("sigmo_0") + container.add_node( + "Sub", + [unity_name, sigmo], + [sigmo_0], + name=scope.get_unique_operator_name("Sub"), + ) + apply_concat( + scope, [sigmo_0, sigmo], [operator.outputs[1].full_name], container, axis=1 + ) return operator.outputs[1].full_name -def _predict_proba_modified_huber(scope, operator, container, - scores, num_classes, proto_type): +def _predict_proba_modified_huber( + scope, operator, container, scores, num_classes, proto_type +): """Probability estimation for SGDClassifier with loss=modified_huber. Multiclass probability estimates are derived from binary @@ -177,30 +255,48 @@ def _predict_proba_modified_huber(scope, operator, container, dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: dtype = np.float32 - unity_name = scope.get_unique_variable_name('unity') - constant_name = scope.get_unique_variable_name('constant') - add_result_name = scope.get_unique_variable_name('add_result') - proba_name = scope.get_unique_variable_name('proba') - clipped_scores_name = scope.get_unique_variable_name('clipped_scores') - - container.add_initializer(unity_name, proto_type, - [], [1]) - container.add_initializer(constant_name, proto_type, - [], [2]) - - apply_clip(scope, scores, clipped_scores_name, container, - max=np.array(1, dtype=dtype), - min=np.array(-1, dtype=dtype)) - apply_add(scope, [clipped_scores_name, unity_name], - add_result_name, container, broadcast=1) - apply_div(scope, [add_result_name, constant_name], - proba_name, container, broadcast=1) - return _normalise_proba(scope, operator, container, scores, proba_name, - num_classes, unity_name, proto_type) - - -def convert_sklearn_sgd_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): + unity_name = scope.get_unique_variable_name("unity") + constant_name = scope.get_unique_variable_name("constant") + add_result_name = scope.get_unique_variable_name("add_result") + proba_name = scope.get_unique_variable_name("proba") + clipped_scores_name = scope.get_unique_variable_name("clipped_scores") + + container.add_initializer(unity_name, proto_type, [], [1]) + container.add_initializer(constant_name, proto_type, [], [2]) + + apply_clip( + scope, + scores, + clipped_scores_name, + container, + max=np.array(1, dtype=dtype), + min=np.array(-1, dtype=dtype), + ) + apply_add( + scope, + [clipped_scores_name, unity_name], + add_result_name, + container, + broadcast=1, + ) + apply_div( + scope, [add_result_name, constant_name], proba_name, container, broadcast=1 + ) + return _normalise_proba( + scope, + operator, + container, + scores, + proba_name, + num_classes, + unity_name, + proto_type, + ) + + +def 
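# A minimal check (not from this diff) of the probability formulas the two
# helpers above implement in ONNX; loss="log_loss" is named "log" in older
# scikit-learn releases.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=200, n_features=5, random_state=0)

# log loss: p = 1 / (1 + exp(-score)), binary output is [1 - p, p]
log_clf = SGDClassifier(loss="log_loss", random_state=0).fit(X, y)
p = 1.0 / (1.0 + np.exp(-log_clf.decision_function(X)))
assert np.allclose(np.column_stack([1.0 - p, p]), log_clf.predict_proba(X))

# modified_huber: p = (clip(score, -1, 1) + 1) / 2
mh_clf = SGDClassifier(loss="modified_huber", random_state=0).fit(X, y)
p = (np.clip(mh_clf.decision_function(X), -1.0, 1.0) + 1.0) / 2.0
assert np.allclose(np.column_stack([1.0 - p, p]), mh_clf.predict_proba(X))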
convert_sklearn_sgd_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """Converter for SGDClassifier.""" sgd_op = operator.raw_operator classes = get_label_classes(scope, sgd_op) @@ -209,78 +305,107 @@ def convert_sklearn_sgd_classifier(scope: Scope, operator: Operator, if proto_type != onnx_proto.TensorProto.DOUBLE: proto_type = onnx_proto.TensorProto.FLOAT - if (np.issubdtype(classes.dtype, np.floating) or - classes.dtype == np.bool_): + if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_: class_type = onnx_proto.TensorProto.INT32 classes = classes.astype(np.int32) elif np.issubdtype(classes.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) - classes_name = scope.get_unique_variable_name('classes') - predicted_label_name = scope.get_unique_variable_name( - 'predicted_label') - final_label_name = scope.get_unique_variable_name('final_label') + classes_name = scope.get_unique_variable_name("classes") + predicted_label_name = scope.get_unique_variable_name("predicted_label") + final_label_name = scope.get_unique_variable_name("final_label") - container.add_initializer(classes_name, class_type, - classes.shape, classes) + container.add_initializer(classes_name, class_type, classes.shape, classes) scores = _decision_function(scope, operator, container, sgd_op, proto_type) options = container.get_options(sgd_op, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] - if sgd_op.loss in ('log', 'log_loss') and not use_raw_scores: - proba = _predict_proba_log(scope, operator, container, scores, - len(classes), proto_type) - elif sgd_op.loss == 'modified_huber' and not use_raw_scores: + use_raw_scores = options["raw_scores"] + if sgd_op.loss in ("log", "log_loss") and not use_raw_scores: + proba = _predict_proba_log( + scope, operator, container, scores, len(classes), proto_type + ) + elif sgd_op.loss == "modified_huber" and not use_raw_scores: proba = _predict_proba_modified_huber( - scope, operator, container, scores, len(classes), - proto_type) + scope, operator, container, scores, len(classes), proto_type + ) else: if len(classes) == 2: - negate_name = scope.get_unique_variable_name('negate') - negated_scores_name = scope.get_unique_variable_name( - 'negated_scores') - - container.add_initializer( - negate_name, proto_type, [], [-1]) - - apply_mul(scope, [scores, negate_name], - negated_scores_name, container, broadcast=1) - apply_concat(scope, [negated_scores_name, scores], - operator.outputs[1].full_name, container, axis=1) + negate_name = scope.get_unique_variable_name("negate") + negated_scores_name = scope.get_unique_variable_name("negated_scores") + + container.add_initializer(negate_name, proto_type, [], [-1]) + + apply_mul( + scope, + [scores, negate_name], + negated_scores_name, + container, + broadcast=1, + ) + apply_concat( + scope, + [negated_scores_name, scores], + operator.outputs[1].full_name, + container, + axis=1, + ) else: - apply_identity(scope, scores, - operator.outputs[1].full_name, container) + apply_identity(scope, scores, operator.outputs[1].full_name, container) proba = operator.outputs[1].full_name - container.add_node('ArgMax', proba, - predicted_label_name, - name=scope.get_unique_operator_name('ArgMax'), - axis=1, - keepdims=1) container.add_node( - 'ArrayFeatureExtractor', [classes_name, predicted_label_name], - final_label_name, op_domain='ai.onnx.ml', - 
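# A minimal sketch (not from this diff): the ArgMax / ArrayFeatureExtractor
# selection being added here picks the class label with the highest score,
# i.e. classes[argmax(proba, axis=1)] in numpy.
import numpy as np

classes = np.array(["a", "b", "c"])
proba = np.array([[0.1, 0.7, 0.2], [0.5, 0.3, 0.2]])
print(classes[np.argmax(proba, axis=1)])  # ['b' 'a']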
name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArgMax", + proba, + predicted_label_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + keepdims=1, + ) + container.add_node( + "ArrayFeatureExtractor", + [classes_name, predicted_label_name], + final_label_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == onnx_proto.TensorProto.INT32: reshaped_final_label_name = scope.get_unique_variable_name( - 'reshaped_final_label') - - apply_reshape(scope, final_label_name, reshaped_final_label_name, - container, desired_shape=(-1,)) - apply_cast(scope, reshaped_final_label_name, - operator.outputs[0].full_name, container, - to=onnx_proto.TensorProto.INT64) + "reshaped_final_label" + ) + + apply_reshape( + scope, + final_label_name, + reshaped_final_label_name, + container, + desired_shape=(-1,), + ) + apply_cast( + scope, + reshaped_final_label_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - apply_reshape(scope, final_label_name, - operator.outputs[0].full_name, container, - desired_shape=(-1,)) - - -register_converter('SklearnSGDClassifier', - convert_sklearn_sgd_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) + apply_reshape( + scope, + final_label_name, + operator.outputs[0].full_name, + container, + desired_shape=(-1,), + ) + + +register_converter( + "SklearnSGDClassifier", + convert_sklearn_sgd_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/sgd_oneclass_svm.py b/skl2onnx/operator_converters/sgd_oneclass_svm.py index b4dc7778b..6c32c08e6 100644 --- a/skl2onnx/operator_converters/sgd_oneclass_svm.py +++ b/skl2onnx/operator_converters/sgd_oneclass_svm.py @@ -1,19 +1,17 @@ # SPDX-License-Identifier: Apache-2.0 -from ..common._apply_operation import ( - apply_cast, apply_sub) -from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_proto_type) +from ..common._apply_operation import apply_cast, apply_sub +from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..proto import onnx_proto -def convert_sklearn_sgd_oneclass_svm(scope: Scope, operator: Operator, - container: ModelComponentContainer): - +def convert_sklearn_sgd_oneclass_svm( + scope: Scope, operator: Operator, container: ModelComponentContainer +): input_name = operator.inputs[0].full_name output_names = operator.output_full_names model = operator.raw_operator @@ -24,31 +22,38 @@ def convert_sklearn_sgd_oneclass_svm(scope: Scope, operator: Operator, if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT - if isinstance(operator.inputs[0].type, - (BooleanTensorType, Int64TensorType)): - cast_input_name = scope.get_unique_variable_name('cast_input') - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=proto_dtype) + if isinstance(operator.inputs[0].type, (BooleanTensorType, Int64TensorType)): + cast_input_name = scope.get_unique_variable_name("cast_input") + apply_cast( + scope, operator.input_full_names, cast_input_name, container, to=proto_dtype + ) input_name = 
cast_input_name - coef_name = scope.get_unique_variable_name('coef') - container.add_initializer(coef_name, proto_dtype, - coef.shape, coef.ravel()) + coef_name = scope.get_unique_variable_name("coef") + container.add_initializer(coef_name, proto_dtype, coef.shape, coef.ravel()) - offset_name = scope.get_unique_variable_name('offset') + offset_name = scope.get_unique_variable_name("offset") container.add_initializer(offset_name, proto_dtype, offset.shape, offset) - matmul_result_name = scope.get_unique_variable_name('matmul_result') - container.add_node('MatMul', [input_name, coef_name], matmul_result_name, - name=scope.get_unique_operator_name('MatMul')) + matmul_result_name = scope.get_unique_variable_name("matmul_result") + container.add_node( + "MatMul", + [input_name, coef_name], + matmul_result_name, + name=scope.get_unique_operator_name("MatMul"), + ) - apply_sub(scope, [matmul_result_name, offset_name], output_names[1], - container, broadcast=0) + apply_sub( + scope, + [matmul_result_name, offset_name], + output_names[1], + container, + broadcast=0, + ) - pred = scope.get_unique_variable_name('class_prediction') - container.add_node('Sign', output_names[1], pred, op_version=9) - apply_cast(scope, pred, output_names[0], - container, to=onnx_proto.TensorProto.INT64) + pred = scope.get_unique_variable_name("class_prediction") + container.add_node("Sign", output_names[1], pred, op_version=9) + apply_cast(scope, pred, output_names[0], container, to=onnx_proto.TensorProto.INT64) -register_converter('SklearnSGDOneClassSVM', convert_sklearn_sgd_oneclass_svm) +register_converter("SklearnSGDOneClassSVM", convert_sklearn_sgd_oneclass_svm) diff --git a/skl2onnx/operator_converters/stacking.py b/skl2onnx/operator_converters/stacking.py index c03264db0..889081c63 100644 --- a/skl2onnx/operator_converters/stacking.py +++ b/skl2onnx/operator_converters/stacking.py @@ -4,8 +4,7 @@ import numpy as np from ..proto import onnx_proto -from ..common._apply_operation import ( - apply_cast, apply_concat, apply_reshape) +from ..common._apply_operation import apply_cast, apply_concat, apply_reshape from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -13,30 +12,28 @@ from .._supported_operators import sklearn_operator_name_map -def _fetch_scores(scope, container, model, inputs, raw_scores=False, - is_regressor=False): +def _fetch_scores( + scope, container, model, inputs, raw_scores=False, is_regressor=False +): op_type = sklearn_operator_name_map[type(model)] this_operator = scope.declare_local_operator(op_type, model) - if container.has_options(model, 'raw_scores'): - container.add_options(id(model), {'raw_scores': raw_scores}) + if container.has_options(model, "raw_scores"): + container.add_options(id(model), {"raw_scores": raw_scores}) this_operator.inputs.append(inputs) if is_regressor: - output_proba = scope.declare_local_variable( - 'variable', inputs.type.__class__()) + output_proba = scope.declare_local_variable("variable", inputs.type.__class__()) this_operator.outputs.append(output_proba) else: - label_name = scope.declare_local_variable( - 'label', Int64TensorType()) + label_name = scope.declare_local_variable("label", Int64TensorType()) this_operator.outputs.append(label_name) output_proba = scope.declare_local_variable( - 'probability_tensor', inputs.type.__class__()) + "probability_tensor", inputs.type.__class__() + ) this_operator.outputs.append(output_proba) proto_type = 
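# A minimal check (not from this diff, and an assumption about the fitted
# attributes used): the MatMul/Sub/Sign chain above follows SGDOneClassSVM,
# whose decision_function is X @ coef_.ravel() - offset_ and whose predict is
# its sign (+1 inlier, -1 outlier). Note the ONNX Sign maps a score of exactly
# 0 to 0, a minor difference from scikit-learn's ">= 0" rule.
import numpy as np
from sklearn.linear_model import SGDOneClassSVM

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
svm = SGDOneClassSVM(random_state=0).fit(X)

scores = X @ svm.coef_.ravel() - svm.offset_
assert np.allclose(scores, svm.decision_function(X))
assert np.array_equal(np.where(scores >= 0, 1, -1), svm.predict(X))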
guess_proto_type(inputs.type) - new_name = scope.get_unique_variable_name( - output_proba.full_name + '_castio') - apply_cast(scope, output_proba.full_name, new_name, - container, to=proto_type) + new_name = scope.get_unique_variable_name(output_proba.full_name + "_castio") + apply_cast(scope, output_proba.full_name, new_name, container, to=proto_type) return new_name @@ -46,61 +43,66 @@ def _add_passthrough_connection(operator, predictions): def _transform_regressor(scope, operator, container, model): - merged_prob_tensor = scope.get_unique_variable_name( - 'merged_probability_tensor') + merged_prob_tensor = scope.get_unique_variable_name("merged_probability_tensor") predictions = [ - _fetch_scores( - scope, container, est, operator.inputs[0], is_regressor=True) + _fetch_scores(scope, container, est, operator.inputs[0], is_regressor=True) for est in model.estimators_ ] _add_passthrough_connection(operator, predictions) - apply_concat( - scope, predictions, merged_prob_tensor, container, axis=1) + apply_concat(scope, predictions, merged_prob_tensor, container, axis=1) return merged_prob_tensor def _transform(scope, operator, container, model): - merged_prob_tensor = scope.get_unique_variable_name( - 'merged_probability_tensor') + merged_prob_tensor = scope.get_unique_variable_name("merged_probability_tensor") predictions = [ - _fetch_scores(scope, container, est, operator.inputs[0], - raw_scores=meth == 'decision_function') + _fetch_scores( + scope, + container, + est, + operator.inputs[0], + raw_scores=meth == "decision_function", + ) for est, meth in zip(model.estimators_, model.stack_method_) - if est != 'drop' + if est != "drop" ] op = operator.raw_operator - select_lact_column = (len(op.classes_) == 2 and all( - op.stack_method_[est_idx] == 'predict_proba' - for est_idx in range(0, len(op.estimators_)))) + select_lact_column = len(op.classes_) == 2 and all( + op.stack_method_[est_idx] == "predict_proba" + for est_idx in range(0, len(op.estimators_)) + ) if select_lact_column: - column_index_name = scope.get_unique_variable_name('column_index') - container.add_initializer(column_index_name, - onnx_proto.TensorProto.INT64, [], [1]) + column_index_name = scope.get_unique_variable_name("column_index") + container.add_initializer( + column_index_name, onnx_proto.TensorProto.INT64, [], [1] + ) new_predictions = [] for ipred, pred in enumerate(predictions): - prob1 = scope.get_unique_variable_name('stack_prob%d' % ipred) + prob1 = scope.get_unique_variable_name("stack_prob%d" % ipred) container.add_node( - 'ArrayFeatureExtractor', - [pred, column_index_name], prob1, - name=scope.get_unique_operator_name('ArrayFeatureExtractor'), - op_domain='ai.onnx.ml') + "ArrayFeatureExtractor", + [pred, column_index_name], + prob1, + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + op_domain="ai.onnx.ml", + ) new_predictions.append(prob1) predictions = new_predictions _add_passthrough_connection(operator, predictions) - apply_concat( - scope, predictions, merged_prob_tensor, container, axis=1) + apply_concat(scope, predictions, merged_prob_tensor, container, axis=1) return merged_prob_tensor -def convert_sklearn_stacking_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_stacking_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for StackingClassifier. 
It invokes converters for each estimator, concatenating their results before calling converter @@ -109,58 +111,89 @@ def convert_sklearn_stacking_classifier(scope: Scope, operator: Operator, stacking_op = operator.raw_operator classes = stacking_op.classes_ options = container.get_options(stacking_op, dict(raw_scores=False)) - use_raw_scores = options['raw_scores'] + use_raw_scores = options["raw_scores"] class_type = onnx_proto.TensorProto.STRING - if (np.issubdtype(stacking_op.classes_.dtype, np.floating) or - stacking_op.classes_.dtype == np.bool_): + if ( + np.issubdtype(stacking_op.classes_.dtype, np.floating) + or stacking_op.classes_.dtype == np.bool_ + ): class_type = onnx_proto.TensorProto.INT32 classes = classes.astype(np.int32) elif np.issubdtype(stacking_op.classes_.dtype, np.signedinteger): class_type = onnx_proto.TensorProto.INT32 else: - classes = np.array([s.encode('utf-8') for s in classes]) + classes = np.array([s.encode("utf-8") for s in classes]) - classes_name = scope.get_unique_variable_name('classes') - argmax_output_name = scope.get_unique_variable_name('argmax_output') - reshaped_result_name = scope.get_unique_variable_name('reshaped_result') + classes_name = scope.get_unique_variable_name("classes") + argmax_output_name = scope.get_unique_variable_name("argmax_output") + reshaped_result_name = scope.get_unique_variable_name("reshaped_result") array_feature_extractor_result_name = scope.get_unique_variable_name( - 'array_feature_extractor_result') + "array_feature_extractor_result" + ) container.add_initializer(classes_name, class_type, classes.shape, classes) - merged_proba_tensor = _transform( - scope, operator, container, stacking_op) + merged_proba_tensor = _transform(scope, operator, container, stacking_op) merge_proba = scope.declare_local_variable( - 'merged_stacked_proba', operator.inputs[0].type.__class__()) - container.add_node( - 'Identity', [merged_proba_tensor], [merge_proba.onnx_name]) + "merged_stacked_proba", operator.inputs[0].type.__class__() + ) + container.add_node("Identity", [merged_proba_tensor], [merge_proba.onnx_name]) prob = _fetch_scores( - scope, container, stacking_op.final_estimator_, merge_proba, - raw_scores=use_raw_scores) - container.add_node('Identity', prob, operator.outputs[1].onnx_name, - name=scope.get_unique_operator_name('OpProb')) - container.add_node('ArgMax', prob, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) + scope, + container, + stacking_op.final_estimator_, + merge_proba, + raw_scores=use_raw_scores, + ) + container.add_node( + "Identity", + prob, + operator.outputs[1].onnx_name, + name=scope.get_unique_operator_name("OpProb"), + ) container.add_node( - 'ArrayFeatureExtractor', [classes_name, argmax_output_name], - array_feature_extractor_result_name, op_domain='ai.onnx.ml', - name=scope.get_unique_operator_name('ArrayFeatureExtractor')) + "ArgMax", + prob, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + container.add_node( + "ArrayFeatureExtractor", + [classes_name, argmax_output_name], + array_feature_extractor_result_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("ArrayFeatureExtractor"), + ) if class_type == onnx_proto.TensorProto.INT32: - apply_reshape(scope, array_feature_extractor_result_name, - reshaped_result_name, container, - desired_shape=(-1,)) - apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name, - container, to=onnx_proto.TensorProto.INT64) + apply_reshape( + scope, + 
array_feature_extractor_result_name, + reshaped_result_name, + container, + desired_shape=(-1,), + ) + apply_cast( + scope, + reshaped_result_name, + operator.outputs[0].full_name, + container, + to=onnx_proto.TensorProto.INT64, + ) else: - apply_reshape(scope, array_feature_extractor_result_name, - operator.outputs[0].full_name, container, - desired_shape=(-1,)) - - -def convert_sklearn_stacking_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): + apply_reshape( + scope, + array_feature_extractor_result_name, + operator.outputs[0].full_name, + container, + desired_shape=(-1,), + ) + + +def convert_sklearn_stacking_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for StackingRegressor. It invokes converters for each estimator, concatenating their results before calling converter @@ -168,24 +201,30 @@ def convert_sklearn_stacking_regressor(scope: Scope, operator: Operator, """ stacking_op = operator.raw_operator - merged_proba_tensor = _transform_regressor( - scope, operator, container, stacking_op) + merged_proba_tensor = _transform_regressor(scope, operator, container, stacking_op) merge_proba = scope.declare_local_variable( - 'merged_stacked_proba', operator.inputs[0].type.__class__()) - container.add_node( - 'Identity', [merged_proba_tensor], [merge_proba.onnx_name]) + "merged_stacked_proba", operator.inputs[0].type.__class__() + ) + container.add_node("Identity", [merged_proba_tensor], [merge_proba.onnx_name]) prob = _fetch_scores( - scope, container, stacking_op.final_estimator_, merge_proba, - is_regressor=True) - container.add_node('Identity', prob, operator.outputs[0].full_name, - name=scope.get_unique_operator_name('Identity')) - - -register_converter('SklearnStackingClassifier', - convert_sklearn_stacking_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) -register_converter('SklearnStackingRegressor', - convert_sklearn_stacking_regressor) + scope, container, stacking_op.final_estimator_, merge_proba, is_regressor=True + ) + container.add_node( + "Identity", + prob, + operator.outputs[0].full_name, + name=scope.get_unique_operator_name("Identity"), + ) + + +register_converter( + "SklearnStackingClassifier", + convert_sklearn_stacking_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) +register_converter("SklearnStackingRegressor", convert_sklearn_stacking_regressor) diff --git a/skl2onnx/operator_converters/support_vector_machines.py b/skl2onnx/operator_converters/support_vector_machines.py index 32631ddf6..6f57771d2 100644 --- a/skl2onnx/operator_converters/support_vector_machines.py +++ b/skl2onnx/operator_converters/support_vector_machines.py @@ -5,12 +5,12 @@ from scipy.sparse import isspmatrix from sklearn.svm import SVC, NuSVC, SVR, NuSVR, OneClassSVM from ..common._apply_operation import apply_cast -from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_proto_type) +from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type from ..common._registration import register_converter from ..proto import onnx_proto from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer + try: from ..common._apply_operation import apply_less except ImportError: @@ -19,9 +19,13 @@ def 
convert_sklearn_svm_regressor( - scope: Scope, operator: Operator, - container: ModelComponentContainer, - op_type='SVMRegressor', op_domain='ai.onnx.ml', op_version=1): + scope: Scope, + operator: Operator, + container: ModelComponentContainer, + op_type="SVMRegressor", + op_domain="ai.onnx.ml", + op_version=1, +): """ Converter for model `SVR `_. *onnxruntime* returns the raw score from *svm* algorithm as a *matrix[N, (C(C-1)/2]*. """ - svm_attrs = {'name': scope.get_unique_operator_name('SVM')} + svm_attrs = {"name": scope.get_unique_operator_name("SVM")} op = operator.raw_operator if isinstance(op.dual_coef_, np.ndarray): coef = op.dual_coef_.ravel() @@ -51,79 +55,107 @@ def convert_sklearn_svm_regressor( else: support_vectors = op.support_vectors_ - svm_attrs['kernel_type'] = op.kernel.upper() - svm_attrs['kernel_params'] = [np.float32(_) for _ in - [op._gamma, op.coef0, op.degree]] + svm_attrs["kernel_type"] = op.kernel.upper() + svm_attrs["kernel_params"] = [ + np.float32(_) for _ in [op._gamma, op.coef0, op.degree] + ] if isspmatrix(support_vectors): - svm_attrs['support_vectors'] = support_vectors.toarray().ravel() + svm_attrs["support_vectors"] = support_vectors.toarray().ravel() else: - svm_attrs['support_vectors'] = support_vectors + svm_attrs["support_vectors"] = support_vectors if isspmatrix(coef): - svm_attrs['coefficients'] = coef.toarray().ravel() + svm_attrs["coefficients"] = coef.toarray().ravel() else: - svm_attrs['coefficients'] = coef - svm_attrs['rho'] = intercept.astype(np.float32) - svm_attrs['coefficients'] = svm_attrs['coefficients'].astype(np.float32) - svm_attrs['support_vectors'] = svm_attrs['support_vectors'].astype( - np.float32) + svm_attrs["coefficients"] = coef + svm_attrs["rho"] = intercept.astype(np.float32) + svm_attrs["coefficients"] = svm_attrs["coefficients"].astype(np.float32) + svm_attrs["support_vectors"] = svm_attrs["support_vectors"].astype(np.float32) proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT - if operator.type in ['SklearnSVR', 'SklearnNuSVR'] or isinstance( - op, (SVR, NuSVR)): - svm_attrs['post_transform'] = 'NONE' - svm_attrs['n_supports'] = len(op.support_) + if operator.type in ["SklearnSVR", "SklearnNuSVR"] or isinstance(op, (SVR, NuSVR)): + svm_attrs["post_transform"] = "NONE" + svm_attrs["n_supports"] = len(op.support_) input_name = operator.input_full_names - if type(operator.inputs[0].type) in ( - BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=proto_dtype) + if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): + cast_input_name = scope.get_unique_variable_name("cast_input") + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=proto_dtype, + ) input_name = cast_input_name - svm_out = scope.get_unique_variable_name('SVM03') + svm_out = scope.get_unique_variable_name("SVM03") container.add_node( - op_type, input_name, svm_out, - op_domain=op_domain, op_version=op_version, **svm_attrs) - apply_cast(scope, svm_out, operator.output_full_names, - container, to=proto_dtype) - elif (operator.type in ['SklearnOneClassSVM'] or - isinstance(op, OneClassSVM)): - svm_attrs['post_transform'] = 'NONE' - svm_attrs['n_supports'] = len(op.support_) + op_type, + input_name, + svm_out, + op_domain=op_domain, + op_version=op_version, + **svm_attrs + ) + 
apply_cast( + scope, svm_out, operator.output_full_names, container, to=proto_dtype + ) + elif operator.type in ["SklearnOneClassSVM"] or isinstance(op, OneClassSVM): + svm_attrs["post_transform"] = "NONE" + svm_attrs["n_supports"] = len(op.support_) input_name = operator.input_full_names - if type(operator.inputs[0].type) in ( - BooleanTensorType, Int64TensorType): - cast_input_name = scope.get_unique_variable_name('cast_input') - apply_cast(scope, operator.input_full_names, cast_input_name, - container, to=proto_dtype) + if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType): + cast_input_name = scope.get_unique_variable_name("cast_input") + apply_cast( + scope, + operator.input_full_names, + cast_input_name, + container, + to=proto_dtype, + ) input_name = cast_input_name - svm_out0 = scope.get_unique_variable_name('SVMO1') + svm_out0 = scope.get_unique_variable_name("SVMO1") container.add_node( - op_type, input_name, svm_out0, - op_domain=op_domain, op_version=op_version, **svm_attrs) + op_type, + input_name, + svm_out0, + op_domain=op_domain, + op_version=op_version, + **svm_attrs + ) svm_out = operator.output_full_names[1] apply_cast(scope, svm_out0, svm_out, container, to=proto_dtype) - pred = scope.get_unique_variable_name('float_prediction') - container.add_node('Sign', svm_out, pred, op_version=9) - apply_cast(scope, pred, operator.output_full_names[0], - container, to=onnx_proto.TensorProto.INT64) + pred = scope.get_unique_variable_name("float_prediction") + container.add_node("Sign", svm_out, pred, op_version=9) + apply_cast( + scope, + pred, + operator.output_full_names[0], + container, + to=onnx_proto.TensorProto.INT64, + ) else: - raise ValueError("Unknown support vector machine model type found " - "'{0}'.".format(operator.type)) + raise ValueError( + "Unknown support vector machine model type found " + "'{0}'.".format(operator.type) + ) def convert_sklearn_svm_classifier( - scope: Scope, operator: Operator, - container: ModelComponentContainer, - op_type='SVMClassifier', op_domain='ai.onnx.ml', op_version=1): + scope: Scope, + operator: Operator, + container: ModelComponentContainer, + op_type="SVMClassifier", + op_domain="ai.onnx.ml", + op_version=1, +): """ Converter for model `SVC 0: - svm_attrs['prob_a'] = op.probA_.astype(np.float32) + svm_attrs["prob_a"] = op.probA_.astype(np.float32) else: handles_ovr = True if len(op.probB_) > 0: - svm_attrs['prob_b'] = op.probB_.astype(np.float32) - - if (hasattr(op, 'decision_function_shape') and - op.decision_function_shape == 'ovr' and handles_ovr and - len(op.classes_) > 2): - output_name = scope.get_unique_variable_name('before_ovr') + svm_attrs["prob_b"] = op.probB_.astype(np.float32) + + if ( + hasattr(op, "decision_function_shape") + and op.decision_function_shape == "ovr" + and handles_ovr + and len(op.classes_) > 2 + ): + output_name = scope.get_unique_variable_name("before_ovr") elif len(op.classes_) == 2 and use_raw_scores: - output_name = scope.get_unique_variable_name('raw_scores') + output_name = scope.get_unique_variable_name("raw_scores") else: output_name = operator.outputs[1].full_name - svm_attrs['post_transform'] = 'NONE' - svm_attrs['vectors_per_class'] = op.n_support_.tolist() + svm_attrs["post_transform"] = "NONE" + svm_attrs["vectors_per_class"] = op.n_support_.tolist() label_name = operator.outputs[0].full_name probability_tensor_name = output_name - if all(isinstance(i, (numbers.Real, bool, np.bool_)) - for i in op.classes_): + if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i 
in op.classes_): labels = [int(i) for i in op.classes_] - svm_attrs['classlabels_ints'] = labels + svm_attrs["classlabels_ints"] = labels elif all(isinstance(i, str) for i in op.classes_): labels = [str(i) for i in op.classes_] - svm_attrs['classlabels_strings'] = labels + svm_attrs["classlabels_strings"] = labels else: raise RuntimeError("Invalid class label type '%s'." % op.classes_) - svm_out = scope.get_unique_variable_name('SVM02') + svm_out = scope.get_unique_variable_name("SVM02") container.add_node( - op_type, operator.inputs[0].full_name, + op_type, + operator.inputs[0].full_name, [label_name, svm_out], - op_domain=op_domain, op_version=op_version, **svm_attrs) - apply_cast(scope, svm_out, probability_tensor_name, - container, to=proto_dtype) + op_domain=op_domain, + op_version=op_version, + **svm_attrs + ) + apply_cast(scope, svm_out, probability_tensor_name, container, to=proto_dtype) if len(op.classes_) == 2 and use_raw_scores: - minus_one = scope.get_unique_variable_name('minus_one') + minus_one = scope.get_unique_variable_name("minus_one") container.add_initializer(minus_one, proto_dtype, [], [-1]) container.add_node( - 'Mul', [output_name, minus_one], operator.outputs[1].full_name, - name=scope.get_unique_operator_name('MulRawScores')) + "Mul", + [output_name, minus_one], + operator.outputs[1].full_name, + name=scope.get_unique_operator_name("MulRawScores"), + ) else: - raise ValueError("Unknown support vector machine model type found " - "'{0}'.".format(operator.type)) - - if (hasattr(op, 'decision_function_shape') and - op.decision_function_shape == 'ovr' and handles_ovr and - len(op.classes_) > 2): + raise ValueError( + "Unknown support vector machine model type found " + "'{0}'.".format(operator.type) + ) + + if ( + hasattr(op, "decision_function_shape") + and op.decision_function_shape == "ovr" + and handles_ovr + and len(op.classes_) > 2 + ): # Applies _ovr_decision_function. # See https://github.com/scikit-learn/scikit-learn/blob/ # master/sklearn/utils/multiclass.py#L407: @@ -248,19 +291,18 @@ def convert_sklearn_svm_classifier( if apply_less is None: raise RuntimeError( - "Function apply_less is missing. " - "onnxconverter-common is too old.") + "Function apply_less is missing. " "onnxconverter-common is too old." + ) - cst0 = scope.get_unique_variable_name('cst0') - negative = scope.get_unique_variable_name('negative') + cst0 = scope.get_unique_variable_name("cst0") + negative = scope.get_unique_variable_name("negative") container.add_initializer(cst0, proto_dtype, [], [0]) apply_less(scope, [output_name, cst0], negative, container) - inegative = scope.get_unique_variable_name('inegative') - apply_cast(scope, negative, inegative, container, - to=proto_dtype) + inegative = scope.get_unique_variable_name("inegative") + apply_cast(scope, negative, inegative, container, to=proto_dtype) - score_name = scope.get_unique_variable_name('neg') - container.add_node('Neg', [output_name], score_name) + score_name = scope.get_unique_variable_name("neg") + container.add_node("Neg", [output_name], score_name) # # ... 
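# --- Editorial sketch (not part of the patch): a minimal NumPy rendering of the
# scikit-learn `_ovr_decision_function` helper referenced in the surrounding
# comments, which the Less/Cast/Neg nodes plus the SklearnOVRDecisionFunction
# operator reproduce in ONNX. Here `predictions` is assumed to be the boolean
# matrix (decision < 0) and `confidences` the negated pairwise decision values
# of shape [N, C*(C-1)/2]; names are illustrative only.
import numpy as np

def ovr_decision_function_sketch(predictions, confidences, n_classes):
    n_samples = predictions.shape[0]
    votes = np.zeros((n_samples, n_classes))
    sum_of_confidences = np.zeros((n_samples, n_classes))
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            # accumulate signed confidences and pairwise votes
            sum_of_confidences[:, i] -= confidences[:, k]
            sum_of_confidences[:, j] += confidences[:, k]
            votes[predictions[:, k] == 0, i] += 1
            votes[predictions[:, k] == 1, j] += 1
            k += 1
    # confidences are squashed into (-1/3, 1/3) so they only break ties
    # between equal vote counts, as in the formula quoted below
    transformed = sum_of_confidences / (3 * (np.abs(sum_of_confidences) + 1))
    return votes + transformed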
@@ -281,29 +323,33 @@ def convert_sklearn_svm_classifier( # sum_of_confidences / (3 * (np.abs(sum_of_confidences) + 1))) # return votes + transformed_confidences - this_operator = scope.declare_local_operator( - "SklearnOVRDecisionFunction", op) + this_operator = scope.declare_local_operator("SklearnOVRDecisionFunction", op) cl_type = operator.inputs[0].type.__class__ prob_sign = scope.declare_local_variable("prob_sign", cl_type()) - container.add_node('Identity', [inegative], [prob_sign.onnx_name]) + container.add_node("Identity", [inegative], [prob_sign.onnx_name]) prob_score = scope.declare_local_variable("prob_sign", cl_type()) - container.add_node('Identity', [score_name], [prob_score.onnx_name]) + container.add_node("Identity", [score_name], [prob_score.onnx_name]) this_operator.inputs.append(prob_sign) this_operator.inputs.append(prob_score) - ovr_name = scope.declare_local_variable('ovr_output', cl_type()) + ovr_name = scope.declare_local_variable("ovr_output", cl_type()) this_operator.outputs.append(ovr_name) output_name = operator.outputs[1].full_name - container.add_node('Identity', [ovr_name.onnx_name], [output_name]) - - -register_converter('SklearnOneClassSVM', convert_sklearn_svm_regressor) -register_converter('SklearnSVC', convert_sklearn_svm_classifier, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) -register_converter('SklearnSVR', convert_sklearn_svm_regressor) + container.add_node("Identity", [ovr_name.onnx_name], [output_name]) + + +register_converter("SklearnOneClassSVM", convert_sklearn_svm_regressor) +register_converter( + "SklearnSVC", + convert_sklearn_svm_classifier, + options={ + "zipmap": [True, False, "columns"], + "nocl": [True, False], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, +) +register_converter("SklearnSVR", convert_sklearn_svm_regressor) diff --git a/skl2onnx/operator_converters/text_vectoriser.py b/skl2onnx/operator_converters/text_vectoriser.py index 9b72d6e3a..5d5ba7eba 100644 --- a/skl2onnx/operator_converters/text_vectoriser.py +++ b/skl2onnx/operator_converters/text_vectoriser.py @@ -4,8 +4,7 @@ import warnings from collections import OrderedDict, Counter import numpy as np -from ..common._apply_operation import ( - apply_cast, apply_reshape, apply_identity) +from ..common._apply_operation import apply_cast, apply_reshape, apply_identity from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer @@ -24,7 +23,7 @@ def _intelligent_split(text, op, tokenizer, existing): `_. """ - if op.analyzer == 'word': + if op.analyzer == "word": if op.ngram_range[0] == op.ngram_range[1] == 1: spl = [text] elif op.ngram_range[0] == 1 and len(text) >= 2: @@ -46,8 +45,7 @@ def _intelligent_split(text, op, tokenizer, existing): if len(spl) == 1: pass elif len(spl) == 2: - if (spl[0] not in op.vocabulary_ or - spl[1] not in op.vocabulary_): + if spl[0] not in op.vocabulary_ or spl[1] not in op.vocabulary_: # This is neceassarily a single token. 
spl = [text] elif spl[0] in op.vocabulary_ and spl[1] in op.vocabulary_: @@ -57,12 +55,16 @@ def _intelligent_split(text, op, tokenizer, existing): pass elif len(spl) == 3: stok = (all([s in op.vocabulary_ for s in spl]), spl) - spl12 = (spl[2] in op.vocabulary_ and - (spl[0] + ' ' + spl[1]) in op.vocabulary_, - [spl[0] + ' ' + spl[1], spl[2]]) - spl23 = (spl[0] in op.vocabulary_ and - (spl[1] + ' ' + spl[2]) in op.vocabulary_, - [spl[0], spl[1] + ' ' + spl[2]]) + spl12 = ( + spl[2] in op.vocabulary_ + and (spl[0] + " " + spl[1]) in op.vocabulary_, + [spl[0] + " " + spl[1], spl[2]], + ) + spl23 = ( + spl[0] in op.vocabulary_ + and (spl[1] + " " + spl[2]) in op.vocabulary_, + [spl[0], spl[1] + " " + spl[2]], + ) c = Counter(map(lambda t: t[0], [stok, spl12, spl23])) if c.get(True, -1) == 0: spl = [text] @@ -77,8 +79,10 @@ def _intelligent_split(text, op, tokenizer, existing): pass else: exc = ( - "More than one decomposition in tokens: [" + - ", ".join(map(lambda t: "-".join(t), found)) + "].") + "More than one decomposition in tokens: [" + + ", ".join(map(lambda t: "-".join(t), found)) + + "]." + ) elif any(map(lambda g: g in op.vocabulary_, spl)): # TODO: handle this case with an algorithm # which is able to break a string into @@ -89,7 +93,8 @@ def _intelligent_split(text, op, tokenizer, existing): "Unable to split n-grams '{}' into tokens. " "{} This happens when a token contain " "spaces. Token '{}' may be a token or a n-gram '{}'." - "".format(text, exc, text, spl)) + "".format(text, exc, text, spl) + ) else: # We reuse the tokenizer hoping that will clear # ambiguities but this might be slow. @@ -102,9 +107,9 @@ def _intelligent_split(text, op, tokenizer, existing): raise RuntimeError( f"The converter cannot guess how to split expression " f"{text!r} into tokens. This case happens when tokens have " - f"spaces.") - if (op.ngram_range[0] == 1 and - (len(op.ngram_range) == 1 or op.ngram_range[1] > 1)): + f"spaces." + ) + if op.ngram_range[0] == 1 and (len(op.ngram_range) == 1 or op.ngram_range[1] > 1): # All grams should be existing in the vocabulary. for g in spl: if g not in op.vocabulary_: @@ -112,13 +117,15 @@ def _intelligent_split(text, op, tokenizer, existing): "Unable to split n-grams '{}' into tokens {} " "existing in the vocabulary. Token '{}' does not " "exist in the vocabulary." - ".".format(text, spl, g)) + ".".format(text, spl, g) + ) existing.add(spl) return spl -def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_text_vectorizer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converters for class `TfidfVectorizer = 9." - "".format(op.__class__.__name__)) + "".format(op.__class__.__name__) + ) if op.analyzer == "char_wb": raise NotImplementedError( "CountVectorizer cannot be converted, " "only tokenizer='word' is fully supported. " "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." + ) if op.analyzer == "char": warnings.warn( "The conversion of CountVectorizer may not work. " "only tokenizer='word' is fully supported. " "You may raise an issue at " "https://github.com/onnx/sklearn-onnx/issues.", - UserWarning) + UserWarning, + ) if op.strip_accents is not None: raise NotImplementedError( "CountVectorizer cannot be converted, " "only strip_accents=None is supported. 
" "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." + ) options = container.get_options( - op, dict(separators="DEFAULT", - tokenexp=None, - nan=False, - keep_empty_string=False)) - if set(options) != {'separators', 'tokenexp', 'nan', 'keep_empty_string'}: - raise RuntimeError("Unknown option {} for {}".format( - set(options) - {'separators'}, type(op))) - - if op.analyzer == 'word': - default_pattern = '(?u)\\b\\w\\w+\\b' - if options['separators'] == "DEFAULT" and options['tokenexp'] is None: + op, + dict(separators="DEFAULT", tokenexp=None, nan=False, keep_empty_string=False), + ) + if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string"}: + raise RuntimeError( + "Unknown option {} for {}".format(set(options) - {"separators"}, type(op)) + ) + + if op.analyzer == "word": + default_pattern = "(?u)\\b\\w\\w+\\b" + if options["separators"] == "DEFAULT" and options["tokenexp"] is None: regex = op.token_pattern if regex == default_pattern: - regex = '[a-zA-Z0-9_]+' + regex = "[a-zA-Z0-9_]+" default_separators = None - elif options['tokenexp'] is not None: - if options['tokenexp']: - regex = options['tokenexp'] + elif options["tokenexp"] is not None: + if options["tokenexp"]: + regex = options["tokenexp"] else: regex = op.token_pattern if regex == default_pattern: - regex = '[a-zA-Z0-9_]+' + regex = "[a-zA-Z0-9_]+" default_separators = None else: regex = None - default_separators = options['separators'] + default_separators = options["separators"] else: - if options['separators'] != 'DEFAULT': - raise RuntimeError("Option separators has no effect " - "if analyser != 'word'.") - regex = options['tokenexp'] if options['tokenexp'] else '.' + if options["separators"] != "DEFAULT": + raise RuntimeError( + "Option separators has no effect " "if analyser != 'word'." + ) + regex = options["tokenexp"] if options["tokenexp"] else "." default_separators = None if op.preprocessor is not None: raise NotImplementedError( "Custom preprocessor cannot be converted into ONNX. " "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." + ) if op.tokenizer is not None: raise NotImplementedError( "Custom tokenizer cannot be converted into ONNX. " "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." + ) if op.strip_accents is not None: raise NotImplementedError( "Operator StringNormalizer cannot remove accents. " "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." + ) if hasattr(op, "stop_words_"): - stop_words = op.stop_words_ | ( - set(op.stop_words) if op.stop_words else set()) + stop_words = op.stop_words_ | (set(op.stop_words) if op.stop_words else set()) else: stop_words = set() for w in stop_words: if not isinstance(w, str): raise TypeError( - f"One stop word is not a string {w!r} " - f"in stop_words={stop_words}.") + f"One stop word is not a string {w!r} " f"in stop_words={stop_words}." 
+ ) if op.lowercase or stop_words: if len(operator.input_full_names) != 1: - raise RuntimeError("Only one input is allowed, found {}.".format( - operator.input_full_names)) + raise RuntimeError( + "Only one input is allowed, found {}.".format(operator.input_full_names) + ) # StringNormalizer - op_type = 'StringNormalizer' - attrs = {'name': scope.get_unique_operator_name(op_type)} - normalized = scope.get_unique_variable_name('normalized') + op_type = "StringNormalizer" + attrs = {"name": scope.get_unique_operator_name(op_type)} + normalized = scope.get_unique_variable_name("normalized") if container.target_opset >= 10: - attrs.update({ - 'case_change_action': 'LOWER', - 'is_case_sensitive': not op.lowercase, - }) + attrs.update( + { + "case_change_action": "LOWER", + "is_case_sensitive": not op.lowercase, + } + ) op_version = 10 - domain = '' + domain = "" else: - attrs.update({ - 'casechangeaction': 'LOWER', - 'is_case_sensitive': not op.lowercase, - }) + attrs.update( + { + "casechangeaction": "LOWER", + "is_case_sensitive": not op.lowercase, + } + ) op_version = 9 - domain = 'com.microsoft' - opvs = 1 if domain == 'com.microsoft' else op_version + domain = "com.microsoft" + opvs = 1 if domain == "com.microsoft" else op_version if stop_words: - attrs['stopwords'] = list(sorted(stop_words)) + attrs["stopwords"] = list(sorted(stop_words)) - if options['keep_empty_string']: - del attrs['name'] + if options["keep_empty_string"]: + del attrs["name"] op_norm = OnnxStringNormalizer( - 'text_in', op_version=container.target_opset, - output_names=['text_out'], **attrs) + "text_in", + op_version=container.target_opset, + output_names=["text_out"], + **attrs, + ) scan_body = op_norm.to_onnx( - OrderedDict([('text_in', StringTensorType())]), - outputs=[('text_out', StringTensorType())], - target_opset=op_version) - - vector = scope.get_unique_variable_name('vector') - apply_reshape(scope, operator.input_full_names[0], - vector, container, - desired_shape=(-1, 1)) - container.add_node('Scan', vector, normalized, - body=scan_body.graph, num_scan_inputs=1) + OrderedDict([("text_in", StringTensorType())]), + outputs=[("text_out", StringTensorType())], + target_opset=op_version, + ) + + vector = scope.get_unique_variable_name("vector") + apply_reshape( + scope, + operator.input_full_names[0], + vector, + container, + desired_shape=(-1, 1), + ) + container.add_node( + "Scan", vector, normalized, body=scan_body.graph, num_scan_inputs=1 + ) else: - flatten = scope.get_unique_variable_name('flattened') - apply_reshape(scope, operator.input_full_names[0], - flatten, container, - desired_shape=(-1, )) - container.add_node(op_type, flatten, - normalized, op_version=opvs, - op_domain=domain, **attrs) + flatten = scope.get_unique_variable_name("flattened") + apply_reshape( + scope, + operator.input_full_names[0], + flatten, + container, + desired_shape=(-1,), + ) + container.add_node( + op_type, flatten, normalized, op_version=opvs, op_domain=domain, **attrs + ) else: normalized = operator.input_full_names @@ -327,29 +358,36 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, while padvalue in op.vocabulary_: padvalue += "#" - op_type = 'Tokenizer' - attrs = {'name': scope.get_unique_operator_name(op_type)} - attrs.update({ - 'pad_value': padvalue, - 'mark': False, - 'mincharnum': 1, - }) + op_type = "Tokenizer" + attrs = {"name": scope.get_unique_operator_name(op_type)} + attrs.update( + { + "pad_value": padvalue, + "mark": False, + "mincharnum": 1, + } + ) if regex is None: - 
attrs['separators'] = default_separators + attrs["separators"] = default_separators else: - attrs['tokenexp'] = regex + attrs["tokenexp"] = regex - tokenized = scope.get_unique_variable_name('tokenized') - container.add_node(op_type, normalized, tokenized, - op_domain='com.microsoft', **attrs) + tokenized = scope.get_unique_variable_name("tokenized") + container.add_node( + op_type, normalized, tokenized, op_domain="com.microsoft", **attrs + ) # Flatten # Tokenizer outputs shape {1, C} or {1, 1, C}. # Second shape is not allowed by TfIdfVectorizer. # We use Flatten which produces {1, C} in both cases. - flatt_tokenized = scope.get_unique_variable_name('flattened') - container.add_node("Flatten", tokenized, flatt_tokenized, - name=scope.get_unique_operator_name('Flatten')) + flatt_tokenized = scope.get_unique_variable_name("flattened") + container.add_node( + "Flatten", + tokenized, + flatt_tokenized, + name=scope.get_unique_operator_name("Flatten"), + ) tokenized = flatt_tokenized # Ngram - TfIdfVectorizer @@ -358,8 +396,8 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, weights = [0 for i in range(C)] for k, v in op.vocabulary_.items(): words[v] = k - weights[v] = 1. - mode = 'TF' + weights[v] = 1.0 + mode = "TF" # Scikit-learn sorts n-grams by alphabetical order.. # onnx assumes it is sorted by n. @@ -371,7 +409,7 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, if isinstance(w, tuple): # TraceableCountVectorizer, TraceableTfIdfVectorizer spl = list(w) - w = ' '.join(w) + w = " ".join(w) else: # CountVectorizer, TfIdfVectorizer try: @@ -387,10 +425,10 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, f"{len(errors)} errors occurred. You can fix it by using " f"class Traceable{op.__class__.__name__}.\n" f"You can learn more at https://github.com/scikit-learn/" - f"scikit-learn/issues/13733.\n{err}") + f"scikit-learn/issues/13733.\n{err}" + ) - ng_split_words = sorted([(len(a[0]), a[0], i) - for i, a in enumerate(split_words)]) + ng_split_words = sorted([(len(a[0]), a[0], i) for i, a in enumerate(split_words)]) key_indices = [a[2] for a in ng_split_words] ngcounts = [0 for i in range(op.ngram_range[0])] @@ -406,75 +444,96 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator, weights[ind] = weights_[i] # Create the node. 
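# --- Editorial sketch (not part of the patch): an illustrative toy run of the
# reordering above. scikit-learn keeps `vocabulary_` in alphabetical order
# while the ONNX TfIdfVectorizer attributes expect n-grams grouped by length,
# so the converter sorts entries by token count and keeps each entry's original
# column index (key_indices) to map results back to scikit-learn's columns.
# The vocabulary and the (tokens, key) structure below are hypothetical.
toy_vocabulary = {"ab": 0, "ab cd": 1, "cd": 2}
toy_split_words = [(key.split(" "), key) for key in toy_vocabulary]
toy_sorted = sorted(
    (len(tokens), tokens, idx) for idx, (tokens, _) in enumerate(toy_split_words)
)
toy_key_indices = [idx for _, _, idx in toy_sorted]
print(toy_key_indices)  # [0, 2, 1]: the two 1-grams first, then the 2-gram "ab cd"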
- attrs = {'name': scope.get_unique_operator_name("TfIdfVectorizer")} - attrs.update({ - 'min_gram_length': op.ngram_range[0], - 'max_gram_length': op.ngram_range[1], - 'mode': mode, - 'max_skip_count': 0, - 'pool_strings': words, - 'ngram_indexes': key_indices, - 'ngram_counts': ngcounts, - 'weights': list(map(np.float32, weights)), - }) - output = scope.get_unique_variable_name('output') + attrs = {"name": scope.get_unique_operator_name("TfIdfVectorizer")} + attrs.update( + { + "min_gram_length": op.ngram_range[0], + "max_gram_length": op.ngram_range[1], + "mode": mode, + "max_skip_count": 0, + "pool_strings": words, + "ngram_indexes": key_indices, + "ngram_counts": ngcounts, + "weights": list(map(np.float32, weights)), + } + ) + output = scope.get_unique_variable_name("output") proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT if proto_dtype == onnx_proto.TensorProto.DOUBLE: - output_tf = scope.get_unique_variable_name('cast_result') + output_tf = scope.get_unique_variable_name("cast_result") else: output_tf = output if container.target_opset < 9: - op_type = 'Ngram' - container.add_node(op_type, tokenized, output_tf, - op_domain='com.microsoft', **attrs) + op_type = "Ngram" + container.add_node( + op_type, tokenized, output_tf, op_domain="com.microsoft", **attrs + ) else: - op_type = 'TfIdfVectorizer' - container.add_node(op_type, tokenized, output_tf, op_domain='', - op_version=9, **attrs) + op_type = "TfIdfVectorizer" + container.add_node( + op_type, tokenized, output_tf, op_domain="", op_version=9, **attrs + ) if proto_dtype == onnx_proto.TensorProto.DOUBLE: - apply_cast(scope, output_tf, output, - container, to=proto_dtype) + apply_cast(scope, output_tf, output, container, to=proto_dtype) if op.binary: - cast_result_name = scope.get_unique_variable_name('cast_result') - output_name = scope.get_unique_variable_name('output_name') - - apply_cast(scope, output, cast_result_name, container, - to=onnx_proto.TensorProto.BOOL) - apply_cast(scope, cast_result_name, output_name, - container, to=onnx_proto.TensorProto.FLOAT) + cast_result_name = scope.get_unique_variable_name("cast_result") + output_name = scope.get_unique_variable_name("output_name") + + apply_cast( + scope, output, cast_result_name, container, to=onnx_proto.TensorProto.BOOL + ) + apply_cast( + scope, + cast_result_name, + output_name, + container, + to=onnx_proto.TensorProto.FLOAT, + ) output = output_name options = container.get_options(op, dict(nan=False)) - replace_by_nan = options.get('nan', False) + replace_by_nan = options.get("nan", False) if replace_by_nan: # This part replaces all null values by nan. 
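# --- Editorial sketch (not part of the patch): the Equal/Where pair built
# below for the "nan" option is the graph-level equivalent of this one-liner;
# the two constant initializers play the role of the scalar 0 and np.nan.
import numpy as np

def replace_zeros_by_nan(values):
    # values: the dense TF(-IDF) matrix produced by the vectorizer output
    return np.where(values == 0, np.nan, values)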
- cst_nan_name = scope.get_unique_variable_name('nan_name') + cst_nan_name = scope.get_unique_variable_name("nan_name") container.add_initializer(cst_nan_name, proto_dtype, [1], [np.nan]) - cst_zero_name = scope.get_unique_variable_name('zero_name') + cst_zero_name = scope.get_unique_variable_name("zero_name") container.add_initializer(cst_zero_name, proto_dtype, [1], [0]) - mask_name = scope.get_unique_variable_name('mask_name') - container.add_node('Equal', [output, cst_zero_name], - mask_name, - name=scope.get_unique_operator_name('Equal')) - - where_name = scope.get_unique_variable_name('where_name') - container.add_node('Where', [mask_name, cst_nan_name, output], - where_name, - name=scope.get_unique_operator_name('Where')) + mask_name = scope.get_unique_variable_name("mask_name") + container.add_node( + "Equal", + [output, cst_zero_name], + mask_name, + name=scope.get_unique_operator_name("Equal"), + ) + + where_name = scope.get_unique_variable_name("where_name") + container.add_node( + "Where", + [mask_name, cst_nan_name, output], + where_name, + name=scope.get_unique_operator_name("Where"), + ) output = where_name apply_identity(scope, output, operator.output_full_names, container) -register_converter('SklearnCountVectorizer', convert_sklearn_text_vectorizer, - options={'tokenexp': None, 'separators': None, - 'nan': [True, False], - 'keep_empty_string': [True, False]}) +register_converter( + "SklearnCountVectorizer", + convert_sklearn_text_vectorizer, + options={ + "tokenexp": None, + "separators": None, + "nan": [True, False], + "keep_empty_string": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/tfidf_transformer.py b/skl2onnx/operator_converters/tfidf_transformer.py index e755e78c8..fe6c20239 100644 --- a/skl2onnx/operator_converters/tfidf_transformer.py +++ b/skl2onnx/operator_converters/tfidf_transformer.py @@ -8,11 +8,17 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common._apply_operation import ( - apply_add, apply_log, apply_mul, apply_identity, apply_normalizer) + apply_add, + apply_log, + apply_mul, + apply_identity, + apply_normalizer, +) -def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_tfidf_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): # TODO: use sparse containers when available dtype = guess_numpy_type(operator.inputs[0].type) if dtype != np.float64: @@ -24,7 +30,7 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator, proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator data = operator.input_full_names - output_name = scope.get_unique_variable_name('tfidftr_output') + output_name = scope.get_unique_variable_name("tfidftr_output") if op.sublinear_tf: # code scikit-learn @@ -46,7 +52,8 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator, # sparse containers have not yet been implemented. raise RuntimeError( "ONNX does not support sparse tensors before opset < 11, " - "sublinear_tf must be False.") + "sublinear_tf must be False." 
+ ) if op.use_idf: cst = op.idf_.astype(float_type) @@ -54,42 +61,55 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator, cst = np.diag(cst) cst = cst.ravel().flatten() shape = [len(cst)] - idfcst = scope.get_unique_variable_name('idfcst') + idfcst = scope.get_unique_variable_name("idfcst") container.add_initializer(idfcst, proto_dtype, shape, cst) apply_mul(scope, data + [idfcst], output_name, container, broadcast=1) else: output_name = data[0] if op.norm is not None: - norm_name = scope.get_unique_variable_name('tfidftr_norm') + norm_name = scope.get_unique_variable_name("tfidftr_norm") apply_normalizer( - scope, output_name, norm_name, container, - norm=op.norm.upper(), use_float=float_type == np.float32) + scope, + output_name, + norm_name, + container, + norm=op.norm.upper(), + use_float=float_type == np.float32, + ) output_name = norm_name options = container.get_options(op, dict(nan=False)) - replace_by_nan = options.get('nan', False) + replace_by_nan = options.get("nan", False) if replace_by_nan: # This part replaces all null values by nan. - cst_nan_name = scope.get_unique_variable_name('nan_name') + cst_nan_name = scope.get_unique_variable_name("nan_name") container.add_initializer(cst_nan_name, proto_dtype, [1], [np.nan]) - cst_zero_name = scope.get_unique_variable_name('zero_name') + cst_zero_name = scope.get_unique_variable_name("zero_name") container.add_initializer(cst_zero_name, proto_dtype, [1], [0]) - mask_name = scope.get_unique_variable_name('mask_name') - container.add_node('Equal', [output_name, cst_zero_name], - mask_name, - name=scope.get_unique_operator_name('Equal')) + mask_name = scope.get_unique_variable_name("mask_name") + container.add_node( + "Equal", + [output_name, cst_zero_name], + mask_name, + name=scope.get_unique_operator_name("Equal"), + ) - where_name = scope.get_unique_variable_name('where_name') - container.add_node('Where', [mask_name, cst_nan_name, output_name], - where_name, - name=scope.get_unique_operator_name('Where')) + where_name = scope.get_unique_variable_name("where_name") + container.add_node( + "Where", + [mask_name, cst_nan_name, output_name], + where_name, + name=scope.get_unique_operator_name("Where"), + ) output_name = where_name apply_identity(scope, output_name, operator.output_full_names, container) -register_converter('SklearnTfidfTransformer', - convert_sklearn_tfidf_transformer, - options={'nan': [True, False]}) +register_converter( + "SklearnTfidfTransformer", + convert_sklearn_tfidf_transformer, + options={"nan": [True, False]}, +) diff --git a/skl2onnx/operator_converters/tfidf_vectoriser.py b/skl2onnx/operator_converters/tfidf_vectoriser.py index c5c9bcaba..a624b90b9 100644 --- a/skl2onnx/operator_converters/tfidf_vectoriser.py +++ b/skl2onnx/operator_converters/tfidf_vectoriser.py @@ -3,16 +3,16 @@ from onnx import onnx_pb as onnx_proto from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from ..common._apply_operation import apply_identity -from ..common.data_types import ( - FloatTensorType, DoubleTensorType, guess_proto_type) +from ..common.data_types import FloatTensorType, DoubleTensorType, guess_proto_type from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from .._supported_operators import sklearn_operator_name_map -def convert_sklearn_tfidf_vectoriser(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_tfidf_vectoriser( + 
scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converter for scikit-learn's TfidfVectoriser. """ @@ -30,23 +30,31 @@ def convert_sklearn_tfidf_vectoriser(scope: Scope, operator: Operator, clr = DoubleTensorType else: raise RuntimeError( - "Unexpected dtype '{}'. Float or double expected.".format( - proto_dtype)) + "Unexpected dtype '{}'. Float or double expected.".format(proto_dtype) + ) cv_output_name = scope.declare_local_variable( - 'count_vec_output', clr([None, columns])) + "count_vec_output", clr([None, columns]) + ) cv_operator.outputs.append(cv_output_name) op_type = sklearn_operator_name_map[TfidfTransformer] tfidf_operator = scope.declare_local_operator(op_type, tfidf_op) tfidf_operator.inputs.append(cv_output_name) - tfidf_output_name = scope.declare_local_variable('tfidf_output', clr()) + tfidf_output_name = scope.declare_local_variable("tfidf_output", clr()) tfidf_operator.outputs.append(tfidf_output_name) - apply_identity(scope, tfidf_output_name.full_name, - operator.outputs[0].full_name, container) + apply_identity( + scope, tfidf_output_name.full_name, operator.outputs[0].full_name, container + ) -register_converter('SklearnTfidfVectorizer', convert_sklearn_tfidf_vectoriser, - options={'tokenexp': None, 'separators': None, - 'nan': [True, False], - 'keep_empty_string': [True, False]}) +register_converter( + "SklearnTfidfVectorizer", + convert_sklearn_tfidf_vectoriser, + options={ + "tokenexp": None, + "separators": None, + "nan": [True, False], + "keep_empty_string": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/voting_classifier.py b/skl2onnx/operator_converters/voting_classifier.py index 429ecd5d9..a2388998b 100644 --- a/skl2onnx/operator_converters/voting_classifier.py +++ b/skl2onnx/operator_converters/voting_classifier.py @@ -12,8 +12,9 @@ from ..proto import onnx_proto -def convert_voting_classifier(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_voting_classifier( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *VotingClassifier* into *ONNX* format. @@ -26,19 +27,25 @@ def convert_voting_classifier(scope: Scope, operator: Operator, distinction and always creates two outputs, labels and probabilities. 
""" - if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']: + if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]: raise RuntimeError( "Option 'nocl' is not implemented for operator '{}'.".format( - operator.raw_operator.__class__.__name__)) + operator.raw_operator.__class__.__name__ + ) + ) proto_dtype = guess_proto_type(operator.inputs[0].type) if proto_dtype != onnx_proto.TensorProto.DOUBLE: proto_dtype = onnx_proto.TensorProto.FLOAT op = operator.raw_operator n_classes = len(op.classes_) - classes_ind_name = scope.get_unique_variable_name('classes_ind') - container.add_initializer(classes_ind_name, onnx_proto.TensorProto.INT64, - (1, n_classes), list(range(n_classes))) + classes_ind_name = scope.get_unique_variable_name("classes_ind") + container.add_initializer( + classes_ind_name, + onnx_proto.TensorProto.INT64, + (1, n_classes), + list(range(n_classes)), + ) probs_names = [] one_name = None @@ -51,48 +58,65 @@ def convert_voting_classifier(scope: Scope, operator: Operator, this_operator = scope.declare_local_operator(op_type, estimator) this_operator.inputs = operator.inputs - label_name = scope.declare_local_variable( - 'label_%d' % i, Int64TensorType()) + label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType()) prob_name = scope.declare_local_variable( - 'voting_proba_%d' % i, operator.inputs[0].type.__class__()) + "voting_proba_%d" % i, operator.inputs[0].type.__class__() + ) this_operator.outputs.append(label_name) this_operator.outputs.append(prob_name) - if op.voting == 'hard': + if op.voting == "hard": if one_name is None: - shape_name = scope.get_unique_variable_name('shape') + shape_name = scope.get_unique_variable_name("shape") container.add_node( - 'Shape', prob_name.onnx_name, shape_name, - name=scope.get_unique_operator_name('Shape')) - zero_name = scope.get_unique_variable_name('zero') + "Shape", + prob_name.onnx_name, + shape_name, + name=scope.get_unique_operator_name("Shape"), + ) + zero_name = scope.get_unique_variable_name("zero") container.add_node( - 'ConstantOfShape', shape_name, zero_name, - name=scope.get_unique_operator_name('CoSA'), - value=make_tensor("value", proto_dtype, - (1, ), [0.]), op_version=9) - one_name = scope.get_unique_variable_name('one') + "ConstantOfShape", + shape_name, + zero_name, + name=scope.get_unique_operator_name("CoSA"), + value=make_tensor("value", proto_dtype, (1,), [0.0]), + op_version=9, + ) + one_name = scope.get_unique_variable_name("one") container.add_node( - 'ConstantOfShape', shape_name, one_name, - name=scope.get_unique_operator_name('CoSB'), - value=make_tensor("value", proto_dtype, - (1, ), [1.]), op_version=9) - - argmax_output_name = scope.get_unique_variable_name( - 'argmax_output') - container.add_node('ArgMax', prob_name.onnx_name, - argmax_output_name, - name=scope.get_unique_operator_name('ArgMax'), - axis=1) - - equal_name = scope.get_unique_variable_name('equal') - container.add_node('Equal', [argmax_output_name, classes_ind_name], - equal_name, - name=scope.get_unique_operator_name('Equal')) - - max_proba_name = scope.get_unique_variable_name('probsmax') - container.add_node('Where', [equal_name, one_name, zero_name], - max_proba_name, - name=scope.get_unique_operator_name('Where')) + "ConstantOfShape", + shape_name, + one_name, + name=scope.get_unique_operator_name("CoSB"), + value=make_tensor("value", proto_dtype, (1,), [1.0]), + op_version=9, + ) + + argmax_output_name = scope.get_unique_variable_name("argmax_output") + container.add_node( + "ArgMax", 
+ prob_name.onnx_name, + argmax_output_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + + equal_name = scope.get_unique_variable_name("equal") + container.add_node( + "Equal", + [argmax_output_name, classes_ind_name], + equal_name, + name=scope.get_unique_operator_name("Equal"), + ) + + max_proba_name = scope.get_unique_variable_name("probsmax") + container.add_node( + "Where", + [equal_name, one_name, zero_name], + max_proba_name, + name=scope.get_unique_operator_name("Where"), + ) prob_name = max_proba_name else: prob_name = prob_name.onnx_name @@ -100,37 +124,53 @@ def convert_voting_classifier(scope: Scope, operator: Operator, if op.weights is not None: val = op.weights[i] / op.weights.sum() else: - val = 1. / len(op.estimators_) - - weights_name = scope.get_unique_variable_name('w%d' % i) - container.add_initializer( - weights_name, proto_dtype, [1], [val]) - wprob_name = scope.get_unique_variable_name('wprob_name') - apply_mul(scope, [prob_name, weights_name], - wprob_name, container, broadcast=1) + val = 1.0 / len(op.estimators_) + + weights_name = scope.get_unique_variable_name("w%d" % i) + container.add_initializer(weights_name, proto_dtype, [1], [val]) + wprob_name = scope.get_unique_variable_name("wprob_name") + apply_mul(scope, [prob_name, weights_name], wprob_name, container, broadcast=1) probs_names.append(wprob_name) if op.flatten_transform in (False, None): - container.add_node('Sum', probs_names, - operator.outputs[1].full_name, - name=scope.get_unique_operator_name('Sum')) + container.add_node( + "Sum", + probs_names, + operator.outputs[1].full_name, + name=scope.get_unique_operator_name("Sum"), + ) else: raise NotImplementedError( "flatten_transform==True is not implemented yet. " "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + "https://github.com/onnx/sklearn-onnx/issues." 
+ ) # labels - label_name = scope.get_unique_variable_name('label_name') - container.add_node('ArgMax', operator.outputs[1].full_name, label_name, - name=scope.get_unique_operator_name('ArgMax'), axis=1) - _finalize_converter_classes(scope, label_name, - operator.outputs[0].full_name, container, - op.classes_, proto_dtype) - - -register_converter('SklearnVotingClassifier', - convert_voting_classifier, - options={'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'nocl': [True, False]}) + label_name = scope.get_unique_variable_name("label_name") + container.add_node( + "ArgMax", + operator.outputs[1].full_name, + label_name, + name=scope.get_unique_operator_name("ArgMax"), + axis=1, + ) + _finalize_converter_classes( + scope, + label_name, + operator.outputs[0].full_name, + container, + op.classes_, + proto_dtype, + ) + + +register_converter( + "SklearnVotingClassifier", + convert_voting_classifier, + options={ + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "nocl": [True, False], + }, +) diff --git a/skl2onnx/operator_converters/voting_regressor.py b/skl2onnx/operator_converters/voting_regressor.py index 73da321ac..c7ce01690 100644 --- a/skl2onnx/operator_converters/voting_regressor.py +++ b/skl2onnx/operator_converters/voting_regressor.py @@ -5,23 +5,22 @@ from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer from ..common._apply_operation import apply_mul -from ..common.data_types import ( - guess_proto_type, FloatTensorType, DoubleTensorType) +from ..common.data_types import guess_proto_type, FloatTensorType, DoubleTensorType from .._supported_operators import sklearn_operator_name_map -def convert_voting_regressor(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_voting_regressor( + scope: Scope, operator: Operator, container: ModelComponentContainer +): """ Converts a *VotingRegressor* into *ONNX* format. """ op = operator.raw_operator - if not isinstance(operator.inputs[0].type, - (FloatTensorType, DoubleTensorType)): - this_operator = scope.declare_local_operator('SklearnCast') + if not isinstance(operator.inputs[0].type, (FloatTensorType, DoubleTensorType)): + this_operator = scope.declare_local_operator("SklearnCast") this_operator.inputs = operator.inputs - var_name = scope.declare_local_variable('cast', FloatTensorType()) + var_name = scope.declare_local_variable("cast", FloatTensorType()) this_operator.outputs.append(var_name) inputs = this_operator.outputs else: @@ -38,30 +37,32 @@ def convert_voting_regressor(scope: Scope, operator: Operator, this_operator.inputs = inputs var_name = scope.declare_local_variable( - 'var_%d' % i, inputs[0].type.__class__()) + "var_%d" % i, inputs[0].type.__class__() + ) this_operator.outputs.append(var_name) var_name = var_name.onnx_name if op.weights is not None: val = op.weights[i] / op.weights.sum() else: - val = 1. 
/ len(op.estimators_) + val = 1.0 / len(op.estimators_) - weights_name = scope.get_unique_variable_name('w%d' % i) + weights_name = scope.get_unique_variable_name("w%d" % i) proto_dtype = guess_proto_type(inputs[0].type) - container.add_initializer( - weights_name, proto_dtype, [1], [val]) - wvar_name = scope.get_unique_variable_name('wvar_%d' % i) - apply_mul(scope, [var_name, weights_name], - wvar_name, container, broadcast=1) + container.add_initializer(weights_name, proto_dtype, [1], [val]) + wvar_name = scope.get_unique_variable_name("wvar_%d" % i) + apply_mul(scope, [var_name, weights_name], wvar_name, container, broadcast=1) - flat_name = scope.get_unique_variable_name('fvar_%d' % i) - container.add_node('Flatten', wvar_name, flat_name) + flat_name = scope.get_unique_variable_name("fvar_%d" % i) + container.add_node("Flatten", wvar_name, flat_name) vars_names.append(flat_name) - container.add_node('Sum', vars_names, - operator.outputs[0].full_name, - name=scope.get_unique_operator_name('Sum')) + container.add_node( + "Sum", + vars_names, + operator.outputs[0].full_name, + name=scope.get_unique_operator_name("Sum"), + ) -register_converter('SklearnVotingRegressor', convert_voting_regressor) +register_converter("SklearnVotingRegressor", convert_voting_regressor) diff --git a/skl2onnx/operator_converters/zip_map.py b/skl2onnx/operator_converters/zip_map.py index 85e6fb0c4..914ea0529 100644 --- a/skl2onnx/operator_converters/zip_map.py +++ b/skl2onnx/operator_converters/zip_map.py @@ -2,69 +2,105 @@ from ..proto import onnx_proto from ..common._apply_operation import ( - apply_slice, apply_cast, apply_identity, apply_reshape) + apply_slice, + apply_cast, + apply_identity, + apply_reshape, +) from ..common._registration import register_converter from ..common._topology import Scope, Operator from ..common._container import ModelComponentContainer -def _common_convert_sklearn_zipmap(scope: Scope, operator: Operator, - container: ModelComponentContainer): - zipmap_attrs = {'name': scope.get_unique_operator_name('ZipMap')} +def _common_convert_sklearn_zipmap( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + zipmap_attrs = {"name": scope.get_unique_operator_name("ZipMap")} to_type = onnx_proto.TensorProto.INT64 - if hasattr(operator, 'classlabels_int64s'): - zipmap_attrs['classlabels_int64s'] = operator.classlabels_int64s - elif hasattr(operator, 'classlabels_strings'): - zipmap_attrs['classlabels_strings'] = operator.classlabels_strings + if hasattr(operator, "classlabels_int64s"): + zipmap_attrs["classlabels_int64s"] = operator.classlabels_int64s + elif hasattr(operator, "classlabels_strings"): + zipmap_attrs["classlabels_strings"] = operator.classlabels_strings to_type = onnx_proto.TensorProto.STRING if to_type == onnx_proto.TensorProto.STRING: - apply_identity(scope, operator.inputs[0].full_name, - operator.outputs[0].full_name, container) + apply_identity( + scope, + operator.inputs[0].full_name, + operator.outputs[0].full_name, + container, + ) else: - apply_cast(scope, operator.inputs[0].full_name, - operator.outputs[0].full_name, container, to=to_type) + apply_cast( + scope, + operator.inputs[0].full_name, + operator.outputs[0].full_name, + container, + to=to_type, + ) return zipmap_attrs -def convert_sklearn_zipmap(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_zipmap( + scope: Scope, operator: Operator, container: ModelComponentContainer +): if len(operator.inputs) == 2: - zipmap_attrs = 
_common_convert_sklearn_zipmap( - scope, operator, container) - container.add_node('ZipMap', operator.inputs[1].full_name, - operator.outputs[1].full_name, - op_domain='ai.onnx.ml', **zipmap_attrs) + zipmap_attrs = _common_convert_sklearn_zipmap(scope, operator, container) + container.add_node( + "ZipMap", + operator.inputs[1].full_name, + operator.outputs[1].full_name, + op_domain="ai.onnx.ml", + **zipmap_attrs + ) return - if hasattr(operator, 'classlabels_int64s'): + if hasattr(operator, "classlabels_int64s"): zipmap_attrs = dict(classlabels_int64s=operator.classlabels_int64s) - elif hasattr(operator, 'classlabels_strings'): + elif hasattr(operator, "classlabels_strings"): zipmap_attrs = dict(classlabels_strings=operator.classlabels_strings) else: raise RuntimeError( "operator should have attribute 'classlabels_int64s' or " - "'classlabels_strings'.") - container.add_node('ZipMap', operator.inputs[0].full_name, - operator.outputs[0].full_name, - op_domain='ai.onnx.ml', **zipmap_attrs) + "'classlabels_strings'." + ) + container.add_node( + "ZipMap", + operator.inputs[0].full_name, + operator.outputs[0].full_name, + op_domain="ai.onnx.ml", + **zipmap_attrs + ) -def convert_sklearn_zipmap_columns(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def convert_sklearn_zipmap_columns( + scope: Scope, operator: Operator, container: ModelComponentContainer +): _common_convert_sklearn_zipmap(scope, operator, container) probs = operator.inputs[1].full_name for i in range(1, len(operator.outputs)): out = operator.outputs[i].full_name flat = scope.get_unique_variable_name(out) apply_slice( - scope, probs, flat, container, starts=[i - 1], ends=[i], axes=[1], - operator_name=scope.get_unique_operator_name('Slice')) + scope, + probs, + flat, + container, + starts=[i - 1], + ends=[i], + axes=[1], + operator_name=scope.get_unique_operator_name("Slice"), + ) apply_reshape( - scope, flat, out, container, desired_shape=(-1, ), - operator_name=scope.get_unique_operator_name('reshape')) + scope, + flat, + out, + container, + desired_shape=(-1,), + operator_name=scope.get_unique_operator_name("reshape"), + ) -register_converter('SklearnZipMap', convert_sklearn_zipmap) -register_converter('SklearnZipMapColumns', convert_sklearn_zipmap_columns) +register_converter("SklearnZipMap", convert_sklearn_zipmap) +register_converter("SklearnZipMapColumns", convert_sklearn_zipmap_columns) diff --git a/skl2onnx/proto/__init__.py b/skl2onnx/proto/__init__.py index bd23169ca..443dfa451 100644 --- a/skl2onnx/proto/__init__.py +++ b/skl2onnx/proto/__init__.py @@ -12,6 +12,7 @@ # (string tensor get assigned twice) from onnx import mapping from onnx.onnx_pb import TensorProto, ValueInfoProto # noqa + try: from onnx.onnx_pb import SparseTensorProto # noqa except ImportError: @@ -21,25 +22,25 @@ def make_tensor_fixed(name, data_type, dims, vals, raw=False): - ''' + """ Make a TensorProto with specified arguments. If raw is False, this function will choose the corresponding proto field to store the values based on data_type. If raw is True, use "raw_data" proto field to store the values, and values should be of type bytes in this case. 
- ''' + """ tensor = TensorProto() tensor.data_type = data_type tensor.name = name - if (data_type == TensorProto.COMPLEX64 or - data_type == TensorProto.COMPLEX128): + if data_type == TensorProto.COMPLEX64 or data_type == TensorProto.COMPLEX128: vals = split_complex_to_pairs(vals) if raw: tensor.raw_data = vals else: field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[ - mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]] + mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type] + ] getattr(tensor, field).extend(vals) tensor.dims.extend(dims) @@ -63,4 +64,5 @@ def get_latest_tested_opset_version(): (return by `onnx.defs.onnx_opset_version()`). """ from .. import __max_supported_opset__ + return min(__max_supported_opset__, get_opset_number_from_onnx()) diff --git a/skl2onnx/shape_calculators/array_feature_extractor.py b/skl2onnx/shape_calculators/array_feature_extractor.py index 1c578911a..e1ac00b09 100644 --- a/skl2onnx/shape_calculators/array_feature_extractor.py +++ b/skl2onnx/shape_calculators/array_feature_extractor.py @@ -13,5 +13,6 @@ def calculate_sklearn_array_feature_extractor(operator): operator.outputs[0].type = i.type.__class__([N, C]) -register_shape_calculator('SklearnArrayFeatureExtractor', - calculate_sklearn_array_feature_extractor) +register_shape_calculator( + "SklearnArrayFeatureExtractor", calculate_sklearn_array_feature_extractor +) diff --git a/skl2onnx/shape_calculators/cast_op.py b/skl2onnx/shape_calculators/cast_op.py index 6667e0d50..20927f45e 100644 --- a/skl2onnx/shape_calculators/cast_op.py +++ b/skl2onnx/shape_calculators/cast_op.py @@ -8,21 +8,18 @@ def calculate_sklearn_cast(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) def calculate_sklearn_cast_transformer(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) op = operator.raw_operator otype = _guess_numpy_type(op.dtype, operator.inputs[0].type.shape) operator.outputs[0].type = otype +register_shape_calculator("SklearnCast", calculate_sklearn_cast) +register_shape_calculator("SklearnCastTransformer", calculate_sklearn_cast_transformer) register_shape_calculator( - 'SklearnCast', calculate_sklearn_cast) -register_shape_calculator( - 'SklearnCastTransformer', calculate_sklearn_cast_transformer) -register_shape_calculator('SklearnCastRegressor', - calculate_linear_regressor_output_shapes) + "SklearnCastRegressor", calculate_linear_regressor_output_shapes +) diff --git a/skl2onnx/shape_calculators/class_labels.py b/skl2onnx/shape_calculators/class_labels.py index b8944be83..3df221a40 100644 --- a/skl2onnx/shape_calculators/class_labels.py +++ b/skl2onnx/shape_calculators/class_labels.py @@ -9,5 +9,4 @@ def calculate_sklearn_class_labels(operator): check_input_and_output_numbers(operator, output_count_range=1) -register_shape_calculator( - 'SklearnClassLabels', calculate_sklearn_class_labels) +register_shape_calculator("SklearnClassLabels", calculate_sklearn_class_labels) diff --git a/skl2onnx/shape_calculators/concat.py b/skl2onnx/shape_calculators/concat.py index 78bd062b0..9f242b283 100644 --- a/skl2onnx/shape_calculators/concat.py +++ b/skl2onnx/shape_calculators/concat.py @@ -3,10 +3,17 @@ from ..common._registration import register_shape_calculator from ..common.data_types import ( - FloatType, Int64Type, StringType, TensorType, - 
DoubleType, BooleanTensorType, FloatTensorType, - Int64TensorType, StringTensorType, - DoubleTensorType) + FloatType, + Int64Type, + StringType, + TensorType, + DoubleType, + BooleanTensorType, + FloatTensorType, + Int64TensorType, + StringTensorType, + DoubleTensorType, +) from ..common.utils import check_input_and_output_numbers @@ -27,8 +34,7 @@ def calculate_sklearn_concat(operator): C = None else: C += i.type.shape[1] - elif isinstance(i.type, ( - Int64Type, FloatType, StringType, DoubleType)): + elif isinstance(i.type, (Int64Type, FloatType, StringType, DoubleType)): C += 1 else: C = None @@ -41,19 +47,28 @@ def more_generic(t1, t2): raise RuntimeError( "Cannot merge columns with types {} and {}." "Inputs:\n{}\nOutputs:\n{}".format( - t1, t2, operator.inputs, operator.outputs)) - for ts in [StringTensorType, DoubleTensorType, FloatTensorType, - Int64TensorType, BooleanTensorType]: + t1, t2, operator.inputs, operator.outputs + ) + ) + for ts in [ + StringTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, + BooleanTensorType, + ]: if isinstance(t1, ts) or isinstance(t2, ts): return ts raise RuntimeError( "Cannot merge columns with types {} and {}." "Inputs:\n{}\nOutputs:\n{}".format( - t1, t2, operator.inputs, operator.outputs)) + t1, t2, operator.inputs, operator.outputs + ) + ) raise NotImplementedError( "Columns must be tensors." - "Inputs:\n{}\nOutputs:\n{}".format( - operator.inputs, operator.outputs)) + "Inputs:\n{}\nOutputs:\n{}".format(operator.inputs, operator.outputs) + ) # Let's determine the resulting type final_type = None @@ -69,24 +84,23 @@ def more_generic(t1, t2): raise NotImplementedError( "Columns must be tensors.\n" "- Inputs: {}\n- Outputs: {}\n- types: {}" - "".format( - operator.inputs, operator.outputs, seen_types)) + "".format(operator.inputs, operator.outputs, seen_types) + ) if final_type != operator.outputs[0].type: operator.outputs[0].type = type(final_type)([N, C]) else: operator.outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnConcat', calculate_sklearn_concat) -register_shape_calculator('SklearnGenericUnivariateSelect', - calculate_sklearn_concat) -register_shape_calculator('SklearnMultiply', calculate_sklearn_concat) -register_shape_calculator('SklearnRFE', calculate_sklearn_concat) -register_shape_calculator('SklearnRFECV', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectFdr', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectFpr', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectFromModel', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectFwe', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectKBest', calculate_sklearn_concat) -register_shape_calculator('SklearnSelectPercentile', calculate_sklearn_concat) -register_shape_calculator('SklearnVarianceThreshold', calculate_sklearn_concat) +register_shape_calculator("SklearnConcat", calculate_sklearn_concat) +register_shape_calculator("SklearnGenericUnivariateSelect", calculate_sklearn_concat) +register_shape_calculator("SklearnMultiply", calculate_sklearn_concat) +register_shape_calculator("SklearnRFE", calculate_sklearn_concat) +register_shape_calculator("SklearnRFECV", calculate_sklearn_concat) +register_shape_calculator("SklearnSelectFdr", calculate_sklearn_concat) +register_shape_calculator("SklearnSelectFpr", calculate_sklearn_concat) +register_shape_calculator("SklearnSelectFromModel", calculate_sklearn_concat) +register_shape_calculator("SklearnSelectFwe", 
calculate_sklearn_concat) +register_shape_calculator("SklearnSelectKBest", calculate_sklearn_concat) +register_shape_calculator("SklearnSelectPercentile", calculate_sklearn_concat) +register_shape_calculator("SklearnVarianceThreshold", calculate_sklearn_concat) diff --git a/skl2onnx/shape_calculators/cross_decomposition.py b/skl2onnx/shape_calculators/cross_decomposition.py index ec7a3faa8..ddeb9e152 100644 --- a/skl2onnx/shape_calculators/cross_decomposition.py +++ b/skl2onnx/shape_calculators/cross_decomposition.py @@ -2,20 +2,18 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType) -from ..common.utils import ( - check_input_and_output_numbers, check_input_and_output_types) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types def calculate_pls_regression_output_shapes(operator): check_input_and_output_numbers(operator, input_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) if len(operator.inputs[0].type.shape) != 2: - raise RuntimeError('Input must be a [N, C]-tensor') + raise RuntimeError("Input must be a [N, C]-tensor") op = operator.raw_operator cls_type = operator.inputs[0].type.__class__ @@ -25,5 +23,6 @@ def calculate_pls_regression_output_shapes(operator): operator.outputs[0].type = cls_type([N, op.coef_.shape[1]]) -register_shape_calculator('SklearnPLSRegression', - calculate_pls_regression_output_shapes) +register_shape_calculator( + "SklearnPLSRegression", calculate_pls_regression_output_shapes +) diff --git a/skl2onnx/shape_calculators/dict_vectorizer.py b/skl2onnx/shape_calculators/dict_vectorizer.py index 59dce8105..d5b0f232f 100644 --- a/skl2onnx/shape_calculators/dict_vectorizer.py +++ b/skl2onnx/shape_calculators/dict_vectorizer.py @@ -6,17 +6,17 @@ def calculate_sklearn_dict_vectorizer_output_shapes(operator): - ''' + """ Allowed input/output patterns are 1. Map ---> [1, C] C is the total number of allowed keys in the input dictionary. 
- ''' - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + """ + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) C = len(operator.raw_operator.feature_names_) operator.outputs[0].type.shape = [None, C] -register_shape_calculator('SklearnDictVectorizer', - calculate_sklearn_dict_vectorizer_output_shapes) +register_shape_calculator( + "SklearnDictVectorizer", calculate_sklearn_dict_vectorizer_output_shapes +) diff --git a/skl2onnx/shape_calculators/ensemble_shapes.py b/skl2onnx/shape_calculators/ensemble_shapes.py index de7fbbd37..c9d955a88 100644 --- a/skl2onnx/shape_calculators/ensemble_shapes.py +++ b/skl2onnx/shape_calculators/ensemble_shapes.py @@ -2,12 +2,12 @@ from ..common._registration import register_shape_calculator -from ..common.utils import ( - check_input_and_output_numbers, check_input_and_output_types) +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types from ..common.shape_calculator import ( calculate_linear_regressor_output_shapes, calculate_linear_classifier_output_shapes, - _calculate_linear_classifier_output_shapes) + _calculate_linear_classifier_output_shapes, +) from ..common.data_types import ( BooleanTensorType, DoubleTensorType, @@ -25,24 +25,29 @@ def calculate_tree_regressor_output_shapes(operator): batch. If the input batch size is N, the output shape may be [N, 1]. """ - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=[1, 3]) - check_input_and_output_types(operator, good_input_types=[ - BooleanTensorType, DoubleTensorType, - FloatTensorType, Int64TensorType]) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=[1, 3] + ) + check_input_and_output_types( + operator, + good_input_types=[ + BooleanTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, + ], + ) N = operator.inputs[0].get_first_dimension() if operator.outputs[0].type is None: - raise RuntimeError( - "Output type is unknown for operator %r." % operator) + raise RuntimeError("Output type is unknown for operator %r." % operator) operator.outputs[0].type.shape = [N, 1] # decision_path, decision_leaf for n in range(2, len(operator.outputs)): - if hasattr(operator.raw_operator, 'estimators_'): + if hasattr(operator.raw_operator, "estimators_"): # random forest - operator.outputs[n].type.shape = [ - N, len(operator.raw_operator.estimators_)] + operator.outputs[n].type.shape = [N, len(operator.raw_operator.estimators_)] else: # single tree operator.outputs[n].type.shape = [N, 1] @@ -55,39 +60,49 @@ def calculate_tree_classifier_output_shapes(operator): # decision_path, decision_leaf for n in range(2, len(operator.outputs)): if operator.outputs[n].type is None: - raise RuntimeError( - "Output type is unknown for operator %r." % operator) - if hasattr(operator.raw_operator, 'estimators_'): + raise RuntimeError("Output type is unknown for operator %r." 
% operator) + if hasattr(operator.raw_operator, "estimators_"): # random forest - operator.outputs[n].type.shape = [ - N, len(operator.raw_operator.estimators_)] + operator.outputs[n].type.shape = [N, len(operator.raw_operator.estimators_)] else: # single tree operator.outputs[n].type.shape = [N, 1] -register_shape_calculator('SklearnDecisionTreeRegressor', - calculate_tree_regressor_output_shapes) -register_shape_calculator('SklearnExtraTreeRegressor', - calculate_tree_regressor_output_shapes) -register_shape_calculator('SklearnExtraTreesRegressor', - calculate_tree_regressor_output_shapes) -register_shape_calculator('SklearnGradientBoostingRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnHistGradientBoostingRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnRandomForestRegressor', - calculate_tree_regressor_output_shapes) +register_shape_calculator( + "SklearnDecisionTreeRegressor", calculate_tree_regressor_output_shapes +) +register_shape_calculator( + "SklearnExtraTreeRegressor", calculate_tree_regressor_output_shapes +) +register_shape_calculator( + "SklearnExtraTreesRegressor", calculate_tree_regressor_output_shapes +) +register_shape_calculator( + "SklearnGradientBoostingRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnHistGradientBoostingRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnRandomForestRegressor", calculate_tree_regressor_output_shapes +) -register_shape_calculator('SklearnDecisionTreeClassifier', - calculate_tree_classifier_output_shapes) -register_shape_calculator('SklearnExtraTreeClassifier', - calculate_tree_classifier_output_shapes) -register_shape_calculator('SklearnExtraTreesClassifier', - calculate_tree_classifier_output_shapes) -register_shape_calculator('SklearnGradientBoostingClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnHistGradientBoostingClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnRandomForestClassifier', - calculate_tree_classifier_output_shapes) +register_shape_calculator( + "SklearnDecisionTreeClassifier", calculate_tree_classifier_output_shapes +) +register_shape_calculator( + "SklearnExtraTreeClassifier", calculate_tree_classifier_output_shapes +) +register_shape_calculator( + "SklearnExtraTreesClassifier", calculate_tree_classifier_output_shapes +) +register_shape_calculator( + "SklearnGradientBoostingClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnHistGradientBoostingClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnRandomForestClassifier", calculate_tree_classifier_output_shapes +) diff --git a/skl2onnx/shape_calculators/feature_hasher.py b/skl2onnx/shape_calculators/feature_hasher.py index 1c8409d41..9ba186e48 100644 --- a/skl2onnx/shape_calculators/feature_hasher.py +++ b/skl2onnx/shape_calculators/feature_hasher.py @@ -2,8 +2,11 @@ import numpy as np from ..common.data_types import ( - StringTensorType, Int64TensorType, FloatTensorType, - DoubleTensorType) + StringTensorType, + Int64TensorType, + FloatTensorType, + DoubleTensorType, +) from ..common._registration import register_shape_calculator from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -12,7 +15,8 @@ def calculate_sklearn_feature_hasher(operator): 
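# Illustrative sketch (not part of the diff): how the shape-calculator pattern used
# throughout these modules is applied to a custom estimator via the public API.
# MyIdentityTransformer and its alias are hypothetical names.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.common.utils import check_input_and_output_numbers


class MyIdentityTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.n_features_in_ = X.shape[1]
        return self

    def transform(self, X):
        return X


def my_shape_calculator(operator):
    # Same structure as the calculators in this file: validate the number of
    # inputs/outputs, then propagate an [N, C] shape to the output.
    check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
    N = operator.inputs[0].get_first_dimension()
    C = operator.raw_operator.n_features_in_
    operator.outputs[0].type = FloatTensorType([N, C])


def my_converter(scope, operator, container):
    # Simplest possible body: copy the input to the output with an Identity node.
    container.add_node(
        "Identity",
        operator.inputs[0].full_name,
        operator.outputs[0].full_name,
        name=scope.get_unique_operator_name("Identity"),
    )


update_registered_converter(
    MyIdentityTransformer, "MyIdentityTransformer", my_shape_calculator, my_converter
)

# Usage: the registered calculator fills in the ONNX output shape during conversion.
model = MyIdentityTransformer().fit(np.zeros((3, 4), dtype=np.float32))
onx = convert_sklearn(model, initial_types=[("X", FloatTensorType([None, 4]))])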
check_input_and_output_numbers(operator, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[StringTensorType, Int64TensorType]) + operator, good_input_types=[StringTensorType, Int64TensorType] + ) N = operator.inputs[0].get_first_dimension() model = operator.raw_operator @@ -25,9 +29,8 @@ def calculate_sklearn_feature_hasher(operator): operator.outputs[0].type = Int64TensorType(shape=shape) else: raise RuntimeError( - f"Converter is not implemented for " - f"FeatureHasher.dtype={model.dtype}.") + f"Converter is not implemented for " f"FeatureHasher.dtype={model.dtype}." + ) -register_shape_calculator('SklearnFeatureHasher', - calculate_sklearn_feature_hasher) +register_shape_calculator("SklearnFeatureHasher", calculate_sklearn_feature_hasher) diff --git a/skl2onnx/shape_calculators/flatten.py b/skl2onnx/shape_calculators/flatten.py index 27ffcd625..910d69c78 100644 --- a/skl2onnx/shape_calculators/flatten.py +++ b/skl2onnx/shape_calculators/flatten.py @@ -7,8 +7,7 @@ def calculate_sklearn_flatten(operator): - check_input_and_output_numbers(operator, output_count_range=1, - input_count_range=1) + check_input_and_output_numbers(operator, output_count_range=1, input_count_range=1) i = operator.inputs[0] N = i.get_first_dimension() if isinstance(i.type, TensorType): @@ -26,4 +25,4 @@ def calculate_sklearn_flatten(operator): operator.outputs[0].type.shape = [N * C] -register_shape_calculator('SklearnFlatten', calculate_sklearn_flatten) +register_shape_calculator("SklearnFlatten", calculate_sklearn_flatten) diff --git a/skl2onnx/shape_calculators/function_transformer.py b/skl2onnx/shape_calculators/function_transformer.py index c4ab2a132..2d3628be8 100644 --- a/skl2onnx/shape_calculators/function_transformer.py +++ b/skl2onnx/shape_calculators/function_transformer.py @@ -11,10 +11,12 @@ def calculate_sklearn_function_transformer_output_shapes(operator): Only identity function is supported. """ if operator.raw_operator.func is not None: - raise RuntimeError("FunctionTransformer is not supported unless the " - "transform function is None (= identity). " - "You may raise an issue at " - "https://github.com/onnx/sklearn-onnx/issues.") + raise RuntimeError( + "FunctionTransformer is not supported unless the " + "transform function is None (= identity). " + "You may raise an issue at " + "https://github.com/onnx/sklearn-onnx/issues." 
+ ) N = operator.inputs[0].get_first_dimension() C = 0 for variable in operator.inputs: @@ -28,5 +30,6 @@ def calculate_sklearn_function_transformer_output_shapes(operator): operator.outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnFunctionTransformer', - calculate_sklearn_function_transformer_output_shapes) +register_shape_calculator( + "SklearnFunctionTransformer", calculate_sklearn_function_transformer_output_shapes +) diff --git a/skl2onnx/shape_calculators/gaussian_process.py b/skl2onnx/shape_calculators/gaussian_process.py index d38653381..3a4083d54 100644 --- a/skl2onnx/shape_calculators/gaussian_process.py +++ b/skl2onnx/shape_calculators/gaussian_process.py @@ -9,14 +9,16 @@ def calculate_sklearn_gaussian_process_regressor_shape(operator): check_input_and_output_types( - operator, good_input_types=[FloatTensorType, DoubleTensorType], - good_output_types=[FloatTensorType, DoubleTensorType]) + operator, + good_input_types=[FloatTensorType, DoubleTensorType], + good_output_types=[FloatTensorType, DoubleTensorType], + ) if len(operator.inputs) != 1: - raise RuntimeError("Only one input vector is allowed for " - "GaussianProcessRegressor.") + raise RuntimeError( + "Only one input vector is allowed for " "GaussianProcessRegressor." + ) if len(operator.outputs) not in (1, 2): - raise RuntimeError("One output is expected for " - "GaussianProcessRegressor.") + raise RuntimeError("One output is expected for " "GaussianProcessRegressor.") variable = operator.inputs[0] @@ -25,14 +27,17 @@ def calculate_sklearn_gaussian_process_regressor_shape(operator): # Output 1 is mean # Output 2 is cov or std - if hasattr(op, 'y_train_') and op.y_train_ is not None: + if hasattr(op, "y_train_") and op.y_train_ is not None: dim = 1 if len(op.y_train_.shape) == 1 else op.y_train_.shape[1] else: dim = 1 operator.outputs[0].type.shape = [N, dim] -register_shape_calculator('SklearnGaussianProcessRegressor', - calculate_sklearn_gaussian_process_regressor_shape) -register_shape_calculator('SklearnGaussianProcessClassifier', - calculate_linear_classifier_output_shapes) +register_shape_calculator( + "SklearnGaussianProcessRegressor", + calculate_sklearn_gaussian_process_regressor_shape, +) +register_shape_calculator( + "SklearnGaussianProcessClassifier", calculate_linear_classifier_output_shapes +) diff --git a/skl2onnx/shape_calculators/grid_search_cv.py b/skl2onnx/shape_calculators/grid_search_cv.py index 011e866c5..bde460fce 100644 --- a/skl2onnx/shape_calculators/grid_search_cv.py +++ b/skl2onnx/shape_calculators/grid_search_cv.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging -from ..common._registration import ( - register_shape_calculator, - get_shape_calculator) +from ..common._registration import register_shape_calculator, get_shape_calculator from .._supported_operators import sklearn_operator_name_map @@ -12,9 +10,12 @@ def convert_sklearn_grid_search_cv(operator): best_estimator = grid_search_op.best_estimator_ name = sklearn_operator_name_map.get(type(best_estimator), None) if name is None: - logger = logging.getLogger('skl2onnx') - logger.warn("[convert_sklearn_grid_search_cv] failed to find alias " - "to model type %r.", type(best_estimator)) + logger = logging.getLogger("skl2onnx") + logger.warn( + "[convert_sklearn_grid_search_cv] failed to find alias " + "to model type %r.", + type(best_estimator), + ) return op = operator.new_raw_operator(best_estimator, name) shape_calc = get_shape_calculator(name) @@ -22,5 +23,4 @@ def 
convert_sklearn_grid_search_cv(operator): operator.outputs = op.outputs -register_shape_calculator('SklearnGridSearchCV', - convert_sklearn_grid_search_cv) +register_shape_calculator("SklearnGridSearchCV", convert_sklearn_grid_search_cv) diff --git a/skl2onnx/shape_calculators/identity.py b/skl2onnx/shape_calculators/identity.py index 5bba5ec77..aadd0160b 100644 --- a/skl2onnx/shape_calculators/identity.py +++ b/skl2onnx/shape_calculators/identity.py @@ -6,9 +6,8 @@ def calculate_sklearn_identity(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) operator.outputs[0].type = operator.inputs[0].type -register_shape_calculator('SklearnIdentity', calculate_sklearn_identity) +register_shape_calculator("SklearnIdentity", calculate_sklearn_identity) diff --git a/skl2onnx/shape_calculators/imputer.py b/skl2onnx/shape_calculators/imputer.py index 22effc1d7..441611ca9 100644 --- a/skl2onnx/shape_calculators/imputer.py +++ b/skl2onnx/shape_calculators/imputer.py @@ -3,7 +3,11 @@ from ..common._registration import register_shape_calculator from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType, StringTensorType) + FloatTensorType, + Int64TensorType, + DoubleTensorType, + StringTensorType, +) from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -17,18 +21,22 @@ def calculate_sklearn_imputer_output_shapes(operator): them along C-axis. The produced tensor's shape is used as the output shape. """ - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType, - StringTensorType]) + operator, + good_input_types=[ + FloatTensorType, + Int64TensorType, + DoubleTensorType, + StringTensorType, + ], + ) if not isinstance(operator.inputs[0].type, type(operator.outputs[0].type)): # noqa raise RuntimeError( "Inputs and outputs should have the same type " - "%r != %r." % ( - type(operator.inputs[0].type), - type(operator.outputs[0].type))) + "%r != %r." 
+ % (type(operator.inputs[0].type), type(operator.outputs[0].type)) + ) N = operator.inputs[0].get_first_dimension() C = 0 @@ -42,9 +50,8 @@ def calculate_sklearn_imputer_output_shapes(operator): operator.outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnImputer', - calculate_sklearn_imputer_output_shapes) -register_shape_calculator('SklearnSimpleImputer', - calculate_sklearn_imputer_output_shapes) -register_shape_calculator('SklearnBinarizer', - calculate_sklearn_imputer_output_shapes) +register_shape_calculator("SklearnImputer", calculate_sklearn_imputer_output_shapes) +register_shape_calculator( + "SklearnSimpleImputer", calculate_sklearn_imputer_output_shapes +) +register_shape_calculator("SklearnBinarizer", calculate_sklearn_imputer_output_shapes) diff --git a/skl2onnx/shape_calculators/isolation_forest.py b/skl2onnx/shape_calculators/isolation_forest.py index 5371a45bd..476101163 100644 --- a/skl2onnx/shape_calculators/isolation_forest.py +++ b/skl2onnx/shape_calculators/isolation_forest.py @@ -11,4 +11,5 @@ def calculate_isolation_forest_output_shapes(operator): register_shape_calculator( - 'SklearnIsolationForest', calculate_isolation_forest_output_shapes) + "SklearnIsolationForest", calculate_isolation_forest_output_shapes +) diff --git a/skl2onnx/shape_calculators/k_bins_discretiser.py b/skl2onnx/shape_calculators/k_bins_discretiser.py index 6a928ec3c..7973e49f9 100644 --- a/skl2onnx/shape_calculators/k_bins_discretiser.py +++ b/skl2onnx/shape_calculators/k_bins_discretiser.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common._registration import register_shape_calculator from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -12,14 +10,15 @@ def calculate_sklearn_k_bins_discretiser(operator): check_input_and_output_numbers(operator, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) M = operator.inputs[0].get_first_dimension() model = operator.raw_operator - N = len(model.n_bins_) if model.encode == 'ordinal' else sum(model.n_bins_) + N = len(model.n_bins_) if model.encode == "ordinal" else sum(model.n_bins_) operator.outputs[0].type.shape = [M, N] -register_shape_calculator('SklearnKBinsDiscretizer', - calculate_sklearn_k_bins_discretiser) +register_shape_calculator( + "SklearnKBinsDiscretizer", calculate_sklearn_k_bins_discretiser +) diff --git a/skl2onnx/shape_calculators/k_means.py b/skl2onnx/shape_calculators/k_means.py index 6c060ffd5..9428d633d 100644 --- a/skl2onnx/shape_calculators/k_means.py +++ b/skl2onnx/shape_calculators/k_means.py @@ -2,9 +2,7 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common.utils import check_input_and_output_types @@ -12,7 +10,8 @@ def calculate_sklearn_kmeans_output_shapes(operator): check_input_and_output_types( operator, good_input_types=[Int64TensorType, FloatTensorType, DoubleTensorType], - good_output_types=[Int64TensorType, FloatTensorType, DoubleTensorType]) + good_output_types=[Int64TensorType, 
FloatTensorType, DoubleTensorType], + ) if len(operator.inputs) != 1: raise RuntimeError("Only one input vector is allowed for KMeans.") if len(operator.outputs) != 2: @@ -25,7 +24,7 @@ def calculate_sklearn_kmeans_output_shapes(operator): operator.outputs[1].type.shape = [N, op.n_clusters] -register_shape_calculator('SklearnKMeans', - calculate_sklearn_kmeans_output_shapes) -register_shape_calculator('SklearnMiniBatchKMeans', - calculate_sklearn_kmeans_output_shapes) +register_shape_calculator("SklearnKMeans", calculate_sklearn_kmeans_output_shapes) +register_shape_calculator( + "SklearnMiniBatchKMeans", calculate_sklearn_kmeans_output_shapes +) diff --git a/skl2onnx/shape_calculators/kernel_pca.py b/skl2onnx/shape_calculators/kernel_pca.py index 568a23cf2..d93a32cb1 100644 --- a/skl2onnx/shape_calculators/kernel_pca.py +++ b/skl2onnx/shape_calculators/kernel_pca.py @@ -2,38 +2,39 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, DoubleTensorType) -from ..common.utils import ( - check_input_and_output_numbers, - check_input_and_output_types) +from ..common.data_types import FloatTensorType, DoubleTensorType +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types def calculate_sklearn_kernel_pca_output_shapes(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[FloatTensorType, DoubleTensorType], - good_output_types=[FloatTensorType, DoubleTensorType]) + operator, + good_input_types=[FloatTensorType, DoubleTensorType], + good_output_types=[FloatTensorType, DoubleTensorType], + ) N = operator.inputs[0].get_first_dimension() op = operator.raw_operator - lbd = op.eigenvalues_ if hasattr(op, 'eigenvalues_') else op.lambdas_ + lbd = op.eigenvalues_ if hasattr(op, "eigenvalues_") else op.lambdas_ C = lbd.shape[0] operator.outputs[0].type.shape = [N, C] def calculate_sklearn_kernel_centerer_output_shapes(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[FloatTensorType, DoubleTensorType], - good_output_types=[FloatTensorType, DoubleTensorType]) + operator, + good_input_types=[FloatTensorType, DoubleTensorType], + good_output_types=[FloatTensorType, DoubleTensorType], + ) N = operator.inputs[0].get_first_dimension() C = operator.raw_operator.K_fit_rows_.shape[0] operator.outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnKernelCenterer', - calculate_sklearn_kernel_centerer_output_shapes) -register_shape_calculator('SklearnKernelPCA', - calculate_sklearn_kernel_pca_output_shapes) +register_shape_calculator( + "SklearnKernelCenterer", calculate_sklearn_kernel_centerer_output_shapes +) +register_shape_calculator( + "SklearnKernelPCA", calculate_sklearn_kernel_pca_output_shapes +) diff --git a/skl2onnx/shape_calculators/label_binariser.py b/skl2onnx/shape_calculators/label_binariser.py index be360094f..1a210a4db 100644 --- a/skl2onnx/shape_calculators/label_binariser.py +++ b/skl2onnx/shape_calculators/label_binariser.py @@ -9,13 +9,14 @@ def calculate_sklearn_label_binariser_output_shapes(operator): check_input_and_output_numbers(operator, output_count_range=1) - check_input_and_output_types(operator, 
good_input_types=[ - Int64TensorType, StringTensorType]) + check_input_and_output_types( + operator, good_input_types=[Int64TensorType, StringTensorType] + ) N = operator.inputs[0].get_first_dimension() - operator.outputs[0].type = Int64TensorType( - [N, len(operator.raw_operator.classes_)]) + operator.outputs[0].type = Int64TensorType([N, len(operator.raw_operator.classes_)]) -register_shape_calculator('SklearnLabelBinarizer', - calculate_sklearn_label_binariser_output_shapes) +register_shape_calculator( + "SklearnLabelBinarizer", calculate_sklearn_label_binariser_output_shapes +) diff --git a/skl2onnx/shape_calculators/label_encoder.py b/skl2onnx/shape_calculators/label_encoder.py index 72601f8ff..6c7fe17d2 100644 --- a/skl2onnx/shape_calculators/label_encoder.py +++ b/skl2onnx/shape_calculators/label_encoder.py @@ -15,13 +15,14 @@ def calculate_sklearn_label_encoder_output_shapes(operator): encoder only alters input features' values, not their shape. """ check_input_and_output_numbers(operator, output_count_range=1) - check_input_and_output_types(operator, good_input_types=[ - FloatTensorType, Int64TensorType, - StringTensorType]) + check_input_and_output_types( + operator, good_input_types=[FloatTensorType, Int64TensorType, StringTensorType] + ) input_shape = copy.deepcopy(operator.inputs[0].type.shape) operator.outputs[0].type = Int64TensorType(copy.deepcopy(input_shape)) -register_shape_calculator('SklearnLabelEncoder', - calculate_sklearn_label_encoder_output_shapes) +register_shape_calculator( + "SklearnLabelEncoder", calculate_sklearn_label_encoder_output_shapes +) diff --git a/skl2onnx/shape_calculators/linear_classifier.py b/skl2onnx/shape_calculators/linear_classifier.py index 118836512..b4068bcf0 100644 --- a/skl2onnx/shape_calculators/linear_classifier.py +++ b/skl2onnx/shape_calculators/linear_classifier.py @@ -5,29 +5,40 @@ from ..common.shape_calculator import calculate_linear_classifier_output_shapes -register_shape_calculator('SklearnLinearClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnLinearSVC', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnAdaBoostClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnBaggingClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnBernoulliNB', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnCategoricalNB', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnComplementNB', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnGaussianNB', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnMultinomialNB', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnCalibratedClassifierCV', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnMLPClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnSGDClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnStackingClassifier', - calculate_linear_classifier_output_shapes) +register_shape_calculator( + "SklearnLinearClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator("SklearnLinearSVC", calculate_linear_classifier_output_shapes) +register_shape_calculator( + "SklearnAdaBoostClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + 
"SklearnBaggingClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnBernoulliNB", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnCategoricalNB", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnComplementNB", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnGaussianNB", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnMultinomialNB", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnCalibratedClassifierCV", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnMLPClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnSGDClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnStackingClassifier", calculate_linear_classifier_output_shapes +) diff --git a/skl2onnx/shape_calculators/linear_regressor.py b/skl2onnx/shape_calculators/linear_regressor.py index fa4c10111..1fdab33ea 100644 --- a/skl2onnx/shape_calculators/linear_regressor.py +++ b/skl2onnx/shape_calculators/linear_regressor.py @@ -2,12 +2,14 @@ from ..common._registration import register_shape_calculator -from ..common.utils import ( - check_input_and_output_numbers, check_input_and_output_types) +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types from ..common.shape_calculator import calculate_linear_regressor_output_shapes from ..common.data_types import ( - BooleanTensorType, DoubleTensorType, - FloatTensorType, Int64TensorType) + BooleanTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, +) def calculate_bayesian_ridge_output_shapes(operator): @@ -19,11 +21,18 @@ def calculate_bayesian_ridge_output_shapes(operator): batch. If the input batch size is N, the output shape may be [N, 1]. 
""" - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=[1, 2]) - check_input_and_output_types(operator, good_input_types=[ - BooleanTensorType, DoubleTensorType, - FloatTensorType, Int64TensorType]) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=[1, 2] + ) + check_input_and_output_types( + operator, + good_input_types=[ + BooleanTensorType, + DoubleTensorType, + FloatTensorType, + Int64TensorType, + ], + ) inp0 = operator.inputs[0].type if isinstance(inp0, (FloatTensorType, DoubleTensorType)): @@ -32,10 +41,11 @@ def calculate_bayesian_ridge_output_shapes(operator): cls_type = FloatTensorType N = operator.inputs[0].get_first_dimension() - if (hasattr(operator.raw_operator, 'coef_') and - len(operator.raw_operator.coef_.shape) > 1): - operator.outputs[0].type = cls_type([ - N, operator.raw_operator.coef_.shape[1]]) + if ( + hasattr(operator.raw_operator, "coef_") + and len(operator.raw_operator.coef_.shape) > 1 + ): + operator.outputs[0].type = cls_type([N, operator.raw_operator.coef_.shape[1]]) else: operator.outputs[0].type = cls_type([N, 1]) @@ -44,25 +54,34 @@ def calculate_bayesian_ridge_output_shapes(operator): operator.outputs[1].type = cls_type([N, 1]) -register_shape_calculator('SklearnAdaBoostRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnBaggingRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnBayesianRidge', - calculate_bayesian_ridge_output_shapes) -register_shape_calculator('SklearnLinearRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnLinearSVR', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnMLPRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnPoissonRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnRANSACRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnStackingRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnTweedieRegressor', - calculate_linear_regressor_output_shapes) -register_shape_calculator('SklearnGammaRegressor', - calculate_linear_regressor_output_shapes) +register_shape_calculator( + "SklearnAdaBoostRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnBaggingRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnBayesianRidge", calculate_bayesian_ridge_output_shapes +) +register_shape_calculator( + "SklearnLinearRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator("SklearnLinearSVR", calculate_linear_regressor_output_shapes) +register_shape_calculator( + "SklearnMLPRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnPoissonRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnRANSACRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnStackingRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnTweedieRegressor", calculate_linear_regressor_output_shapes +) +register_shape_calculator( + "SklearnGammaRegressor", calculate_linear_regressor_output_shapes +) diff --git a/skl2onnx/shape_calculators/local_outlier_factor.py b/skl2onnx/shape_calculators/local_outlier_factor.py index 14929b78d..6bd1e2bd4 100644 --- 
a/skl2onnx/shape_calculators/local_outlier_factor.py +++ b/skl2onnx/shape_calculators/local_outlier_factor.py @@ -11,4 +11,5 @@ def calculate_local_outlier_factor_output_shapes(operator): register_shape_calculator( - 'SklearnLocalOutlierFactor', calculate_local_outlier_factor_output_shapes) + "SklearnLocalOutlierFactor", calculate_local_outlier_factor_output_shapes +) diff --git a/skl2onnx/shape_calculators/mixture.py b/skl2onnx/shape_calculators/mixture.py index 4f523d176..9a9842655 100644 --- a/skl2onnx/shape_calculators/mixture.py +++ b/skl2onnx/shape_calculators/mixture.py @@ -2,24 +2,20 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) -from ..common.utils import ( - check_input_and_output_numbers, - check_input_and_output_types -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types def calculate_gaussian_mixture_output_shapes(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=[2, 3]) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=[2, 3] + ) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) if len(operator.inputs[0].type.shape) != 2: - raise RuntimeError('Input must be a [N, C]-tensor') + raise RuntimeError("Input must be a [N, C]-tensor") op = operator.raw_operator N = operator.inputs[0].get_first_dimension() @@ -29,7 +25,9 @@ def calculate_gaussian_mixture_output_shapes(operator): operator.outputs[2].type.shape = [N, 1] -register_shape_calculator('SklearnGaussianMixture', - calculate_gaussian_mixture_output_shapes) -register_shape_calculator('SklearnBayesianGaussianMixture', - calculate_gaussian_mixture_output_shapes) +register_shape_calculator( + "SklearnGaussianMixture", calculate_gaussian_mixture_output_shapes +) +register_shape_calculator( + "SklearnBayesianGaussianMixture", calculate_gaussian_mixture_output_shapes +) diff --git a/skl2onnx/shape_calculators/multioutput.py b/skl2onnx/shape_calculators/multioutput.py index 55ded66de..fe275dfb4 100644 --- a/skl2onnx/shape_calculators/multioutput.py +++ b/skl2onnx/shape_calculators/multioutput.py @@ -10,8 +10,7 @@ def multioutput_regressor_shape_calculator(operator): """Shape calculator for MultiOutputRegressor""" - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) i = operator.inputs[0] o = operator.outputs[0] N = i.get_first_dimension() @@ -21,12 +20,11 @@ def multioutput_regressor_shape_calculator(operator): def multioutput_classifier_shape_calculator(operator): """Shape calculator for MultiOutputClassifier""" - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=2) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=2) if not isinstance(operator.outputs[1].type, SequenceType): raise RuntimeError( - "Probabilites should be a sequence not %r." - "" % operator.outputs[1].type) + "Probabilites should be a sequence not %r." 
"" % operator.outputs[1].type + ) i = operator.inputs[0] outputs = operator.outputs N = i.get_first_dimension() @@ -34,7 +32,9 @@ def multioutput_classifier_shape_calculator(operator): outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnMultiOutputRegressor', - multioutput_regressor_shape_calculator) -register_shape_calculator('SklearnMultiOutputClassifier', - multioutput_classifier_shape_calculator) +register_shape_calculator( + "SklearnMultiOutputRegressor", multioutput_regressor_shape_calculator +) +register_shape_calculator( + "SklearnMultiOutputClassifier", multioutput_classifier_shape_calculator +) diff --git a/skl2onnx/shape_calculators/nearest_neighbours.py b/skl2onnx/shape_calculators/nearest_neighbours.py index 3e5a6bdfe..ddbde8f6a 100644 --- a/skl2onnx/shape_calculators/nearest_neighbours.py +++ b/skl2onnx/shape_calculators/nearest_neighbours.py @@ -5,19 +5,16 @@ import numpy as np from ..common._registration import register_shape_calculator from ..common.shape_calculator import calculate_linear_classifier_output_shapes -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types def calculate_sklearn_neighbours_transformer(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) N = operator.inputs[0].get_first_dimension() n_samples_fit = operator.raw_operator.n_samples_fit_ @@ -30,11 +27,12 @@ def calculate_sklearn_neighbours_transformer(operator): def calculate_sklearn_nearest_neighbours(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=[1, 2]) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=[1, 2] + ) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) N = operator.inputs[0].get_first_dimension() neighbours = operator.raw_operator.n_neighbors @@ -43,15 +41,18 @@ def calculate_sklearn_nearest_neighbours(operator): def calculate_sklearn_nearest_neighbours_regressor(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=[1, 2]) + check_input_and_output_numbers( + operator, input_count_range=1, output_count_range=[1, 2] + ) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) N = operator.inputs[0].get_first_dimension() - if (hasattr(operator.raw_operator, '_y') and - len(np.squeeze(operator.raw_operator._y).shape) == 1): + if ( + hasattr(operator.raw_operator, "_y") + and len(np.squeeze(operator.raw_operator._y).shape) == 1 + ): C = 1 else: C = operator.raw_operator._y.shape[-1] @@ -59,11 +60,10 @@ def calculate_sklearn_nearest_neighbours_regressor(operator): def calculate_sklearn_nca(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + 
check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) N = operator.inputs[0].get_first_dimension() output_type = ( @@ -76,28 +76,32 @@ def calculate_sklearn_nca(operator): def calculate_sklearn_knn_imputer(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) - check_input_and_output_types( - operator, good_input_types=[FloatTensorType]) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) + check_input_and_output_types(operator, good_input_types=[FloatTensorType]) operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type) operator.outputs[0].type.shape = operator.inputs[0].type.shape -register_shape_calculator('SklearnKNeighborsRegressor', - calculate_sklearn_nearest_neighbours_regressor) -register_shape_calculator('SklearnRadiusNeighborsRegressor', - calculate_sklearn_nearest_neighbours_regressor) -register_shape_calculator('SklearnKNeighborsClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnRadiusNeighborsClassifier', - calculate_linear_classifier_output_shapes) -register_shape_calculator('SklearnKNNImputer', - calculate_sklearn_knn_imputer) -register_shape_calculator('SklearnKNeighborsTransformer', - calculate_sklearn_neighbours_transformer) -register_shape_calculator('SklearnNearestNeighbors', - calculate_sklearn_nearest_neighbours) register_shape_calculator( - 'SklearnNeighborhoodComponentsAnalysis', calculate_sklearn_nca) + "SklearnKNeighborsRegressor", calculate_sklearn_nearest_neighbours_regressor +) +register_shape_calculator( + "SklearnRadiusNeighborsRegressor", calculate_sklearn_nearest_neighbours_regressor +) +register_shape_calculator( + "SklearnKNeighborsClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator( + "SklearnRadiusNeighborsClassifier", calculate_linear_classifier_output_shapes +) +register_shape_calculator("SklearnKNNImputer", calculate_sklearn_knn_imputer) +register_shape_calculator( + "SklearnKNeighborsTransformer", calculate_sklearn_neighbours_transformer +) +register_shape_calculator( + "SklearnNearestNeighbors", calculate_sklearn_nearest_neighbours +) +register_shape_calculator( + "SklearnNeighborhoodComponentsAnalysis", calculate_sklearn_nca +) diff --git a/skl2onnx/shape_calculators/one_hot_encoder.py b/skl2onnx/shape_calculators/one_hot_encoder.py index 6734b6e42..64a8c0ad0 100644 --- a/skl2onnx/shape_calculators/one_hot_encoder.py +++ b/skl2onnx/shape_calculators/one_hot_encoder.py @@ -10,9 +10,8 @@ def calculate_sklearn_one_hot_encoder_output_shapes(operator): op = operator.raw_operator categories_len = 0 for index, categories in enumerate(op.categories_): - if hasattr(op, 'drop_idx_') and op.drop_idx_ is not None: - categories = (categories[np.arange(len(categories)) != - op.drop_idx_[index]]) + if hasattr(op, "drop_idx_") and op.drop_idx_ is not None: + categories = categories[np.arange(len(categories)) != op.drop_idx_[index]] categories_len += len(categories) instances = operator.inputs[0].get_first_dimension() if np.issubdtype(op.dtype, np.signedinteger): @@ -21,5 +20,6 @@ def calculate_sklearn_one_hot_encoder_output_shapes(operator): operator.outputs[0].type = FloatTensorType([instances, categories_len]) -register_shape_calculator('SklearnOneHotEncoder', - 
calculate_sklearn_one_hot_encoder_output_shapes) +register_shape_calculator( + "SklearnOneHotEncoder", calculate_sklearn_one_hot_encoder_output_shapes +) diff --git a/skl2onnx/shape_calculators/one_vs_one_classifier.py b/skl2onnx/shape_calculators/one_vs_one_classifier.py index 4f9c8e0e6..bea837f33 100644 --- a/skl2onnx/shape_calculators/one_vs_one_classifier.py +++ b/skl2onnx/shape_calculators/one_vs_one_classifier.py @@ -4,5 +4,6 @@ from ..common.shape_calculator import calculate_linear_classifier_output_shapes -register_shape_calculator('SklearnOneVsOneClassifier', - calculate_linear_classifier_output_shapes) +register_shape_calculator( + "SklearnOneVsOneClassifier", calculate_linear_classifier_output_shapes +) diff --git a/skl2onnx/shape_calculators/one_vs_rest_classifier.py b/skl2onnx/shape_calculators/one_vs_rest_classifier.py index 579db17d3..c54e686be 100644 --- a/skl2onnx/shape_calculators/one_vs_rest_classifier.py +++ b/skl2onnx/shape_calculators/one_vs_rest_classifier.py @@ -11,8 +11,10 @@ def calculate_constant_predictor_output_shapes(operator): operator.outputs[1].type.shape = [N, 2] -register_shape_calculator('Sklearn_ConstantPredictor', - calculate_constant_predictor_output_shapes) +register_shape_calculator( + "Sklearn_ConstantPredictor", calculate_constant_predictor_output_shapes +) -register_shape_calculator('SklearnOneVsRestClassifier', - calculate_linear_classifier_output_shapes) +register_shape_calculator( + "SklearnOneVsRestClassifier", calculate_linear_classifier_output_shapes +) diff --git a/skl2onnx/shape_calculators/ordinal_encoder.py b/skl2onnx/shape_calculators/ordinal_encoder.py index 907f86a63..a08ab3bd4 100644 --- a/skl2onnx/shape_calculators/ordinal_encoder.py +++ b/skl2onnx/shape_calculators/ordinal_encoder.py @@ -11,11 +11,14 @@ def calculate_sklearn_ordinal_encoder_output_shapes(operator): op_features = sum(list(map(lambda x: x.type.shape[1], operator.inputs))) if np.issubdtype(ordinal_op.dtype, np.floating): operator.outputs[0].type = FloatTensorType( - [operator.inputs[0].get_first_dimension(), op_features]) + [operator.inputs[0].get_first_dimension(), op_features] + ) else: operator.outputs[0].type = Int64TensorType( - [operator.inputs[0].get_first_dimension(), op_features]) + [operator.inputs[0].get_first_dimension(), op_features] + ) -register_shape_calculator('SklearnOrdinalEncoder', - calculate_sklearn_ordinal_encoder_output_shapes) +register_shape_calculator( + "SklearnOrdinalEncoder", calculate_sklearn_ordinal_encoder_output_shapes +) diff --git a/skl2onnx/shape_calculators/ovr_decision_function.py b/skl2onnx/shape_calculators/ovr_decision_function.py index 3649aa36b..a5d1d087b 100644 --- a/skl2onnx/shape_calculators/ovr_decision_function.py +++ b/skl2onnx/shape_calculators/ovr_decision_function.py @@ -7,8 +7,10 @@ def calculate_sklearn_ovr_decision_function(operator): N = operator.inputs[0].get_first_dimension() operator.outputs[0].type = operator.inputs[0].type.__class__( - [N, len(operator.raw_operator.classes_)]) + [N, len(operator.raw_operator.classes_)] + ) -register_shape_calculator('SklearnOVRDecisionFunction', - calculate_sklearn_ovr_decision_function) +register_shape_calculator( + "SklearnOVRDecisionFunction", calculate_sklearn_ovr_decision_function +) diff --git a/skl2onnx/shape_calculators/pipelines.py b/skl2onnx/shape_calculators/pipelines.py index 7384c8c55..12ce7fd85 100644 --- a/skl2onnx/shape_calculators/pipelines.py +++ b/skl2onnx/shape_calculators/pipelines.py @@ -15,9 +15,8 @@ def column_transformer_shape_calculator(operator): 
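# Context for the no-op calculators that follow (illustrative note, not part of the
# diff): Pipeline, FeatureUnion and ColumnTransformer are decomposed into their
# steps when the model is parsed, so these container operators have no shape left
# to compute. A minimal end-to-end sketch with arbitrary data and column counts:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

X = np.random.rand(20, 3).astype(np.float32)
y = np.array([0, 1] * 10, dtype=np.int64)
pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())]).fit(X, y)
onx = convert_sklearn(pipe, initial_types=[("X", FloatTensorType([None, 3]))])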
pass +register_shape_calculator("SklearnPipeline", pipeline_shape_calculator) +register_shape_calculator("SklearnFeatureUnion", feature_union_shape_calculator) register_shape_calculator( - 'SklearnPipeline', pipeline_shape_calculator) -register_shape_calculator( - 'SklearnFeatureUnion', feature_union_shape_calculator) -register_shape_calculator( - 'SklearnColumnTransformer', column_transformer_shape_calculator) + "SklearnColumnTransformer", column_transformer_shape_calculator +) diff --git a/skl2onnx/shape_calculators/polynomial_features.py b/skl2onnx/shape_calculators/polynomial_features.py index 719e9f3a1..871619872 100644 --- a/skl2onnx/shape_calculators/polynomial_features.py +++ b/skl2onnx/shape_calculators/polynomial_features.py @@ -2,9 +2,7 @@ import copy -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common._registration import register_shape_calculator from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -13,8 +11,8 @@ def calculate_sklearn_polynomial_features(operator): check_input_and_output_numbers(operator, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType]) + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) N = operator.inputs[0].get_first_dimension() model = operator.raw_operator @@ -22,5 +20,6 @@ def calculate_sklearn_polynomial_features(operator): operator.outputs[0].type.shape = [N, model.n_output_features_] -register_shape_calculator('SklearnPolynomialFeatures', - calculate_sklearn_polynomial_features) +register_shape_calculator( + "SklearnPolynomialFeatures", calculate_sklearn_polynomial_features +) diff --git a/skl2onnx/shape_calculators/power_transformer.py b/skl2onnx/shape_calculators/power_transformer.py index 72dd70cae..ce61ffa0b 100644 --- a/skl2onnx/shape_calculators/power_transformer.py +++ b/skl2onnx/shape_calculators/power_transformer.py @@ -2,9 +2,7 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType -) +from ..common.data_types import FloatTensorType def powertransformer_shape_calculator(operator): @@ -15,5 +13,4 @@ def powertransformer_shape_calculator(operator): output.type = FloatTensorType([n, c]) -register_shape_calculator('SklearnPowerTransformer', - powertransformer_shape_calculator) +register_shape_calculator("SklearnPowerTransformer", powertransformer_shape_calculator) diff --git a/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py b/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py index cf66973bd..3dab73c60 100644 --- a/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py +++ b/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py @@ -11,5 +11,6 @@ def calculate_quadratic_discriminant_analysis_shapes(operator): register_shape_calculator( - 'SklearnQuadraticDiscriminantAnalysis', - calculate_quadratic_discriminant_analysis_shapes) + "SklearnQuadraticDiscriminantAnalysis", + calculate_quadratic_discriminant_analysis_shapes, +) diff --git a/skl2onnx/shape_calculators/random_projection.py b/skl2onnx/shape_calculators/random_projection.py index 805180afb..fd211cea1 100644 --- a/skl2onnx/shape_calculators/random_projection.py +++ b/skl2onnx/shape_calculators/random_projection.py @@ -13,5 +13,6 @@ def 
random_projection_shape_calculator(operator): operator.outputs[0].type.shape = [n, c] -register_shape_calculator('SklearnGaussianRandomProjection', - random_projection_shape_calculator) +register_shape_calculator( + "SklearnGaussianRandomProjection", random_projection_shape_calculator +) diff --git a/skl2onnx/shape_calculators/random_trees_embedding.py b/skl2onnx/shape_calculators/random_trees_embedding.py index e7b07c49e..aa5490026 100644 --- a/skl2onnx/shape_calculators/random_trees_embedding.py +++ b/skl2onnx/shape_calculators/random_trees_embedding.py @@ -10,9 +10,8 @@ def calculate_sklearn_random_trees_embedding_output_shapes(operator): op = operator.raw_operator.one_hot_encoder_ categories_len = 0 for index, categories in enumerate(op.categories_): - if hasattr(op, 'drop_idx_') and op.drop_idx_ is not None: - categories = (categories[np.arange(len(categories)) != - op.drop_idx_[index]]) + if hasattr(op, "drop_idx_") and op.drop_idx_ is not None: + categories = categories[np.arange(len(categories)) != op.drop_idx_[index]] categories_len += len(categories) instances = operator.inputs[0].get_first_dimension() if np.issubdtype(op.dtype, np.signedinteger): @@ -22,5 +21,6 @@ def calculate_sklearn_random_trees_embedding_output_shapes(operator): register_shape_calculator( - 'SklearnRandomTreesEmbedding', - calculate_sklearn_random_trees_embedding_output_shapes) + "SklearnRandomTreesEmbedding", + calculate_sklearn_random_trees_embedding_output_shapes, +) diff --git a/skl2onnx/shape_calculators/replace_op.py b/skl2onnx/shape_calculators/replace_op.py index 506adf906..a34167340 100644 --- a/skl2onnx/shape_calculators/replace_op.py +++ b/skl2onnx/shape_calculators/replace_op.py @@ -6,10 +6,10 @@ def calculate_sklearn_replace_transformer(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) operator.outputs[0].type = operator.inputs[0].type register_shape_calculator( - 'SklearnReplaceTransformer', calculate_sklearn_replace_transformer) + "SklearnReplaceTransformer", calculate_sklearn_replace_transformer +) diff --git a/skl2onnx/shape_calculators/scaler.py b/skl2onnx/shape_calculators/scaler.py index af299f123..12f7077e3 100644 --- a/skl2onnx/shape_calculators/scaler.py +++ b/skl2onnx/shape_calculators/scaler.py @@ -3,9 +3,7 @@ import numbers from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType -) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -18,19 +16,19 @@ def calculate_sklearn_scaler_output_shapes(operator): Similar to imputer, this operator can take multiple input feature tensors and concatenate them along C-axis. 
""" - check_input_and_output_numbers(operator, input_count_range=[1, None], - output_count_range=1) + check_input_and_output_numbers( + operator, input_count_range=[1, None], output_count_range=1 + ) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType], - good_output_types=[FloatTensorType, DoubleTensorType]) + operator, + good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType], + good_output_types=[FloatTensorType, DoubleTensorType], + ) # Inputs: multiple float- and integer-tensors # Output: one float tensor for variable in operator.inputs: - if (len(set(variable.get_first_dimension() - for variable in operator.inputs)) - > 1): - raise RuntimeError('Batch size must be identical across inputs.') + if len(set(variable.get_first_dimension() for variable in operator.inputs)) > 1: + raise RuntimeError("Batch size must be identical across inputs.") N = operator.inputs[0].get_first_dimension() C = 0 @@ -45,13 +43,8 @@ def calculate_sklearn_scaler_output_shapes(operator): operator.outputs[0].type.shape = [N, C] -register_shape_calculator('SklearnRobustScaler', - calculate_sklearn_scaler_output_shapes) -register_shape_calculator('SklearnScaler', - calculate_sklearn_scaler_output_shapes) -register_shape_calculator('SklearnNormalizer', - calculate_sklearn_scaler_output_shapes) -register_shape_calculator('SklearnMinMaxScaler', - calculate_sklearn_scaler_output_shapes) -register_shape_calculator('SklearnMaxAbsScaler', - calculate_sklearn_scaler_output_shapes) +register_shape_calculator("SklearnRobustScaler", calculate_sklearn_scaler_output_shapes) +register_shape_calculator("SklearnScaler", calculate_sklearn_scaler_output_shapes) +register_shape_calculator("SklearnNormalizer", calculate_sklearn_scaler_output_shapes) +register_shape_calculator("SklearnMinMaxScaler", calculate_sklearn_scaler_output_shapes) +register_shape_calculator("SklearnMaxAbsScaler", calculate_sklearn_scaler_output_shapes) diff --git a/skl2onnx/shape_calculators/sequence.py b/skl2onnx/shape_calculators/sequence.py index 4ef12cea2..b423b66aa 100644 --- a/skl2onnx/shape_calculators/sequence.py +++ b/skl2onnx/shape_calculators/sequence.py @@ -11,6 +11,7 @@ def calculate_sklearn_sequence_construct(operator): pass -register_shape_calculator('SklearnSequenceAt', calculate_sklearn_sequence_at) +register_shape_calculator("SklearnSequenceAt", calculate_sklearn_sequence_at) register_shape_calculator( - 'SklearnSequenceConstruct', calculate_sklearn_sequence_construct) + "SklearnSequenceConstruct", calculate_sklearn_sequence_construct +) diff --git a/skl2onnx/shape_calculators/sgd_oneclass_svm.py b/skl2onnx/shape_calculators/sgd_oneclass_svm.py index 3e3d5a5dd..763b5e75f 100644 --- a/skl2onnx/shape_calculators/sgd_oneclass_svm.py +++ b/skl2onnx/shape_calculators/sgd_oneclass_svm.py @@ -6,9 +6,16 @@ def calculate_sgd_oneclass_svm_output_shapes(operator): N = operator.inputs[0].get_first_dimension() - operator.outputs[0].type = Int64TensorType([N, ]) - operator.outputs[1].type.shape = [N, ] + operator.outputs[0].type = Int64TensorType( + [ + N, + ] + ) + operator.outputs[1].type.shape = [ + N, + ] register_shape_calculator( - 'SklearnSGDOneClassSVM', calculate_sgd_oneclass_svm_output_shapes) + "SklearnSGDOneClassSVM", calculate_sgd_oneclass_svm_output_shapes +) diff --git a/skl2onnx/shape_calculators/support_vector_machines.py b/skl2onnx/shape_calculators/support_vector_machines.py index da60ddd8b..f15b5934e 100644 --- 
a/skl2onnx/shape_calculators/support_vector_machines.py +++ b/skl2onnx/shape_calculators/support_vector_machines.py @@ -32,38 +32,40 @@ def calculate_sklearn_svm_output_shapes(operator): op = operator.raw_operator N = operator.inputs[0].get_first_dimension() - if operator.type in ['SklearnOneClassSVM']: + if operator.type in ["SklearnOneClassSVM"]: operator.outputs[0].type = Int64TensorType([N, 1]) operator.outputs[1].type.shape = [N, 1] - elif operator.type in ['SklearnSVC'] or isinstance(op, (SVC, NuSVC)): + elif operator.type in ["SklearnSVC"] or isinstance(op, (SVC, NuSVC)): number_of_classes = len(op.classes_) - check_input_and_output_numbers(operator, input_count_range=[1, None], - output_count_range=[1, 2]) + check_input_and_output_numbers( + operator, input_count_range=[1, None], output_count_range=[1, 2] + ) if all(isinstance(i, str) for i in op.classes_): operator.outputs[0].type = StringTensorType([N]) operator.outputs[1].type.shape = [N, number_of_classes] - elif all(isinstance(i, (numbers.Real, bool, np.bool_)) - for i in op.classes_): + elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in op.classes_): operator.outputs[0].type = Int64TensorType([N]) operator.outputs[1].type.shape = [N, number_of_classes] else: - raise RuntimeError('Class labels should be either all strings or ' - 'all integers. C++ backends do not support ' - 'mixed types.') + raise RuntimeError( + "Class labels should be either all strings or " + "all integers. C++ backends do not support " + "mixed types." + ) - elif operator.type in ['SklearnSVR']: - check_input_and_output_numbers(operator, input_count_range=[1, None], - output_count_range=1) + elif operator.type in ["SklearnSVR"]: + check_input_and_output_numbers( + operator, input_count_range=[1, None], output_count_range=1 + ) operator.outputs[0].type.shape = [N, 1] else: raise RuntimeError( - "New kind of SVM, no shape calculator exist for '{}'.".format( - operator.type)) + "New kind of SVM, no shape calculator exist for '{}'.".format(operator.type) + ) -register_shape_calculator( - 'SklearnOneClassSVM', calculate_sklearn_svm_output_shapes) -register_shape_calculator('SklearnSVC', calculate_sklearn_svm_output_shapes) -register_shape_calculator('SklearnSVR', calculate_sklearn_svm_output_shapes) +register_shape_calculator("SklearnOneClassSVM", calculate_sklearn_svm_output_shapes) +register_shape_calculator("SklearnSVC", calculate_sklearn_svm_output_shapes) +register_shape_calculator("SklearnSVR", calculate_sklearn_svm_output_shapes) diff --git a/skl2onnx/shape_calculators/svd.py b/skl2onnx/shape_calculators/svd.py index a2d6a7352..78a9c3cab 100644 --- a/skl2onnx/shape_calculators/svd.py +++ b/skl2onnx/shape_calculators/svd.py @@ -2,8 +2,7 @@ from ..common._registration import register_shape_calculator -from ..common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType) +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType from ..common.utils import check_input_and_output_numbers from ..common.utils import check_input_and_output_types @@ -15,30 +14,33 @@ def calculate_sklearn_truncated_svd_output_shapes(operator): Transform feature dimension from C to K """ - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) check_input_and_output_types( - operator, good_input_types=[ - FloatTensorType, Int64TensorType, DoubleTensorType], - good_output_types=[FloatTensorType, 
DoubleTensorType]) + operator, + good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType], + good_output_types=[FloatTensorType, DoubleTensorType], + ) if len(operator.inputs[0].type.shape) != 2: - raise RuntimeError('Only 2-D tensor(s) can be input(s).') + raise RuntimeError("Only 2-D tensor(s) can be input(s).") cls_type = operator.inputs[0].type.__class__ if cls_type != DoubleTensorType: cls_type = FloatTensorType N = operator.inputs[0].get_first_dimension() - K = (operator.raw_operator.n_components - if operator.type == 'SklearnTruncatedSVD' - else operator.raw_operator.n_components_) + K = ( + operator.raw_operator.n_components + if operator.type == "SklearnTruncatedSVD" + else operator.raw_operator.n_components_ + ) operator.outputs[0].type = cls_type([N, K]) -register_shape_calculator('SklearnIncrementalPCA', - calculate_sklearn_truncated_svd_output_shapes) -register_shape_calculator('SklearnPCA', - calculate_sklearn_truncated_svd_output_shapes) -register_shape_calculator('SklearnTruncatedSVD', - calculate_sklearn_truncated_svd_output_shapes) +register_shape_calculator( + "SklearnIncrementalPCA", calculate_sklearn_truncated_svd_output_shapes +) +register_shape_calculator("SklearnPCA", calculate_sklearn_truncated_svd_output_shapes) +register_shape_calculator( + "SklearnTruncatedSVD", calculate_sklearn_truncated_svd_output_shapes +) diff --git a/skl2onnx/shape_calculators/text_vectorizer.py b/skl2onnx/shape_calculators/text_vectorizer.py index d58f1eb99..895f7eea3 100644 --- a/skl2onnx/shape_calculators/text_vectorizer.py +++ b/skl2onnx/shape_calculators/text_vectorizer.py @@ -6,20 +6,21 @@ def calculate_sklearn_text_vectorizer_output_shapes(operator): - ''' + """ Allowed input/output patterns are 1. Map ---> [1, C] C is the total number of allowed keys in the input dictionary. 
- ''' - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + """ + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) C = max(operator.raw_operator.vocabulary_.values()) + 1 operator.outputs[0].type.shape = [None, C] -register_shape_calculator('SklearnCountVectorizer', - calculate_sklearn_text_vectorizer_output_shapes) -register_shape_calculator('SklearnTfidfVectorizer', - calculate_sklearn_text_vectorizer_output_shapes) +register_shape_calculator( + "SklearnCountVectorizer", calculate_sklearn_text_vectorizer_output_shapes +) +register_shape_calculator( + "SklearnTfidfVectorizer", calculate_sklearn_text_vectorizer_output_shapes +) diff --git a/skl2onnx/shape_calculators/tfidf_transformer.py b/skl2onnx/shape_calculators/tfidf_transformer.py index d9b5d20a1..443cc9c76 100644 --- a/skl2onnx/shape_calculators/tfidf_transformer.py +++ b/skl2onnx/shape_calculators/tfidf_transformer.py @@ -6,11 +6,11 @@ def calculate_sklearn_tfidf_transformer_output_shapes(operator): - check_input_and_output_numbers(operator, input_count_range=1, - output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) C = operator.inputs[0].type.shape[1] operator.outputs[0].type.shape = [1, C] -register_shape_calculator('SklearnTfidfTransformer', - calculate_sklearn_tfidf_transformer_output_shapes) +register_shape_calculator( + "SklearnTfidfTransformer", calculate_sklearn_tfidf_transformer_output_shapes +) diff --git a/skl2onnx/shape_calculators/voting_classifier.py b/skl2onnx/shape_calculators/voting_classifier.py index e02dcf12b..7aaecbf9a 100644 --- a/skl2onnx/shape_calculators/voting_classifier.py +++ b/skl2onnx/shape_calculators/voting_classifier.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 from ..common._registration import register_shape_calculator -from ..common.shape_calculator import ( - _calculate_linear_classifier_output_shapes) +from ..common.shape_calculator import _calculate_linear_classifier_output_shapes def voting_classifier_shape_calculator(operator): return _calculate_linear_classifier_output_shapes( - operator, enable_type_checking=False) + operator, enable_type_checking=False + ) -register_shape_calculator( - 'SklearnVotingClassifier', voting_classifier_shape_calculator) +register_shape_calculator("SklearnVotingClassifier", voting_classifier_shape_calculator) diff --git a/skl2onnx/shape_calculators/voting_regressor.py b/skl2onnx/shape_calculators/voting_regressor.py index 66e044956..bc91be75a 100644 --- a/skl2onnx/shape_calculators/voting_regressor.py +++ b/skl2onnx/shape_calculators/voting_regressor.py @@ -7,8 +7,8 @@ def voting_regressor_shape_calculator(operator): return _calculate_linear_regressor_output_shapes( - operator, enable_type_checking=False) + operator, enable_type_checking=False + ) -register_shape_calculator( - 'SklearnVotingRegressor', voting_regressor_shape_calculator) +register_shape_calculator("SklearnVotingRegressor", voting_regressor_shape_calculator) diff --git a/skl2onnx/shape_calculators/zip_map.py b/skl2onnx/shape_calculators/zip_map.py index 9e0b7f8ed..7dcc15b39 100644 --- a/skl2onnx/shape_calculators/zip_map.py +++ b/skl2onnx/shape_calculators/zip_map.py @@ -5,26 +5,31 @@ def calculate_sklearn_zipmap(operator): - if (len(operator.inputs) != len(operator.outputs) or - len(operator.inputs) not in (1, 2)): + if len(operator.inputs) != len(operator.outputs) or len(operator.inputs) not in ( + 1, + 2, + ): raise RuntimeError( - "SklearnZipMap expects 
the same number of inputs and outputs.") + "SklearnZipMap expects the same number of inputs and outputs." + ) if len(operator.inputs) == 2: operator.outputs[0].type = operator.inputs[0].type.__class__( - operator.inputs[0].type.shape) + operator.inputs[0].type.shape + ) if operator.outputs[1].type is not None: - operator.outputs[1].type.element_type.value_type = \ - operator.inputs[1].type.__class__([]) + operator.outputs[1].type.element_type.value_type = operator.inputs[ + 1 + ].type.__class__([]) def calculate_sklearn_zipmap_columns(operator): N = operator.inputs[0].get_first_dimension() operator.outputs[0].type = operator.inputs[0].type.__class__( - operator.inputs[0].type.shape) + operator.inputs[0].type.shape + ) for i in range(1, len(operator.outputs)): operator.outputs[i].type.shape = [N] -register_shape_calculator('SklearnZipMap', calculate_sklearn_zipmap) -register_shape_calculator( - 'SklearnZipMapColumns', calculate_sklearn_zipmap_columns) +register_shape_calculator("SklearnZipMap", calculate_sklearn_zipmap) +register_shape_calculator("SklearnZipMapColumns", calculate_sklearn_zipmap_columns) diff --git a/skl2onnx/sklapi/cast_regressor.py b/skl2onnx/sklapi/cast_regressor.py index 4da4a4ebb..37e83da41 100644 --- a/skl2onnx/sklapi/cast_regressor.py +++ b/skl2onnx/sklapi/cast_regressor.py @@ -2,10 +2,13 @@ import numpy as np from sklearn.base import RegressorMixin, BaseEstimator + try: from sklearn.utils.validation import _deprecate_positional_args except ImportError: - def _deprecate_positional_args(x): return x # noqa + + def _deprecate_positional_args(x): + return x # noqa class CastRegressor(RegressorMixin, BaseEstimator): # noqa @@ -34,8 +37,8 @@ def _cast(self, a, name): a2 = a.astype(self.dtype) except ValueError: raise ValueError( - "Unable to cast {} from {} into {}.".format( - name, a.dtype, self.dtype)) + "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype) + ) return a2 def fit(self, X, y=None, sample_weight=None): @@ -49,15 +52,15 @@ def predict(self, X, y=None): """ Predicts and casts the prediction. """ - return self._cast(self.estimator.predict(X), 'predict(X)') + return self._cast(self.estimator.predict(X), "predict(X)") def decision_function(self, X, y=None): """ Calls *decision_function* and casts the outputs. """ - if not hasattr(self.estimator, 'decision_function'): + if not hasattr(self.estimator, "decision_function"): raise AttributeError( - "%r object has no attribute 'decision_function'." % - self.estimator.__class__.__name__) - return self._cast(self.estimator.decision_function(X), - 'decision_function(X)') + "%r object has no attribute 'decision_function'." 
+ % self.estimator.__class__.__name__ + ) + return self._cast(self.estimator.decision_function(X), "decision_function(X)") diff --git a/skl2onnx/sklapi/cast_transformer.py b/skl2onnx/sklapi/cast_transformer.py index 1c9c13f6f..703b8b5c0 100644 --- a/skl2onnx/sklapi/cast_transformer.py +++ b/skl2onnx/sklapi/cast_transformer.py @@ -2,10 +2,13 @@ import numpy as np from sklearn.base import TransformerMixin, BaseEstimator + try: from sklearn.utils.validation import _deprecate_positional_args except ImportError: - def _deprecate_positional_args(x): return x # noqa + + def _deprecate_positional_args(x): + return x # noqa class CastTransformer(TransformerMixin, BaseEstimator): @@ -27,30 +30,28 @@ def __init__(self, *, dtype=np.float32): def _cast(self, a, name): if not isinstance(a, np.ndarray): - if hasattr(a, 'values') and hasattr(a, 'iloc'): + if hasattr(a, "values") and hasattr(a, "iloc"): # dataframe a = a.values - elif not hasattr(a, 'astype'): - raise TypeError( - "{} must be a numpy array or a dataframe.".format( - name)) + elif not hasattr(a, "astype"): + raise TypeError("{} must be a numpy array or a dataframe.".format(name)) try: a2 = a.astype(self.dtype) except ValueError: raise ValueError( - "Unable to cast {} from {} into {}.".format( - name, a.dtype, self.dtype)) + "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype) + ) return a2 def fit(self, X, y=None, sample_weight=None): """ Does nothing except checking *dtype* may be applied. """ - self._cast(X, 'X') + self._cast(X, "X") return self def transform(self, X, y=None): """ Casts array X. """ - return self._cast(X, 'X') + return self._cast(X, "X") diff --git a/skl2onnx/sklapi/replace_transformer.py b/skl2onnx/sklapi/replace_transformer.py index f135ab0c9..46083455a 100644 --- a/skl2onnx/sklapi/replace_transformer.py +++ b/skl2onnx/sklapi/replace_transformer.py @@ -2,10 +2,13 @@ import numpy as np from sklearn.base import TransformerMixin, BaseEstimator + try: from sklearn.utils.validation import _deprecate_positional_args except ImportError: - def _deprecate_positional_args(x): return x # noqa + + def _deprecate_positional_args(x): + return x # noqa class ReplaceTransformer(TransformerMixin, BaseEstimator): @@ -29,13 +32,13 @@ def __init__(self, *, from_value=0, to_value=np.nan, dtype=np.float32): self.to_value = to_value def _replace(self, a): - if hasattr(a, 'todense'): + if hasattr(a, "todense"): if np.isnan(self.to_value) and self.from_value == 0: # implicit return a raise RuntimeError( - "Unable to replace 0 by nan one value by another " - "in sparse matrix.") + "Unable to replace 0 by nan one value by another " "in sparse matrix." + ) return np.where(a == self.from_value, self.to_value, a) def fit(self, X, y=None, sample_weight=None): diff --git a/skl2onnx/sklapi/sklearn_text.py b/skl2onnx/sklapi/sklearn_text.py index a9efa8efe..709398dfc 100644 --- a/skl2onnx/sklapi/sklearn_text.py +++ b/skl2onnx/sklapi/sklearn_text.py @@ -3,9 +3,9 @@ @brief Overloads :epkg:`TfidfVectorizer` and :epkg:`CountVectorizer`. 
""" from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer + try: - from sklearn.feature_extraction.text import ( - _VectorizerMixin as VectorizerMixin) + from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin except ImportError: # pragma: no cover # scikit-learn < 0.23 from sklearn.feature_extraction.text import VectorizerMixin @@ -34,12 +34,12 @@ def _word_ngrams(self, tokens, stop_words=None): for token in tokens: val = (token,) if isinstance(token, str) else token if not isinstance(val, tuple): - raise TypeError( - f"Unexpected type {type(val)}:{val!r} for a token.") + raise TypeError(f"Unexpected type {type(val)}:{val!r} for a token.") if any(map(lambda x: not isinstance(x, str), val)): raise TypeError( f"Unexpected type {val!r}, one part of a " - f"token is not a string.") + f"token is not a string." + ) new_tokens.append(val) tokens = new_tokens @@ -69,13 +69,13 @@ def space_join(tokens): new_tokens.extend(token) else: raise TypeError( # pragma: no cover - f"Unable to build a n-grams out of {tokens}.") + f"Unable to build a n-grams out of {tokens}." + ) return tuple(new_tokens) - for n in range(min_n, - min(max_n + 1, n_original_tokens + 1)): + for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): for i in range(n_original_tokens - n + 1): - tokens_append(space_join(original_tokens[i: i + n])) + tokens_append(space_join(original_tokens[i : i + n])) return tokens @staticmethod @@ -83,8 +83,7 @@ def _fix_vocabulary(expected, new_voc): update = {} for w, wid in new_voc.items(): if not isinstance(w, tuple): - raise TypeError( - f"Tuple is expected for a token not {type(w)}.") + raise TypeError(f"Tuple is expected for a token not {type(w)}.") s = " ".join(w) if s in expected: if expected[s] != wid: @@ -148,8 +147,7 @@ class TraceableCountVectorizer(CountVectorizer, NGramsMixin): """ def _word_ngrams(self, tokens, stop_words=None): - return NGramsMixin._word_ngrams( - self, tokens=tokens, stop_words=stop_words) + return NGramsMixin._word_ngrams(self, tokens=tokens, stop_words=stop_words) def fit(self, X, y=None): # scikit-learn implements fit_transform and fit calls it. @@ -164,8 +162,8 @@ def fit(self, X, y=None): self.same_ = same if self.stop_words != same.stop_words: raise AssertionError( - f"Different stop_words {self.stop_words} " - f"!= {same.stop_words}.") + f"Different stop_words {self.stop_words} " f"!= {same.stop_words}." + ) update, dups = self._fix_vocabulary(same.vocabulary_, self.vocabulary_) self.updated_vocabulary_ = update self.duplicated_vocabulary_ = dups @@ -218,11 +216,10 @@ class TraceableTfidfVectorizer(TfidfVectorizer, NGramsMixin): scikit-learn cannot distinguish between bi gram ("a b", "c") and ("a", "b c"). Therefore, there are merged into the same column by scikit-learn. This class, even if it is able to distinguish - between them, keeps the same ambiguity. """ + between them, keeps the same ambiguity.""" def _word_ngrams(self, tokens, stop_words=None): - return NGramsMixin._word_ngrams( - self, tokens=tokens, stop_words=stop_words) + return NGramsMixin._word_ngrams(self, tokens=tokens, stop_words=stop_words) def fit(self, X, y=None): super().fit(X, y=y) @@ -231,8 +228,8 @@ def fit(self, X, y=None): self.same_ = same if self.stop_words != same.stop_words: raise AssertionError( - f"Different stop_words {self.stop_words} " - f"!= {same.stop_words}.") + f"Different stop_words {self.stop_words} " f"!= {same.stop_words}." 
+ ) update, dups = self._fix_vocabulary(same.vocabulary_, self.vocabulary_) self.updated_vocabulary_ = update self.duplicated_vocabulary_ = dups diff --git a/skl2onnx/sklapi/sklearn_text_onnx.py b/skl2onnx/sklapi/sklearn_text_onnx.py index 611200e88..e3ae5e32b 100644 --- a/skl2onnx/sklapi/sklearn_text_onnx.py +++ b/skl2onnx/sklapi/sklearn_text_onnx.py @@ -2,11 +2,10 @@ from .. import update_registered_converter from ..shape_calculators.text_vectorizer import ( - calculate_sklearn_text_vectorizer_output_shapes) -from ..operator_converters.text_vectoriser import ( - convert_sklearn_text_vectorizer) -from ..operator_converters.tfidf_vectoriser import ( - convert_sklearn_tfidf_vectoriser) + calculate_sklearn_text_vectorizer_output_shapes, +) +from ..operator_converters.text_vectoriser import convert_sklearn_text_vectorizer +from ..operator_converters.tfidf_vectoriser import convert_sklearn_tfidf_vectoriser from .sklearn_text import TraceableCountVectorizer, TraceableTfidfVectorizer @@ -14,17 +13,27 @@ def register(): """Register converter for TraceableCountVectorizer, TraceableTfidfVectorizer.""" update_registered_converter( - TraceableCountVectorizer, "Skl2onnxTraceableCountVectorizer", + TraceableCountVectorizer, + "Skl2onnxTraceableCountVectorizer", calculate_sklearn_text_vectorizer_output_shapes, convert_sklearn_text_vectorizer, - options={'tokenexp': None, 'separators': None, - 'nan': [True, False], - 'keep_empty_string': [True, False]}) + options={ + "tokenexp": None, + "separators": None, + "nan": [True, False], + "keep_empty_string": [True, False], + }, + ) update_registered_converter( - TraceableTfidfVectorizer, "Skl2onnxTraceableTfidfVectorizer", + TraceableTfidfVectorizer, + "Skl2onnxTraceableTfidfVectorizer", calculate_sklearn_text_vectorizer_output_shapes, convert_sklearn_tfidf_vectoriser, - options={'tokenexp': None, 'separators': None, - 'nan': [True, False], - 'keep_empty_string': [True, False]}) + options={ + "tokenexp": None, + "separators": None, + "nan": [True, False], + "keep_empty_string": [True, False], + }, + ) diff --git a/skl2onnx/sklapi/woe_transformer.py b/skl2onnx/sklapi/woe_transformer.py index 2ddf21196..46ade73ef 100644 --- a/skl2onnx/sklapi/woe_transformer.py +++ b/skl2onnx/sklapi/woe_transformer.py @@ -2,10 +2,13 @@ import numpy as np from sklearn.base import TransformerMixin, BaseEstimator + try: from sklearn.utils.validation import _deprecate_positional_args except ImportError: - def _deprecate_positional_args(x): return x # noqa + + def _deprecate_positional_args(x): + return x # noqa class WOETransformer(TransformerMixin, BaseEstimator): @@ -70,7 +73,7 @@ def fit(self, X, y=None, sample_weight=None): dim += 1 continue intervals = self.intervals[i] - if intervals == 'passthrough': + if intervals == "passthrough": self.intervals_.append(None) self.weights_.append(None) self.indices_.append((dim, dim + 1)) @@ -78,18 +81,19 @@ def fit(self, X, y=None, sample_weight=None): continue if not isinstance(intervals, list): raise TypeError( - "Intervals for column %d must be a list not %r." - "" % (i, intervals)) + "Intervals for column %d must be a list not %r." "" % (i, intervals) + ) inlist = [] inweight = [] for index, interval in enumerate(intervals): if not isinstance(interval, tuple): raise TypeError( - "Interval %d is not a tuple but %r." % (i, interval)) + "Interval %d is not a tuple but %r." % (i, interval) + ) if len(interval) < 2: raise ValueError( - "Interval %d should have at least two values " - "%r." 
% interval) + "Interval %d should have at least two values " "%r." % interval + ) res = [] for j in range(0, 2): try: @@ -97,13 +101,15 @@ def fit(self, X, y=None, sample_weight=None): except (TypeError, ValueError) as e: raise TypeError( "Value at index %d in %r must be a float." - "" % (j, interval)) from e + "" % (j, interval) + ) from e res.append(fv) if len(interval) >= 3: if not isinstance(interval[2], bool): raise TypeError( "Value at index %i in %r must be a boolean." - "" % (2, interval)) + "" % (2, interval) + ) res.append(interval[2]) else: res.append(False) @@ -111,13 +117,17 @@ def fit(self, X, y=None, sample_weight=None): if not isinstance(interval[3], bool): raise TypeError( "Value at index %i in %r must be a boolean." - "" % (3, interval)) + "" % (3, interval) + ) res.append(interval[3]) else: res.append(True) inlist.append(tuple(res)) - if (self.weights is None or i >= len(self.weights) or - index >= len(self.weights[i])): + if ( + self.weights is None + or i >= len(self.weights) + or index >= len(self.weights[i]) + ): inweight.append(1) else: inweight.append(self.weights[i][index]) @@ -147,8 +157,7 @@ def _transform_column(self, X, column_index): right = col <= interval[1] else: right = col < interval[1] - res[:, i] = ((left * right).astype(X.dtype) * - self.weights_[column_index][i]) + res[:, i] = (left * right).astype(X.dtype) * self.weights_[column_index][i] if self.onehot: return res return res.sum(axis=1, keepdims=0) @@ -164,7 +173,7 @@ def transform(self, X, y=None): for i in range(X.shape[1]): a, b = self.indices_[i] if self.onehot: - res[:, a: b] = self._transform_column(X, i) + res[:, a:b] = self._transform_column(X, i) else: res[:, i] = self._transform_column(X, i) return res @@ -181,8 +190,11 @@ def get_feature_names(self): for interval in intervals: name = [ "[" if interval[2] else "]", - str(interval[0]), ",", str(interval[1]), - "]" if interval[3] else "["] + str(interval[0]), + ",", + str(interval[1]), + "]" if interval[3] else "[", + ] names.append("".join(name)) return names diff --git a/skl2onnx/sklapi/woe_transformer_onnx.py b/skl2onnx/sklapi/woe_transformer_onnx.py index be5de3df4..8b550c8f1 100644 --- a/skl2onnx/sklapi/woe_transformer_onnx.py +++ b/skl2onnx/sklapi/woe_transformer_onnx.py @@ -4,36 +4,57 @@ from typing import List import numpy as np from onnx.helper import ( - make_node, make_graph, make_model, make_tensor_value_info, - TensorProto) + make_node, + make_graph, + make_model, + make_tensor_value_info, + TensorProto, +) from onnx.numpy_helper import from_array from onnx import onnx_pb as onnx_proto from sklearn.base import BaseEstimator from ..common.data_types import ( - Int64TensorType, FloatTensorType, DoubleTensorType, - guess_numpy_type, guess_proto_type) + Int64TensorType, + FloatTensorType, + DoubleTensorType, + guess_numpy_type, + guess_proto_type, +) from ..common._topology import Scope, Operator, Variable, OPSET_TO_IR_VERSION from ..common._container import ModelComponentContainer from ..common.utils import ( check_input_and_output_types, check_input_and_output_numbers, - get_producer, get_producer_version, - get_domain, get_model_version) + get_producer, + get_producer_version, + get_domain, + get_model_version, +) from .. 
import update_registered_converter from .._supported_operators import _get_sklearn_operator_name from ..algebra.onnx_ops import ( - OnnxIdentity, OnnxMatMul, OnnxGather, OnnxConcat, OnnxReshapeApi13, - OnnxTreeEnsembleRegressor_1, OnnxOneHotEncoder, OnnxCast) + OnnxIdentity, + OnnxMatMul, + OnnxGather, + OnnxConcat, + OnnxReshapeApi13, + OnnxTreeEnsembleRegressor_1, + OnnxOneHotEncoder, + OnnxCast, +) from .woe_transformer import WOETransformer -def woe_parser(scope: Scope, model: BaseEstimator, - inputs: List[Variable], custom_parsers: dict = None): +def woe_parser( + scope: Scope, + model: BaseEstimator, + inputs: List[Variable], + custom_parsers: dict = None, +): "ONNX parser for WOETransformer: defines the output type." alias = _get_sklearn_operator_name(type(model)) this_operator = scope.declare_local_operator(alias, model) - output = scope.declare_local_variable( - "encoding", inputs[0].type.__class__()) + output = scope.declare_local_variable("encoding", inputs[0].type.__class__()) this_operator.inputs = inputs this_operator.outputs.append(output) return this_operator.outputs @@ -43,9 +64,9 @@ def woe_shape_calculator(operator: Operator): "ONNX shape calculator for WOETransformer: defines the output shape." type_list = [Int64TensorType, FloatTensorType, DoubleTensorType] check_input_and_output_types( - operator, good_input_types=type_list, good_output_types=type_list) - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + operator, good_input_types=type_list, good_output_types=type_list + ) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) op = operator.raw_operator x = operator.inputs[0] N = x.get_first_dimension() @@ -63,11 +84,10 @@ def woe_shape_calculator(operator: Operator): class Tree: - class Node: - - def __init__(self, parent, is_left, is_leaf, feature, - threshold, value, index=-1): + def __init__( + self, parent, is_left, is_leaf, feature, threshold, value, index=-1 + ): self.parent = parent self.is_left = is_left self.is_leaf = is_leaf @@ -79,19 +99,34 @@ def __init__(self, parent, is_left, is_leaf, feature, def __str__(self): return ( "Node(%s, is_left=%r, is_leaf=%r, feature=%r, " - "threshold=%r, value=%r, index=%r)%s" % ( - self.parent if isinstance(self.parent, int) + "threshold=%r, value=%r, index=%r)%s" + % ( + self.parent + if isinstance(self.parent, int) else "id%r" % id(self.parent), - self.is_left, self.is_leaf, self.feature, - self.threshold, self.value, self.index, - " # %s %r -> %r%s%s%s" % ( - self.onnx_mode, self.onnx_threshold, self.onnx_value, + self.is_left, + self.is_leaf, + self.feature, + self.threshold, + self.value, + self.index, + " # %s %r -> %r%s%s%s" + % ( + self.onnx_mode, + self.onnx_threshold, + self.onnx_value, " -- %r" % self.intervals_ - if hasattr(self, 'intervals_') else '', + if hasattr(self, "intervals_") + else "", " LL %r" % self.intervals_left_ - if hasattr(self, 'intervals_left_') else '', + if hasattr(self, "intervals_left_") + else "", " RR %r" % self.intervals_right_ - if hasattr(self, 'intervals_right_') else ''))) + if hasattr(self, "intervals_right_") + else "", + ), + ) + ) @property def onnx_value(self): @@ -108,16 +143,15 @@ def onnx_mode(self): # 'BRANCH_LEQ', 'BRANCH_LT', 'BRANCH_GTE', 'BRANCH_GT', # 'BRANCH_EQ', 'BRANCH_NEQ', 'LEAF' if self.is_leaf: - return 'LEAF' + return "LEAF" if self.threshold[1]: - return 'BRANCH_LEQ' - return 'BRANCH_LT' + return "BRANCH_LEQ" + return "BRANCH_LT" @staticmethod def _is_on_left_side(th, kind, x, leq, 
left_right_extremity): - if kind not in ('BRANCH_LEQ', 'BRANCH_LT'): - raise NotImplementedError( - "Not implemented for mode %r." % kind) + if kind not in ("BRANCH_LEQ", "BRANCH_LT"): + raise NotImplementedError("Not implemented for mode %r." % kind) if x < th: return False if x > th: @@ -126,17 +160,16 @@ def _is_on_left_side(th, kind, x, leq, left_right_extremity): return True if left_right_extremity and not leq: return False - if kind == 'BRANCH_LEQ' and leq: + if kind == "BRANCH_LEQ" and leq: return False - if kind == 'BRANCH_LT' and not leq: + if kind == "BRANCH_LT" and not leq: return False return True def is_on_left_side(self, x, leq, left_right_extremity): th = self.threshold[0] kind = self.onnx_mode - res = Tree.Node._is_on_left_side( - th, kind, x, leq, left_right_extremity) + res = Tree.Node._is_on_left_side(th, kind, x, leq, left_right_extremity) return res def __init__(self): @@ -155,21 +188,23 @@ def __str__(self): res = res.replace("id" + k, "n" + v) return res - def add_node(self, parent, is_left, is_leaf, feature, threshold, - value=None, index=-1): + def add_node( + self, parent, is_left, is_leaf, feature, threshold, value=None, index=-1 + ): if is_leaf and value is None: raise ValueError("value must be specified when is_leaf=True.") if not is_leaf and value is not None: raise ValueError("value must not be specified when is_leaf=False.") - node = Tree.Node(parent, is_left, is_leaf, feature, threshold, - value, index=index) + node = Tree.Node( + parent, is_left, is_leaf, feature, threshold, value, index=index + ) self.nodes.append(node) if is_leaf: if value in self.leave_values: raise RuntimeError( "The tree must contain unique tree value, %r " - "already in %r.\n%s" % ( - value, self.leave_values, str(self))) + "already in %r.\n%s" % (value, self.leave_values, str(self)) + ) self.leave_values.add(value) return node @@ -185,7 +220,7 @@ def onnx_attributes(self): Operators-ml.md#ai.onnx.ml.TreeEnsembleRegressor>`_. """ atts = dict( - aggregate_function='SUM', + aggregate_function="SUM", base_values=[float(0)], n_targets=1, nodes_featureids=[n.feature for n in self.nodes], @@ -194,14 +229,16 @@ def onnx_attributes(self): nodes_nodeids=[i for i in range(len(self.nodes))], nodes_treeids=[0 for n in self.nodes], nodes_values=[float(n.onnx_threshold) for n in self.nodes], - post_transform='NONE', - target_ids=[0 for n in self.nodes if n.onnx_mode == 'LEAF'], - target_nodeids=[i for i, n in enumerate(self.nodes) - if n.onnx_mode == 'LEAF'], - target_treeids=[0 for n in self.nodes - if n.onnx_mode == 'LEAF'], - target_weights=[float(n.onnx_value) for n in self.nodes - if n.onnx_mode == 'LEAF']) + post_transform="NONE", + target_ids=[0 for n in self.nodes if n.onnx_mode == "LEAF"], + target_nodeids=[ + i for i, n in enumerate(self.nodes) if n.onnx_mode == "LEAF" + ], + target_treeids=[0 for n in self.nodes if n.onnx_mode == "LEAF"], + target_weights=[ + float(n.onnx_value) for n in self.nodes if n.onnx_mode == "LEAF" + ], + ) ids = {id(n): (i, n) for i, n in enumerate(self.nodes)} nodes_truenodeids = [0 for n in self.nodes] # right @@ -215,12 +252,14 @@ def onnx_attributes(self): nodes_truenodeids[val[0]] = i else: nodes_falsenodeids[val[0]] = i - atts.update(dict( - nodes_falsenodeids=nodes_falsenodeids, - nodes_truenodeids=nodes_truenodeids)) - if len(atts['target_weights']) != len(set(atts['target_weights'])): - warnings.warn( - "All targets should be unique %r." 
% atts['target_weights']) + atts.update( + dict( + nodes_falsenodeids=nodes_falsenodeids, + nodes_truenodeids=nodes_truenodeids, + ) + ) + if len(atts["target_weights"]) != len(set(atts["target_weights"])): + warnings.warn("All targets should be unique %r." % atts["target_weights"]) return atts def mapping(self, intervals): @@ -230,18 +269,21 @@ def mapping(self, intervals): `intervals_rights_` as dictionary `{idx: interval}` each side intersects. """ + def process(node, intervals): - if hasattr(node, 'intervals_'): + if hasattr(node, "intervals_"): return 0 if node.parent is None or node.parent == -1: node.intervals_ = intervals else: - if not hasattr(node.parent, 'intervals_'): + if not hasattr(node.parent, "intervals_"): return 0 node.intervals_ = ( - node.parent.intervals_left_ if node.is_left - else node.parent.intervals_right_) + node.parent.intervals_left_ + if node.is_left + else node.parent.intervals_right_ + ) if node.value is not None: # leaf @@ -267,7 +309,7 @@ def process(node, intervals): return 1 for node in self.nodes: - for at in ['intervals_', 'intervals_left_', 'intervals_right_']: + for at in ["intervals_", "intervals_left_", "intervals_right_"]: if hasattr(node, at): delattr(node, at) @@ -307,8 +349,8 @@ def digitize2tree(bins, right=False, feature=0): def add_root(index): if index < 0 or index >= len(bins): raise IndexError( # pragma: no cover - "Unexpected index %d / len(bins)=%d." % ( - index, len(bins))) + "Unexpected index %d / len(bins)=%d." % (index, len(bins)) + ) parent = -1 is_left = False is_leaf = False @@ -325,8 +367,7 @@ def add_nodes(parent, i, j, is_left): if i == j: # leaf value = parent.index * 2 - n = tree.add_node( - parent, is_left, True, 0, 0, value=value, index=i) + n = tree.add_node(parent, is_left, True, 0, 0, value=value, index=i) n_nodes.append(n) values.append(i) return n @@ -355,8 +396,7 @@ def add_nodes(parent, i, j, is_left): # leaf value = parent.index * 2 + 1 values.append(j) - n = tree.add_node( - parent, is_left, True, 0, 0, value=value, index=j) + n = tree.add_node(parent, is_left, True, 0, 0, value=value, index=j) n_nodes.append(n) return n if i + 1 < j: @@ -370,8 +410,8 @@ def add_nodes(parent, i, j, is_left): add_nodes(n, index, j, False) return n raise NotImplementedError( # pragma: no cover - "Unexpected case where i=%r, j=%r, is_left=%r." % ( - i, j, is_left)) + "Unexpected case where i=%r, j=%r, is_left=%r." % (i, j, is_left) + ) index = len(bins) // 2 root = add_root(index) @@ -401,15 +441,13 @@ def _mapping_to_key_value(mapping, weights): if len(v) == 0: continue if len(v) != 1: - raise RuntimeError( - 'Intervals overlops in mapping %r.' % mapping) + raise RuntimeError("Intervals overlops in mapping %r." % mapping) value = list(v)[0] key_value[float(k)] = float(weights[value]) return key_value -def woe_converter(scope: Scope, operator: Operator, - container: ModelComponentContainer): +def woe_converter(scope: Scope, operator: Operator, container: ModelComponentContainer): """ ONNX Converter for WOETransformer. It follows *skl2onnx* API. 
@@ -426,7 +464,7 @@ def woe_converter(scope: Scope, operator: Operator, vector_shape = np.array([-1], dtype=np.int64) dtype = guess_numpy_type(X.type) proto_type = guess_proto_type(X.type) - verbose = getattr(container, 'verbose', 0) + verbose = getattr(container, "verbose", 0) columns = [] @@ -439,7 +477,10 @@ def woe_converter(scope: Scope, operator: Operator, columns.append( OnnxReshapeApi13( OnnxGather(X, index, op_version=opv, axis=1), - new_shape, op_version=opv)) + new_shape, + op_version=opv, + ) + ) continue # encoding columns @@ -450,29 +491,34 @@ def woe_converter(scope: Scope, operator: Operator, if op.onehot: node = OnnxTreeEnsembleRegressor_1( - X, op_version=1, domain='ai.onnx.ml', **atts) - cats = list(sorted(set(int(n.onnx_value) - for n in tree.nodes if n.is_leaf))) + X, op_version=1, domain="ai.onnx.ml", **atts + ) + cats = list(sorted(set(int(n.onnx_value) for n in tree.nodes if n.is_leaf))) mat_mapping = _mapping2matrix(mapping, cats, op.weights_[i], dtype) if verbose > 1: print("[woe_converter] mapping=%r" % mapping) ohe = OnnxOneHotEncoder( - OnnxReshapeApi13( - node, vector_shape, op_version=opv), - op_version=opv, cats_int64s=cats) + OnnxReshapeApi13(node, vector_shape, op_version=opv), + op_version=opv, + cats_int64s=cats, + ) ren = OnnxMatMul( OnnxCast(ohe, op_version=opv, to=proto_type), - mat_mapping, op_version=opv) + mat_mapping, + op_version=opv, + ) columns.append(ren) else: key_value = _mapping_to_key_value(mapping, op.weights_[i]) - atts['target_weights'] = [ - key_value.get(v, 0.) for v in atts['target_weights']] + atts["target_weights"] = [ + key_value.get(v, 0.0) for v in atts["target_weights"] + ] if verbose > 1: print("[woe_converter] mapping=%r" % mapping) print("[woe_converter] key_value=%r" % key_value) node = OnnxTreeEnsembleRegressor_1( - X, op_version=1, domain='ai.onnx.ml', **atts) + X, op_version=1, domain="ai.onnx.ml", **atts + ) lab = OnnxReshapeApi13(node, new_shape, op_version=opv) columns.append(lab) @@ -504,28 +550,29 @@ def woe_transformer_to_onnx(op, opset=None): C = len(op.intervals_) # inputs - X = make_tensor_value_info( - 'X', TensorProto.FLOAT, [None, len(op.intervals_)]) - Y = make_tensor_value_info( - 'Y', TensorProto.FLOAT, [None, C]) + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, len(op.intervals_)]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, [None, C]) # nodes nodes = [] columns = [] - inits = [from_array(np.array([-1, 1], dtype=np.int64), name='new_shape'), - from_array(np.array([-1], dtype=np.int64), name='vector_shape')] + inits = [ + from_array(np.array([-1, 1], dtype=np.int64), name="new_shape"), + from_array(np.array([-1], dtype=np.int64), name="vector_shape"), + ] thresholds = op._decision_thresholds(add_index=False) for i, threshold in enumerate(thresholds): if threshold is None: # Passthrough columns - inits.append(from_array( - np.array([i], dtype=np.int64), name='index%d' % i)) - nodes.append(make_node( - 'Gather', ['X', 'index%d' % i], ['col%d' % i], axis=1)) - nodes.append(make_node( - 'Reshape', ['col%d' % i, 'new_shape'], ['reshr%d' % i])) - columns.append('reshr%d' % i) + inits.append(from_array(np.array([i], dtype=np.int64), name="index%d" % i)) + nodes.append( + make_node("Gather", ["X", "index%d" % i], ["col%d" % i], axis=1) + ) + nodes.append( + make_node("Reshape", ["col%d" % i, "new_shape"], ["reshr%d" % i]) + ) + columns.append("reshr%d" % i) continue # encoding columns @@ -535,49 +582,67 @@ def woe_transformer_to_onnx(op, opset=None): atts = tree.onnx_attributes() if op.onehot: 
- nodes.append(make_node( - 'TreeEnsembleRegressor', ['X'], ['rf%d' % i], - domain='ai.onnx.ml', **atts)) - cats = list(sorted(set(int(n.onnx_value) - for n in tree.nodes if n.is_leaf))) - mat_mapping = _mapping2matrix( - mapping, cats, op.weights_[i], np.float32) - nodes.append(make_node( - 'Reshape', ['rf%d' % i, 'vector_shape'], ['resh%d' % i])) - nodes.append(make_node( - 'OneHotEncoder', ['resh%d' % i], ['ohe%d' % i], - domain='ai.onnx.ml', cats_int64s=cats)) - nodes.append(make_node( - 'Cast', ['ohe%d' % i], ['cast%d' % i], to=TensorProto.FLOAT)) - inits.append(from_array(mat_mapping, 'mat_map%i' % i)) - nodes.append(make_node( - 'MatMul', ['cast%d' % i, 'mat_map%i' % i], ["mul%d" % i])) + nodes.append( + make_node( + "TreeEnsembleRegressor", + ["X"], + ["rf%d" % i], + domain="ai.onnx.ml", + **atts + ) + ) + cats = list(sorted(set(int(n.onnx_value) for n in tree.nodes if n.is_leaf))) + mat_mapping = _mapping2matrix(mapping, cats, op.weights_[i], np.float32) + nodes.append( + make_node("Reshape", ["rf%d" % i, "vector_shape"], ["resh%d" % i]) + ) + nodes.append( + make_node( + "OneHotEncoder", + ["resh%d" % i], + ["ohe%d" % i], + domain="ai.onnx.ml", + cats_int64s=cats, + ) + ) + nodes.append( + make_node("Cast", ["ohe%d" % i], ["cast%d" % i], to=TensorProto.FLOAT) + ) + inits.append(from_array(mat_mapping, "mat_map%i" % i)) + nodes.append( + make_node("MatMul", ["cast%d" % i, "mat_map%i" % i], ["mul%d" % i]) + ) columns.append("mul%d" % i) else: key_value = _mapping_to_key_value(mapping, op.weights_[i]) - atts['target_weights'] = [ - key_value.get(v, 0.) for v in atts['target_weights']] - nodes.append(make_node( - 'TreeEnsembleRegressor', ['X'], ['rf%d' % i], - domain='ai.onnx.ml', **atts)) - nodes.append(make_node( - 'Reshape', ['rf%d' % i, 'new_shape'], ['lab%d' % i])) + atts["target_weights"] = [ + key_value.get(v, 0.0) for v in atts["target_weights"] + ] + nodes.append( + make_node( + "TreeEnsembleRegressor", + ["X"], + ["rf%d" % i], + domain="ai.onnx.ml", + **atts + ) + ) + nodes.append(make_node("Reshape", ["rf%d" % i, "new_shape"], ["lab%d" % i])) columns.append("lab%d" % i) - nodes.append(make_node( - 'Concat', columns, ['Y'], axis=1)) + nodes.append(make_node("Concat", columns, ["Y"], axis=1)) # final graph - graph_def = make_graph(nodes, 't1', [X], [Y], inits) - model_def = make_model(graph_def, producer_name='skl2onnx') + graph_def = make_graph(nodes, "t1", [X], [Y], inits) + model_def = make_model(graph_def, producer_name="skl2onnx") if opset is not None: del model_def.opset_import[:] op_set = model_def.opset_import.add() - op_set.domain = '' + op_set.domain = "" op_set.version = opset op_set = model_def.opset_import.add() - op_set.domain = 'ai.onnx.ml' + op_set.domain = "ai.onnx.ml" op_set.version = 2 irv = OPSET_TO_IR_VERSION.get(opset, onnx_proto.IR_VERSION) model_def.ir_version = irv @@ -593,5 +658,9 @@ def woe_transformer_to_onnx(op, opset=None): def register(): "Register converter for WOETransformer." 
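# The registration idiom used just below (and in sklearn_text_onnx.py above)
# applies to any custom estimator. A hedged, self-contained sketch; MyIdentity
# and its two callables are hypothetical stand-ins, not part of skl2onnx.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from skl2onnx import update_registered_converter
from skl2onnx.common.data_types import FloatTensorType


class MyIdentity(TransformerMixin, BaseEstimator):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return np.asarray(X, dtype=np.float32)


def my_identity_shape_calculator(operator):
    # Output keeps the input's shape; force a float tensor type.
    operator.outputs[0].type = FloatTensorType(operator.inputs[0].type.shape)


def my_identity_converter(scope, operator, container):
    # A single ONNX Identity node reproduces the transform.
    container.add_node(
        "Identity",
        operator.inputs[0].full_name,
        operator.outputs[0].full_name,
        name=scope.get_unique_operator_name("MyIdentity"),
    )


update_registered_converter(
    MyIdentity,
    "Skl2onnxMyIdentity",
    my_identity_shape_calculator,
    my_identity_converter,
)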
update_registered_converter( - WOETransformer, "Skl2onnxWOETransformer", - woe_shape_calculator, woe_converter, parser=woe_parser) + WOETransformer, + "Skl2onnxWOETransformer", + woe_shape_calculator, + woe_converter, + parser=woe_parser, + ) diff --git a/skl2onnx/tutorial/benchmark.py b/skl2onnx/tutorial/benchmark.py index 7d8ccc443..411342bc9 100644 --- a/skl2onnx/tutorial/benchmark.py +++ b/skl2onnx/tutorial/benchmark.py @@ -38,8 +38,14 @@ def measure_time(stmt, context, repeat=10, number=50, div_by_number=False): if div_by_number: res /= number mean = numpy.mean(res) - dev = numpy.mean(res ** 2) + dev = numpy.mean(res**2) dev = (dev - mean**2) ** 0.5 - mes = dict(average=mean, deviation=dev, min_exec=numpy.min(res), - max_exec=numpy.max(res), repeat=repeat, number=number) + mes = dict( + average=mean, + deviation=dev, + min_exec=numpy.min(res), + max_exec=numpy.max(res), + repeat=repeat, + number=number, + ) return mes diff --git a/skl2onnx/tutorial/imagenet_classes.py b/skl2onnx/tutorial/imagenet_classes.py index 567c60650..bdeb97969 100644 --- a/skl2onnx/tutorial/imagenet_classes.py +++ b/skl2onnx/tutorial/imagenet_classes.py @@ -5,1029 +5,1020 @@ """ class_names = { - 0: 'tench, Tinca tinca', - 1: 'goldfish, Carassius auratus', - 2: 'great white shark, white shark, man-eater, man-eating shark, ' - 'Carcharodon carcharias', - 3: 'tiger shark, Galeocerdo cuvieri', - 4: 'hammerhead, hammerhead shark', - 5: 'electric ray, crampfish, numbfish, torpedo', - 6: 'stingray', - 7: 'cock', - 8: 'hen', - 9: 'ostrich, Struthio camelus', - 10: 'brambling, Fringilla montifringilla', - 11: 'goldfinch, Carduelis carduelis', - 12: 'house finch, linnet, Carpodacus mexicanus', - 13: 'junco, snowbird', - 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea', - 15: 'robin, American robin, Turdus migratorius', - 16: 'bulbul', - 17: 'jay', - 18: 'magpie', - 19: 'chickadee', - 20: 'water ouzel, dipper', - 21: 'kite', - 22: 'bald eagle, American eagle, Haliaeetus leucocephalus', - 23: 'vulture', - 24: 'great grey owl, great gray owl, Strix nebulosa', - 25: 'European fire salamander, Salamandra salamandra', - 26: 'common newt, Triturus vulgaris', - 27: 'eft', - 28: 'spotted salamander, Ambystoma maculatum', - 29: 'axolotl, mud puppy, Ambystoma mexicanum', - 30: 'bullfrog, Rana catesbeiana', - 31: 'tree frog, tree-frog', - 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui', - 33: 'loggerhead, loggerhead turtle, Caretta caretta', - 34: 'leatherback turtle, leatherback, leathery turtle, ' - 'Dermochelys coriacea', - 35: 'mud turtle', - 36: 'terrapin', - 37: 'box turtle, box tortoise', - 38: 'banded gecko', - 39: 'common iguana, iguana, Iguana iguana', - 40: 'American chameleon, anole, Anolis carolinensis', - 41: 'whiptail, whiptail lizard', - 42: 'agama', - 43: 'frilled lizard, Chlamydosaurus kingi', - 44: 'alligator lizard', - 45: 'Gila monster, Heloderma suspectum', - 46: 'green lizard, Lacerta viridis', - 47: 'African chameleon, Chamaeleo chamaeleon', - 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, ' - 'Varanus komodoensis', - 49: 'African crocodile, Nile crocodile, Crocodylus niloticus', - 50: 'American alligator, Alligator mississipiensis', - 51: 'triceratops', - 52: 'thunder snake, worm snake, Carphophis amoenus', - 53: 'ringneck snake, ring-necked snake, ring snake', - 54: 'hognose snake, puff adder, sand viper', - 55: 'green snake, grass snake', - 56: 'king snake, kingsnake', - 57: 'garter snake, grass snake', - 58: 'water snake', - 59: 'vine snake', - 60: 
'night snake, Hypsiglena torquata', - 61: 'boa constrictor, Constrictor constrictor', - 62: 'rock python, rock snake, Python sebae', - 63: 'Indian cobra, Naja naja', - 64: 'green mamba', - 65: 'sea snake', - 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', - 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus', - 68: 'sidewinder, horned rattlesnake, Crotalus cerastes', - 69: 'trilobite', - 70: 'harvestman, daddy longlegs, Phalangium opilio', - 71: 'scorpion', - 72: 'black and gold garden spider, Argiope aurantia', - 73: 'barn spider, Araneus cavaticus', - 74: 'garden spider, Aranea diademata', - 75: 'black widow, Latrodectus mactans', - 76: 'tarantula', - 77: 'wolf spider, hunting spider', - 78: 'tick', - 79: 'centipede', - 80: 'black grouse', - 81: 'ptarmigan', - 82: 'ruffed grouse, partridge, Bonasa umbellus', - 83: 'prairie chicken, prairie grouse, prairie fowl', - 84: 'peacock', - 85: 'quail', - 86: 'partridge', - 87: 'African grey, African gray, Psittacus erithacus', - 88: 'macaw', - 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', - 90: 'lorikeet', - 91: 'coucal', - 92: 'bee eater', - 93: 'hornbill', - 94: 'hummingbird', - 95: 'jacamar', - 96: 'toucan', - 97: 'drake', - 98: 'red-breasted merganser, Mergus serrator', - 99: 'goose', - 100: 'black swan, Cygnus atratus', - 101: 'tusker', - 102: 'echidna, spiny anteater, anteater', - 103: 'platypus, duckbill, duckbilled platypus, duck-billed ' - 'platypus, Ornithorhynchus anatinus', - 104: 'wallaby, brush kangaroo', - 105: 'koala, koala bear, kangaroo bear, native bear, ' - 'Phascolarctos cinereus', - 106: 'wombat', - 107: 'jellyfish', - 108: 'sea anemone, anemone', - 109: 'brain coral', - 110: 'flatworm, platyhelminth', - 111: 'nematode, nematode worm, roundworm', - 112: 'conch', - 113: 'snail', - 114: 'slug', - 115: 'sea slug, nudibranch', - 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore', - 117: 'chambered nautilus, pearly nautilus, nautilus', - 118: 'Dungeness crab, Cancer magister', - 119: 'rock crab, Cancer irroratus', - 120: 'fiddler crab', - 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king ' - 'crab, Paralithodes camtschatica', - 122: 'American lobster, Northern lobster, Maine lobster, ' - 'Homarus americanus', - 123: 'spiny lobster, langouste, rock lobster, crawfish, ' - 'crayfish, sea crawfish', - 124: 'crayfish, crawfish, crawdad, crawdaddy', - 125: 'hermit crab', - 126: 'isopod', - 127: 'white stork, Ciconia ciconia', - 128: 'black stork, Ciconia nigra', - 129: 'spoonbill', - 130: 'flamingo', - 131: 'little blue heron, Egretta caerulea', - 132: 'American egret, great white heron, Egretta albus', - 133: 'bittern', - 134: 'crane', - 135: 'limpkin, Aramus pictus', - 136: 'European gallinule, Porphyrio porphyrio', - 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana', - 138: 'bustard', - 139: 'ruddy turnstone, Arenaria interpres', - 140: 'red-backed sandpiper, dunlin, Erolia alpina', - 141: 'redshank, Tringa totanus', - 142: 'dowitcher', - 143: 'oystercatcher, oyster catcher', - 144: 'pelican', - 145: 'king penguin, Aptenodytes patagonica', - 146: 'albatross, mollymawk', - 147: 'grey whale, gray whale, devilfish, Eschrichtius ' - 'gibbosus, Eschrichtius robustus', - 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', - 149: 'dugong, Dugong dugon', - 150: 'sea lion', - 151: 'Chihuahua', - 152: 'Japanese spaniel', - 153: 'Maltese dog, Maltese terrier, Maltese', - 154: 'Pekinese, Pekingese, Peke', - 155: 'Shih-Tzu', - 
156: 'Blenheim spaniel', - 157: 'papillon', - 158: 'toy terrier', - 159: 'Rhodesian ridgeback', - 160: 'Afghan hound, Afghan', - 161: 'basset, basset hound', - 162: 'beagle', - 163: 'bloodhound, sleuthhound', - 164: 'bluetick', - 165: 'black-and-tan coonhound', - 166: 'Walker hound, Walker foxhound', - 167: 'English foxhound', - 168: 'redbone', - 169: 'borzoi, Russian wolfhound', - 170: 'Irish wolfhound', - 171: 'Italian greyhound', - 172: 'whippet', - 173: 'Ibizan hound, Ibizan Podenco', - 174: 'Norwegian elkhound, elkhound', - 175: 'otterhound, otter hound', - 176: 'Saluki, gazelle hound', - 177: 'Scottish deerhound, deerhound', - 178: 'Weimaraner', - 179: 'Staffordshire bullterrier, Staffordshire bull terrier', - 180: 'American Staffordshire terrier, Staffordshire terrier, ' - 'American pit bull terrier, pit bull terrier', - 181: 'Bedlington terrier', - 182: 'Border terrier', - 183: 'Kerry blue terrier', - 184: 'Irish terrier', - 185: 'Norfolk terrier', - 186: 'Norwich terrier', - 187: 'Yorkshire terrier', - 188: 'wire-haired fox terrier', - 189: 'Lakeland terrier', - 190: 'Sealyham terrier, Sealyham', - 191: 'Airedale, Airedale terrier', - 192: 'cairn, cairn terrier', - 193: 'Australian terrier', - 194: 'Dandie Dinmont, Dandie Dinmont terrier', - 195: 'Boston bull, Boston terrier', - 196: 'miniature schnauzer', - 197: 'giant schnauzer', - 198: 'standard schnauzer', - 199: 'Scotch terrier, Scottish terrier, Scottie', - 200: 'Tibetan terrier, chrysanthemum dog', - 201: 'silky terrier, Sydney silky', - 202: 'soft-coated wheaten terrier', - 203: 'West Highland white terrier', - 204: 'Lhasa, Lhasa apso', - 205: 'flat-coated retriever', - 206: 'curly-coated retriever', - 207: 'golden retriever', - 208: 'Labrador retriever', - 209: 'Chesapeake Bay retriever', - 210: 'German short-haired pointer', - 211: 'vizsla, Hungarian pointer', - 212: 'English setter', - 213: 'Irish setter, red setter', - 214: 'Gordon setter', - 215: 'Brittany spaniel', - 216: 'clumber, clumber spaniel', - 217: 'English springer, English springer spaniel', - 218: 'Welsh springer spaniel', - 219: 'cocker spaniel, English cocker spaniel, cocker', - 220: 'Sussex spaniel', - 221: 'Irish water spaniel', - 222: 'kuvasz', - 223: 'schipperke', - 224: 'groenendael', - 225: 'malinois', - 226: 'briard', - 227: 'kelpie', - 228: 'komondor', - 229: 'Old English sheepdog, bobtail', - 230: 'Shetland sheepdog, Shetland sheep dog, Shetland', - 231: 'collie', - 232: 'Border collie', - 233: 'Bouvier des Flandres, Bouviers des Flandres', - 234: 'Rottweiler', - 235: 'German shepherd, German shepherd dog, German police dog, alsatian', - 236: 'Doberman, Doberman pinscher', - 237: 'miniature pinscher', - 238: 'Greater Swiss Mountain dog', - 239: 'Bernese mountain dog', - 240: 'Appenzeller', - 241: 'EntleBucher', - 242: 'boxer', - 243: 'bull mastiff', - 244: 'Tibetan mastiff', - 245: 'French bulldog', - 246: 'Great Dane', - 247: 'Saint Bernard, St Bernard', - 248: 'Eskimo dog, husky', - 249: 'malamute, malemute, Alaskan malamute', - 250: 'Siberian husky', - 251: 'dalmatian, coach dog, carriage dog', - 252: 'affenpinscher, monkey pinscher, monkey dog', - 253: 'basenji', - 254: 'pug, pug-dog', - 255: 'Leonberg', - 256: 'Newfoundland, Newfoundland dog', - 257: 'Great Pyrenees', - 258: 'Samoyed, Samoyede', - 259: 'Pomeranian', - 260: 'chow, chow chow', - 261: 'keeshond', - 262: 'Brabancon griffon', - 263: 'Pembroke, Pembroke Welsh corgi', - 264: 'Cardigan, Cardigan Welsh corgi', - 265: 'toy poodle', - 266: 'miniature poodle', - 267: 'standard poodle', 
- 268: 'Mexican hairless', - 269: 'timber wolf, grey wolf, gray wolf, Canis lupus', - 270: 'white wolf, Arctic wolf, Canis lupus tundrarum', - 271: 'red wolf, maned wolf, Canis rufus, Canis niger', - 272: 'coyote, prairie wolf, brush wolf, Canis latrans', - 273: 'dingo, warrigal, warragal, Canis dingo', - 274: 'dhole, Cuon alpinus', - 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', - 276: 'hyena, hyaena', - 277: 'red fox, Vulpes vulpes', - 278: 'kit fox, Vulpes macrotis', - 279: 'Arctic fox, white fox, Alopex lagopus', - 280: 'grey fox, gray fox, Urocyon cinereoargenteus', - 281: 'tabby, tabby cat', - 282: 'tiger cat', - 283: 'Persian cat', - 284: 'Siamese cat, Siamese', - 285: 'Egyptian cat', - 286: 'cougar, puma, catamount, mountain lion, painter, ' - 'panther, Felis concolor', - 287: 'lynx, catamount', - 288: 'leopard, Panthera pardus', - 289: 'snow leopard, ounce, Panthera uncia', - 290: 'jaguar, panther, Panthera onca, Felis onca', - 291: 'lion, king of beasts, Panthera leo', - 292: 'tiger, Panthera tigris', - 293: 'cheetah, chetah, Acinonyx jubatus', - 294: 'brown bear, bruin, Ursus arctos', - 295: 'American black bear, black bear, Ursus americanus, ' - 'Euarctos americanus', - 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', - 297: 'sloth bear, Melursus ursinus, Ursus ursinus', - 298: 'mongoose', - 299: 'meerkat, mierkat', - 300: 'tiger beetle', - 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', - 302: 'ground beetle, carabid beetle', - 303: 'long-horned beetle, longicorn, longicorn beetle', - 304: 'leaf beetle, chrysomelid', - 305: 'dung beetle', - 306: 'rhinoceros beetle', - 307: 'weevil', - 308: 'fly', - 309: 'bee', - 310: 'ant, emmet, pismire', - 311: 'grasshopper, hopper', - 312: 'cricket', - 313: 'walking stick, walkingstick, stick insect', - 314: 'cockroach, roach', - 315: 'mantis, mantid', - 316: 'cicada, cicala', - 317: 'leafhopper', - 318: 'lacewing, lacewing fly', + 0: "tench, Tinca tinca", + 1: "goldfish, Carassius auratus", + 2: "great white shark, white shark, man-eater, man-eating shark, " + "Carcharodon carcharias", + 3: "tiger shark, Galeocerdo cuvieri", + 4: "hammerhead, hammerhead shark", + 5: "electric ray, crampfish, numbfish, torpedo", + 6: "stingray", + 7: "cock", + 8: "hen", + 9: "ostrich, Struthio camelus", + 10: "brambling, Fringilla montifringilla", + 11: "goldfinch, Carduelis carduelis", + 12: "house finch, linnet, Carpodacus mexicanus", + 13: "junco, snowbird", + 14: "indigo bunting, indigo finch, indigo bird, Passerina cyanea", + 15: "robin, American robin, Turdus migratorius", + 16: "bulbul", + 17: "jay", + 18: "magpie", + 19: "chickadee", + 20: "water ouzel, dipper", + 21: "kite", + 22: "bald eagle, American eagle, Haliaeetus leucocephalus", + 23: "vulture", + 24: "great grey owl, great gray owl, Strix nebulosa", + 25: "European fire salamander, Salamandra salamandra", + 26: "common newt, Triturus vulgaris", + 27: "eft", + 28: "spotted salamander, Ambystoma maculatum", + 29: "axolotl, mud puppy, Ambystoma mexicanum", + 30: "bullfrog, Rana catesbeiana", + 31: "tree frog, tree-frog", + 32: "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", + 33: "loggerhead, loggerhead turtle, Caretta caretta", + 34: "leatherback turtle, leatherback, leathery turtle, " "Dermochelys coriacea", + 35: "mud turtle", + 36: "terrapin", + 37: "box turtle, box tortoise", + 38: "banded gecko", + 39: "common iguana, iguana, Iguana iguana", + 40: "American chameleon, anole, Anolis carolinensis", + 41: 
"whiptail, whiptail lizard", + 42: "agama", + 43: "frilled lizard, Chlamydosaurus kingi", + 44: "alligator lizard", + 45: "Gila monster, Heloderma suspectum", + 46: "green lizard, Lacerta viridis", + 47: "African chameleon, Chamaeleo chamaeleon", + 48: "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, " + "Varanus komodoensis", + 49: "African crocodile, Nile crocodile, Crocodylus niloticus", + 50: "American alligator, Alligator mississipiensis", + 51: "triceratops", + 52: "thunder snake, worm snake, Carphophis amoenus", + 53: "ringneck snake, ring-necked snake, ring snake", + 54: "hognose snake, puff adder, sand viper", + 55: "green snake, grass snake", + 56: "king snake, kingsnake", + 57: "garter snake, grass snake", + 58: "water snake", + 59: "vine snake", + 60: "night snake, Hypsiglena torquata", + 61: "boa constrictor, Constrictor constrictor", + 62: "rock python, rock snake, Python sebae", + 63: "Indian cobra, Naja naja", + 64: "green mamba", + 65: "sea snake", + 66: "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", + 67: "diamondback, diamondback rattlesnake, Crotalus adamanteus", + 68: "sidewinder, horned rattlesnake, Crotalus cerastes", + 69: "trilobite", + 70: "harvestman, daddy longlegs, Phalangium opilio", + 71: "scorpion", + 72: "black and gold garden spider, Argiope aurantia", + 73: "barn spider, Araneus cavaticus", + 74: "garden spider, Aranea diademata", + 75: "black widow, Latrodectus mactans", + 76: "tarantula", + 77: "wolf spider, hunting spider", + 78: "tick", + 79: "centipede", + 80: "black grouse", + 81: "ptarmigan", + 82: "ruffed grouse, partridge, Bonasa umbellus", + 83: "prairie chicken, prairie grouse, prairie fowl", + 84: "peacock", + 85: "quail", + 86: "partridge", + 87: "African grey, African gray, Psittacus erithacus", + 88: "macaw", + 89: "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", + 90: "lorikeet", + 91: "coucal", + 92: "bee eater", + 93: "hornbill", + 94: "hummingbird", + 95: "jacamar", + 96: "toucan", + 97: "drake", + 98: "red-breasted merganser, Mergus serrator", + 99: "goose", + 100: "black swan, Cygnus atratus", + 101: "tusker", + 102: "echidna, spiny anteater, anteater", + 103: "platypus, duckbill, duckbilled platypus, duck-billed " + "platypus, Ornithorhynchus anatinus", + 104: "wallaby, brush kangaroo", + 105: "koala, koala bear, kangaroo bear, native bear, " "Phascolarctos cinereus", + 106: "wombat", + 107: "jellyfish", + 108: "sea anemone, anemone", + 109: "brain coral", + 110: "flatworm, platyhelminth", + 111: "nematode, nematode worm, roundworm", + 112: "conch", + 113: "snail", + 114: "slug", + 115: "sea slug, nudibranch", + 116: "chiton, coat-of-mail shell, sea cradle, polyplacophore", + 117: "chambered nautilus, pearly nautilus, nautilus", + 118: "Dungeness crab, Cancer magister", + 119: "rock crab, Cancer irroratus", + 120: "fiddler crab", + 121: "king crab, Alaska crab, Alaskan king crab, Alaska king " + "crab, Paralithodes camtschatica", + 122: "American lobster, Northern lobster, Maine lobster, " "Homarus americanus", + 123: "spiny lobster, langouste, rock lobster, crawfish, " "crayfish, sea crawfish", + 124: "crayfish, crawfish, crawdad, crawdaddy", + 125: "hermit crab", + 126: "isopod", + 127: "white stork, Ciconia ciconia", + 128: "black stork, Ciconia nigra", + 129: "spoonbill", + 130: "flamingo", + 131: "little blue heron, Egretta caerulea", + 132: "American egret, great white heron, Egretta albus", + 133: "bittern", + 134: "crane", + 135: "limpkin, Aramus pictus", + 136: "European 
gallinule, Porphyrio porphyrio", + 137: "American coot, marsh hen, mud hen, water hen, Fulica americana", + 138: "bustard", + 139: "ruddy turnstone, Arenaria interpres", + 140: "red-backed sandpiper, dunlin, Erolia alpina", + 141: "redshank, Tringa totanus", + 142: "dowitcher", + 143: "oystercatcher, oyster catcher", + 144: "pelican", + 145: "king penguin, Aptenodytes patagonica", + 146: "albatross, mollymawk", + 147: "grey whale, gray whale, devilfish, Eschrichtius " + "gibbosus, Eschrichtius robustus", + 148: "killer whale, killer, orca, grampus, sea wolf, Orcinus orca", + 149: "dugong, Dugong dugon", + 150: "sea lion", + 151: "Chihuahua", + 152: "Japanese spaniel", + 153: "Maltese dog, Maltese terrier, Maltese", + 154: "Pekinese, Pekingese, Peke", + 155: "Shih-Tzu", + 156: "Blenheim spaniel", + 157: "papillon", + 158: "toy terrier", + 159: "Rhodesian ridgeback", + 160: "Afghan hound, Afghan", + 161: "basset, basset hound", + 162: "beagle", + 163: "bloodhound, sleuthhound", + 164: "bluetick", + 165: "black-and-tan coonhound", + 166: "Walker hound, Walker foxhound", + 167: "English foxhound", + 168: "redbone", + 169: "borzoi, Russian wolfhound", + 170: "Irish wolfhound", + 171: "Italian greyhound", + 172: "whippet", + 173: "Ibizan hound, Ibizan Podenco", + 174: "Norwegian elkhound, elkhound", + 175: "otterhound, otter hound", + 176: "Saluki, gazelle hound", + 177: "Scottish deerhound, deerhound", + 178: "Weimaraner", + 179: "Staffordshire bullterrier, Staffordshire bull terrier", + 180: "American Staffordshire terrier, Staffordshire terrier, " + "American pit bull terrier, pit bull terrier", + 181: "Bedlington terrier", + 182: "Border terrier", + 183: "Kerry blue terrier", + 184: "Irish terrier", + 185: "Norfolk terrier", + 186: "Norwich terrier", + 187: "Yorkshire terrier", + 188: "wire-haired fox terrier", + 189: "Lakeland terrier", + 190: "Sealyham terrier, Sealyham", + 191: "Airedale, Airedale terrier", + 192: "cairn, cairn terrier", + 193: "Australian terrier", + 194: "Dandie Dinmont, Dandie Dinmont terrier", + 195: "Boston bull, Boston terrier", + 196: "miniature schnauzer", + 197: "giant schnauzer", + 198: "standard schnauzer", + 199: "Scotch terrier, Scottish terrier, Scottie", + 200: "Tibetan terrier, chrysanthemum dog", + 201: "silky terrier, Sydney silky", + 202: "soft-coated wheaten terrier", + 203: "West Highland white terrier", + 204: "Lhasa, Lhasa apso", + 205: "flat-coated retriever", + 206: "curly-coated retriever", + 207: "golden retriever", + 208: "Labrador retriever", + 209: "Chesapeake Bay retriever", + 210: "German short-haired pointer", + 211: "vizsla, Hungarian pointer", + 212: "English setter", + 213: "Irish setter, red setter", + 214: "Gordon setter", + 215: "Brittany spaniel", + 216: "clumber, clumber spaniel", + 217: "English springer, English springer spaniel", + 218: "Welsh springer spaniel", + 219: "cocker spaniel, English cocker spaniel, cocker", + 220: "Sussex spaniel", + 221: "Irish water spaniel", + 222: "kuvasz", + 223: "schipperke", + 224: "groenendael", + 225: "malinois", + 226: "briard", + 227: "kelpie", + 228: "komondor", + 229: "Old English sheepdog, bobtail", + 230: "Shetland sheepdog, Shetland sheep dog, Shetland", + 231: "collie", + 232: "Border collie", + 233: "Bouvier des Flandres, Bouviers des Flandres", + 234: "Rottweiler", + 235: "German shepherd, German shepherd dog, German police dog, alsatian", + 236: "Doberman, Doberman pinscher", + 237: "miniature pinscher", + 238: "Greater Swiss Mountain dog", + 239: "Bernese mountain dog", + 240: 
"Appenzeller", + 241: "EntleBucher", + 242: "boxer", + 243: "bull mastiff", + 244: "Tibetan mastiff", + 245: "French bulldog", + 246: "Great Dane", + 247: "Saint Bernard, St Bernard", + 248: "Eskimo dog, husky", + 249: "malamute, malemute, Alaskan malamute", + 250: "Siberian husky", + 251: "dalmatian, coach dog, carriage dog", + 252: "affenpinscher, monkey pinscher, monkey dog", + 253: "basenji", + 254: "pug, pug-dog", + 255: "Leonberg", + 256: "Newfoundland, Newfoundland dog", + 257: "Great Pyrenees", + 258: "Samoyed, Samoyede", + 259: "Pomeranian", + 260: "chow, chow chow", + 261: "keeshond", + 262: "Brabancon griffon", + 263: "Pembroke, Pembroke Welsh corgi", + 264: "Cardigan, Cardigan Welsh corgi", + 265: "toy poodle", + 266: "miniature poodle", + 267: "standard poodle", + 268: "Mexican hairless", + 269: "timber wolf, grey wolf, gray wolf, Canis lupus", + 270: "white wolf, Arctic wolf, Canis lupus tundrarum", + 271: "red wolf, maned wolf, Canis rufus, Canis niger", + 272: "coyote, prairie wolf, brush wolf, Canis latrans", + 273: "dingo, warrigal, warragal, Canis dingo", + 274: "dhole, Cuon alpinus", + 275: "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", + 276: "hyena, hyaena", + 277: "red fox, Vulpes vulpes", + 278: "kit fox, Vulpes macrotis", + 279: "Arctic fox, white fox, Alopex lagopus", + 280: "grey fox, gray fox, Urocyon cinereoargenteus", + 281: "tabby, tabby cat", + 282: "tiger cat", + 283: "Persian cat", + 284: "Siamese cat, Siamese", + 285: "Egyptian cat", + 286: "cougar, puma, catamount, mountain lion, painter, " "panther, Felis concolor", + 287: "lynx, catamount", + 288: "leopard, Panthera pardus", + 289: "snow leopard, ounce, Panthera uncia", + 290: "jaguar, panther, Panthera onca, Felis onca", + 291: "lion, king of beasts, Panthera leo", + 292: "tiger, Panthera tigris", + 293: "cheetah, chetah, Acinonyx jubatus", + 294: "brown bear, bruin, Ursus arctos", + 295: "American black bear, black bear, Ursus americanus, " "Euarctos americanus", + 296: "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", + 297: "sloth bear, Melursus ursinus, Ursus ursinus", + 298: "mongoose", + 299: "meerkat, mierkat", + 300: "tiger beetle", + 301: "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", + 302: "ground beetle, carabid beetle", + 303: "long-horned beetle, longicorn, longicorn beetle", + 304: "leaf beetle, chrysomelid", + 305: "dung beetle", + 306: "rhinoceros beetle", + 307: "weevil", + 308: "fly", + 309: "bee", + 310: "ant, emmet, pismire", + 311: "grasshopper, hopper", + 312: "cricket", + 313: "walking stick, walkingstick, stick insect", + 314: "cockroach, roach", + 315: "mantis, mantid", + 316: "cicada, cicala", + 317: "leafhopper", + 318: "lacewing, lacewing fly", 319: "dragonfly, darning needle, devil's darning needle, " - "sewing needle, snake feeder, snake doctor, mosquito " - "hawk, skeeter hawk", - 320: 'damselfly', - 321: 'admiral', - 322: 'ringlet, ringlet butterfly', - 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', - 324: 'cabbage butterfly', - 325: 'sulphur butterfly, sulfur butterfly', - 326: 'lycaenid, lycaenid butterfly', - 327: 'starfish, sea star', - 328: 'sea urchin', - 329: 'sea cucumber, holothurian', - 330: 'wood rabbit, cottontail, cottontail rabbit', - 331: 'hare', - 332: 'Angora, Angora rabbit', - 333: 'hamster', - 334: 'porcupine, hedgehog', - 335: 'fox squirrel, eastern fox squirrel, Sciurus niger', - 336: 'marmot', - 337: 'beaver', - 338: 'guinea pig, Cavia cobaya', - 339: 'sorrel', - 340: 
'zebra', - 341: 'hog, pig, grunter, squealer, Sus scrofa', - 342: 'wild boar, boar, Sus scrofa', - 343: 'warthog', - 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius', - 345: 'ox', - 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', - 347: 'bison', - 348: 'ram, tup', - 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain ' - 'bighorn, Rocky Mountain sheep, Ovis canadensis', - 350: 'ibex, Capra ibex', - 351: 'hartebeest', - 352: 'impala, Aepyceros melampus', - 353: 'gazelle', - 354: 'Arabian camel, dromedary, Camelus dromedarius', - 355: 'llama', - 356: 'weasel', - 357: 'mink', - 358: 'polecat, fitch, foulmart, foumart, Mustela putorius', - 359: 'black-footed ferret, ferret, Mustela nigripes', - 360: 'otter', - 361: 'skunk, polecat, wood pussy', - 362: 'badger', - 363: 'armadillo', - 364: 'three-toed sloth, ai, Bradypus tridactylus', - 365: 'orangutan, orang, orangutang, Pongo pygmaeus', - 366: 'gorilla, Gorilla gorilla', - 367: 'chimpanzee, chimp, Pan troglodytes', - 368: 'gibbon, Hylobates lar', - 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus', - 370: 'guenon, guenon monkey', - 371: 'patas, hussar monkey, Erythrocebus patas', - 372: 'baboon', - 373: 'macaque', - 374: 'langur', - 375: 'colobus, colobus monkey', - 376: 'proboscis monkey, Nasalis larvatus', - 377: 'marmoset', - 378: 'capuchin, ringtail, Cebus capucinus', - 379: 'howler monkey, howler', - 380: 'titi, titi monkey', - 381: 'spider monkey, Ateles geoffroyi', - 382: 'squirrel monkey, Saimiri sciureus', - 383: 'Madagascar cat, ring-tailed lemur, Lemur catta', - 384: 'indri, indris, Indri indri, Indri brevicaudatus', - 385: 'Indian elephant, Elephas maximus', - 386: 'African elephant, Loxodonta africana', - 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', - 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', - 389: 'barracouta, snoek', - 390: 'eel', - 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, ' - 'Oncorhynchus kisutch', - 392: 'rock beauty, Holocanthus tricolor', - 393: 'anemone fish', - 394: 'sturgeon', - 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus', - 396: 'lionfish', - 397: 'puffer, pufferfish, blowfish, globefish', - 398: 'abacus', - 399: 'abaya', + "sewing needle, snake feeder, snake doctor, mosquito " + "hawk, skeeter hawk", + 320: "damselfly", + 321: "admiral", + 322: "ringlet, ringlet butterfly", + 323: "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", + 324: "cabbage butterfly", + 325: "sulphur butterfly, sulfur butterfly", + 326: "lycaenid, lycaenid butterfly", + 327: "starfish, sea star", + 328: "sea urchin", + 329: "sea cucumber, holothurian", + 330: "wood rabbit, cottontail, cottontail rabbit", + 331: "hare", + 332: "Angora, Angora rabbit", + 333: "hamster", + 334: "porcupine, hedgehog", + 335: "fox squirrel, eastern fox squirrel, Sciurus niger", + 336: "marmot", + 337: "beaver", + 338: "guinea pig, Cavia cobaya", + 339: "sorrel", + 340: "zebra", + 341: "hog, pig, grunter, squealer, Sus scrofa", + 342: "wild boar, boar, Sus scrofa", + 343: "warthog", + 344: "hippopotamus, hippo, river horse, Hippopotamus amphibius", + 345: "ox", + 346: "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", + 347: "bison", + 348: "ram, tup", + 349: "bighorn, bighorn sheep, cimarron, Rocky Mountain " + "bighorn, Rocky Mountain sheep, Ovis canadensis", + 350: "ibex, Capra ibex", + 351: "hartebeest", + 352: "impala, Aepyceros melampus", + 353: "gazelle", + 354: "Arabian camel, dromedary, 
Camelus dromedarius", + 355: "llama", + 356: "weasel", + 357: "mink", + 358: "polecat, fitch, foulmart, foumart, Mustela putorius", + 359: "black-footed ferret, ferret, Mustela nigripes", + 360: "otter", + 361: "skunk, polecat, wood pussy", + 362: "badger", + 363: "armadillo", + 364: "three-toed sloth, ai, Bradypus tridactylus", + 365: "orangutan, orang, orangutang, Pongo pygmaeus", + 366: "gorilla, Gorilla gorilla", + 367: "chimpanzee, chimp, Pan troglodytes", + 368: "gibbon, Hylobates lar", + 369: "siamang, Hylobates syndactylus, Symphalangus syndactylus", + 370: "guenon, guenon monkey", + 371: "patas, hussar monkey, Erythrocebus patas", + 372: "baboon", + 373: "macaque", + 374: "langur", + 375: "colobus, colobus monkey", + 376: "proboscis monkey, Nasalis larvatus", + 377: "marmoset", + 378: "capuchin, ringtail, Cebus capucinus", + 379: "howler monkey, howler", + 380: "titi, titi monkey", + 381: "spider monkey, Ateles geoffroyi", + 382: "squirrel monkey, Saimiri sciureus", + 383: "Madagascar cat, ring-tailed lemur, Lemur catta", + 384: "indri, indris, Indri indri, Indri brevicaudatus", + 385: "Indian elephant, Elephas maximus", + 386: "African elephant, Loxodonta africana", + 387: "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", + 388: "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", + 389: "barracouta, snoek", + 390: "eel", + 391: "coho, cohoe, coho salmon, blue jack, silver salmon, " "Oncorhynchus kisutch", + 392: "rock beauty, Holocanthus tricolor", + 393: "anemone fish", + 394: "sturgeon", + 395: "gar, garfish, garpike, billfish, Lepisosteus osseus", + 396: "lionfish", + 397: "puffer, pufferfish, blowfish, globefish", + 398: "abacus", + 399: "abaya", 400: "academic gown, academic robe, judge's robe", - 401: 'accordion, piano accordion, squeeze box', - 402: 'acoustic guitar', - 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier', - 404: 'airliner', - 405: 'airship, dirigible', - 406: 'altar', - 407: 'ambulance', - 408: 'amphibian, amphibious vehicle', - 409: 'analog clock', - 410: 'apiary, bee house', - 411: 'apron', - 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ' - 'ash-bin, ashbin, dustbin, trash barrel, trash bin', - 413: 'assault rifle, assault gun', - 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack', - 415: 'bakery, bakeshop, bakehouse', - 416: 'balance beam, beam', - 417: 'balloon', - 418: 'ballpoint, ballpoint pen, ballpen, Biro', - 419: 'Band Aid', - 420: 'banjo', - 421: 'bannister, banister, balustrade, balusters, handrail', - 422: 'barbell', - 423: 'barber chair', - 424: 'barbershop', - 425: 'barn', - 426: 'barometer', - 427: 'barrel, cask', - 428: 'barrow, garden cart, lawn cart, wheelbarrow', - 429: 'baseball', - 430: 'basketball', - 431: 'bassinet', - 432: 'bassoon', - 433: 'bathing cap, swimming cap', - 434: 'bath towel', - 435: 'bathtub, bathing tub, bath, tub', - 436: 'beach wagon, station wagon, wagon, estate car, ' - 'beach waggon, station waggon, waggon', - 437: 'beacon, lighthouse, beacon light, pharos', - 438: 'beaker', - 439: 'bearskin, busby, shako', - 440: 'beer bottle', - 441: 'beer glass', - 442: 'bell cote, bell cot', - 443: 'bib', - 444: 'bicycle-built-for-two, tandem bicycle, tandem', - 445: 'bikini, two-piece', - 446: 'binder, ring-binder', - 447: 'binoculars, field glasses, opera glasses', - 448: 'birdhouse', - 449: 'boathouse', - 450: 'bobsled, bobsleigh, bob', - 451: 'bolo tie, bolo, bola tie, bola', - 452: 'bonnet, poke bonnet', - 453: 'bookcase', - 454: 
'bookshop, bookstore, bookstall', - 455: 'bottlecap', - 456: 'bow', - 457: 'bow tie, bow-tie, bowtie', - 458: 'brass, memorial tablet, plaque', - 459: 'brassiere, bra, bandeau', - 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', - 461: 'breastplate, aegis, egis', - 462: 'broom', - 463: 'bucket, pail', - 464: 'buckle', - 465: 'bulletproof vest', - 466: 'bullet train, bullet', - 467: 'butcher shop, meat market', - 468: 'cab, hack, taxi, taxicab', - 469: 'caldron, cauldron', - 470: 'candle, taper, wax light', - 471: 'cannon', - 472: 'canoe', - 473: 'can opener, tin opener', - 474: 'cardigan', - 475: 'car mirror', - 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig', + 401: "accordion, piano accordion, squeeze box", + 402: "acoustic guitar", + 403: "aircraft carrier, carrier, flattop, attack aircraft carrier", + 404: "airliner", + 405: "airship, dirigible", + 406: "altar", + 407: "ambulance", + 408: "amphibian, amphibious vehicle", + 409: "analog clock", + 410: "apiary, bee house", + 411: "apron", + 412: "ashcan, trash can, garbage can, wastebin, ash bin, " + "ash-bin, ashbin, dustbin, trash barrel, trash bin", + 413: "assault rifle, assault gun", + 414: "backpack, back pack, knapsack, packsack, rucksack, haversack", + 415: "bakery, bakeshop, bakehouse", + 416: "balance beam, beam", + 417: "balloon", + 418: "ballpoint, ballpoint pen, ballpen, Biro", + 419: "Band Aid", + 420: "banjo", + 421: "bannister, banister, balustrade, balusters, handrail", + 422: "barbell", + 423: "barber chair", + 424: "barbershop", + 425: "barn", + 426: "barometer", + 427: "barrel, cask", + 428: "barrow, garden cart, lawn cart, wheelbarrow", + 429: "baseball", + 430: "basketball", + 431: "bassinet", + 432: "bassoon", + 433: "bathing cap, swimming cap", + 434: "bath towel", + 435: "bathtub, bathing tub, bath, tub", + 436: "beach wagon, station wagon, wagon, estate car, " + "beach waggon, station waggon, waggon", + 437: "beacon, lighthouse, beacon light, pharos", + 438: "beaker", + 439: "bearskin, busby, shako", + 440: "beer bottle", + 441: "beer glass", + 442: "bell cote, bell cot", + 443: "bib", + 444: "bicycle-built-for-two, tandem bicycle, tandem", + 445: "bikini, two-piece", + 446: "binder, ring-binder", + 447: "binoculars, field glasses, opera glasses", + 448: "birdhouse", + 449: "boathouse", + 450: "bobsled, bobsleigh, bob", + 451: "bolo tie, bolo, bola tie, bola", + 452: "bonnet, poke bonnet", + 453: "bookcase", + 454: "bookshop, bookstore, bookstall", + 455: "bottlecap", + 456: "bow", + 457: "bow tie, bow-tie, bowtie", + 458: "brass, memorial tablet, plaque", + 459: "brassiere, bra, bandeau", + 460: "breakwater, groin, groyne, mole, bulwark, seawall, jetty", + 461: "breastplate, aegis, egis", + 462: "broom", + 463: "bucket, pail", + 464: "buckle", + 465: "bulletproof vest", + 466: "bullet train, bullet", + 467: "butcher shop, meat market", + 468: "cab, hack, taxi, taxicab", + 469: "caldron, cauldron", + 470: "candle, taper, wax light", + 471: "cannon", + 472: "canoe", + 473: "can opener, tin opener", + 474: "cardigan", + 475: "car mirror", + 476: "carousel, carrousel, merry-go-round, roundabout, whirligig", 477: "carpenter's kit, tool kit", - 478: 'carton', - 479: 'car wheel', - 480: 'cash machine, cash dispenser, automated teller ' - 'machine, automatic teller machine, automated teller, ' - 'automatic teller, ATM', - 481: 'cassette', - 482: 'cassette player', - 483: 'castle', - 484: 'catamaran', - 485: 'CD player', - 486: 'cello, violoncello', - 487: 'cellular telephone, cellular 
phone, cellphone, cell, mobile phone', - 488: 'chain', - 489: 'chainlink fence', - 490: 'chain mail, ring mail, mail, chain armor, chain ' - 'armour, ring armor, ring armour', - 491: 'chain saw, chainsaw', - 492: 'chest', - 493: 'chiffonier, commode', - 494: 'chime, bell, gong', - 495: 'china cabinet, china closet', - 496: 'Christmas stocking', - 497: 'church, church building', - 498: 'cinema, movie theater, movie theatre, movie house, picture palace', - 499: 'cleaver, meat cleaver, chopper', - 500: 'cliff dwelling', - 501: 'cloak', - 502: 'clog, geta, patten, sabot', - 503: 'cocktail shaker', - 504: 'coffee mug', - 505: 'coffeepot', - 506: 'coil, spiral, volute, whorl, helix', - 507: 'combination lock', - 508: 'computer keyboard, keypad', - 509: 'confectionery, confectionary, candy store', - 510: 'container ship, containership, container vessel', - 511: 'convertible', - 512: 'corkscrew, bottle screw', - 513: 'cornet, horn, trumpet, trump', - 514: 'cowboy boot', - 515: 'cowboy hat, ten-gallon hat', - 516: 'cradle', - 517: 'crane', - 518: 'crash helmet', - 519: 'crate', - 520: 'crib, cot', - 521: 'Crock Pot', - 522: 'croquet ball', - 523: 'crutch', - 524: 'cuirass', - 525: 'dam, dike, dyke', - 526: 'desk', - 527: 'desktop computer', - 528: 'dial telephone, dial phone', - 529: 'diaper, nappy, napkin', - 530: 'digital clock', - 531: 'digital watch', - 532: 'dining table, board', - 533: 'dishrag, dishcloth', - 534: 'dishwasher, dish washer, dishwashing machine', - 535: 'disk brake, disc brake', - 536: 'dock, dockage, docking facility', - 537: 'dogsled, dog sled, dog sleigh', - 538: 'dome', - 539: 'doormat, welcome mat', - 540: 'drilling platform, offshore rig', - 541: 'drum, membranophone, tympan', - 542: 'drumstick', - 543: 'dumbbell', - 544: 'Dutch oven', - 545: 'electric fan, blower', - 546: 'electric guitar', - 547: 'electric locomotive', - 548: 'entertainment center', - 549: 'envelope', - 550: 'espresso maker', - 551: 'face powder', - 552: 'feather boa, boa', - 553: 'file, file cabinet, filing cabinet', - 554: 'fireboat', - 555: 'fire engine, fire truck', - 556: 'fire screen, fireguard', - 557: 'flagpole, flagstaff', - 558: 'flute, transverse flute', - 559: 'folding chair', - 560: 'football helmet', - 561: 'forklift', - 562: 'fountain', - 563: 'fountain pen', - 564: 'four-poster', - 565: 'freight car', - 566: 'French horn, horn', - 567: 'frying pan, frypan, skillet', - 568: 'fur coat', - 569: 'garbage truck, dustcart', - 570: 'gasmask, respirator, gas helmet', - 571: 'gas pump, gasoline pump, petrol pump, island dispenser', - 572: 'goblet', - 573: 'go-kart', - 574: 'golf ball', - 575: 'golfcart, golf cart', - 576: 'gondola', - 577: 'gong, tam-tam', - 578: 'gown', - 579: 'grand piano, grand', - 580: 'greenhouse, nursery, glasshouse', - 581: 'grille, radiator grille', - 582: 'grocery store, grocery, food market, market', - 583: 'guillotine', - 584: 'hair slide', - 585: 'hair spray', - 586: 'half track', - 587: 'hammer', - 588: 'hamper', - 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier', - 590: 'hand-held computer, hand-held microcomputer', - 591: 'handkerchief, hankie, hanky, hankey', - 592: 'hard disc, hard disk, fixed disk', - 593: 'harmonica, mouth organ, harp, mouth harp', - 594: 'harp', - 595: 'harvester, reaper', - 596: 'hatchet', - 597: 'holster', - 598: 'home theater, home theatre', - 599: 'honeycomb', - 600: 'hook, claw', - 601: 'hoopskirt, crinoline', - 602: 'horizontal bar, high bar', - 603: 'horse cart, horse-cart', - 604: 'hourglass', - 605: 'iPod', - 606: 
'iron, smoothing iron', + 478: "carton", + 479: "car wheel", + 480: "cash machine, cash dispenser, automated teller " + "machine, automatic teller machine, automated teller, " + "automatic teller, ATM", + 481: "cassette", + 482: "cassette player", + 483: "castle", + 484: "catamaran", + 485: "CD player", + 486: "cello, violoncello", + 487: "cellular telephone, cellular phone, cellphone, cell, mobile phone", + 488: "chain", + 489: "chainlink fence", + 490: "chain mail, ring mail, mail, chain armor, chain " + "armour, ring armor, ring armour", + 491: "chain saw, chainsaw", + 492: "chest", + 493: "chiffonier, commode", + 494: "chime, bell, gong", + 495: "china cabinet, china closet", + 496: "Christmas stocking", + 497: "church, church building", + 498: "cinema, movie theater, movie theatre, movie house, picture palace", + 499: "cleaver, meat cleaver, chopper", + 500: "cliff dwelling", + 501: "cloak", + 502: "clog, geta, patten, sabot", + 503: "cocktail shaker", + 504: "coffee mug", + 505: "coffeepot", + 506: "coil, spiral, volute, whorl, helix", + 507: "combination lock", + 508: "computer keyboard, keypad", + 509: "confectionery, confectionary, candy store", + 510: "container ship, containership, container vessel", + 511: "convertible", + 512: "corkscrew, bottle screw", + 513: "cornet, horn, trumpet, trump", + 514: "cowboy boot", + 515: "cowboy hat, ten-gallon hat", + 516: "cradle", + 517: "crane", + 518: "crash helmet", + 519: "crate", + 520: "crib, cot", + 521: "Crock Pot", + 522: "croquet ball", + 523: "crutch", + 524: "cuirass", + 525: "dam, dike, dyke", + 526: "desk", + 527: "desktop computer", + 528: "dial telephone, dial phone", + 529: "diaper, nappy, napkin", + 530: "digital clock", + 531: "digital watch", + 532: "dining table, board", + 533: "dishrag, dishcloth", + 534: "dishwasher, dish washer, dishwashing machine", + 535: "disk brake, disc brake", + 536: "dock, dockage, docking facility", + 537: "dogsled, dog sled, dog sleigh", + 538: "dome", + 539: "doormat, welcome mat", + 540: "drilling platform, offshore rig", + 541: "drum, membranophone, tympan", + 542: "drumstick", + 543: "dumbbell", + 544: "Dutch oven", + 545: "electric fan, blower", + 546: "electric guitar", + 547: "electric locomotive", + 548: "entertainment center", + 549: "envelope", + 550: "espresso maker", + 551: "face powder", + 552: "feather boa, boa", + 553: "file, file cabinet, filing cabinet", + 554: "fireboat", + 555: "fire engine, fire truck", + 556: "fire screen, fireguard", + 557: "flagpole, flagstaff", + 558: "flute, transverse flute", + 559: "folding chair", + 560: "football helmet", + 561: "forklift", + 562: "fountain", + 563: "fountain pen", + 564: "four-poster", + 565: "freight car", + 566: "French horn, horn", + 567: "frying pan, frypan, skillet", + 568: "fur coat", + 569: "garbage truck, dustcart", + 570: "gasmask, respirator, gas helmet", + 571: "gas pump, gasoline pump, petrol pump, island dispenser", + 572: "goblet", + 573: "go-kart", + 574: "golf ball", + 575: "golfcart, golf cart", + 576: "gondola", + 577: "gong, tam-tam", + 578: "gown", + 579: "grand piano, grand", + 580: "greenhouse, nursery, glasshouse", + 581: "grille, radiator grille", + 582: "grocery store, grocery, food market, market", + 583: "guillotine", + 584: "hair slide", + 585: "hair spray", + 586: "half track", + 587: "hammer", + 588: "hamper", + 589: "hand blower, blow dryer, blow drier, hair dryer, hair drier", + 590: "hand-held computer, hand-held microcomputer", + 591: "handkerchief, hankie, hanky, hankey", + 592: "hard disc, hard 
disk, fixed disk", + 593: "harmonica, mouth organ, harp, mouth harp", + 594: "harp", + 595: "harvester, reaper", + 596: "hatchet", + 597: "holster", + 598: "home theater, home theatre", + 599: "honeycomb", + 600: "hook, claw", + 601: "hoopskirt, crinoline", + 602: "horizontal bar, high bar", + 603: "horse cart, horse-cart", + 604: "hourglass", + 605: "iPod", + 606: "iron, smoothing iron", 607: "jack-o'-lantern", - 608: 'jean, blue jean, denim', - 609: 'jeep, landrover', - 610: 'jersey, T-shirt, tee shirt', - 611: 'jigsaw puzzle', - 612: 'jinrikisha, ricksha, rickshaw', - 613: 'joystick', - 614: 'kimono', - 615: 'knee pad', - 616: 'knot', - 617: 'lab coat, laboratory coat', - 618: 'ladle', - 619: 'lampshade, lamp shade', - 620: 'laptop, laptop computer', - 621: 'lawn mower, mower', - 622: 'lens cap, lens cover', - 623: 'letter opener, paper knife, paperknife', - 624: 'library', - 625: 'lifeboat', - 626: 'lighter, light, igniter, ignitor', - 627: 'limousine, limo', - 628: 'liner, ocean liner', - 629: 'lipstick, lip rouge', - 630: 'Loafer', - 631: 'lotion', - 632: 'loudspeaker, speaker, speaker unit, loudspeaker ' - 'system, speaker system', + 608: "jean, blue jean, denim", + 609: "jeep, landrover", + 610: "jersey, T-shirt, tee shirt", + 611: "jigsaw puzzle", + 612: "jinrikisha, ricksha, rickshaw", + 613: "joystick", + 614: "kimono", + 615: "knee pad", + 616: "knot", + 617: "lab coat, laboratory coat", + 618: "ladle", + 619: "lampshade, lamp shade", + 620: "laptop, laptop computer", + 621: "lawn mower, mower", + 622: "lens cap, lens cover", + 623: "letter opener, paper knife, paperknife", + 624: "library", + 625: "lifeboat", + 626: "lighter, light, igniter, ignitor", + 627: "limousine, limo", + 628: "liner, ocean liner", + 629: "lipstick, lip rouge", + 630: "Loafer", + 631: "lotion", + 632: "loudspeaker, speaker, speaker unit, loudspeaker " "system, speaker system", 633: "loupe, jeweler's loupe", - 634: 'lumbermill, sawmill', - 635: 'magnetic compass', - 636: 'mailbag, postbag', - 637: 'mailbox, letter box', - 638: 'maillot', - 639: 'maillot, tank suit', - 640: 'manhole cover', - 641: 'maraca', - 642: 'marimba, xylophone', - 643: 'mask', - 644: 'matchstick', - 645: 'maypole', - 646: 'maze, labyrinth', - 647: 'measuring cup', - 648: 'medicine chest, medicine cabinet', - 649: 'megalith, megalithic structure', - 650: 'microphone, mike', - 651: 'microwave, microwave oven', - 652: 'military uniform', - 653: 'milk can', - 654: 'minibus', - 655: 'miniskirt, mini', - 656: 'minivan', - 657: 'missile', - 658: 'mitten', - 659: 'mixing bowl', - 660: 'mobile home, manufactured home', - 661: 'Model T', - 662: 'modem', - 663: 'monastery', - 664: 'monitor', - 665: 'moped', - 666: 'mortar', - 667: 'mortarboard', - 668: 'mosque', - 669: 'mosquito net', - 670: 'motor scooter, scooter', - 671: 'mountain bike, all-terrain bike, off-roader', - 672: 'mountain tent', - 673: 'mouse, computer mouse', - 674: 'mousetrap', - 675: 'moving van', - 676: 'muzzle', - 677: 'nail', - 678: 'neck brace', - 679: 'necklace', - 680: 'nipple', - 681: 'notebook, notebook computer', - 682: 'obelisk', - 683: 'oboe, hautboy, hautbois', - 684: 'ocarina, sweet potato', - 685: 'odometer, hodometer, mileometer, milometer', - 686: 'oil filter', - 687: 'organ, pipe organ', - 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO', - 689: 'overskirt', - 690: 'oxcart', - 691: 'oxygen mask', - 692: 'packet', - 693: 'paddle, boat paddle', - 694: 'paddlewheel, paddle wheel', - 695: 'padlock', - 696: 'paintbrush', + 634: "lumbermill, sawmill", + 
635: "magnetic compass", + 636: "mailbag, postbag", + 637: "mailbox, letter box", + 638: "maillot", + 639: "maillot, tank suit", + 640: "manhole cover", + 641: "maraca", + 642: "marimba, xylophone", + 643: "mask", + 644: "matchstick", + 645: "maypole", + 646: "maze, labyrinth", + 647: "measuring cup", + 648: "medicine chest, medicine cabinet", + 649: "megalith, megalithic structure", + 650: "microphone, mike", + 651: "microwave, microwave oven", + 652: "military uniform", + 653: "milk can", + 654: "minibus", + 655: "miniskirt, mini", + 656: "minivan", + 657: "missile", + 658: "mitten", + 659: "mixing bowl", + 660: "mobile home, manufactured home", + 661: "Model T", + 662: "modem", + 663: "monastery", + 664: "monitor", + 665: "moped", + 666: "mortar", + 667: "mortarboard", + 668: "mosque", + 669: "mosquito net", + 670: "motor scooter, scooter", + 671: "mountain bike, all-terrain bike, off-roader", + 672: "mountain tent", + 673: "mouse, computer mouse", + 674: "mousetrap", + 675: "moving van", + 676: "muzzle", + 677: "nail", + 678: "neck brace", + 679: "necklace", + 680: "nipple", + 681: "notebook, notebook computer", + 682: "obelisk", + 683: "oboe, hautboy, hautbois", + 684: "ocarina, sweet potato", + 685: "odometer, hodometer, mileometer, milometer", + 686: "oil filter", + 687: "organ, pipe organ", + 688: "oscilloscope, scope, cathode-ray oscilloscope, CRO", + 689: "overskirt", + 690: "oxcart", + 691: "oxygen mask", + 692: "packet", + 693: "paddle, boat paddle", + 694: "paddlewheel, paddle wheel", + 695: "padlock", + 696: "paintbrush", 697: "pajama, pyjama, pj's, jammies", - 698: 'palace', - 699: 'panpipe, pandean pipe, syrinx', - 700: 'paper towel', - 701: 'parachute, chute', - 702: 'parallel bars, bars', - 703: 'park bench', - 704: 'parking meter', - 705: 'passenger car, coach, carriage', - 706: 'patio, terrace', - 707: 'pay-phone, pay-station', - 708: 'pedestal, plinth, footstall', - 709: 'pencil box, pencil case', - 710: 'pencil sharpener', - 711: 'perfume, essence', - 712: 'Petri dish', - 713: 'photocopier', - 714: 'pick, plectrum, plectron', - 715: 'pickelhaube', - 716: 'picket fence, paling', - 717: 'pickup, pickup truck', - 718: 'pier', - 719: 'piggy bank, penny bank', - 720: 'pill bottle', - 721: 'pillow', - 722: 'ping-pong ball', - 723: 'pinwheel', - 724: 'pirate, pirate ship', - 725: 'pitcher, ewer', + 698: "palace", + 699: "panpipe, pandean pipe, syrinx", + 700: "paper towel", + 701: "parachute, chute", + 702: "parallel bars, bars", + 703: "park bench", + 704: "parking meter", + 705: "passenger car, coach, carriage", + 706: "patio, terrace", + 707: "pay-phone, pay-station", + 708: "pedestal, plinth, footstall", + 709: "pencil box, pencil case", + 710: "pencil sharpener", + 711: "perfume, essence", + 712: "Petri dish", + 713: "photocopier", + 714: "pick, plectrum, plectron", + 715: "pickelhaube", + 716: "picket fence, paling", + 717: "pickup, pickup truck", + 718: "pier", + 719: "piggy bank, penny bank", + 720: "pill bottle", + 721: "pillow", + 722: "ping-pong ball", + 723: "pinwheel", + 724: "pirate, pirate ship", + 725: "pitcher, ewer", 726: "plane, carpenter's plane, woodworking plane", - 727: 'planetarium', - 728: 'plastic bag', - 729: 'plate rack', - 730: 'plow, plough', + 727: "planetarium", + 728: "plastic bag", + 729: "plate rack", + 730: "plow, plough", 731: "plunger, plumber's helper", - 732: 'Polaroid camera, Polaroid Land camera', - 733: 'pole', - 734: 'police van, police wagon, paddy wagon, patrol wagon, ' - 'wagon, black Maria', - 735: 'poncho', - 736: 'pool table, 
billiard table, snooker table', - 737: 'pop bottle, soda bottle', - 738: 'pot, flowerpot', + 732: "Polaroid camera, Polaroid Land camera", + 733: "pole", + 734: "police van, police wagon, paddy wagon, patrol wagon, " "wagon, black Maria", + 735: "poncho", + 736: "pool table, billiard table, snooker table", + 737: "pop bottle, soda bottle", + 738: "pot, flowerpot", 739: "potter's wheel", - 740: 'power drill', - 741: 'prayer rug, prayer mat', - 742: 'printer', - 743: 'prison, prison house', - 744: 'projectile, missile', - 745: 'projector', - 746: 'puck, hockey puck', - 747: 'punching bag, punch bag, punching ball, punchball', - 748: 'purse', - 749: 'quill, quill pen', - 750: 'quilt, comforter, comfort, puff', - 751: 'racer, race car, racing car', - 752: 'racket, racquet', - 753: 'radiator', - 754: 'radio, wireless', - 755: 'radio telescope, radio reflector', - 756: 'rain barrel', - 757: 'recreational vehicle, RV, R.V.', - 758: 'reel', - 759: 'reflex camera', - 760: 'refrigerator, icebox', - 761: 'remote control, remote', - 762: 'restaurant, eating house, eating place, eatery', - 763: 'revolver, six-gun, six-shooter', - 764: 'rifle', - 765: 'rocking chair, rocker', - 766: 'rotisserie', - 767: 'rubber eraser, rubber, pencil eraser', - 768: 'rugby ball', - 769: 'rule, ruler', - 770: 'running shoe', - 771: 'safe', - 772: 'safety pin', - 773: 'saltshaker, salt shaker', - 774: 'sandal', - 775: 'sarong', - 776: 'sax, saxophone', - 777: 'scabbard', - 778: 'scale, weighing machine', - 779: 'school bus', - 780: 'schooner', - 781: 'scoreboard', - 782: 'screen, CRT screen', - 783: 'screw', - 784: 'screwdriver', - 785: 'seat belt, seatbelt', - 786: 'sewing machine', - 787: 'shield, buckler', - 788: 'shoe shop, shoe-shop, shoe store', - 789: 'shoji', - 790: 'shopping basket', - 791: 'shopping cart', - 792: 'shovel', - 793: 'shower cap', - 794: 'shower curtain', - 795: 'ski', - 796: 'ski mask', - 797: 'sleeping bag', - 798: 'slide rule, slipstick', - 799: 'sliding door', - 800: 'slot, one-armed bandit', - 801: 'snorkel', - 802: 'snowmobile', - 803: 'snowplow, snowplough', - 804: 'soap dispenser', - 805: 'soccer ball', - 806: 'sock', - 807: 'solar dish, solar collector, solar furnace', - 808: 'sombrero', - 809: 'soup bowl', - 810: 'space bar', - 811: 'space heater', - 812: 'space shuttle', - 813: 'spatula', - 814: 'speedboat', + 740: "power drill", + 741: "prayer rug, prayer mat", + 742: "printer", + 743: "prison, prison house", + 744: "projectile, missile", + 745: "projector", + 746: "puck, hockey puck", + 747: "punching bag, punch bag, punching ball, punchball", + 748: "purse", + 749: "quill, quill pen", + 750: "quilt, comforter, comfort, puff", + 751: "racer, race car, racing car", + 752: "racket, racquet", + 753: "radiator", + 754: "radio, wireless", + 755: "radio telescope, radio reflector", + 756: "rain barrel", + 757: "recreational vehicle, RV, R.V.", + 758: "reel", + 759: "reflex camera", + 760: "refrigerator, icebox", + 761: "remote control, remote", + 762: "restaurant, eating house, eating place, eatery", + 763: "revolver, six-gun, six-shooter", + 764: "rifle", + 765: "rocking chair, rocker", + 766: "rotisserie", + 767: "rubber eraser, rubber, pencil eraser", + 768: "rugby ball", + 769: "rule, ruler", + 770: "running shoe", + 771: "safe", + 772: "safety pin", + 773: "saltshaker, salt shaker", + 774: "sandal", + 775: "sarong", + 776: "sax, saxophone", + 777: "scabbard", + 778: "scale, weighing machine", + 779: "school bus", + 780: "schooner", + 781: "scoreboard", + 782: "screen, CRT screen", + 783: 
"screw", + 784: "screwdriver", + 785: "seat belt, seatbelt", + 786: "sewing machine", + 787: "shield, buckler", + 788: "shoe shop, shoe-shop, shoe store", + 789: "shoji", + 790: "shopping basket", + 791: "shopping cart", + 792: "shovel", + 793: "shower cap", + 794: "shower curtain", + 795: "ski", + 796: "ski mask", + 797: "sleeping bag", + 798: "slide rule, slipstick", + 799: "sliding door", + 800: "slot, one-armed bandit", + 801: "snorkel", + 802: "snowmobile", + 803: "snowplow, snowplough", + 804: "soap dispenser", + 805: "soccer ball", + 806: "sock", + 807: "solar dish, solar collector, solar furnace", + 808: "sombrero", + 809: "soup bowl", + 810: "space bar", + 811: "space heater", + 812: "space shuttle", + 813: "spatula", + 814: "speedboat", 815: "spider web, spider's web", - 816: 'spindle', - 817: 'sports car, sport car', - 818: 'spotlight, spot', - 819: 'stage', - 820: 'steam locomotive', - 821: 'steel arch bridge', - 822: 'steel drum', - 823: 'stethoscope', - 824: 'stole', - 825: 'stone wall', - 826: 'stopwatch, stop watch', - 827: 'stove', - 828: 'strainer', - 829: 'streetcar, tram, tramcar, trolley, trolley car', - 830: 'stretcher', - 831: 'studio couch, day bed', - 832: 'stupa, tope', - 833: 'submarine, pigboat, sub, U-boat', - 834: 'suit, suit of clothes', - 835: 'sundial', - 836: 'sunglass', - 837: 'sunglasses, dark glasses, shades', - 838: 'sunscreen, sunblock, sun blocker', - 839: 'suspension bridge', - 840: 'swab, swob, mop', - 841: 'sweatshirt', - 842: 'swimming trunks, bathing trunks', - 843: 'swing', - 844: 'switch, electric switch, electrical switch', - 845: 'syringe', - 846: 'table lamp', - 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle', - 848: 'tape player', - 849: 'teapot', - 850: 'teddy, teddy bear', - 851: 'television, television system', - 852: 'tennis ball', - 853: 'thatch, thatched roof', - 854: 'theater curtain, theatre curtain', - 855: 'thimble', - 856: 'thresher, thrasher, threshing machine', - 857: 'throne', - 858: 'tile roof', - 859: 'toaster', - 860: 'tobacco shop, tobacconist shop, tobacconist', - 861: 'toilet seat', - 862: 'torch', - 863: 'totem pole', - 864: 'tow truck, tow car, wrecker', - 865: 'toyshop', - 866: 'tractor', - 867: 'trailer truck, tractor trailer, trucking rig, rig, ' - 'articulated lorry, semi', - 868: 'tray', - 869: 'trench coat', - 870: 'tricycle, trike, velocipede', - 871: 'trimaran', - 872: 'tripod', - 873: 'triumphal arch', - 874: 'trolleybus, trolley coach, trackless trolley', - 875: 'trombone', - 876: 'tub, vat', - 877: 'turnstile', - 878: 'typewriter keyboard', - 879: 'umbrella', - 880: 'unicycle, monocycle', - 881: 'upright, upright piano', - 882: 'vacuum, vacuum cleaner', - 883: 'vase', - 884: 'vault', - 885: 'velvet', - 886: 'vending machine', - 887: 'vestment', - 888: 'viaduct', - 889: 'violin, fiddle', - 890: 'volleyball', - 891: 'waffle iron', - 892: 'wall clock', - 893: 'wallet, billfold, notecase, pocketbook', - 894: 'wardrobe, closet, press', - 895: 'warplane, military plane', - 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', - 897: 'washer, automatic washer, washing machine', - 898: 'water bottle', - 899: 'water jug', - 900: 'water tower', - 901: 'whiskey jug', - 902: 'whistle', - 903: 'wig', - 904: 'window screen', - 905: 'window shade', - 906: 'Windsor tie', - 907: 'wine bottle', - 908: 'wing', - 909: 'wok', - 910: 'wooden spoon', - 911: 'wool, woolen, woollen', - 912: 'worm fence, snake fence, snake-rail fence, Virginia fence', - 913: 'wreck', - 914: 'yawl', - 915: 'yurt', - 
916: 'web site, website, internet site, site', - 917: 'comic book', - 918: 'crossword puzzle, crossword', - 919: 'street sign', - 920: 'traffic light, traffic signal, stoplight', - 921: 'book jacket, dust cover, dust jacket, dust wrapper', - 922: 'menu', - 923: 'plate', - 924: 'guacamole', - 925: 'consomme', - 926: 'hot pot, hotpot', - 927: 'trifle', - 928: 'ice cream, icecream', - 929: 'ice lolly, lolly, lollipop, popsicle', - 930: 'French loaf', - 931: 'bagel, beigel', - 932: 'pretzel', - 933: 'cheeseburger', - 934: 'hotdog, hot dog, red hot', - 935: 'mashed potato', - 936: 'head cabbage', - 937: 'broccoli', - 938: 'cauliflower', - 939: 'zucchini, courgette', - 940: 'spaghetti squash', - 941: 'acorn squash', - 942: 'butternut squash', - 943: 'cucumber, cuke', - 944: 'artichoke, globe artichoke', - 945: 'bell pepper', - 946: 'cardoon', - 947: 'mushroom', - 948: 'Granny Smith', - 949: 'strawberry', - 950: 'orange', - 951: 'lemon', - 952: 'fig', - 953: 'pineapple, ananas', - 954: 'banana', - 955: 'jackfruit, jak, jack', - 956: 'custard apple', - 957: 'pomegranate', - 958: 'hay', - 959: 'carbonara', - 960: 'chocolate sauce, chocolate syrup', - 961: 'dough', - 962: 'meat loaf, meatloaf', - 963: 'pizza, pizza pie', - 964: 'potpie', - 965: 'burrito', - 966: 'red wine', - 967: 'espresso', - 968: 'cup', - 969: 'eggnog', - 970: 'alp', - 971: 'bubble', - 972: 'cliff, drop, drop-off', - 973: 'coral reef', - 974: 'geyser', - 975: 'lakeside, lakeshore', - 976: 'promontory, headland, head, foreland', - 977: 'sandbar, sand bar', - 978: 'seashore, coast, seacoast, sea-coast', - 979: 'valley, vale', - 980: 'volcano', - 981: 'ballplayer, baseball player', - 982: 'groom, bridegroom', - 983: 'scuba diver', - 984: 'rapeseed', - 985: 'daisy', + 816: "spindle", + 817: "sports car, sport car", + 818: "spotlight, spot", + 819: "stage", + 820: "steam locomotive", + 821: "steel arch bridge", + 822: "steel drum", + 823: "stethoscope", + 824: "stole", + 825: "stone wall", + 826: "stopwatch, stop watch", + 827: "stove", + 828: "strainer", + 829: "streetcar, tram, tramcar, trolley, trolley car", + 830: "stretcher", + 831: "studio couch, day bed", + 832: "stupa, tope", + 833: "submarine, pigboat, sub, U-boat", + 834: "suit, suit of clothes", + 835: "sundial", + 836: "sunglass", + 837: "sunglasses, dark glasses, shades", + 838: "sunscreen, sunblock, sun blocker", + 839: "suspension bridge", + 840: "swab, swob, mop", + 841: "sweatshirt", + 842: "swimming trunks, bathing trunks", + 843: "swing", + 844: "switch, electric switch, electrical switch", + 845: "syringe", + 846: "table lamp", + 847: "tank, army tank, armored combat vehicle, armoured combat vehicle", + 848: "tape player", + 849: "teapot", + 850: "teddy, teddy bear", + 851: "television, television system", + 852: "tennis ball", + 853: "thatch, thatched roof", + 854: "theater curtain, theatre curtain", + 855: "thimble", + 856: "thresher, thrasher, threshing machine", + 857: "throne", + 858: "tile roof", + 859: "toaster", + 860: "tobacco shop, tobacconist shop, tobacconist", + 861: "toilet seat", + 862: "torch", + 863: "totem pole", + 864: "tow truck, tow car, wrecker", + 865: "toyshop", + 866: "tractor", + 867: "trailer truck, tractor trailer, trucking rig, rig, " + "articulated lorry, semi", + 868: "tray", + 869: "trench coat", + 870: "tricycle, trike, velocipede", + 871: "trimaran", + 872: "tripod", + 873: "triumphal arch", + 874: "trolleybus, trolley coach, trackless trolley", + 875: "trombone", + 876: "tub, vat", + 877: "turnstile", + 878: "typewriter keyboard", 
+ 879: "umbrella", + 880: "unicycle, monocycle", + 881: "upright, upright piano", + 882: "vacuum, vacuum cleaner", + 883: "vase", + 884: "vault", + 885: "velvet", + 886: "vending machine", + 887: "vestment", + 888: "viaduct", + 889: "violin, fiddle", + 890: "volleyball", + 891: "waffle iron", + 892: "wall clock", + 893: "wallet, billfold, notecase, pocketbook", + 894: "wardrobe, closet, press", + 895: "warplane, military plane", + 896: "washbasin, handbasin, washbowl, lavabo, wash-hand basin", + 897: "washer, automatic washer, washing machine", + 898: "water bottle", + 899: "water jug", + 900: "water tower", + 901: "whiskey jug", + 902: "whistle", + 903: "wig", + 904: "window screen", + 905: "window shade", + 906: "Windsor tie", + 907: "wine bottle", + 908: "wing", + 909: "wok", + 910: "wooden spoon", + 911: "wool, woolen, woollen", + 912: "worm fence, snake fence, snake-rail fence, Virginia fence", + 913: "wreck", + 914: "yawl", + 915: "yurt", + 916: "web site, website, internet site, site", + 917: "comic book", + 918: "crossword puzzle, crossword", + 919: "street sign", + 920: "traffic light, traffic signal, stoplight", + 921: "book jacket, dust cover, dust jacket, dust wrapper", + 922: "menu", + 923: "plate", + 924: "guacamole", + 925: "consomme", + 926: "hot pot, hotpot", + 927: "trifle", + 928: "ice cream, icecream", + 929: "ice lolly, lolly, lollipop, popsicle", + 930: "French loaf", + 931: "bagel, beigel", + 932: "pretzel", + 933: "cheeseburger", + 934: "hotdog, hot dog, red hot", + 935: "mashed potato", + 936: "head cabbage", + 937: "broccoli", + 938: "cauliflower", + 939: "zucchini, courgette", + 940: "spaghetti squash", + 941: "acorn squash", + 942: "butternut squash", + 943: "cucumber, cuke", + 944: "artichoke, globe artichoke", + 945: "bell pepper", + 946: "cardoon", + 947: "mushroom", + 948: "Granny Smith", + 949: "strawberry", + 950: "orange", + 951: "lemon", + 952: "fig", + 953: "pineapple, ananas", + 954: "banana", + 955: "jackfruit, jak, jack", + 956: "custard apple", + 957: "pomegranate", + 958: "hay", + 959: "carbonara", + 960: "chocolate sauce, chocolate syrup", + 961: "dough", + 962: "meat loaf, meatloaf", + 963: "pizza, pizza pie", + 964: "potpie", + 965: "burrito", + 966: "red wine", + 967: "espresso", + 968: "cup", + 969: "eggnog", + 970: "alp", + 971: "bubble", + 972: "cliff, drop, drop-off", + 973: "coral reef", + 974: "geyser", + 975: "lakeside, lakeshore", + 976: "promontory, headland, head, foreland", + 977: "sandbar, sand bar", + 978: "seashore, coast, seacoast, sea-coast", + 979: "valley, vale", + 980: "volcano", + 981: "ballplayer, baseball player", + 982: "groom, bridegroom", + 983: "scuba diver", + 984: "rapeseed", + 985: "daisy", 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium " - "calceolus, Cypripedium parviflorum", - 987: 'corn', - 988: 'acorn', - 989: 'hip, rose hip, rosehip', - 990: 'buckeye, horse chestnut, conker', - 991: 'coral fungus', - 992: 'agaric', - 993: 'gyromitra', - 994: 'stinkhorn, carrion fungus', - 995: 'earthstar', - 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, ' - 'Grifola frondosa', - 997: 'bolete', - 998: 'ear, spike, capitulum', - 999: 'toilet tissue, toilet paper, bathroom tissue'} + "calceolus, Cypripedium parviflorum", + 987: "corn", + 988: "acorn", + 989: "hip, rose hip, rosehip", + 990: "buckeye, horse chestnut, conker", + 991: "coral fungus", + 992: "agaric", + 993: "gyromitra", + 994: "stinkhorn, carrion fungus", + 995: "earthstar", + 996: "hen-of-the-woods, hen of the woods, Polyporus 
frondosus, " "Grifola frondosa", + 997: "bolete", + 998: "ear, spike, capitulum", + 999: "toilet tissue, toilet paper, bathroom tissue", +} diff --git a/tests/benchmark.py b/tests/benchmark.py index 107288311..fc93dd6b2 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -70,8 +70,7 @@ def run_all_tests(folder=None, verbose=True): print(t.__class__.__name__) break except TypeError as e: - raise RuntimeError( - "Unable to run test '{}'.".format(ts)) from e + raise RuntimeError("Unable to run test '{}'.".format(ts)) from e runner.run(ts) from test_utils.tests_helper import make_report_backend diff --git a/tests/test_algebra_cascade.py b/tests/test_algebra_cascade.py index d0901c2ad..0ba83580b 100644 --- a/tests/test_algebra_cascade.py +++ b/tests/test_algebra_cascade.py @@ -4,9 +4,13 @@ import numpy as np from numpy.testing import assert_almost_equal from onnx.defs import onnx_opset_version + try: from onnxruntime.capi.onnxruntime_pybind11_state import ( - InvalidGraph, Fail, InvalidArgument) + InvalidGraph, + Fail, + InvalidArgument, + ) except ImportError: InvalidGraph = RuntimeError InvalidArgument = RuntimeError @@ -18,69 +22,72 @@ from skl2onnx import to_onnx, convert_sklearn from skl2onnx.proto import get_latest_tested_opset_version from test_utils import ( - fit_regression_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + fit_regression_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) class TestOnnxOperatorsCascade(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_cascade_add(self): - - def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None): + def generate_onnx_graph(dim, nbnode, input_name="X1", opv=None): i1 = input_name for i in range(nbnode - 1): i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32) node = OnnxAdd(i1, i2, op_version=opv) i1 = node i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32) - node = OnnxAdd(i1, i2, output_names=['Y'], op_version=opv) - onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))], - outputs=[('Y', FloatTensorType())], - target_opset=opv) + node = OnnxAdd(i1, i2, output_names=["Y"], op_version=opv) + onx = node.to_onnx( + [(input_name, FloatTensorType((None, dim)))], + outputs=[("Y", FloatTensorType())], + target_opset=opv, + ) return onx - exp = [np.array([[11., 11., 11., 11., 11.]]), - np.array([[42., 42., 42., 42., 42.]]), - np.array([[93., 93., 93., 93., 93.]]), - np.array([[100100., 100100., 100100., 100100., 100100.]])] - for opv in ({'': 10}, 9, 10, 11, 12, onnx_opset_version()): + exp = [ + np.array([[11.0, 11.0, 11.0, 11.0, 11.0]]), + np.array([[42.0, 42.0, 42.0, 42.0, 42.0]]), + np.array([[93.0, 93.0, 93.0, 93.0, 93.0]]), + np.array([[100100.0, 100100.0, 100100.0, 100100.0, 100100.0]]), + ] + for opv in ({"": 10}, 9, 10, 11, 12, onnx_opset_version()): if isinstance(opv, dict): - if opv[''] > get_latest_tested_opset_version(): + if opv[""] > get_latest_tested_opset_version(): continue elif opv is not None and opv > get_latest_tested_opset_version(): continue for i, nbnode in enumerate((1, 2, 3, 100)): with self.subTest(n_nodes=nbnode): onx = generate_onnx_graph(5, nbnode, opv=opv) - if opv == {'': 10}: + if opv == {"": 10}: for im in onx.opset_import: if im.version > 10: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) + "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) else: for im in onx.opset_import: if im.version > opv: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) 
+ "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except (InvalidGraph, InvalidArgument) as e: - if (isinstance(opv, dict) and - opv[''] >= onnx_opset_version()): + if isinstance(opv, dict) and opv[""] >= onnx_opset_version(): continue - if (isinstance(opv, int) and - opv >= onnx_opset_version()): + if isinstance(opv, int) and opv >= onnx_opset_version(): continue raise AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e X = (np.ones((1, 5)) * nbnode).astype(np.float32) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] assert_almost_equal(exp[i], res) @@ -89,37 +96,34 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None): dim = 10 onx = generate_onnx_graph(dim, 300, opv=11) as_string = onx.SerializeToString() - ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + ort = InferenceSession(as_string, providers=["CPUExecutionProvider"]) X = (np.ones((1, dim)) * nbnode).astype(np.float32) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] assert res.shape[1] == dim @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_cascade_scaler(self): - - def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1): + def generate_onnx_graph(dim, nbnode, input_name="X1", opv=1): i1 = input_name scale = list(np.ones((1, dim)).ravel()) for i in range(nbnode - 1): - i2 = list(map(float, np.ones((1, dim)).astype( - np.float32).ravel())) + i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel())) node = OnnxScaler(i1, offset=i2, scale=scale, op_version=opv) i1 = node i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel())) - node = OnnxScaler(i1, offset=i2, scale=scale, output_names=['Y'], - op_version=opv) - onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))], - outputs=[('Y', FloatTensorType((None, dim)))], - target_opset=TARGET_OPSET) + node = OnnxScaler( + i1, offset=i2, scale=scale, output_names=["Y"], op_version=opv + ) + onx = node.to_onnx( + [(input_name, FloatTensorType((None, dim)))], + outputs=[("Y", FloatTensorType((None, dim)))], + target_opset=TARGET_OPSET, + ) return onx - exp = [np.zeros((1, 5)), - np.zeros((1, 5)), - np.zeros((1, 5)), - np.zeros((1, 5))] + exp = [np.zeros((1, 5)), np.zeros((1, 5)), np.zeros((1, 5)), np.zeros((1, 5))] for opv in (1, 2, 3): if opv > get_latest_tested_opset_version(): continue @@ -128,17 +132,18 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1): as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: - if opv in (3, ): + if opv in (3,): continue if opv >= onnx_opset_version(): continue raise AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e X = (np.ones((1, 5)) * nbnode).astype(np.float32) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] assert_almost_equal(exp[i], res) @@ -146,10 +151,9 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1): dim = 10 onx = 
generate_onnx_graph(dim, 300) as_string = onx.SerializeToString() - ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + ort = InferenceSession(as_string, providers=["CPUExecutionProvider"]) X = (np.ones((1, dim)) * nbnode).astype(np.float32) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] assert res.shape[1] == dim @@ -168,49 +172,53 @@ def test_scaler_converted(self): try: onx = to_onnx(st, X.astype(np.float32), target_opset=opv) except RuntimeError as e: - if ("is higher than the number of the " - "installed onnx package") in str(e): + if ( + "is higher than the number of the " "installed onnx package" + ) in str(e): continue raise e as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: if opv > onnx_opset_version(): continue raise AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e - res_out = ort.run(None, {'X': X.astype(np.float32)}) + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e + res_out = ort.run(None, {"X": X.astype(np.float32)}) assert len(res_out) == 1 res = res_out[0] assert_almost_equal(exp, res) for opv in [1, 2] + list(range(10, onnx_opset_version() + 1)): with self.subTest(opvml=opv): - onx = to_onnx(st, X.astype(np.float32), - target_opset={'ai.onnx.ml': opv, - '': TARGET_OPSET}) + onx = to_onnx( + st, + X.astype(np.float32), + target_opset={"ai.onnx.ml": opv, "": TARGET_OPSET}, + ) as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: if opv > onnx_opset_version(): continue raise AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e - res_out = ort.run(None, {'X': X.astype(np.float32)}) + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e + res_out = ort.run(None, {"X": X.astype(np.float32)}) assert len(res_out) == 1 res = res_out[0] assert_almost_equal(exp, res) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_model_mlp_regressor_default(self): - model, X_test = fit_regression_model( - MLPRegressor(random_state=42)) + model, X_test = fit_regression_model(MLPRegressor(random_state=42)) exp = model.predict(X_test) for opv in (1, 2, 7, 8, 9, 10, 11, 12, 13, onnx_opset_version()): if opv is not None and opv > TARGET_OPSET: @@ -218,31 +226,36 @@ def test_model_mlp_regressor_default(self): with self.subTest(opv=opv): try: onx = convert_sklearn( - model, "scikit-learn MLPRegressor", + model, + "scikit-learn MLPRegressor", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=opv) + target_opset=opv, + ) except RuntimeError as e: - if ("is higher than the number of the " - "installed onnx package") in str(e): + if ( + "is higher than the number of the " "installed onnx package" + ) in str(e): continue raise e as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except (RuntimeError, InvalidGraph, Fail) as e: if opv in (None, 1, 2): continue if opv >= onnx_opset_version(): continue - if ("No suitable kernel definition found for " - "op Cast(9)") in str(e): + if ("No suitable kernel definition found for " "op Cast(9)") in str( + e + ): # too old onnxruntime continue raise 
AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e - res_out = ort.run(None, {'input': X_test}) + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e + res_out = ort.run(None, {"input": X_test}) assert len(res_out) == 1 res = res_out[0] assert_almost_equal(exp.ravel(), res.ravel(), decimal=4) diff --git a/tests/test_algebra_complex.py b/tests/test_algebra_complex.py index 87da48521..330192b6f 100644 --- a/tests/test_algebra_complex.py +++ b/tests/test_algebra_complex.py @@ -2,51 +2,59 @@ import numpy as np from numpy.testing import assert_almost_equal from onnxruntime import InferenceSession + try: from onnxruntime.capi.onnxruntime_pybind11_state import ( - InvalidGraph, Fail, InvalidArgument) + InvalidGraph, + Fail, + InvalidArgument, + ) except ImportError: InvalidGraph = RuntimeError InvalidArgument = RuntimeError Fail = RuntimeError -from skl2onnx.common.data_types import ( - Complex64TensorType, Complex128TensorType) +from skl2onnx.common.data_types import Complex64TensorType, Complex128TensorType from skl2onnx.algebra.onnx_ops import OnnxAdd from test_utils import TARGET_OPSET class TestAlgebraComplex(unittest.TestCase): - - @unittest.skipIf(Complex64TensorType is None, - reason="not available") + @unittest.skipIf(Complex64TensorType is None, reason="not available") @unittest.skipIf(TARGET_OPSET < 13, reason="not implemented") def test_complex(self): - for dt, var, pr in ((np.complex64, Complex64TensorType, 14), - (np.complex128, Complex128TensorType, 15)): - X = np.array([[1 - 2j, -12j], - [-1 - 2j, 1 + 2j]]).astype(dt) + for dt, var, pr in ( + (np.complex64, Complex64TensorType, 14), + (np.complex128, Complex128TensorType, 15), + ): + X = np.array([[1 - 2j, -12j], [-1 - 2j, 1 + 2j]]).astype(dt) for opv in range(10, 20): if opv > TARGET_OPSET: continue with self.subTest(dt=dt, opset=opv): - out = OnnxAdd('X', np.array([1 + 2j], dtype=dt), - output_names=['Y'], op_version=opv) - onx = out.to_onnx([('X', var((None, 2)))], - outputs=[('Y', var())], - target_opset=opv) - self.assertIn('elem_type: %d' % pr, str(onx)) + out = OnnxAdd( + "X", + np.array([1 + 2j], dtype=dt), + output_names=["Y"], + op_version=opv, + ) + onx = out.to_onnx( + [("X", var((None, 2)))], + outputs=[("Y", var())], + target_opset=opv, + ) + self.assertIn("elem_type: %d" % pr, str(onx)) try: ort = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: if "Type Error: Type 'tensor(complex" in str(e): continue raise e assert ort is not None - got = ort.run(None, {'X': X})[0] + got = ort.run(None, {"X": X})[0] assert_almost_equal(X + np.array([1 + 2j]), got) diff --git a/tests/test_algebra_converters.py b/tests/test_algebra_converters.py index ae1114c30..ed13b15c5 100644 --- a/tests/test_algebra_converters.py +++ b/tests/test_algebra_converters.py @@ -5,6 +5,7 @@ from numpy.testing import assert_almost_equal from sklearn.preprocessing import StandardScaler from skl2onnx.algebra.onnx_ops import OnnxMatMul, OnnxExp, OnnxAdd, OnnxDiv + try: from skl2onnx.algebra.sklearn_ops import OnnxSklearnStandardScaler from skl2onnx import wrap_as_onnx_mixin @@ -14,12 +15,12 @@ class TestAlgebraConverters(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 9, reason="not available") - @unittest.skipIf(OnnxSklearnStandardScaler is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf( + OnnxSklearnStandardScaler is None, + reason="Cannot infer 
operators with current ONNX", + ) def test_algebra_converter(self): - X = numpy.array([[1, 2], [2, 3]]) op = OnnxSklearnStandardScaler() op.fit(X) @@ -28,13 +29,13 @@ def test_algebra_converter(self): try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: raise RuntimeError("Unable to read\n{}".format(onx)) from e X = numpy.array([[0, 1], [-1, -2]]) try: - Y = sess.run(None, {'X': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"X": X.astype(numpy.float32)})[0] except RuntimeError as e: raise RuntimeError("Unable to run\n{}".format(onx)) from e assert_almost_equal(Y, op.transform(X)) @@ -51,13 +52,13 @@ def test_algebra_converter(self): try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: raise RuntimeError("Unable to read\n{}".format(onx)) from e X = numpy.array([[0, 1], [-1, -2]]) try: - Y = sess.run(None, {'X': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"X": X.astype(numpy.float32)})[0] except RuntimeError as e: raise RuntimeError("Unable to run\n{}".format(onx)) from e assert_almost_equal(Y, op.transform(X)) @@ -68,12 +69,12 @@ def test_algebra_to_onnx(self): beta = numpy.array([1, 2, 3, 4]) / 10 beta32 = beta.astype(numpy.float32) onnxExpM = OnnxExp( - OnnxMatMul('X', beta32, op_version=TARGET_OPSET), - op_version=TARGET_OPSET) + OnnxMatMul("X", beta32, op_version=TARGET_OPSET), op_version=TARGET_OPSET + ) cst = numpy.ones((1, 3), dtype=numpy.float32) onnxExpM1 = OnnxAdd(onnxExpM, cst, op_version=TARGET_OPSET) onnxPred = OnnxDiv(onnxExpM, onnxExpM1, op_version=TARGET_OPSET) - inputs = {'X': X[:1].astype(numpy.float32)} + inputs = {"X": X[:1].astype(numpy.float32)} model_onnx = onnxPred.to_onnx(inputs, target_opset=TARGET_OPSET) s1 = str(model_onnx) model_onnx = onnxPred.to_onnx(inputs, target_opset=TARGET_OPSET) @@ -88,14 +89,13 @@ def test_algebra_to_onnx(self): def test_add_12(self): idi = numpy.identity(2, dtype=numpy.float32) - onx = OnnxAdd('X', idi, output_names=['Y'], op_version=12) - model_def = onx.to_onnx({'X': idi.astype(numpy.float32)}, - target_opset=12) + onx = OnnxAdd("X", idi, output_names=["Y"], op_version=12) + model_def = onx.to_onnx({"X": idi.astype(numpy.float32)}, target_opset=12) X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X}) exp = idi + X self.assertEqual(exp.shape, got[0].shape) self.assertEqual(list(exp.ravel()), list(got[0].ravel())) diff --git a/tests/test_algebra_custom_model.py b/tests/test_algebra_custom_model.py index fa5163195..c6f78f8ba 100644 --- a/tests/test_algebra_custom_model.py +++ b/tests/test_algebra_custom_model.py @@ -18,9 +18,7 @@ from test_utils import dump_data_and_model, TARGET_OPSET -class CustomOpTransformer(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): - +class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -35,23 +33,28 @@ def fit(self, X, y=None): def transform(self, X): return (X - self.W_) / self.S_ - def to_onnx_operator(self, inputs=None, outputs=None, - target_opset=None, **kwargs): + 
def to_onnx_operator(self, inputs=None, outputs=None, target_opset=None, **kwargs): if inputs is None: raise RuntimeError("inputs should contain one name") i0 = self.get_inputs(inputs, 0) W = self.W_.astype(np.float32) S = self.S_.astype(np.float32) # case if there are multiple output nodes - return OnnxDiv(OnnxSub(i0, W, op_version=self.op_version), S, - output_names=outputs, op_version=self.op_version) + return OnnxDiv( + OnnxSub(i0, W, op_version=self.op_version), + S, + output_names=outputs, + op_version=self.op_version, + ) class CustomOpTransformerShape(CustomOpTransformer): def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) + shape=operator.inputs[0].type.shape + ) + return shape_calculator @@ -60,7 +63,6 @@ class CustomOpScaler(StandardScaler, OnnxOperatorMixin): class TestCustomModelAlgebra(unittest.TestCase): - def test_base_api(self): model = CustomOpScaler() data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]] @@ -73,7 +75,7 @@ def test_base_api(self): @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_custom_scaler(self): - mat = np.array([[0., 1.], [0., 1.], [2., 2.]]) + mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]]) tr = CustomOpTransformerShape(op_version=TARGET_OPSET) tr.fit(mat) z = tr.transform(mat) @@ -83,15 +85,15 @@ def test_custom_scaler(self): model_onnx = tr.to_onnx(matf) onnx.checker.check_model(model_onnx) dump_data_and_model( - mat.astype(np.float32), tr, model_onnx, - basename="CustomTransformerAlgebra") + mat.astype(np.float32), tr, model_onnx, basename="CustomTransformerAlgebra" + ) @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_custom_scaler_pipeline_right(self): pipe = make_pipeline( - StandardScaler(), - CustomOpTransformerShape(op_version=TARGET_OPSET)) - mat = np.array([[0., 1.], [0., 1.], [2., 2.]]) + StandardScaler(), CustomOpTransformerShape(op_version=TARGET_OPSET) + ) + mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]]) pipe.fit(mat) z = pipe.transform(mat) assert z is not None @@ -100,15 +102,18 @@ def test_custom_scaler_pipeline_right(self): model_onnx = to_onnx(pipe, matf, target_opset=TARGET_OPSET) onnx.checker.check_model(model_onnx) dump_data_and_model( - mat.astype(np.float32), pipe, model_onnx, - basename="CustomTransformerPipelineRightAlgebra") + mat.astype(np.float32), + pipe, + model_onnx, + basename="CustomTransformerPipelineRightAlgebra", + ) @unittest.skipIf(TARGET_OPSET < 8, reason="not available") def test_custom_scaler_pipeline_left(self): pipe = make_pipeline( - CustomOpTransformer(op_version=TARGET_OPSET), - StandardScaler()) - mat = np.array([[0., 1.], [0., 1.], [2., 2.]]) + CustomOpTransformer(op_version=TARGET_OPSET), StandardScaler() + ) + mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]]) pipe.fit(mat) z = pipe.transform(mat) @@ -120,9 +125,9 @@ def test_custom_scaler_pipeline_left(self): assert "inputs should contain one name" in str(e) pipe = make_pipeline( - CustomOpTransformerShape(op_version=TARGET_OPSET), - StandardScaler()) - mat = np.array([[0., 1.], [0., 1.], [2., 2.]]) + CustomOpTransformerShape(op_version=TARGET_OPSET), StandardScaler() + ) + mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]]) pipe.fit(mat) z = pipe.transform(mat) assert z is not None @@ -136,8 +141,11 @@ def test_custom_scaler_pipeline_left(self): onnx.checker.check_model(model_onnx) dump_data_and_model( - mat.astype(np.float32), pipe, model_onnx, - basename="CustomTransformerPipelineLeftAlgebra") + 
mat.astype(np.float32), + pipe, + model_onnx, + basename="CustomTransformerPipelineLeftAlgebra", + ) if __name__ == "__main__": diff --git a/tests/test_algebra_custom_model_sub_estimator.py b/tests/test_algebra_custom_model_sub_estimator.py index e304a8409..cea2fd1ae 100644 --- a/tests/test_algebra_custom_model_sub_estimator.py +++ b/tests/test_algebra_custom_model_sub_estimator.py @@ -7,6 +7,7 @@ import warnings import numpy as np from numpy.testing import assert_almost_equal + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument except ImportError: @@ -17,6 +18,7 @@ from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -26,21 +28,19 @@ from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from skl2onnx import to_onnx, update_registered_converter from skl2onnx.common.data_types import FloatTensorType -from skl2onnx.common.shape_calculator import ( - calculate_linear_classifier_output_shapes) +from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes from skl2onnx.algebra.onnx_operator import OnnxSubEstimator from skl2onnx.algebra.onnx_ops import ( OnnxArgMax, OnnxConcat, OnnxIdentity, OnnxReshape, - OnnxSoftmax) + OnnxSoftmax, +) from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession -class CustomOpTransformer1(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): - +class CustomOpTransformer1(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -54,26 +54,26 @@ def fit(self, X, y=None): def transform(self, X): return self.norm_.transform(X) - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: raise RuntimeError("inputs should contain one name") opv = target_opset or self.op_version i0 = self.get_inputs(inputs, 0) out = OnnxSubEstimator(self.norm_, i0, op_version=opv) - return OnnxIdentity(out, op_version=self.op_version, - output_names=outputs) + return OnnxIdentity(out, op_version=self.op_version, output_names=outputs) def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) - return shape_calculator + shape=operator.inputs[0].type.shape + ) + return shape_calculator -class CustomOpTransformer1w(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): +class CustomOpTransformer1w(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -87,25 +87,24 @@ def fit(self, X, y=None): def transform(self, X): return self.norm_.transform(X) - def to_onnx_operator(self, inputs=None, outputs=('Y', )): + def to_onnx_operator(self, inputs=None, outputs=("Y",)): if inputs is None: raise RuntimeError("inputs should contain one name") opv = self.op_version i0 = self.get_inputs(inputs, 0) out = OnnxSubEstimator(self.norm_, i0, op_version=opv) - return OnnxIdentity(out, op_version=self.op_version, - output_names=outputs) + return OnnxIdentity(out, op_version=self.op_version, output_names=outputs) def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - 
shape=operator.inputs[0].type.shape) - return shape_calculator + shape=operator.inputs[0].type.shape + ) + return shape_calculator -class CustomOpTransformer2(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): +class CustomOpTransformer2(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -119,26 +118,26 @@ def fit(self, X, y=None): def transform(self, X): return self.norm_.transform(X) - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: raise RuntimeError("inputs should contain one name") opv = target_opset or self.op_version i0 = self.get_inputs(inputs, 0) - out = OnnxSubEstimator(self.norm_, i0, op_version=opv, - output_names=outputs) + out = OnnxSubEstimator(self.norm_, i0, op_version=opv, output_names=outputs) return out def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) - return shape_calculator + shape=operator.inputs[0].type.shape + ) + return shape_calculator -class CustomOpTransformer3(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): +class CustomOpTransformer3(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -152,27 +151,28 @@ def fit(self, X, y=None): def transform(self, X): return self.norm_.predict_proba(X) - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: raise RuntimeError("inputs should contain one name") opv = target_opset or self.op_version i0 = self.get_inputs(inputs, 0) - out = OnnxSubEstimator(self.norm_, i0, op_version=opv, - options={'zipmap': False}) - return OnnxIdentity( - out[1], output_names=outputs, op_version=self.op_version) + out = OnnxSubEstimator( + self.norm_, i0, op_version=opv, options={"zipmap": False} + ) + return OnnxIdentity(out[1], output_names=outputs, op_version=self.op_version) def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) - return shape_calculator + shape=operator.inputs[0].type.shape + ) + return shape_calculator -class CustomOpTransformer4(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): +class CustomOpTransformer4(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=None): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -186,25 +186,26 @@ def fit(self, X, y=None): def transform(self, X): return self.norm_.predict_proba(X) - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: raise RuntimeError("inputs should contain one name") opv = target_opset or self.op_version i0 = self.get_inputs(inputs, 0) out = OnnxSubEstimator(self.norm_, i0, op_version=opv) - return OnnxIdentity( - out[1], output_names=outputs, op_version=opv) + return OnnxIdentity(out[1], output_names=outputs, op_version=opv) def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) + 
shape=operator.inputs[0].type.shape + ) + return shape_calculator class Custom2OpTransformer1(BaseEstimator, TransformerMixin): - def __init__(self): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -218,8 +219,7 @@ def transform(self, X): def custom_shape_calculator(operator): - operator.outputs[0].type = FloatTensorType( - shape=operator.inputs[0].type.shape) + operator.outputs[0].type = FloatTensorType(shape=operator.inputs[0].type.shape) def custom_transformer_converter1(scope, operator, container): @@ -228,8 +228,7 @@ def custom_transformer_converter1(scope, operator, container): op = operator.raw_operator opv = container.target_opset out = OnnxSubEstimator(op.norm_, i0, op_version=opv) - final = OnnxIdentity(out, op_version=opv, - output_names=outputs) + final = OnnxIdentity(out, op_version=opv, output_names=outputs) final.add_to(scope, container) @@ -243,8 +242,7 @@ def custom_transformer_converter1w(scope, operator, container): op = operator.raw_operator opv = container.target_opset out = OnnxSubEstimator(op.norm_, i0, op_version=opv) - final = OnnxIdentity(out, op_version=opv, - output_names=outputs) + final = OnnxIdentity(out, op_version=opv, output_names=outputs) final.add_to(scope, container) @@ -259,8 +257,7 @@ def custom_transformer_converter1ww(scope, operator, container): opv = container.target_opset idin = OnnxIdentity(i0, op_version=opv) out = OnnxSubEstimator(op.norm_, idin, op_version=opv) - final = OnnxIdentity(out, op_version=opv, - output_names=outputs) + final = OnnxIdentity(out, op_version=opv, output_names=outputs) final.add_to(scope, container) @@ -273,13 +270,11 @@ def custom_transformer_converter2(scope, operator, container): outputs = operator.outputs op = operator.raw_operator opv = container.target_opset - out = OnnxSubEstimator(op.norm_, i0, op_version=opv, - output_names=outputs) + out = OnnxSubEstimator(op.norm_, i0, op_version=opv, output_names=outputs) out.add_to(scope, container) class Custom2OpTransformer3(Custom2OpTransformer1): - def fit(self, X, y=None): self.norm_ = LogisticRegression().fit(X, y) return self @@ -293,10 +288,8 @@ def custom_transformer_converter3(scope, operator, container): outputs = operator.outputs op = operator.raw_operator opv = container.target_opset - out = OnnxSubEstimator(op.norm_, i0, op_version=opv, - options={'zipmap': False}) - final = OnnxIdentity( - out[1], output_names=outputs, op_version=opv) + out = OnnxSubEstimator(op.norm_, i0, op_version=opv, options={"zipmap": False}) + final = OnnxIdentity(out[1], output_names=outputs, op_version=opv) final.add_to(scope, container) @@ -310,13 +303,11 @@ def custom_transformer_converter4(scope, operator, container): op = operator.raw_operator opv = container.target_opset out = OnnxSubEstimator(op.norm_, i0, op_version=opv) - final = OnnxIdentity( - out[1], output_names=outputs, op_version=opv) + final = OnnxIdentity(out[1], output_names=outputs, op_version=opv) final.add_to(scope, container) class CustomOpClassifier(BaseEstimator, ClassifierMixin): - def __init__(self): BaseEstimator.__init__(self) ClassifierMixin.__init__(self) @@ -355,66 +346,69 @@ def custom_classifier_converter(scope, operator, container): y_list = [ OnnxReshape( OnnxSubEstimator(est, X, op_version=opv)[1], - np.array([-1, 1], dtype=np.int64), op_version=opv) - for est in op.estimators_] + np.array([-1, 1], dtype=np.int64), + op_version=opv, + ) + for est in op.estimators_ + ] y_matrix = OnnxConcat(*y_list, axis=1, op_version=opv) - probs = OnnxSoftmax(y_matrix, axis=1, op_version=opv, - 
output_names=[outputs[1]]) + probs = OnnxSoftmax(y_matrix, axis=1, op_version=opv, output_names=[outputs[1]]) probs.add_to(scope, container) - labels = OnnxArgMax(probs, axis=1, keepdims=0, op_version=opv, - output_names=[outputs[0]]) + labels = OnnxArgMax( + probs, axis=1, keepdims=0, op_version=opv, output_names=[outputs[0]] + ) labels.add_to(scope, container) class TestCustomModelAlgebraSubEstimator(unittest.TestCase): - def check_transform(self, obj, X): expected = obj.transform(X) onx = to_onnx(obj, X, target_opset=TARGET_OPSET) try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Issue %r with\n%s" % (e, str(onx))) from e - got = sess.run(None, {'X': X})[0] + raise AssertionError("Issue %r with\n%s" % (e, str(onx))) from e + got = sess.run(None, {"X": X})[0] assert_almost_equal(expected, got, decimal=5) def check_classifier(self, obj, X): expected_labels = obj.predict(X) expected_probas = obj.predict_proba(X) - onx = to_onnx(obj, X, target_opset=TARGET_OPSET, - options={id(obj): {'zipmap': False}}) + onx = to_onnx( + obj, X, target_opset=TARGET_OPSET, options={id(obj): {"zipmap": False}} + ) try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Issue %r with\n%s" % (e, str(onx))) from e - got = sess.run(None, {'X': X}) + raise AssertionError("Issue %r with\n%s" % (e, str(onx))) from e + got = sess.run(None, {"X": X}) assert_almost_equal(expected_probas, got[1], decimal=5) assert_almost_equal(expected_labels, got[0]) def test_custom_scaler_1(self): - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = CustomOpTransformer1(op_version=TARGET_OPSET) tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_1_classic(self): update_registered_converter( - Custom2OpTransformer1, 'Custom2OpTransformer1', + Custom2OpTransformer1, + "Custom2OpTransformer1", custom_shape_calculator, - custom_transformer_converter1) - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter1, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = Custom2OpTransformer1() tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_1w(self): - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = CustomOpTransformer1w(op_version=TARGET_OPSET) tr.fit(X) with warnings.catch_warnings(record=True) as w: @@ -426,42 +420,48 @@ def test_custom_scaler_1w(self): def test_custom_scaler_1w_classic(self): update_registered_converter( - Custom2OpTransformer1w, 'Custom2OpTransformer1w', + Custom2OpTransformer1w, + "Custom2OpTransformer1w", custom_shape_calculator, - custom_transformer_converter1w) - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter1w, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = Custom2OpTransformer1w() tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_1ww_classic(self): update_registered_converter( - Custom2OpTransformer1ww, 'Custom2OpTransformer1ww', + Custom2OpTransformer1ww, + "Custom2OpTransformer1ww", custom_shape_calculator, - custom_transformer_converter1ww) - X 
= np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter1ww, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = Custom2OpTransformer1ww() tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_2(self): - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = CustomOpTransformer2(op_version=TARGET_OPSET) tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_2_classic(self): update_registered_converter( - Custom2OpTransformer2, 'Custom2OpTransformer2', + Custom2OpTransformer2, + "Custom2OpTransformer2", custom_shape_calculator, - custom_transformer_converter2) - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter2, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = Custom2OpTransformer2() tr.fit(X) self.check_transform(tr, X) def test_custom_scaler_3(self): - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) y = np.array([0, 0, 1], dtype=np.int64) tr = CustomOpTransformer3(op_version=TARGET_OPSET) tr.fit(X, y) @@ -469,17 +469,19 @@ def test_custom_scaler_3(self): def test_custom_scaler_3_classic(self): update_registered_converter( - Custom2OpTransformer3, 'Custom2OpTransformer3', + Custom2OpTransformer3, + "Custom2OpTransformer3", custom_shape_calculator, - custom_transformer_converter3) - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter3, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) y = np.array([0, 0, 1], dtype=np.int64) tr = Custom2OpTransformer3() tr.fit(X, y) self.check_transform(tr, X) def test_custom_scaler_4(self): - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) y = np.array([0, 0, 1], dtype=np.int64) tr = CustomOpTransformer4(op_version=TARGET_OPSET) tr.fit(X, y) @@ -487,10 +489,12 @@ def test_custom_scaler_4(self): def test_custom_scaler_4_classic(self): update_registered_converter( - Custom2OpTransformer4, 'Custom2OpTransformer4', + Custom2OpTransformer4, + "Custom2OpTransformer4", custom_shape_calculator, - custom_transformer_converter4) - X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32) + custom_transformer_converter4, + ) + X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32) tr = Custom2OpTransformer1() tr.fit(X) self.check_transform(tr, X) @@ -498,11 +502,12 @@ def test_custom_scaler_4_classic(self): @ignore_warnings(category=ConvergenceWarning) def test_custom_classifier(self): update_registered_converter( - CustomOpClassifier, 'CustomOpClassifier', + CustomOpClassifier, + "CustomOpClassifier", calculate_linear_classifier_output_shapes, custom_classifier_converter, - options={'zipmap': [False, True], - 'nocl': [False, True]}) + options={"zipmap": [False, True], "nocl": [False, True]}, + ) data = load_iris() X, y = data.data, data.target X = X.astype(np.float32) diff --git a/tests/test_algebra_deprecation.py b/tests/test_algebra_deprecation.py index daa8c4ff5..bf95741c4 100644 --- a/tests/test_algebra_deprecation.py +++ b/tests/test_algebra_deprecation.py @@ -15,8 +15,7 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): - - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = 
alpha @@ -54,13 +53,14 @@ def decorrelate_transformer_converter(scope, operator, container): class TestOnnxDeprecation(unittest.TestCase): - @classmethod def setUpClass(cls): update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_converter) + decorrelate_transformer_converter, + ) def test_decorrelate_transformer(self): data = load_iris() @@ -71,7 +71,7 @@ def test_decorrelate_transformer(self): pred = dec.transform(X) cov = pred.T @ pred for i in range(cov.shape[0]): - cov[i, i] = 1. + cov[i, i] = 1.0 assert_almost_equal(np.identity(4), cov) st = BytesIO() @@ -80,7 +80,6 @@ def test_decorrelate_transformer(self): assert_almost_equal(dec.transform(X), dec2.transform(X)) def test_sub_operator(self): - data = load_iris() X = data.data @@ -89,22 +88,23 @@ def test_sub_operator(self): with warnings.catch_warnings(record=True) as ws: warnings.simplefilter("always") - onx = to_onnx(dec, X.astype(np.float32), - target_opset=TARGET_OPSET) + onx = to_onnx(dec, X.astype(np.float32), target_opset=TARGET_OPSET) mes = None for w in ws: - if (w.category == DeprecationWarning and - 'numpy' not in str(w.message).lower()): + if ( + w.category == DeprecationWarning + and "numpy" not in str(w.message).lower() + ): mes = w.message self.assertTrue(mes is not None) - self.assertIn('will be removed', str(mes)) + self.assertIn("will be removed", str(mes)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) exp = dec.transform(X.astype(np.float32)) - got = sess.run(None, {'X': X.astype(np.float32)})[0] + got = sess.run(None, {"X": X.astype(np.float32)})[0] def diff(p1, p2): p1 = p1.ravel() diff --git a/tests/test_algebra_double.py b/tests/test_algebra_double.py index e156bf28b..c02f1fff9 100644 --- a/tests/test_algebra_double.py +++ b/tests/test_algebra_double.py @@ -11,31 +11,29 @@ class TestAlgebraDouble(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(onnxruntime.__version__) - <= pv.Version("0.4.0"), - reason="Sub(7) not available") + @unittest.skipIf( + pv.Version(onnxruntime.__version__) <= pv.Version("0.4.0"), + reason="Sub(7) not available", + ) def test_algebra_converter(self): - coef = numpy.array([[1, 2], [3, 4]], dtype=numpy.float64) intercept = 1 X_test = numpy.array([[1, -2], [3, -4]], dtype=numpy.float64) onnx_fct = OnnxSub( - OnnxMatMul('X', coef, op_version=TARGET_OPSET), + OnnxMatMul("X", coef, op_version=TARGET_OPSET), numpy.array([intercept], dtype=numpy.float64), - output_names=['Y'], - op_version=TARGET_OPSET) - onnx_model = onnx_fct.to_onnx( - {'X': X_test}, target_opset=TARGET_OPSET) + output_names=["Y"], + op_version=TARGET_OPSET, + ) + onnx_model = onnx_fct.to_onnx({"X": X_test}, target_opset=TARGET_OPSET) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - ort_pred = sess.run(None, {'X': X_test})[0] - assert_almost_equal(ort_pred, - numpy.array([[-6., -7.], [-10., -11.]])) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + ort_pred = sess.run(None, {"X": X_test})[0] + assert_almost_equal(ort_pred, numpy.array([[-6.0, -7.0], [-10.0, -11.0]])) if __name__ == "__main__": diff --git a/tests/test_algebra_onnx_doc.py b/tests/test_algebra_onnx_doc.py index be3e0a072..1a2d8b722 100644 --- 
a/tests/test_algebra_onnx_doc.py +++ b/tests/test_algebra_onnx_doc.py @@ -11,15 +11,15 @@ class TestAlgebraOnnxDoc(unittest.TestCase): - def setUp(self): self._algebra = dynamic_class_creation() def predict_with_onnxruntime(self, model_def, *inputs): import onnxruntime as ort + sess = ort.InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [i.name for i in sess.get_inputs()] input = {name: input for name, input in zip(names, inputs)} res = sess.run(None, input) @@ -31,33 +31,36 @@ def test_transpose2(self): from skl2onnx.algebra.onnx_ops import OnnxTranspose node = OnnxTranspose( - OnnxTranspose( - 'X', perm=[1, 0, 2], - op_version=TARGET_OPSET), - perm=[1, 0, 2], output_names=['Y'], - op_version=TARGET_OPSET) + OnnxTranspose("X", perm=[1, 0, 2], op_version=TARGET_OPSET), + perm=[1, 0, 2], + output_names=["Y"], + op_version=TARGET_OPSET, + ) X = np.arange(2 * 3 * 4).reshape((2, 3, 4)).astype(np.float32) - model_def = node.to_onnx({'X': X}) + model_def = node.to_onnx({"X": X}) onnx.checker.check_model(model_def) res = self.predict_with_onnxruntime(model_def, X) - assert_almost_equal(res['Y'], X) + assert_almost_equal(res["Y"], X) - @unittest.skipIf(sys.platform.startswith("win"), - reason="onnx schema are incorrect on Windows") + @unittest.skipIf( + sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows" + ) def test_doc_onnx(self): rst = get_rst_doc() assert "**Summary**" in rst - @unittest.skipIf(sys.platform.startswith("win"), - reason="onnx schema are incorrect on Windows") + @unittest.skipIf( + sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows" + ) def test_doc_sklearn(self): try: rst = get_rst_doc_sklearn() assert ".. 
_l-sklops-OnnxSklearnBernoulliNB:" in rst except KeyError as e: - assert ("SklearnGaussianProcessRegressor" in str(e) or - "SklearnGaussianProcessClassifier" in str(e)) + assert "SklearnGaussianProcessRegressor" in str( + e + ) or "SklearnGaussianProcessClassifier" in str(e) if __name__ == "__main__": diff --git a/tests/test_algebra_onnx_operator_mixin_syntax.py b/tests/test_algebra_onnx_operator_mixin_syntax.py index 1a42348f5..f5d508a28 100644 --- a/tests/test_algebra_onnx_operator_mixin_syntax.py +++ b/tests/test_algebra_onnx_operator_mixin_syntax.py @@ -15,9 +15,7 @@ from test_utils import dump_data_and_model, TARGET_OPSET -class CustomOpTransformer(BaseEstimator, TransformerMixin, - OnnxOperatorMixin): - +class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin): def __init__(self, op_version=TARGET_OPSET): BaseEstimator.__init__(self) TransformerMixin.__init__(self) @@ -34,61 +32,62 @@ def transform(self, X): def onnx_shape_calculator(self): def shape_calculator(operator): operator.outputs[0].type = operator.inputs[0].type + return shape_calculator - def to_onnx_operator(self, inputs=None, outputs=('Y', ), - target_opset=None, **kwargs): + def to_onnx_operator( + self, inputs=None, outputs=("Y",), target_opset=None, **kwargs + ): if inputs is None: raise RuntimeError("inputs should contain one name") i0 = self.get_inputs(inputs, 0) W = self.W_.astype(np.float32) S = self.S_.astype(np.float32) return OnnxDiv( - OnnxSub( - i0, W, op_version=self.op_version), - S, output_names=outputs, op_version=self.op_version) + OnnxSub(i0, W, op_version=self.op_version), + S, + output_names=outputs, + op_version=self.op_version, + ) class TestOnnxOperatorMixinSyntax(unittest.TestCase): - def test_way1_convert_sklearn(self): - X = np.arange(20).reshape(10, 2) tr = KMeans(n_clusters=2, n_init=10) tr.fit(X) onx = convert_sklearn( - tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))], - target_opset=TARGET_OPSET) + tr, + initial_types=[("X", FloatTensorType((None, X.shape[1])))], + target_opset=TARGET_OPSET, + ) if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinWay1ConvertSklearn") + X.astype(np.float32), tr, onx, basename="MixinWay1ConvertSklearn" + ) def test_way2_to_onnx(self): - X = np.arange(20).reshape(10, 2) tr = KMeans(n_clusters=2, n_init=10) tr.fit(X) - onx = to_onnx(tr, X.astype(np.float32), - target_opset=TARGET_OPSET) + onx = to_onnx(tr, X.astype(np.float32), target_opset=TARGET_OPSET) if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) - dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinWay2ToOnnx") + dump_data_and_model(X.astype(np.float32), tr, onx, basename="MixinWay2ToOnnx") def test_way3_mixin(self): - X = np.arange(20).reshape(10, 2) # avoids point of different cluster to be very close # and avoid a small discrepancy due to double/float @@ -100,8 +99,9 @@ def test_way3_mixin(self): try: tr_mixin = wrap_as_onnx_mixin(tr, target_opset=TARGET_OPSET) except KeyError as e: - assert ("SklearnGaussianProcessRegressor" in 
str(e) or - "SklearnGaussianProcessClassifier" in str(e)) + assert "SklearnGaussianProcessRegressor" in str( + e + ) or "SklearnGaussianProcessClassifier" in str(e) return try: @@ -111,18 +111,17 @@ def test_way3_mixin(self): onx = tr_mixin.to_onnx(X.astype(np.float32)) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinWay3OnnxMixin") + X.astype(np.float32), tr, onx, basename="MixinWay3OnnxMixin" + ) def test_way4_mixin_fit(self): - X = np.arange(20).reshape(10, 2) try: - tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), - target_opset=TARGET_OPSET) + tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=TARGET_OPSET) except KeyError as e: - assert ("SklearnGaussianProcessRegressor" in str(e) or - "SklearnGaussianProcessClassifier" in str(e)) + assert "SklearnGaussianProcessRegressor" in str( + e + ) or "SklearnGaussianProcessClassifier" in str(e) return tr.fit(X) @@ -130,65 +129,71 @@ def test_way4_mixin_fit(self): if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinWay4OnnxMixin2") + X.astype(np.float32), tr, onx, basename="MixinWay4OnnxMixin2" + ) def test_pipe_way1_convert_sklearn(self): - X = np.arange(20).reshape(10, 2) tr = make_pipeline( CustomOpTransformer(op_version=TARGET_OPSET), - KMeans(n_clusters=2, n_init=10)) + KMeans(n_clusters=2, n_init=10), + ) tr.fit(X) onx = convert_sklearn( - tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))], - target_opset=TARGET_OPSET) + tr, + initial_types=[("X", FloatTensorType((None, X.shape[1])))], + target_opset=TARGET_OPSET, + ) if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinPipeWay1ConvertSklearn") + X.astype(np.float32), tr, onx, basename="MixinPipeWay1ConvertSklearn" + ) def test_pipe_way2_to_onnx(self): - X = np.arange(20).reshape(10, 2) tr = make_pipeline( CustomOpTransformer(op_version=TARGET_OPSET), - KMeans(n_clusters=2, n_init=10)) + KMeans(n_clusters=2, n_init=10), + ) tr.fit(X) onx = to_onnx(tr, X.astype(np.float32), target_opset=TARGET_OPSET) if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinPipeWay2ToOnnx") + X.astype(np.float32), tr, onx, basename="MixinPipeWay2ToOnnx" + ) def test_pipe_way3_mixin(self): - X = np.arange(20).reshape(10, 2) tr = make_pipeline( CustomOpTransformer(op_version=TARGET_OPSET), - KMeans(n_clusters=2, n_init=10)) + KMeans(n_clusters=2, n_init=10), + ) tr.fit(X) try: tr_mixin = wrap_as_onnx_mixin(tr, target_opset=TARGET_OPSET) except KeyError as e: - assert ("SklearnGaussianProcessRegressor" in str(e) or - "SklearnGaussianProcessClassifier" in str(e)) + assert "SklearnGaussianProcessRegressor" in str( + e + ) or "SklearnGaussianProcessClassifier" in str(e) return try: @@ -199,24 +204,25 
@@ def test_pipe_way3_mixin(self): if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinPipeWay3OnnxMixin") + X.astype(np.float32), tr, onx, basename="MixinPipeWay3OnnxMixin" + ) def test_pipe_way4_mixin_fit(self): - X = np.arange(20).reshape(10, 2) try: tr = wrap_as_onnx_mixin( - make_pipeline(CustomOpTransformer(), - KMeans(n_clusters=2, n_init=10)), - target_opset=TARGET_OPSET) + make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2, n_init=10)), + target_opset=TARGET_OPSET, + ) except KeyError as e: - assert ("SklearnGaussianProcessRegressor" in str(e) or - "SklearnGaussianProcessClassifier" in str(e)) + assert "SklearnGaussianProcessRegressor" in str( + e + ) or "SklearnGaussianProcessClassifier" in str(e) return tr.fit(X) @@ -225,38 +231,41 @@ def test_pipe_way4_mixin_fit(self): if TARGET_OPSET == 11: sonx = str(onx) if "version: 11" not in sonx or "ir_version: 6" not in sonx: - raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format( - TARGET_OPSET, sonx)) + raise AssertionError( + "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx) + ) dump_data_and_model( - X.astype(np.float32), tr, onx, - basename="MixinPipeWay4OnnxMixin2") + X.astype(np.float32), tr, onx, basename="MixinPipeWay4OnnxMixin2" + ) - def common_test_onnxt_runtime_unary(self, onnx_cl, np_fct, - op_version=None, debug=False): - onx = onnx_cl('X', output_names=['Y']) + def common_test_onnxt_runtime_unary( + self, onnx_cl, np_fct, op_version=None, debug=False + ): + onx = onnx_cl("X", output_names=["Y"]) X = np.array([[1, 2], [3, -4]], dtype=np.float64) - model_def = onx.to_onnx( - {'X': X.astype(np.float32)}, target_opset=op_version) + model_def = onx.to_onnx({"X": X.astype(np.float32)}, target_opset=op_version) if debug: print(model_def) try: oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: - if ("Could not find an implementation for the node " - "Cl_Clip:Clip(11)" in str(e)): + if ( + "Could not find an implementation for the node " + "Cl_Clip:Clip(11)" in str(e) + ): # Not yet implemented in onnxruntime return raise e X = X.astype(np.float32) try: - got = oinf.run(None, {'X': X})[0] + got = oinf.run(None, {"X": X})[0] except Exception as e: raise AssertionError( - "Cannot run model due to %r\n%r\n%s" % ( - e, onx, str(model_def))) from e + "Cannot run model due to %r\n%r\n%s" % (e, onx, str(model_def)) + ) from e assert_almost_equal(np_fct(X), got, decimal=6) @unittest.skipIf(onnx.defs.onnx_opset_version() < 10, "irrelevant") @@ -264,31 +273,35 @@ def test_onnx_clip_10(self): with self.subTest(name="OnnxClip_6[1e-5, 1e5]"): self.common_test_onnxt_runtime_unary( lambda x, output_names=None: OnnxClip_6( - x, min=1e-5, max=1e5, output_names=output_names), + x, min=1e-5, max=1e5, output_names=output_names + ), lambda x: np.clip(x, 1e-5, 1e5), - op_version=10) + op_version=10, + ) with self.subTest(name="OnnxClip-10[1e-5, 1e5]"): self.common_test_onnxt_runtime_unary( lambda x, output_names=None: OnnxClip( - x, min=1e-5, max=1e5, output_names=output_names, - op_version=10), + x, min=1e-5, max=1e5, output_names=output_names, op_version=10 + ), lambda x: np.clip(x, 1e-5, 
1e5), - op_version=10) + op_version=10, + ) with self.subTest(name="OnnxClip-10[-1e5, 1e-5]"): self.common_test_onnxt_runtime_unary( lambda x, output_names=None: OnnxClip( - x, max=1e-5, output_names=output_names, - op_version=10), + x, max=1e-5, output_names=output_names, op_version=10 + ), lambda x: np.clip(x, -1e5, 1e-5), - op_version=10) + op_version=10, + ) with self.subTest(name="OnnxClip-10[0.1, 2.1]"): self.common_test_onnxt_runtime_unary( lambda x, output_names=None: OnnxClip( - x, min=0.1, max=2.1, - output_names=output_names, - op_version=10), + x, min=0.1, max=2.1, output_names=output_names, op_version=10 + ), lambda x: np.clip(x, 0.1, 2.1), - op_version=10) + op_version=10, + ) if __name__ == "__main__": diff --git a/tests/test_algebra_onnx_operators.py b/tests/test_algebra_onnx_operators.py index 990c498c3..ed52d72b8 100644 --- a/tests/test_algebra_onnx_operators.py +++ b/tests/test_algebra_onnx_operators.py @@ -6,35 +6,42 @@ import numpy as np from numpy.testing import assert_almost_equal import onnx -from onnx import ( - helper, TensorProto, load_model) +from onnx import helper, TensorProto, load_model from sklearn.base import BaseEstimator, TransformerMixin from sklearn.cluster import KMeans from sklearn.datasets import load_iris from sklearn.utils.extmath import row_norms from skl2onnx import convert_sklearn from skl2onnx.common._topology import Variable -from skl2onnx.common.data_types import ( - FloatTensorType, guess_numpy_type) +from skl2onnx.common.data_types import FloatTensorType, guess_numpy_type from skl2onnx.algebra.onnx_operator import OnnxOperator from skl2onnx.algebra.onnx_ops import ( - OnnxSub, OnnxDiv, OnnxReshapeApi13, - OnnxReduceSumSquareApi18, OnnxGemm, - OnnxAdd, OnnxArgMin, OnnxSqrt, - OnnxArrayFeatureExtractor, OnnxMul, - OnnxPad, OnnxBatchNormalization, - OnnxConstantOfShape, OnnxMatMul, OnnxSoftmax) + OnnxSub, + OnnxDiv, + OnnxReshapeApi13, + OnnxReduceSumSquareApi18, + OnnxGemm, + OnnxAdd, + OnnxArgMin, + OnnxSqrt, + OnnxArrayFeatureExtractor, + OnnxMul, + OnnxPad, + OnnxBatchNormalization, + OnnxConstantOfShape, + OnnxMatMul, + OnnxSoftmax, +) from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) class TestOnnxOperators(unittest.TestCase): - def test_sub(self): - class CustomOpTransformer(BaseEstimator, TransformerMixin): - def __init__(self, op_version=None): self.op_version = op_version @@ -45,7 +52,7 @@ def fit(self, X, y=None): def transform(self, X): return X - self.W - mat = np.array([[0., 1.], [1., 2.], [3., 4.]]) + mat = np.array([[0.0, 1.0], [1.0, 2.0], [3.0, 4.0]]) tr = CustomOpTransformer(op_version=None) tr.fit(mat) z = tr.transform(mat) @@ -54,21 +61,23 @@ def conv(scope, operator, container): dtype = guess_numpy_type(operator.inputs[0].type) W = operator.raw_operator.W.astype(dtype) op = OnnxSub( - operator.inputs[0], W, output_names=operator.outputs, - op_version=TARGET_OPSET) + operator.inputs[0], + W, + output_names=operator.outputs, + op_version=TARGET_OPSET, + ) op.add_to(scope, container) text = str(container) if 'name:"Su_Sub"' not in text: - raise AssertionError( - "Unnamed operator: '{}'".format(text)) + raise AssertionError("Unnamed operator: '{}'".format(text)) nin = list(op.enumerate_initial_types()) nno = list(op.enumerate_nodes()) nva = list(op.enumerate_variables()) self.assertEqual(len(nin), 1) - self.assertEqual(nin[0][0], 'input') + self.assertEqual(nin[0][0], "input") 
self.assertEqual(nin[0][1].shape, [None, 2]) self.assertEqual(len(nno), 1) - self.assertEqual(nno[0].output_names[0].onnx_name, 'variable') + self.assertEqual(nno[0].output_names[0].onnx_name, "variable") self.assertEqual(len(nva), 1) assert isinstance(nva[0], tuple) self.assertEqual(nva[0][1], 0) @@ -79,21 +88,22 @@ def shape(operator): operator.outputs[0].type.shape = [N, W.shape[0]] model_onnx = convert_sklearn( - tr, 'a-sub', [('input', FloatTensorType([None, 2]))], + tr, + "a-sub", + [("input", FloatTensorType([None, 2]))], custom_shape_calculators={CustomOpTransformer: shape}, custom_conversion_functions={CustomOpTransformer: conv}, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - z2 = sess.run(None, {'input': mat.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + z2 = sess.run(None, {"input": mat.astype(np.float32)})[0] assert_almost_equal(z, z2) def test_sub_div(self): - class CustomOpTransformer(BaseEstimator, TransformerMixin): - def __init__(self): pass @@ -105,7 +115,7 @@ def fit(self, X, y=None): def transform(self, X): return (X - self.W) / self.S - mat = np.array([[0., 1.], [0., 1.], [2., 2.]]) + mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]]) tr = CustomOpTransformer() tr.fit(mat) z = tr.transform(mat) @@ -117,8 +127,10 @@ def conv(scope, operator, container): out = operator.outputs op = OnnxDiv( OnnxSub(X, W, op_version=container.target_opset), - S, output_names=out, - op_version=container.target_opset) + S, + output_names=out, + op_version=container.target_opset, + ) op.add_to(scope, container) def shape(operator): @@ -127,23 +139,26 @@ def shape(operator): operator.outputs[0].type.shape = [N, W.shape[0]] model_onnx = convert_sklearn( - tr, 'a-sub-div', [('input', FloatTensorType([None, 2]))], + tr, + "a-sub-div", + [("input", FloatTensorType([None, 2]))], custom_shape_calculators={CustomOpTransformer: shape}, custom_conversion_functions={CustomOpTransformer: conv}, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: raise AssertionError( - "Cannot load model\n---\n{}\n---".format(model_onnx)) from e - z2 = sess.run(None, {'input': mat.astype(np.float32)})[0] + "Cannot load model\n---\n{}\n---".format(model_onnx) + ) from e + z2 = sess.run(None, {"input": mat.astype(np.float32)})[0] assert_almost_equal(z, z2) def test_sub_kmeans(self): - def conv(scope, operator, container): X = operator.inputs[0] out = operator.outputs @@ -155,30 +170,35 @@ def conv(scope, operator, container): C = C.astype(dtype) rs = OnnxReduceSumSquareApi18( - X, axes=[1], keepdims=1, - op_version=container.target_opset) + X, axes=[1], keepdims=1, op_version=container.target_opset + ) N = X.type.shape[0] if isinstance(N, int): - zeros = np.zeros((N, )) + zeros = np.zeros((N,)) else: zeros = OnnxMul( - rs, np.array([0], dtype=np.float32), - op_version=container.target_opset) + rs, + np.array([0], dtype=np.float32), + op_version=container.target_opset, + ) z = OnnxAdd( rs, OnnxGemm( - X, C, zeros, alpha=-2., transB=1, - op_version=container.target_opset), - op_version=container.target_opset) + X, C, zeros, alpha=-2.0, transB=1, op_version=container.target_opset + ), + op_version=container.target_opset, + ) y2 = OnnxAdd(C2, z, 
op_version=container.target_opset) lo = OnnxArgMin( - y2, axis=1, keepdims=0, output_names=out[:1], - op_version=container.target_opset) - y2s = OnnxSqrt( - y2, output_names=out[1:], - op_version=container.target_opset) + y2, + axis=1, + keepdims=0, + output_names=out[:1], + op_version=container.target_opset, + ) + y2s = OnnxSqrt(y2, output_names=out[1:], op_version=container.target_opset) lo.add_to(scope, container) y2s.add_to(scope, container) @@ -188,13 +208,19 @@ def conv(scope, operator, container): model = KMeans(n_clusters=3) model.fit(X) model_onnx = convert_sklearn( - model, 'a-kmeans', - [('input', FloatTensorType([None, X.shape[1]]))], + model, + "a-kmeans", + [("input", FloatTensorType([None, X.shape[1]]))], custom_conversion_functions={KMeans: conv}, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) - dump_data_and_model(X.astype(np.float32)[40:60], model, model_onnx, - basename="SklearnKMeansCustom-Dec4") + dump_data_and_model( + X.astype(np.float32)[40:60], + model, + model_onnx, + basename="SklearnKMeansCustom-Dec4", + ) def test_unscoped(self): var2 = OnnxOperator.UnscopedVariable("a") @@ -211,48 +237,56 @@ def test_constant_of_shape(self): for opset in range(20, 8, -1): if opset > TARGET_OPSET: continue - for value in [np.array([5], dtype=np.float32), - np.array(5, dtype=np.float32)]: + for value in [ + np.array([5], dtype=np.float32), + np.array(5, dtype=np.float32), + ]: with self.subTest(opset=opset, value=value): tensor_value = onnx.helper.make_tensor( - "value", onnx.TensorProto.FLOAT, - [1], [5]) + "value", onnx.TensorProto.FLOAT, [1], [5] + ) cst = OnnxConstantOfShape( - 'X', value=tensor_value, op_version=opset, - output_names=['Y']) + "X", value=tensor_value, op_version=opset, output_names=["Y"] + ) shape = np.array([3, 4], dtype=np.int64) onx = cst.to_onnx( - {'X': shape}, target_opset=opset, - outputs=[('Y', FloatTensorType())]) + {"X": shape}, + target_opset=opset, + outputs=[("Y", FloatTensorType())], + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': shape}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": shape}) assert_almost_equal( - res[0], np.full(tuple(shape), 5, dtype=np.float32)) + res[0], np.full(tuple(shape), 5, dtype=np.float32) + ) cst = OnnxConstantOfShape( - 'X', value=value, op_version=opset, - output_names=['Y']) + "X", value=value, op_version=opset, output_names=["Y"] + ) shape = np.array([3, 4], dtype=np.int64) onx = cst.to_onnx( - {'X': shape}, target_opset=opset, - outputs=[('Y', FloatTensorType())]) + {"X": shape}, + target_opset=opset, + outputs=[("Y", FloatTensorType())], + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': shape}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": shape}) assert_almost_equal( - res[0], np.full(tuple(shape), 5, dtype=np.float32)) + res[0], np.full(tuple(shape), 5, dtype=np.float32) + ) for opset in [TARGET_OPSET]: for value in [5, np.float32(5)]: with self.subTest(opset=opset, value=value): with self.assertRaises(TypeError): OnnxConstantOfShape( - 'X', value=value, op_version=opset, - output_names=['Y']) + "X", value=value, op_version=opset, output_names=["Y"] + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_onnx_reversed_order(self): @@ -260,16 +294,20 @@ def test_onnx_reversed_order(self): idi2 = np.identity(2) * 2 onx = OnnxAdd( - 
OnnxAdd('X', idi.astype(np.float32), op_version=TARGET_OPSET), - idi2.astype(np.float32), output_names=['Y'], - op_version=TARGET_OPSET) - model_def = onx.to_onnx({'X': idi.astype(np.float32)}) + OnnxAdd("X", idi.astype(np.float32), op_version=TARGET_OPSET), + idi2.astype(np.float32), + output_names=["Y"], + op_version=TARGET_OPSET, + ) + model_def = onx.to_onnx({"X": idi.astype(np.float32)}) self.assertEqual(len(model_def.graph.output), 1) onx = OnnxAdd( idi2.astype(np.float32), - OnnxAdd('X', idi.astype(np.float32), op_version=TARGET_OPSET), - output_names=['Y'], op_version=TARGET_OPSET) - model_def = onx.to_onnx({'X': idi.astype(np.float32)}) + OnnxAdd("X", idi.astype(np.float32), op_version=TARGET_OPSET), + output_names=["Y"], + op_version=TARGET_OPSET, + ) + model_def = onx.to_onnx({"X": idi.astype(np.float32)}) onnx2 = model_def.SerializeToString() self.assertIsInstance(onx.outputs, list) self.assertEqual(len(onx.outputs), 1) @@ -286,74 +324,79 @@ def test_onnx_reversed_order(self): assert reload is not None def test_onnx_reversed_order_second(self): - X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [2, 2]) - Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [2, 2]) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 2]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2]) nodes = [ - helper.make_node('Add', ['X', 'idi'], ['temp']), - helper.make_node('Add', ['temp', 'idi2'], ['Y']) + helper.make_node("Add", ["X", "idi"], ["temp"]), + helper.make_node("Add", ["temp", "idi2"], ["Y"]), ] - graph_def = helper.make_graph(nodes, 't1', [X], [Y]) - model_def = helper.make_model(graph_def, producer_name='A') + graph_def = helper.make_graph(nodes, "t1", [X], [Y]) + model_def = helper.make_model(graph_def, producer_name="A") self.assertEqual(len(model_def.graph.output), 1) nodes = [ - helper.make_node('Add', ['X', 'idi'], ['temp']), - helper.make_node('Add', ['idi2', 'temp'], ['Y']) + helper.make_node("Add", ["X", "idi"], ["temp"]), + helper.make_node("Add", ["idi2", "temp"], ["Y"]), ] - graph_def = helper.make_graph(nodes, 't1', [X], [Y]) - model_def = helper.make_model(graph_def, producer_name='A') + graph_def = helper.make_graph(nodes, "t1", [X], [Y]) + model_def = helper.make_model(graph_def, producer_name="A") self.assertEqual(len(model_def.graph.output), 1) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_onnxt_array_feature_extractor(self): onx = OnnxArrayFeatureExtractor( - 'X', np.array([1], dtype=np.int64), - output_names=['Y'], op_version=1) + "X", np.array([1], dtype=np.int64), output_names=["Y"], op_version=1 + ) X = np.array([[1, 2], [3, 4]], dtype=np.float32) - model_def = onx.to_onnx({'X': X}, - outputs=[('Y', FloatTensorType([2]))], - target_opset=TARGET_OPSET) + model_def = onx.to_onnx( + {"X": X}, outputs=[("Y", FloatTensorType([2]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X})[0] + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X})[0] self.assertEqual(got.shape, (2, 1)) assert_almost_equal(X[:, 1:2], got) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_container_init(self): onx = OnnxReshapeApi13( - OnnxReshapeApi13('X', np.array([1, -1], dtype=np.int64), - op_version=TARGET_OPSET), + OnnxReshapeApi13( + "X", np.array([1, -1], dtype=np.int64), op_version=TARGET_OPSET + ), np.array([1, -1], dtype=np.int64), - 
output_names=['Y'], op_version=TARGET_OPSET) + output_names=["Y"], + op_version=TARGET_OPSET, + ) X = np.array([[1, 2], [3, 4]], dtype=np.float32) - model_def = onx.to_onnx({'X': X}, - outputs=[('Y', FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_def = onx.to_onnx( + {"X": X}, + outputs=[("Y", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X})[0] + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X})[0] assert_almost_equal(X.reshape((1, -1)), got) - inits = [row for row in str(model_def).split('\n') - if row.startswith(" initializer {")] + inits = [ + row + for row in str(model_def).split("\n") + if row.startswith(" initializer {") + ] self.assertEqual(len(inits), 1) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_default(self): - pad = OnnxPad(mode='constant', value=1.5, - pads=[0, 1, 0, 1], op_version=10) + pad = OnnxPad(mode="constant", value=1.5, pads=[0, 1, 0, 1], op_version=10) - X = helper.make_tensor_value_info( - 'X', onnx.TensorProto.FLOAT, [None, 2]) + X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None, 2]) model_def = pad.to_onnx({pad.inputs[0].name: X}, target_opset=10) onnx.checker.check_model(model_def) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_batch_normalization(self): - def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5): dims_x = len(x.shape) dim_ones = (1,) * (dims_x - 2) @@ -372,14 +415,13 @@ def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5): y = _batchnorm_test_mode(x, s, bias, mean, var).astype(np.float32) onx = OnnxBatchNormalization( - 'X', s, bias, mean, var, output_names=['Y'], - op_version=TARGET_OPSET) - model_def = onx.to_onnx({'X': x.astype(np.float32)}, - target_opset=TARGET_OPSET) + "X", s, bias, mean, var, output_names=["Y"], op_version=TARGET_OPSET + ) + model_def = onx.to_onnx({"X": x.astype(np.float32)}, target_opset=TARGET_OPSET) oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = oinf.run(None, {'X': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = oinf.run(None, {"X": x}) assert_almost_equal(y, got[0], decimal=5) # input size: (2, 3, 4, 5) @@ -389,106 +431,112 @@ def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5): mean = np.random.randn(3).astype(np.float32) var = np.random.rand(3).astype(np.float32) epsilon = 1e-2 - y = _batchnorm_test_mode( - x, s, bias, mean, var, epsilon).astype(np.float32) + y = _batchnorm_test_mode(x, s, bias, mean, var, epsilon).astype(np.float32) onx = OnnxBatchNormalization( - 'X', s, bias, mean, var, - output_names=['Y'], epsilon=epsilon, - op_version=TARGET_OPSET) - model_def = onx.to_onnx({'X': x.astype(np.float32)}, - target_opset=TARGET_OPSET) + "X", + s, + bias, + mean, + var, + output_names=["Y"], + epsilon=epsilon, + op_version=TARGET_OPSET, + ) + model_def = onx.to_onnx({"X": x.astype(np.float32)}, target_opset=TARGET_OPSET) oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = oinf.run(None, {'X': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = oinf.run(None, {"X": x}) assert_almost_equal(y, got[0], decimal=5) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_onnxt_runtime_pad(self): - data = np.array([[1.0, 1.2], [2.3, 
3.4], [4.5, 5.7]], - dtype=np.float32) + data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32) pads = np.array([0, 2, 0, 0], dtype=np.int64) constant_value = np.array([0.0], dtype=np.float32) - exp = np.array([[0.0, 0.0, 1.0, 1.2], - [0.0, 0.0, 2.3, 3.4], - [0.0, 0.0, 4.5, 5.7]], dtype=np.float32) + exp = np.array( + [[0.0, 0.0, 1.0, 1.2], [0.0, 0.0, 2.3, 3.4], [0.0, 0.0, 4.5, 5.7]], + dtype=np.float32, + ) onx = OnnxPad( - 'data', 'pads', constant_value, output_names=['Y'], - op_version=TARGET_OPSET) - model_def = onx.to_onnx({'data': data, 'pads': pads}, - target_opset=TARGET_OPSET) + "data", "pads", constant_value, output_names=["Y"], op_version=TARGET_OPSET + ) + model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET) oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = oinf.run(None, {'data': data, 'pads': pads}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = oinf.run(None, {"data": data, "pads": pads}) assert_almost_equal(exp, got[0]) - data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], - dtype=np.float32) + data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32) pads = np.array([0, 2, 0, 0], dtype=np.int64) constant_value = np.array([0.0], dtype=np.float32) - exp = np.array([[0, 1.2, 1.0, 1.2], - [0, 3.4, 2.3, 3.4], - [0, 5.7, 4.5, 5.7]], dtype=np.float32) + exp = np.array( + [[0, 1.2, 1.0, 1.2], [0, 3.4, 2.3, 3.4], [0, 5.7, 4.5, 5.7]], + dtype=np.float32, + ) onx = OnnxPad( - 'data', 'pads', output_names=['Y'], - mode='reflect', op_version=TARGET_OPSET) - model_def = onx.to_onnx({'data': data, 'pads': pads}, - target_opset=TARGET_OPSET) + "data", "pads", output_names=["Y"], mode="reflect", op_version=TARGET_OPSET + ) + model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET) oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = oinf.run(None, {'data': data, 'pads': pads}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = oinf.run(None, {"data": data, "pads": pads}) try: assert_almost_equal(exp, got[0]) except AssertionError as e: warnings.warn(e) - data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], - dtype=np.float32) + data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32) pads = np.array([0, 2, 0, 0], dtype=np.int64) constant_value = np.array([0.0], dtype=np.float32) - exp = np.array([[1.0, 1.0, 1.0, 1.2], - [2.3, 2.3, 2.3, 3.4], - [4.5, 4.5, 4.5, 5.7]], dtype=np.float32) + exp = np.array( + [[1.0, 1.0, 1.0, 1.2], [2.3, 2.3, 2.3, 3.4], [4.5, 4.5, 4.5, 5.7]], + dtype=np.float32, + ) onx = OnnxPad( - 'data', 'pads', output_names=['Y'], - mode='edge', op_version=TARGET_OPSET) - model_def = onx.to_onnx({'data': data, 'pads': pads}, - target_opset=TARGET_OPSET) + "data", "pads", output_names=["Y"], mode="edge", op_version=TARGET_OPSET + ) + model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET) oinf = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = oinf.run(None, {'data': data, 'pads': pads}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = oinf.run(None, {"data": data, "pads": pads}) assert_almost_equal(exp, got[0]) def test_softmax(self): X = np.random.randn(100, 4).astype(np.float32) y = X.sum(axis=1) + np.random.randn(100) / 10 y = y.astype(np.float32) - self.assertEqual(y.shape, (100, )) + 
self.assertEqual(y.shape, (100,)) weight = np.random.randn(4, 1).astype(np.float32) intercept = np.random.randn(1).astype(np.float32) node = OnnxAdd( - OnnxMatMul('X', weight, op_version=TARGET_OPSET), - intercept, op_version=TARGET_OPSET) - nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET) + OnnxMatMul("X", weight, op_version=TARGET_OPSET), + intercept, + op_version=TARGET_OPSET, + ) + nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET) with open("debug_ort_add.onnx", "wb") as f: f.write(nn_onnx.SerializeToString()) self.assertEqual(len(nn_onnx.graph.output), 1) - node = OnnxMatMul('X', weight, op_version=TARGET_OPSET) - nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET) + node = OnnxMatMul("X", weight, op_version=TARGET_OPSET) + nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET) self.assertEqual(len(nn_onnx.graph.output), 1) node = OnnxSoftmax( OnnxAdd( - OnnxMatMul('X', weight, op_version=TARGET_OPSET), - intercept, op_version=TARGET_OPSET), - op_version=TARGET_OPSET) - nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET) + OnnxMatMul("X", weight, op_version=TARGET_OPSET), + intercept, + op_version=TARGET_OPSET, + ), + op_version=TARGET_OPSET, + ) + nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET) self.assertEqual(len(nn_onnx.graph.output), 1) diff --git a/tests/test_algebra_onnx_operators_if.py b/tests/test_algebra_onnx_operators_if.py index 97e8a14e6..4fd34b1b6 100644 --- a/tests/test_algebra_onnx_operators_if.py +++ b/tests/test_algebra_onnx_operators_if.py @@ -8,6 +8,7 @@ import onnx.helper from onnx import TensorProto from onnxruntime import __version__ as ort_version + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -16,319 +17,381 @@ from sklearn.utils.testing import ignore_warnings from skl2onnx.common.data_types import FloatTensorType from skl2onnx.algebra.onnx_ops import ( - OnnxAdd, OnnxSub, OnnxIf, OnnxGreater, - OnnxReduceSum, OnnxMul, OnnxReduceMin) -from test_utils import ( - TARGET_OPSET, TARGET_IR, - InferenceSessionEx as InferenceSession) + OnnxAdd, + OnnxSub, + OnnxIf, + OnnxGreater, + OnnxReduceSum, + OnnxMul, + OnnxReduceMin, +) +from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestOnnxOperatorsIf(unittest.TestCase): - @ignore_warnings(category=DeprecationWarning) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.5.0'), - reason="too old onnxruntime") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.5.0"), reason="too old onnxruntime" + ) def test_onnx_if_test1(self): - then_out = onnx.helper.make_tensor_value_info( - 'then_out', onnx.TensorProto.FLOAT, [5]) + "then_out", onnx.TensorProto.FLOAT, [5] + ) else_out = onnx.helper.make_tensor_value_info( - 'else_out', onnx.TensorProto.FLOAT, [5]) + "else_out", onnx.TensorProto.FLOAT, [5] + ) x = np.array([1, 2, 3, 4, 5]).astype(np.float32) y = np.array([5, 4, 3, 2, 1]).astype(np.float32) then_const_node = onnx.helper.make_node( - 'Constant', inputs=[], outputs=['then_out'], - value=onnx.numpy_helper.from_array(x)) + "Constant", + inputs=[], + outputs=["then_out"], + value=onnx.numpy_helper.from_array(x), + ) else_const_node = onnx.helper.make_node( - 'Constant', inputs=[], outputs=['else_out'], - value=onnx.numpy_helper.from_array(y)) + "Constant", + inputs=[], + outputs=["else_out"], + value=onnx.numpy_helper.from_array(y), + ) then_body = onnx.helper.make_graph( - 
[then_const_node], 'then_body', [], [then_out]) + [then_const_node], "then_body", [], [then_out] + ) else_body = onnx.helper.make_graph( - [else_const_node], 'else_body', [], [else_out]) + [else_const_node], "else_body", [], [else_out] + ) if_node = onnx.helper.make_node( - 'If', inputs=['cond'], outputs=['Z'], - then_branch=then_body, else_branch=else_body) - - cond = onnx.helper.make_tensor_value_info('cond', TensorProto.BOOL, []) - Z = onnx.helper.make_tensor_value_info('Z', TensorProto.FLOAT, [None]) - graph_def = onnx.helper.make_graph([if_node], 'example', [cond], [Z]) - model_def = onnx.helper.make_model(graph_def, producer_name='skl2onnx') + "If", + inputs=["cond"], + outputs=["Z"], + then_branch=then_body, + else_branch=else_body, + ) + + cond = onnx.helper.make_tensor_value_info("cond", TensorProto.BOOL, []) + Z = onnx.helper.make_tensor_value_info("Z", TensorProto.FLOAT, [None]) + graph_def = onnx.helper.make_graph([if_node], "example", [cond], [Z]) + model_def = onnx.helper.make_model(graph_def, producer_name="skl2onnx") del model_def.opset_import[:] op_set = model_def.opset_import.add() - op_set.domain = '' + op_set.domain = "" op_set.version = TARGET_OPSET model_def.ir_version = TARGET_IR cond = np.array(1).astype(bool) expected = x if cond else y sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'cond': cond}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"cond": cond}) assert_almost_equal(expected, res[0]) @ignore_warnings(category=DeprecationWarning) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.5.0'), - reason="too old onnxruntime") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.5.0"), reason="too old onnxruntime" + ) def test_onnx_if_test2(self): - then_out = onnx.helper.make_tensor_value_info( - 'then_out', onnx.TensorProto.FLOAT, [5]) + "then_out", onnx.TensorProto.FLOAT, [5] + ) else_out = onnx.helper.make_tensor_value_info( - 'else_out', onnx.TensorProto.FLOAT, [5]) + "else_out", onnx.TensorProto.FLOAT, [5] + ) x = np.array([1, 2, 3, 4, 5]).astype(np.float32) y = np.array([5, 4, 3, 2, 1]).astype(np.float32) then_const_node = onnx.helper.make_node( - 'Constant', inputs=[], outputs=['then_out'], - value=onnx.numpy_helper.from_array(x)) + "Constant", + inputs=[], + outputs=["then_out"], + value=onnx.numpy_helper.from_array(x), + ) else_const_node = onnx.helper.make_node( - 'Identity', inputs=['Y'], outputs=['else_out']) + "Identity", inputs=["Y"], outputs=["else_out"] + ) then_body = onnx.helper.make_graph( - [then_const_node], 'then_body', [], [then_out]) + [then_const_node], "then_body", [], [then_out] + ) else_body = onnx.helper.make_graph( - [else_const_node], 'else_body', [], [else_out]) + [else_const_node], "else_body", [], [else_out] + ) if_node = onnx.helper.make_node( - 'If', inputs=['cond'], outputs=['Z'], - then_branch=then_body, else_branch=else_body) - - cond = onnx.helper.make_tensor_value_info('cond', TensorProto.BOOL, []) - Y = onnx.helper.make_tensor_value_info('Y', TensorProto.FLOAT, [None]) - Z = onnx.helper.make_tensor_value_info('Z', TensorProto.FLOAT, [None]) - graph_def = onnx.helper.make_graph( - [if_node], 'example', [cond, Y], [Z]) - model_def = onnx.helper.make_model(graph_def, producer_name='skl2onnx') + "If", + inputs=["cond"], + outputs=["Z"], + then_branch=then_body, + else_branch=else_body, + ) + + cond = onnx.helper.make_tensor_value_info("cond", TensorProto.BOOL, []) + Y = 
onnx.helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None]) + Z = onnx.helper.make_tensor_value_info("Z", TensorProto.FLOAT, [None]) + graph_def = onnx.helper.make_graph([if_node], "example", [cond, Y], [Z]) + model_def = onnx.helper.make_model(graph_def, producer_name="skl2onnx") del model_def.opset_import[:] op_set = model_def.opset_import.add() - op_set.domain = '' + op_set.domain = "" op_set.version = TARGET_OPSET model_def.ir_version = TARGET_IR cond = np.array(1).astype(bool) expected = x if cond else y sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'cond': cond, 'Y': y}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"cond": cond, "Y": y}) assert_almost_equal(expected, res[0]) @ignore_warnings(category=DeprecationWarning) def test_onnx_if_algebra_direct(self): - opv = TARGET_OPSET x1 = np.array([[0, 3], [7, 0]], dtype=np.float32) x2 = np.array([[1, 0], [2, 0]], dtype=np.float32) - node = OnnxAdd( - 'x1', 'x2', output_names=['absxythen'], op_version=opv) + node = OnnxAdd("x1", "x2", output_names=["absxythen"], op_version=opv) then_body = node.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxythen', FloatTensorType())]) - node = OnnxSub( - 'x1', 'x2', output_names=['absxyelse'], op_version=opv) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxythen", FloatTensorType())], + ) + node = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv) else_body = node.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxyelse', FloatTensorType())]) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxyelse", FloatTensorType())], + ) del else_body.graph.input[:] del then_body.graph.input[:] cond = OnnxGreater( - OnnxReduceSum('x1', op_version=opv), - OnnxReduceSum('x2', op_version=opv), - op_version=opv) - ifnode = OnnxIf(cond, then_branch=then_body.graph, - else_branch=else_body.graph, - op_version=opv, output_names=['y']) + OnnxReduceSum("x1", op_version=opv), + OnnxReduceSum("x2", op_version=opv), + op_version=opv, + ) + ifnode = OnnxIf( + cond, + then_branch=then_body.graph, + else_branch=else_body.graph, + op_version=opv, + output_names=["y"], + ) model_def = ifnode.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('y', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())] + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x1': x1, 'x2': x2}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x1": x1, "x2": x2}) assert_almost_equal(x1 + x2, res[0]) @ignore_warnings(category=DeprecationWarning) def test_onnx_if_algebra_indirect(self): - opv = TARGET_OPSET x1 = np.array([[0, 3], [7, 0]], dtype=np.float32) x2 = np.array([[1, 0], [2, 0]], dtype=np.float32) - node_xy = OnnxMul( - 'x1', 'x2', op_version=opv, output_names=['xy']) - node_then = OnnxAdd( - 'x1', 'xy', output_names=['absxythen'], op_version=opv) + node_xy = OnnxMul("x1", "x2", op_version=opv, output_names=["xy"]) + node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv) then_body = node_then.to_onnx( - {'x1': x1, 'xy': x2}, target_opset=opv, - outputs=[('absxythen', FloatTensorType())]) - node_else = OnnxSub( - 'x1', 'x2', output_names=['absxyelse'], op_version=opv) + {"x1": x1, "xy": x2}, + target_opset=opv, + outputs=[("absxythen", FloatTensorType())], + 
) + node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv) else_body = node_else.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxyelse', FloatTensorType())]) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxyelse", FloatTensorType())], + ) del else_body.graph.input[:] del then_body.graph.input[:] cond = OnnxGreater( - OnnxReduceSum('x1', op_version=opv), - OnnxReduceSum('x2', op_version=opv), - op_version=opv) - ifnode = OnnxIf(cond, then_branch=then_body.graph, - else_branch=else_body.graph, - op_version=opv, output_names=['y'], - global_context={'xy': node_xy}) + OnnxReduceSum("x1", op_version=opv), + OnnxReduceSum("x2", op_version=opv), + op_version=opv, + ) + ifnode = OnnxIf( + cond, + then_branch=then_body.graph, + else_branch=else_body.graph, + op_version=opv, + output_names=["y"], + global_context={"xy": node_xy}, + ) model_def = ifnode.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('y', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())] + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x1': x1, 'x2': x2}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x1": x1, "x2": x2}) assert_almost_equal(x1 + x1 * x2, res[0]) @ignore_warnings(category=DeprecationWarning) def test_onnx_if_algebra_indirect_unnamed(self): - opv = TARGET_OPSET x1 = np.array([[0, 3], [7, 0]], dtype=np.float32) x2 = np.array([[1, 0], [2, 0]], dtype=np.float32) - node_xy = OnnxMul('x1', 'x2', op_version=opv) - node_then = OnnxAdd( - 'x1', 'xy', output_names=['absxythen'], op_version=opv) + node_xy = OnnxMul("x1", "x2", op_version=opv) + node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv) then_body = node_then.to_onnx( - {'x1': x1, 'xy': x2}, target_opset=opv, - outputs=[('absxythen', FloatTensorType())]) - node_else = OnnxSub( - 'x1', 'x2', output_names=['absxyelse'], op_version=opv) + {"x1": x1, "xy": x2}, + target_opset=opv, + outputs=[("absxythen", FloatTensorType())], + ) + node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv) else_body = node_else.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxyelse', FloatTensorType())]) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxyelse", FloatTensorType())], + ) del else_body.graph.input[:] del then_body.graph.input[:] cond = OnnxGreater( - OnnxReduceSum('x1', op_version=opv), - OnnxReduceSum('x2', op_version=opv), - op_version=opv) - ifnode = OnnxIf(cond, then_branch=then_body.graph, - else_branch=else_body.graph, - op_version=opv, output_names=['y'], - global_context={'xy': node_xy}) + OnnxReduceSum("x1", op_version=opv), + OnnxReduceSum("x2", op_version=opv), + op_version=opv, + ) + ifnode = OnnxIf( + cond, + then_branch=then_body.graph, + else_branch=else_body.graph, + op_version=opv, + output_names=["y"], + global_context={"xy": node_xy}, + ) model_def = ifnode.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('y', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())] + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x1': x1, 'x2': x2}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x1": x1, "x2": x2}) assert_almost_equal(x1 + x1 * x2, res[0]) 
@ignore_warnings(category=DeprecationWarning) def test_onnx_if_algebra_indirect_unnamed_clear_input(self): - opv = TARGET_OPSET x1 = np.array([[0, 3], [7, 0]], dtype=np.float32) x2 = np.array([[1, 0], [2, 0]], dtype=np.float32) - node_xy = OnnxMul('x1', 'x2', op_version=opv) - node_then = OnnxAdd( - 'x1', 'xy', output_names=['absxythen'], op_version=opv) + node_xy = OnnxMul("x1", "x2", op_version=opv) + node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv) then_body = node_then.to_onnx( - {'x1': x1, 'xy': x2}, target_opset=opv, - outputs=[('absxythen', FloatTensorType())]) - node_else = OnnxSub( - 'x1', 'x2', output_names=['absxyelse'], op_version=opv) + {"x1": x1, "xy": x2}, + target_opset=opv, + outputs=[("absxythen", FloatTensorType())], + ) + node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv) else_body = node_else.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxyelse', FloatTensorType())]) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxyelse", FloatTensorType())], + ) cond = OnnxGreater( - OnnxReduceSum('x1', op_version=opv), - OnnxReduceSum('x2', op_version=opv), - op_version=opv) - ifnode = OnnxIf(cond, then_branch=then_body.graph, - else_branch=else_body.graph, - op_version=opv, output_names=['y'], - global_context={'xy': node_xy}, - clear_subgraph_inputs=True) + OnnxReduceSum("x1", op_version=opv), + OnnxReduceSum("x2", op_version=opv), + op_version=opv, + ) + ifnode = OnnxIf( + cond, + then_branch=then_body.graph, + else_branch=else_body.graph, + op_version=opv, + output_names=["y"], + global_context={"xy": node_xy}, + clear_subgraph_inputs=True, + ) model_def = ifnode.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('y', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())] + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x1': x1, 'x2': x2}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x1": x1, "x2": x2}) assert_almost_equal(x1 + x1 * x2, res[0]) @ignore_warnings(category=DeprecationWarning) def test_onnx_if_algebra_indirect_unnamed_clear_input_recursive(self): - opv = TARGET_OPSET x1 = np.array([[0, 3], [7, 0]], dtype=np.float32) x2 = np.array([[1, 0], [2, 0]], dtype=np.float32) - node_xy = OnnxMul('x1', 'x2', op_version=opv) - node_then = OnnxAdd( - 'x1', 'xy', output_names=['absxythen'], op_version=opv) + node_xy = OnnxMul("x1", "x2", op_version=opv) + node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv) then_body = node_then.to_onnx( - {'x1': x1, 'xy': x2}, target_opset=opv, - outputs=[('absxythen', FloatTensorType())]) - node_else = OnnxSub( - 'x1', 'x2', output_names=['absxyelse'], op_version=opv) + {"x1": x1, "xy": x2}, + target_opset=opv, + outputs=[("absxythen", FloatTensorType())], + ) + node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv) else_body = node_else.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('absxyelse', FloatTensorType())]) + {"x1": x1, "x2": x2}, + target_opset=opv, + outputs=[("absxyelse", FloatTensorType())], + ) cond = OnnxGreater( - OnnxReduceSum('x1', op_version=opv), - OnnxReduceSum('x2', op_version=opv), - op_version=opv) - ifnode = OnnxIf(cond, then_branch=then_body.graph, - else_branch=else_body.graph, - op_version=opv, output_names=['yt'], - clear_subgraph_inputs=True) + OnnxReduceSum("x1", op_version=opv), + 
OnnxReduceSum("x2", op_version=opv), + op_version=opv, + ) + ifnode = OnnxIf( + cond, + then_branch=then_body.graph, + else_branch=else_body.graph, + op_version=opv, + output_names=["yt"], + clear_subgraph_inputs=True, + ) subgraph = ifnode.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('yt', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("yt", FloatTensorType())] + ) cond2 = OnnxGreater( - OnnxReduceMin('x1', op_version=opv), - OnnxReduceMin('x2', op_version=opv), - op_version=opv) - ifnode2 = OnnxIf(cond2, then_branch=then_body.graph, - else_branch=subgraph.graph, - op_version=opv, output_names=['y'], - global_context={'xy': node_xy}, - clear_subgraph_inputs=True) + OnnxReduceMin("x1", op_version=opv), + OnnxReduceMin("x2", op_version=opv), + op_version=opv, + ) + ifnode2 = OnnxIf( + cond2, + then_branch=then_body.graph, + else_branch=subgraph.graph, + op_version=opv, + output_names=["y"], + global_context={"xy": node_xy}, + clear_subgraph_inputs=True, + ) model_def = ifnode2.to_onnx( - {'x1': x1, 'x2': x2}, target_opset=opv, - outputs=[('y', FloatTensorType())]) + {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())] + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x1': x1, 'x2': x2}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x1": x1, "x2": x2}) assert_almost_equal(x1 + x1 * x2, res[0]) diff --git a/tests/test_algebra_onnx_operators_opset.py b/tests/test_algebra_onnx_operators_opset.py index 08fa6d3b3..719509753 100644 --- a/tests/test_algebra_onnx_operators_opset.py +++ b/tests/test_algebra_onnx_operators_opset.py @@ -9,23 +9,25 @@ class TestOnnxOperatorsOpset(unittest.TestCase): - @unittest.skipIf(onnx.defs.onnx_opset_version() < 10, "irrelevant") def test_pad_opset_10(self): - - pad = OnnxPad('X', output_names=['Y'], - mode='constant', value=1.5, - pads=[0, 1, 0, 1], - op_version=2) + pad = OnnxPad( + "X", + output_names=["Y"], + mode="constant", + value=1.5, + pads=[0, 1, 0, 1], + op_version=2, + ) X = np.array([[0, 1]], dtype=np.float32) - model_def = pad.to_onnx({'X': X}, target_opset=10) + model_def = pad.to_onnx({"X": X}, target_opset=10) onnx.checker.check_model(model_def) def predict_with_onnxruntime(model_def, *inputs): sess = ort.InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [i.name for i in sess.get_inputs()] dinputs = {name: input for name, input in zip(names, inputs)} res = sess.run(None, dinputs) @@ -33,8 +35,7 @@ def predict_with_onnxruntime(model_def, *inputs): return {name: output for name, output in zip(names, res)} Y = predict_with_onnxruntime(model_def, X) - assert_almost_equal( - np.array([[1.5, 0., 1., 1.5]], dtype=np.float32), Y['Y']) + assert_almost_equal(np.array([[1.5, 0.0, 1.0, 1.5]], dtype=np.float32), Y["Y"]) if __name__ == "__main__": diff --git a/tests/test_algebra_onnx_operators_scan.py b/tests/test_algebra_onnx_operators_scan.py index b26d98934..a03c9f0f3 100644 --- a/tests/test_algebra_onnx_operators_scan.py +++ b/tests/test_algebra_onnx_operators_scan.py @@ -10,6 +10,7 @@ import onnx from onnx.onnx_cpp2py_export.checker import ValidationError from onnxruntime import __version__ as ort_version + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -18,88 +19,97 @@ from sklearn.utils.testing import 
ignore_warnings from skl2onnx.common.data_types import FloatTensorType from skl2onnx.algebra.onnx_ops import ( - OnnxAdd, OnnxIdentity, OnnxScan, - OnnxSub, OnnxReduceSumSquareApi18, - OnnxSqueezeApi11, OnnxShape) + OnnxAdd, + OnnxIdentity, + OnnxScan, + OnnxSub, + OnnxReduceSumSquareApi18, + OnnxSqueezeApi11, + OnnxShape, +) from skl2onnx.algebra.custom_ops import OnnxCDist + try: from skl2onnx.algebra.onnx_ops import OnnxConstantOfShape except ImportError: # onnx is too old OnnxConstantOfShape = None -from onnx import ( - helper, TensorProto, __version__ as onnx__version__) -from skl2onnx.algebra.complex_functions import ( - onnx_squareform_pdist, onnx_cdist) +from onnx import helper, TensorProto, __version__ as onnx__version__ +from skl2onnx.algebra.complex_functions import onnx_squareform_pdist, onnx_cdist from skl2onnx.proto import get_latest_tested_opset_version -from test_utils import ( - TARGET_OPSET, TARGET_IR, - InferenceSessionEx as InferenceSession) +from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession _TARGET_OPSET_ = min(get_latest_tested_opset_version(), TARGET_OPSET) THRESHOLD = "0.4.0" THRESHOLD2 = "0.5.0" -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestOnnxOperatorsScan(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example(self): sum_in = onnx.helper.make_tensor_value_info( - 'sum_in', onnx.TensorProto.FLOAT, [2]) - next = onnx.helper.make_tensor_value_info( - 'next', onnx.TensorProto.FLOAT, [2]) + "sum_in", onnx.TensorProto.FLOAT, [2] + ) + next = onnx.helper.make_tensor_value_info("next", onnx.TensorProto.FLOAT, [2]) sum_out = onnx.helper.make_tensor_value_info( - 'sum_out', onnx.TensorProto.FLOAT, [2]) + "sum_out", onnx.TensorProto.FLOAT, [2] + ) scan_out = onnx.helper.make_tensor_value_info( - 'scan_out', onnx.TensorProto.FLOAT, [2]) + "scan_out", onnx.TensorProto.FLOAT, [2] + ) add_node = onnx.helper.make_node( - 'Add', - inputs=['sum_in', 'next'], - outputs=['sum_out'] + "Add", inputs=["sum_in", "next"], outputs=["sum_out"] ) id_node = onnx.helper.make_node( - 'Identity', - inputs=['sum_out'], - outputs=['scan_out'] + "Identity", inputs=["sum_out"], outputs=["scan_out"] ) scan_body = onnx.helper.make_graph( - [add_node, id_node], - 'scan_body', - [sum_in, next], - [sum_out, scan_out] + [add_node, id_node], "scan_body", [sum_in, next], [sum_out, scan_out] ) node = onnx.helper.make_node( - 'Scan', - inputs=['initial', 'x'], - outputs=['y', 'z'], + "Scan", + inputs=["initial", "x"], + outputs=["y", "z"], num_scan_inputs=1, - body=scan_body + body=scan_body, ) initial = helper.make_tensor_value_info( - 'initial', TensorProto.FLOAT, [2, ]) - X = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 2]) - Y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, ]) - Z = helper.make_tensor_value_info('z', TensorProto.FLOAT, [3, 2]) + "initial", + TensorProto.FLOAT, + [ + 2, + ], + ) + X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [3, 2]) + Y = helper.make_tensor_value_info( + "y", + TensorProto.FLOAT, + [ + 2, + ], + ) + Z = helper.make_tensor_value_info("z", TensorProto.FLOAT, [3, 2]) graph_def = helper.make_graph( [node], - 
'test-model', + "test-model", [initial, X], [Y, Z], ) - model_def = helper.make_model(graph_def, producer_name='onnx-example') + model_def = helper.make_model(graph_def, producer_name="onnx-example") del model_def.opset_import[:] op_set = model_def.opset_import.add() - op_set.domain = '' + op_set.domain = "" op_set.version = TARGET_OPSET model_def.ir_version = TARGET_IR @@ -112,13 +122,13 @@ def test_onnx_example(self): try: sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) except Exception as e: if "Current official support for domain ai.onnx" in str(e): return raise e - res = sess.run(None, {'initial': initial, 'x': x}) + res = sess.run(None, {"initial": initial, "x": x}) y = np.array([9, 12]).astype(np.float32).reshape((2,)) z = np.array([1, 2, 4, 6, 9, 12]).astype(np.float32).reshape((3, 2)) @@ -126,37 +136,40 @@ def test_onnx_example(self): assert_almost_equal(z, res[1]) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_algebra(self): initial = np.array([0, 0]).astype(np.float32).reshape((2,)) x = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32).reshape((3, 2)) opv = _TARGET_OPSET_ - add_node = OnnxAdd( - 'sum_in', 'next', output_names=['sum_out'], - op_version=opv) - id_node = OnnxIdentity( - add_node, output_names=['scan_out'], - op_version=opv) + add_node = OnnxAdd("sum_in", "next", output_names=["sum_out"], op_version=opv) + id_node = OnnxIdentity(add_node, output_names=["scan_out"], op_version=opv) scan_body = id_node.to_onnx( - {'sum_in': initial, 'next': initial}, - outputs=[('sum_out', FloatTensorType()), - ('scan_out', FloatTensorType())]) + {"sum_in": initial, "next": initial}, + outputs=[("sum_out", FloatTensorType()), ("scan_out", FloatTensorType())], + ) - node = OnnxScan('initial', 'x', output_names=['y', 'z'], - num_scan_inputs=1, body=scan_body.graph, - op_version=opv) + node = OnnxScan( + "initial", + "x", + output_names=["y", "z"], + num_scan_inputs=1, + body=scan_body.graph, + op_version=opv, + ) model_def = node.to_onnx( - {'initial': initial, 'x': x}, - outputs=[('y', FloatTensorType()), - ('z', FloatTensorType())]) + {"initial": initial, "x": x}, + outputs=[("y", FloatTensorType()), ("z", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'initial': initial, 'x': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"initial": initial, "x": x}) y = np.array([9, 12]).astype(np.float32).reshape((2,)) z = np.array([1, 2, 4, 6, 9, 12]).astype(np.float32).reshape((3, 2)) @@ -164,46 +177,53 @@ def test_onnx_example_algebra(self): assert_almost_equal(z, res[1]) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_pdist(self): x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) opv = _TARGET_OPSET_ - diff = 
OnnxSub('next_in', 'next', output_names=['diff'], - op_version=opv) - id_next = OnnxIdentity( - 'next_in', output_names=['next_out'], - op_version=opv) + diff = OnnxSub("next_in", "next", output_names=["diff"], op_version=opv) + id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=opv) norm = OnnxReduceSumSquareApi18( - diff, output_names=['norm'], axes=[1], - op_version=opv) + diff, output_names=["norm"], axes=[1], op_version=opv + ) flat = OnnxSqueezeApi11( - norm, output_names=['scan_out'], axes=[1], - op_version=opv) + norm, output_names=["scan_out"], axes=[1], op_version=opv + ) scan_body = id_next.to_onnx( - OrderedDict([('next_in', x), ('next', FloatTensorType())]), - outputs=[('next_out', FloatTensorType([3, 2])), - ('scan_out', FloatTensorType([3]))], + OrderedDict([("next_in", x), ("next", FloatTensorType())]), + outputs=[ + ("next_out", FloatTensorType([3, 2])), + ("scan_out", FloatTensorType([3])), + ], other_outputs=[flat], - target_opset=opv) + target_opset=opv, + ) sess = InferenceSession( - scan_body.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'next_in': x, 'next': x[:1]}) + scan_body.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"next_in": x, "next": x[:1]}) assert_almost_equal(x, res[0]) - exp = np.array([0., 18., 20.], dtype=np.float32) + exp = np.array([0.0, 18.0, 20.0], dtype=np.float32) assert_almost_equal(exp, res[1]) node = OnnxScan( - 'x', 'x', output_names=['y', 'z'], - num_scan_inputs=1, body=scan_body.graph, - op_version=opv) - model_def = node.to_onnx({'x': x}, - outputs=[('y', FloatTensorType([3, 2])), - ('z', FloatTensorType([3, 3]))]) + "x", + "x", + output_names=["y", "z"], + num_scan_inputs=1, + body=scan_body.graph, + op_version=opv, + ) + model_def = node.to_onnx( + {"x": x}, + outputs=[("y", FloatTensorType([3, 2])), ("z", FloatTensorType([3, 3]))], + ) try: onnx.checker.check_model(model_def) except ValidationError as e: @@ -213,257 +233,294 @@ def test_onnx_example_pdist(self): raise e sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'x': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"x": x}) exp = squareform(pdist(x, metric="sqeuclidean")) assert_almost_equal(x, res[0]) assert_almost_equal(exp, res[1]) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_pdist_in(self): opv = _TARGET_OPSET_ x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) - cop = OnnxAdd( - 'input', 'input', op_version=opv) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - onnx_squareform_pdist( - cop, dtype=np.float32, - op_version=opv), - output_names=['pdist'], - op_version=opv) + onnx_squareform_pdist(cop, dtype=np.float32, op_version=opv), + output_names=["pdist"], + op_version=opv, + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('pdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("pdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, 
{'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = squareform(pdist(x * 2, metric="sqeuclidean")) assert_almost_equal(exp, res[0]) x = np.array([1, 2, 4, 5]).astype(np.float32).reshape((2, 2)) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = squareform(pdist(x * 2, metric="sqeuclidean")) assert_almost_equal(exp, res[0]) x = np.array([1, 2, 4, 5, 5, 6]).astype(np.float32).reshape((2, 3)) x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((2, 3)) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = squareform(pdist(x * 2, metric="sqeuclidean")) assert_almost_equal(exp, res[0]) - @unittest.skipIf((OnnxConstantOfShape is None or - pv.Version(ort_version) <= pv.Version(THRESHOLD)), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + ( + OnnxConstantOfShape is None + or pv.Version(ort_version) <= pv.Version(THRESHOLD) + ), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_constant_of_shape(self): x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) opv = _TARGET_OPSET_ cop2 = OnnxConstantOfShape( - OnnxShape('input', op_version=opv), - output_names=['mat'], op_version=opv) - model_def = cop2.to_onnx({'input': x}, - outputs=[('mat', FloatTensorType())]) + OnnxShape("input", op_version=opv), output_names=["mat"], op_version=opv + ) + model_def = cop2.to_onnx({"input": x}, outputs=[("mat", FloatTensorType())]) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = np.zeros((3, 2), dtype=np.float32) assert_almost_equal(exp, res[0]) - tensor_value = onnx.helper.make_tensor("value", onnx.TensorProto.FLOAT, - (1,), [-5]) + tensor_value = onnx.helper.make_tensor( + "value", onnx.TensorProto.FLOAT, (1,), [-5] + ) cop2 = OnnxConstantOfShape( - OnnxShape('input', op_version=opv), - value=tensor_value, output_names=['mat'], - op_version=opv) - model_def = cop2.to_onnx({'input': x}, - outputs=[('mat', FloatTensorType())]) + OnnxShape("input", op_version=opv), + value=tensor_value, + output_names=["mat"], + op_version=opv, + ) + model_def = cop2.to_onnx({"input": x}, outputs=[("mat", FloatTensorType())]) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) - exp = np.full((3, 2), -5.) 
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) + exp = np.full((3, 2), -5.0) assert_almost_equal(exp, res[0]) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_cdist_in(self): x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) - x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]).astype( - np.float32).reshape((4, 2)) + x2 = ( + np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]) + .astype(np.float32) + .reshape((4, 2)) + ) opv = _TARGET_OPSET_ - cop = OnnxAdd( - 'input', 'input', op_version=opv) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - onnx_cdist(cop, x2, dtype=np.float32, - op_version=opv), - output_names=['cdist'], op_version=opv) + onnx_cdist(cop, x2, dtype=np.float32, op_version=opv), + output_names=["cdist"], + op_version=opv, + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x2, metric="sqeuclidean") assert_almost_equal(exp, res[0], decimal=5) - x = np.array([[6.1, 2.8, 4.7, 1.2], - [5.7, 3.8, 1.7, 0.3], - [7.7, 2.6, 6.9, 2.3], - [6.0, 2.9, 4.5, 1.5], - [6.8, 2.8, 4.8, 1.4], - [5.4, 3.4, 1.5, 0.4], - [5.6, 2.9, 3.6, 1.3], - [6.9, 3.1, 5.1, 2.3]], dtype=np.float32) - cop = OnnxAdd('input', 'input', op_version=opv) + x = np.array( + [ + [6.1, 2.8, 4.7, 1.2], + [5.7, 3.8, 1.7, 0.3], + [7.7, 2.6, 6.9, 2.3], + [6.0, 2.9, 4.5, 1.5], + [6.8, 2.8, 4.8, 1.4], + [5.4, 3.4, 1.5, 0.4], + [5.6, 2.9, 3.6, 1.3], + [6.9, 3.1, 5.1, 2.3], + ], + dtype=np.float32, + ) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( onnx_cdist(cop, x, dtype=np.float32, op_version=opv), - output_names=['cdist'], - op_version=opv) + output_names=["cdist"], + op_version=opv, + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x, metric="sqeuclidean") assert_almost_equal(exp, res[0], decimal=4) assert "u_scan0_" not in str(model_def) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD2), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD2), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_cdist_in_mink(self): x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) - x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 
0, 0]).astype( - np.float32).reshape((4, 2)) + x2 = ( + np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]) + .astype(np.float32) + .reshape((4, 2)) + ) opv = _TARGET_OPSET_ - cop = OnnxAdd( - 'input', 'input', op_version=opv) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - onnx_cdist(cop, x2, dtype=np.float32, - metric="minkowski", p=2, - op_version=opv), - output_names=['cdist'], - op_version=opv) + onnx_cdist( + cop, x2, dtype=np.float32, metric="minkowski", p=2, op_version=opv + ), + output_names=["cdist"], + op_version=opv, + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x2, metric="minkowski") assert_almost_equal(exp, res[0], decimal=5) - x = np.array([[6.1, 2.8, 4.7, 1.2], - [5.7, 3.8, 1.7, 0.3], - [7.7, 2.6, 6.9, 2.3], - [6.0, 2.9, 4.5, 1.5], - [6.8, 2.8, 4.8, 1.4], - [5.4, 3.4, 1.5, 0.4], - [5.6, 2.9, 3.6, 1.3], - [6.9, 3.1, 5.1, 2.3]], dtype=np.float32) - cop = OnnxAdd( - 'input', 'input', op_version=opv) + x = np.array( + [ + [6.1, 2.8, 4.7, 1.2], + [5.7, 3.8, 1.7, 0.3], + [7.7, 2.6, 6.9, 2.3], + [6.0, 2.9, 4.5, 1.5], + [6.8, 2.8, 4.8, 1.4], + [5.4, 3.4, 1.5, 0.4], + [5.6, 2.9, 3.6, 1.3], + [6.9, 3.1, 5.1, 2.3], + ], + dtype=np.float32, + ) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - onnx_cdist(cop, x, dtype=np.float32, - op_version=opv), - output_names=['cdist'], - op_version=opv) + onnx_cdist(cop, x, dtype=np.float32, op_version=opv), + output_names=["cdist"], + op_version=opv, + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x, metric="sqeuclidean") assert_almost_equal(exp, res[0], decimal=4) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD2), - reason="fails with onnxruntime 0.4.0") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version(THRESHOLD2), + reason="fails with onnxruntime 0.4.0", + ) @ignore_warnings(category=DeprecationWarning) def test_onnx_example_cdist_in_custom_ops(self): x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) - x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]).astype( - np.float32).reshape((4, 2)) + x2 = ( + np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]) + .astype(np.float32) + .reshape((4, 2)) + ) opv = _TARGET_OPSET_ - cop = OnnxAdd( - 'input', 'input', op_version=opv) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - OnnxCDist(cop, x2, op_version=opv), - output_names=['cdist'], - op_version=opv) + OnnxCDist(cop, x2, op_version=opv), output_names=["cdist"], op_version=opv + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', 
FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) try: sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: if "CDist is not a registered" in str(e): return - res = sess.run(None, {'input': x}) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x2, metric="sqeuclidean") assert_almost_equal(exp, res[0], decimal=5) - x = np.array([[6.1, 2.8, 4.7, 1.2], - [5.7, 3.8, 1.7, 0.3], - [7.7, 2.6, 6.9, 2.3], - [6.0, 2.9, 4.5, 1.5], - [6.8, 2.8, 4.8, 1.4], - [5.4, 3.4, 1.5, 0.4], - [5.6, 2.9, 3.6, 1.3], - [6.9, 3.1, 5.1, 2.3]], dtype=np.float32) - cop = OnnxAdd( - 'input', 'input', op_version=opv) + x = np.array( + [ + [6.1, 2.8, 4.7, 1.2], + [5.7, 3.8, 1.7, 0.3], + [7.7, 2.6, 6.9, 2.3], + [6.0, 2.9, 4.5, 1.5], + [6.8, 2.8, 4.8, 1.4], + [5.4, 3.4, 1.5, 0.4], + [5.6, 2.9, 3.6, 1.3], + [6.9, 3.1, 5.1, 2.3], + ], + dtype=np.float32, + ) + cop = OnnxAdd("input", "input", op_version=opv) cop2 = OnnxIdentity( - OnnxCDist(cop, x, - op_version=opv), - output_names=['cdist'], - op_version=opv) + OnnxCDist(cop, x, op_version=opv), output_names=["cdist"], op_version=opv + ) model_def = cop2.to_onnx( - inputs=[('input', FloatTensorType([None, None]))], - outputs=[('cdist', FloatTensorType())]) + inputs=[("input", FloatTensorType([None, None]))], + outputs=[("cdist", FloatTensorType())], + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) exp = scipy_cdist(x * 2, x, metric="sqeuclidean") assert_almost_equal(exp, res[0], decimal=4) diff --git a/tests/test_algebra_onnx_operators_sparse.py b/tests/test_algebra_onnx_operators_sparse.py index c8ad90a27..d52d50f44 100644 --- a/tests/test_algebra_onnx_operators_sparse.py +++ b/tests/test_algebra_onnx_operators_sparse.py @@ -7,14 +7,16 @@ from numpy.testing import assert_almost_equal from scipy.sparse import coo_matrix from onnxruntime import InferenceSession, __version__ as ort_version + try: from onnxruntime.capi.onnxruntime_pybind11_state import ( - InvalidArgument as OrtInvalidArgument + InvalidArgument as OrtInvalidArgument, ) except ImportError: OrtInvalidArgument = None from skl2onnx.common.data_types import FloatTensorType from skl2onnx.algebra.onnx_ops import OnnxAdd + try: from skl2onnx.algebra.onnx_ops import OnnxConstantOfShape except ImportError: @@ -26,57 +28,54 @@ class TestOnnxOperatorsSparse(unittest.TestCase): - - @unittest.skipIf(TARGET_OPSET < 11, - reason="only available for opset >= 11") - @unittest.skipIf(pv.Version(ort_version) < pv.Version(THRESHOLD), - reason="fails with onnxruntime < %s" % THRESHOLD) + @unittest.skipIf(TARGET_OPSET < 11, reason="only available for opset >= 11") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version(THRESHOLD), + reason="fails with onnxruntime < %s" % THRESHOLD, + ) def test_onnx_init_dense(self): X = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32).reshape((3, 2)) - node = OnnxAdd('X', X, output_names=['Y'], op_version=TARGET_OPSET) + node = OnnxAdd("X", X, output_names=["Y"], op_version=TARGET_OPSET) - model_def = node.to_onnx({'X': X}, - outputs=[('Y', FloatTensorType())]) + model_def = node.to_onnx({"X": X}, outputs=[("Y", FloatTensorType())]) sess = InferenceSession( - 
model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X})[0] + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X})[0] assert_almost_equal(X + X, res) - @unittest.skipIf(TARGET_OPSET < 11, - reason="only available for opset >= 11") - @unittest.skipIf(pv.Version(ort_version) < pv.Version(THRESHOLD), - reason="fails with onnxruntime < %s" % THRESHOLD) + @unittest.skipIf(TARGET_OPSET < 11, reason="only available for opset >= 11") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version(THRESHOLD), + reason="fails with onnxruntime < %s" % THRESHOLD, + ) def test_onnx_init_sparse_coo(self): row = np.array([0, 0, 1, 3, 1], dtype=np.float32) col = np.array([0, 2, 1, 3, 1], dtype=np.float32) data = np.array([1, 1, 1, 1, 1], dtype=np.float32) X = coo_matrix((data, (row, col)), shape=(4, 4)) - node = OnnxAdd( - 'X', X, output_names=['Y'], - op_version=TARGET_OPSET) + node = OnnxAdd("X", X, output_names=["Y"], op_version=TARGET_OPSET) - model_def = node.to_onnx( - {'X': X}, outputs=[('Y', FloatTensorType())]) + model_def = node.to_onnx({"X": X}, outputs=[("Y", FloatTensorType())]) try: sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) except (RuntimeError, OrtInvalidArgument): # Sparse tensor is not supported for constant. return try: - res = sess.run(None, {'X': X})[0] + res = sess.run(None, {"X": X})[0] except RuntimeError as e: # Sparse tensor is not supported for constant. warnings.warn( - "Unable to run with %r\n---\n%s\n%s" % ( - {'X': X}, model_def, e)) + "Unable to run with %r\n---\n%s\n%s" % ({"X": X}, model_def, e) + ) return assert_almost_equal(X + X, res) diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py index 81a53ffa7..231d7f7cd 100644 --- a/tests/test_algebra_onnx_operators_sub_estimator.py +++ b/tests/test_algebra_onnx_operators_sub_estimator.py @@ -5,40 +5,42 @@ import packaging.version as pv import numpy as np from numpy.testing import assert_almost_equal -from sklearn.base import ( - BaseEstimator, ClassifierMixin, clone, TransformerMixin) +from sklearn.base import BaseEstimator, ClassifierMixin, clone, TransformerMixin from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler from onnxruntime import __version__ as ort_version from skl2onnx.algebra.onnx_ops import ( - OnnxIdentity, OnnxCast, OnnxReduceMaxApi18, OnnxGreater, - OnnxExp) + OnnxIdentity, + OnnxCast, + OnnxReduceMaxApi18, + OnnxGreater, + OnnxExp, +) from skl2onnx import update_registered_converter from skl2onnx import to_onnx, get_model_alias from skl2onnx.proto import onnx_proto -from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType) +from skl2onnx.common.data_types import FloatTensorType, Int64TensorType from skl2onnx.algebra.onnx_operator import OnnxSubEstimator from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession class ValidatorClassifier(BaseEstimator, ClassifierMixin): - def __init__(self, estimator=None, threshold=0.75): ClassifierMixin.__init__(self) BaseEstimator.__init__(self) if estimator is None: - estimator = LogisticRegression(solver='liblinear') + estimator = LogisticRegression(solver="liblinear") self.estimator = estimator self.threshold = 
threshold def fit(self, X, y, sample_weight=None): sig = inspect.signature(self.estimator.fit) - if 'sample_weight' in sig.parameters: + if "sample_weight" in sig.parameters: self.estimator_ = clone(self.estimator).fit( - X, y, sample_weight=sample_weight) + X, y, sample_weight=sample_weight + ) else: self.estimator_ = clone(self.estimator).fit(X, y) return self @@ -56,45 +58,40 @@ def validate(self, X): def validator_classifier_shape_calculator(operator): - input = operator.inputs[0] # inputs in ONNX graph outputs = operator.outputs # outputs in ONNX graph op = operator.raw_operator # scikit-learn model (mmust be fitted) if len(outputs) != 3: raise RuntimeError("3 outputs expected not {}.".format(len(outputs))) - N = input.type.shape[0] # number of observations - C = op.estimator_.classes_.shape[0] # dimension of outputs + N = input.type.shape[0] # number of observations + C = op.estimator_.classes_.shape[0] # dimension of outputs - outputs[0].type = Int64TensorType([N]) # label + outputs[0].type = Int64TensorType([N]) # label outputs[1].type = FloatTensorType([N, C]) # probabilities - outputs[2].type = Int64TensorType([C]) # validation + outputs[2].type = Int64TensorType([C]) # validation def validator_classifier_converter(scope, operator, container): - input = operator.inputs[0] # input in ONNX graph - outputs = operator.outputs # outputs in ONNX graph - op = operator.raw_operator # scikit-learn model (mmust be fitted) + input = operator.inputs[0] # input in ONNX graph + outputs = operator.outputs # outputs in ONNX graph + op = operator.raw_operator # scikit-learn model (mmust be fitted) opv = container.target_opset # We reuse existing converter and declare it as local # operator. model = op.estimator_ - onnx_op = OnnxSubEstimator(model, input, op_version=opv, - options={'zipmap': False}) + onnx_op = OnnxSubEstimator(model, input, op_version=opv, options={"zipmap": False}) rmax = OnnxReduceMaxApi18(onnx_op[1], axes=[1], keepdims=0, op_version=opv) - great = OnnxGreater(rmax, np.array([op.threshold], dtype=np.float32), - op_version=opv) - valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, - op_version=opv) - - r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], - op_version=opv) - r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], - op_version=opv) - r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], - op_version=opv) + great = OnnxGreater( + rmax, np.array([op.threshold], dtype=np.float32), op_version=opv + ) + valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, op_version=opv) + + r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], op_version=opv) + r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], op_version=opv) + r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], op_version=opv) r1.add_to(scope, container) r2.add_to(scope, container) @@ -109,9 +106,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): this_operator.inputs.append(inputs[0]) # outputs - val_label = scope.declare_local_variable('val_label', Int64TensorType()) - val_prob = scope.declare_local_variable('val_prob', FloatTensorType()) - val_val = scope.declare_local_variable('val_val', Int64TensorType()) + val_label = scope.declare_local_variable("val_label", Int64TensorType()) + val_prob = scope.declare_local_variable("val_prob", FloatTensorType()) + val_val = scope.declare_local_variable("val_val", Int64TensorType()) this_operator.outputs.append(val_label) this_operator.outputs.append(val_prob) 
this_operator.outputs.append(val_val) @@ -141,7 +138,6 @@ def dummy_conv_2(scope, operator): class MinMaxScalerTwo(BaseEstimator, TransformerMixin): - def __init__(self): pass @@ -161,7 +157,7 @@ def subsub_mmtwo_parser(scope, model, inputs, custom_parsers=None): this_operator = scope.declare_local_operator(alias, model) this_operator.inputs.append(inputs[0]) cls_type = inputs[0].type.__class__ - val = scope.declare_local_variable('variable', cls_type()) + val = scope.declare_local_variable("variable", cls_type()) this_operator.outputs.append(val) return this_operator.outputs @@ -176,19 +172,18 @@ def subsub_mmtwo_converter(scope, operator, container): out = operator.outputs X = operator.inputs[0] x2 = OnnxSubEstimator(op.est1_, X, op_version=opv) - x2.set_onnx_name_prefix('AAA') + x2.set_onnx_name_prefix("AAA") x2_exp = OnnxExp(x2, op_version=opv) x3 = OnnxSubEstimator(op.est2_, x2_exp, op_version=opv) - x3.set_onnx_name_prefix('BBB') + x3.set_onnx_name_prefix("BBB") final = OnnxIdentity(x3, op_version=opv, output_names=out[:1]) final.add_to(scope, container) class TestOnnxOperatorSubEstimator(unittest.TestCase): - @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.0"), - reason="Cast not available.") + pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available." + ) def test_sub_estimator_exc(self): data = load_iris() X, y = data.data, data.target @@ -201,20 +196,24 @@ def test_sub_estimator_exc(self): try: update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", validator_classifier_shape_calculator, validator_classifier_converter, - parser=dummy1_parser) + parser=dummy1_parser, + ) raise AssertionError("exception not raised") except TypeError: pass try: update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", validator_classifier_shape_calculator, validator_classifier_converter, - parser=dummy1_parser) + parser=dummy1_parser, + ) raise AssertionError("exception not raised") except TypeError: pass @@ -223,10 +222,12 @@ def test_sub_estimator_exc(self): try: update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", dummy_val_2, validator_classifier_converter, - parser=validator_classifier_parser) + parser=validator_classifier_parser, + ) raise AssertionError("exception not raised") except TypeError: pass @@ -235,27 +236,31 @@ def test_sub_estimator_exc(self): try: update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", validator_classifier_shape_calculator, dummy_conv_1, - parser=validator_classifier_parser) + parser=validator_classifier_parser, + ) raise AssertionError("exception not raised") except NameError: pass try: update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", validator_classifier_shape_calculator, dummy_conv_2, - parser=validator_classifier_parser) + parser=validator_classifier_parser, + ) raise AssertionError("exception not raised") except TypeError: pass @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.0"), - reason="Cast not available.") + pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available." 
+ ) def test_sub_estimator(self): data = load_iris() X, y = data.data, data.target @@ -265,24 +270,26 @@ def test_sub_estimator(self): model.fit(X_train, y_train) update_registered_converter( - ValidatorClassifier, 'CustomValidatorClassifier', + ValidatorClassifier, + "CustomValidatorClassifier", validator_classifier_shape_calculator, validator_classifier_converter, - parser=validator_classifier_parser) + parser=validator_classifier_parser, + ) X32 = X_test[:5].astype(np.float32) - model_onnx = to_onnx( - model, X32, target_opset=TARGET_OPSET) - sess = InferenceSession(model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X32}) + model_onnx = to_onnx(model, X32, target_opset=TARGET_OPSET) + sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X32}) assert_almost_equal(model.predict(X32), res[0]) assert_almost_equal(model.predict_proba(X32), res[1], decimal=4) assert_almost_equal(model.validate(X32), res[2]) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.0"), - reason="Cast not available.") + pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available." + ) def test_sub_sub_estimator(self): data = load_iris() X, y = data.data, data.target @@ -292,17 +299,19 @@ def test_sub_sub_estimator(self): model.fit(X_train, y_train) update_registered_converter( - MinMaxScalerTwo, "SubSubDummy", + MinMaxScalerTwo, + "SubSubDummy", subsub_mmtwo_shape_calculator, subsub_mmtwo_converter, - parser=subsub_mmtwo_parser) + parser=subsub_mmtwo_parser, + ) X32 = X_test[:5].astype(np.float32) - model_onnx = to_onnx( - model, X32, target_opset=TARGET_OPSET) - sess = InferenceSession(model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X32}) + model_onnx = to_onnx(model, X32, target_opset=TARGET_OPSET) + sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X32}) assert_almost_equal(model.transform(X32), res[0], decimal=5) diff --git a/tests/test_algebra_onnx_operators_wrapped.py b/tests/test_algebra_onnx_operators_wrapped.py index 8e9a046bb..d0de8dfa1 100644 --- a/tests/test_algebra_onnx_operators_wrapped.py +++ b/tests/test_algebra_onnx_operators_wrapped.py @@ -17,8 +17,7 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator): - - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha @@ -33,8 +32,7 @@ def transform(self, X): class DecorrelateTransformer2(TransformerMixin, BaseEstimator): - - def __init__(self, alpha=0.): + def __init__(self, alpha=0.0): BaseEstimator.__init__(self) TransformerMixin.__init__(self) self.alpha = alpha @@ -76,97 +74,104 @@ def decorrelate_transformer_convertor2(scope, operator, container): class TestOnnxOperatorsWrapped(unittest.TestCase): - - @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'), - reason="onnxruntime too old") + @unittest.skipIf( + pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old" + ) def test_sub(self): - data = load_iris() X = data.data dec = DecorrelateTransformer() dec.fit(X) update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_convertor) + decorrelate_transformer_convertor, + ) onx = to_onnx(dec, X.astype(np.float32), 
target_opset=TARGET_OPSET) self.assertIn('output: "variable"', str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) exp = dec.transform(X.astype(np.float32)) - got = sess.run(None, {'X': X.astype(np.float32)})[0] + got = sess.run(None, {"X": X.astype(np.float32)})[0] assert_almost_equal(got, exp, decimal=4) - @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'), - reason="onnxruntime too old") + @unittest.skipIf( + pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old" + ) def test_sub_double(self): - data = load_iris() X = data.data dec = DecorrelateTransformer() dec.fit(X) update_registered_converter( - DecorrelateTransformer, "SklearnDecorrelateTransformer", + DecorrelateTransformer, + "SklearnDecorrelateTransformer", decorrelate_transformer_shape_calculator, - decorrelate_transformer_convertor) + decorrelate_transformer_convertor, + ) onx = to_onnx(dec, X.astype(np.float64), target_opset=TARGET_OPSET) self.assertIn('output: "variable"', str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) exp = dec.transform(X.astype(np.float64)) - got = sess.run(None, {'X': X.astype(np.float64)})[0] + got = sess.run(None, {"X": X.astype(np.float64)})[0] assert_almost_equal(got, exp, decimal=4) - @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'), - reason="onnxruntime too old") + @unittest.skipIf( + pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old" + ) def test_sub_output(self): - data = load_iris() X = data.data dec = DecorrelateTransformer2() dec.fit(X) update_registered_converter( - DecorrelateTransformer2, "SklearnDecorrelateTransformer2", + DecorrelateTransformer2, + "SklearnDecorrelateTransformer2", decorrelate_transformer_shape_calculator, - decorrelate_transformer_convertor2) + decorrelate_transformer_convertor2, + ) onx = to_onnx(dec, X.astype(np.float32), target_opset=TARGET_OPSET) self.assertIn('output: "variable"', str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) exp = dec.transform(X.astype(np.float32)) - got = sess.run(None, {'X': X.astype(np.float32)})[0] + got = sess.run(None, {"X": X.astype(np.float32)})[0] assert_almost_equal(got, exp, decimal=4) - @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'), - reason="onnxruntime too old") + @unittest.skipIf( + pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old" + ) def test_sub_output_double(self): - data = load_iris() X = data.data dec = DecorrelateTransformer2() dec.fit(X) update_registered_converter( - DecorrelateTransformer2, "SklearnDecorrelateTransformer2", + DecorrelateTransformer2, + "SklearnDecorrelateTransformer2", decorrelate_transformer_shape_calculator, - decorrelate_transformer_convertor2) + decorrelate_transformer_convertor2, + ) onx = to_onnx(dec, X.astype(np.float64), target_opset=TARGET_OPSET) self.assertIn('output: "variable"', str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) exp = dec.transform(X.astype(np.float64)) - got = sess.run(None, {'X': X.astype(np.float64)})[0] + got = sess.run(None, {"X": X.astype(np.float64)})[0] assert_almost_equal(got, exp, decimal=4) diff --git a/tests/test_algebra_symbolic.py 
b/tests/test_algebra_symbolic.py index 923bda0e7..f84a35974 100644 --- a/tests/test_algebra_symbolic.py +++ b/tests/test_algebra_symbolic.py @@ -5,179 +5,165 @@ import numpy from numpy.random import rand from numpy.testing import assert_almost_equal + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph, Fail except ImportError: InvalidGraph = RuntimeError Fail = RuntimeError from skl2onnx.common.data_types import FloatTensorType + try: from skl2onnx.algebra.onnx_ops import OnnxAbs, OnnxNormalizer, OnnxArgMin from skl2onnx.algebra.onnx_ops import OnnxSplitApi18, OnnxScaler except ImportError: - warnings.warn( - 'Unable to test OnnxAbs, OnnxNormalizer, OnnxArgMin, OnnxSplit.') + warnings.warn("Unable to test OnnxAbs, OnnxNormalizer, OnnxArgMin, OnnxSplit.") OnnxAbs = None from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession class TestAlgebraSymbolic(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_abs(self): - - op = OnnxAbs('I0', op_version=TARGET_OPSET) - onx = op.to_onnx({'I0': numpy.empty((1, 2), dtype=numpy.float32)}) + op = OnnxAbs("I0", op_version=TARGET_OPSET) + onx = op.to_onnx({"I0": numpy.empty((1, 2), dtype=numpy.float32)}) assert onx is not None try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: raise RuntimeError("Unable to read\n{}".format(onx)) from e X = numpy.array([[0, 1], [-1, -2]]) try: - Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] except RuntimeError as e: raise RuntimeError("Unable to run\n{}".format(onx)) from e assert_almost_equal(Y, numpy.abs(X)) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="shape inference fails for Normalizer") + @unittest.skipIf(OnnxAbs is None, reason="shape inference fails for Normalizer") def test_algebra_normalizer(self): - op = OnnxNormalizer('I0', norm='L1', op_version=1, - output_names=['Y']) - onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)}, - outputs=[('Y', FloatTensorType())], - target_opset={'': 10}) + op = OnnxNormalizer("I0", norm="L1", op_version=1, output_names=["Y"]) + onx = op.to_onnx( + {"I0": numpy.ones((1, 2), dtype=numpy.float32)}, + outputs=[("Y", FloatTensorType())], + target_opset={"": 10}, + ) assert onx is not None sonx = str(onx) assert "ai.onnx.ml" in sonx assert "version: 1" in sonx sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.array([[0, 2], [0, -2]]) exp = numpy.array([[0, 1], [0, -1]]) - Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] assert_almost_equal(exp, Y) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_normalizer_shape(self): - - op = OnnxNormalizer('I0', norm='L1', op_version=1, output_names=['O0']) - onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)}, - outputs=[('O0', 
FloatTensorType((None, 2)))]) + op = OnnxNormalizer("I0", norm="L1", op_version=1, output_names=["O0"]) + onx = op.to_onnx( + {"I0": numpy.ones((1, 2), dtype=numpy.float32)}, + outputs=[("O0", FloatTensorType((None, 2)))], + ) assert onx is not None sonx = str(onx) assert "ai.onnx.ml" in sonx assert "version: 1" in sonx sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.array([[0, 2], [0, -2]]) exp = numpy.array([[0, 1], [0, -1]]) - Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] assert_almost_equal(exp, Y) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_argmin(self): - - op = OnnxArgMin('I0', op_version=TARGET_OPSET) - onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)}) + op = OnnxArgMin("I0", op_version=TARGET_OPSET) + onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)}) assert onx is not None sonx = str(onx) assert len(sonx) > 0 sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.array([[0, 2], [0, -2]]) exp = numpy.array([[0, 1]]) - Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] assert_almost_equal(exp, Y) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_normalizer_argmin_named_output(self): - op = OnnxArgMin( - OnnxNormalizer('I0', norm='L1', output_names=['Y']), - op_version=TARGET_OPSET) - onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)}) + OnnxNormalizer("I0", norm="L1", output_names=["Y"]), op_version=TARGET_OPSET + ) + onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)}) assert onx is not None sonx = str(onx) assert len(sonx) > 0 sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.array([[0, 2], [0, -2]]) exp = numpy.array([[0, 1]]) - Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] assert_almost_equal(exp, Y) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_normalizer_argmin(self): - - op = OnnxArgMin( - OnnxNormalizer( - 'I0', norm='L1'), - op_version=TARGET_OPSET) - onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)}) + op = OnnxArgMin(OnnxNormalizer("I0", norm="L1"), op_version=TARGET_OPSET) + onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)}) assert onx is not None sonx = str(onx) assert len(sonx) > 0 sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.array([[0, 2], [0, -2]]) exp = numpy.array([[0, 1]]) - Y = sess.run(None, {'I0': 
X.astype(numpy.float32)})[0] + Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0] assert_almost_equal(exp, Y) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_algebra_split(self): - - op = OnnxSplitApi18('I0', axis=0, output_names=['O1', 'O2'], - op_version=TARGET_OPSET) - onx = op.to_onnx({'I0': numpy.arange(6, dtype=numpy.float32)}) + op = OnnxSplitApi18( + "I0", axis=0, output_names=["O1", "O2"], op_version=TARGET_OPSET + ) + onx = op.to_onnx({"I0": numpy.arange(6, dtype=numpy.float32)}) assert onx is not None sonx = str(onx) assert len(sonx) > 0 sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = numpy.arange(6) exp = [numpy.array([0, 1, 2]), numpy.array([3, 4, 5])] - Y = sess.run(None, {'I0': X.astype(numpy.float32)}) + Y = sess.run(None, {"I0": X.astype(numpy.float32)}) assert len(Y) == len(exp) assert_almost_equal(exp[0], Y[0]) assert_almost_equal(exp[1], Y[1]) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf(OnnxAbs is None, - reason="Cannot infer operators with current ONNX") + @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX") def test_cascade_scaler(self): - - def generate_onnx_graph(dim, nbnode, input_name='X1'): + def generate_onnx_graph(dim, nbnode, input_name="X1"): matrices = [] scale = list(numpy.ones((1, dim)).ravel()) i1 = input_name @@ -188,26 +174,27 @@ def generate_onnx_graph(dim, nbnode, input_name='X1'): i1 = node i2 = list(rand(1, dim).ravel()) matrices.append(i2) - node = OnnxScaler( - i1, offset=i2, scale=scale, output_names=['Y']) - onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))], - outputs=[('Y', FloatTensorType((None, dim)))]) + node = OnnxScaler(i1, offset=i2, scale=scale, output_names=["Y"]) + onx = node.to_onnx( + [(input_name, FloatTensorType((None, dim)))], + outputs=[("Y", FloatTensorType((None, dim)))], + ) return onx, matrices import onnxruntime as ort + dim = 5 for nbnode in range(1, 4): onx = generate_onnx_graph(dim, nbnode)[0] X = rand(1, dim) try: sess = ort.InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: - raise AssertionError( - "Loading error:\n{}\n{}".format(e, onx)) from e + raise AssertionError("Loading error:\n{}\n{}".format(e, onx)) from e try: - Y = sess.run(None, {'X1': X.astype(numpy.float32)})[0] + Y = sess.run(None, {"X1": X.astype(numpy.float32)})[0] except RuntimeError as e: raise RuntimeError("Run error:\n{}\n{}".format(e, onx)) assert X.shape == Y.shape diff --git a/tests/test_algebra_test_helper.py b/tests/test_algebra_test_helper.py index 42b208f3f..e01e75e69 100644 --- a/tests/test_algebra_test_helper.py +++ b/tests/test_algebra_test_helper.py @@ -5,22 +5,30 @@ from skl2onnx.proto import onnx_proto from skl2onnx.algebra.type_helper import _guess_type from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, - Int32TensorType, StringTensorType, - BooleanTensorType, DoubleTensorType, - Int8TensorType, UInt8TensorType, - guess_data_type, guess_numpy_type, _guess_numpy_type, - guess_proto_type, guess_tensor_type, _guess_type_proto) + FloatTensorType, + Int64TensorType, + Int32TensorType, + StringTensorType, + 
BooleanTensorType, + DoubleTensorType, + Int8TensorType, + UInt8TensorType, + guess_data_type, + guess_numpy_type, + _guess_numpy_type, + guess_proto_type, + guess_tensor_type, + _guess_type_proto, +) + try: - from skl2onnx.common.data_types import ( - Complex64TensorType, Complex128TensorType) + from skl2onnx.common.data_types import Complex64TensorType, Complex128TensorType except ImportError: Complex64TensorType = None Complex128TensorType = None class TestAlgebraTestHelper(unittest.TestCase): - def test_guess_type(self): dtypes = [ (np.int32, Int32TensorType), @@ -29,7 +37,7 @@ def test_guess_type(self): (np.str_, StringTensorType), (np.bool_, BooleanTensorType), (np.int8, Int8TensorType), - (np.uint8, UInt8TensorType) + (np.uint8, UInt8TensorType), ] if Complex64TensorType is not None: dtypes.append((np.complex64, Complex64TensorType)) @@ -47,12 +55,14 @@ def test_guess_type(self): dtypes = [np.float64] for dtype in dtypes: mat = np.zeros((3, 3), dtype=dtype) - _guess_type(mat, ) + _guess_type( + mat, + ) def test_guess_data_type(self): ty = guess_data_type(np.array([3, 5], dtype=np.int32)) self.assertEqual(len(ty), 1) - self.assertEqual(ty[0][0], 'input') + self.assertEqual(ty[0][0], "input") assert isinstance(ty[0][1], Int32TensorType) ty = guess_data_type("tensor(int32)", shape=[3, 5]) @@ -93,7 +103,7 @@ def test_guess_numpy_type(self): (np.str_, StringTensorType), (np.bool_, BooleanTensorType), (np.int8, Int8TensorType), - (np.uint8, UInt8TensorType) + (np.uint8, UInt8TensorType), ] if Complex64TensorType is not None: dtypes.append((np.complex64, Complex64TensorType)) @@ -116,14 +126,16 @@ def test_proto_type(self): (np.str_, StringTensorType, onnx_proto.TensorProto.STRING), (np.bool_, BooleanTensorType, onnx_proto.TensorProto.BOOL), (np.int8, Int8TensorType, onnx_proto.TensorProto.INT8), - (np.uint8, UInt8TensorType, onnx_proto.TensorProto.UINT8) + (np.uint8, UInt8TensorType, onnx_proto.TensorProto.UINT8), ] if Complex64TensorType is not None: - dtypes.append((np.complex64, Complex64TensorType, - onnx_proto.TensorProto.COMPLEX64)) + dtypes.append( + (np.complex64, Complex64TensorType, onnx_proto.TensorProto.COMPLEX64) + ) if Complex128TensorType is not None: - dtypes.append((np.complex128, Complex128TensorType, - onnx_proto.TensorProto.COMPLEX128)) + dtypes.append( + (np.complex128, Complex128TensorType, onnx_proto.TensorProto.COMPLEX128) + ) for dtype, exp, pt in dtypes: nt2 = guess_proto_type(exp([None, 1])) self.assertEqual(nt2, pt) @@ -137,14 +149,16 @@ def test_tensor_type(self): (np.float32, FloatTensorType, onnx_proto.TensorProto.FLOAT), (np.float64, DoubleTensorType, onnx_proto.TensorProto.DOUBLE), (np.int8, FloatTensorType, onnx_proto.TensorProto.INT8), - (np.uint8, FloatTensorType, onnx_proto.TensorProto.UINT8) + (np.uint8, FloatTensorType, onnx_proto.TensorProto.UINT8), ] if Complex64TensorType is not None: - dtypes.append((np.complex64, Complex64TensorType, - onnx_proto.TensorProto.COMPLEX64)) + dtypes.append( + (np.complex64, Complex64TensorType, onnx_proto.TensorProto.COMPLEX64) + ) if Complex128TensorType is not None: - dtypes.append((np.complex128, Complex128TensorType, - onnx_proto.TensorProto.COMPLEX128)) + dtypes.append( + (np.complex128, Complex128TensorType, onnx_proto.TensorProto.COMPLEX128) + ) for dtype, exp, pt in dtypes: nt2 = guess_tensor_type(exp([None, 1])) self.assertEqual(nt2.__class__, exp) diff --git a/tests/test_algebra_to_onnx.py b/tests/test_algebra_to_onnx.py index 21f7b6a85..0ea9d7f74 100644 --- a/tests/test_algebra_to_onnx.py +++ 
b/tests/test_algebra_to_onnx.py @@ -3,9 +3,14 @@ import numpy as np from onnx.defs import onnx_opset_version from onnxruntime import InferenceSession, __version__ as ort_version + try: from onnxruntime.capi.onnxruntime_pybind11_state import ( - InvalidGraph, Fail, InvalidArgument, NotImplemented) + InvalidGraph, + Fail, + InvalidArgument, + NotImplemented, + ) except ImportError: InvalidGraph = RuntimeError InvalidArgument = RuntimeError @@ -19,80 +24,84 @@ from sklearn.utils.testing import ignore_warnings from sklearn.linear_model import LinearRegression, LogisticRegression from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType -from skl2onnx.algebra.onnx_ops import ( - OnnxAdd, OnnxLinearRegressor, OnnxIdentity) +from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxLinearRegressor, OnnxIdentity from skl2onnx.algebra.onnx_operator import OnnxSubEstimator from skl2onnx.proto import get_latest_tested_opset_version from test_utils import TARGET_OPSET -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestOnnxOperatorsToOnnx(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_onnx_ml(self): def generate_onnx_graph(opv): - node = OnnxAdd(('X1', FloatTensorType()), - np.array([0.1], dtype=np.float32), - op_version=opv) + node = OnnxAdd( + ("X1", FloatTensorType()), + np.array([0.1], dtype=np.float32), + op_version=opv, + ) out = OnnxLinearRegressor( - node, coefficients=[0.3, 0.3, 0.4, 0.5, 0.6], - intercepts=[-50.], op_version=1) - last = OnnxIdentity(out, output_names=['Y'], op_version=opv) - onx = last.to_onnx([('X1', FloatTensorType((None, 5)))], - outputs=[('Y', FloatTensorType())], - target_opset=opv) + node, + coefficients=[0.3, 0.3, 0.4, 0.5, 0.6], + intercepts=[-50.0], + op_version=1, + ) + last = OnnxIdentity(out, output_names=["Y"], op_version=opv) + onx = last.to_onnx( + [("X1", FloatTensorType((None, 5)))], + outputs=[("Y", FloatTensorType())], + target_opset=opv, + ) return onx, (node, out, last) - for opv in [{'': 10}] + list(range(9, TARGET_OPSET + 1)): + for opv in [{"": 10}] + list(range(9, TARGET_OPSET + 1)): with self.subTest(opv=opv): if isinstance(opv, dict): - if opv[''] > get_latest_tested_opset_version(): + if opv[""] > get_latest_tested_opset_version(): continue - elif (opv is not None and - opv > get_latest_tested_opset_version()): + elif opv is not None and opv > get_latest_tested_opset_version(): continue for i, nbnode in enumerate((1, 2, 3, 100)): onx, nodes = generate_onnx_graph(opv=opv) - if opv == {'': 10}: + if opv == {"": 10}: for im in onx.opset_import: if im.version > 10: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) + "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) else: for im in onx.opset_import: if im.version > opv: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) + "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, - providers=["CPUExecutionProvider"]) + as_string, providers=["CPUExecutionProvider"] + ) except (InvalidGraph, InvalidArgument) as e: - if (isinstance(opv, dict) and - opv[''] >= onnx_opset_version()): + if isinstance(opv, dict) and opv[""] >= onnx_opset_version(): continue - if (isinstance(opv, int) and - opv >= onnx_opset_version()): + if isinstance(opv, int) and opv >= onnx_opset_version(): continue raise AssertionError( - "Unable to load 
opv={}\n---\n{}\n---".format( - opv, onx)) from e + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e X = (np.ones((1, 5)) * nbnode).astype(np.float32) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] self.assertEqual(res.shape, (1, 1)) inputs = None - expected = [[('Ad_C0', FloatTensorType(shape=[]))], - [('Li_Y0', FloatTensorType(shape=[]))], - [('Y', FloatTensorType(shape=[]))]] + expected = [ + [("Ad_C0", FloatTensorType(shape=[]))], + [("Li_Y0", FloatTensorType(shape=[]))], + [("Y", FloatTensorType(shape=[]))], + ] for i, node in enumerate(nodes): shape = node.get_output_type_inference(inputs) self.assertEqual(len(shape), 1) @@ -101,71 +110,70 @@ def generate_onnx_graph(opv): else: self.assertEqual( str(expected[i]), - str([(shape[0].onnx_name, shape[0].type)])) + str([(shape[0].onnx_name, shape[0].type)]), + ) inputs = shape - def common_test_sub_graph(self, first_input, model, options=None, - cls_type=FloatTensorType, start=9): + def common_test_sub_graph( + self, first_input, model, options=None, cls_type=FloatTensorType, start=9 + ): def generate_onnx_graph(opv): dtype = np.float32 if cls_type == FloatTensorType else np.float64 - node = OnnxAdd(first_input, np.array([0.1], dtype=dtype), - op_version=opv) + node = OnnxAdd(first_input, np.array([0.1], dtype=dtype), op_version=opv) lr = model() lr.fit(np.ones([10, 5]), np.arange(0, 10) % 3) out = OnnxSubEstimator(lr, node, op_version=1, options=options) if model == LogisticRegression: - last = OnnxIdentity(out[1], output_names=['Y'], op_version=opv) + last = OnnxIdentity(out[1], output_names=["Y"], op_version=opv) else: - last = OnnxIdentity(out, output_names=['Y'], op_version=opv) - onx = last.to_onnx([('X1', cls_type((None, 5)))], - outputs=[('Y', cls_type())], - target_opset=opv) + last = OnnxIdentity(out, output_names=["Y"], op_version=opv) + onx = last.to_onnx( + [("X1", cls_type((None, 5)))], + outputs=[("Y", cls_type())], + target_opset=opv, + ) return onx dtype = np.float32 if cls_type == FloatTensorType else np.float64 opsets = list(range(start, TARGET_OPSET + 1)) - for opv in [{'': TARGET_OPSET}] + opsets: + for opv in [{"": TARGET_OPSET}] + opsets: with self.subTest(opv=opv): if isinstance(opv, dict): - if opv[''] > get_latest_tested_opset_version(): + if opv[""] > get_latest_tested_opset_version(): continue - elif (opv is not None and - opv > get_latest_tested_opset_version()): + elif opv is not None and opv > get_latest_tested_opset_version(): continue for i, nbnode in enumerate((1, 2, 3, 100)): onx = generate_onnx_graph(opv=opv) - if opv == {'': TARGET_OPSET}: + if opv == {"": TARGET_OPSET}: for im in onx.opset_import: if im.version > TARGET_OPSET: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) + "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) else: for im in onx.opset_import: if im.version > opv: raise AssertionError( - "Wrong final opset\nopv={}\n{}".format( - opv, onx)) - self.assertNotIn('zipmap', str(onx).lower()) + "Wrong final opset\nopv={}\n{}".format(opv, onx) + ) + self.assertNotIn("zipmap", str(onx).lower()) as_string = onx.SerializeToString() try: ort = InferenceSession( - as_string, - providers=["CPUExecutionProvider"]) - except (InvalidGraph, InvalidArgument, Fail, - NotImplemented) as e: - if (isinstance(opv, dict) and - opv[''] >= onnx_opset_version()): + as_string, providers=["CPUExecutionProvider"] + ) + except (InvalidGraph, InvalidArgument, Fail, NotImplemented) as e: + if 
isinstance(opv, dict) and opv[""] >= onnx_opset_version(): continue - if (isinstance(opv, int) and - opv >= onnx_opset_version()): + if isinstance(opv, int) and opv >= onnx_opset_version(): continue raise AssertionError( - "Unable to load opv={}\n---\n{}\n---".format( - opv, onx)) from e + "Unable to load opv={}\n---\n{}\n---".format(opv, onx) + ) from e X = (np.ones((1, 5)) * nbnode).astype(dtype) - res_out = ort.run(None, {'X1': X}) + res_out = ort.run(None, {"X1": X}) assert len(res_out) == 1 res = res_out[0] if model == LogisticRegression: @@ -176,72 +184,78 @@ def generate_onnx_graph(opv): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sub_graph_tuple(self): - self.common_test_sub_graph( - ('X1', FloatTensorType()), LinearRegression) + self.common_test_sub_graph(("X1", FloatTensorType()), LinearRegression) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.4.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_sub_graph_tuple_double(self): self.common_test_sub_graph( - ('X1', DoubleTensorType()), LinearRegression, - cls_type=DoubleTensorType) + ("X1", DoubleTensorType()), LinearRegression, cls_type=DoubleTensorType + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sub_graph_str(self): - self.common_test_sub_graph('X1', LinearRegression) + self.common_test_sub_graph("X1", LinearRegression) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.4.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_sub_graph_str_double(self): - self.common_test_sub_graph('X1', LinearRegression, - cls_type=DoubleTensorType) + self.common_test_sub_graph("X1", LinearRegression, cls_type=DoubleTensorType) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sub_graph_tuple_cls(self): self.common_test_sub_graph( - ('X1', FloatTensorType()), LogisticRegression, - {'zipmap': False}) + ("X1", FloatTensorType()), LogisticRegression, {"zipmap": False} + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.4.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.10.0"), - reason="ArgMax not available for double") + reason="ArgMax not available for double", + ) @ignore_warnings(category=DeprecationWarning) def test_sub_graph_tuple_cls_double(self): self.common_test_sub_graph( - ('X1', DoubleTensorType()), LogisticRegression, - options={'zipmap': False}, cls_type=DoubleTensorType, - start=13) + ("X1", DoubleTensorType()), + LogisticRegression, + options={"zipmap": False}, + cls_type=DoubleTensorType, + start=13, + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sub_graph_str_cls(self): - self.common_test_sub_graph('X1', LogisticRegression, - {'zipmap': False}) + self.common_test_sub_graph("X1", LogisticRegression, {"zipmap": False}) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - 
pv.Version(ort_version) < pv.Version("1.4.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.10.0"), - reason="ArgMax not available for double") + reason="ArgMax not available for double", + ) @ignore_warnings(category=DeprecationWarning) def test_sub_graph_str_cls_double(self): self.common_test_sub_graph( - 'X1', LogisticRegression, options={'zipmap': False}, - cls_type=DoubleTensorType, start=13) + "X1", + LogisticRegression, + options={"zipmap": False}, + cls_type=DoubleTensorType, + start=13, + ) if __name__ == "__main__": diff --git a/tests/test_convert.py b/tests/test_convert.py index 098be8c8a..f672ebfa5 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -6,6 +6,7 @@ import numpy from sklearn.preprocessing import OneHotEncoder, LabelEncoder from sklearn.cluster import KMeans + try: from sklearn.preprocessing import KBinsDiscretizer except ImportError: @@ -18,24 +19,21 @@ def get_domain_opset(onx): domains = onx.opset_import - res = [{'domain': dom.domain, 'version': dom.version} - for dom in domains] - return {d['domain']: d['version'] for d in res} + res = [{"domain": dom.domain, "version": dom.version} for dom in domains] + return {d["domain"]: d["version"] for d in res} class TestConvert(unittest.TestCase): - def test_target_opset(self): data = load_iris() X = data.data model = KMeans(n_clusters=3) model.fit(X) for i in range(1, TARGET_OPSET + 1): - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=i) + model_onnx = to_onnx(model, X[:1].astype(numpy.float32), target_opset=i) dom = get_domain_opset(model_onnx) self.assertEqual(len(dom), 1) - assert dom[''] <= i + assert dom[""] <= i def test_target_opset_dict(self): data = load_iris() @@ -44,12 +42,13 @@ def test_target_opset_dict(self): model.fit(X) for i in range(1, TARGET_OPSET + 1): for j in (1, 2): - tops = {'': i, 'ai.onnx.ml': j} - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=tops) + tops = {"": i, "ai.onnx.ml": j} + model_onnx = to_onnx( + model, X[:1].astype(numpy.float32), target_opset=tops + ) dom = get_domain_opset(model_onnx) self.assertEqual(len(dom), 1) - assert dom[''] <= i + assert dom[""] <= i @unittest.skipIf(KBinsDiscretizer is None, "skl too old") def test_target_opset_dict_kbins(self): @@ -59,15 +58,16 @@ def test_target_opset_dict_kbins(self): model.fit(X) for i in range(9, TARGET_OPSET + 1): for j in (1, 2): - tops = {'': i, 'ai.onnx.ml': j} - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=tops) + tops = {"": i, "ai.onnx.ml": j} + model_onnx = to_onnx( + model, X[:1].astype(numpy.float32), target_opset=tops + ) dom = get_domain_opset(model_onnx) - if dom != {'ai.onnx.ml': 1, '': i}: - assert dom[''] <= i - assert dom['ai.onnx.ml'] == 1 + if dom != {"ai.onnx.ml": 1, "": i}: + assert dom[""] <= i + assert dom["ai.onnx.ml"] == 1 continue - self.assertEqual(dom, {'ai.onnx.ml': 1, '': i}) + self.assertEqual(dom, {"ai.onnx.ml": 1, "": i}) def test_regressor(self): data = load_iris() @@ -77,43 +77,42 @@ def test_regressor(self): model.fit(X, y) for i in range(9, TARGET_OPSET + 1): for j in (1, 2): - tops = {'': i, 'ai.onnx.ml': j} - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=tops) + tops = {"": i, "ai.onnx.ml": j} + model_onnx = to_onnx( + model, X[:1].astype(numpy.float32), target_opset=tops + ) dom = get_domain_opset(model_onnx) self.assertEqual(len(dom), 1) - self.assertIn(dom[''], 
(i, i - 1)) + self.assertIn(dom[""], (i, i - 1)) def test_onehot(self): try: - model = OneHotEncoder(categories='auto') + model = OneHotEncoder(categories="auto") except TypeError: # parameter categories added in 0.20 return - data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], - dtype=numpy.int64) + data = numpy.array( + [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64 + ) model.fit(data) for i in range(9, TARGET_OPSET + 1): for j in (1, 2): - tops = {'': i, 'ai.onnx.ml': j} - model_onnx = to_onnx(model, data[:1], - target_opset=tops) + tops = {"": i, "ai.onnx.ml": j} + model_onnx = to_onnx(model, data[:1], target_opset=tops) dom = get_domain_opset(model_onnx) self.assertEqual(len(dom), 2) - self.assertIn(dom[''], list(range(9, TARGET_OPSET + 1))) - self.assertEqual(dom['ai.onnx.ml'], 1) + self.assertIn(dom[""], list(range(9, TARGET_OPSET + 1))) + self.assertEqual(dom["ai.onnx.ml"], 1) def test_label_encoder(self): model = LabelEncoder() - data = numpy.array([1.2, 3.4, 5.4, 1.2], - dtype=numpy.float32) + data = numpy.array([1.2, 3.4, 5.4, 1.2], dtype=numpy.float32) model.fit(data) for i in range(9, TARGET_OPSET + 1): for j in (1, 2): - tops = {'': i, 'ai.onnx.ml': j} + tops = {"": i, "ai.onnx.ml": j} try: - model_onnx = to_onnx(model, data[:1], - target_opset=tops) + model_onnx = to_onnx(model, data[:1], target_opset=tops) except RuntimeError as e: if j == 1: # expected @@ -123,7 +122,7 @@ def test_label_encoder(self): raise AssertionError("It should fail for opset.ml == 1") dom = get_domain_opset(model_onnx) self.assertEqual(len(dom), 2) - self.assertEqual(dom['ai.onnx.ml'], 2) + self.assertEqual(dom["ai.onnx.ml"], 2) def test_warnings(self): with warnings.catch_warnings(record=True) as w: @@ -143,35 +142,46 @@ def test_name(self): model.fit(X) with self.assertRaises(TypeError): - to_onnx(model, X[:1].astype(numpy.float32), - target_opset=TARGET_OPSET, naming=(2, 3)) - - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=TARGET_OPSET, naming='KBINS') + to_onnx( + model, + X[:1].astype(numpy.float32), + target_opset=TARGET_OPSET, + naming=(2, 3), + ) + + model_onnx = to_onnx( + model, + X[:1].astype(numpy.float32), + target_opset=TARGET_OPSET, + naming="KBINS", + ) inputs = set(i.name for i in model_onnx.graph.input) outputs = set(o.name for o in model_onnx.graph.output) for node in model_onnx.graph.node: for i in node.input: - if i not in inputs and not i.startswith('KBINS'): + if i not in inputs and not i.startswith("KBINS"): raise AssertionError("Wrong %r." % i) for o in node.output: - if o not in outputs and not o.startswith('KBINS'): + if o not in outputs and not o.startswith("KBINS"): raise AssertionError("Wrong %r." % o) - model_onnx = to_onnx(model, X[:1].astype(numpy.float32), - target_opset=TARGET_OPSET, - naming=lambda n, ns: 'FBINS' + n) + model_onnx = to_onnx( + model, + X[:1].astype(numpy.float32), + target_opset=TARGET_OPSET, + naming=lambda n, ns: "FBINS" + n, + ) inputs = set(i.name for i in model_onnx.graph.input) outputs = set(o.name for o in model_onnx.graph.output) for node in model_onnx.graph.node: for i in node.input: - if i not in inputs and not i.startswith('FBINS'): + if i not in inputs and not i.startswith("FBINS"): raise AssertionError("Wrong %r." % i) for o in node.output: - if o not in outputs and not o.startswith('FBINS'): + if o not in outputs and not o.startswith("FBINS"): raise AssertionError("Wrong %r." 
% o) - self.assertEqual(inputs, {'X'}) - self.assertEqual(outputs, {'variable'}) + self.assertEqual(inputs, {"X"}) + self.assertEqual(outputs, {"variable"}) if __name__ == "__main__": diff --git a/tests/test_convert_options.py b/tests/test_convert_options.py index 7015f4408..77c6ca04e 100644 --- a/tests/test_convert_options.py +++ b/tests/test_convert_options.py @@ -13,6 +13,7 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.tree import DecisionTreeClassifier from skl2onnx import to_onnx + try: from sklearn.utils._testing import ignore_warnings except ImportError: @@ -21,11 +22,10 @@ from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession -sklver = '.'.join(sklver.split('.')[:2]) +sklver = ".".join(sklver.split(".")[:2]) class TestConvertOptions(unittest.TestCase): - @staticmethod def get_model_classifiers(): models = [ @@ -67,119 +67,146 @@ def dict_to_array(proba_as_dict): @staticmethod def almost_equal( - expected_label, expected_proba, - label, probas, zipmap=False, decimal=5): + expected_label, expected_proba, label, probas, zipmap=False, decimal=5 + ): if expected_label.tolist() != label.tolist(): raise AssertionError( - "Label mismatch %r (expected) != %r." % ( - expected_label.tolist(), - label.tolist())) + "Label mismatch %r (expected) != %r." + % (expected_label.tolist(), label.tolist()) + ) if zipmap: - raise AssertionError( - "zipmap should be False, not %r." % zipmap) + raise AssertionError("zipmap should be False, not %r." % zipmap) assert_almost_equal(expected_proba, probas, decimal=decimal) @staticmethod def almost_equal_class_labels( - expected_label, expected_proba, expected_class_labels, - label, probas, class_labels, - zipmap=False, decimal=5): + expected_label, + expected_proba, + expected_class_labels, + label, + probas, + class_labels, + zipmap=False, + decimal=5, + ): if expected_class_labels.tolist() != class_labels.tolist(): raise AssertionError( - "Class labels mismatch %r (expected) != %r." % ( - expected_class_labels.tolist(), - class_labels.tolist())) + "Class labels mismatch %r (expected) != %r." + % (expected_class_labels.tolist(), class_labels.tolist()) + ) if expected_label.tolist() != label.tolist(): raise AssertionError( - "Label mismatch %r (expected) != %r." % ( - expected_label.tolist(), - label.tolist())) + "Label mismatch %r (expected) != %r." + % (expected_label.tolist(), label.tolist()) + ) if zipmap: - raise AssertionError( - "zipmap should be False, not %r." % zipmap) + raise AssertionError("zipmap should be False, not %r." 
% zipmap) assert_almost_equal(expected_proba, probas, decimal=decimal) def classifier_option_output_class_labels(self, use_string): data = load_iris() X, y = data.data, data.target if use_string: - y = ['cl%d' % _ for _ in y] + y = ["cl%d" % _ for _ in y] X = X.astype(numpy.float32) - X_train, X_test, y_train, y_test = train_test_split( - X, y, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) for zipmap, addcl in [(False, True), (False, False)]: for cls in TestConvertOptions.get_model_classifiers(): - with self.subTest(cls=cls.__class__.__name__, zipmap=zipmap, - output_class_labels=addcl): + with self.subTest( + cls=cls.__class__.__name__, zipmap=zipmap, output_class_labels=addcl + ): cls.fit(X_train, y_train) expected_label = cls.predict(X_test) expected_proba = cls.predict_proba(X_test) onx = to_onnx( - cls, X[:1], options={ - 'zipmap': zipmap, 'output_class_labels': addcl}, - target_opset=TARGET_OPSET) + cls, + X[:1], + options={"zipmap": zipmap, "output_class_labels": addcl}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) if addcl: TestConvertOptions.almost_equal_class_labels( - expected_label, expected_proba, cls.classes_, - *got, zipmap=zipmap) + expected_label, + expected_proba, + cls.classes_, + *got, + zipmap=zipmap + ) else: TestConvertOptions.almost_equal( - expected_label, expected_proba, - *got, zipmap=zipmap) + expected_label, expected_proba, *got, zipmap=zipmap + ) onx = to_onnx( - cls, X[:1], - options={cls.__class__: { - 'zipmap': zipmap, 'output_class_labels': addcl}}, - target_opset=TARGET_OPSET) + cls, + X[:1], + options={ + cls.__class__: { + "zipmap": zipmap, + "output_class_labels": addcl, + } + }, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) if addcl: TestConvertOptions.almost_equal_class_labels( - expected_label, expected_proba, cls.classes_, - *got, zipmap=zipmap) + expected_label, + expected_proba, + cls.classes_, + *got, + zipmap=zipmap + ) else: TestConvertOptions.almost_equal( - expected_label, expected_proba, - *got, zipmap=zipmap) + expected_label, expected_proba, *got, zipmap=zipmap + ) onx = to_onnx( - cls, X[:1], - options={id(cls): { - 'zipmap': zipmap, 'output_class_labels': addcl}}, - target_opset=TARGET_OPSET) + cls, + X[:1], + options={ + id(cls): {"zipmap": zipmap, "output_class_labels": addcl} + }, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) if addcl: TestConvertOptions.almost_equal_class_labels( - expected_label, expected_proba, cls.classes_, - *got, zipmap=zipmap) + expected_label, + expected_proba, + cls.classes_, + *got, + zipmap=zipmap + ) else: TestConvertOptions.almost_equal( - expected_label, expected_proba, - *got, zipmap=zipmap) + expected_label, expected_proba, *got, zipmap=zipmap + ) - @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"), - reason="known issue with string") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - 
DeprecationWarning)) + @unittest.skipIf( + pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_classifier_option_output_class_labels_int64(self): self.classifier_option_output_class_labels(False) - @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"), - reason="known issue with string") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @unittest.skipIf( + pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_classifier_option_output_class_labels_str(self): self.classifier_option_output_class_labels(True) @@ -191,28 +218,29 @@ def get_model_multi_label(): return models @staticmethod - def almost_equal_multi(expected_label, expected_proba, label, *probas, - zipmap=False, decimal=5): + def almost_equal_multi( + expected_label, expected_proba, label, *probas, zipmap=False, decimal=5 + ): assert_almost_equal(expected_label, label) - if zipmap == 'columns': + if zipmap == "columns": for row, pr in zip(expected_proba.T, probas): - assert_almost_equal( - row.ravel(), pr.ravel(), decimal=decimal) + assert_almost_equal(row.ravel(), pr.ravel(), decimal=decimal) elif zipmap: for expected, proba in zip(expected_proba, probas): assert_almost_equal( expected_proba, TestConvertOptions.dict_to_array(proba), - decimal=decimal) + decimal=decimal, + ) else: proba = probas[0] assert_almost_equal(expected_proba, proba, decimal=decimal) - @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"), - reason="known issue with string") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @unittest.skipIf( + pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_multi_label_option_zipmap(self): data = load_iris() X, y = data.data, data.target @@ -221,22 +249,30 @@ def test_multi_label_option_zipmap(self): y[0, :] = 1 X_train, X_test, y_train, y_test = train_test_split(X, y) - for zipmap in [False, True, 'columns']: + for zipmap in [False, True, "columns"]: for cls in TestConvertOptions.get_model_multi_label(): with self.subTest(cls=cls.__class__, zipmap=zipmap): cls.fit(X_train, y_train) expected_label = cls.predict(X_test) expected_proba = cls.predict_proba(X_test) - if zipmap == 'columns': + if zipmap == "columns": # Not implemented. 
with self.assertRaises(ValueError): - to_onnx(cls, X[:1], options={'zipmap': zipmap}, - target_opset=TARGET_OPSET) + to_onnx( + cls, + X[:1], + options={"zipmap": zipmap}, + target_opset=TARGET_OPSET, + ) continue - onx = to_onnx(cls, X[:1], options={'zipmap': zipmap}, - target_opset=TARGET_OPSET) + onx = to_onnx( + cls, + X[:1], + options={"zipmap": zipmap}, + target_opset=TARGET_OPSET, + ) if zipmap: # The converter works but SequenceConstruct @@ -244,52 +280,58 @@ def test_multi_label_option_zipmap(self): continue sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) TestConvertOptions.almost_equal_multi( - expected_label, expected_proba, *got, zipmap=zipmap) + expected_label, expected_proba, *got, zipmap=zipmap + ) onx = to_onnx( - cls, X[:1], - options={cls.__class__: {'zipmap': zipmap}}, - target_opset=TARGET_OPSET) + cls, + X[:1], + options={cls.__class__: {"zipmap": zipmap}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) assert_almost_equal(expected_label, got[0]) onx = to_onnx( - cls, X[:1], - options={id(cls): {'zipmap': zipmap}}, - target_opset=TARGET_OPSET) + cls, + X[:1], + options={id(cls): {"zipmap": zipmap}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) assert_almost_equal(expected_label, got[0]) @staticmethod def almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *probas, decimal=5): + expected_label, expected_proba, expected_class_labels, *probas, decimal=5 + ): if expected_label.tolist() != probas[0].tolist(): raise AssertionError( - "Labels mismatched %r != %r." % ( - expected_label.tolist(), probas[0].tolist())) + "Labels mismatched %r != %r." + % (expected_label.tolist(), probas[0].tolist()) + ) for pr1, pr2 in zip(expected_proba, probas[1]): assert_almost_equal(pr1, pr2, decimal=decimal) for la1, la2 in zip(expected_class_labels, probas[2]): if la1.tolist() != la2.tolist(): raise AssertionError( - "Class labels mismatched %r != %r." % ( - la1.tolist(), la2.tolist())) + "Class labels mismatched %r != %r." 
% (la1.tolist(), la2.tolist()) + ) - @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"), - reason="known issue with string") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @unittest.skipIf( + pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_multi_label_option_zipmap_class_labels(self): data = load_iris() X, y = data.data, data.target @@ -305,48 +347,47 @@ def test_multi_label_option_zipmap_class_labels(self): expected_label = cls.predict(X_test) expected_proba = cls.predict_proba(X_test) expected_class_labels = [c.classes_ for c in cls.estimators_] - opts = {'zipmap': False, 'output_class_labels': True} + opts = {"zipmap": False, "output_class_labels": True} - onx = to_onnx(cls, X[:1], options=opts, - target_opset=TARGET_OPSET) + onx = to_onnx(cls, X[:1], options=opts, target_opset=TARGET_OPSET) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) onx = to_onnx( - cls, X[:1], options={cls.__class__: opts}, - target_opset=TARGET_OPSET) + cls, X[:1], options={cls.__class__: opts}, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) onx = to_onnx( - cls, X[:1], options={id(cls): opts}, - target_opset=TARGET_OPSET) + cls, X[:1], options={id(cls): opts}, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) - @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"), - reason="known issue with string") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @unittest.skipIf( + pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_multi_label_option_zipmap_class_labels_string(self): data = load_iris() X, y = data.data, data.target @@ -354,8 +395,7 @@ def test_multi_label_option_zipmap_class_labels_string(self): y = numpy.vstack([y, 1 - y]).T y[0, :] = 1 y[:10, 1] = 3 - y = numpy.array(list(map( - lambda s: "cl%d" % s, y.ravel()))).reshape(y.shape) + y = numpy.array(list(map(lambda s: "cl%d" % s, y.ravel()))).reshape(y.shape) X_train, X_test, y_train, y_test = train_test_split(X, y) for cls in TestConvertOptions.get_model_multi_label(): @@ -364,44 +404,43 @@ def 
test_multi_label_option_zipmap_class_labels_string(self): expected_label = cls.predict(X_test) expected_proba = cls.predict_proba(X_test) expected_class_labels = [c.classes_ for c in cls.estimators_] - opts = {'zipmap': False, 'output_class_labels': True} + opts = {"zipmap": False, "output_class_labels": True} - onx = to_onnx(cls, X[:1], options=opts, - target_opset=TARGET_OPSET) + onx = to_onnx(cls, X[:1], options=opts, target_opset=TARGET_OPSET) # with open("debugmo2.onnx", "wb") as f: # f.write(onx.SerializeToString()) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) onx = to_onnx( - cls, X[:1], options={cls.__class__: opts}, - target_opset=TARGET_OPSET) + cls, X[:1], options={cls.__class__: opts}, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) onx = to_onnx( - cls, X[:1], options={id(cls): opts}, - target_opset=TARGET_OPSET) + cls, X[:1], options={id(cls): opts}, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X_test}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X_test}) self.assertEqual(len(got), 3) TestConvertOptions.almost_equal_multi_labels( - expected_label, expected_proba, expected_class_labels, - *got) + expected_label, expected_proba, expected_class_labels, *got + ) if __name__ == "__main__": diff --git a/tests/test_custom_transformer_ordwoe.py b/tests/test_custom_transformer_ordwoe.py index 5429ba940..ea98ba79d 100644 --- a/tests/test_custom_transformer_ordwoe.py +++ b/tests/test_custom_transformer_ordwoe.py @@ -29,10 +29,10 @@ def fit(self, X, y, sample_weight=None): self.encoder_ = OrdinalEncoder().fit(X) tr = self.encoder_.transform(X) maxi = (tr.max(axis=1) + 1).astype(np.int64) - intervals = [[(i - 1, i, False, True) for i in range(0, m)] - for m in maxi] - weights = [[10 * j + i for i in range(len(inter))] - for j, inter in enumerate(intervals)] + intervals = [[(i - 1, i, False, True) for i in range(0, m)] for m in maxi] + weights = [ + [10 * j + i for i in range(len(inter))] for j, inter in enumerate(intervals) + ] self.woe_ = WOETransformer(intervals, onehot=False, weights=weights) self.woe_.fit(tr) return self @@ -42,25 +42,22 @@ def transform(self, X): return self.woe_.transform(tr) -def ordwoe_encoder_parser( - scope, model, inputs, custom_parsers=None): +def ordwoe_encoder_parser(scope, model, inputs, custom_parsers=None): if len(inputs) != 1: - raise RuntimeError( - "Unexpected number of inputs: %d != 1." % len(inputs)) + raise RuntimeError("Unexpected number of inputs: %d != 1." % len(inputs)) if inputs[0].type is None: - raise RuntimeError( - "Unexpected type: %r." 
% (inputs[0], )) + raise RuntimeError("Unexpected type: %r." % (inputs[0],)) alias = get_model_alias(type(model)) this_operator = scope.declare_local_operator(alias, model) this_operator.inputs.append(inputs[0]) this_operator.outputs.append( - scope.declare_local_variable('catwoe', FloatTensorType())) + scope.declare_local_variable("catwoe", FloatTensorType()) + ) return this_operator.outputs def ordwoe_encoder_shape_calculator(operator): - check_input_and_output_numbers( - operator, input_count_range=1, output_count_range=1) + check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) input_dim = operator.inputs[0].get_first_dimension() shape = operator.inputs[0].type.shape second_dim = None if len(shape) != 2 else shape[1] @@ -75,15 +72,14 @@ def ordwoe_encoder_converter(scope, operator, container): sub = OnnxSubEstimator(op.encoder_, X, op_version=opv) cast = OnnxCast(sub, op_version=opv, to=np.float32) - cat = OnnxSubEstimator(op.woe_, cast, op_version=opv, - input_types=[Int64TensorType()]) - idcat = OnnxIdentity(cat, output_names=operator.outputs[:1], - op_version=opv) + cat = OnnxSubEstimator( + op.woe_, cast, op_version=opv, input_types=[Int64TensorType()] + ) + idcat = OnnxIdentity(cat, output_names=operator.outputs[:1], op_version=opv) idcat.add_to(scope, container) class TestCustomTransformerOrdWOE(unittest.TestCase): - def test_pipeline(self): data = load_iris() X = data.data.astype(np.float32) @@ -92,19 +88,20 @@ def test_pipeline(self): expected = pipe.transform(X) onx = to_onnx(pipe, X, target_opset=TARGET_OPSET) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X})[0] + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X})[0] assert_almost_equal(expected, got) @unittest.skipIf(TARGET_OPSET < 12, reason="opset>=12 is required") def test_custom_ordinal_woe(self): - update_registered_converter( - OrdinalWOETransformer, "OrdinalWOETransformer", + OrdinalWOETransformer, + "OrdinalWOETransformer", ordwoe_encoder_shape_calculator, ordwoe_encoder_converter, - parser=ordwoe_encoder_parser) + parser=ordwoe_encoder_parser, + ) data = load_iris() X, y = data.data, data.target @@ -117,9 +114,9 @@ def test_custom_ordinal_woe(self): onx = to_onnx(ordwoe, X, target_opset=TARGET_OPSET) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': X})[0] + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": X})[0] assert_almost_equal(expected, got) diff --git a/tests/test_custom_transformer_tsne.py b/tests/test_custom_transformer_tsne.py index d9f41b769..34ea5ade2 100644 --- a/tests/test_custom_transformer_tsne.py +++ b/tests/test_custom_transformer_tsne.py @@ -21,16 +21,18 @@ from test_utils import dump_data_and_model, TARGET_OPSET -ort_version = '.'.join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class PredictableTSNE(BaseEstimator, TransformerMixin): - def __init__(self, - transformer=None, - estimator=None, - normalize=True, - keep_tsne_outputs=False, - **kwargs): + def __init__( + self, + transformer=None, + estimator=None, + normalize=True, + keep_tsne_outputs=False, + **kwargs + ): TransformerMixin.__init__(self) BaseEstimator.__init__(self) if estimator is None: @@ -43,10 +45,13 @@ def __init__(self, if not hasattr(transformer, "fit_transform"): raise AttributeError( "transformer {} does not have a 'fit_transform' " 
- "method.".format(type(transformer))) + "method.".format(type(transformer)) + ) if not hasattr(estimator, "predict"): - raise AttributeError("estimator {} does not have a 'predict' " - "method.".format(type(estimator))) + raise AttributeError( + "estimator {} does not have a 'predict' " + "method.".format(type(estimator)) + ) self.normalize = normalize if kwargs: self.set_params(**kwargs) @@ -66,7 +71,8 @@ def fit(self, X, y, sample_weight=None): sig = inspect.signature(self.estimator.fit) if "sample_weight" in sig.parameters: self.estimator_ = clone(self.estimator).fit( - X, target, sample_weight=sample_weight) + X, target, sample_weight=sample_weight + ) else: self.estimator_ = clone(self.estimator).fit(X, target) mean = target.mean(axis=0) @@ -136,15 +142,17 @@ def predictable_tsne_converter(scope, operator, container): offset=op.mean_.ravel().astype(numpy.float32), ) - container.add_node("Scaler", [knn_output.onnx_name], [output.full_name], - op_domain="ai.onnx.ml", - **attrs) + container.add_node( + "Scaler", + [knn_output.onnx_name], + [output.full_name], + op_domain="ai.onnx.ml", + **attrs + ) class TestCustomTransformerTSNE(unittest.TestCase): - def test_custom_pipeline_scaler(self): - digits = datasets.load_digits(n_class=6) Xd = digits.data[:50] yd = digits.target[:50] @@ -164,41 +172,45 @@ def test_custom_pipeline_scaler(self): ptsne_knn, "predictable_tsne", [("input", FloatTensorType([None, Xd.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( Xd.astype(numpy.float32)[:7], ptsne_knn, model_onnx, - basename="CustomTransformerTSNEkNN-OneOffArray") + basename="CustomTransformerTSNEkNN-OneOffArray", + ) trace_line = [] def my_parser(scope, model, inputs, custom_parsers=None): trace_line.append(model) - return _parse_sklearn_simple_model(scope, model, inputs, - custom_parsers) + return _parse_sklearn_simple_model(scope, model, inputs, custom_parsers) model_onnx = convert_sklearn( ptsne_knn, "predictable_tsne", [("input", FloatTensorType([None, Xd.shape[1]]))], custom_parsers={PredictableTSNE: my_parser}, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(trace_line) == 1 dump_data_and_model( Xd.astype(numpy.float32)[:7], ptsne_knn, model_onnx, - basename="CustomTransformerTSNEkNNCustomParser-OneOffArray") + basename="CustomTransformerTSNEkNNCustomParser-OneOffArray", + ) update_registered_parser(PredictableTSNE, my_parser) model_onnx = convert_sklearn( ptsne_knn, "predictable_tsne", [("input", FloatTensorType([None, Xd.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(trace_line) == 2 diff --git a/tests/test_investigate.py b/tests/test_investigate.py index 68dd1c694..691153976 100644 --- a/tests/test_investigate.py +++ b/tests/test_investigate.py @@ -5,6 +5,7 @@ from contextlib import redirect_stdout import numpy from numpy.testing import assert_almost_equal + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -16,12 +17,13 @@ from sklearn.preprocessing import RobustScaler, StandardScaler from skl2onnx import convert_sklearn from skl2onnx.helpers import ( - collect_intermediate_steps, compare_objects, - enumerate_pipeline_models) + collect_intermediate_steps, + compare_objects, + enumerate_pipeline_models, +) from skl2onnx.helpers.investigate import _alter_model_for_debugging from skl2onnx.common import MissingShapeCalculator -from skl2onnx.common.data_types import ( - FloatTensorType, guess_data_type) +from skl2onnx.common.data_types import 
FloatTensorType, guess_data_type from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession @@ -30,61 +32,68 @@ class MyScaler(StandardScaler): class TestInvestigate(unittest.TestCase): - def test_simple_pipeline(self): for opset in (11, TARGET_OPSET): if opset > TARGET_OPSET: continue - data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], - dtype=numpy.float32) - model = Pipeline([("scaler1", StandardScaler()), - ("scaler2", StandardScaler())]) + data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32) + model = Pipeline( + [("scaler1", StandardScaler()), ("scaler2", StandardScaler())] + ) model.fit(data) all_models = list(enumerate_pipeline_models(model)) steps = collect_intermediate_steps( - model, "pipeline", [("input", FloatTensorType([None, 2]))], - target_opset=opset) + model, + "pipeline", + [("input", FloatTensorType([None, 2]))], + target_opset=opset, + ) self.assertEqual(len(steps), 2) self.assertEqual(len(all_models), 3) - expected = 'version:%d}' % opset - expected1 = 'version:1}' + expected = "version:%d}" % opset + expected1 = "version:1}" model.transform(data) for step in steps: - onnx_step = step['onnx_step'] - text = str(onnx_step).replace('\n', ' ').replace(' ', '') + onnx_step = step["onnx_step"] + text = str(onnx_step).replace("\n", " ").replace(" ", "") if expected not in text and expected1 not in text: raise AssertionError( - "Unable to find '{}'\n'{}'\n".format( - expected, text)) + "Unable to find '{}'\n'{}'\n".format(expected, text) + ) sess = InferenceSession( - onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': data}) + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": data}) onnx_output = onnx_outputs[0] - skl_outputs = step['model']._debug.outputs['transform'] - assert str(step['model']._debug) is not None - sdt = step['model']._debug.display(data, 5) - assert 'shape' in sdt + skl_outputs = step["model"]._debug.outputs["transform"] + assert str(step["model"]._debug) is not None + sdt = step["model"]._debug.display(data, 5) + assert "shape" in sdt assert_almost_equal(onnx_output, skl_outputs) compare_objects(onnx_output, skl_outputs) def test_missing_converter(self): - data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], - dtype=numpy.float32) - model = Pipeline([("scaler1", StandardScaler()), - ("scaler2", StandardScaler()), - ("scaler3", MyScaler())]) + data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32) + model = Pipeline( + [ + ("scaler1", StandardScaler()), + ("scaler2", StandardScaler()), + ("scaler3", MyScaler()), + ] + ) model.fit(data) all_models = list(enumerate_pipeline_models(model)) try: collect_intermediate_steps( - model, "pipeline", + model, + "pipeline", [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) except MissingShapeCalculator as e: assert "MyScaler" in str(e) assert "gallery" in str(e) @@ -98,159 +107,165 @@ def test_missing_converter(self): # whole pipeline continue step_model = step - data_in = step_model._debug.inputs['transform'] + data_in = step_model._debug.inputs["transform"] t = guess_data_type(data_in) try: - onnx_step = convert_sklearn(step_model, initial_types=t, - target_opset=TARGET_OPSET) + onnx_step = convert_sklearn( + step_model, initial_types=t, target_opset=TARGET_OPSET + ) except MissingShapeCalculator as e: if "MyScaler" in str(e): continue raise sess = InferenceSession( - 
onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': data_in}) + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": data_in}) onnx_output = onnx_outputs[0] - skl_outputs = step_model._debug.outputs['transform'] + skl_outputs = step_model._debug.outputs["transform"] assert_almost_equal(onnx_output, skl_outputs) compare_objects(onnx_output, skl_outputs) def test_simple_column_transformer(self): if ColumnTransformer is None: return - data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], - dtype=numpy.float32) - model = ColumnTransformer([("scaler1", StandardScaler(), [0]), - ("scaler2", RobustScaler(), [1])]) + data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32) + model = ColumnTransformer( + [("scaler1", StandardScaler(), [0]), ("scaler2", RobustScaler(), [1])] + ) model.fit(data) all_models = list(enumerate_pipeline_models(model)) steps = collect_intermediate_steps( - model, "coulmn transformer", + model, + "coulmn transformer", [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(steps) == 2 assert len(all_models) == 3 model.transform(data) for step in steps: - onnx_step = step['onnx_step'] + onnx_step = step["onnx_step"] sess = InferenceSession( - onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': data}) + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": data}) onnx_output = onnx_outputs[0] - skl_outputs = step['model']._debug.outputs['transform'] + skl_outputs = step["model"]._debug.outputs["transform"] assert_almost_equal(onnx_output, skl_outputs) compare_objects(onnx_output.tolist(), skl_outputs.tolist()) def test_simple_feature_union(self): - data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], - dtype=numpy.float32) - model = FeatureUnion([("scaler1", StandardScaler()), - ("scaler2", RobustScaler())]) + data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32) + model = FeatureUnion( + [("scaler1", StandardScaler()), ("scaler2", RobustScaler())] + ) model.fit(data) all_models = list(enumerate_pipeline_models(model)) steps = collect_intermediate_steps( - model, "feature union", + model, + "feature union", [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(steps) == 2 assert len(all_models) == 3 model.transform(data) for step in steps: - onnx_step = step['onnx_step'] + onnx_step = step["onnx_step"] sess = InferenceSession( - onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': data}) + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": data}) onnx_output = onnx_outputs[0] - skl_outputs = step['model']._debug.outputs['transform'] + skl_outputs = step["model"]._debug.outputs["transform"] assert_almost_equal(onnx_output, skl_outputs) compare_objects(onnx_output, skl_outputs) def test_simple_pipeline_predict(self): data = load_iris() X, y = data.data, data.target - model = Pipeline([("scaler1", StandardScaler()), - ("lr", LogisticRegression())]) + model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())]) model.fit(X, y) all_models = list(enumerate_pipeline_models(model)) steps = collect_intermediate_steps( - model, "pipeline", + model, + "pipeline", 
[("input", FloatTensorType((None, X.shape[1])))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(steps) == 2 assert len(all_models) == 3 model.predict(X) for step in steps: - onnx_step = step['onnx_step'] + onnx_step = step["onnx_step"] sess = InferenceSession( - onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': X.astype(numpy.float32)}) + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": X.astype(numpy.float32)}) onnx_output = onnx_outputs[0] - dbg_outputs = step['model']._debug.outputs - skl_outputs = (dbg_outputs['transform'] if 'transform' in - dbg_outputs else dbg_outputs['predict']) + dbg_outputs = step["model"]._debug.outputs + skl_outputs = ( + dbg_outputs["transform"] + if "transform" in dbg_outputs + else dbg_outputs["predict"] + ) assert_almost_equal(onnx_output, skl_outputs, decimal=6) compare_objects(onnx_output, skl_outputs) def test_simple_pipeline_predict_proba(self): data = load_iris() X, y = data.data, data.target - model = Pipeline([("scaler1", StandardScaler()), - ("lr", LogisticRegression())]) + model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())]) model.fit(X, y) all_models = list(enumerate_pipeline_models(model)) steps = collect_intermediate_steps( - model, "pipeline", + model, + "pipeline", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) assert len(steps) == 2 assert len(all_models) == 3 model.predict_proba(X) for step in steps: - onnx_step = step['onnx_step'] + onnx_step = step["onnx_step"] sess = InferenceSession( - onnx_step.SerializeToString(), - providers=["CPUExecutionProvider"]) - onnx_outputs = sess.run(None, {'input': X.astype(numpy.float32)}) - dbg_outputs = step['model']._debug.outputs - if 'transform' in dbg_outputs: + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) + onnx_outputs = sess.run(None, {"input": X.astype(numpy.float32)}) + dbg_outputs = step["model"]._debug.outputs + if "transform" in dbg_outputs: onnx_output = onnx_outputs[0] - skl_outputs = dbg_outputs['transform'] + skl_outputs = dbg_outputs["transform"] else: onnx_output = onnx_outputs[1] - skl_outputs = dbg_outputs['predict_proba'] + skl_outputs = dbg_outputs["predict_proba"] assert_almost_equal(onnx_output, skl_outputs, decimal=6) compare_objects(onnx_output, skl_outputs) def test_verbose(self): data = load_iris() X, y = data.data, data.target - model = Pipeline([("scaler1", StandardScaler()), - ("lr", LogisticRegression())]) + model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())]) model.fit(X, y) st = io.StringIO() with redirect_stdout(st): - convert_sklearn( - model, initial_types=[('X', FloatTensorType())], - verbose=1) + convert_sklearn(model, initial_types=[("X", FloatTensorType())], verbose=1) self.assertIn("[convert_sklearn] convert_topology", st.getvalue()) - @unittest.skipIf(TARGET_OPSET < 18, - reason="ReferenceEvaluator not implemented") + @unittest.skipIf(TARGET_OPSET < 18, reason="ReferenceEvaluator not implemented") def test_replay_run(self): try: from .test_utils.utils_backend_onnx import ReferenceEvaluatorEx @@ -258,16 +273,15 @@ def test_replay_run(self): from test_utils.utils_backend_onnx import ReferenceEvaluatorEx data = load_iris() X, y = data.data, data.target - model = Pipeline([("scaler1", StandardScaler()), - ("lr", LogisticRegression())]) + model = Pipeline([("scaler1", 
StandardScaler()), ("lr", LogisticRegression())]) model.fit(X, y) onx = convert_sklearn( - model, initial_types=[('X', FloatTensorType())], - options={'zipmap': False}) + model, initial_types=[("X", FloatTensorType())], options={"zipmap": False} + ) sess = ReferenceEvaluatorEx(onx) sess.run(None, {"X": X}) repl = sess.replay_run() - self.assertIn('probability_tensor', repl) + self.assertIn("probability_tensor", repl) if __name__ == "__main__": diff --git a/tests/test_onnx_helper.py b/tests/test_onnx_helper.py index 5778de85d..243fc7553 100644 --- a/tests/test_onnx_helper.py +++ b/tests/test_onnx_helper.py @@ -20,13 +20,14 @@ change_onnx_domain, add_output_initializer, get_initializers, - update_onnx_initializers) + update_onnx_initializers, +) from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession def one_hot_encoder_supports_string(): # pv.Version does not work with development versions - vers = '.'.join(sklearn_version.split('.')[:2]) + vers = ".".join(sklearn_version.split(".")[:2]) return pv.Version(vers) >= pv.Version("0.20.0") @@ -40,17 +41,20 @@ def get_model(self, model): from onnxruntime import InferenceSession session = InferenceSession( - save_onnx_model(model), - providers=["CPUExecutionProvider"]) + save_onnx_model(model), providers=["CPUExecutionProvider"] + ) return lambda X: session.run(None, {"input": X})[0] def test_onnx_helper_load_save(self): model = make_pipeline(StandardScaler(), Binarizer(threshold=0.5)) X = numpy.array([[0.1, 1.1], [0.2, 2.2]]) model.fit(X) - model_onnx = convert_sklearn(model, "binarizer", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "binarizer", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) filename = "temp_onnx_helper_load_save.onnx" save_onnx_model(model_onnx, filename) model = load_onnx_model(filename) @@ -72,13 +76,17 @@ def test_onnx_helper_load_save(self): def test_onnx_helper_load_save_init(self): model = make_pipeline( Binarizer(), - OneHotEncoder(sparse=False, handle_unknown='ignore'), - StandardScaler()) + OneHotEncoder(sparse=False, handle_unknown="ignore"), + StandardScaler(), + ) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - model_onnx = convert_sklearn(model, "pipe3", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) filename = "temp_onnx_helper_load_save.onnx" save_onnx_model(model_onnx, filename) model = load_onnx_model(filename) @@ -98,14 +106,18 @@ def test_onnx_helper_load_save_init(self): reason="OneHotEncoder did not have categories_ before 0.20", ) def test_onnx_helper_load_save_init_meta(self): - model = make_pipeline(Binarizer(), OneHotEncoder(sparse=False), - StandardScaler()) + model = make_pipeline( + Binarizer(), OneHotEncoder(sparse=False), StandardScaler() + ) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - model_onnx = convert_sklearn(model, "pipe3", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) - meta = {'pA': 'one', 'pB': 'two'} + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) + meta = {"pA": "one", "pB": "two"} onnx.helper.set_model_props(model_onnx, meta) new_model = select_model_inputs_outputs(model_onnx, "variable") vals = {p.key: p.value for p in new_model.metadata_props} @@ 
-115,11 +127,13 @@ def test_change_onnx_domain(self): model = make_pipeline(StandardScaler()) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - model_onnx = convert_sklearn(model, "pipe3", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) - model_onnx = change_onnx_domain( - model_onnx, {'Scaler': ('ScalerNew', 'ML2')}) + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) + model_onnx = change_onnx_domain(model_onnx, {"Scaler": ("ScalerNew", "ML2")}) self.assertIn('domain: "ML2"', str(model_onnx)) self.assertIn('op_type: "ScalerNew"', str(model_onnx)) @@ -128,34 +142,37 @@ def test_add_output_initializer(self): cst = numpy.array([0.5, 0.7, 0.8], dtype=numpy.int32) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - model_onnx = convert_sklearn(model, "pipe3", - [("input", DoubleTensorType([None, 2]))], - target_opset=TARGET_OPSET) - new_model_onnx = add_output_initializer( - model_onnx, "new_output", cst) + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", DoubleTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) + new_model_onnx = add_output_initializer(model_onnx, "new_output", cst) sess = InferenceSession( - new_model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + new_model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) self.assertEqual(len(res), 2) assert_almost_equal(cst, res[1]) self.assertEqual(model_onnx.domain, new_model_onnx.domain) names = [o.name for o in sess.get_outputs()] - self.assertEqual(['variable', 'new_output'], names) + self.assertEqual(["variable", "new_output"], names) new_model_onnx = add_output_initializer( - model_onnx, ["new_output1", "new_output2"], [cst, cst + 1]) + model_onnx, ["new_output1", "new_output2"], [cst, cst + 1] + ) sess = InferenceSession( - new_model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + new_model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) self.assertEqual(len(res), 3) assert_almost_equal(cst, res[1]) assert_almost_equal(cst + 1, res[2]) names = [o.name for o in sess.get_outputs()] - self.assertEqual(['variable', 'new_output1', 'new_output2'], names) + self.assertEqual(["variable", "new_output1", "new_output2"], names) with self.assertRaises(ValueError): add_output_initializer(model_onnx, "input", cst) @@ -173,30 +190,35 @@ def test_get_initializers(self): model = make_pipeline(StandardScaler()) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - model_onnx = convert_sklearn(model, "pipe3", - [("input", DoubleTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", DoubleTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) init = get_initializers(model_onnx) self.assertEqual(len(init), 2) - assert_almost_equal(init['Di_Divcst'], - numpy.array([0.10897247, 0.51173724])) - assert_almost_equal(init['Su_Subcst'], numpy.array([0.225, 1.975])) + assert_almost_equal(init["Di_Divcst"], numpy.array([0.10897247, 0.51173724])) + assert_almost_equal(init["Su_Subcst"], numpy.array([0.225, 1.975])) def test_update_onnx_initializers(self): model = make_pipeline(StandardScaler()) X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]]) model.fit(X) - 
model_onnx = convert_sklearn(model, "pipe3", - [("input", DoubleTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "pipe3", + [("input", DoubleTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) init = get_initializers(model_onnx) self.assertEqual(len(init), 2) for v in init.values(): v[:] = 1.5 update_onnx_initializers(model_onnx, init) init = get_initializers(model_onnx) - assert_almost_equal(init['Di_Divcst'], numpy.array([1.5, 1.5])) - assert_almost_equal(init['Su_Subcst'], numpy.array([1.5, 1.5])) + assert_almost_equal(init["Di_Divcst"], numpy.array([1.5, 1.5])) + assert_almost_equal(init["Su_Subcst"], numpy.array([1.5, 1.5])) if __name__ == "__main__": diff --git a/tests/test_onnx_rare_helper.py b/tests/test_onnx_rare_helper.py index 7e47a0526..6afeeea6b 100644 --- a/tests/test_onnx_rare_helper.py +++ b/tests/test_onnx_rare_helper.py @@ -15,20 +15,18 @@ class TestOnnxRareHelper(unittest.TestCase): - def test_kmeans_upgrade(self): data = load_iris() X = data.data model = KMeans(n_clusters=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=7) + model_onnx = convert_sklearn( + model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=7 + ) model8 = upgrade_opset_number(model_onnx, 8) assert "version: 8" in str(model8) - @unittest.skipIf(onnx_opset_version() < 11, - reason="Needs opset >= 11") + @unittest.skipIf(onnx_opset_version() < 11, reason="Needs opset >= 11") def test_knn_upgrade(self): iris = load_iris() X, _ = iris.data, iris.target @@ -36,9 +34,9 @@ def test_knn_upgrade(self): clr = NearestNeighbors(n_neighbors=3, radius=None) clr.fit(X) - model_onnx = convert_sklearn(clr, "up", - [("input", FloatTensorType([None, 4]))], - target_opset=9) + model_onnx = convert_sklearn( + clr, "up", [("input", FloatTensorType([None, 4]))], target_opset=9 + ) try: upgrade_opset_number(model_onnx, 8) raise AssertionError() diff --git a/tests/test_onnxruntime.py b/tests/test_onnxruntime.py index e00f6809a..28ec705ae 100644 --- a/tests/test_onnxruntime.py +++ b/tests/test_onnxruntime.py @@ -4,6 +4,7 @@ import unittest import numpy as np from numpy.testing import assert_allclose + try: import onnx.reference # noqa from test_utils import ReferenceEvaluatorEx @@ -13,21 +14,77 @@ class TestOnnxruntime(unittest.TestCase): - - X3_15 = np.array([ - [-0.32256478, 1.7266265, 0.47051477, 1.1111994, 1.9582617, - -2.1582267, -1.9729482, -1.5662458, 1.8967382, 0.9119621, - -0.93173814, 2.9724689, -0.7231156, 0.10379718, -1.3578224, - 0.37283298, -0.38267845, 0.23394746, -1.6884863, 0.6374923], - [-0.53266096, -0.767421, 1.661441, 0.52790266, 1.6549803, - 0.5076044, -2.9024098, 0.86126643, -1.3819953, 2.5567708, - -1.7888857, -0.07472081, 0.24990171, -0.87638474, -0.14730039, - 1.3493251, -0.7835222, -0.9997528, -0.91080195, -3.6515126], - [-0.8703916, 0.43145382, 1.0918913, -1.397069, -0.48047885, - 3.1278436, 3.8035386, -0.22710086, -0.42011356, 1.4203368, - 0.47596663, -0.44953802, -0.68278235, 0.87819546, -2.4272032, - 0.08891433, 0.7960927, 1.2197107, 1.7008729, 1.0122501]], - dtype=np.float32) + X3_15 = np.array( + [ + [ + -0.32256478, + 1.7266265, + 0.47051477, + 1.1111994, + 1.9582617, + -2.1582267, + -1.9729482, + -1.5662458, + 1.8967382, + 0.9119621, + -0.93173814, + 2.9724689, + -0.7231156, + 0.10379718, + -1.3578224, + 0.37283298, + -0.38267845, + 0.23394746, + -1.6884863, + 0.6374923, + ], + [ + -0.53266096, + -0.767421, + 1.661441, + 0.52790266, + 1.6549803, + 
0.5076044, + -2.9024098, + 0.86126643, + -1.3819953, + 2.5567708, + -1.7888857, + -0.07472081, + 0.24990171, + -0.87638474, + -0.14730039, + 1.3493251, + -0.7835222, + -0.9997528, + -0.91080195, + -3.6515126, + ], + [ + -0.8703916, + 0.43145382, + 1.0918913, + -1.397069, + -0.48047885, + 3.1278436, + 3.8035386, + -0.22710086, + -0.42011356, + 1.4203368, + 0.47596663, + -0.44953802, + -0.68278235, + 0.87819546, + -2.4272032, + 0.08891433, + 0.7960927, + 1.2197107, + 1.7008729, + 1.0122501, + ], + ], + dtype=np.float32, + ) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") def test_tree_ensemble_classifier(self): @@ -61,36 +118,33 @@ def test_tree_ensemble_classifier(self): print(repr(X[:5])) """ X = self.X3_15 - name = os.path.join(os.path.dirname(__file__), - "datasets", "treecl.onnx") + name = os.path.join(os.path.dirname(__file__), "datasets", "treecl.onnx") sess = ReferenceEvaluatorEx(name) - label, proba = sess.run(None, {'input': X}) + label, proba = sess.run(None, {"input": X}) sesso = InferenceSession(name, providers=["CPUExecutionProvider"]) - labelo, probao = sesso.run(None, {'input': X}) + labelo, probao = sesso.run(None, {"input": X}) assert_allclose(probao, proba, atol=1e-8) assert_allclose(labelo, label) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") def test_tree_ensemble_classifier_2(self): X = self.X3_15 - name = os.path.join(os.path.dirname(__file__), - "datasets", "treecl2.onnx") + name = os.path.join(os.path.dirname(__file__), "datasets", "treecl2.onnx") sess = ReferenceEvaluatorEx(name) - label, proba = sess.run(None, {'input': X}) + label, proba = sess.run(None, {"input": X}) sesso = InferenceSession(name, providers=["CPUExecutionProvider"]) - labelo, probao = sesso.run(None, {'input': X}) + labelo, probao = sesso.run(None, {"input": X}) assert_allclose(probao, proba, atol=1e-6) assert_allclose(labelo, label) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") def test_tree_ensemble_classifier_3(self): X = self.X3_15[:, :10] - name = os.path.join(os.path.dirname(__file__), - "datasets", "treecl3.onnx") + name = os.path.join(os.path.dirname(__file__), "datasets", "treecl3.onnx") sess = ReferenceEvaluatorEx(name) - label, proba = sess.run(None, {'input': X}) + label, proba = sess.run(None, {"input": X}) sesso = InferenceSession(name, providers=["CPUExecutionProvider"]) - labelo, probao = sesso.run(None, {'input': X}) + labelo, probao = sesso.run(None, {"input": X}) assert_allclose(probao, proba, atol=1e-6) assert_allclose(labelo, label) diff --git a/tests/test_op10.py b/tests/test_op10.py index 474bcfabf..30029e6cf 100644 --- a/tests/test_op10.py +++ b/tests/test_op10.py @@ -14,32 +14,36 @@ class TestOp10(unittest.TestCase): - def check_domain(self, model, domain="", target_opset=10): for op in model.opset_import: if op.domain == domain: if op.version > target_opset: raise RuntimeError( - "Wrong opset {} > {} expected".format( - op.domain, target_opset)) + "Wrong opset {} > {} expected".format(op.domain, target_opset) + ) @unittest.skipIf(onnx_opset_version() < 10, reason="out of scope") def test_logistic_regression(self): - model, X = fit_classification_model( - linear_model.LogisticRegression(), 3) + model, X = fit_classification_model(linear_model.LogisticRegression(), 3) target_opset = 10 - model_onnx = convert_sklearn(model, "op10", - [("input", FloatTensorType([None, 3]))], - target_opset=target_opset) + model_onnx = convert_sklearn( + model, + "op10", + [("input", FloatTensorType([None, 3]))], + target_opset=target_opset, + 
) self.check_domain(model_onnx, target_opset=target_opset) @unittest.skipIf(onnx_opset_version() < 10, reason="out of scope") def test_kmeans(self): model, X = fit_classification_model(KMeans(), 3) target_opset = 10 - model_onnx = convert_sklearn(model, "op10", - [("input", FloatTensorType([None, 3]))], - target_opset=target_opset) + model_onnx = convert_sklearn( + model, + "op10", + [("input", FloatTensorType([None, 3]))], + target_opset=target_opset, + ) self.check_domain(model_onnx, target_opset=target_opset) @unittest.skipIf(onnx_opset_version() < 10, reason="out of scope") @@ -47,18 +51,23 @@ def test_gaussian_mixture(self): model, X = fit_classification_model(GaussianMixture(), 3) target_opset = 10 model_onnx = convert_sklearn( - model, "op10", + model, + "op10", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=target_opset) + target_opset=target_opset, + ) self.check_domain(model_onnx, target_opset=target_opset) @unittest.skipIf(onnx_opset_version() < 10, reason="out of scope") def test_gaussian_process_regressor(self): model, X = fit_classification_model(GaussianProcessRegressor(), 3) target_opset = 10 - model_onnx = convert_sklearn(model, "op10", - [("input", FloatTensorType([None, 3]))], - target_opset=target_opset) + model_onnx = convert_sklearn( + model, + "op10", + [("input", FloatTensorType([None, 3]))], + target_opset=target_opset, + ) self.check_domain(model_onnx, target_opset=target_opset) @unittest.skipIf(onnx_opset_version() < 10, reason="out of scope") @@ -73,9 +82,12 @@ def test_voting_classifier(self): ) model, X = fit_classification_model(model, 3) target_opset = 10 - model_onnx = convert_sklearn(model, "op10", - [("input", FloatTensorType([None, 3]))], - target_opset=target_opset) + model_onnx = convert_sklearn( + model, + "op10", + [("input", FloatTensorType([None, 3]))], + target_opset=target_opset, + ) self.check_domain(model_onnx, target_opset=target_opset) diff --git a/tests/test_opset13.py b/tests/test_opset13.py index 8fe3e3025..e0ffd9725 100644 --- a/tests/test_opset13.py +++ b/tests/test_opset13.py @@ -9,12 +9,12 @@ OnnxReduceSumApi11, OnnxSplitApi18, OnnxSqueezeApi11, - OnnxUnsqueezeApi11) + OnnxUnsqueezeApi11, +) from test_utils import TARGET_OPSET class TestOpset13(unittest.TestCase): - def test_reduce_sum(self): X = numpy.array([[2, 1], [0, 1]], dtype=numpy.float32) @@ -23,44 +23,53 @@ def test_reduce_sum(self): continue with self.subTest(opset=opset): onx = OnnxReduceSumApi11( - 'X', output_names=['Y'], keepdims=0, op_version=opset) + "X", output_names=["Y"], keepdims=0, op_version=opset + ) model_def = onx.to_onnx( - {'X': X.astype(numpy.float32)}, target_opset=opset) + {"X": X.astype(numpy.float32)}, target_opset=opset + ) got = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]).run( - None, {'X': X}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ).run(None, {"X": X}) assert_almost_equal(numpy.sum(X), got[0], decimal=6) onx = OnnxReduceSumApi11( - 'X', output_names=['Y'], axes=[1], op_version=opset) + "X", output_names=["Y"], axes=[1], op_version=opset + ) model_def = onx.to_onnx( - {'X': X.astype(numpy.float32)}, target_opset=opset) + {"X": X.astype(numpy.float32)}, target_opset=opset + ) got = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]).run( - None, {'X': X}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ).run(None, {"X": X}) assert_almost_equal( - numpy.sum(X, axis=1, keepdims=True), got[0], decimal=6) 
+ numpy.sum(X, axis=1, keepdims=True), got[0], decimal=6 + ) def test_split(self): - x = numpy.array([1., 2., 3., 4., 5., 6.]).astype(numpy.float32) - y = [numpy.array([1., 2.]).astype(numpy.float32), - numpy.array([3., 4.]).astype(numpy.float32), - numpy.array([5., 6.]).astype(numpy.float32)] + x = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).astype(numpy.float32) + y = [ + numpy.array([1.0, 2.0]).astype(numpy.float32), + numpy.array([3.0, 4.0]).astype(numpy.float32), + numpy.array([5.0, 6.0]).astype(numpy.float32), + ] for opset in (10, 11, 12, 13, 17, 18): if opset > TARGET_OPSET: continue with self.subTest(opset=opset): onx = OnnxSplitApi18( - 'X', axis=0, split=[2, 2, 2], - output_names=['Y1', 'Y2', 'Y3'], op_version=opset) + "X", + axis=0, + split=[2, 2, 2], + output_names=["Y1", "Y2", "Y3"], + op_version=opset, + ) model_def = onx.to_onnx( - {'X': x.astype(numpy.float32)}, target_opset=opset) + {"X": x.astype(numpy.float32)}, target_opset=opset + ) got = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]).run( - None, {'X': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ).run(None, {"X": x}) assert_almost_equal(y[0], got[0]) assert_almost_equal(y[1], got[1]) assert_almost_equal(y[2], got[2]) @@ -73,17 +82,20 @@ def test_squeeze(self): continue with self.subTest(opset=opset): onx = OnnxSqueezeApi11( - 'X', axes=[1], output_names=['Y'], op_version=opset) + "X", axes=[1], output_names=["Y"], op_version=opset + ) model_def = onx.to_onnx( - {'X': x.astype(numpy.float32)}, target_opset=opset) + {"X": x.astype(numpy.float32)}, target_opset=opset + ) got = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]).run( - None, {'X': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ).run(None, {"X": x}) assert_almost_equal(y, got[0]) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.0.0'), - reason="onnxruntime too old, onnx too recent") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.0.0"), + reason="onnxruntime too old, onnx too recent", + ) def test_unsqueeze(self): x = numpy.random.randn(1, 3, 1, 5).astype(numpy.float32) y = numpy.expand_dims(x, axis=-2) @@ -92,13 +104,14 @@ def test_unsqueeze(self): continue with self.subTest(opset=opset): onx = OnnxUnsqueezeApi11( - 'X', axes=[-2], output_names=['Y'], op_version=opset) + "X", axes=[-2], output_names=["Y"], op_version=opset + ) model_def = onx.to_onnx( - {'X': x.astype(numpy.float32)}, target_opset=opset) + {"X": x.astype(numpy.float32)}, target_opset=opset + ) got = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]).run( - None, {'X': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ).run(None, {"X": x}) assert_almost_equal(y, got[0]) diff --git a/tests/test_optimisation.py b/tests/test_optimisation.py index 4acee2ec8..b14533207 100644 --- a/tests/test_optimisation.py +++ b/tests/test_optimisation.py @@ -6,77 +6,93 @@ from onnx import checker from onnx import helper from onnx import TensorProto as tp -from skl2onnx.common.onnx_optimisation_identity import ( - onnx_remove_node_identity) -from test_utils import ( - TARGET_OPSET, TARGET_IR, - InferenceSessionEx as InferenceSession) +from skl2onnx.common.onnx_optimisation_identity import onnx_remove_node_identity +from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession class TestOptimisation(unittest.TestCase): - - @unittest.skipIf(TARGET_OPSET <= 14, - 
reason="only verified with opset 15+") + @unittest.skipIf(TARGET_OPSET <= 14, reason="only verified with opset 15+") def test_coptimisation_identity_removal(self): # investigation issue #854 then_branch = helper.make_graph( - [helper.make_node('Identity', inputs=["identity_one"], - outputs=["then_result"])], - 'then_branch', + [ + helper.make_node( + "Identity", inputs=["identity_one"], outputs=["then_result"] + ) + ], + "then_branch", [], - [helper.make_tensor_value_info('then_result', tp.INT64, [1])]) + [helper.make_tensor_value_info("then_result", tp.INT64, [1])], + ) else_branch = helper.make_graph( - [helper.make_node('Identity', inputs=["identity_zero"], - outputs=["else_result"])], - 'else_branch', + [ + helper.make_node( + "Identity", inputs=["identity_zero"], outputs=["else_result"] + ) + ], + "else_branch", [], - [helper.make_tensor_value_info('else_result', tp.INT64, [1])]) + [helper.make_tensor_value_info("else_result", tp.INT64, [1])], + ) nodes = [ - helper.make_node('Constant', inputs=[], outputs=["one"], - value=helper.make_tensor( - name='', data_type=tp.INT64, dims=[1], - vals=[1])), - helper.make_node('Constant', inputs=[], outputs=["zero"], - value=helper.make_tensor( - name='', data_type=tp.INT64, dims=[1], - vals=[0])), - - helper.make_node('Identity', inputs=["one"], - outputs=["identity_one"]), - helper.make_node('Identity', inputs=["zero"], - outputs=["identity_zero"]), - - helper.make_node('If', inputs=["X"], outputs=["y"], - then_branch=then_branch, - else_branch=else_branch)] + helper.make_node( + "Constant", + inputs=[], + outputs=["one"], + value=helper.make_tensor( + name="", data_type=tp.INT64, dims=[1], vals=[1] + ), + ), + helper.make_node( + "Constant", + inputs=[], + outputs=["zero"], + value=helper.make_tensor( + name="", data_type=tp.INT64, dims=[1], vals=[0] + ), + ), + helper.make_node("Identity", inputs=["one"], outputs=["identity_one"]), + helper.make_node("Identity", inputs=["zero"], outputs=["identity_zero"]), + helper.make_node( + "If", + inputs=["X"], + outputs=["y"], + then_branch=then_branch, + else_branch=else_branch, + ), + ] g = helper.make_graph( - nodes, 'if_test', - [helper.make_tensor_value_info('X', tp.BOOL, [1])], - [helper.make_tensor_value_info('y', tp.INT64, [1])]) + nodes, + "if_test", + [helper.make_tensor_value_info("X", tp.BOOL, [1])], + [helper.make_tensor_value_info("y", tp.INT64, [1])], + ) # Create the model and check m = helper.make_model( - g, opset_imports=[helper.make_opsetid('', TARGET_OPSET)], - ir_version=TARGET_IR) + g, + opset_imports=[helper.make_opsetid("", TARGET_OPSET)], + ir_version=TARGET_IR, + ) checker.check_model(m) sess = InferenceSession( - m.SerializeToString(), - providers=["CPUExecutionProvider"]) + m.SerializeToString(), providers=["CPUExecutionProvider"] + ) optimized_model = onnx_remove_node_identity(m) sess_opt = InferenceSession( - optimized_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + optimized_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) for v in [True, False]: x = np.array([v]) - expected = sess.run(None, {'X': x}) - got = sess_opt.run(None, {'X': x}) + expected = sess.run(None, {"X": x}) + got = sess_opt.run(None, {"X": x}) assert_almost_equal(expected, got) diff --git a/tests/test_options.py b/tests/test_options.py index 5b0402d3c..eed853cf7 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -39,7 +39,7 @@ def dummy_converter(scope, operator, container): cst = numpy.array([57777], dtype=numpy.float32) elif len(options) == 1: opts = 
list(options.items()) - if opts[0][0] == 'opt1': + if opts[0][0] == "opt1": if opts[0][1] is None: cst = numpy.array([57789], dtype=numpy.float32) elif opts[0][1]: @@ -48,16 +48,16 @@ def dummy_converter(scope, operator, container): cst = numpy.array([57779], dtype=numpy.float32) else: raise AssertionError("Issue with %r." % options) - elif opts[0][0] == 'opt3': + elif opts[0][0] == "opt3": if opts[0][1] is None: cst = numpy.array([51789], dtype=numpy.float32) - elif opts[0][1] == 'r': + elif opts[0][1] == "r": cst = numpy.array([56779], dtype=numpy.float32) - elif opts[0][1] == 't': + elif opts[0][1] == "t": cst = numpy.array([58779], dtype=numpy.float32) else: raise AssertionError("Issue with %r." % options) - elif opts[0][0] == 'opt2': + elif opts[0][0] == "opt2": if opts[0][1] is None: cst = numpy.array([44444], dtype=numpy.float32) elif isinstance(opts[0][1], int): @@ -71,25 +71,29 @@ def dummy_converter(scope, operator, container): id1 = OnnxIdentity(X, op_version=opv) op = OnnxAdd(id1, cst, op_version=opv) - id2 = OnnxIdentity(op, output_names=out[:1], - op_version=opv) + id2 = OnnxIdentity(op, output_names=out[:1], op_version=opv) id2.add_to(scope, container) class TestOptions(unittest.TestCase): - @classmethod def setUpClass(cls): update_registered_converter( - DummyTransformer, "IdentityTransformer", - dummy_shape_calculator, dummy_converter, - options={'opt1': [False, True], 'opt2': None, - 'opt3': ('r', 't'), 'opt4': -1}) + DummyTransformer, + "IdentityTransformer", + dummy_shape_calculator, + dummy_converter, + options={ + "opt1": [False, True], + "opt2": None, + "opt3": ("r", "t"), + "opt4": -1, + }, + ) def check_in(self, value, onx): if str(value) not in str(onx): - raise AssertionError( - "Unable to find %r in\n%s" % (str(value), str(onx))) + raise AssertionError("Unable to find %r in\n%s" % (str(value), str(onx))) def test_no_options(self): digits = datasets.load_digits(n_class=6) @@ -97,34 +101,37 @@ def test_no_options(self): yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET) - self.check_in('57777', model_onnx) + self.check_in("57777", model_onnx) def test_options_list_true(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt1': True}) - self.check_in('57778', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": True} + ) + self.check_in("57778", model_onnx) def test_options_list_false(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt1': False}) - self.check_in('57779', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": False} + ) + self.check_in("57779", model_onnx) def test_options_list_outside_none(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt1': None}) - self.check_in('57789', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": None} + ) + self.check_in("57789", model_onnx) def test_options_list_outside(self): digits = 
datasets.load_digits(n_class=6) @@ -133,8 +140,7 @@ def test_options_list_outside(self): idtr = DummyTransformer().fit(Xd, yd) with self.assertRaises(ValueError): # value not allowed - to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt1': 'OUT'}) + to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": "OUT"}) def test_options_integer(self): digits = datasets.load_digits(n_class=6) @@ -143,35 +149,33 @@ def test_options_integer(self): idtr = DummyTransformer().fit(Xd, yd) with self.assertRaises(TypeError): # integer not allowed - to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt4': 44444}) + to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt4": 44444}) def test_options_tuple1(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt3': 't'}) - self.check_in('58779', model_onnx) + model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "t"}) + self.check_in("58779", model_onnx) def test_options_tuple2(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt3': 'r'}) - self.check_in('56779', model_onnx) + model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "r"}) + self.check_in("56779", model_onnx) def test_options_tuple_none(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt3': None}) - self.check_in('51789', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": None} + ) + self.check_in("51789", model_onnx) def test_options_tuple_out(self): digits = datasets.load_digits(n_class=6) @@ -180,26 +184,27 @@ def test_options_tuple_out(self): idtr = DummyTransformer().fit(Xd, yd) with self.assertRaises(ValueError): # value not allowed - to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt3': 'G'}) + to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "G"}) def test_options_none(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt2': None}) - self.check_in('44444', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt2": None} + ) + self.check_in("44444", model_onnx) def test_options_num(self): digits = datasets.load_digits(n_class=6) Xd = digits.data[:20].astype(numpy.float32) yd = digits.target[:20] idtr = DummyTransformer().fit(Xd, yd) - model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, - options={'opt2': 33333}) - self.check_in('33333', model_onnx) + model_onnx = to_onnx( + idtr, Xd, target_opset=TARGET_OPSET, options={"opt2": 33333} + ) + self.check_in("33333", model_onnx) if __name__ == "__main__": diff --git a/tests/test_other_converter_library_pipelines.py b/tests/test_other_converter_library_pipelines.py index 4a5b2c76f..1e5031385 100644 --- a/tests/test_other_converter_library_pipelines.py +++ b/tests/test_other_converter_library_pipelines.py @@ -13,9 +13,11 @@ from skl2onnx.common.data_types import FloatTensorType from 
skl2onnx import convert_sklearn, update_registered_converter from skl2onnx.common.shape_calculator import ( - calculate_linear_classifier_output_shapes, ) + calculate_linear_classifier_output_shapes, +) from skl2onnx.operator_converters.linear_classifier import ( - convert_sklearn_linear_classifier, ) + convert_sklearn_linear_classifier, +) from test_utils import dump_data_and_model, TARGET_OPSET @@ -26,8 +28,7 @@ def __init__(self, penalty="l1"): BaseEstimator.__init__(self) ClassifierMixin.__init__(self) self.penalty = penalty - self.estimator = LogisticRegression(penalty=self.penalty, - solver="liblinear") + self.estimator = LogisticRegression(penalty=self.penalty, solver="liblinear") def fit(self, X, y, sample_weight=None): self.estimator_ = self.estimator.fit(X, y, sample_weight=sample_weight) @@ -69,11 +70,14 @@ def test_custom_pipeline_scaler(self): try: model_onnx = convert_sklearn( - pipe, "pipeline", [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + pipe, + "pipeline", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) except RuntimeError as e: if "No proper shape calculator found for" not in str( - e + e ) and "Unable to find a shape calculator for type" not in str(e): raise e @@ -82,42 +86,52 @@ def test_custom_pipeline_scaler(self): pipe, "pipeline", [("input", FloatTensorType([None, 2]))], - custom_conversion_functions={ - "MyCustomClassifier": my_custom_converter}, + custom_conversion_functions={"MyCustomClassifier": my_custom_converter}, custom_shape_calculators={ - "MyCustomClassifier": my_custom_shape_extractor}, - target_opset=TARGET_OPSET) + "MyCustomClassifier": my_custom_shape_extractor + }, + target_opset=TARGET_OPSET, + ) except TypeError as e: - if "Keys in custom_conversion_functions must be types" not in str( - e): + if "Keys in custom_conversion_functions must be types" not in str(e): raise e model_onnx = convert_sklearn( pipe, "pipeline", [("input", FloatTensorType([None, 2]))], - custom_conversion_functions={ - MyCustomClassifier: my_custom_converter}, - custom_shape_calculators={ - MyCustomClassifier: my_custom_shape_extractor}, - target_opset=TARGET_OPSET) + custom_conversion_functions={MyCustomClassifier: my_custom_converter}, + custom_shape_calculators={MyCustomClassifier: my_custom_shape_extractor}, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(numpy.float32), - pipe, model_onnx, - basename="SklearnPipelineScalerCustomClassifier") + pipe, + model_onnx, + basename="SklearnPipelineScalerCustomClassifier", + ) update_registered_converter( - MyCustomClassifier, "MyCustomClassifier", - my_custom_shape_extractor, my_custom_converter) + MyCustomClassifier, + "MyCustomClassifier", + my_custom_shape_extractor, + my_custom_converter, + ) - model_onnx = convert_sklearn(pipe, "pipeline", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + pipe, + "pipeline", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(numpy.float32), pipe, model_onnx, - basename="SklearnPipelineScalerCustomClassifier2") + X.astype(numpy.float32), + pipe, + model_onnx, + basename="SklearnPipelineScalerCustomClassifier2", + ) if __name__ == "__main__": diff --git a/tests/test_parsing_options.py b/tests/test_parsing_options.py index f576be1fc..c1fc7c497 100644 --- a/tests/test_parsing_options.py +++ b/tests/test_parsing_options.py @@ -7,91 +7,106 
@@ from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline from sklearn.datasets import make_regression -from skl2onnx.common.data_types import ( - FloatTensorType, DoubleTensorType) +from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType from skl2onnx import convert_sklearn from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession class TestParsingOptions(unittest.TestCase): - def test_pipeline(self): - model = Pipeline( - [('sc1', StandardScaler()), ('sc2', StandardScaler())]) + model = Pipeline([("sc1", StandardScaler()), ("sc2", StandardScaler())]) X, y = make_regression(n_features=4, random_state=42) model.fit(X) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] - model_onnx = convert_sklearn(model, initial_types=initial_types, - target_opset=TARGET_OPSET) + initial_types = [("input", FloatTensorType((None, X.shape[1])))] + model_onnx = convert_sklearn( + model, initial_types=initial_types, target_opset=TARGET_OPSET + ) assert model_onnx is not None model_onnx = convert_sklearn( - model, initial_types=initial_types, - final_types=[('output', None)], - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + final_types=[("output", None)], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - assert sess.get_outputs()[0].name == 'output' + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert sess.get_outputs()[0].name == "output" model_onnx = convert_sklearn( - model, initial_types=initial_types, - final_types=[('output4', None)], - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + final_types=[("output4", None)], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - assert sess.get_outputs()[0].name == 'output4' + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert sess.get_outputs()[0].name == "output4" model_onnx = convert_sklearn( - model, initial_types=initial_types, - final_types=[('output4', DoubleTensorType())], - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + final_types=[("output4", DoubleTensorType())], + target_opset=TARGET_OPSET, + ) try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except RuntimeError as e: if "Cast(9)" in str(e): return raise e - assert sess.get_outputs()[0].name == 'output4' + assert sess.get_outputs()[0].name == "output4" assert str(sess.get_outputs()[0].type) == "tensor(double)" def test_decisiontree_regressor(self): model = DecisionTreeRegressor(max_depth=2) X, y = make_regression(n_features=4, random_state=42) model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] - model_onnx = convert_sklearn(model, initial_types=initial_types, - final_types=[('output4', None)], - target_opset=TARGET_OPSET) + initial_types = [("input", FloatTensorType((None, X.shape[1])))] + model_onnx = convert_sklearn( + model, + initial_types=initial_types, + final_types=[("output4", None)], + target_opset=TARGET_OPSET, + ) assert model_onnx is not None sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - assert sess.get_outputs()[0].name == 'output4' + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert 
sess.get_outputs()[0].name == "output4" def test_kmeans(self): model = KMeans() X, y = make_regression(n_features=4, random_state=42) model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] with self.assertRaises(RuntimeError): - convert_sklearn(model, initial_types=initial_types, - final_types=[('output4', None)], - target_opset=TARGET_OPSET) + convert_sklearn( + model, + initial_types=initial_types, + final_types=[("output4", None)], + target_opset=TARGET_OPSET, + ) with self.assertRaises(RuntimeError): - convert_sklearn(model, initial_types=initial_types, - final_types=[('dup1', None), ('dup1', None)], - target_opset=TARGET_OPSET) + convert_sklearn( + model, + initial_types=initial_types, + final_types=[("dup1", None), ("dup1", None)], + target_opset=TARGET_OPSET, + ) model_onnx = convert_sklearn( - model, initial_types=initial_types, - final_types=[('output4', None), ('output5', None)], - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + final_types=[("output4", None), ("output5", None)], + target_opset=TARGET_OPSET, + ) assert model_onnx is not None sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - assert sess.get_outputs()[0].name == 'output4' - assert sess.get_outputs()[1].name == 'output5' + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert sess.get_outputs()[0].name == "output4" + assert sess.get_outputs()[1].name == "output5" if __name__ == "__main__": diff --git a/tests/test_raw_name.py b/tests/test_raw_name.py index 276cf458c..efa1ceed3 100644 --- a/tests/test_raw_name.py +++ b/tests/test_raw_name.py @@ -11,7 +11,6 @@ class RawNameTest(unittest.TestCase): - _raw_names = ( "float_input", "float_input--", @@ -35,8 +34,8 @@ def _get_initial_types(X, raw_name): @staticmethod def _predict(clr_onnx, X): sess = rt.InferenceSession( - clr_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + clr_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name return sess.run([label_name], {input_name: X.astype(numpy.float32)})[0] @@ -55,7 +54,8 @@ def test_raw_name(self): clr_onnx = convert_sklearn( clr, initial_types=self._get_initial_types(X, raw_name), - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) pred_onnx = self._predict(clr_onnx, X) assert_almost_equal(pred, pred_onnx) diff --git a/tests/test_scikit_pandas.py b/tests/test_scikit_pandas.py index 6e343f5b6..0abffee8f 100644 --- a/tests/test_scikit_pandas.py +++ b/tests/test_scikit_pandas.py @@ -14,6 +14,7 @@ def has_scikit_pandas(): try: import sklearn_pandas # noqa + return True except ImportError: return False @@ -25,20 +26,23 @@ def dataframe_mapper_shape_calculator(operator): class TestOtherLibrariesInPipelineScikitPandas(unittest.TestCase): - @unittest.skipIf(not has_scikit_pandas(), - reason="scikit-pandas not installed") + @unittest.skipIf(not has_scikit_pandas(), reason="scikit-pandas not installed") def test_scikit_pandas(self): from sklearn_pandas import DataFrameMapper - df = pandas.DataFrame({ - "feat1": [1, 2, 3, 4, 5, 6], - "feat2": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0], - }) - - mapper = DataFrameMapper([ - (["feat1", "feat2"], StandardScaler()), - (["feat1", "feat2"], MinMaxScaler()), - ]) + df = pandas.DataFrame( + { + "feat1": [1, 2, 3, 4, 5, 6], + "feat2": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0], + } + ) + + mapper = DataFrameMapper( + 
[ + (["feat1", "feat2"], StandardScaler()), + (["feat1", "feat2"], MinMaxScaler()), + ] + ) try: model_onnx = convert_sklearn( # noqa diff --git a/tests/test_shapes.py b/tests/test_shapes.py index 2879098d3..330cdd41e 100644 --- a/tests/test_shapes.py +++ b/tests/test_shapes.py @@ -13,26 +13,27 @@ from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestShapes(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 11, reason="not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.0.0"), - reason="not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.0.0"), reason="not available" + ) def test_onnxruntime_shapes_reg(self): iris = load_iris() X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y) clr = RandomForestRegressor(max_depth=1) clr.fit(X_train, y_train) - initial_type = [('float_input', FloatTensorType([None, 4]))] - onx = convert_sklearn(clr, initial_types=initial_type, - target_opset=TARGET_OPSET) + initial_type = [("float_input", FloatTensorType([None, 4]))] + onx = convert_sklearn( + clr, initial_types=initial_type, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) input_name = sess.get_inputs()[0].name pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)}) shape1 = sess.get_inputs()[0].shape @@ -46,21 +47,25 @@ def test_onnxruntime_shapes_reg(self): self.assertEqual(pred_onx[0].shape[1], shape2[1]) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("1.0.0"), - reason="not available") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.0.0"), reason="not available" + ) def test_onnxruntime_shapes_clr(self): iris = load_iris() X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y) clr = RandomForestClassifier(max_depth=1) clr.fit(X_train, y_train) - initial_type = [('float_input', FloatTensorType([None, 4]))] - onx = convert_sklearn(clr, initial_types=initial_type, - options={id(clr): {'zipmap': False}}, - target_opset=TARGET_OPSET) + initial_type = [("float_input", FloatTensorType([None, 4]))] + onx = convert_sklearn( + clr, + initial_types=initial_type, + options={id(clr): {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) input_name = sess.get_inputs()[0].name pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)}) shape1 = sess.get_inputs()[0].shape diff --git a/tests/test_sklearn_adaboost_converter.py b/tests/test_sklearn_adaboost_converter.py index cfb6120c7..678bcd2c9 100644 --- a/tests/test_sklearn_adaboost_converter.py +++ b/tests/test_sklearn_adaboost_converter.py @@ -19,53 +19,71 @@ dump_data_and_model, fit_classification_model, fit_regression_model, - TARGET_OPSET + TARGET_OPSET, ) -ort_version = '.'.join(ort_version.split('.')[:2]) -skl_version = '.'.join(sklearn_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) +skl_version = ".".join(sklearn_version.split(".")[:2]) class TestSklearnAdaBoostModels(unittest.TestCase): @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_samme_r(self): if 
pv.Version(skl_version) < pv.Version("1.2"): - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=10, algorithm="SAMME.R", random_state=42, - base_estimator=DecisionTreeClassifier( - max_depth=2, random_state=42)), 3) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=10, + algorithm="SAMME.R", + random_state=42, + base_estimator=DecisionTreeClassifier(max_depth=2, random_state=42), + ), + 3, + ) else: - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=10, algorithm="SAMME.R", random_state=42, - estimator=DecisionTreeClassifier( - max_depth=2, random_state=42)), 3) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=10, + algorithm="SAMME.R", + random_state=42, + estimator=DecisionTreeClassifier(max_depth=2, random_state=42), + ), + 3, + ) model_onnx = convert_sklearn( model, "AdaBoost classification", [("input", FloatTensorType((None, X_test.shape[1])))], - target_opset=10 + target_opset=10, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, - model, - model_onnx, - basename="SklearnAdaBoostClassifierSAMMER") + X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMER" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_samme_r_decision_function(self): if pv.Version(skl_version) < pv.Version("1.2"): - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=10, algorithm="SAMME.R", random_state=42, - base_estimator=DecisionTreeClassifier( - max_depth=2, random_state=42)), 4) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=10, + algorithm="SAMME.R", + random_state=42, + base_estimator=DecisionTreeClassifier(max_depth=2, random_state=42), + ), + 4, + ) else: - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=10, algorithm="SAMME.R", random_state=42, - estimator=DecisionTreeClassifier( - max_depth=2, random_state=42)), 4) - options = {id(model): {'raw_scores': True}} + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=10, + algorithm="SAMME.R", + random_state=42, + estimator=DecisionTreeClassifier(max_depth=2, random_state=42), + ), + 4, + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( model, "AdaBoost classification", @@ -79,45 +97,64 @@ def test_ada_boost_classifier_samme_r_decision_function(self): model, model_onnx, basename="SklearnAdaBoostClassifierSAMMERDecisionFunction", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_samme_r_logreg(self): if pv.Version(skl_version) < pv.Version("1.2"): - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME.R", - base_estimator=LogisticRegression( - solver='liblinear')), 4) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME.R", + base_estimator=LogisticRegression(solver="liblinear"), + ), + 4, + ) else: - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME.R", - estimator=LogisticRegression( - solver='liblinear')), 4) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME.R", + estimator=LogisticRegression(solver="liblinear"), + ), + 4, + ) model_onnx = convert_sklearn( model, "AdaBoost classification", [("input", 
FloatTensorType((None, X_test.shape[1])))], - target_opset=10 + target_opset=10, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, - model, - model_onnx, - basename="SklearnAdaBoostClassifierSAMMERLogReg") + X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMERLogReg" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_samme(self): if pv.Version(skl_version) < pv.Version("1.2"): - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME", random_state=42, - base_estimator=DecisionTreeClassifier( - max_depth=6, random_state=42)), 2, n_features=7) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME", + random_state=42, + base_estimator=DecisionTreeClassifier(max_depth=6, random_state=42), + ), + 2, + n_features=7, + ) else: - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME", random_state=42, - estimator=DecisionTreeClassifier( - max_depth=6, random_state=42)), 2, n_features=7) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME", + random_state=42, + estimator=DecisionTreeClassifier(max_depth=6, random_state=42), + ), + 2, + n_features=7, + ) model_onnx = convert_sklearn( model, "AdaBoostClSamme", @@ -126,24 +163,32 @@ def test_ada_boost_classifier_samme(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, - model, - model_onnx, - basename="SklearnAdaBoostClassifierSAMMEDT") + X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMEDT" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_samme_decision_function(self): if pv.Version(skl_version) < pv.Version("1.2"): - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME", random_state=42, - base_estimator=DecisionTreeClassifier( - max_depth=6, random_state=42)), 2) + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME", + random_state=42, + base_estimator=DecisionTreeClassifier(max_depth=6, random_state=42), + ), + 2, + ) else: - model, X_test = fit_classification_model(AdaBoostClassifier( - n_estimators=5, algorithm="SAMME", random_state=42, - estimator=DecisionTreeClassifier( - max_depth=6, random_state=42)), 2) - options = {id(model): {'raw_scores': True}} + model, X_test = fit_classification_model( + AdaBoostClassifier( + n_estimators=5, + algorithm="SAMME", + random_state=42, + estimator=DecisionTreeClassifier(max_depth=6, random_state=42), + ), + 2, + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( model, "AdaBoostClSamme", @@ -157,32 +202,30 @@ def test_ada_boost_classifier_samme_decision_function(self): model, model_onnx, basename="SklearnAdaBoostClassifierSAMMEDTDecisionFunction", - methods=['predict', 'decision_function_binary'], + methods=["predict", "decision_function_binary"], ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_lr(self): model, X_test = fit_classification_model( - AdaBoostClassifier(learning_rate=0.3, random_state=42), 3, - is_int=True) + AdaBoostClassifier(learning_rate=0.3, random_state=42), 3, is_int=True + ) model_onnx = convert_sklearn( model, "AdaBoost classification", [("input", Int64TensorType((None, X_test.shape[1])))], - target_opset=10 + target_opset=10, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - 
X_test, - model, - model_onnx, - basename="SklearnAdaBoostClassifierLR") + X_test, model, model_onnx, basename="SklearnAdaBoostClassifierLR" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_classifier_bool(self): model, X_test = fit_classification_model( - AdaBoostClassifier(random_state=42), 3, - is_bool=True) + AdaBoostClassifier(random_state=42), 3, is_bool=True + ) model_onnx = convert_sklearn( model, "AdaBoost classification", @@ -191,113 +234,137 @@ def test_ada_boost_classifier_bool(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, - model, - model_onnx, - basename="SklearnAdaBoostClassifierBool") + X_test, model, model_onnx, basename="SklearnAdaBoostClassifierBool" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor(self): - model, X = fit_regression_model( - AdaBoostRegressor(n_estimators=5)) + model, X = fit_regression_model(AdaBoostRegressor(n_estimators=5)) model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=10) + target_opset=10, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, backend="onnxruntime", - basename="SklearnAdaBoostRegressor-Dec4") + basename="SklearnAdaBoostRegressor-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor_lreg(self): if pv.Version(skl_version) < pv.Version("1.2"): model, X = fit_regression_model( - AdaBoostRegressor(n_estimators=5, - base_estimator=LinearRegression())) + AdaBoostRegressor(n_estimators=5, base_estimator=LinearRegression()) + ) else: model, X = fit_regression_model( - AdaBoostRegressor(n_estimators=5, - estimator=LinearRegression())) + AdaBoostRegressor(n_estimators=5, estimator=LinearRegression()) + ) model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=10) + target_opset=10, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, backend="onnxruntime", - basename="SklearnAdaBoostRegressorLReg-Dec4") + basename="SklearnAdaBoostRegressorLReg-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor_int(self): - model, X = fit_regression_model( - AdaBoostRegressor(n_estimators=5), is_int=True) + model, X = fit_regression_model(AdaBoostRegressor(n_estimators=5), is_int=True) model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=10) + target_opset=10, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, backend="onnxruntime", - basename="SklearnAdaBoostRegressorInt-Dec4") + basename="SklearnAdaBoostRegressorInt-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor_lr10(self): model, X = fit_regression_model( - AdaBoostRegressor(learning_rate=0.5, random_state=42)) + AdaBoostRegressor(learning_rate=0.5, random_state=42) + ) model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=10) + target_opset=10, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, 
backend="onnxruntime", - basename="SklearnAdaBoostRegressorLR-Dec4") + basename="SklearnAdaBoostRegressorLR-Dec4", + ) - @unittest.skipIf((pv.Version(ort_version) < - pv.Version("0.5.9999")), - reason="not available") + @unittest.skipIf( + (pv.Version(ort_version) < pv.Version("0.5.9999")), reason="not available" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor_lr11(self): model, X = fit_regression_model( - AdaBoostRegressor(learning_rate=0.5, random_state=42)) + AdaBoostRegressor(learning_rate=0.5, random_state=42) + ) if onnx_opset_version() < 11: try: convert_sklearn( - model, "AdaBoost regression", - [("input", FloatTensorType([None, X.shape[1]]))]) + model, + "AdaBoost regression", + [("input", FloatTensorType([None, X.shape[1]]))], + ) except RuntimeError: return model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnAdaBoostRegressorLR-Dec4") + X, model, model_onnx, basename="SklearnAdaBoostRegressorLR-Dec4" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_ada_boost_regressor_bool(self): model, X = fit_regression_model( - AdaBoostRegressor(learning_rate=0.5, random_state=42), - is_bool=True) + AdaBoostRegressor(learning_rate=0.5, random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "AdaBoost regression", + model, + "AdaBoost regression", [("input", BooleanTensorType([None, X.shape[1]]))], target_opset=10, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, backend="onnxruntime", - basename="SklearnAdaBoostRegressorBool") + basename="SklearnAdaBoostRegressorBool", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_array_feature_extractor.py b/tests/test_sklearn_array_feature_extractor.py index d3c79006b..9c918cb0f 100644 --- a/tests/test_sklearn_array_feature_extractor.py +++ b/tests/test_sklearn_array_feature_extractor.py @@ -8,6 +8,7 @@ from onnxruntime import __version__ as ort_version from sklearn.mixture import GaussianMixture from sklearn.preprocessing import OneHotEncoder + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -16,57 +17,78 @@ from skl2onnx import to_onnx from skl2onnx.common.data_types import FloatTensorType from sklearn.pipeline import Pipeline -from test_utils import ( - dump_data_and_model, - TARGET_OPSET) +from test_utils import dump_data_and_model, TARGET_OPSET class TestSklearnArrayFeatureExtractor(unittest.TestCase): - @unittest.skipIf( - ColumnTransformer is None or - pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="onnxruntime too old") + ColumnTransformer is None or pv.Version(ort_version) <= pv.Version("0.4.0"), + reason="onnxruntime too old", + ) def test_array_feature_extractor(self): data_to_cluster = pd.DataFrame( - [[1, 2, 3.5, 4.5], [1, 2, 1.7, 4.0], - [2, 4, 2.4, 4.3], [2, 4, 2.5, 4.0]], - columns=[1, 2, 3, 4]) + [[1, 2, 3.5, 4.5], [1, 2, 1.7, 4.0], [2, 4, 2.4, 4.3], [2, 4, 2.5, 4.0]], + columns=[1, 2, 3, 4], + ) cat_attributes_clustering = [1, 2] num_attributes_clustering = [3, 4] # this is of length 12 in reality gmm = GaussianMixture(n_components=2, random_state=1) - ohe_cat = [OneHotEncoder(categories='auto', sparse=False, drop=None) - for i in cat_attributes_clustering] - ct_cat = ColumnTransformer([ - 
("oneHotEncoder" + str(i), ohe_cat[i], [i]) - for i, item in enumerate(cat_attributes_clustering) - ], remainder='passthrough') - onehotencoding_pipeline = Pipeline([("columnTransformer", ct_cat), ]) - clustering_pipeline = Pipeline([ - ('onehotencoder_and_scaler', onehotencoding_pipeline), - ('clustering', gmm)]) + ohe_cat = [ + OneHotEncoder(categories="auto", sparse=False, drop=None) + for i in cat_attributes_clustering + ] + ct_cat = ColumnTransformer( + [ + ("oneHotEncoder" + str(i), ohe_cat[i], [i]) + for i, item in enumerate(cat_attributes_clustering) + ], + remainder="passthrough", + ) + onehotencoding_pipeline = Pipeline( + [ + ("columnTransformer", ct_cat), + ] + ) + clustering_pipeline = Pipeline( + [("onehotencoder_and_scaler", onehotencoding_pipeline), ("clustering", gmm)] + ) clustering_pipeline.fit(X=data_to_cluster) initial_type = [ - ('float_input', FloatTensorType( - [None, len([*cat_attributes_clustering, - *num_attributes_clustering])]))] + ( + "float_input", + FloatTensorType( + [ + None, + len([*cat_attributes_clustering, *num_attributes_clustering]), + ] + ), + ) + ] data = data_to_cluster.values.astype(np.float32) # checks the first step model_onnx = to_onnx( - clustering_pipeline.steps[0][1], initial_types=initial_type, - target_opset=TARGET_OPSET) + clustering_pipeline.steps[0][1], + initial_types=initial_type, + target_opset=TARGET_OPSET, + ) dump_data_and_model( - data, clustering_pipeline.steps[0][1], model_onnx, - basename="SklearnArrayFeatureExtractorStep0") + data, + clustering_pipeline.steps[0][1], + model_onnx, + basename="SklearnArrayFeatureExtractorStep0", + ) # checks the whole pipeline model_onnx = to_onnx( - clustering_pipeline, initial_types=initial_type, - target_opset=TARGET_OPSET) + clustering_pipeline, initial_types=initial_type, target_opset=TARGET_OPSET + ) dump_data_and_model( - data, clustering_pipeline, model_onnx, - basename="SklearnArrayFeatureExtractor") + data, + clustering_pipeline, + model_onnx, + basename="SklearnArrayFeatureExtractor", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_bagging_converter.py b/tests/test_sklearn_bagging_converter.py index 732e984f3..713577987 100644 --- a/tests/test_sklearn_bagging_converter.py +++ b/tests/test_sklearn_bagging_converter.py @@ -4,6 +4,7 @@ import unittest import packaging.version as pv import onnxruntime + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -14,310 +15,364 @@ BaggingClassifier, BaggingRegressor, GradientBoostingClassifier, - GradientBoostingRegressor) + GradientBoostingRegressor, +) from sklearn.linear_model import SGDClassifier, SGDRegressor from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( BooleanTensorType, FloatTensorType, - Int64TensorType) + Int64TensorType, +) from test_utils import ( dump_data_and_model, fit_classification_model, fit_regression_model, - TARGET_OPSET) + TARGET_OPSET, +) class TestSklearnBaggingConverter(unittest.TestCase): @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_bagging_classifier_default_binary_int(self): - model, X = fit_classification_model( - BaggingClassifier(), 2, is_int=True) + model, X = fit_classification_model(BaggingClassifier(), 2, is_int=True) model_onnx = convert_sklearn( model, "bagging classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - 
basename="SklearnBaggingClassifierDefaultBinary") + X, model, model_onnx, basename="SklearnBaggingClassifierDefaultBinary" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_bagging_classifier_default_multiclass_int(self): - model, X = fit_classification_model( - BaggingClassifier(), 4, is_int=True) + model, X = fit_classification_model(BaggingClassifier(), 4, is_int=True) model_onnx = convert_sklearn( model, "bagging classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBaggingClassifierDefaultMulticlass") + X, model, model_onnx, basename="SklearnBaggingClassifierDefaultMulticlass" + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_default_binary(self): - model, X = fit_classification_model( - BaggingClassifier(), 2) + model, X = fit_classification_model(BaggingClassifier(), 2) model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, - model_onnx, verbose=False, - basename="SklearnBaggingClassifierDefaultBinary") + model_onnx, + verbose=False, + basename="SklearnBaggingClassifierDefaultBinary", + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_max_features(self): - model, X = fit_classification_model( - BaggingClassifier(max_features=0.5), 2) + model, X = fit_classification_model(BaggingClassifier(max_features=0.5), 2) model_onnx = convert_sklearn( - model, "bagging classifier", + model, + "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnBaggingClassifierMaxFeatures") + X, + model, + model_onnx, + verbose=False, + basename="SklearnBaggingClassifierMaxFeatures", + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_bootstrap_features(self): model, X = fit_classification_model( - BaggingClassifier(bootstrap_features=True), 2) + BaggingClassifier(bootstrap_features=True), 2 + ) model_onnx = convert_sklearn( - model, "bagging classifier", + model, + "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:5], model, model_onnx, verbose=False, - basename="SklearnBaggingClassifierBootstrapFeatures") + X[:5], + model, + model_onnx, + verbose=False, + basename="SklearnBaggingClassifierBootstrapFeatures", + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_default_binary_nozipmap(self): - model, X = fit_classification_model( - BaggingClassifier(), 2) + model, X = fit_classification_model(BaggingClassifier(), 2) model_onnx = convert_sklearn( - model, "bagging classifier", + model, + "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBaggingClassifierDefaultBinaryNoZipMap") + X, + model, + model_onnx, + basename="SklearnBaggingClassifierDefaultBinaryNoZipMap", + ) 
@ignore_warnings(category=FutureWarning) def test_bagging_classifier_default_multiclass(self): - model, X = fit_classification_model( - BaggingClassifier(), 3) + model, X = fit_classification_model(BaggingClassifier(), 3) model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X[:10], model, model_onnx, - basename="SklearnBaggingClassifierDefaultMulticlass") + basename="SklearnBaggingClassifierDefaultMulticlass", + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_bagging_classifier_sgd_binary(self): model, X = fit_classification_model( BaggingClassifier( - SGDClassifier(loss='modified_huber', random_state=42), - random_state=42), 2) + SGDClassifier(loss="modified_huber", random_state=42), random_state=42 + ), + 2, + ) model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBaggingClassifierSGDBinary") + X, model, model_onnx, basename="SklearnBaggingClassifierSGDBinary" + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_sgd_binary_decision_function(self): model, X = fit_classification_model( - BaggingClassifier(SGDClassifier(random_state=42), - random_state=42), 2) - options = {id(model): {'raw_scores': True}} + BaggingClassifier(SGDClassifier(random_state=42), random_state=42), 2 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], options=options, - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:5], model, model_onnx, + X[:5], + model, + model_onnx, basename="SklearnBaggingClassifierSGDBinaryDecisionFunction-Dec3", - methods=['predict', 'decision_function_binary']) + methods=["predict", "decision_function_binary"], + ) - @unittest.skipIf(pv.Version(onnxruntime.__version__) - <= pv.Version("0.4.0"), - reason="Not implemented.") + @unittest.skipIf( + pv.Version(onnxruntime.__version__) <= pv.Version("0.4.0"), + reason="Not implemented.", + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_bagging_classifier_sgd_multiclass(self): model, X = fit_classification_model( BaggingClassifier( - SGDClassifier(loss='modified_huber', random_state=42), - random_state=42), 5) + SGDClassifier(loss="modified_huber", random_state=42), random_state=42 + ), + 5, + ) model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:5], model, model_onnx, - basename="SklearnBaggingClassifierSGDMulticlass-Dec3") + X[:5], + model, + model_onnx, + basename="SklearnBaggingClassifierSGDMulticlass-Dec3", + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_sgd_multiclass_decision_function(self): model, X = fit_classification_model( BaggingClassifier( GradientBoostingClassifier(random_state=42, n_estimators=4), - random_state=42), 4, n_features=10) - options = {id(model): {'raw_scores': True, "zipmap": False}} + random_state=42, + ), + 4, + n_features=10, + ) + options = {id(model): {"raw_scores": True, "zipmap": False}} 
model_onnx = convert_sklearn( - model, "bagging classifier", + model, + "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], options=options, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:15], model, model_onnx, + X[:15], + model, + model_onnx, basename="SklearnBaggingClassifierSGDMultiDecisionFunction-Dec3", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_gradient_boosting_binary(self): model, X = fit_classification_model( - BaggingClassifier( - GradientBoostingClassifier(n_estimators=10)), 2) + BaggingClassifier(GradientBoostingClassifier(n_estimators=10)), 2 + ) model_onnx = convert_sklearn( model, "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={"zipmap": False}) + options={"zipmap": False}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, model_onnx, - basename="SklearnBaggingClassifierGradientBoostingBinary") + basename="SklearnBaggingClassifierGradientBoostingBinary", + ) @ignore_warnings(category=FutureWarning) def test_bagging_classifier_gradient_boosting_multiclass(self): model, X = fit_classification_model( - BaggingClassifier( - GradientBoostingClassifier(n_estimators=10)), 3) + BaggingClassifier(GradientBoostingClassifier(n_estimators=10)), 3 + ) model_onnx = convert_sklearn( - model, "bagging classifier", + model, + "bagging classifier", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={"zipmap": False}) + options={"zipmap": False}, + ) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBaggingClassifierGradientBoostingMulticlass") + X, + model, + model_onnx, + basename="SklearnBaggingClassifierGradientBoostingMulticlass", + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_default(self): - model, X = fit_regression_model( - BaggingRegressor()) + model, X = fit_regression_model(BaggingRegressor()) model_onnx = convert_sklearn( model, "bagging regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBaggingRegressorDefault-Dec4") + X, model, model_onnx, basename="SklearnBaggingRegressorDefault-Dec4" + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_max_features(self): model, X = fit_regression_model( - BaggingRegressor(max_features=0.5, n_estimators=3)) + BaggingRegressor(max_features=0.5, n_estimators=3) + ) model_onnx = convert_sklearn( - model, "bagging regressor", + model, + "bagging regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnBaggingRegressorMaxFeatures-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnBaggingRegressorMaxFeatures-Dec4", + ) def test_bagging_regressor_bootstrap_features(self): - model, X = fit_regression_model( - BaggingRegressor(bootstrap_features=False)) + model, X = fit_regression_model(BaggingRegressor(bootstrap_features=False)) model_onnx = convert_sklearn( - model, "bagging regressor", + model, + "bagging regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + 
target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnBaggingRegressorBootstrapFeatures-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnBaggingRegressorBootstrapFeatures-Dec4", + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_sgd(self): - model, X = fit_regression_model( - BaggingRegressor(SGDRegressor())) + model, X = fit_regression_model(BaggingRegressor(SGDRegressor())) model_onnx = convert_sklearn( - model, "bagging regressor", + model, + "bagging regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBaggingRegressorSGD-Dec4") + X, model, model_onnx, basename="SklearnBaggingRegressorSGD-Dec4" + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_gradient_boosting(self): model, X = fit_regression_model( - BaggingRegressor( - GradientBoostingRegressor(n_estimators=10))) + BaggingRegressor(GradientBoostingRegressor(n_estimators=10)) + ) model_onnx = convert_sklearn( - model, "bagging regressor", + model, + "bagging regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBaggingRegressorGradientBoosting-Dec4") + X, + model, + model_onnx, + basename="SklearnBaggingRegressorGradientBoosting-Dec4", + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_bool(self): - model, X = fit_regression_model( - BaggingRegressor(), is_bool=True) + model, X = fit_regression_model(BaggingRegressor(), is_bool=True) model_onnx = convert_sklearn( model, "bagging regressor", @@ -326,10 +381,8 @@ def test_bagging_regressor_bool(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBaggingRegressorBool-Dec4") + X, model, model_onnx, basename="SklearnBaggingRegressorBool-Dec4" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_binarizer_converter.py b/tests/test_sklearn_binarizer_converter.py index db488d071..e63eaea94 100644 --- a/tests/test_sklearn_binarizer_converter.py +++ b/tests/test_sklearn_binarizer_converter.py @@ -14,19 +14,21 @@ class TestSklearnBinarizer(unittest.TestCase): def test_model_binarizer(self): - data = np.array([[1., -1., 2.], - [2., 0., 0.], - [0., 1., -1.]], dtype=np.float32) + data = np.array( + [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]], dtype=np.float32 + ) model = Binarizer(threshold=0.5) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn binarizer", + model, + "scikit-learn binarizer", [("input", FloatTensorType(data.shape))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnBinarizer-SkipDim1") + data, model, model_onnx, basename="SklearnBinarizer-SkipDim1" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_calibrated_classifier_cv_converter.py b/tests/test_sklearn_calibrated_classifier_cv_converter.py index 740379160..1f3555084 100644 --- a/tests/test_sklearn_calibrated_classifier_cv_converter.py +++ b/tests/test_sklearn_calibrated_classifier_cv_converter.py @@ -11,8 +11,8 @@ from onnxruntime import __version__ as ort_version from sklearn.calibration import 
CalibratedClassifierCV from sklearn.datasets import load_digits, load_iris -from sklearn.ensemble import ( - RandomForestClassifier, GradientBoostingClassifier) +from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier + try: from sklearn.ensemble import HistGradientBoostingClassifier except ImportError: @@ -23,6 +23,7 @@ from sklearn.svm import SVC, LinearSVC from sklearn.tree import DecisionTreeClassifier from sklearn.exceptions import ConvergenceWarning + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -36,49 +37,59 @@ apply_less = None from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, onnx_built_with_ml) + FloatTensorType, + Int64TensorType, +) from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestSklearnCalibratedClassifierCVConverters(unittest.TestCase): - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_float(self): data = load_iris() X, y = data.data, data.target clf = MultinomialNB().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCVMNB", + model, + "scikit-learn CalibratedClassifierCVMNB", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVFloat") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVFloat", + ) - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_float_nozipmap(self): data = load_iris() X, y = data.data, data.target clf = MultinomialNB().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCVMNB", + model, + "scikit-learn CalibratedClassifierCVMNB", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVFloatNoZipMap") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVFloatNoZipMap", + ) @ignore_warnings(category=FutureWarning) def test_model_calibrated_classifier_cv_sigmoid_int(self): @@ -87,38 +98,45 @@ def test_model_calibrated_classifier_cv_sigmoid_int(self): clf = MultinomialNB().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCVMNB", + model, + "scikit-learn CalibratedClassifierCVMNB", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + 
target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnCalibratedClassifierCVInt-Dec4") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVInt-Dec4", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_isotonic_float(self): data = load_iris() X, y = data.data, data.target clf = KNeighborsClassifier().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCVKNN", + model, + "scikit-learn CalibratedClassifierCVKNN", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) try: dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVIsotonicFloat") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVIsotonicFloat", + ) except Exception as e: - raise AssertionError("Issue with model\n{}".format( - model_onnx)) from e + raise AssertionError("Issue with model\n{}".format(model_onnx)) from e - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_binary_mnb(self): data = load_iris() X, y = data.data, data.target @@ -126,19 +144,23 @@ def test_model_calibrated_classifier_cv_binary_mnb(self): clf = MultinomialNB().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCV", + model, + "scikit-learn CalibratedClassifierCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVBinaryMNB") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVBinaryMNB", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_isotonic_binary_knn(self): data = load_iris() X, y = data.data, data.target @@ -146,184 +168,218 @@ def test_model_calibrated_classifier_cv_isotonic_binary_knn(self): clf = KNeighborsClassifier().fit(X, y) model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y) model_onnx = convert_sklearn( - model, "scikit-learn CalibratedClassifierCV", + model, + "scikit-learn CalibratedClassifierCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVIsotonicBinaryKNN") + X.astype(np.float32), + 
model, + model_onnx, + basename="SklearnCalibratedClassifierCVIsotonicBinaryKNN", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_logistic_regression(self): data = load_iris() X, y = data.data, data.target y[y > 1] = 1 model = CalibratedClassifierCV( - base_estimator=LogisticRegression(), method='sigmoid').fit(X, y) + base_estimator=LogisticRegression(), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierCVBinaryLogReg") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierCVBinaryLogReg", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_rf(self): data = load_iris() X, y = data.data, data.target y[y > 1] = 1 model = CalibratedClassifierCV( - base_estimator=RandomForestClassifier(n_estimators=2), - method='sigmoid').fit(X, y) + base_estimator=RandomForestClassifier(n_estimators=2), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "clarf", + model, + "clarf", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierRF") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierRF", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_gbt(self): data = load_iris() X, y = data.data, data.target y[y > 1] = 1 model = CalibratedClassifierCV( - base_estimator=GradientBoostingClassifier(n_estimators=2), - method='sigmoid').fit(X, y) + base_estimator=GradientBoostingClassifier(n_estimators=2), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "clarf", + model, + "clarf", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierGBT") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierGBT", + ) + @unittest.skipIf(HistGradientBoostingClassifier is None, reason="not available") @unittest.skipIf( - HistGradientBoostingClassifier is None, reason="not available") - @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, 
ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_hgbt(self): data = load_iris() X, y = data.data, data.target y[y > 1] = 1 model = CalibratedClassifierCV( - base_estimator=HistGradientBoostingClassifier(max_iter=4), - method='sigmoid').fit(X, y) + base_estimator=HistGradientBoostingClassifier(max_iter=4), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "clarf", + model, + "clarf", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierHGBT") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierHGBT", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_tree(self): data = load_iris() X, y = data.data, data.target y[y > 1] = 1 model = CalibratedClassifierCV( - base_estimator=DecisionTreeClassifier(), - method='sigmoid').fit(X, y) + base_estimator=DecisionTreeClassifier(), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "clarf", + model, + "clarf", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierDT") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierDT", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @unittest.skipIf(apply_less is None, reason="onnxconverter-common old") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_svc(self): data = load_iris() X, y = data.data, data.target - model = CalibratedClassifierCV( - base_estimator=SVC(), - method='sigmoid').fit(X, y) + model = CalibratedClassifierCV(base_estimator=SVC(), method="sigmoid").fit(X, y) model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierSVC") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierSVC", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @unittest.skipIf(apply_less is None, reason="onnxconverter-common old") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_linearsvc(self): data = load_iris() X, y = data.data, data.target model = 
CalibratedClassifierCV( - base_estimator=LinearSVC(), - method='sigmoid').fit(X, y) + base_estimator=LinearSVC(), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierLinearSVC") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierLinearSVC", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @unittest.skipIf(apply_less is None, reason="onnxconverter-common old") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_linearsvc2(self): data = load_iris() X, y = data.data, data.target y[y == 2] = 0 self.assertEqual(len(set(y)), 2) model = CalibratedClassifierCV( - base_estimator=LinearSVC(), - method='sigmoid').fit(X, y) + base_estimator=LinearSVC(), method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnCalibratedClassifierLinearSVC2") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnCalibratedClassifierLinearSVC2", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @unittest.skipIf(apply_less is None, reason="onnxconverter-common old") - @ignore_warnings( - category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_calibrated_classifier_cv_svc2_binary(self): data = load_iris() X, y = data.data, data.target @@ -335,21 +391,22 @@ def test_model_calibrated_classifier_cv_svc2_binary(self): model_sub.fit(X, y) with self.subTest(model=model_sub): model = CalibratedClassifierCV( - base_estimator=model_sub, cv=2, - method='sigmoid').fit(X, y) + base_estimator=model_sub, cv=2, method="sigmoid" + ).fit(X, y) model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) if sess is not None: try: - res = sess.run( - None, {'input': X[:5].astype(np.float32)}) + res = sess.run(None, {"input": X[:5].astype(np.float32)}) except RuntimeError as e: raise AssertionError("runtime failed") from e assert_almost_equal(model.predict_proba(X[:5]), res[1]) @@ -357,8 +414,11 @@ def test_model_calibrated_classifier_cv_svc2_binary(self): name = model_sub.__class__.__name__ dump_data_and_model( - X.astype(np.float32)[:10], model, model_onnx, - basename=f"SklearnCalibratedClassifierBinary{name}SVC2") + X.astype(np.float32)[:10], + model, + model_onnx, + basename=f"SklearnCalibratedClassifierBinary{name}SVC2", + ) if __name__ == "__main__": 
diff --git a/tests/test_sklearn_cast_regressor.py b/tests/test_sklearn_cast_regressor.py index d89a9ff2f..a158f1a3f 100644 --- a/tests/test_sklearn_cast_regressor.py +++ b/tests/test_sklearn_cast_regressor.py @@ -13,109 +13,132 @@ from sklearn.pipeline import Pipeline from sklearn.tree import DecisionTreeRegressor from sklearn.preprocessing import StandardScaler + try: from sklearn.compose import ColumnTransformer except ImportError: ColumnTransformer = None from skl2onnx.sklapi import CastRegressor, CastTransformer from skl2onnx import convert_sklearn, to_onnx -from skl2onnx.common.data_types import ( - FloatTensorType, DoubleTensorType) +from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) class TestSklearnCastRegressorConverter(unittest.TestCase): - def common_test_cast_regressor(self, dtype, input_type): model = CastRegressor(DecisionTreeRegressor(max_depth=2), dtype=dtype) - data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0], - [0, 2, 1], [1, 0, 2], - [0.1, 2.1, 1.1], [1.1, 0.1, 2.2], - [-0.1, -2.1, -1.1], [-1.1, -0.1, -2.2], - [0.2, 2.2, 1.2], [1.2, 0.2, 2.2]], - dtype=numpy.float32) - y = (numpy.sum(data, axis=1, keepdims=0) + - numpy.random.randn(data.shape[0])) + data = numpy.array( + [ + [0.1, 0.2, 3.1], + [1, 1, 0], + [0, 2, 1], + [1, 0, 2], + [0.1, 2.1, 1.1], + [1.1, 0.1, 2.2], + [-0.1, -2.1, -1.1], + [-1.1, -0.1, -2.2], + [0.2, 2.2, 1.2], + [1.2, 0.2, 2.2], + ], + dtype=numpy.float32, + ) + y = numpy.sum(data, axis=1, keepdims=0) + numpy.random.randn(data.shape[0]) model.fit(data, y) pred = model assert pred.dtype == dtype model_onnx = convert_sklearn( - model, "cast", [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "cast", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnCastRegressor{}".format( - input_type.__class__.__name__)) + data, + model, + model_onnx, + basename="SklearnCastRegressor{}".format(input_type.__class__.__name__), + ) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_cast_regressor_float(self): - self.common_test_cast_regressor( - numpy.float32, FloatTensorType) + self.common_test_cast_regressor(numpy.float32, FloatTensorType) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_cast_regressor_float64(self): - self.common_test_cast_regressor( - numpy.float64, DoubleTensorType) + self.common_test_cast_regressor(numpy.float64, DoubleTensorType) @unittest.skipIf(TARGET_OPSET < 9, reason="not supported") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_pipeline(self): - def maxdiff(a1, a2): d = numpy.abs(a1.ravel() - a2.ravel()) return d.max() X, y = make_regression(10000, 10, random_state=3) - X_train, X_test, y_train, _ = train_test_split( - X, y, random_state=3) + X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3) Xi_train, yi_train = 
X_train.copy(), y_train.copy() Xi_test = X_test.copy() for i in range(X.shape[1]): - Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype( - numpy.int64) - Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype( - numpy.int64) + Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(numpy.int64) + Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(numpy.int64) max_depth = 10 Xi_test = Xi_test.astype(numpy.float32) # model 1 - model1 = Pipeline([ - ('scaler', StandardScaler()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) - ]) + model1 = Pipeline( + [ + ("scaler", StandardScaler()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] + ) model1.fit(Xi_train, yi_train) exp1 = model1.predict(Xi_test) - onx1 = to_onnx(model1, X_train[:1].astype(numpy.float32), - target_opset=TARGET_OPSET) + onx1 = to_onnx( + model1, X_train[:1].astype(numpy.float32), target_opset=TARGET_OPSET + ) sess1 = InferenceSession( - onx1.SerializeToString(), - providers=["CPUExecutionProvider"]) - got1 = sess1.run(None, {'X': Xi_test})[0] + onx1.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got1 = sess1.run(None, {"X": Xi_test})[0] md1 = maxdiff(exp1, got1) # model 2 - model2 = Pipeline([ - ('cast64', CastTransformer(dtype=numpy.float64)), - ('scaler', StandardScaler()), - ('cast', CastTransformer()), - ('dt', CastRegressor(DecisionTreeRegressor(max_depth=max_depth), - dtype=numpy.float32)) - ]) + model2 = Pipeline( + [ + ("cast64", CastTransformer(dtype=numpy.float64)), + ("scaler", StandardScaler()), + ("cast", CastTransformer()), + ( + "dt", + CastRegressor( + DecisionTreeRegressor(max_depth=max_depth), dtype=numpy.float32 + ), + ), + ] + ) model2.fit(Xi_train, yi_train) exp2 = model2.predict(Xi_test) - onx = to_onnx(model2, X_train[:1].astype(numpy.float32), - options={StandardScaler: {'div': 'div_cast'}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model2, + X_train[:1].astype(numpy.float32), + options={StandardScaler: {"div": "div_cast"}}, + target_opset=TARGET_OPSET, + ) sess2 = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got2 = sess2.run(None, {'X': Xi_test})[0] + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got2 = sess2.run(None, {"X": Xi_test})[0] md2 = maxdiff(exp2, got2) assert md2 <= md1 assert md2 <= 0.0 diff --git a/tests/test_sklearn_cast_transformer.py b/tests/test_sklearn_cast_transformer.py index 00cdcb013..e90a376c1 100644 --- a/tests/test_sklearn_cast_transformer.py +++ b/tests/test_sklearn_cast_transformer.py @@ -14,6 +14,7 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.tree import DecisionTreeRegressor + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -21,131 +22,156 @@ from skl2onnx.sklapi import CastTransformer from skl2onnx import convert_sklearn, to_onnx from skl2onnx.common.data_types import ( - Int64TensorType, FloatTensorType, DoubleTensorType) + Int64TensorType, + FloatTensorType, + DoubleTensorType, +) from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) class TestSklearnCastTransformerConverter(unittest.TestCase): - def common_test_cast_transformer(self, dtype, input_type): - model = Pipeline([ - ('cast', CastTransformer(dtype=dtype)), - ('invcast', CastTransformer(dtype=numpy.float32)), - ]) - data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0], - [0, 2, 1], [1, 0, 
2]], - dtype=numpy.float32) + model = Pipeline( + [ + ("cast", CastTransformer(dtype=dtype)), + ("invcast", CastTransformer(dtype=numpy.float32)), + ] + ) + data = numpy.array( + [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32 + ) model.fit(data) pred = model.steps[0][1].transform(data) assert pred.dtype == dtype model_onnx = convert_sklearn( - model, "cast", [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "cast", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnCastTransformer{}".format( - input_type.__class__.__name__)) + data, + model, + model_onnx, + basename="SklearnCastTransformer{}".format(input_type.__class__.__name__), + ) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_cast_transformer_float(self): - self.common_test_cast_transformer( - numpy.float32, FloatTensorType) + self.common_test_cast_transformer(numpy.float32, FloatTensorType) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_cast_transformer_float64(self): - self.common_test_cast_transformer( - numpy.float64, DoubleTensorType) + self.common_test_cast_transformer(numpy.float64, DoubleTensorType) - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_cast_transformer_int64(self): - self.common_test_cast_transformer( - numpy.int64, Int64TensorType) + self.common_test_cast_transformer(numpy.int64, Int64TensorType) @unittest.skipIf(TARGET_OPSET < 9, reason="not supported") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'), - reason="runtime too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old" + ) def test_pipeline(self): - def maxdiff(a1, a2): d = numpy.abs(a1.ravel() - a2.ravel()) return d.max() X, y = make_regression(10000, 10, random_state=3) - X_train, X_test, y_train, _ = train_test_split( - X, y, random_state=3) + X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3) Xi_train, yi_train = X_train.copy(), y_train.copy() Xi_test = X_test.copy() for i in range(X.shape[1]): - Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype( - numpy.int64) - Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype( - numpy.int64) + Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(numpy.int64) + Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(numpy.int64) max_depth = 10 Xi_test = Xi_test.astype(numpy.float32) # model 1 - model1 = Pipeline([ - ('scaler', StandardScaler()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) - ]) + model1 = Pipeline( + [ + ("scaler", StandardScaler()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] + ) model1.fit(Xi_train, yi_train) exp1 = model1.predict(Xi_test) - onx1 = to_onnx(model1, X_train[:1].astype(numpy.float32), - target_opset=TARGET_OPSET) + onx1 = to_onnx( + model1, X_train[:1].astype(numpy.float32), target_opset=TARGET_OPSET + ) sess1 = InferenceSession( - onx1.SerializeToString(), - providers=["CPUExecutionProvider"]) - got1 = sess1.run(None, {'X': 
Xi_test})[0] + onx1.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got1 = sess1.run(None, {"X": Xi_test})[0] md1 = maxdiff(exp1, got1) # model 2 - model2 = Pipeline([ - ('cast64', CastTransformer(dtype=numpy.float64)), - ('scaler', StandardScaler()), - ('cast', CastTransformer()), - ('dt', DecisionTreeRegressor(max_depth=max_depth)) - ]) + model2 = Pipeline( + [ + ("cast64", CastTransformer(dtype=numpy.float64)), + ("scaler", StandardScaler()), + ("cast", CastTransformer()), + ("dt", DecisionTreeRegressor(max_depth=max_depth)), + ] + ) model2.fit(Xi_train, yi_train) exp2 = model2.predict(Xi_test) - onx = to_onnx(model2, X_train[:1].astype(numpy.float32), - options={StandardScaler: {'div': 'div_cast'}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model2, + X_train[:1].astype(numpy.float32), + options={StandardScaler: {"div": "div_cast"}}, + target_opset=TARGET_OPSET, + ) sess2 = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got2 = sess2.run(None, {'X': Xi_test})[0] + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got2 = sess2.run(None, {"X": Xi_test})[0] md2 = maxdiff(exp2, got2) assert md2 <= md1 assert md2 <= 0.01 - @unittest.skipIf(ColumnTransformer is None, - reason="scikit-learn too old") + @unittest.skipIf(ColumnTransformer is None, reason="scikit-learn too old") def test_cast_transformer_dataframe(self): - model = Pipeline([ - ('prep', ColumnTransformer([ - ('prep1', CastTransformer(), [0, 1]), - ('prep2', CastTransformer(), [2]), - ])), - ('invcast', CastTransformer(dtype=numpy.float32)), - ]) - data = numpy.array([[0.1, 0.2, 3.4], [1, 1, 0], - [0, 2, 1], [1, 0, 2]], - dtype=numpy.float32) + model = Pipeline( + [ + ( + "prep", + ColumnTransformer( + [ + ("prep1", CastTransformer(), [0, 1]), + ("prep2", CastTransformer(), [2]), + ] + ), + ), + ("invcast", CastTransformer(dtype=numpy.float32)), + ] + ) + data = numpy.array( + [[0.1, 0.2, 3.4], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32 + ) data = DataFrame(data) model.fit(data) model_onnx = convert_sklearn( - model, "cast", [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "cast", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data.values, model, model_onnx, - basename="SklearnCastTransformerCT") + data.values, model, model_onnx, basename="SklearnCastTransformerCT" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_concat.py b/tests/test_sklearn_concat.py index 2e267759e..11214848b 100644 --- a/tests/test_sklearn_concat.py +++ b/tests/test_sklearn_concat.py @@ -7,65 +7,69 @@ import numpy as np from sklearn.tree import DecisionTreeRegressor from sklearn.model_selection import train_test_split + try: from sklearn.compose import ColumnTransformer except ImportError: ColumnTransformer = None from sklearn.pipeline import Pipeline -from sklearn.preprocessing import ( - StandardScaler, - OneHotEncoder, - FunctionTransformer -) +from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - BooleanTensorType, FloatTensorType, - Int64TensorType, StringTensorType) + BooleanTensorType, + FloatTensorType, + Int64TensorType, + StringTensorType, +) from test_utils import TARGET_OPSET def _column_tranformer_fitted_from_df(data): def transformer_for_column(column: pd.Series): - if column.dtype in ['float64', 'float32', 'int64']: + 
if column.dtype in ["float64", "float32", "int64"]: return StandardScaler() - if column.dtype in ['bool']: - return 'passthrough' - if column.dtype in ['O']: + if column.dtype in ["bool"]: + return "passthrough" + if column.dtype in ["O"]: try: - return OneHotEncoder(drop='first') + return OneHotEncoder(drop="first") except TypeError: # older version of scikit-learn return OneHotEncoder() raise ValueError( - 'Unexpected column dtype for {column.name}:{column.dtype}'.format( - column=column)) + "Unexpected column dtype for {column.name}:{column.dtype}".format( + column=column + ) + ) return ColumnTransformer( - [(col, transformer_for_column( - data[col]), [col]) for col in data.columns], - remainder='drop' + [(col, transformer_for_column(data[col]), [col]) for col in data.columns], + remainder="drop", ).fit(data) def _convert_dataframe_schema(data): def type_for_column(column: pd.Series): - if column.dtype in ['float64', 'float32']: + if column.dtype in ["float64", "float32"]: return FloatTensorType([None, 1]) - if column.dtype in ['int64']: + if column.dtype in ["int64"]: return Int64TensorType([None, 1]) - if column.dtype in ['bool']: + if column.dtype in ["bool"]: return BooleanTensorType([None, 1]) - if column.dtype in ['O']: + if column.dtype in ["O"]: return StringTensorType([None, 1]) raise ValueError( - 'Unexpected column dtype for {column.name}:{column.dtype}'.format( - column=column)) + "Unexpected column dtype for {column.name}:{column.dtype}".format( + column=column + ) + ) + return [(col, type_for_column(data[col])) for col in data.columns] def _predict(session: rt.InferenceSession, data: pd.DataFrame) -> pd.Series: def _correctly_typed_column(column: pd.Series) -> pd.Series: - if column.dtype in ['float64']: + if column.dtype in ["float64"]: return column.astype(np.float32) return column @@ -77,78 +81,68 @@ def _correctly_shaped_values(values): for c in data.columns } - return pd.Series( - session.run(None, inputs)[0].reshape(-1), - index=data.index - ) + return pd.Series(session.run(None, inputs)[0].reshape(-1), index=data.index) class TestSklearnPipeline(unittest.TestCase): - @unittest.skipIf(ColumnTransformer is None, reason="too old scikit-learn") def test_concat(self): - data = os.path.join(os.path.dirname(__file__), - "datasets", "small_titanic.csv") + data = os.path.join(os.path.dirname(__file__), "datasets", "small_titanic.csv") data = pd.read_csv(data) - data['female'] = data['sex'] == 'female' - data = data[['age', 'fare', 'female', 'embarked', - 'pclass', 'survived']] + data["female"] = data["sex"] == "female" + data = data[["age", "fare", "female", "embarked", "pclass", "survived"]] for col in data: dtype = data[col].dtype - if dtype in ['float64', 'float32']: - data[col].fillna(0., inplace=True) - if dtype in ['int64']: + if dtype in ["float64", "float32"]: + data[col].fillna(0.0, inplace=True) + if dtype in ["int64"]: data[col].fillna(0, inplace=True) - elif dtype in ['O']: - data[col].fillna('N/A', inplace=True) + elif dtype in ["O"]: + data[col].fillna("N/A", inplace=True) - full_df = data.drop('survived', axis=1) - full_labels = data['survived'] + full_df = data.drop("survived", axis=1) + full_labels = data["survived"] train_df, test_df, train_labels, test_labels = train_test_split( - full_df, full_labels, test_size=.2, random_state=0) + full_df, full_labels, test_size=0.2, random_state=0 + ) col_transformer = _column_tranformer_fitted_from_df(full_df) regressor = DecisionTreeRegressor(random_state=0) - regressor.fit( - col_transformer.transform(train_df), - 
train_labels) + regressor.fit(col_transformer.transform(train_df), train_labels) model = Pipeline( - steps=[('preprocessor', col_transformer), - ('regressor', regressor)]) + steps=[("preprocessor", col_transformer), ("regressor", regressor)] + ) initial_types = _convert_dataframe_schema(full_df) itypes = set(_[1].__class__ for _ in initial_types) self.assertIn(BooleanTensorType, itypes) self.assertIn(FloatTensorType, itypes) - onx = convert_sklearn(model, initial_types=initial_types, - target_opset=TARGET_OPSET) + onx = convert_sklearn( + model, initial_types=initial_types, target_opset=TARGET_OPSET + ) session = rt.InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) pred_skl = model.predict(test_df) pred_onx = _predict(session, test_df) - diff = np.sort( - np.abs(np.squeeze(pred_skl) - np.squeeze(pred_onx))) + diff = np.sort(np.abs(np.squeeze(pred_skl) - np.squeeze(pred_onx))) if diff[0] != diff[-1]: raise AssertionError( - "Discrepencies\nSKL\n{}\nORT\n{}".format(pred_skl, pred_onx)) + "Discrepencies\nSKL\n{}\nORT\n{}".format(pred_skl, pred_onx) + ) class TestConcatOutputType(unittest.TestCase): - @unittest.skipIf(ColumnTransformer is None, reason="too old scikit-learn") def test_concat_output_type(self): # create sample dataset - data_dict = { - 'a': [1, 2, 3], - 'b': [1.5, 2.6, 5.2] - } + data_dict = {"a": [1, 2, 3], "b": [1.5, 2.6, 5.2]} # load to dataframe data = pd.DataFrame.from_dict(data_dict) @@ -159,7 +153,7 @@ def test_concat_output_type(self): col_transformer = ColumnTransformer( transformers=[ ("a", FunctionTransformer(), ["a"]), - ("b", StandardScaler(), ["b"]) + ("b", StandardScaler(), ["b"]), ], ) @@ -170,8 +164,9 @@ def test_concat_output_type(self): initial_types = _convert_dataframe_schema(data) # convert to onnx - onx = convert_sklearn(col_transformer, initial_types=initial_types, - target_opset=TARGET_OPSET) + onx = convert_sklearn( + col_transformer, initial_types=initial_types, target_opset=TARGET_OPSET + ) # make sure that the output of the concat is a float # we are concatenating an `int` with a `float`, and diff --git a/tests/test_sklearn_constant_predictor.py b/tests/test_sklearn_constant_predictor.py index f2846b654..4844a3d4d 100644 --- a/tests/test_sklearn_constant_predictor.py +++ b/tests/test_sklearn_constant_predictor.py @@ -10,10 +10,7 @@ from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType -from test_utils import ( - dump_data_and_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, TARGET_OPSET ort_version = ".".join(ort_version.split(".")[:2]) @@ -27,14 +24,20 @@ def test_constant_predictor_float(self): test_x = np.array([[1, 0], [2, 8]]) model_onnx = to_onnx( - model, "scikit-learn ConstantPredictor", + model, + "scikit-learn ConstantPredictor", initial_types=[("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={'zipmap': False}) + options={"zipmap": False}, + ) self.assertIsNotNone(model_onnx is not None) - dump_data_and_model(test_x.astype(np.float32), model, model_onnx, - basename="SklearnConstantPredictorFloat") + dump_data_and_model( + test_x.astype(np.float32), + model, + model_onnx, + basename="SklearnConstantPredictorFloat", + ) def test_constant_predictor_double(self): model = _ConstantPredictor() @@ -44,14 +47,20 @@ def test_constant_predictor_double(self): test_x = np.array([[1, 0], [2, 8]]) model_onnx = to_onnx( - model, "scikit-learn ConstantPredictor", + model, + 
"scikit-learn ConstantPredictor", initial_types=[("input", DoubleTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={'zipmap': False}) + options={"zipmap": False}, + ) self.assertIsNotNone(model_onnx is not None) - dump_data_and_model(test_x.astype(np.float64), model, model_onnx, - basename="SklearnConstantPredictorDouble") + dump_data_and_model( + test_x.astype(np.float64), + model, + model_onnx, + basename="SklearnConstantPredictorDouble", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py index 3ef41680e..eb7f1cc66 100644 --- a/tests/test_sklearn_count_vectorizer_converter.py +++ b/tests/test_sklearn_count_vectorizer_converter.py @@ -12,106 +12,123 @@ class TestSklearnCountVectorizer(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer11(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = CountVectorizer(ngram_range=(1, 1)) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "CountVectorizer", - [("input", StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnCountVectorizer11-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnCountVectorizer11-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer22(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = CountVectorizer(ngram_range=(2, 2)) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "CountVectorizer", - [("input", StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnCountVectorizer22-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnCountVectorizer22-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer12(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = CountVectorizer(ngram_range=(1, 2)) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "CountVectorizer", - [("input", StringTensorType([1]))], - 
target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnCountVectorizer12-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnCountVectorizer12-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer13(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = CountVectorizer(ngram_range=(1, 3)) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "CountVectorizer", - [("input", StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnCountVectorizer13-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnCountVectorizer13-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_binary(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = CountVectorizer(binary=True) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "CountVectorizer", - [("input", StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnCountVectorizerBinary-OneOff-SklCol") + basename="SklearnCountVectorizerBinary-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_count_vectorizer_converter_bug.py b/tests/test_sklearn_count_vectorizer_converter_bug.py index 5f35035c9..73b4fea1d 100644 --- a/tests/test_sklearn_count_vectorizer_converter_bug.py +++ b/tests/test_sklearn_count_vectorizer_converter_bug.py @@ -12,60 +12,69 @@ class TestSklearnCountVectorizerBug(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_custom_tokenizer(self): - corpus = numpy.array([ - '9999', - '999 99', - '1234', - '1 2 3 4', - '1 2 3 4+', - ]).reshape((5, 1)) - vect = CountVectorizer(ngram_range=(1, 1), - tokenizer=lambda s: [s]) + corpus = numpy.array( + [ + "9999", + "999 99", + "1234", + "1 2 3 4", + "1 2 3 4+", + ] + ).reshape((5, 1)) + vect = CountVectorizer(ngram_range=(1, 1), tokenizer=lambda s: [s]) vect.fit(corpus.ravel()) - extra = { - CountVectorizer: { - "separators": ["ZZZZ"] - } - } + extra = {CountVectorizer: {"separators": ["ZZZZ"]}} prev = vect.tokenizer vect.tokenizer = None - model_onnx = convert_sklearn(vect, 'CountVectorizer', - [('input', 
StringTensorType([1]))], - options=extra, - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + options=extra, + target_opset=TARGET_OPSET, + ) vect.tokenizer = prev self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_wrong_ngram(self): - corpus = numpy.array([ - 'A AABBB0', - 'AAABB B1', - 'AA ABBB2', - 'AAAB BB3', - 'AAA BBB4', - ]).reshape((5, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), - token_pattern=r"(?u)\b\w\w+\b") + corpus = numpy.array( + [ + "A AABBB0", + "AAABB B1", + "AA ABBB2", + "AAAB BB3", + "AAA BBB4", + ] + ).reshape((5, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"(?u)\b\w\w+\b") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_custom_nmf.py b/tests/test_sklearn_custom_nmf.py index f9ffabdd7..d24247155 100644 --- a/tests/test_sklearn_custom_nmf.py +++ b/tests/test_sklearn_custom_nmf.py @@ -5,8 +5,7 @@ import numpy as np from sklearn.decomposition import NMF from skl2onnx.common.data_types import FloatTensorType -from skl2onnx.algebra.onnx_ops import ( - OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum) +from skl2onnx.algebra.onnx_ops import OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum from onnxruntime import InferenceSession from test_utils import TARGET_OPSET @@ -14,10 +13,11 @@ class TestSklearnCustomNMF(unittest.TestCase): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_custom_nmf(self): - - mat = np.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], - [1, 0, 0, 0], [0, 0, 1, 0]], dtype=np.float64) - mat[:mat.shape[1], :] += np.identity(mat.shape[1]) + mat = np.array( + [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0]], + dtype=np.float64, + ) + mat[: mat.shape[1], :] += np.identity(mat.shape[1]) mod = NMF(n_components=2, max_iter=2) W = mod.fit_transform(mat) @@ -46,27 +46,24 @@ def nmf_to_onnx(W, H): and returns the predictions for it. It assumes these indices applies on the training data. 
""" - col = OnnxArrayFeatureExtractor(H, 'col') - row = OnnxArrayFeatureExtractor(W.T, 'row') + col = OnnxArrayFeatureExtractor(H, "col") + row = OnnxArrayFeatureExtractor(W.T, "row") dot = OnnxMul(col, row, op_version=TARGET_OPSET) - res = OnnxReduceSum(dot, output_names="rec", - op_version=TARGET_OPSET) + res = OnnxReduceSum(dot, output_names="rec", op_version=TARGET_OPSET) indices_type = np.array([0], dtype=np.int64) - onx = res.to_onnx(inputs={'col': indices_type, - 'row': indices_type}, - outputs=[('rec', FloatTensorType((None, 1)))]) + onx = res.to_onnx( + inputs={"col": indices_type, "row": indices_type}, + outputs=[("rec", FloatTensorType((None, 1)))], + ) return onx - model_onnx = nmf_to_onnx(W.astype(np.float32), - H.astype(np.float32)) + model_onnx = nmf_to_onnx(W.astype(np.float32), H.astype(np.float32)) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) def predict_onnx(sess, row_indices, col_indices): - res = sess.run(None, - {'col': col_indices, - 'row': row_indices}) + res = sess.run(None, {"col": col_indices, "row": row_indices}) return res onnx_preds = [] diff --git a/tests/test_sklearn_decision_tree_converters.py b/tests/test_sklearn_decision_tree_converters.py index e61d3dc59..4ecc9c582 100644 --- a/tests/test_sklearn_decision_tree_converters.py +++ b/tests/test_sklearn_decision_tree_converters.py @@ -7,8 +7,10 @@ from numpy.testing import assert_almost_equal from pandas import DataFrame from sklearn.tree import ( - DecisionTreeClassifier, DecisionTreeRegressor, - ExtraTreeClassifier, ExtraTreeRegressor + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, ) from sklearn.datasets import make_classification from skl2onnx.common.data_types import ( @@ -35,26 +37,27 @@ ) -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestSklearnDecisionTreeModels(unittest.TestCase): @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.3.0"), - reason="No suitable kernel definition found " - "for op Cast(9) (node Cast)") + reason="No suitable kernel definition found " "for op Cast(9) (node Cast)", + ) def test_decisiontree_classifier1(self): model = DecisionTreeClassifier(max_depth=2) X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] - model_onnx = convert_sklearn(model, initial_types=initial_types, - target_opset=TARGET_OPSET) + initial_types = [("input", FloatTensorType((None, X.shape[1])))] + model_onnx = convert_sklearn( + model, initial_types=initial_types, target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict_proba(X) if res[1][0][0] != pred[0, 0]: raise AssertionError("{}\n--\n{}".format(pred, DataFrame(res[1]))) @@ -64,13 +67,14 @@ def test_decisiontree_regressor0(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] - model_onnx = convert_sklearn(model, initial_types=initial_types, - target_opset=TARGET_OPSET) + initial_types = [("input", FloatTensorType((None, X.shape[1])))] + model_onnx 
= convert_sklearn( + model, initial_types=initial_types, target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) if res[0][0, 0] != pred[0]: raise AssertionError("{}\n--\n{}".format(pred, DataFrame(res[1]))) @@ -81,15 +85,17 @@ def test_decisiontree_regressor_decision_path(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) @@ -102,15 +108,17 @@ def test_decisiontree_regressor_decision_leaf(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) @@ -123,16 +131,17 @@ def test_decisiontree_regressor_decision_path_leaf(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True, - 'decision_path': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True, "decision_path": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) @@ -147,15 +156,17 @@ def test_decisiontree_classifier_decision_path(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = 
[('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) prob = model.predict_proba(X) @@ -170,15 +181,17 @@ def test_decisiontree_classifier_decision_leaf(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) prob = model.predict_proba(X) @@ -193,16 +206,23 @@ def test_decisiontree_classifier_decision_path_leaf(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True, 'decision_path': True, - 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={ + id(model): { + "decision_leaf": True, + "decision_path": True, + "zipmap": False, + } + }, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) prob = model.predict_proba(X) @@ -241,84 +261,109 @@ def test_extra_tree_regressor(self): def test_decision_tree_regressor_int(self): model, X = fit_regression_model( - DecisionTreeRegressor(random_state=42), is_int=True) + DecisionTreeRegressor(random_state=42), is_int=True + ) model_onnx = convert_sklearn( - model, "decision tree regression", + model, + "decision tree regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnDecisionTreeRegressionInt") + X, model, model_onnx, basename="SklearnDecisionTreeRegressionInt" + ) def test_model_multi_class_nocl(self): model, X = 
fit_classification_model( - DecisionTreeClassifier(), - 4, label_string=True) + DecisionTreeClassifier(), 4, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class nocl", + model, + "multi-class nocl", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx dump_data_and_model( - X, model, model_onnx, classes=model.classes_, - basename="SklearnDTMultiNoCl") + X, model, model_onnx, classes=model.classes_, basename="SklearnDTMultiNoCl" + ) def test_model_decision_tree_classifier_multilabel(self): model, X_test = fit_multilabel_classification_model( - DecisionTreeClassifier(random_state=42)) - options = {id(model): {'zipmap': False}} + DecisionTreeClassifier(random_state=42) + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn DecisionTreeClassifier", + model, + "scikit-learn DecisionTreeClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnDecisionTreeClassifierMultiLabel-Out0") + X_test, + model, + model_onnx, + basename="SklearnDecisionTreeClassifierMultiLabel-Out0", + ) def test_model_extra_tree_classifier_multilabel(self): model, X_test = fit_multilabel_classification_model( - ExtraTreeClassifier(random_state=42)) - options = {id(model): {'zipmap': False}} + ExtraTreeClassifier(random_state=42) + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn ExtraTreeClassifier", + model, + "scikit-learn ExtraTreeClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnExtraTreeClassifierMultiLabel-Out0") + X_test, + model, + model_onnx, + basename="SklearnExtraTreeClassifierMultiLabel-Out0", + ) def test_decision_tree_regressor_bool(self): model, X = fit_regression_model( - DecisionTreeRegressor(random_state=42), is_bool=True) + DecisionTreeRegressor(random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "decision tree regressor", + model, + "decision tree regressor", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnDecisionTreeRegressionBool-Dec4") + X, model, model_onnx, basename="SklearnDecisionTreeRegressionBool-Dec4" + ) def test_extra_tree_regressor_bool(self): model, X = fit_regression_model( - ExtraTreeRegressor(random_state=42), is_bool=True) + ExtraTreeRegressor(random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "extra tree regressor", + model, + "extra tree regressor", [("input", BooleanTensorType([None, X.shape[1]]))], - 
target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnExtraTreeRegressionBool-Dec4") + X, model, model_onnx, basename="SklearnExtraTreeRegressionBool-Dec4" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_dict_vectorizer_converter.py b/tests/test_sklearn_dict_vectorizer_converter.py index 44ff84d51..da28ab72c 100644 --- a/tests/test_sklearn_dict_vectorizer_converter.py +++ b/tests/test_sklearn_dict_vectorizer_converter.py @@ -15,11 +15,14 @@ StringTensorType, FloatTensorType, Int64TensorType, - BooleanTensorType) -from skl2onnx.common.data_types import onnx_built_with_ml + BooleanTensorType, +) from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph @@ -34,15 +37,20 @@ def test_model_dict_vectorizer(self): data = [{"amy": 1.0, "chin": 200.0}, {"nice": 3.0, "amy": 1.0}] model.fit_transform(data) model_onnx = convert_sklearn( - model, "dictionary vectorizer", - [( - "input", - DictionaryType(StringTensorType([1]), FloatTensorType([1])), - )], target_opset=TARGET_OPSET) + model, + "dictionary vectorizer", + [ + ( + "input", + DictionaryType(StringTensorType([1]), FloatTensorType([1])), + ) + ], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnDictVectorizer-OneOff-SkipDim1") + data, model, model_onnx, basename="SklearnDictVectorizer-OneOff-SkipDim1" + ) def test_model_dict_vectorizer_sort_false(self): model = DictVectorizer(sparse=False, sort=False) @@ -51,73 +59,91 @@ def test_model_dict_vectorizer_sort_false(self): model_onnx = convert_sklearn( model, "dictionary vectorizer", - [( - "input", - DictionaryType(Int64TensorType([1]), FloatTensorType([1])), - )], target_opset=TARGET_OPSET) + [ + ( + "input", + DictionaryType(Int64TensorType([1]), FloatTensorType([1])), + ) + ], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1") + data, + model, + model_onnx, + basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1", + ) def test_model_dict_vectorizer_issue(self): - key_value_map = [{1: 'A', 2: 'B'}, {1: 'C', 3: 'D'}, - {1: 'C', 3: 'A'}] + key_value_map = [{1: "A", 2: "B"}, {1: "C", 3: "D"}, {1: "C", 3: "A"}] model = DictVectorizer(sparse=False).fit(key_value_map) with self.assertRaises(RuntimeError): convert_sklearn( - model, 'dv', - [("input", DictionaryType(Int64TensorType([1]), - StringTensorType([1])))], - target_opset=TARGET_OPSET) + model, + "dv", + [ + ( + "input", + DictionaryType(Int64TensorType([1]), StringTensorType([1])), + ) + ], + target_opset=TARGET_OPSET, + ) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") def test_model_dict_vectorizer_pipeline_float(self): - data = [{'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}] + data = [ + {"ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, 
"ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + ] model = make_pipeline(DictVectorizer(sparse=False), StandardScaler()) model.fit(data) expected = model.transform(data) model_onnx = convert_sklearn( - model, 'dv', - [("input", DictionaryType(StringTensorType([1]), - FloatTensorType([1])))], - target_opset=TARGET_OPSET) + model, + "dv", + [("input", DictionaryType(StringTensorType([1]), FloatTensorType([1])))], + target_opset=TARGET_OPSET, + ) onnx.checker.check_model(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - inp = {'ALL_LOWER': numpy.array([1], dtype=numpy.float32), - 'NEXT_ALL_LOWER': numpy.array([1], dtype=numpy.float32)} - res = sess.run(None, {'input': inp}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + inp = { + "ALL_LOWER": numpy.array([1], dtype=numpy.float32), + "NEXT_ALL_LOWER": numpy.array([1], dtype=numpy.float32), + } + res = sess.run(None, {"input": inp}) assert_almost_equal(expected[0].ravel(), res[0].ravel()) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") def test_model_dict_vectorizer_pipeline_int(self): - data = [{'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}, - {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}] + data = [ + {"ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1}, + ] model = make_pipeline(DictVectorizer(sparse=False), StandardScaler()) model.fit(data) # expected = model.transform(data) model_onnx = convert_sklearn( - model, 'dv', - [("input", DictionaryType(StringTensorType([1]), - Int64TensorType([1])))], - target_opset=TARGET_OPSET) + model, + "dv", + [("input", DictionaryType(StringTensorType([1]), Int64TensorType([1])))], + target_opset=TARGET_OPSET, + ) onnx.checker.check_model(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - inp = {'ALL_LOWER': numpy.array(1, dtype=numpy.int64), - 'NEXT_ALL_LOWER': numpy.array(1, dtype=numpy.int64)} + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + inp = { + "ALL_LOWER": numpy.array(1, dtype=numpy.int64), + "NEXT_ALL_LOWER": numpy.array(1, dtype=numpy.int64), + } try: - got = sess.run(None, {'input': inp}) + got = sess.run(None, {"input": inp}) except InvalidArgument: return self.assertTrue(got is not None) @@ -125,33 +151,32 @@ def test_model_dict_vectorizer_pipeline_int(self): expected = model.transform(data) assert_almost_equal(expected[0], res) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") def test_model_dict_vectorizer_pipeline_boolean(self): - data = [{'ALL_LOWER': True, 'NEXT_ALL_LOWER': True}, - {'PREV_ALL_LOWER': True, 'ALL_LOWER': True, - 'NEXT_ALL_LOWER': True}, - {'PREV_ALL_LOWER': True, 'ALL_LOWER': True, - 'NEXT_ALL_LOWER': True}, - {'PREV_ALL_LOWER': True, 'ALL_LOWER': True, - 'NEXT_ALL_LOWER': True}] + data = [ + {"ALL_LOWER": True, "NEXT_ALL_LOWER": True}, + {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True}, + {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True}, + {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True}, + ] model = 
make_pipeline(DictVectorizer(sparse=False), StandardScaler()) model.fit(data) model_onnx = convert_sklearn( - model, 'dv', - [("input", DictionaryType(StringTensorType([1]), - BooleanTensorType([1])))], - target_opset=TARGET_OPSET) + model, + "dv", + [("input", DictionaryType(StringTensorType([1]), BooleanTensorType([1])))], + target_opset=TARGET_OPSET, + ) onnx.checker.check_model(model_onnx) try: sess = InferenceSession( model_onnx.SerializeToString(), providers=["CPUExecutionProvider"], - verbose=0) + verbose=0, + ) except InvalidGraph: return - got = sess.run(None, {'input': data}) + got = sess.run(None, {"input": data}) self.assertTrue(got is not None) diff --git a/tests/test_sklearn_documentation.py b/tests/test_sklearn_documentation.py index cf7904bfb..ce9e5922b 100644 --- a/tests/test_sklearn_documentation.py +++ b/tests/test_sklearn_documentation.py @@ -11,13 +11,18 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.datasets import fetch_20newsgroups + try: from sklearn.datasets._twenty_newsgroups import ( - strip_newsgroup_footer, strip_newsgroup_quoting) + strip_newsgroup_footer, + strip_newsgroup_quoting, + ) except ImportError: # scikit-learn < 0.24 from sklearn.datasets.twenty_newsgroups import ( - strip_newsgroup_footer, strip_newsgroup_quoting) + strip_newsgroup_footer, + strip_newsgroup_quoting, + ) from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.pipeline import Pipeline @@ -54,7 +59,7 @@ def transform(self, posts): sub = "" for line in headers.split("\n"): if line.startswith(prefix): - sub = line[len(prefix):] + sub = line[len(prefix) :] break features[i, 0] = sub @@ -64,18 +69,16 @@ def transform(self, posts): class TestSklearnDocumentation(unittest.TestCase): "Test example from the documentation of scikit-learn." 
+ @unittest.skipIf(sys.platform == "win32", reason="Too long on Windows") @unittest.skipIf( - sys.platform == "win32", - reason="Too long on Windows") - @unittest.skipIf( - TARGET_OPSET < 10, - reason="Encoding issue fixed in a later version") + TARGET_OPSET < 10, reason="Encoding issue fixed in a later version" + ) def test_pipeline_tfidf(self): categories = ["alt.atheism", "talk.religion.misc"] try: - train = fetch_20newsgroups(random_state=1, - subset="test", - categories=categories) + train = fetch_20newsgroups( + random_state=1, subset="test", categories=categories + ) except urllib.error.URLError: warnings.warn("Unit test may fail due to connectivity issue.") return @@ -85,49 +88,54 @@ def test_pipeline_tfidf(self): tfi.fit(tdata.ravel()) extra = { TfidfVectorizer: { - "separators": [ - " ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)" - ] + "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"] } } model_onnx = convert_sklearn( - tfi, "tfidf", + tfi, + "tfidf", initial_types=[("input", StringTensorType([1]))], - options=extra, target_opset=TARGET_OPSET + options=extra, + target_opset=TARGET_OPSET, ) dump_data_and_model( tdata[:5], tfi, model_onnx, - basename="SklearnDocumentationTfIdf-OneOff-SklCol") + basename="SklearnDocumentationTfIdf-OneOff-SklCol", + ) @unittest.skipIf( ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20", ) @unittest.skipIf( - TARGET_OPSET < 10, - reason="Encoding issue fixed in a later version") + TARGET_OPSET < 10, reason="Encoding issue fixed in a later version" + ) def test_pipeline_tfidf_pipeline_minmax(self): categories = ["alt.atheism", "talk.religion.misc"] try: - train = fetch_20newsgroups(random_state=1, - subset="train", - categories=categories) + train = fetch_20newsgroups( + random_state=1, subset="train", categories=categories + ) except urllib.error.URLError: warnings.warn("Unit test may fail due to connectivity issue.") return train_data = SubjectBodyExtractor().fit_transform(train.data) - pipeline = Pipeline([( - "union", - ColumnTransformer( - [ - ("subject", TfidfVectorizer(min_df=50), 0), - ("body", TfidfVectorizer(min_df=40), 1), - ], - transformer_weights={"subject": 0.8}, - ), - )]) + pipeline = Pipeline( + [ + ( + "union", + ColumnTransformer( + [ + ("subject", TfidfVectorizer(min_df=50), 0), + ("body", TfidfVectorizer(min_df=40), 1), + ], + transformer_weights={"subject": 0.8}, + ), + ) + ] + ) pipeline.fit(train_data[:300]) extra = { TfidfVectorizer: { @@ -152,20 +160,25 @@ def test_pipeline_tfidf_pipeline_minmax(self): } } model_onnx = convert_sklearn( - pipeline, "tfidf", + pipeline, + "tfidf", initial_types=[("input", StringTensorType([None, 2]))], - options=extra, target_opset=TARGET_OPSET + options=extra, + target_opset=TARGET_OPSET, + ) + test_data = np.array( + [ + ["Albert Einstein", "Not relatively."], + ["Alan turing", "Not automatically."], + ] ) - test_data = np.array([ - ["Albert Einstein", "Not relatively."], - ["Alan turing", "Not automatically."], - ]) dump_data_and_model( test_data, pipeline, model_onnx, verbose=False, - basename="SklearnDocumentationTfIdfUnion1") + basename="SklearnDocumentationTfIdfUnion1", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_double_tensor_type_cls.py b/tests/test_sklearn_double_tensor_type_cls.py index 9e91e5d28..ff2d39881 100644 --- a/tests/test_sklearn_double_tensor_type_cls.py +++ b/tests/test_sklearn_double_tensor_type_cls.py @@ -7,6 +7,7 @@ from sklearn.calibration import CalibratedClassifierCV from sklearn.exceptions import 
ConvergenceWarning from sklearn.ensemble import BaggingClassifier + # Requires PR #488. # from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.linear_model import LogisticRegression, SGDClassifier @@ -15,6 +16,7 @@ from sklearn.multiclass import OneVsRestClassifier from sklearn.naive_bayes import BernoulliNB from sklearn.svm import SVC + try: from sklearn.ensemble import VotingClassifier except ImportError: @@ -37,27 +39,33 @@ from skl2onnx.common.data_types import DoubleTensorType from onnxruntime import __version__ as ort_version from onnx import __version__ as onnx_version -from test_utils import ( - dump_data_and_model, fit_classification_model, TARGET_OPSET) +from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning) -ort_version = ort_version.split('+')[0] -ORT_VERSION = '1.7.0' -onnx_version = ".".join(onnx_version.split('.')[:2]) +ort_version = ort_version.split("+")[0] +ORT_VERSION = "1.7.0" +onnx_version = ".".join(onnx_version.split(".")[:2]) -LOG_LOSS = ("log_loss" if pv.Version(skl_version) >= pv.Version("1.1") - else "log") +LOG_LOSS = "log_loss" if pv.Version(skl_version) >= pv.Version("1.1") else "log" class TestSklearnDoubleTensorTypeClassifier(unittest.TestCase): - def _common_classifier( - self, model_cls_set, name_root=None, debug=False, - raw_scores=True, pos_features=False, is_int=False, - comparable_outputs=None, n_features=4, - n_repeated=None, n_redundant=None, verbose=False): + self, + model_cls_set, + name_root=None, + debug=False, + raw_scores=True, + pos_features=False, + is_int=False, + comparable_outputs=None, + n_features=4, + n_repeated=None, + n_redundant=None, + verbose=False, + ): for model_cls in model_cls_set: if name_root is None: name = model_cls.__name__ @@ -65,309 +73,370 @@ def _common_classifier( name = name_root for n_cl in [2, 3]: model, X = fit_classification_model( - model_cls(), n_cl, n_features=n_features, - pos_features=pos_features, is_int=is_int, - n_repeated=n_repeated, n_redundant=n_redundant) - pmethod = ('decision_function_binary' if n_cl == 2 else - 'decision_function') + model_cls(), + n_cl, + n_features=n_features, + pos_features=pos_features, + is_int=is_int, + n_repeated=n_repeated, + n_redundant=n_redundant, + ) + pmethod = ( + "decision_function_binary" if n_cl == 2 else "decision_function" + ) bs = [True, False] if raw_scores else [False] for b in bs: for z in [False]: # zipmap does not allow tensor(double) as inputs - with self.subTest(n_classes=n_cl, raw_scores=b, - model=name): + with self.subTest(n_classes=n_cl, raw_scores=b, model=name): if raw_scores: - options = {"raw_scores": b, - "zipmap": z} + options = {"raw_scores": b, "zipmap": z} else: options = {"zipmap": z} model_onnx = convert_sklearn( - model, "model", - [("input", DoubleTensorType( - [None, X.shape[1]]))], + model, + "model", + [("input", DoubleTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): options}) + options={id(model): options}, + ) if debug: print(model_onnx) self.assertIn("elem_type: 11", str(model_onnx)) - methods = None if not b else ['predict', pmethod] + methods = None if not b else ["predict", pmethod] if not b and n_cl == 2: # onnxruntime does not support sigmoid for # DoubleTensorType continue dump_data_and_model( - X.astype(np.float64)[:7], model, model_onnx, - methods=methods, verbose=verbose, + X.astype(np.float64)[:7], + model, + model_onnx, + methods=methods, + verbose=verbose, 
comparable_outputs=comparable_outputs, basename="Sklearn{}Double2RAW{}" - "ZIP{}CL{}".format( - name, - 1 if b else 0, - 1 if z else 0, n_cl)) + "ZIP{}CL{}".format( + name, 1 if b else 0, 1 if z else 0, n_cl + ), + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax is missing") + pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing" + ) @unittest.skipIf( - pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax is missing") + pv.Version(onnx_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing" + ) @ignore_warnings(category=warnings_to_skip) def test_model_logistic_64(self): self._common_classifier([LogisticRegression]) @unittest.skipIf( - pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax is missing") + pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing" + ) @unittest.skipIf( - pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax is missing") + pv.Version(onnx_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing" + ) @ignore_warnings(category=warnings_to_skip) def test_modelsgd_64(self): self._common_classifier([SGDClassifier]) - self._common_classifier([lambda: SGDClassifier(loss='hinge')], - "SGDClassifierHinge") - self._common_classifier([lambda: SGDClassifier(loss='perceptron')], - "SGDClassifierPerceptron") + self._common_classifier( + [lambda: SGDClassifier(loss="hinge")], "SGDClassifierHinge" + ) + self._common_classifier( + [lambda: SGDClassifier(loss="perceptron")], "SGDClassifierPerceptron" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Reciprocal are missing") + reason="ArgMax, Reciprocal are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Reciprocal are missing") + reason="ArgMax, Reciprocal are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_modelsgdlog_64(self): self._common_classifier( - [lambda: SGDClassifier(loss=LOG_LOSS, random_state=32)], - "SGDClassifierLog") + [lambda: SGDClassifier(loss=LOG_LOSS, random_state=32)], "SGDClassifierLog" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Relu are missing") + reason="ArgMax, Relu are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Relu are missing") + reason="ArgMax, Relu are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_mlpclassifier_relu_64(self): self._common_classifier( - [lambda: MLPClassifier(activation='relu')], - "MLPClassifierRelu", raw_scores=False) + [lambda: MLPClassifier(activation="relu")], + "MLPClassifierRelu", + raw_scores=False, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_mlpclassifier_tanh_64(self): self._common_classifier( - [lambda: MLPClassifier(activation='tanh', - hidden_layer_sizes=(2,))], - "MLPClassifierTanh", raw_scores=False) + [lambda: MLPClassifier(activation="tanh", hidden_layer_sizes=(2,))], + "MLPClassifierTanh", + raw_scores=False, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sigmoid are missing") + reason="ArgMax, Sigmoid are missing", + ) @unittest.skipIf( 
pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_mlpclassifier_logistic_64(self): self._common_classifier( - [lambda: MLPClassifier(activation='logistic', - hidden_layer_sizes=(2,))], - "MLPClassifierLogistic", raw_scores=False) + [lambda: MLPClassifier(activation="logistic", hidden_layer_sizes=(2,))], + "MLPClassifierLogistic", + raw_scores=False, + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax is missing") + pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing" + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_mlpclassifier_identity_64(self): self._common_classifier( - [lambda: MLPClassifier(activation='identity', - hidden_layer_sizes=(2,))], - "MLPClassifierIdentity", raw_scores=False) + [lambda: MLPClassifier(activation="identity", hidden_layer_sizes=(2,))], + "MLPClassifierIdentity", + raw_scores=False, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, TopK are missing") + reason="ArgMax, TopK are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_knn_64(self): self._common_classifier( - [lambda: KNeighborsClassifier()], - "KNeighborsClassifier", raw_scores=False) + [lambda: KNeighborsClassifier()], "KNeighborsClassifier", raw_scores=False + ) - @unittest.skipIf( - VotingClassifier is None, reason="scikit-learn too old") + @unittest.skipIf(VotingClassifier is None, reason="scikit-learn too old") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sum are missing") + reason="ArgMax, Sum are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_voting_64(self): - estimators = [('a', LogisticRegression()), - ('b', LogisticRegression())] + estimators = [("a", LogisticRegression()), ("b", LogisticRegression())] self._common_classifier( - [lambda: VotingClassifier(estimators, - flatten_transform=False)], - "VotingClassifier", raw_scores=False, - comparable_outputs=[0]) + [lambda: VotingClassifier(estimators, flatten_transform=False)], + "VotingClassifier", + raw_scores=False, + comparable_outputs=[0], + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, LpNormalization are missing") + reason="ArgMax, LpNormalization are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_64(self): self._common_classifier( [lambda: OneVsRestClassifier(LogisticRegression())], - "VotingClassifier", raw_scores=False) + "VotingClassifier", + raw_scores=False, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, LpNormalization are missing") + reason="ArgMax, LpNormalization are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are 
missing", + ) @ignore_warnings(category=warnings_to_skip) def test_svc_linear_64(self): self._common_classifier( - [lambda: SVC(kernel='linear')], "SVCLinear", - raw_scores=False) + [lambda: SVC(kernel="linear")], "SVCLinear", raw_scores=False + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sum are missing") + reason="ArgMax, Sum are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_svc_poly_64(self): self._common_classifier( - [lambda: SVC(kernel='poly')], "SVCpoly", - raw_scores=False) + [lambda: SVC(kernel="poly")], "SVCpoly", raw_scores=False + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sum are missing") + reason="ArgMax, Sum are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_svc_rbf_64(self): - self._common_classifier( - [lambda: SVC(kernel='rbf')], "SVCrbf", - raw_scores=False) + self._common_classifier([lambda: SVC(kernel="rbf")], "SVCrbf", raw_scores=False) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sum are missing") + reason="ArgMax, Sum are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_svc_sigmoid_64(self): self._common_classifier( - [lambda: SVC(kernel='sigmoid')], "SVCsigmoid", - raw_scores=False) + [lambda: SVC(kernel="sigmoid")], "SVCsigmoid", raw_scores=False + ) - @unittest.skipIf( - BernoulliNB is None, reason="new in scikit version 0.20") + @unittest.skipIf(BernoulliNB is None, reason="new in scikit version 0.20") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Log are missing") + reason="ArgMax, Log are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_bernoullinb_64(self): self._common_classifier( - [lambda: BernoulliNB()], "BernoulliNB", raw_scores=False) + [lambda: BernoulliNB()], "BernoulliNB", raw_scores=False + ) - @unittest.skipIf( - ComplementNB is None, reason="new in scikit version 0.20") + @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, ReduceLogSumExp are missing") + reason="ArgMax, ReduceLogSumExp are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_complementnb_64(self): self._common_classifier( - [lambda: ComplementNB()], "ComplementNB", - raw_scores=False, pos_features=True) + [lambda: ComplementNB()], + "ComplementNB", + raw_scores=False, + pos_features=True, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, ReduceMean are missing") + reason="ArgMax, ReduceMean are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") + reason="ArgMax, Tanh are missing", + ) @ignore_warnings(category=warnings_to_skip) def test_bagging_64(self): self._common_classifier( - [lambda: BaggingClassifier( - 
LogisticRegression(random_state=42), random_state=42)], - "BaggingClassifier") + [ + lambda: BaggingClassifier( + LogisticRegression(random_state=42), random_state=42 + ) + ], + "BaggingClassifier", + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sigmoid are missing") + reason="ArgMax, Sigmoid are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") - @unittest.skipIf( - StackingClassifier is None, reason="scikit-learn too old") + reason="ArgMax, Tanh are missing", + ) + @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old") @ignore_warnings(category=warnings_to_skip) def test_stacking_64(self): self._common_classifier( - [lambda: StackingClassifier([ - ('a', LogisticRegression()), - ('b', LogisticRegression())])], - "StackingClassifier") + [ + lambda: StackingClassifier( + [("a", LogisticRegression()), ("b", LogisticRegression())] + ) + ], + "StackingClassifier", + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sigmoid are missing") + reason="ArgMax, Sigmoid are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") - @unittest.skipIf( - StackingClassifier is None, reason="scikit-learn too old") + reason="ArgMax, Tanh are missing", + ) + @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old") @ignore_warnings(category=warnings_to_skip) def test_calibration_sigmoid_64(self): self._common_classifier( - [lambda: CalibratedClassifierCV( - base_estimator=LogisticRegression(), method='sigmoid')], + [ + lambda: CalibratedClassifierCV( + base_estimator=LogisticRegression(), method="sigmoid" + ) + ], "CalibratedClassifierCV", - raw_scores=False) + raw_scores=False, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Sigmoid are missing") + reason="ArgMax, Sigmoid are missing", + ) @unittest.skipIf( pv.Version(onnx_version) < pv.Version(ORT_VERSION), - reason="ArgMax, Tanh are missing") - @unittest.skipIf( - StackingClassifier is None, reason="scikit-learn too old") - @unittest.skipIf( - True, reason="Converter does not call IsotonicRegression") + reason="ArgMax, Tanh are missing", + ) + @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old") + @unittest.skipIf(True, reason="Converter does not call IsotonicRegression") @ignore_warnings(category=warnings_to_skip) def test_calibration_isotonic_64(self): self._common_classifier( - [lambda: CalibratedClassifierCV( - base_estimator=LogisticRegression(), method='isotonic')], + [ + lambda: CalibratedClassifierCV( + base_estimator=LogisticRegression(), method="isotonic" + ) + ], "CalibratedClassifierCV", - raw_scores=False) + raw_scores=False, + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_double_tensor_type_reg.py b/tests/test_sklearn_double_tensor_type_reg.py index 3d5fca4ce..e4aa67cbd 100644 --- a/tests/test_sklearn_double_tensor_type_reg.py +++ b/tests/test_sklearn_double_tensor_type_reg.py @@ -6,6 +6,7 @@ import packaging.version as pv import numpy as np from sklearn.exceptions import ConvergenceWarning + try: from sklearn.utils._testing import ignore_warnings except ImportError: @@ -15,6 +16,7 @@ from sklearn.linear_model import LinearRegression, SGDRegressor from sklearn.neighbors import KNeighborsRegressor from sklearn.neural_network import MLPRegressor + try: from sklearn.ensemble import VotingRegressor except 
ImportError: @@ -23,8 +25,7 @@ from skl2onnx import convert_sklearn, to_onnx from skl2onnx.common.data_types import DoubleTensorType from onnxruntime import __version__ as ort_version -from test_utils import ( - dump_data_and_model, fit_regression_model, TARGET_OPSET) +from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning) @@ -32,127 +33,162 @@ class TestSklearnDoubleTensorTypeRegressor(unittest.TestCase): @unittest.skipIf( pv.Version(ort_version) <= pv.Version("1.2.0"), - reason="onnxruntime misses implementation for double") + reason="onnxruntime misses implementation for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_linear_regression_64(self): model, X = fit_regression_model(LinearRegression()) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIn("elem_type: 11", str(model_onnx)) dump_data_and_model( - X.astype(np.float64), model, model_onnx, - basename="SklearnLinearRegressionDouble") + X.astype(np.float64), + model, + model_onnx, + basename="SklearnLinearRegressionDouble", + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.7.0"), reason="onnxruntime misses implementation for " - "Relu, Tanh, Sigmoid for double") + "Relu, Tanh, Sigmoid for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_mlpregressor_64(self): # Could not find an implementation for the node Relu:Relu(6) # Could not find an implementation for the node Tanh:Tanh(6) # Could not find an implementation for the node Sigmoid:Sigmoid(6) - for activation in ['relu', 'tanh', 'logistic']: + for activation in ["relu", "tanh", "logistic"]: with self.subTest(activation=activation): - model, X = fit_regression_model( - MLPRegressor(activation=activation)) + model, X = fit_regression_model(MLPRegressor(activation=activation)) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIn("elem_type: 11", str(model_onnx)) dump_data_and_model( - X.astype(np.float64), model, model_onnx, - basename="SklearnMLPRegressorDouble%s" % activation) + X.astype(np.float64), + model, + model_onnx, + basename="SklearnMLPRegressorDouble%s" % activation, + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.7.0"), - reason="onnxruntime misses implementation for " - "ReduceMean for double") + reason="onnxruntime misses implementation for " "ReduceMean for double", + ) @ignore_warnings(category=warnings_to_skip) def test_bagging_regressor_sgd_64(self): # Could not find an implementation for # the node ReduceMean:ReduceMean(11) - model, X = fit_regression_model( - BaggingRegressor(SGDRegressor())) + model, X = fit_regression_model(BaggingRegressor(SGDRegressor())) model_onnx = convert_sklearn( - model, "bagging regressor", + model, + "bagging regressor", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float64), model, model_onnx, - basename="SklearnBaggingRegressorSGDDouble") + X.astype(np.float64), + model, + model_onnx, + basename="SklearnBaggingRegressorSGDDouble", + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("1.2.0"), - reason="onnxruntime misses 
implementation for double") + reason="onnxruntime misses implementation for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_sgd_regressor_64(self): model, X = fit_regression_model(SGDRegressor()) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIn("elem_type: 11", str(model_onnx)) dump_data_and_model( - X.astype(np.float64), model, model_onnx, - basename="SklearnLinearSGDRegressorDouble") + X.astype(np.float64), + model, + model_onnx, + basename="SklearnLinearSGDRegressorDouble", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.7.0"), - reason="shape_inference fails") + pv.Version(ort_version) < pv.Version("1.7.0"), reason="shape_inference fails" + ) @ignore_warnings(category=warnings_to_skip) def test_gpr_rbf_fitted_true_double(self): gp = GaussianProcessRegressor( - alpha=1e-7, n_restarts_optimizer=15, normalize_y=True) + alpha=1e-7, n_restarts_optimizer=15, normalize_y=True + ) gp, X = fit_regression_model(gp) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float64), gp, model_onnx, verbose=False, - basename="SklearnGaussianProcessRBFTDouble") + X.astype(np.float64), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBFTDouble", + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.7.0"), - reason="onnxruntime misses implementation for " - "TopK for double") + reason="onnxruntime misses implementation for " "TopK for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_knn_regressor_double(self): # Could not find an implementation for the node To_TopK:TopK(11) model, X = fit_regression_model(KNeighborsRegressor(n_neighbors=2)) model_onnx = convert_sklearn( - model, "KNN regressor", + model, + "KNN regressor", [("input", DoubleTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'optim': 'cdist'}}) + options={id(model): {"optim": "cdist"}}, + ) dump_data_and_model( X.astype(np.float64)[:7], - model, model_onnx, - basename="SklearnKNeighborsRegressorDouble") + model, + model_onnx, + basename="SklearnKNeighborsRegressorDouble", + ) @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.7.0"), - reason="onnxruntime misses implementation for " - "Sum for double") + reason="onnxruntime misses implementation for " "Sum for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_voting_regression(self): # Could not find an implementation for the node Sum:Sum(8) - model = VotingRegressor([ - ('lr', LinearRegression()), - ('dt', SGDRegressor())]) + model = VotingRegressor([("lr", LinearRegression()), ("dt", SGDRegressor())]) model, X = fit_regression_model(model) model_onnx = convert_sklearn( - model, "voting regression", + model, + "voting regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(np.float64), model, model_onnx, + X.astype(np.float64), + model, + model_onnx, basename="SklearnVotingRegressorDouble", - comparable_outputs=[0]) + comparable_outputs=[0], + ) if __name__ == "__main__": diff --git 
a/tests/test_sklearn_double_tensor_type_tr.py b/tests/test_sklearn_double_tensor_type_tr.py index ba3c311c4..78e2e7523 100644 --- a/tests/test_sklearn_double_tensor_type_tr.py +++ b/tests/test_sklearn_double_tensor_type_tr.py @@ -5,6 +5,7 @@ from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split from sklearn.exceptions import ConvergenceWarning + try: from sklearn.utils._testing import ignore_warnings except ImportError: @@ -13,10 +14,12 @@ from sklearn.decomposition import PCA from sklearn.mixture import GaussianMixture, BayesianGaussianMixture from sklearn.preprocessing import Binarizer + try: from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail from onnxruntime.capi.onnxruntime_pybind11_state import ( - NotImplemented as OrtNotImplemented) + NotImplemented as OrtNotImplemented, + ) except ImportError: OrtFail = RuntimeError OrtNotImplemented = RuntimeError @@ -25,22 +28,21 @@ from skl2onnx.common.data_types import DoubleTensorType from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -warnings_to_skip = ( - DeprecationWarning, FutureWarning, ConvergenceWarning, UserWarning) +warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning, UserWarning) ORT_VERSION = "1.7.0" OPSET_VERSION = 11 -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnDoubleTensorTypeTransformer(unittest.TestCase): - - def _common_transform( - self, model_cls_set, name_root=None, debug=False): + def _common_transform(self, model_cls_set, name_root=None, debug=False): for model_cls in model_cls_set: if name_root is None: name = model_cls.__name__ @@ -51,26 +53,32 @@ def _common_transform( X = np.random.randn(100, 4).astype(np.float64) model.fit(X) X = np.random.randn(100, 4).astype(np.float64) - pmethod = 'transform' + pmethod = "transform" with self.subTest(model=name): options = {} model_onnx = convert_sklearn( - model, "model", + model, + "model", [("input", DoubleTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): options}) + options={id(model): options}, + ) if debug: print(model_onnx) self.assertIn("elem_type: 11", str(model_onnx)) methods = [pmethod] dump_data_and_model( - X.astype(np.float64), model, model_onnx, + X.astype(np.float64), + model, + model_onnx, methods=methods, - basename="Sklearn{}Double".format(name)) + basename="Sklearn{}Double".format(name), + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("0.5.0"), - reason="onnxruntime misses operator for double") + reason="onnxruntime misses operator for double", + ) @ignore_warnings(category=warnings_to_skip) def test_scaler_64(self): self._common_transform([StandardScaler]) @@ -95,101 +103,110 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None): exp = model.score_samples(X) expp = model.predict_proba(X) onx = to_onnx( - model, X[:1], target_opset=tg, - options={id(model): {'score_samples': True}}, - black_op=black_op) + model, + X[:1], + target_opset=tg, + options={id(model): {"score_samples": True}}, + black_op=black_op, + ) try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except OrtFail as e: - raise RuntimeError('Issue {}\n{}'.format(e, str(onx))) - got = 
sess.run(None, {'X': X}) + raise RuntimeError("Issue {}\n{}".format(e, str(onx))) + got = sess.run(None, {"X": X}) self.assertEqual(len(got), 3) - np.testing.assert_almost_equal( - expp.ravel(), got[1].ravel(), decimal=decimal) - np.testing.assert_almost_equal( - exp.ravel(), got[2].ravel(), decimal=decimal) + np.testing.assert_almost_equal(expp.ravel(), got[1].ravel(), decimal=decimal) + np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal) @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnxruntime misses Gemm for double") + TARGET_OPSET < OPSET_VERSION, reason="onnxruntime misses Gemm for double" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_gaussian_mixture_binary_classification(self): - model, X = self._fit_model_binary_classification( - GaussianMixture(), load_iris()) + model, X = self._fit_model_binary_classification(GaussianMixture(), load_iris()) for tg in range(min(9, TARGET_OPSET), TARGET_OPSET + 1): with self.subTest(target_opset=tg): if tg < 11: with self.assertRaises(RuntimeError): model_onnx = convert_sklearn( - model, "gaussian_mixture", - [("input", DoubleTensorType([ - None, X.shape[1]]))], - target_opset=tg) + model, + "gaussian_mixture", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=tg, + ) continue model_onnx = convert_sklearn( - model, "gaussian_mixture", + model, + "gaussian_mixture", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=tg) + target_opset=tg, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinGaussianMixtureDouble") + X, model, model_onnx, basename="SklearnBinGaussianMixtureDouble" + ) self._test_score(model, X, tg) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_bayesian_mixture_binary_classification(self): for cov in ["full", "tied", "diag", "spherical"]: with self.subTest(cov=cov): model, X = self._fit_model_binary_classification( - BayesianGaussianMixture(), load_iris(), - covariance_type=cov) + BayesianGaussianMixture(), load_iris(), covariance_type=cov + ) model_onnx = convert_sklearn( - model, "gaussian_mixture", + model, + "gaussian_mixture", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinBayesianGaussianMixtureDouble") + X, + model, + model_onnx, + basename="SklearnBinBayesianGaussianMixtureDouble", + ) self._test_score(model, X, TARGET_OPSET) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_model_gaussian_mixture_multiclass(self): model, X = self._fit_model_multiclass_classification( 
- GaussianMixture(), load_iris()) + GaussianMixture(), load_iris() + ) model_onnx = convert_sklearn( - model, "gaussian_mixture", + model, + "gaussian_mixture", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclGaussianMixtureDouble") + X, model, model_onnx, basename="SklearnMclGaussianMixtureDouble" + ) self._test_score(model, X, TARGET_OPSET) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_comp2(self): data = load_iris() @@ -197,190 +214,243 @@ def test_gaussian_mixture_comp2(self): model = GaussianMixture(n_components=2) model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float64)[40:60], model, model_onnx, + X.astype(np.float64)[40:60], + model, + model_onnx, basename="GaussianMixtureC2Double", - intermediate_steps=False) + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_full(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float64)[40:60], model, model_onnx, + X.astype(np.float64)[40:60], + model, + model_onnx, basename="GaussianMixtureC2FullDouble", - intermediate_steps=False) + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET) @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnxruntime misses Gemm for double") + TARGET_OPSET < OPSET_VERSION, reason="onnxruntime misses Gemm for double" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_tied(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='tied') + model = GaussianMixture(n_components=2, covariance_type="tied") model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) 
dump_data_and_model( X.astype(np.float64)[40:60], - model, model_onnx, basename="GaussianMixtureC2TiedDouble", - intermediate_steps=False) + model, + model_onnx, + basename="GaussianMixtureC2TiedDouble", + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_diag(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='diag') + model = GaussianMixture(n_components=2, covariance_type="diag") model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) - self.assertIn('ReduceLogSumExp', str(model_onnx)) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) + self.assertIn("ReduceLogSumExp", str(model_onnx)) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float64)[40:60], - model, model_onnx, basename="GaussianMixtureC2DiagDouble", - intermediate_steps=False) + model, + model_onnx, + basename="GaussianMixtureC2DiagDouble", + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET, decimal=4) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_spherical(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='spherical') + model = GaussianMixture(n_components=2, covariance_type="spherical") model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float64)[40:60], - model, model_onnx, basename="GaussianMixtureC2SphericalDouble", - intermediate_steps=False) + model, + model_onnx, + basename="GaussianMixtureC2SphericalDouble", + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET, decimal=4) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def _test_gaussian_mixture_full_black_op(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) with self.assertRaises(RuntimeError): convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'Add'}) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + 
black_op={"Add"}, + ) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'ReduceLogSumExp'}) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + black_op={"ReduceLogSumExp"}, + ) self.assertIsNotNone(model_onnx) - self.assertNotIn('ReduceLogSumExp', str(model_onnx)) + self.assertNotIn("ReduceLogSumExp", str(model_onnx)) dump_data_and_model( X.astype(np.float64)[40:60], - model, model_onnx, basename="GaussianMixtureC2FullBLDouble", - intermediate_steps=False) + model, + model_onnx, + basename="GaussianMixtureC2FullBLDouble", + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") - @unittest.skipIf(TARGET_OPSET < 11, - reason="OnnxEqual does not support float") + reason="onnxruntime misses Gemm for double", + ) + @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float") @ignore_warnings(category=warnings_to_skip) def _test_gaussian_mixture_full_black_op_noargmax(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) with self.assertRaises(RuntimeError): convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'Add'}) + model, + "GM", + [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + black_op={"Add"}, + ) model_onnx = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], + model, + "GM", + [("input", DoubleTensorType([None, 4]))], target_opset=TARGET_OPSET, - black_op={'ReduceLogSumExp', 'ArgMax'}) + black_op={"ReduceLogSumExp", "ArgMax"}, + ) self.assertIsNotNone(model_onnx) - self.assertNotIn('ArgMax', str(model_onnx)) + self.assertNotIn("ArgMax", str(model_onnx)) dump_data_and_model( X.astype(np.float64)[40:60], - model, model_onnx, + model, + model_onnx, basename="GaussianMixtureC2FullBLNMDouble", - intermediate_steps=False) + intermediate_steps=False, + ) self._test_score(model, X, TARGET_OPSET) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") - @unittest.skipIf(TARGET_OPSET < 11, - reason="OnnxEqual does not support float") + reason="onnxruntime misses Gemm for double", + ) + @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float") @ignore_warnings(category=warnings_to_skip) def test_gaussian_mixture_full_black_op_noargmax_inf(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=10, covariance_type='full') + model = GaussianMixture(n_components=10, covariance_type="full") model.fit(X) model_onnx1 = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], + model, + "GM", + [("input", DoubleTensorType([None, 4]))], target_opset=TARGET_OPSET, - options={id(model): {'score_samples': True}}) + options={id(model): {"score_samples": True}}, + ) model_onnx2 = convert_sklearn( - model, "GM", [("input", DoubleTensorType([None, 4]))], + model, + "GM", + [("input", DoubleTensorType([None, 4]))], target_opset=TARGET_OPSET, - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp', 'ArgMax'}) - self.assertNotIn('ArgMax', str(model_onnx2)) + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp", "ArgMax"}, + ) + self.assertNotIn("ArgMax", 
str(model_onnx2)) sess1 = InferenceSession( - model_onnx1.SerializeToString(), - providers=["CPUExecutionProvider"]) - res1 = sess1.run(None, {'input': (X[:5] * 1e2).astype(np.float64)}) + model_onnx1.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res1 = sess1.run(None, {"input": (X[:5] * 1e2).astype(np.float64)}) a1, b1, c1 = res1 sess2 = InferenceSession( - model_onnx2.SerializeToString(), - providers=["CPUExecutionProvider"]) - res2 = sess2.run(None, {'input': (X[:5] * 1e2).astype(np.float64)}) + model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res2 = sess2.run(None, {"input": (X[:5] * 1e2).astype(np.float64)}) a2, b2, c2 = res2 self.assertEqual(b1.max(), b2.max()) @@ -389,34 +459,36 @@ def test_gaussian_mixture_full_black_op_noargmax_inf(self): self.assertLess(abs(c1.min() - c2.min()) / c2.min(), 1e-5) self._test_score( - model, X, TARGET_OPSET, black_op={'ReduceLogSumExp', 'ArgMax'}, - decimal=2) + model, X, TARGET_OPSET, black_op={"ReduceLogSumExp", "ArgMax"}, decimal=2 + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Where for double") + reason="onnxruntime misses Where for double", + ) @ignore_warnings(category=warnings_to_skip) def test_binarizer(self): - data = np.array([[1., -1., 2.], - [2., 0., 0.], - [0., 1., -1.]], dtype=np.float64) + data = np.array( + [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]], dtype=np.float64 + ) model = Binarizer(threshold=0.5) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn binarizer", + model, + "scikit-learn binarizer", [("input", DoubleTensorType(data.shape))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnBinarizerDouble-SkipDim1") + data, model, model_onnx, basename="SklearnBinarizerDouble-SkipDim1" + ) - @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses Gemm for double") + @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double") @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_kmeans_clustering(self): data = load_iris() @@ -424,20 +496,23 @@ def test_kmeans_clustering(self): model = KMeans(n_clusters=3) model.fit(X) model_onnx = convert_sklearn( - model, "kmeans", + model, + "kmeans", [("input", DoubleTensorType([None, 4]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[40:60], model, model_onnx, - basename="SklearnKMeansDoubleGemm-Dec4") + X[40:60], model, model_onnx, basename="SklearnKMeansDoubleGemm-Dec4" + ) @unittest.skipIf( - TARGET_OPSET < OPSET_VERSION, - reason="onnx misses ArgMin for double") + TARGET_OPSET < OPSET_VERSION, reason="onnx misses ArgMin for double" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version(ORT_VERSION), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_kmeans_clustering_nogemm(self): data = load_iris() @@ -445,36 +520,40 @@ def test_kmeans_clustering_nogemm(self): model = KMeans(n_clusters=3) model.fit(X) model_onnx = convert_sklearn( - model, "kmeans", + model, + "kmeans", [("input", DoubleTensorType([None, 4]))], target_opset=TARGET_OPSET, - options={id(model): {'gemm': 
False}}) + options={id(model): {"gemm": False}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[40:60], model, model_onnx, - basename="SklearnKMeansDoubleNoGemm-Dec4") + X[40:60], model, model_onnx, basename="SklearnKMeansDoubleNoGemm-Dec4" + ) @unittest.skipIf( pv.Version(ort_version) < pv.Version("0.5.0"), - reason="onnxruntime misses Gemm for double") + reason="onnxruntime misses Gemm for double", + ) @ignore_warnings(category=warnings_to_skip) def test_pca_default(self): - def _fit_model_pca(model): data = load_diabetes() X_train, X_test, *_ = train_test_split( - data.data, data.target, test_size=0.2, random_state=42) + data.data, data.target, test_size=0.2, random_state=42 + ) model.fit(X_train) return model, X_test.astype(np.float64) model, X_test = _fit_model_pca(PCA(random_state=42, n_components=2)) model_onnx = convert_sklearn( - model, initial_types=[ - ("input", DoubleTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + initial_types=[("input", DoubleTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnPCADoubleDefault") + X_test, model, model_onnx, basename="SklearnPCADoubleDefault" + ) # Untested operators: # * float parameters only: diff --git a/tests/test_sklearn_feature_hasher.py b/tests/test_sklearn_feature_hasher.py index f1fa643d4..50dfc8d4c 100644 --- a/tests/test_sklearn_feature_hasher.py +++ b/tests/test_sklearn_feature_hasher.py @@ -9,62 +9,78 @@ from pandas import DataFrame from onnx import TensorProto from onnx.helper import ( - make_model, make_node, - make_graph, make_tensor_value_info, make_opsetid) + make_model, + make_node, + make_graph, + make_tensor_value_info, + make_opsetid, +) from onnx.checker import check_model from onnxruntime import __version__ as ort_version from sklearn.feature_extraction import FeatureHasher from skl2onnx import to_onnx from skl2onnx.common.data_types import ( - StringTensorType, Int64TensorType, FloatTensorType, - DoubleTensorType) -from test_utils import ( - TARGET_OPSET, TARGET_IR, - InferenceSessionEx as InferenceSession) + StringTensorType, + Int64TensorType, + FloatTensorType, + DoubleTensorType, +) +from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession class TestSklearnFeatureHasher(unittest.TestCase): - - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.12.0"), - reason="no murmurhash3 in ort") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.12.0"), reason="no murmurhash3 in ort" + ) def test_ort_murmurhash3_int(self): - X = make_tensor_value_info('X', TensorProto.UINT32, [None]) - Y = make_tensor_value_info('Y', TensorProto.UINT32, [None]) - node = make_node('MurmurHash3', ['X'], ['Y'], domain="com.microsoft", - positive=1, seed=0) - graph = make_graph([node], 'hash', [X], [Y]) - onnx_model = make_model(graph, opset_imports=[ - make_opsetid('', TARGET_OPSET), - make_opsetid('com.microsoft', 1)], - ir_version=TARGET_IR) + X = make_tensor_value_info("X", TensorProto.UINT32, [None]) + Y = make_tensor_value_info("Y", TensorProto.UINT32, [None]) + node = make_node( + "MurmurHash3", ["X"], ["Y"], domain="com.microsoft", positive=1, seed=0 + ) + graph = make_graph([node], "hash", [X], [Y]) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", TARGET_OPSET), + make_opsetid("com.microsoft", 1), + ], + ir_version=TARGET_IR, + ) check_model(onnx_model) sess = InferenceSession( - onnx_model.SerializeToString(), - 
providers=["CPUExecutionProvider"]) - feeds = {'X': np.array([0, 1, 2, 3, 4, 5], dtype=np.uint32)} + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + feeds = {"X": np.array([0, 1, 2, 3, 4, 5], dtype=np.uint32)} got = sess.run(None, feeds) self.assertEqual(got[0].shape, feeds["X"].shape) self.assertEqual(got[0].dtype, feeds["X"].dtype) - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.12.0"), - reason="no murmurhash3 in ort") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.12.0"), reason="no murmurhash3 in ort" + ) def test_ort_murmurhash3_string(self): - X = make_tensor_value_info('X', TensorProto.STRING, [None]) - Y = make_tensor_value_info('Y', TensorProto.INT32, [None]) - node = make_node('MurmurHash3', ['X'], ['Y'], domain="com.microsoft", - positive=0, seed=0) - graph = make_graph([node], 'hash', [X], [Y]) - onnx_model = make_model(graph, opset_imports=[ - make_opsetid('', TARGET_OPSET), - make_opsetid('com.microsoft', 1)], - ir_version=TARGET_IR) + X = make_tensor_value_info("X", TensorProto.STRING, [None]) + Y = make_tensor_value_info("Y", TensorProto.INT32, [None]) + node = make_node( + "MurmurHash3", ["X"], ["Y"], domain="com.microsoft", positive=0, seed=0 + ) + graph = make_graph([node], "hash", [X], [Y]) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", TARGET_OPSET), + make_opsetid("com.microsoft", 1), + ], + ir_version=TARGET_IR, + ) check_model(onnx_model) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) - input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555'] - feeds = {'X': np.array(input_strings)} + input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"] + feeds = {"X": np.array(input_strings)} got = sess.run(None, feeds) n_features = 4 @@ -81,7 +97,7 @@ def test_ort_murmurhash3_string(self): for i in range(final.shape[0]): mat[i, indices[i]] = final[i] - skl = FeatureHasher(n_features, input_type='string', dtype=np.uint32) + skl = FeatureHasher(n_features, input_type="string", dtype=np.uint32) expected = skl.transform(feeds["X"].reshape((-1, 1))) dense = expected.todense() for i, (a, b) in enumerate(zip(dense.tolist(), mat.tolist())): @@ -90,12 +106,14 @@ def test_ort_murmurhash3_string(self): def test_feature_hasher(self): n_features = 5 - input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555'] + input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"] data = np.array(input_strings).reshape((-1, 1)) - for alternate_sign, dtype in [(True, np.float32), - (True, np.float64), - (True, np.int64), - (False, np.float32)]: + for alternate_sign, dtype in [ + (True, np.float32), + (True, np.float64), + (True, np.int64), + (False, np.float32), + ]: if dtype == np.float32: final_type = FloatTensorType elif dtype == np.float64: @@ -105,61 +123,64 @@ def test_feature_hasher(self): else: final_type = None with self.subTest(alternate_sign=alternate_sign, dtype=dtype): - model = FeatureHasher(n_features=n_features, - alternate_sign=alternate_sign, - dtype=dtype, - input_type='string') + model = FeatureHasher( + n_features=n_features, + alternate_sign=alternate_sign, + dtype=dtype, + input_type="string", + ) model.fit(data) expected = model.transform(data).todense() model_onnx = to_onnx( - model, initial_types=[("X", StringTensorType([None, 1]))], + model, + initial_types=[("X", StringTensorType([None, 1]))], target_opset=TARGET_OPSET, - final_types=[('Y', 
final_type([None, 1]))]) + final_types=[("Y", final_type([None, 1]))], + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': data}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": data}) self.assertEqual(expected.shape, got[0].shape) self.assertEqual(expected.dtype, got[0].dtype) - for i, (a, b) in enumerate(zip(expected.tolist(), - got[0].tolist())): + for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())): if a != b: - raise AssertionError( - f"Discrepancies at line {i}: {a} != {b}") + raise AssertionError(f"Discrepancies at line {i}: {a} != {b}") def test_feature_hasher_two_columns(self): n_features = 5 - input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555'] + input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"] data = np.array(input_strings).reshape((-1, 2)) - model = FeatureHasher(n_features=n_features, - alternate_sign=True, - dtype=np.float32, - input_type='string') + model = FeatureHasher( + n_features=n_features, + alternate_sign=True, + dtype=np.float32, + input_type="string", + ) model.fit(data) expected = model.transform(data).todense() model_onnx = to_onnx( - model, initial_types=[ - ("X", StringTensorType([None, data.shape[1]]))], + model, + initial_types=[("X", StringTensorType([None, data.shape[1]]))], target_opset=TARGET_OPSET, - final_types=[('Y', FloatTensorType([None, n_features]))]) + final_types=[("Y", FloatTensorType([None, n_features]))], + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': data}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": data}) self.assertEqual(expected.shape, got[0].shape) self.assertEqual(expected.dtype, got[0].dtype) - for i, (a, b) in enumerate(zip(expected.tolist(), - got[0].tolist())): + for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())): if a != b: - raise AssertionError( - f"Discrepancies at line {i}: {a} != {b}") + raise AssertionError(f"Discrepancies at line {i}: {a} != {b}") def test_feature_hasher_dataframe(self): n_features = 5 - input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555'] + input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"] data = np.array(input_strings).reshape((-1, 2)) data = DataFrame(data) data.columns = ["c1", "c2"] @@ -174,59 +195,61 @@ def test_feature_hasher_dataframe(self): if df != ar: return - model = FeatureHasher(n_features=n_features, - alternate_sign=True, - dtype=np.float32, - input_type='string') + model = FeatureHasher( + n_features=n_features, + alternate_sign=True, + dtype=np.float32, + input_type="string", + ) model.fit(data) expected = model.transform(data).todense() print(expected) model_onnx = to_onnx( - model, initial_types=[ - ("X", StringTensorType([None, data.shape[0]]))], + model, + initial_types=[("X", StringTensorType([None, data.shape[0]]))], target_opset=TARGET_OPSET, - final_types=[('Y', FloatTensorType([None, n_features]))]) + final_types=[("Y", FloatTensorType([None, n_features]))], + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': data_nx}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": data_nx}) self.assertEqual(expected.shape, got[0].shape) self.assertEqual(expected.dtype, got[0].dtype) - for i, (a, b) in 
enumerate(zip(expected.tolist(), - got[0].tolist())): + for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())): if a != b: - raise AssertionError( - f"Discrepancies at line {i}: {a} != {b}") + raise AssertionError(f"Discrepancies at line {i}: {a} != {b}") def test_feature_hasher_two_columns_unicode(self): n_features = 5 - input_strings = ['z0', 'o11', 'd222', '고리', 'é', 'ô'] + input_strings = ["z0", "o11", "d222", "고리", "é", "ô"] data = np.array(input_strings).reshape((-1, 2)) - model = FeatureHasher(n_features=n_features, - alternate_sign=True, - dtype=np.float32, - input_type='string') + model = FeatureHasher( + n_features=n_features, + alternate_sign=True, + dtype=np.float32, + input_type="string", + ) model.fit(data) expected = model.transform(data).todense() model_onnx = to_onnx( - model, initial_types=[ - ("X", StringTensorType([None, data.shape[1]]))], + model, + initial_types=[("X", StringTensorType([None, data.shape[1]]))], target_opset=TARGET_OPSET, - final_types=[('Y', FloatTensorType([None, n_features]))]) + final_types=[("Y", FloatTensorType([None, n_features]))], + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': data}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": data}) self.assertEqual(expected.shape, got[0].shape) self.assertEqual(expected.dtype, got[0].dtype) - for i, (a, b) in enumerate(zip(expected.tolist(), - got[0].tolist())): + for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())): if a != b: - raise AssertionError( - f"Discrepancies at line {i}: {a} != {b}") + raise AssertionError(f"Discrepancies at line {i}: {a} != {b}") if __name__ == "__main__": diff --git a/tests/test_sklearn_feature_selection_converters.py b/tests/test_sklearn_feature_selection_converters.py index f8418d508..63f3136d0 100644 --- a/tests/test_sklearn_feature_selection_converters.py +++ b/tests/test_sklearn_feature_selection_converters.py @@ -30,122 +30,134 @@ class TestSklearnFeatureSelectionConverters(unittest.TestCase): def test_generic_univariate_select_int(self): model = GenericUnivariateSelect() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "generic univariate select", + model, + "generic univariate select", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGenericUnivariateSelect") + X, model, model_onnx, basename="SklearnGenericUnivariateSelect" + ) def test_rfe_int(self): model = RFE(estimator=SVR(kernel="linear")) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "rfe", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "rfe", + [("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, model_onnx, basename="SklearnRFE", - methods=["transform"]) + X, model, model_onnx, basename="SklearnRFE", methods=["transform"] + ) def 
test_rfecv_int(self): model = RFECV(estimator=SVR(kernel="linear"), cv=3) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "rfecv", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "rfecv", + [("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, model_onnx, basename="SklearnRFECV", - methods=["transform"]) + X, model, model_onnx, basename="SklearnRFECV", methods=["transform"] + ) def test_select_fdr_int(self): model = SelectFdr() X, y = load_breast_cancer(return_X_y=True) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fdr", + model, + "select fdr", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnSelectFdr") + X.astype(np.int64), model, model_onnx, basename="SklearnSelectFdr" + ) def test_select_fpr_int(self): model = SelectFpr() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fpr", + model, + "select fpr", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectFpr") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFpr") def test_select_from_model_int(self): model = SelectFromModel(estimator=SVR(kernel="linear")) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select from model", + model, + "select from model", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectFromModel") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFromModel") def test_select_fwe_int(self): model = SelectFwe() X, y = load_breast_cancer(return_X_y=True) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fwe", + model, + "select fwe", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnSelectFwe") + X.astype(np.int64), model, model_onnx, basename="SklearnSelectFwe" + ) def test_select_k_best_int(self): model = SelectKBest(k="all") X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select k best", + model, + "select k best", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + 
target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectKBest") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectKBest") def test_select_percentile_int(self): model = SelectPercentile() @@ -156,195 +168,208 @@ def test_select_percentile_int(self): y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select percentile", + model, + "select percentile", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectPercentile") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectPercentile") def test_variance_threshold_int(self): model = VarianceThreshold() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.int64) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "variance threshold", + model, + "variance threshold", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnVarianceThreshold") + dump_data_and_model(X, model, model_onnx, basename="SklearnVarianceThreshold") def test_generic_univariate_select_float(self): model = GenericUnivariateSelect() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "generic univariate select", + model, + "generic univariate select", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGenericUnivariateSelect") + X, model, model_onnx, basename="SklearnGenericUnivariateSelect" + ) def test_rfe_float(self): model = RFE(estimator=SVR(kernel="linear")) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "rfe", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "rfe", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, model_onnx, basename="SklearnRFE", - methods=["transform"]) + X, model, model_onnx, basename="SklearnRFE", methods=["transform"] + ) def test_rfecv_float(self): model = RFECV(estimator=SVR(kernel="linear"), cv=3) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "rfecv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "rfecv", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, model, 
model_onnx, basename="SklearnRFECV", - methods=["transform"]) + X, model, model_onnx, basename="SklearnRFECV", methods=["transform"] + ) def test_select_fdr_float(self): model = SelectFdr() X, y = load_breast_cancer(return_X_y=True) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fdr", + model, + "select fdr", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSelectFdr") + X.astype(np.float32), model, model_onnx, basename="SklearnSelectFdr" + ) def test_select_fpr_float(self): model = SelectFpr() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fpr", + model, + "select fpr", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectFpr") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFpr") def test_select_from_model_float(self): model = SelectFromModel(estimator=SVR(kernel="linear")) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select from model", + model, + "select from model", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectFromModel") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFromModel") def test_select_from_model_float_nomodel(self): - model = SelectFromModel( - estimator=SVR(kernel="linear"), threshold=1e5) + model = SelectFromModel(estimator=SVR(kernel="linear"), threshold=1e5) X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) with self.assertRaises(RuntimeError): convert_sklearn( - model, "select from model", + model, + "select from model", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) def test_select_fwe_float(self): model = SelectFwe() X, y = load_breast_cancer(return_X_y=True) model.fit(X, y) model_onnx = convert_sklearn( - model, "select fwe", + model, + "select fwe", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), - model, model_onnx, basename="SklearnSelectFwe") + X.astype(np.float32), model, model_onnx, basename="SklearnSelectFwe" + ) def test_select_k_best_float(self): model = SelectKBest(k="all") X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select k best", + model, + "select k best", 
[("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectKBest") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectKBest") def test_select_percentile_float(self): model = SelectPercentile() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "select percentile", + model, + "select percentile", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSelectPercentile") + dump_data_and_model(X, model, model_onnx, basename="SklearnSelectPercentile") def test_variance_threshold_float(self): model = VarianceThreshold() X = np.array( - [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], - dtype=np.float32) + [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32 + ) y = np.array([0, 1, 0, 1]) model.fit(X, y) model_onnx = convert_sklearn( - model, "variance threshold", + model, + "variance threshold", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnVarianceThreshold") + dump_data_and_model(X, model, model_onnx, basename="SklearnVarianceThreshold") if __name__ == "__main__": diff --git a/tests/test_sklearn_feature_union.py b/tests/test_sklearn_feature_union.py index b40774ad1..104523b1b 100644 --- a/tests/test_sklearn_feature_union.py +++ b/tests/test_sklearn_feature_union.py @@ -15,111 +15,134 @@ from test_utils import dump_data_and_model, TARGET_OPSET -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestSklearnAdaBoostModels(unittest.TestCase): @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) def test_feature_union_default(self): data = load_iris() X, y = data.data, data.target X = X.astype(np.float32) - X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, - random_state=42) - model = FeatureUnion([('standard', StandardScaler()), - ('minmax', MinMaxScaler())]).fit(X_train) + X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42) + model = FeatureUnion( + [("standard", StandardScaler()), ("minmax", MinMaxScaler())] + ).fit(X_train) model_onnx = convert_sklearn( - model, 'feature union', - [('input', FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "feature union", + [("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X_test, model, model_onnx, - basename="SklearnFeatureUnionDefault") + dump_data_and_model( + X_test, model, model_onnx, basename="SklearnFeatureUnionDefault" + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) def test_feature_union_nested(self): data = load_iris() X, y = data.data, 
data.target X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=42) - model = FeatureUnion([ - ('features', FeatureUnion([ - ('standard', StandardScaler()), - ]) - ), - ]).fit(X_train) + X, y, test_size=0.5, random_state=42 + ) + model = FeatureUnion( + [ + ( + "features", + FeatureUnion( + [ + ("standard", StandardScaler()), + ] + ), + ), + ] + ).fit(X_train) model_onnx = convert_sklearn( - model, 'feature union', - [('input', FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "feature union", + [("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X_test, model, model_onnx, - basename="SklearnFeatureUnionNested") + dump_data_and_model( + X_test, model, model_onnx, basename="SklearnFeatureUnionNested" + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) def test_feature_union_transformer_weights_0(self): data = load_iris() X, y = data.data, data.target X = X.astype(np.float32) - X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, - random_state=42) - model = FeatureUnion([('standard', StandardScaler()), - ('minmax', MinMaxScaler())], - transformer_weights={'standard': 2, 'minmax': 4} - ).fit(X_train) + X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42) + model = FeatureUnion( + [("standard", StandardScaler()), ("minmax", MinMaxScaler())], + transformer_weights={"standard": 2, "minmax": 4}, + ).fit(X_train) model_onnx = convert_sklearn( - model, 'feature union', - [('input', FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "feature union", + [("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X_test, model, model_onnx, - basename="SklearnFeatureUnionTransformerWeights0") + dump_data_and_model( + X_test, model, model_onnx, basename="SklearnFeatureUnionTransformerWeights0" + ) def test_feature_union_transformer_weights_1(self): data = load_digits() X, y = data.data, data.target X = X.astype(np.int64) - X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, - random_state=42) - model = FeatureUnion([('pca', PCA()), - ('svd', TruncatedSVD())], - transformer_weights={'pca': 10, 'svd': 3} - ).fit(X_train) + X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42) + model = FeatureUnion( + [("pca", PCA()), ("svd", TruncatedSVD())], + transformer_weights={"pca": 10, "svd": 3}, + ).fit(X_train) model_onnx = convert_sklearn( - model, 'feature union', - [('input', Int64TensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "feature union", + [("input", Int64TensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnFeatureUnionTransformerWeights1-Dec4") + X_test, + model, + model_onnx, + basename="SklearnFeatureUnionTransformerWeights1-Dec4", + ) def test_feature_union_transformer_weights_2(self): data = load_digits() X, y = data.data, data.target X = X.astype(np.float32) - X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, - random_state=42) - model = FeatureUnion([('pca', PCA()), - ('svd', TruncatedSVD())], - transformer_weights={'pca': 
10, 'svd': 3} - ).fit(X_train) + X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42) + model = FeatureUnion( + [("pca", PCA()), ("svd", TruncatedSVD())], + transformer_weights={"pca": 10, "svd": 3}, + ).fit(X_train) model_onnx = convert_sklearn( - model, 'feature union', - [('input', FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "feature union", + [("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnFeatureUnionTransformerWeights2-Dec4") + X_test, + model, + model_onnx, + basename="SklearnFeatureUnionTransformerWeights2-Dec4", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_function_transformer_converter.py b/tests/test_sklearn_function_transformer_converter.py index c7a680cb7..41b67e367 100644 --- a/tests/test_sklearn_function_transformer_converter.py +++ b/tests/test_sklearn_function_transformer_converter.py @@ -25,8 +25,9 @@ class TestSklearnFunctionTransformerConverter(unittest.TestCase): - @unittest.skipIf(ColumnTransformer is None, - reason="ColumnTransformer introduced in 0.20") + @unittest.skipIf( + ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20" + ) def test_function_transformer(self): def convert_dataframe_schema(df, drop=None): inputs = [] @@ -51,28 +52,35 @@ def convert_dataframe_schema(df, drop=None): # behaviour is different accross versions of scikit-learn. data["X3"] = (y + 1).astype(np.int64) - pipe = Pipeline(steps=[ - ("select", - ColumnTransformer( - [("id", FunctionTransformer(validate=True), - ["X1", "X2", "X3"])])), - ("logreg", LogisticRegression(max_iter=1400)), - ]) + pipe = Pipeline( + steps=[ + ( + "select", + ColumnTransformer( + [("id", FunctionTransformer(validate=True), ["X1", "X2", "X3"])] + ), + ), + ("logreg", LogisticRegression(max_iter=1400)), + ] + ) pipe.fit(data[["X1", "X2", "X3"]], y) inputs = convert_dataframe_schema(data) - model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer", - inputs, target_opset=TARGET_OPSET, - options={'zipmap': False}) + model_onnx = convert_sklearn( + pipe, + "scikit-learn function_transformer", + inputs, + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data[:5], - pipe, - model_onnx, - basename="SklearnFunctionTransformer-DF") + data[:5], pipe, model_onnx, basename="SklearnFunctionTransformer-DF" + ) - @unittest.skipIf(ColumnTransformer is None, - reason="ColumnTransformer introduced in 0.20") + @unittest.skipIf( + ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20" + ) def test_passthrough(self): def convert_dataframe_schema(df, drop=None): inputs = [] @@ -93,26 +101,34 @@ def convert_dataframe_schema(df, drop=None): y = data.target data = pandas.DataFrame(X, columns=["X1", "X2"]) - pipe = Pipeline(steps=[ - ("select", - ColumnTransformer([("id", FunctionTransformer(), ["X1"]), - ("id2", "passthrough", ["X2"])])), - ("logreg", LogisticRegression()), - ]) + pipe = Pipeline( + steps=[ + ( + "select", + ColumnTransformer( + [ + ("id", FunctionTransformer(), ["X1"]), + ("id2", "passthrough", ["X2"]), + ] + ), + ), + ("logreg", LogisticRegression()), + ] + ) pipe.fit(data[["X1", "X2"]], y) inputs = convert_dataframe_schema(data) - model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer", - inputs, target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + 
pipe, "scikit-learn function_transformer", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data[:5], - pipe, - model_onnx, - basename="SklearnFunctionTransformerPass-DF") + data[:5], pipe, model_onnx, basename="SklearnFunctionTransformerPass-DF" + ) - @unittest.skipIf(ColumnTransformer is None, - reason="ColumnTransformer introduced in 0.20") + @unittest.skipIf( + ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20" + ) def test_remainder_passthrough(self): def convert_dataframe_schema(df, drop=None): inputs = [] @@ -133,21 +149,27 @@ def convert_dataframe_schema(df, drop=None): y = data.target data = pandas.DataFrame(X, columns=["X1", "X2"]) - pipe = Pipeline(steps=[ - ("select", - ColumnTransformer([("id", FunctionTransformer(), ["X1"])], - remainder="passthrough")), - ("logreg", LogisticRegression()), - ]) + pipe = Pipeline( + steps=[ + ( + "select", + ColumnTransformer( + [("id", FunctionTransformer(), ["X1"])], remainder="passthrough" + ), + ), + ("logreg", LogisticRegression()), + ] + ) pipe.fit(data[["X1", "X2"]], y) inputs = convert_dataframe_schema(data) - model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer", - inputs, target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + pipe, "scikit-learn function_transformer", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data[:5], pipe, model_onnx, - basename="SklearnFunctionTransformerPassRem-DF") + data[:5], pipe, model_onnx, basename="SklearnFunctionTransformerPassRem-DF" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_gamma_regressor.py b/tests/test_sklearn_gamma_regressor.py index 751537836..e09a3cc48 100644 --- a/tests/test_sklearn_gamma_regressor.py +++ b/tests/test_sklearn_gamma_regressor.py @@ -4,6 +4,7 @@ import unittest import numpy as np + try: from sklearn.linear_model import GammaRegressor except ImportError: @@ -12,22 +13,19 @@ from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - FloatTensorType, DoubleTensorType, Int64TensorType + FloatTensorType, + DoubleTensorType, + Int64TensorType, ) -from test_utils import ( - dump_data_and_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, TARGET_OPSET ort_version = ".".join(ort_version.split(".")[:2]) class TestGammaRegressorConverter(unittest.TestCase): - @unittest.skipIf(GammaRegressor is None, - reason="scikit-learn<1.0") + @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0") def test_gamma_regressor_float(self): - model = GammaRegressor() X = np.array([[1, 2], [2, 3], [3, 4], [4, 3]]) y = np.array([19, 26, 33, 30]) @@ -38,16 +36,19 @@ def test_gamma_regressor_float(self): model, "scikit-learn Gamma Regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx is not None) - dump_data_and_model(test_x.astype(np.float32), model, model_onnx, - basename="SklearnGammaRegressor") + dump_data_and_model( + test_x.astype(np.float32), + model, + model_onnx, + basename="SklearnGammaRegressor", + ) - @unittest.skipIf(GammaRegressor is None, - reason="scikit-learn<1.0") + @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0") def test_gamma_regressor_int(self): - model = GammaRegressor() X = np.array([[10, 20], [20, 30], [30, 40], [40, 30]]) y = np.array([19, 26, 33, 30]) @@ -58,16 +59,16 @@ def test_gamma_regressor_int(self): model, "scikit-learn Gamma Regressor", 
[("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx is not None) - dump_data_and_model(test_x.astype(np.int64), model, model_onnx, - basename="SklearnGammaRegressor") + dump_data_and_model( + test_x.astype(np.int64), model, model_onnx, basename="SklearnGammaRegressor" + ) - @unittest.skipIf(GammaRegressor is None, - reason="scikit-learn<1.0") + @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0") def test_gamma_regressor_double(self): - model = GammaRegressor() X = np.array([[1.1, 2.1], [2.3, 3.2], [3.2, 4.3], [4.2, 3.1]]) y = np.array([19, 26, 33, 30]) @@ -78,11 +79,16 @@ def test_gamma_regressor_double(self): model, "scikit-learn Gamma Regressor", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx is not None) - dump_data_and_model(test_x.astype(np.double), model, model_onnx, - basename="SklearnGammaRegressor") + dump_data_and_model( + test_x.astype(np.double), + model, + model_onnx, + basename="SklearnGammaRegressor", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_gaussian_mixture_converter.py b/tests/test_sklearn_gaussian_mixture_converter.py index fa86fd14e..fb1de2731 100644 --- a/tests/test_sklearn_gaussian_mixture_converter.py +++ b/tests/test_sklearn_gaussian_mixture_converter.py @@ -4,6 +4,7 @@ import numpy as np from sklearn.datasets import load_iris from sklearn.mixture import GaussianMixture, BayesianGaussianMixture + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -17,9 +18,11 @@ from skl2onnx import convert_sklearn, to_onnx from skl2onnx.common.data_types import FloatTensorType from test_utils import ( - dump_data_and_model, TARGET_OPSET, + dump_data_and_model, + TARGET_OPSET, InferenceSessionEx as InferenceSession, - ReferenceEvaluatorEx) + ReferenceEvaluatorEx, +) class TestGaussianMixtureConverter(unittest.TestCase): @@ -43,9 +46,12 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None): exp = model.score_samples(X) expp = model.predict_proba(X) onx = to_onnx( - model, X[:1], target_opset=tg, - options={id(model): {'score_samples': True}}, - black_op=black_op) + model, + X[:1], + target_opset=tg, + options={id(model): {"score_samples": True}}, + black_op=black_op, + ) if ReferenceEvaluatorEx is None: sess = None else: @@ -54,42 +60,40 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None): except NotImplementedError: sess = None if sess is not None: - got = sess.run(None, {'X': X}) + got = sess.run(None, {"X": X}) self.assertEqual(len(got), 3) np.testing.assert_almost_equal( - expp.ravel(), got[1].ravel(), decimal=decimal) - np.testing.assert_almost_equal( - exp.ravel(), got[2].ravel(), decimal=decimal) + expp.ravel(), got[1].ravel(), decimal=decimal + ) + np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal) try: sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except OrtFail as e: - raise RuntimeError('Issue {}\n{}'.format(e, str(onx))) - got = sess.run(None, {'X': X}) + raise RuntimeError("Issue {}\n{}".format(e, str(onx))) + got = sess.run(None, {"X": X}) self.assertEqual(len(got), 3) - np.testing.assert_almost_equal( - expp.ravel(), got[1].ravel(), decimal=decimal) - np.testing.assert_almost_equal( - exp.ravel(), got[2].ravel(), decimal=decimal) + 
np.testing.assert_almost_equal(expp.ravel(), got[1].ravel(), decimal=decimal) + np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal) - @unittest.skipIf(TARGET_OPSET < 11, - reason="Missing Gemm (11)") + @unittest.skipIf(TARGET_OPSET < 11, reason="Missing Gemm (11)") @ignore_warnings(category=UserWarning) def test_model_gaussian_mixture_binary_classification(self): - model, X = self._fit_model_binary_classification( - GaussianMixture(), load_iris()) + model, X = self._fit_model_binary_classification(GaussianMixture(), load_iris()) for tg in range(min(9, TARGET_OPSET), TARGET_OPSET + 1): with self.subTest(target_opset=tg): model_onnx = convert_sklearn( - model, "gaussian_mixture", + model, + "gaussian_mixture", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=tg) + target_opset=tg, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinGaussianMixture") + X, model, model_onnx, basename="SklearnBinGaussianMixture" + ) self._test_score(model, X, tg) @ignore_warnings(category=UserWarning) @@ -97,38 +101,33 @@ def test_model_bayesian_mixture_binary_classification(self): for cov in ["full", "tied", "diag", "spherical"]: with self.subTest(cov=cov): model, X = self._fit_model_binary_classification( - BayesianGaussianMixture(), load_iris(), - covariance_type=cov) + BayesianGaussianMixture(), load_iris(), covariance_type=cov + ) model_onnx = convert_sklearn( model, "gaussian_mixture", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBinBayesianGaussianMixture") + X, model, model_onnx, basename="SklearnBinBayesianGaussianMixture" + ) self._test_score(model, X, TARGET_OPSET) @ignore_warnings(category=UserWarning) def test_model_gaussian_mixture_multiclass(self): model, X = self._fit_model_multiclass_classification( - GaussianMixture(), load_iris()) + GaussianMixture(), load_iris() + ) model_onnx = convert_sklearn( model, "gaussian_mixture", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnMclGaussianMixture") + dump_data_and_model(X, model, model_onnx, basename="SklearnMclGaussianMixture") self._test_score(model, X, TARGET_OPSET) @ignore_warnings(category=UserWarning) @@ -137,167 +136,210 @@ def test_gaussian_mixture_comp2(self): X = data.data model = GaussianMixture(n_components=2) model.fit(X) - model_onnx = convert_sklearn(model, "GM", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32)[40:60], model, model_onnx, basename="GaussianMixtureC2", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET) @ignore_warnings(category=UserWarning) def test_gaussian_mixture_full(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) - model_onnx = convert_sklearn(model, "GM", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "GM", + [("input", 
FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32)[40:60], model, model_onnx, basename="GaussianMixtureC2Full", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET) @ignore_warnings(category=UserWarning) def test_gaussian_mixture_tied(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='tied') + model = GaussianMixture(n_components=2, covariance_type="tied") model.fit(X) - model_onnx = convert_sklearn(model, "GM", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32)[40:60], model, model_onnx, basename="GaussianMixtureC2Tied", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET) @ignore_warnings(category=UserWarning) def test_gaussian_mixture_diag(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='diag') + model = GaussianMixture(n_components=2, covariance_type="diag") model.fit(X) - model_onnx = convert_sklearn(model, "GM", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) - self.assertIn('ReduceLogSumExp', str(model_onnx)) + model_onnx = convert_sklearn( + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) + self.assertIn("ReduceLogSumExp", str(model_onnx)) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32)[40:60], model, model_onnx, basename="GaussianMixtureC2Diag", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET, decimal=4) @ignore_warnings(category=UserWarning) def test_gaussian_mixture_spherical(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='spherical') + model = GaussianMixture(n_components=2, covariance_type="spherical") model.fit(X) model_onnx = convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32)[40:60], - model, model_onnx, + model, + model_onnx, basename="GaussianMixtureC2Spherical", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET, decimal=4) @ignore_warnings(category=UserWarning) def test_gaussian_mixture_full_black_op(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) with self.assertRaises(RuntimeError): convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'Add'}) + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + black_op={"Add"}, + ) model_onnx = convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'ReduceLogSumExp'}) + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + black_op={"ReduceLogSumExp"}, + ) self.assertIsNotNone(model_onnx) - self.assertNotIn('ReduceLogSumExp', str(model_onnx)) + 
self.assertNotIn("ReduceLogSumExp", str(model_onnx)) dump_data_and_model( X.astype(np.float32)[40:60], model, model_onnx, basename="GaussianMixtureC2FullBL", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET) - @unittest.skipIf(TARGET_OPSET < 11, - reason="OnnxEqual does not support float") + @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float") @ignore_warnings(category=UserWarning) def test_gaussian_mixture_full_black_op_noargmax(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=2, covariance_type='full') + model = GaussianMixture(n_components=2, covariance_type="full") model.fit(X) with self.assertRaises(RuntimeError): convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET, black_op={'Add'}) + model, + "GM", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + black_op={"Add"}, + ) model_onnx = convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], + model, + "GM", + [("input", FloatTensorType([None, 4]))], target_opset=TARGET_OPSET, - black_op={'ReduceLogSumExp', 'ArgMax'}) + black_op={"ReduceLogSumExp", "ArgMax"}, + ) self.assertIsNotNone(model_onnx) - self.assertNotIn('ArgMax', str(model_onnx)) + self.assertNotIn("ArgMax", str(model_onnx)) dump_data_and_model( X.astype(np.float32)[40:60], - model, model_onnx, + model, + model_onnx, basename="GaussianMixtureC2FullBLNM", - intermediate_steps=True) + intermediate_steps=True, + ) self._test_score(model, X, TARGET_OPSET) - @unittest.skipIf(TARGET_OPSET < 11, - reason="OnnxEqual does not support float") + @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float") @ignore_warnings(category=UserWarning) def test_gaussian_mixture_full_black_op_noargmax_inf(self): data = load_iris() X = data.data - model = GaussianMixture(n_components=10, covariance_type='full') + model = GaussianMixture(n_components=10, covariance_type="full") model.fit(X) model_onnx1 = convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], + model, + "GM", + [("input", FloatTensorType([None, 4]))], target_opset=TARGET_OPSET, - options={id(model): {'score_samples': True}}) + options={id(model): {"score_samples": True}}, + ) model_onnx2 = convert_sklearn( - model, "GM", [("input", FloatTensorType([None, 4]))], + model, + "GM", + [("input", FloatTensorType([None, 4]))], target_opset=TARGET_OPSET, - options={id(model): {'score_samples': True}}, - black_op={'ReduceLogSumExp', 'ArgMax'}) - self.assertNotIn('ArgMax', str(model_onnx2)) + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp", "ArgMax"}, + ) + self.assertNotIn("ArgMax", str(model_onnx2)) sess1 = InferenceSession( - model_onnx1.SerializeToString(), - providers=["CPUExecutionProvider"]) - res1 = sess1.run(None, {'input': (X[:5] * 1e2).astype(np.float32)}) + model_onnx1.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res1 = sess1.run(None, {"input": (X[:5] * 1e2).astype(np.float32)}) a1, b1, c1 = res1 sess2 = InferenceSession( - model_onnx2.SerializeToString(), - providers=["CPUExecutionProvider"]) - res2 = sess2.run(None, {'input': (X[:5] * 1e2).astype(np.float32)}) + model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res2 = sess2.run(None, {"input": (X[:5] * 1e2).astype(np.float32)}) a2, b2, c2 = res2 self.assertEqual(b1.max(), b2.max()) @@ -306,8 +348,8 @@ def test_gaussian_mixture_full_black_op_noargmax_inf(self): 
self.assertLess(abs(c1.min() - c2.min()) / c2.min(), 1e-5) self._test_score( - model, X, TARGET_OPSET, black_op={'ReduceLogSumExp', 'ArgMax'}, - decimal=2) + model, X, TARGET_OPSET, black_op={"ReduceLogSumExp", "ArgMax"}, decimal=2 + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_gaussian_process_classifier.py b/tests/test_sklearn_gaussian_process_classifier.py index 69c9ea956..df3050248 100644 --- a/tests/test_sklearn_gaussian_process_classifier.py +++ b/tests/test_sklearn_gaussian_process_classifier.py @@ -7,6 +7,7 @@ from numpy.testing import assert_almost_equal import scipy from onnxruntime import SessionOptions + try: from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail except ImportError: @@ -14,6 +15,7 @@ from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn import __version__ as sklver + try: from sklearn.gaussian_process import GaussianProcessClassifier except ImportError: @@ -22,33 +24,37 @@ from skl2onnx import to_onnx from skl2onnx.helpers.onnx_helper import change_onnx_domain from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -sklver_ = ".".join(sklver.split('.')[:2]) +sklver_ = ".".join(sklver.split(".")[:2]) class TestSklearnGaussianProcessClassifier(unittest.TestCase): - @classmethod def setUpClass(cls): try: - from ortcustomops import ( - onnx_op, PyCustomOpDef, get_library_path) + from ortcustomops import onnx_op, PyCustomOpDef, get_library_path except ImportError: return - @onnx_op(op_type="SolveFloat", - inputs=[PyCustomOpDef.dt_float, PyCustomOpDef.dt_float], - outputs=[PyCustomOpDef.dt_float]) + @onnx_op( + op_type="SolveFloat", + inputs=[PyCustomOpDef.dt_float, PyCustomOpDef.dt_float], + outputs=[PyCustomOpDef.dt_float], + ) def solveopf(a, b): # The user custom op implementation here. return scipy.linalg.solve(a, b).astype(np.float32) - @onnx_op(op_type="SolveDouble", - inputs=[PyCustomOpDef.dt_double, PyCustomOpDef.dt_double], - outputs=[PyCustomOpDef.dt_double]) + @onnx_op( + op_type="SolveDouble", + inputs=[PyCustomOpDef.dt_double, PyCustomOpDef.dt_double], + outputs=[PyCustomOpDef.dt_double], + ) def solveopd(a, b): # The user custom op implementation here. 
return scipy.linalg.solve(a, b).astype(np.float64) @@ -62,13 +68,11 @@ def fit_classification_model(self, gp, n_classes=2): y = y % 2 elif n_classes != 3: raise NotImplementedError("n_classes must be 2 or 3") - X_train, X_test, y_train, y_test = train_test_split( - X, y, random_state=3) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3) gp.fit(X_train, y_train) return gp, X_test.astype(np.float32) def common_test_gpc(self, dtype=np.float32, n_classes=2): - gp = GaussianProcessClassifier() gp, X = self.fit_classification_model(gp, n_classes=n_classes) @@ -78,53 +82,58 @@ def common_test_gpc(self, dtype=np.float32, n_classes=2): else: cls = DoubleTensorType model_onnx = to_onnx( - gp, initial_types=[('X', cls([None, None]))], + gp, + initial_types=[("X", cls([None, None]))], target_opset=TARGET_OPSET, - options={GaussianProcessClassifier: { - 'zipmap': False, 'optim': 'cdist'}}) + options={GaussianProcessClassifier: {"zipmap": False, "optim": "cdist"}}, + ) self.assertTrue(model_onnx is not None) try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except OrtFail: - if not hasattr(self, 'path'): + if not hasattr(self, "path"): return - suffix = 'Double' if dtype == np.float64 else 'Float' + suffix = "Double" if dtype == np.float64 else "Float" # Operator Solve is missing model_onnx = change_onnx_domain( - model_onnx, {'Solve': ('Solve%s' % suffix, 'ai.onnx.contrib')}) + model_onnx, {"Solve": ("Solve%s" % suffix, "ai.onnx.contrib")} + ) so = SessionOptions() so.register_custom_ops_library(self.path) sess = InferenceSession( - model_onnx.SerializeToString(), so, - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), so, providers=["CPUExecutionProvider"] + ) - res = sess.run(None, {'X': X.astype(dtype)}) + res = sess.run(None, {"X": X.astype(dtype)}) assert_almost_equal(res[0].ravel(), gp.predict(X).ravel()) - assert_almost_equal(res[1], gp.predict_proba(X), - decimal=3) + assert_almost_equal(res[1], gp.predict_proba(X), decimal=3) return dt = 32 if dtype == np.float32 else 64 dump_data_and_model( - X.astype(dtype), gp, model_onnx, verbose=False, - basename="SklearnGaussianProcessRBFT%d%d" % (n_classes, dt)) + X.astype(dtype), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBFT%d%d" % (n_classes, dt), + ) @unittest.skipIf(TARGET_OPSET < 12, reason="einsum") - @unittest.skipIf(GaussianProcessClassifier is None, - reason="scikit-learn is too old") - @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), - reason="not available") + @unittest.skipIf( + GaussianProcessClassifier is None, reason="scikit-learn is too old" + ) + @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), reason="not available") def test_gpc_float_bin(self): self.common_test_gpc(dtype=np.float32) @unittest.skipIf(TARGET_OPSET < 12, reason="einsum, reciprocal") - @unittest.skipIf(GaussianProcessClassifier is None, - reason="scikit-learn is too old") - @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), - reason="not available") + @unittest.skipIf( + GaussianProcessClassifier is None, reason="scikit-learn is too old" + ) + @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), reason="not available") def test_gpc_double_bin(self): self.common_test_gpc(dtype=np.float64) diff --git a/tests/test_sklearn_gaussian_process_regressor.py b/tests/test_sklearn_gaussian_process_regressor.py index 5768b3da5..1c1165d32 100644 --- 
a/tests/test_sklearn_gaussian_process_regressor.py +++ b/tests/test_sklearn_gaussian_process_regressor.py @@ -14,9 +14,17 @@ from sklearn.datasets import load_iris, make_regression, make_friedman2 from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import ( - Sum, DotProduct, ExpSineSquared, RationalQuadratic, - RBF, ConstantKernel as C, PairwiseKernel, WhiteKernel) + Sum, + DotProduct, + ExpSineSquared, + RationalQuadratic, + RBF, + ConstantKernel as C, + PairwiseKernel, + WhiteKernel, +) from sklearn.model_selection import train_test_split + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -28,9 +36,11 @@ from skl2onnx import to_onnx from skl2onnx.proto import get_latest_tested_opset_version from skl2onnx.operator_converters.gaussian_process import ( - convert_kernel, convert_kernel_diag + convert_kernel, + convert_kernel_diag, ) from onnxruntime import SessionOptions + try: from onnxruntime import GraphOptimizationLevel except ImportError: @@ -41,14 +51,19 @@ NotImplemented = RuntimeError from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, fit_regression_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + fit_regression_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) _TARGET_OPSET_ = min(get_latest_tested_opset_version(), TARGET_OPSET) -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) -Xtrain_ = pd.read_csv(StringIO(""" +Xtrain_ = pd.read_csv( + StringIO( + """ 1.000000000000000000e+02,1.158972369426435591e+02,5.667579938823991137e-01,2.264397682069040421e-02,1.182166076334919581e-02,2.600819340784729095e-01 1.000000000000000000e+02,8.493978168996618194e+01,2.775702708579337874e-01,1.887456201351307358e-02,2.912599235354124821e-02,2.327206144705836199e-01 1.000000000000000000e+02,8.395765637241281354e+01,7.760226193410907358e-01,2.139558949508506974e-02,1.944769253403489523e-02,5.462612465817335838e-01 @@ -59,9 +74,16 @@ 1.000000000000000000e+02,8.121250906502669409e+01,1.865077048426986073e+00,2.182149790268794742e-02,4.300530595437276893e-02,5.083327963416256479e-01 1.000000000000000000e+02,8.612638714481262525e+01,2.717895097207565502e-01,2.029318789405683970e-02,2.387016690377936207e-02,1.889736980423707968e-01 1.000000000000000000e+02,7.377491009582655579e+01,7.210994150180145557e-01,2.239484250704669444e-02,1.642684033674572316e-02,4.341188586319142395e-01 -""".strip("\n\r ")), header=None).values - -Xtest_ = pd.read_csv(StringIO(""" +""".strip( + "\n\r " + ) + ), + header=None, +).values + +Xtest_ = pd.read_csv( + StringIO( + """ 1.000000000000000000e+02,1.061277971307766705e+02,1.472195004809226493e+00,2.307125069497626552e-02,4.539948095743629591e-02,2.855191098141335870e-01 1.000000000000000000e+02,9.417031896832908444e+01,1.249743892709246573e+00,2.370416174339620707e-02,2.613847280316268853e-02,5.097165413593484073e-01 1.000000000000000000e+02,9.305231488674536422e+01,1.795726729335217264e+00,2.473274733802270642e-02,1.349765645107412620e-02,9.410288840541443378e-02 @@ -72,9 +94,16 @@ 1.000000000000000000e+02,1.228982583299257101e+02,1.115599996405831629e+00,1.929354155079938959e-02,3.056996308544096715e-03,1.197052763998271013e-01 1.000000000000000000e+02,1.160303269386108838e+02,1.018627021014927303e+00,2.248784981616459844e-02,2.688111547114307651e-02,3.326105131778724355e-01 
1.000000000000000000e+02,1.163414374640396005e+02,6.644299545804077667e-01,1.508088417713602906e-02,4.451836657613789106e-02,3.245643044204808425e-01 -""".strip("\n\r ")), header=None).values - -Ytrain_ = pd.read_csv(StringIO(""" +""".strip( + "\n\r " + ) + ), + header=None, +).values + +Ytrain_ = pd.read_csv( + StringIO( + """ 1.810324564191880370e+01 4.686462914930641377e-01 1.032271142638131778e+01 @@ -85,9 +114,16 @@ 1.652864171243088975e+01 2.491797751537555006e-01 3.413210402096089169e+00 -""".strip("\n\r ")), header=None).values - -Ytest_ = pd.read_csv(StringIO(""" +""".strip( + "\n\r " + ) + ), + header=None, +).values + +Ytest_ = pd.read_csv( + StringIO( + """ 1.836586066727948463e+01 1.848708258852349573e+01 1.641115566770171341e+00 @@ -98,7 +134,12 @@ 2.289825832992571009e+01 2.353204496952379898e+01 2.237280571788585348e+01 -""".strip("\n\r ")), header=None).values +""".strip( + "\n\r " + ) + ), + header=None, +).values THRESHOLD = "0.4.0" @@ -106,16 +147,22 @@ class TestSklearnGaussianProcessRegressor(unittest.TestCase): - def remove_dim1(self, arr): new_shape = tuple(v for v in arr.shape if v != 1) if new_shape != arr.shape: arr = arr.reshape(new_shape) return arr - def check_outputs(self, model, model_onnx, Xtest, - predict_attributes, decimal=5, - skip_if_float32=False, disable_optimisation=True): + def check_outputs( + self, + model, + model_onnx, + Xtest, + predict_attributes, + decimal=5, + skip_if_float32=False, + disable_optimisation=True, + ): if "TransposeScaleMatMul" in str(model_onnx): raise RuntimeError("This node must not be added.") if predict_attributes is None: @@ -123,16 +170,17 @@ def check_outputs(self, model, model_onnx, Xtest, exp = model.predict(Xtest, **predict_attributes) if disable_optimisation and GraphOptimizationLevel is not None: opts = SessionOptions() - opts.graph_optimization_level = ( - GraphOptimizationLevel.ORT_DISABLE_ALL) + opts.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL sess = InferenceSession( - model_onnx.SerializeToString(), sess_options=opts, - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), + sess_options=opts, + providers=["CPUExecutionProvider"], + ) else: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': Xtest}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": Xtest}) if isinstance(exp, tuple): if len(exp) != len(got): raise AssertionError("Mismatched number of outputs.") @@ -140,636 +188,750 @@ def check_outputs(self, model, model_onnx, Xtest, if skip_if_float32 and g.dtype == np.float32: continue try: - assert_almost_equal(self.remove_dim1(e), - self.remove_dim1(g), - decimal=decimal) + assert_almost_equal( + self.remove_dim1(e), self.remove_dim1(g), decimal=decimal + ) except AssertionError as e: # noqa raise AssertionError( "Mismatch for output {} and attributes {}" - ".".format(i, predict_attributes)) from e + ".".format(i, predict_attributes) + ) from e else: if skip_if_float32 and Xtest.dtype == np.float32: return - assert_almost_equal(np.squeeze(exp), - np.squeeze(got), decimal=decimal) + assert_almost_equal(np.squeeze(exp), np.squeeze(got), decimal=decimal) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_constant1(self): - ker = C(5.) 
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker = C(5.0) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_cosine_float(self): - ker = PairwiseKernel(metric='cosine') + ker = PairwiseKernel(metric="cosine") # X, X - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) x = np.random.randn(4, 3) - x[0, 0] = x[1, 1] = x[2, 2] = 10. - x[3, 2] = 5. + x[0, 0] = x[1, 1] = x[2, 2] = 10.0 + x[3, 2] = 5.0 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': x.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": x.astype(np.float32)})[0] m1 = res m2 = ker(x) assert_almost_equal(m1, m2, decimal=5) # X, x - onx = convert_kernel(ker, 'X', x_train=x, - output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, + "X", + x_train=x, + output_names=["Y"], + dtype=np.float32, + op_version=_TARGET_OPSET_, + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': x.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": x.astype(np.float32)})[0] m1 = res m2 = ker(x) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_cosine_double(self): - ker = PairwiseKernel(metric='cosine') - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float64, - op_version=_TARGET_OPSET_) + ker = PairwiseKernel(metric="cosine") + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float64, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", DoubleTensorType([None, None]))], target_opset=TARGET_OPSET + ) x = np.random.randn(4, 3) - x[0, 0] = x[1, 1] = x[2, 2] = 10. - x[3, 2] = 5. 
+ x[0, 0] = x[1, 1] = x[2, 2] = 10.0 + x[3, 2] = 5.0 try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except NotImplemented as e: if "NOT_IMPLEMENTED" in str(e): # Failed to find kernel for FusedMatMul(1). return raise e - res = sess.run(None, {'X': x.astype(np.float64)})[0] + res = sess.run(None, {"X": x.astype(np.float64)})[0] m1 = res m2 = ker(x) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rbf1(self): ker = RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) - model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))]) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) + model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))]) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rbf1_anisotropic(self): - ker = RBF(length_scale=np.array([1.1, 1.2, 1.3, 1.4, 1.5, 1.6], - dtype=np.float32), - length_scale_bounds=(1e-3, 1e3)) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) - model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))]) + ker = RBF( + length_scale=np.array([1.1, 1.2, 1.3, 1.4, 1.5, 1.6], dtype=np.float32), + length_scale_bounds=(1e-3, 1e3), + ) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) + model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))]) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rbf10(self): ker = RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) - model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))]) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) + model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))]) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': 
Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rbf2(self): ker = RBF(length_scale=1, length_scale_bounds="fixed") - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) - model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))]) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) + model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))]) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rbf_mul(self): - ker = (C(1.0, constant_value_bounds="fixed") * - RBF(1.0, length_scale_bounds="fixed")) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker = C(1.0, constant_value_bounds="fixed") * RBF( + 1.0, length_scale_bounds="fixed" + ) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_ker1_def(self): - ker = (C(1.0, (1e-3, 1e3)) * - RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3))) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker = C(1.0, (1e-3, 1e3)) * RBF( + length_scale=10, length_scale_bounds=(1e-3, 1e3) + ) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) 
assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_ker12_def(self): - ker = (Sum(C(0.1, (1e-3, 1e3)), C(0.1, (1e-3, 1e3)) * - RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)))) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker = Sum( + C(0.1, (1e-3, 1e3)), + C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)), + ) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_ker2_def(self): ker = Sum( - C(0.1, (1e-3, 1e3)) * RBF(length_scale=10, - length_scale_bounds=(1e-3, 1e3)), - C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, - length_scale_bounds=(1e-3, 1e3))) - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + C(0.1, (1e-3, 1e3)) * RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)), + C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)), + ) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=0) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_ker2_dotproduct(self): - ker = DotProduct(sigma_0=2.) 
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker = DotProduct(sigma_0=2.0) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType())], - outputs=[('Y', FloatTensorType())], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType())], + outputs=[("Y", FloatTensorType())], + target_opset=_TARGET_OPSET_, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) x = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) - res = sess.run(None, {'X': x}) + res = sess.run(None, {"X": x}) m1 = res[0] m2 = ker(x) assert_almost_equal(m1, m2, decimal=5) - res = sess.run(None, {'X': Xtest_.astype(np.float32)}) + res = sess.run(None, {"X": Xtest_.astype(np.float32)}) m1 = res[0] m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=2) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_ker2_exp_sine_squared(self): ker = ExpSineSquared() - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=4) - onx = convert_kernel(ker, 'X', output_names=['Z'], - x_train=(Xtest_ * 2).astype(np.float32), - dtype=np.float32, op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, + "X", + output_names=["Z"], + x_train=(Xtest_ * 2).astype(np.float32), + dtype=np.float32, + op_version=_TARGET_OPSET_, + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_, Xtest_ * 2) assert_almost_equal(m1, m2, decimal=4) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_exp_sine_squared_diag(self): ker = ExpSineSquared() onx = convert_kernel_diag( - ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], 
target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker.diag(Xtest_) assert_almost_equal(m1, m2, decimal=4) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rational_quadratic_diag(self): ker = RationalQuadratic() onx = convert_kernel_diag( - ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker.diag(Xtest_) assert_almost_equal(m1, m2, decimal=4) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_dot_product_diag(self): ker = DotProduct() onx = convert_kernel_diag( - ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker.diag(Xtest_) assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_dot_product(self): ker = DotProduct() - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5) - onx = convert_kernel(ker, 'X', output_names=['Z'], - x_train=(Xtest_ * 
2).astype(np.float32), - dtype=np.float32, op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, + "X", + output_names=["Z"], + x_train=(Xtest_ * 2).astype(np.float32), + dtype=np.float32, + op_version=_TARGET_OPSET_, + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_, Xtest_ * 2) assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_rational_quadratic(self): ker = RationalQuadratic() - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_ + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_) assert_almost_equal(m1, m2, decimal=5) - onx = convert_kernel(ker, 'X', output_names=['Z'], - x_train=(Xtest_ * 2).astype(np.float32), - dtype=np.float32, op_version=_TARGET_OPSET_) - model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))]) + onx = convert_kernel( + ker, + "X", + output_names=["Z"], + x_train=(Xtest_ * 2).astype(np.float32), + dtype=np.float32, + op_version=_TARGET_OPSET_, + ) + model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))]) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0] m1 = res m2 = ker(Xtest_, Xtest_ * 2) assert_almost_equal(m1, m2, decimal=3) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_unfitted(self): + se = C(1.0, (1e-3, 1e3)) * RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)) + kernel = Sum( + se, + C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)), + ) - se = (C(1.0, (1e-3, 1e3)) * - RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3))) - kernel = (Sum(se, C(0.1, (1e-3, 1e3)) * - RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)))) - - gp = GaussianProcessRegressor(alpha=1e-5, kernel=kernel, - n_restarts_optimizer=25, - normalize_y=True) + gp = GaussianProcessRegressor( + alpha=1e-5, kernel=kernel, n_restarts_optimizer=25, normalize_y=True + ) # return_cov=False, return_std=False model_onnx = to_onnx( - gp, 
initial_types=[('X', FloatTensorType([]))], - target_opset=_TARGET_OPSET_) + gp, initial_types=[("X", FloatTensorType([]))], target_opset=_TARGET_OPSET_ + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessRBFUnfitted") + dump_data_and_model( + Xtest_.astype(np.float32), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBFUnfitted", + ) # return_cov=True, return_std=True - options = {GaussianProcessRegressor: {"return_std": True, - "return_cov": True}} + options = {GaussianProcessRegressor: {"return_std": True, "return_cov": True}} try: - to_onnx(gp, Xtrain_.astype(np.float32), options=options, - target_opset=TARGET_OPSET) + to_onnx( + gp, + Xtrain_.astype(np.float32), + options=options, + target_opset=TARGET_OPSET, + ) except RuntimeError as e: assert "Not returning standard deviation" in str(e) # return_std=True options = {GaussianProcessRegressor: {"return_std": True}} model_onnx = to_onnx( - gp, options=options, - initial_types=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + options=options, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32), - predict_attributes=options[ - GaussianProcessRegressor]) + self.check_outputs( + gp, + model_onnx, + Xtest_.astype(np.float32), + predict_attributes=options[GaussianProcessRegressor], + ) # return_cov=True options = {GaussianProcessRegressor: {"return_cov": True}} # model_onnx = to_onnx(gp, Xtrain_.astype(np.float32), options=options) model_onnx = to_onnx( - gp, options=options, - initial_types=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + options=options, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32), - predict_attributes=options[ - GaussianProcessRegressor]) + self.check_outputs( + gp, + model_onnx, + Xtest_.astype(np.float32), + predict_attributes=options[GaussianProcessRegressor], + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.6.0"), - reason="shape_inference fails") + pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_true(self): - - gp = GaussianProcessRegressor(alpha=1e-5, - n_restarts_optimizer=25, - normalize_y=True) + gp = GaussianProcessRegressor( + alpha=1e-5, n_restarts_optimizer=25, normalize_y=True + ) gp, X = fit_regression_model(gp) # return_cov=False, return_std=False model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X.astype(np.float64), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessRBFTDouble") + dump_data_and_model( + X.astype(np.float64), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBFTDouble", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.6.0"), - reason="shape_inference fails") + 
pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_cosine_fitted_true_float(self): - gp = GaussianProcessRegressor(alpha=1e-5, - n_restarts_optimizer=25, - normalize_y=False, - kernel=PairwiseKernel(metric='cosine')) - gp, X = fit_regression_model( - gp, n_features=2, n_samples=20, factor=0.01) + gp = GaussianProcessRegressor( + alpha=1e-5, + n_restarts_optimizer=25, + normalize_y=False, + kernel=PairwiseKernel(metric="cosine"), + ) + gp, X = fit_regression_model(gp, n_features=2, n_samples=20, factor=0.01) # return_cov=False, return_std=False model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X.astype(np.float32), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessCosineFloat-Dec2") + dump_data_and_model( + X.astype(np.float32), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessCosineFloat-Dec2", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.6.0"), - reason="shape_inference fails") + pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_cosine_fitted_true_double(self): - gp = GaussianProcessRegressor(alpha=1e-5, - n_restarts_optimizer=25, - normalize_y=False, - kernel=PairwiseKernel(metric='cosine')) - gp, X = fit_regression_model( - gp, n_features=2, n_samples=20, factor=0.01) + gp = GaussianProcessRegressor( + alpha=1e-5, + n_restarts_optimizer=25, + normalize_y=False, + kernel=PairwiseKernel(metric="cosine"), + ) + gp, X = fit_regression_model(gp, n_features=2, n_samples=20, factor=0.01) # return_cov=False, return_std=False model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X.astype(np.float64), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessCosineDouble") + dump_data_and_model( + X.astype(np.float64), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessCosineDouble", + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_false(self): - - gp = GaussianProcessRegressor(alpha=1e-5, - n_restarts_optimizer=25, - normalize_y=False) + gp = GaussianProcessRegressor( + alpha=1e-5, n_restarts_optimizer=25, normalize_y=False + ) gp.fit(Xtrain_, Ytrain_) # return_cov=False, return_std=False model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - 
dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessRBF-Dec4") + dump_data_and_model( + Xtest_.astype(np.float32), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBF-Dec4", + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_true(self): - gp = GaussianProcessRegressor(alpha=1e-5, - n_restarts_optimizer=25, - normalize_y=True) + gp = GaussianProcessRegressor( + alpha=1e-5, n_restarts_optimizer=25, normalize_y=True + ) gp.fit(Xtrain_, Ytrain_) # return_cov=False, return_std=False options = {GaussianProcessRegressor: {"return_std": True}} try: to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) except RuntimeError as e: assert "The method *predict* must be called" in str(e) gp.predict(Xtrain_, return_std=True) model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32), - predict_attributes=options[ - GaussianProcessRegressor], - decimal=4, disable_optimisation=True) - dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx, - verbose=False, - basename="SklearnGaussianProcessRBFStd-Out0", - disable_optimisation=True) + self.check_outputs( + gp, + model_onnx, + Xtest_.astype(np.float32), + predict_attributes=options[GaussianProcessRegressor], + decimal=4, + disable_optimisation=True, + ) + dump_data_and_model( + Xtest_.astype(np.float32), + gp, + model_onnx, + verbose=False, + basename="SklearnGaussianProcessRBFStd-Out0", + disable_optimisation=True, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) - @unittest.skipIf( - TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") + reason="onnxruntime %s" % THRESHOLD, + ) + @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self): state = np.random.RandomState(0) @@ -780,8 +942,11 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self): X_train, X_test, y_train, _ = train_test_split(X, y) gp = GaussianProcessRegressor( kernel=ExpSineSquared(periodicity_bounds=(1e-10, 1e10)), - alpha=1e-7, n_restarts_optimizer=25, normalize_y=True, - random_state=1) + alpha=1e-7, + n_restarts_optimizer=25, + normalize_y=True, + random_state=1, + ) try: gp.fit(X_train, y_train) except (AttributeError, TypeError): @@ -792,22 +957,33 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self): options = {GaussianProcessRegressor: {"return_std": True}} gp.predict(X_train, return_std=True) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test.astype(np.float64), gp, 
model_onnx, + X_test.astype(np.float64), + gp, + model_onnx, verbose=False, basename="SklearnGaussianProcessExpSineSquaredStdT-Out0-Dec2", - disable_optimisation=True) - self.check_outputs(gp, model_onnx, X_test.astype(np.float64), - predict_attributes=options[ - GaussianProcessRegressor], - decimal=4, disable_optimisation=True) + disable_optimisation=True, + ) + self.check_outputs( + gp, + model_onnx, + X_test.astype(np.float64), + predict_attributes=options[GaussianProcessRegressor], + decimal=4, + disable_optimisation=True, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self): X = 15 * np.random.rand(100, 2) @@ -816,8 +992,11 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self): X_train, X_test, y_train, _ = train_test_split(X, y) gp = GaussianProcessRegressor( kernel=ExpSineSquared(periodicity_bounds=(1e-10, 1e10)), - alpha=1e-7, n_restarts_optimizer=20, normalize_y=False, - random_state=0) + alpha=1e-7, + n_restarts_optimizer=20, + normalize_y=False, + random_state=0, + ) try: gp.fit(X_train, y_train) except (AttributeError, TypeError): @@ -828,70 +1007,93 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self): options = {GaussianProcessRegressor: {"return_std": True}} gp.predict(X_train, return_std=True) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test.astype(np.float64), gp, model_onnx, + X_test.astype(np.float64), + gp, + model_onnx, verbose=False, - basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3") - self.check_outputs(gp, model_onnx, X_test.astype(np.float64), - predict_attributes=options[ - GaussianProcessRegressor], - decimal=3) + basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3", + ) + self.check_outputs( + gp, + model_onnx, + X_test.astype(np.float64), + predict_attributes=options[GaussianProcessRegressor], + decimal=3, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_exp_sine_squared_double_true(self): - - gp = GaussianProcessRegressor(kernel=ExpSineSquared(), - alpha=1e-7, - n_restarts_optimizer=15, - normalize_y=True) + gp = GaussianProcessRegressor( + kernel=ExpSineSquared(), + alpha=1e-7, + n_restarts_optimizer=15, + normalize_y=True, + ) try: gp.fit(Xtrain_, Ytrain_) except (AttributeError, TypeError) as e: # unstable issue fixed with scikit-learn>=0.24 warnings.warn( - "Training did not converge but fails at raising " - "a warning: %r." % e) + "Training did not converge but fails at raising " "a warning: %r." 
% e + ) return # return_cov=False, return_std=False options = {GaussianProcessRegressor: {"return_std": True}} gp.predict(Xtrain_, return_std=True) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - Xtest_.astype(np.float64), gp, model_onnx, + Xtest_.astype(np.float64), + gp, + model_onnx, verbose=False, basename="SklearnGaussianProcessExpSineSquaredStdDouble-Out0-Dec3", - disable_optimisation=True) - self.check_outputs(gp, model_onnx, Xtest_.astype(np.float64), - predict_attributes=options[ - GaussianProcessRegressor], - decimal=3, disable_optimisation=True) + disable_optimisation=True, + ) + self.check_outputs( + gp, + model_onnx, + Xtest_.astype(np.float64), + predict_attributes=options[GaussianProcessRegressor], + decimal=3, + disable_optimisation=True, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) - @unittest.skipIf( - TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") + reason="onnxruntime %s" % THRESHOLD, + ) + @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_dot_product_true(self): X = 15 * np.random.rand(100, 2) y = np.sin(X[:, 0] - X[:, 1]).ravel() y += 0.5 * (0.5 - np.random.rand(X.shape[0])) X_train, X_test, y_train, _ = train_test_split(X, y) - gp = GaussianProcessRegressor(kernel=DotProduct(), - alpha=1e-2, - n_restarts_optimizer=25, - normalize_y=True, - random_state=0) + gp = GaussianProcessRegressor( + kernel=DotProduct(), + alpha=1e-2, + n_restarts_optimizer=25, + normalize_y=True, + random_state=0, + ) try: gp.fit(X_train, y_train) except (AttributeError, TypeError): @@ -903,32 +1105,43 @@ def test_gpr_rbf_fitted_return_std_dot_product_true(self): # return_cov=False, return_std=False options = {GaussianProcessRegressor: {"return_std": True}} model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test.astype(np.float64), gp, model_onnx, + X_test.astype(np.float64), + gp, + model_onnx, basename="SklearnGaussianProcessDotProductStdDouble-Out0-Dec3", - disable_optimisation=True) - self.check_outputs(gp, model_onnx, X_test.astype(np.float64), - predict_attributes=options[ - GaussianProcessRegressor], - decimal=3, disable_optimisation=True) + disable_optimisation=True, + ) + self.check_outputs( + gp, + model_onnx, + X_test.astype(np.float64), + predict_attributes=options[GaussianProcessRegressor], + decimal=3, + disable_optimisation=True, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) - @unittest.skipIf( - TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") + reason="onnxruntime %s" % THRESHOLD, + ) + @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self): - X, y = make_regression(n_features=2, n_informative=2, random_state=2) X_train, X_test, y_train, _ = train_test_split(X, y) - gp = 
GaussianProcessRegressor(kernel=RationalQuadratic(), - alpha=1e-3, - n_restarts_optimizer=25, - normalize_y=True) + gp = GaussianProcessRegressor( + kernel=RationalQuadratic(), + alpha=1e-3, + n_restarts_optimizer=25, + normalize_y=True, + ) try: gp.fit(X_train, y_train) except (AttributeError, TypeError): @@ -939,21 +1152,31 @@ def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self): # return_cov=False, return_std=False options = {GaussianProcessRegressor: {"return_std": True}} model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - options=options, target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test.astype(np.float64), gp, model_onnx, + X_test.astype(np.float64), + gp, + model_onnx, basename="SklearnGaussianProcessRationalQuadraticStdDouble-Out0", - disable_optimisation=True) - self.check_outputs(gp, model_onnx, X_test.astype(np.float64), - predict_attributes=options[ - GaussianProcessRegressor], - disable_optimisation=True) + disable_optimisation=True, + ) + self.check_outputs( + gp, + model_onnx, + X_test.astype(np.float64), + predict_attributes=options[GaussianProcessRegressor], + disable_optimisation=True, + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_fitted_shapes(self): data = load_iris() @@ -964,26 +1187,31 @@ def test_gpr_fitted_shapes(self): gp.fit(X_train, y_train) model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.check_outputs(gp, model_onnx, X_test, {}, skip_if_float32=True) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_fitted_partial_float64(self): data = load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y) - gp = GaussianProcessRegressor(kernel=DotProduct(), alpha=10.) 
+ gp = GaussianProcessRegressor(kernel=DotProduct(), alpha=10.0) gp.fit(X_train, y_train) model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - target_opset=_TARGET_OPSET_) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + target_opset=_TARGET_OPSET_, + ) self.assertTrue(model_onnx is not None) try: self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {}) @@ -991,39 +1219,46 @@ def test_gpr_fitted_partial_float64(self): assert "Max relative difference:" in str(e) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.check_outputs(gp, model_onnx, X_test, {}) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD2), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_fitted_partial_float64_operator_cdist_rbf(self): data = load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y) - gp = GaussianProcessRegressor(kernel=RBF(), alpha=10.) + gp = GaussianProcessRegressor(kernel=RBF(), alpha=10.0) gp.fit(X_train, y_train) try: to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'CDIST'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "CDIST"}}, + target_opset=TARGET_OPSET, + ) raise AssertionError("CDIST is not implemented") except ValueError: pass model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'cdist'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "cdist"}}, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - name_save = inspect.currentframe().f_code.co_name + '.onnx' - with open(name_save, 'wb') as f: + name_save = inspect.currentframe().f_code.co_name + ".onnx" + with open(name_save, "wb") as f: f.write(model_onnx.SerializeToString()) try: self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {}) @@ -1034,39 +1269,46 @@ def test_gpr_fitted_partial_float64_operator_cdist_rbf(self): assert "Max relative difference:" in str(e) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.check_outputs(gp, model_onnx, X_test, {}) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD2), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_fitted_partial_float64_operator_cdist_sine(self): data = load_iris() X = data.data[:, :2] y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y) - gp = GaussianProcessRegressor(kernel=ExpSineSquared(), alpha=100.) 
+ gp = GaussianProcessRegressor(kernel=ExpSineSquared(), alpha=100.0) gp.fit(X_train, y_train) try: to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'CDIST'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "CDIST"}}, + target_opset=TARGET_OPSET, + ) raise AssertionError("CDIST is not implemented") except ValueError: pass model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'cdist'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "cdist"}}, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - name_save = inspect.currentframe().f_code.co_name + '.onnx' - with open(name_save, 'wb') as f: + name_save = inspect.currentframe().f_code.co_name + ".onnx" + with open(name_save, "wb") as f: f.write(model_onnx.SerializeToString()) try: self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {}) @@ -1077,39 +1319,46 @@ def test_gpr_fitted_partial_float64_operator_cdist_sine(self): assert "Max relative difference:" in str(e) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.check_outputs(gp, model_onnx, X_test, {}) @unittest.skipIf( pv.Version(ort_version) <= pv.Version(THRESHOLD2), - reason="onnxruntime %s" % THRESHOLD) + reason="onnxruntime %s" % THRESHOLD, + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_gpr_fitted_partial_float64_operator_cdist_quad(self): data = load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y) - gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.) 
+ gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.0) gp.fit(X_train, y_train) try: to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'CDIST'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "CDIST"}}, + target_opset=TARGET_OPSET, + ) raise AssertionError("CDIST is not implemented") except ValueError: pass model_onnx = to_onnx( - gp, initial_types=[('X', FloatTensorType([None, None]))], - options={GaussianProcessRegressor: {'optim': 'cdist'}}, - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", FloatTensorType([None, None]))], + options={GaussianProcessRegressor: {"optim": "cdist"}}, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - name_save = inspect.currentframe().f_code.co_name + '.onnx' - with open(name_save, 'wb') as f: + name_save = inspect.currentframe().f_code.co_name + ".onnx" + with open(name_save, "wb") as f: f.write(model_onnx.SerializeToString()) try: self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {}) @@ -1120,8 +1369,10 @@ def test_gpr_fitted_partial_float64_operator_cdist_quad(self): assert "Max relative difference:" in str(e) model_onnx = to_onnx( - gp, initial_types=[('X', DoubleTensorType([None, None]))], - target_opset=TARGET_OPSET) + gp, + initial_types=[("X", DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.check_outputs(gp, model_onnx, X_test, {}) @@ -1132,15 +1383,15 @@ def test_x_issue_789(self): model = GaussianProcessRegressor() pipe = make_pipeline(MinMaxScaler(feature_range=(-1, 1)), model) pipe.fit(tx1, ty1) - initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))] - onx = to_onnx(pipe, initial_types=initial_type, - target_opset=_TARGET_OPSET_) + initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))] + onx = to_onnx(pipe, initial_types=initial_type, target_opset=_TARGET_OPSET_) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - pred = sess.run(None, {'data_in': vx1.astype(np.float64)}) - assert_almost_equal(pipe.predict(vx1.astype(np.float64)).ravel(), - pred[0].ravel()) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + pred = sess.run(None, {"data_in": vx1.astype(np.float64)}) + assert_almost_equal( + pipe.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel() + ) def test_x_issue_789_cdist(self): n_samples, n_features = 10000, 10 @@ -1149,89 +1400,98 @@ def test_x_issue_789_cdist(self): model = GaussianProcessRegressor() pipe = make_pipeline(MinMaxScaler(feature_range=(-1, 1)), model) pipe.fit(tx1, ty1) - initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))] - onx = to_onnx(pipe, initial_types=initial_type, - target_opset=_TARGET_OPSET_, - options={GaussianProcessRegressor: {'optim': 'cdist'}}) + initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))] + onx = to_onnx( + pipe, + initial_types=initial_type, + target_opset=_TARGET_OPSET_, + options={GaussianProcessRegressor: {"optim": "cdist"}}, + ) self.assertIn('op_type: "CDist"', str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - pred = sess.run(None, {'data_in': vx1.astype(np.float64)}) - assert_almost_equal(pipe.predict(vx1.astype(np.float64)).ravel(), - pred[0].ravel()) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + pred = sess.run(None, 
{"data_in": vx1.astype(np.float64)}) + assert_almost_equal( + pipe.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel() + ) def test_white_kernel_float(self): X, y = make_friedman2(n_samples=500, noise=0, random_state=0) tx1, vx1, ty1, vy1 = train_test_split(X, y) kernel = DotProduct() + WhiteKernel(noise_level=0.5) - gpr = GaussianProcessRegressor( - kernel=kernel, random_state=0).fit(tx1, ty1) - initial_type = [('data_in', FloatTensorType([None, X.shape[1]]))] - onx = to_onnx(gpr, initial_types=initial_type, - target_opset=_TARGET_OPSET_) + gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(tx1, ty1) + initial_type = [("data_in", FloatTensorType([None, X.shape[1]]))] + onx = to_onnx(gpr, initial_types=initial_type, target_opset=_TARGET_OPSET_) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - pred = sess.run(None, {'data_in': vx1.astype(np.float32)}) - assert_almost_equal(gpr.predict(vx1.astype(np.float32)).shape[0], - pred[0].shape[0]) - assert_allclose(gpr.predict(vx1.astype(np.float32)).ravel(), - pred[0].ravel(), rtol=1e-3) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + pred = sess.run(None, {"data_in": vx1.astype(np.float32)}) + assert_almost_equal( + gpr.predict(vx1.astype(np.float32)).shape[0], pred[0].shape[0] + ) + assert_allclose( + gpr.predict(vx1.astype(np.float32)).ravel(), pred[0].ravel(), rtol=1e-3 + ) def test_white_kernel_double(self): X, y = make_friedman2(n_samples=500, noise=0, random_state=0) tx1, vx1, ty1, vy1 = train_test_split(X, y) kernel = DotProduct() + WhiteKernel(noise_level=0.5) - gpr = GaussianProcessRegressor( - kernel=kernel, random_state=0).fit(tx1, ty1) - initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))] - onx = to_onnx(gpr, initial_types=initial_type, - target_opset=_TARGET_OPSET_) + gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(tx1, ty1) + initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))] + onx = to_onnx(gpr, initial_types=initial_type, target_opset=_TARGET_OPSET_) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - pred = sess.run(None, {'data_in': vx1.astype(np.float64)}) - assert_almost_equal(gpr.predict(vx1.astype(np.float64)).ravel(), - pred[0].ravel()) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + pred = sess.run(None, {"data_in": vx1.astype(np.float64)}) + assert_almost_equal( + gpr.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel() + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_kernel_white_kernel(self): ker = WhiteKernel() # X, X - onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_ + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) with open("debug.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) x = np.random.randn(4, 3) - x[0, 0] = x[1, 1] = x[2, 2] = 10. - x[3, 2] = 5. 
+ x[0, 0] = x[1, 1] = x[2, 2] = 10.0 + x[3, 2] = 5.0 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': x.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": x.astype(np.float32)})[0] m1 = res m2 = ker(x) assert_almost_equal(m2, m1, decimal=5) # X, x - onx = convert_kernel(ker, 'X', x_train=x, - output_names=['Y'], dtype=np.float32, - op_version=_TARGET_OPSET_) + onx = convert_kernel( + ker, + "X", + x_train=x, + output_names=["Y"], + dtype=np.float32, + op_version=_TARGET_OPSET_, + ) model_onnx = onx.to_onnx( - inputs=[('X', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': x.astype(np.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": x.astype(np.float32)})[0] m1 = res m2 = ker(x, x) assert_almost_equal(m2, m1, decimal=5) diff --git a/tests/test_sklearn_glm_classifier_converter.py b/tests/test_sklearn_glm_classifier_converter.py index fc3f79f0c..c34ffd3c6 100644 --- a/tests/test_sklearn_glm_classifier_converter.py +++ b/tests/test_sklearn_glm_classifier_converter.py @@ -9,6 +9,7 @@ from sklearn.svm import LinearSVC from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.exceptions import ConvergenceWarning + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -27,49 +28,55 @@ fit_classification_model, fit_multilabel_classification_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + InferenceSessionEx as InferenceSession, +) -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] def _sklearn_version(): # Remove development version 0.22.dev0 becomes 0.22. 
- v = ".".join(sklearn.__version__.split('.')[:2]) + v = ".".join(sklearn.__version__.split(".")[:2]) return pv.Version(v) class TestGLMClassifierConverter(unittest.TestCase): - @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class_boolean(self): - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], - dtype=np.float32) + X = np.array( + [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32 + ) y = np.array([True, True, True, False, False, False]) model = linear_model.LogisticRegression(max_iter=100).fit(X, y) model_onnx = convert_sklearn( - model, "linear model", + model, + "linear model", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'zipmap': False}}, - target_opset=TARGET_OPSET) + options={id(model): {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) self.assertIn('name: "classlabels_ints"', str(model_onnx)) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBinaryBoolean") + X, model, model_onnx, basename="SklearnLogitisticRegressionBinaryBoolean" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=100), 2) + linear_model.LogisticRegression(max_iter=100), 2 + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBinary") + X, model, model_onnx, basename="SklearnLogitisticRegressionBinary" + ) if pv.Version(ort_version) >= pv.Version("1.0.0"): sess = InferenceSession(model_onnx.SerializeToString()) out = sess.get_outputs() @@ -81,20 +88,23 @@ def test_model_logistic_regression_binary_class(self): @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class_blacklist(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=100), 2) + linear_model.LogisticRegression(max_iter=100), 2 + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - black_op={'LinearClassifier'}) - self.assertNotIn('LinearClassifier', str(model_onnx)) + black_op={"LinearClassifier"}, + ) + self.assertNotIn("LinearClassifier", str(model_onnx)) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBinaryBlackList") + X, model, model_onnx, basename="SklearnLogitisticRegressionBinaryBlackList" + ) if pv.Version(ort_version) >= pv.Version("1.0.0"): sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) out = sess.get_outputs() lb = out[0].type sh = out[0].shape @@ -104,16 +114,18 @@ def test_model_logistic_regression_binary_class_blacklist(self): @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class_string(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=100), 2, - label_string=True) + linear_model.LogisticRegression(max_iter=100), 2, label_string=True + ) model_onnx = 
convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBinary") + X, model, model_onnx, basename="SklearnLogitisticRegressionBinary" + ) if pv.Version(ort_version) >= pv.Version("1.0.0"): sess = InferenceSession(model_onnx.SerializeToString()) out = sess.get_outputs() @@ -125,28 +137,34 @@ def test_model_logistic_regression_binary_class_string(self): @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_int(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=100), 3, is_int=True) + linear_model.LogisticRegression(max_iter=100), 3, is_int=True + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionInt") + X, model, model_onnx, basename="SklearnLogitisticRegressionInt" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_bool(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=100), 3, is_bool=True) + linear_model.LogisticRegression(max_iter=100), 3, is_bool=True + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBool") + X, model, model_onnx, basename="SklearnLogitisticRegressionBool" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_linear_discriminant_analysis(self): @@ -155,13 +173,18 @@ def test_model_logistic_linear_discriminant_analysis(self): X_test = np.array([[-0.8, -1], [-2, -1]], dtype=np.float32) model = LinearDiscriminantAnalysis(n_components=1).fit(X, y) model_onnx = convert_sklearn( - model, "linear model", + model, + "linear model", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnLinearDiscriminantAnalysisBin-Dec3") + X_test, + model, + model_onnx, + basename="SklearnLinearDiscriminantAnalysisBin-Dec3", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_linear_discriminant_analysis_decfunc(self): @@ -170,15 +193,20 @@ def test_model_logistic_linear_discriminant_analysis_decfunc(self): X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32) model = LinearDiscriminantAnalysis().fit(X, y) model_onnx = convert_sklearn( - model, "linear model", + model, + "linear model", [("input", FloatTensorType([None, X_test.shape[1]]))], - options={id(model): {'raw_scores': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"raw_scores": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, model, model_onnx, + X_test, + model, + model_onnx, basename="SklearnLinearDiscriminantAnalysisBinRawScore-Out0", - 
methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_linear_discriminant_analysis_decfunc3(self): @@ -187,303 +215,366 @@ def test_model_logistic_linear_discriminant_analysis_decfunc3(self): X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32) model = LinearDiscriminantAnalysis().fit(X, y) model_onnx = convert_sklearn( - model, "linear model", + model, + "linear model", [("input", FloatTensorType([None, X_test.shape[1]]))], - options={id(model): {'raw_scores': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"raw_scores": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, model, model_onnx, + X_test, + model, + model_onnx, basename="SklearnLinearDiscriminantAnalysisBinRawScore3-Out0", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_cv_binary_class(self): model, X = fit_classification_model( - linear_model.LogisticRegressionCV(max_iter=100), 2) + linear_model.LogisticRegressionCV(max_iter=100), 2 + ) model_onnx = convert_sklearn( - model, "logistic regression cv", + model, + "logistic regression cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticCVRegressionBinary") + X, model, model_onnx, basename="SklearnLogitisticCVRegressionBinary" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_cv_int(self): try: model, X = fit_classification_model( - linear_model.LogisticRegressionCV(max_iter=100), - 7, is_int=True) + linear_model.LogisticRegressionCV(max_iter=100), 7, is_int=True + ) except AttributeError: # AttributeError: 'str' object has no attribute 'decode' # Bug fixed in scikit-learn 0.24 due to a warning using encoding. 
return model_onnx = convert_sklearn( - model, "logistic regression cv", + model, + "logistic regression cv", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionCVInt") + X, model, model_onnx, basename="SklearnLogitisticRegressionCVInt" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_cv_bool(self): model, X = fit_classification_model( - linear_model.LogisticRegressionCV(max_iter=100), 3, is_bool=True) + linear_model.LogisticRegressionCV(max_iter=100), 3, is_bool=True + ) model_onnx = convert_sklearn( - model, "logistic regression cv", + model, + "logistic regression cv", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionCVBool") + X, model, model_onnx, basename="SklearnLogitisticRegressionCVBool" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class_nointercept(self): model, X = fit_classification_model( - linear_model.LogisticRegression( - fit_intercept=False, max_iter=10000), 2) + linear_model.LogisticRegression(fit_intercept=False, max_iter=10000), 2 + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionBinaryNoIntercept") + X, + model, + model_onnx, + basename="SklearnLogitisticRegressionBinaryNoIntercept", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 4) + linear_model.LogisticRegression(max_iter=10000), 4 + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMulti") + X, model, model_onnx, basename="SklearnLogitisticRegressionMulti" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_nocl(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 4, - label_string=True) + linear_model.LogisticRegression(max_iter=10000), 4, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx dump_data_and_model( - X, model, model_onnx, classes=model.classes_, - basename="SklearnLogitisticRegressionMultiNoCl") + X, + model, + 
model_onnx, + classes=model.classes_, + basename="SklearnLogitisticRegressionMultiNoCl", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_ovr(self): model, X = fit_classification_model( - linear_model.LogisticRegression( - multi_class='ovr', max_iter=10000), 3) + linear_model.LogisticRegression(multi_class="ovr", max_iter=10000), 3 + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMulti") + X, model, model_onnx, basename="SklearnLogitisticRegressionMulti" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_multinomial(self): model, X = fit_classification_model( linear_model.LogisticRegression( - multi_class="multinomial", solver="lbfgs", - max_iter=10000), 4) + multi_class="multinomial", solver="lbfgs", max_iter=10000 + ), + 4, + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMulti") + X, model, model_onnx, basename="SklearnLogitisticRegressionMulti" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_no_intercept(self): model, X = fit_classification_model( - linear_model.LogisticRegression( - fit_intercept=False, max_iter=10000), 3) + linear_model.LogisticRegression(fit_intercept=False, max_iter=10000), 3 + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMultiNoIntercept") + X, model, model_onnx, basename="SklearnLogitisticRegressionMultiNoIntercept" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_lbfgs(self): - penalty = ( - 'l2' if _sklearn_version() < pv.Version('0.21.0') - else 'none') + penalty = "l2" if _sklearn_version() < pv.Version("0.21.0") else "none" model, X = fit_classification_model( linear_model.LogisticRegression( - solver='lbfgs', penalty=penalty, max_iter=10000), 5) + solver="lbfgs", penalty=penalty, max_iter=10000 + ), + 5, + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMultiLbfgs") + X, model, model_onnx, basename="SklearnLogitisticRegressionMultiLbfgs" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_liblinear_l1(self): model, X = fit_classification_model( linear_model.LogisticRegression( - solver='liblinear', penalty='l1', 
max_iter=10000), 4) + solver="liblinear", penalty="l1", max_iter=10000 + ), + 4, + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMultiLiblinearL1") + X, model, model_onnx, basename="SklearnLogitisticRegressionMultiLiblinearL1" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_multi_class_saga_elasticnet(self): - if _sklearn_version() < pv.Version('0.21.0'): + if _sklearn_version() < pv.Version("0.21.0"): model, X = fit_classification_model( - linear_model.LogisticRegression( - solver='saga', max_iter=10000), 3) + linear_model.LogisticRegression(solver="saga", max_iter=10000), 3 + ) else: model, X = fit_classification_model( linear_model.LogisticRegression( - solver='saga', penalty='elasticnet', l1_ratio=0.1, - max_iter=10000), 3) + solver="saga", penalty="elasticnet", l1_ratio=0.1, max_iter=10000 + ), + 3, + ) model_onnx = convert_sklearn( - model, "multi-class logistic regression", + model, + "multi-class logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLogitisticRegressionMultiSagaElasticnet") + X, + model, + model_onnx, + basename="SklearnLogitisticRegressionMultiSagaElasticnet", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_linear_svc_binary_class(self): model, X = fit_classification_model(LinearSVC(max_iter=10000), 2) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSVCBinary-NoProb") + X, model, model_onnx, basename="SklearnLinearSVCBinary-NoProb" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_linear_svc_multi_class(self): model, X = fit_classification_model(LinearSVC(max_iter=100), 5) model_onnx = convert_sklearn( - model, "multi-class linear SVC", + model, + "multi-class linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSVCMulti") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCMulti") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_linear_svc_int(self): - model, X = fit_classification_model( - LinearSVC(max_iter=100), 5, is_int=True) + model, X = fit_classification_model(LinearSVC(max_iter=100), 5, is_int=True) model_onnx = convert_sklearn( - model, "multi-class linear SVC", + model, + "multi-class linear SVC", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSVCInt") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCInt") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def 
test_model_linear_svc_bool(self): - model, X = fit_classification_model( - LinearSVC(max_iter=100), 5, is_bool=True) + model, X = fit_classification_model(LinearSVC(max_iter=100), 5, is_bool=True) model_onnx = convert_sklearn( - model, "multi-class linear SVC", + model, + "multi-class linear SVC", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSVCBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCBool") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary(self): model, X = fit_classification_model(linear_model.RidgeClassifier(), 2) model_onnx = convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierBin") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBin") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_nozipmap(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 2) + linear_model.LogisticRegression(max_iter=10000), 2 + ) model_onnx = convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) - assert 'zipmap' in str(model_onnx).lower() + target_opset=TARGET_OPSET, + ) + assert "zipmap" in str(model_onnx).lower() - options = {id(model): {'zipmap': True}} + options = {id(model): {"zipmap": True}} model_onnx = convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) - assert 'zipmap' in str(model_onnx).lower() + options=options, + target_opset=TARGET_OPSET, + ) + assert "zipmap" in str(model_onnx).lower() - options = {id(model): {'zipmap': False}} + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) - assert 'zipmap' not in str(model_onnx).lower() + options=options, + target_opset=TARGET_OPSET, + ) + assert "zipmap" not in str(model_onnx).lower() self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierNZMBin") + X, model, model_onnx, basename="SklearnRidgeClassifierNZMBin" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_mispelled_zipmap(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 2) + linear_model.LogisticRegression(max_iter=10000), 2 + ) - options = {id(model): {'zipmap ': True}} + options = {id(model): {"zipmap ": True}} try: convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) raise AssertionError("Expecting an error.") except NameError as e: assert "Option 'zipmap ' not in" in str(e) 
@@ -491,14 +582,18 @@ def test_model_ridge_classifier_binary_mispelled_zipmap(self): @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 2) + linear_model.LogisticRegression(max_iter=10000), 2 + ) - options = {id(model): {'zipmap': 'True'}} + options = {id(model): {"zipmap": "True"}} try: convert_sklearn( - model, "binary ridge classifier", + model, + "binary ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) raise AssertionError("Expecting an error.") except ValueError as e: assert "Unexpected value ['True'] for option 'zipmap'" in str(e) @@ -507,142 +602,163 @@ def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self): def test_model_ridge_classifier_multi_class(self): model, X = fit_classification_model(linear_model.RidgeClassifier(), 5) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierMulti") + X, model, model_onnx, basename="SklearnRidgeClassifierMulti" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_int(self): model, X = fit_classification_model( - linear_model.RidgeClassifier(), 5, is_int=True) + linear_model.RidgeClassifier(), 5, is_int=True + ) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierInt") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierInt") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_bool(self): model, X = fit_classification_model( - linear_model.RidgeClassifier(), 4, is_bool=True) + linear_model.RidgeClassifier(), 4, is_bool=True + ) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBool") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_cv_binary(self): - model, X = fit_classification_model( - linear_model.RidgeClassifierCV(), 2) + model, X = fit_classification_model(linear_model.RidgeClassifierCV(), 2) model_onnx = convert_sklearn( - model, "binary ridge classifier cv", + model, + "binary ridge classifier cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierCVBin") + X, model, model_onnx, basename="SklearnRidgeClassifierCVBin" + ) 
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_cv_int(self): model, X = fit_classification_model( - linear_model.RidgeClassifierCV(), 2, is_int=True) + linear_model.RidgeClassifierCV(), 2, is_int=True + ) model_onnx = convert_sklearn( - model, "binary ridge classifier cv", + model, + "binary ridge classifier cv", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierCVInt") + X, model, model_onnx, basename="SklearnRidgeClassifierCVInt" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_cv_bool(self): model, X = fit_classification_model( - linear_model.RidgeClassifierCV(), 2, is_bool=True) + linear_model.RidgeClassifierCV(), 2, is_bool=True + ) model_onnx = convert_sklearn( - model, "binary ridge classifier cv", + model, + "binary ridge classifier cv", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierCVBool") + X, model, model_onnx, basename="SklearnRidgeClassifierCVBool" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_cv_multi_class(self): - model, X = fit_classification_model( - linear_model.RidgeClassifierCV(), 5) + model, X = fit_classification_model(linear_model.RidgeClassifierCV(), 5) model_onnx = convert_sklearn( - model, "multi-class ridge classifier cv", + model, + "multi-class ridge classifier cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRidgeClassifierCVMulti") + X, model, model_onnx, basename="SklearnRidgeClassifierCVMulti" + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_logistic_regression_binary_class_decision_function(self): model, X = fit_classification_model( - linear_model.LogisticRegression(max_iter=10000), 2) + linear_model.LogisticRegression(max_iter=10000), 2 + ) model_onnx = convert_sklearn( - model, "logistic regression", + model, + "logistic regression", [("input", FloatTensorType([None, X.shape[1]]))], - options={linear_model.LogisticRegression: {'raw_scores': True}}, - target_opset=TARGET_OPSET) + options={linear_model.LogisticRegression: {"raw_scores": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:5], model, model_onnx, + X[:5], + model, + model_onnx, basename="SklearnLogitisticRegressionBinaryRawScore", - methods=['predict', 'decision_function_binary']) + methods=["predict", "decision_function_binary"], + ) - @unittest.skip( - reason="Scikit-learn doesn't return multi-label output.") + @unittest.skip(reason="Scikit-learn doesn't return multi-label output.") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_cv_multilabel(self): model, X_test = fit_multilabel_classification_model( - linear_model.RidgeClassifierCV(random_state=42)) + linear_model.RidgeClassifierCV(random_state=42) + ) model_onnx = convert_sklearn( model, "scikit-learn RidgeClassifierCV", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + 
target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnRidgeClassifierCVMultiLabel") + X_test, model, model_onnx, basename="SklearnRidgeClassifierCVMultiLabel" + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_classifier_multi_zipmap_columns(self): model, X = fit_classification_model( - linear_model.LogisticRegression(), 3, - n_features=4, label_string=True) + linear_model.LogisticRegression(), 3, n_features=4, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options={linear_model.LogisticRegression: {'zipmap': 'columns'}}, - target_opset=TARGET_OPSET) + options={linear_model.LogisticRegression: {"zipmap": "columns"}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession(model_onnx.SerializeToString()) if sess is None: return names = [_.name for _ in sess.get_outputs()] - self.assertEqual(['output_label', 'scl0', 'scl1', 'scl2'], names) + self.assertEqual(["output_label", "scl0", "scl1", "scl2"], names) xt = X[:10].astype(np.float32) - got = sess.run(None, {'input': xt}) + got = sess.run(None, {"input": xt}) prob = model.predict_proba(xt) for i in range(prob.shape[1]): assert_almost_equal(prob[:, i], got[i + 1]) @@ -651,21 +767,23 @@ def test_model_classifier_multi_zipmap_columns(self): @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_classifier_multi_class_string_zipmap_columns(self): model, X = fit_classification_model( - linear_model.LogisticRegression(), 3, - n_features=4, label_string=False) + linear_model.LogisticRegression(), 3, n_features=4, label_string=False + ) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options={linear_model.LogisticRegression: {'zipmap': 'columns'}}, - target_opset=TARGET_OPSET) + options={linear_model.LogisticRegression: {"zipmap": "columns"}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession(model_onnx.SerializeToString()) if sess is None: return names = [_.name for _ in sess.get_outputs()] - self.assertEqual(['output_label', 'i0', 'i1', 'i2'], names) + self.assertEqual(["output_label", "i0", "i1", "i2"], names) xt = X[:10].astype(np.float32) - got = sess.run(None, {'input': xt}) + got = sess.run(None, {"input": xt}) prob = model.predict_proba(xt) for i in range(prob.shape[1]): assert_almost_equal(prob[:, i], got[i + 1]) diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py index 1065b3acf..c23ffaca5 100644 --- a/tests/test_sklearn_glm_regressor_converter.py +++ b/tests/test_sklearn_glm_regressor_converter.py @@ -6,6 +6,7 @@ import packaging.version as pv import numpy from numpy.testing import assert_almost_equal + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -18,6 +19,7 @@ from sklearn.ensemble import GradientBoostingRegressor from sklearn.neural_network import MLPRegressor from sklearn.svm import LinearSVR + try: from sklearn.linear_model import QuantileRegressor except (ImportError, AttributeError): @@ -42,11 +44,14 @@ ) from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, 
fit_regression_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + fit_regression_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestGLMRegressorConverter(unittest.TestCase): @@ -54,180 +59,207 @@ class TestGLMRegressorConverter(unittest.TestCase): def test_model_linear_regression(self): model, X = fit_regression_model(linear_model.LinearRegression()) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegression-Dec4") + X, model, model_onnx, basename="SklearnLinearRegression-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression_blacklist(self): model, X = fit_regression_model(linear_model.LinearRegression()) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - black_op={'LinearRegressor'}) - self.assertNotIn('LinearRegressor', str(model_onnx)) + black_op={"LinearRegressor"}, + ) + self.assertNotIn("LinearRegressor", str(model_onnx)) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegressionBlackOp-Dec4") + X, model, model_onnx, basename="SklearnLinearRegressionBlackOp-Dec4" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.5.0"), - reason="old onnxruntime does not support double") + reason="old onnxruntime does not support double", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression_multi(self): - model, X = fit_regression_model(linear_model.LinearRegression(), - n_targets=2) + model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegressionMulti-Dec4") + X, model, model_onnx, basename="SklearnLinearRegressionMulti-Dec4" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.5.0"), - reason="old onnxruntime does not support double") + reason="old onnxruntime does not support double", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression64(self): model, X = fit_regression_model(linear_model.LinearRegression()) - model_onnx = convert_sklearn(model, "linear regression", - [("input", DoubleTensorType(X.shape))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "linear regression", + [("input", DoubleTensorType(X.shape))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) self.assertIn("elem_type: 11", str(model_onnx)) dump_data_and_model( - X.astype(numpy.float64), model, model_onnx, - basename="SklearnLinearRegression64-Dec4") + X.astype(numpy.float64), + model, + model_onnx, + basename="SklearnLinearRegression64-Dec4", + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.5.0"), - reason="old onnxruntime does not support double") + reason="old onnxruntime does not support double", + ) def 
test_model_linear_regression64_multiple(self): - model, X = fit_regression_model(linear_model.LinearRegression(), - n_targets=2) - model_onnx = convert_sklearn(model, "linear regression", - [("input", DoubleTensorType(X.shape))], - target_opset=TARGET_OPSET) + model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2) + model_onnx = convert_sklearn( + model, + "linear regression", + [("input", DoubleTensorType(X.shape))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) self.assertIn("elem_type: 11", str(model_onnx)) dump_data_and_model( - X.astype(numpy.float64), model, model_onnx, - basename="SklearnLinearRegression64Multi-Dec4") + X.astype(numpy.float64), + model, + model_onnx, + basename="SklearnLinearRegression64Multi-Dec4", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression_int(self): - model, X = fit_regression_model( - linear_model.LinearRegression(), is_int=True) + model, X = fit_regression_model(linear_model.LinearRegression(), is_int=True) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegressionInt-Dec4") + X, model, model_onnx, basename="SklearnLinearRegressionInt-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression_nointercept(self): model, X = fit_regression_model( - linear_model.LinearRegression(fit_intercept=False)) + linear_model.LinearRegression(fit_intercept=False) + ) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegressionNoIntercept-Dec4") + X, model, model_onnx, basename="SklearnLinearRegressionNoIntercept-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_regression_bool(self): - model, X = fit_regression_model( - linear_model.LinearRegression(), is_bool=True) + model, X = fit_regression_model(linear_model.LinearRegression(), is_bool=True) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearRegressionBool") + X, model, model_onnx, basename="SklearnLinearRegressionBool" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_svr(self): model, X = fit_regression_model(LinearSVR()) model_onnx = convert_sklearn( - model, "linear SVR", + model, + "linear SVR", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSvr-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSvr-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_svr_int(self): model, X = fit_regression_model(LinearSVR(), is_int=True) model_onnx = convert_sklearn( - model, "linear SVR", + model, + "linear SVR", 
[("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSvrInt-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSvrInt-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_linear_svr_bool(self): model, X = fit_regression_model(LinearSVR(), is_bool=True) model_onnx = convert_sklearn( - model, "linear SVR", + model, + "linear SVR", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLinearSVRBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVRBool") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ridge(self): model, X = fit_regression_model(linear_model.Ridge()) model_onnx = convert_sklearn( - model, "ridge regression", + model, + "ridge regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, basename="SklearnRidge-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidge-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ridge_int(self): model, X = fit_regression_model(linear_model.Ridge(), is_int=True) model_onnx = convert_sklearn( - model, "ridge regression", + model, + "ridge regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, basename="SklearnRidgeInt-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeInt-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ridge_bool(self): model, X = fit_regression_model(linear_model.Ridge(), is_bool=True) model_onnx = convert_sklearn( - model, "ridge regression", + model, + "ridge regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, basename="SklearnRidgeBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeBool") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_sgd_regressor(self): @@ -236,269 +268,285 @@ def test_model_sgd_regressor(self): model, "scikit-learn SGD regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, basename="SklearnSGDRegressor-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnSGDRegressor-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_sgd_regressor_int(self): - model, X = fit_regression_model( - linear_model.SGDRegressor(), is_int=True) + model, X = fit_regression_model(linear_model.SGDRegressor(), is_int=True) model_onnx = convert_sklearn( - model, "SGD regression", + model, + "SGD regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, 
basename="SklearnSGDRegressorInt-Dec4") + X, model, model_onnx, basename="SklearnSGDRegressorInt-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_sgd_regressor_bool(self): - model, X = fit_regression_model( - linear_model.SGDRegressor(), is_bool=True) + model, X = fit_regression_model(linear_model.SGDRegressor(), is_bool=True) model_onnx = convert_sklearn( - model, "SGD regression", + model, + "SGD regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDRegressorBool-Dec4") + X, model, model_onnx, basename="SklearnSGDRegressorBool-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_elastic_net_regressor(self): model, X = fit_regression_model(linear_model.ElasticNet()) model_onnx = convert_sklearn( - model, "scikit-learn elastic-net regression", + model, + "scikit-learn elastic-net regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnElasticNet-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnElasticNet-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_elastic_net_cv_regressor(self): model, X = fit_regression_model(linear_model.ElasticNetCV()) model_onnx = convert_sklearn( - model, "scikit-learn elastic-net regression", + model, + "scikit-learn elastic-net regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnElasticNetCV-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnElasticNetCV-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_elastic_net_regressor_int(self): model, X = fit_regression_model(linear_model.ElasticNet(), is_int=True) model_onnx = convert_sklearn( - model, "elastic net regression", + model, + "elastic net regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnElasticNetRegressorInt-Dec4") + X, model, model_onnx, basename="SklearnElasticNetRegressorInt-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_elastic_net_regressor_bool(self): - model, X = fit_regression_model( - linear_model.ElasticNet(), is_bool=True) + model, X = fit_regression_model(linear_model.ElasticNet(), is_bool=True) model_onnx = convert_sklearn( - model, "elastic net regression", + model, + "elastic net regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnElasticNetRegressorBool") + X, model, model_onnx, basename="SklearnElasticNetRegressorBool" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lars(self): model, X = fit_regression_model(linear_model.Lars()) model_onnx = convert_sklearn( - model, "lars", + model, + "lars", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + 
target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, basename="SklearnLars-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLars-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lars_cv(self): model, X = fit_regression_model(linear_model.LarsCV()) model_onnx = convert_sklearn( - model, "lars", + model, + "lars", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLarsCV-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLarsCV-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lasso_lars(self): model, X = fit_regression_model(linear_model.LassoLars(alpha=0.01)) model_onnx = convert_sklearn( - model, "lasso lars", + model, + "lasso lars", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoLars-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLars-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lasso_lars_cv(self): model, X = fit_regression_model(linear_model.LassoLarsCV()) model_onnx = convert_sklearn( - model, "lasso lars cv", + model, + "lasso lars cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoLarsCV-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsCV-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lasso_lars_ic(self): model, X = fit_regression_model(linear_model.LassoLarsIC()) model_onnx = convert_sklearn( - model, "lasso lars cv", + model, + "lasso lars cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoLarsIC-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsIC-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lasso_cv(self): model, X = fit_regression_model(linear_model.LassoCV()) model_onnx = convert_sklearn( - model, "lasso cv", + model, + "lasso cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoCV-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoCV-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_lasso_lars_int(self): model, X = fit_regression_model(linear_model.LassoLars(), is_int=True) model_onnx = convert_sklearn( - model, "lasso lars", + model, + "lasso lars", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoLarsInt-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsInt-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def 
test_model_lasso_lars_bool(self): - model, X = fit_regression_model( - linear_model.LassoLars(), is_bool=True) + model, X = fit_regression_model(linear_model.LassoLars(), is_bool=True) model_onnx = convert_sklearn( - model, "lasso lars", + model, + "lasso lars", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnLassoLarsBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsBool") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_multi_linear_regression(self): - model, X = fit_regression_model(linear_model.LinearRegression(), - n_targets=2) + model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnMultiLinearRegression-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnMultiLinearRegression-Dec4", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ard_regression(self): - model, X = fit_regression_model( - linear_model.ARDRegression(), factor=0.001) + model, X = fit_regression_model(linear_model.ARDRegression(), factor=0.001) model_onnx = convert_sklearn( - model, "ard regression", + model, + "ard regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnARDRegression-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnARDRegression-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_theilsen(self): model, X = fit_regression_model(linear_model.TheilSenRegressor()) model_onnx = convert_sklearn( - model, "thiel-sen regressor", + model, + "thiel-sen regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnTheilSen-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnTheilSen-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_bayesian_ridge(self): model, X = fit_regression_model(linear_model.BayesianRidge()) model_onnx = convert_sklearn( - model, "bayesian ridge", + model, + "bayesian ridge", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnBayesianRidge-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnBayesianRidge-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_bayesian_ridge_return_std(self): - model, X = fit_regression_model(linear_model.BayesianRidge(), - n_features=2, n_samples=20) + model, X = fit_regression_model( + linear_model.BayesianRidge(), n_features=2, n_samples=20 + ) model_onnx = convert_sklearn( - model, "bayesian ridge", + model, + "bayesian ridge", [("input", FloatTensorType([None, X.shape[1]]))], - options={linear_model.BayesianRidge: 
{'return_std': True}}, - target_opset=TARGET_OPSET) + options={linear_model.BayesianRidge: {"return_std": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - outputs = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + outputs = sess.run(None, {"input": X}) pred, std = model.predict(X, return_std=True) assert_almost_equal(pred, outputs[0].ravel(), decimal=4) assert_almost_equal(std, outputs[1].ravel(), decimal=4) - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.3.0"), - reason="output type") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="output type" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_bayesian_ridge_return_std_double(self): - model, X = fit_regression_model(linear_model.BayesianRidge(), - n_features=2, n_samples=100, - n_informative=1) + model, X = fit_regression_model( + linear_model.BayesianRidge(), n_features=2, n_samples=100, n_informative=1 + ) model_onnx = convert_sklearn( - model, "bayesian ridge", + model, + "bayesian ridge", [("input", DoubleTensorType([None, X.shape[1]]))], - options={linear_model.BayesianRidge: {'return_std': True}}, - target_opset=TARGET_OPSET) + options={linear_model.BayesianRidge: {"return_std": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) X = X.astype(numpy.float64) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - outputs = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + outputs = sess.run(None, {"input": X}) pred, std = model.predict(X, return_std=True) assert_almost_equal(pred, outputs[0].ravel()) assert_almost_equal(std, outputs[1].ravel(), decimal=4) @@ -512,22 +560,25 @@ def test_model_bayesian_ridge_return_std_normalize(self): return model, X = fit_regression_model(model, n_features=2, n_samples=50) model_onnx = convert_sklearn( - model, "bayesian ridge", + model, + "bayesian ridge", [("input", FloatTensorType([None, X.shape[1]]))], - options={linear_model.BayesianRidge: {'return_std': True}}, - target_opset=TARGET_OPSET) + options={linear_model.BayesianRidge: {"return_std": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - outputs = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + outputs = sess.run(None, {"input": X}) pred, std = model.predict(X, return_std=True) assert_almost_equal(pred, outputs[0].ravel(), decimal=4) assert_almost_equal(std, outputs[1].ravel(), decimal=4) - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.3.0"), - reason="output type") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="output type" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_bayesian_ridge_return_std_normalize_double(self): try: @@ -537,17 +588,19 @@ def test_model_bayesian_ridge_return_std_normalize_double(self): return model, X = fit_regression_model(model, n_features=2, n_samples=50) model_onnx = convert_sklearn( - model, "bayesian ridge", + model, + "bayesian ridge", [("input", DoubleTensorType([None, X.shape[1]]))], - options={linear_model.BayesianRidge: {'return_std': True}}, - target_opset=TARGET_OPSET) + 
options={linear_model.BayesianRidge: {"return_std": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) X = X.astype(numpy.float64) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - outputs = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + outputs = sess.run(None, {"input": X}) pred, std = model.predict(X, return_std=True) assert_almost_equal(pred, outputs[0].ravel()) assert_almost_equal(std, outputs[1].ravel(), decimal=4) @@ -556,246 +609,311 @@ def test_model_bayesian_ridge_return_std_normalize_double(self): def test_model_huber_regressor(self): model, X = fit_regression_model(linear_model.HuberRegressor()) model_onnx = convert_sklearn( - model, "huber regressor", + model, + "huber regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnHuberRegressor-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnHuberRegressor-Dec4") @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_multi_task_lasso(self): - model, X = fit_regression_model(linear_model.MultiTaskLasso(), - n_targets=2) + model, X = fit_regression_model(linear_model.MultiTaskLasso(), n_targets=2) model_onnx = convert_sklearn( - model, "multi-task lasso", + model, + "multi-task lasso", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnMultiTaskLasso-Dec4") + X, model, model_onnx, verbose=False, basename="SklearnMultiTaskLasso-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_multi_task_lasso_cv(self): - model, X = fit_regression_model(linear_model.MultiTaskLassoCV(), - n_targets=2) + model, X = fit_regression_model(linear_model.MultiTaskLassoCV(), n_targets=2) model_onnx = convert_sklearn( - model, "mutli-task lasso cv", + model, + "mutli-task lasso cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnMultiTaskLassoCV-Dec4") + X, model, model_onnx, verbose=False, basename="SklearnMultiTaskLassoCV-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_multi_task_elasticnet(self): - model, X = fit_regression_model(linear_model.MultiTaskElasticNet(), - n_targets=2) + model, X = fit_regression_model(linear_model.MultiTaskElasticNet(), n_targets=2) model_onnx = convert_sklearn( - model, "multi-task elasticnet", + model, + "multi-task elasticnet", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnMultiTaskElasticNet-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnMultiTaskElasticNet-Dec4", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_orthogonal_matching_pursuit(self): - model, X = fit_regression_model( - linear_model.OrthogonalMatchingPursuit()) + model, X = fit_regression_model(linear_model.OrthogonalMatchingPursuit()) model_onnx = convert_sklearn( - model, 
"orthogonal matching pursuit", + model, + "orthogonal matching pursuit", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnOrthogonalMatchingPursuit-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnOrthogonalMatchingPursuit-Dec4", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_passive_aggressive_regressor(self): - model, X = fit_regression_model( - linear_model.PassiveAggressiveRegressor()) + model, X = fit_regression_model(linear_model.PassiveAggressiveRegressor()) model_onnx = convert_sklearn( - model, "passive aggressive regressor", + model, + "passive aggressive regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnPassiveAggressiveRegressor-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnPassiveAggressiveRegressor-Dec4", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ransac_regressor_default(self): - model, X = fit_regression_model( - linear_model.RANSACRegressor()) + model, X = fit_regression_model(linear_model.RANSACRegressor()) model_onnx = convert_sklearn( - model, "ransac regressor", + model, + "ransac regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnRANSACRegressor-Dec4") + X, model, model_onnx, verbose=False, basename="SklearnRANSACRegressor-Dec4" + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ransac_regressor_mlp(self): model, X = fit_regression_model( linear_model.RANSACRegressor( - MLPRegressor(solver='sgd', max_iter=20), - min_samples=5)) + MLPRegressor(solver="sgd", max_iter=20), min_samples=5 + ) + ) model_onnx = convert_sklearn( - model, "ransac regressor", + model, + "ransac regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnRANSACRegressorMLP-Dec3") + X, + model, + model_onnx, + verbose=False, + basename="SklearnRANSACRegressorMLP-Dec3", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_ransac_regressor_tree(self): model, X = fit_regression_model( - linear_model.RANSACRegressor( - GradientBoostingRegressor(), - min_samples=5)) + linear_model.RANSACRegressor(GradientBoostingRegressor(), min_samples=5) + ) model_onnx = convert_sklearn( - model, "ransac regressor", + model, + "ransac regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnRANSACRegressorTree-Dec3") + X, + model, + model_onnx, + verbose=False, + basename="SklearnRANSACRegressorTree-Dec3", + ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) def test_model_multi_task_elasticnet_cv(self): - model, X = fit_regression_model(linear_model.MultiTaskElasticNetCV(), - n_targets=2) + model, X = fit_regression_model( + 
linear_model.MultiTaskElasticNetCV(), n_targets=2 + ) model_onnx = convert_sklearn( - model, "multi-task elasticnet cv", + model, + "multi-task elasticnet cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnMultiTaskElasticNetCV-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnMultiTaskElasticNetCV-Dec4", + ) - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_orthogonal_matching_pursuit_cv(self): - model, X = fit_regression_model( - linear_model.OrthogonalMatchingPursuitCV()) + model, X = fit_regression_model(linear_model.OrthogonalMatchingPursuitCV()) model_onnx = convert_sklearn( - model, "orthogonal matching pursuit cv", + model, + "orthogonal matching pursuit cv", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, verbose=False, - basename="SklearnOrthogonalMatchingPursuitCV-Dec4") + X, + model, + model_onnx, + verbose=False, + basename="SklearnOrthogonalMatchingPursuitCV-Dec4", + ) - def check_model(self, model, X, name='input'): + def check_model(self, model, X, name="input"): try: sess = InferenceSession( - model.SerializeToString(), - providers=["CPUExecutionProvider"]) + model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except Exception as e: - raise AssertionError( - "Unable to load model\n%s" % str(model)) from e + raise AssertionError("Unable to load model\n%s" % str(model)) from e try: return sess.run(None, {name: X[:7]}) except Exception as e: raise AssertionError( - "Unable to run model X.shape=%r X.dtype=%r\n%s" % ( - X[:7].shape, X.dtype, str(model))) from e + "Unable to run model X.shape=%r X.dtype=%r\n%s" + % (X[:7].shape, X.dtype, str(model)) + ) from e - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) - @unittest.skipIf(PoissonRegressor is None, - reason="scikit-learn too old") + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @unittest.skipIf(PoissonRegressor is None, reason="scikit-learn too old") def test_model_poisson_regressor(self): X, y = make_regression( - n_features=5, n_samples=100, n_targets=1, random_state=42, - n_informative=3) + n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3 + ) y = numpy.abs(y) y = y / y.max() + 1e-5 model = linear_model.PoissonRegressor().fit(X, y) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.check_model(model_onnx, X.astype(numpy.float32)) dump_data_and_model( - X.astype(numpy.float32), model, model_onnx, - basename="SklearnPoissonRegressor-Dec4") + X.astype(numpy.float32), + model, + model_onnx, + basename="SklearnPoissonRegressor-Dec4", + ) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(numpy.float64), model, model_onnx, - basename="SklearnPoissonRegressor64") + X.astype(numpy.float64), + model, + 
model_onnx, + basename="SklearnPoissonRegressor64", + ) - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) - @unittest.skipIf(TweedieRegressor is None, - reason="scikti-learn too old") + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) + @unittest.skipIf(TweedieRegressor is None, reason="scikti-learn too old") def test_model_tweedie_regressor(self): X, y = make_regression( - n_features=5, n_samples=100, n_targets=1, random_state=42, - n_informative=3) + n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3 + ) y = numpy.abs(y) y = y / y.max() + 1e-5 for power in range(0, 4): with self.subTest(power=power): model = linear_model.TweedieRegressor(power=power).fit(X, y) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.check_model(model_onnx, X.astype(numpy.float32)) dump_data_and_model( - X.astype(numpy.float32), model, model_onnx, - basename="SklearnTweedieRegressor%d-Dec4" % power) + X.astype(numpy.float32), + model, + model_onnx, + basename="SklearnTweedieRegressor%d-Dec4" % power, + ) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(numpy.float64), model, model_onnx, - basename="SklearnTweedieRegressor64%d" % power) - - @unittest.skipIf(QuantileRegressor is None, - reason="scikit-learn<1.0") - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + X.astype(numpy.float64), + model, + model_onnx, + basename="SklearnTweedieRegressor64%d" % power, + ) + + @unittest.skipIf(QuantileRegressor is None, reason="scikit-learn<1.0") + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_model_quantile_regressor(self): X, y = make_regression( - n_features=5, n_samples=100, n_targets=1, random_state=42, - n_informative=3) + n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3 + ) y = numpy.abs(y) y = y / y.max() + 1e-5 model = linear_model.QuantileRegressor(solver="highs").fit(X, y) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.check_model(model_onnx, X.astype(numpy.float32)) dump_data_and_model( - X.astype(numpy.float32), model, model_onnx, - basename="SklearnQuantileRegressor-Dec4") + X.astype(numpy.float32), + model, + model_onnx, + basename="SklearnQuantileRegressor-Dec4", + ) model_onnx = convert_sklearn( - model, "linear regression", + model, + "linear regression", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X.astype(numpy.float64), model, model_onnx, - basename="SklearnQuantileRegressor64") + X.astype(numpy.float64), + model, + model_onnx, + basename="SklearnQuantileRegressor64", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_gradient_boosting_converters.py b/tests/test_sklearn_gradient_boosting_converters.py index 5f3873d65..4ff10f1b6 100644 --- a/tests/test_sklearn_gradient_boosting_converters.py +++ b/tests/test_sklearn_gradient_boosting_converters.py @@ -7,10 +7,7 @@ from pandas import DataFrame from 
sklearn import __version__ as skl_version from sklearn.datasets import make_classification -from sklearn.ensemble import ( - GradientBoostingClassifier, - GradientBoostingRegressor -) +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.model_selection import train_test_split from onnxruntime import __version__ as ort_version from skl2onnx import convert_sklearn @@ -20,20 +17,25 @@ Int64TensorType, ) from test_utils import ( - dump_binary_classification, dump_multiple_classification, - fit_classification_model, dump_data_and_model, fit_regression_model, - TARGET_OPSET, InferenceSessionEx as InferenceSession) + dump_binary_classification, + dump_multiple_classification, + fit_classification_model, + dump_data_and_model, + fit_regression_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -ort_version = ort_version.split('+')[0] -skl_version = skl_version.split('+')[0] +ort_version = ort_version.split("+")[0] +skl_version = skl_version.split("+")[0] class TestSklearnGradientBoostingModels(unittest.TestCase): - @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.5.0"), - reason="Depends on PR #1015 onnxruntime.") + reason="Depends on PR #1015 onnxruntime.", + ) def test_gradient_boosting_classifier1Deviance(self): model = GradientBoostingClassifier(n_estimators=1, max_depth=2) X, y = make_classification(10, n_features=4, random_state=42) @@ -43,25 +45,35 @@ def test_gradient_boosting_classifier1Deviance(self): for cl in [None, 0.231, 1e-6, 0.9]: if cl is not None: model.init_.class_prior_ = np.array([cl, cl]) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] - model_onnx = convert_sklearn(model, initial_types=initial_types, - target_opset=TARGET_OPSET) + initial_types = [("input", FloatTensorType((None, X.shape[1])))] + model_onnx = convert_sklearn( + model, initial_types=initial_types, target_opset=TARGET_OPSET + ) if "Regressor" in str(model_onnx): raise AssertionError(str(model_onnx)) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) pred = model.predict_proba(X) delta = abs(res[1][0][0] - pred[0, 0]) if delta > 1e-5: - rows = ["diff", str(delta), - "X", str(X), - "base_values_", str(model.init_.class_prior_), - "predicted_label", str(model.predict(X)), - "expected", str(pred), - "onnxruntime", str(DataFrame(res[1])), - "model", str(model_onnx)] + rows = [ + "diff", + str(delta), + "X", + str(X), + "base_values_", + str(model.init_.class_prior_), + "predicted_label", + str(model.predict(X)), + "expected", + str(pred), + "onnxruntime", + str(DataFrame(res[1])), + "model", + str(model_onnx), + ] raise AssertionError("\n---\n".join(rows)) dump_binary_classification(model, suffix="1Deviance") @@ -75,203 +87,267 @@ def test_gradient_boosting_classifier_multi(self): def test_gradient_boosting_binary_classification(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=3), 2) + GradientBoostingClassifier(n_estimators=3), 2 + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - 
basename="SklearnGradientBoostingBinaryClassifier") + X, model, model_onnx, basename="SklearnGradientBoostingBinaryClassifier" + ) def test_gradient_boosting_binary_classification_init_zero(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4, init='zero'), 2) + GradientBoostingClassifier(n_estimators=4, init="zero"), 2 + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingBinaryClassifierInitZero") + X, + model, + model_onnx, + basename="SklearnGradientBoostingBinaryClassifierInitZero", + ) def test_gradient_boosting_multiclass_classification(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4), 5) + GradientBoostingClassifier(n_estimators=4), 5 + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingMultiClassClassifier") + X, model, model_onnx, basename="SklearnGradientBoostingMultiClassClassifier" + ) def test_gradient_boosting_int(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4), 5, is_int=True) + GradientBoostingClassifier(n_estimators=4), 5, is_int=True + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingInt") + dump_data_and_model(X, model, model_onnx, basename="SklearnGradientBoostingInt") def test_gradient_boosting_bool(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4), 5, is_bool=True) + GradientBoostingClassifier(n_estimators=4), 5, is_bool=True + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingBool") + X, model, model_onnx, basename="SklearnGradientBoostingBool" + ) def test_gradient_boosting_multiclass_decision_function(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4), 5) - options = {id(model): {'raw_scores': True}} + GradientBoostingClassifier(n_estimators=4), 5 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnGradientBoostingMultiClassDecisionFunction", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) def 
test_gradient_boosting_multiclass_classification_init_zero(self): model, X = fit_classification_model( - GradientBoostingClassifier(n_estimators=4, init='zero'), 4) + GradientBoostingClassifier(n_estimators=4, init="zero"), 4 + ) model_onnx = convert_sklearn( - model, "gradient boosting classifier", + model, + "gradient boosting classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingMultiClassClassifierInitZero") + X, + model, + model_onnx, + basename="SklearnGradientBoostingMultiClassClassifierInitZero", + ) @unittest.skipIf( - pv.Version(skl_version) <= pv.Version("1.0"), - reason="Loss name was removed.") + pv.Version(skl_version) <= pv.Version("1.0"), reason="Loss name was removed." + ) def test_gradient_boosting_regressor_ls_loss(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=3, loss="squared_error")) + GradientBoostingRegressor(n_estimators=3, loss="squared_error") + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressionLsLoss") + X, model, model_onnx, basename="SklearnGradientBoostingRegressionLsLoss" + ) @unittest.skipIf( - pv.Version(skl_version) <= pv.Version("1.0"), - reason="Loss name was removed.") + pv.Version(skl_version) <= pv.Version("1.0"), reason="Loss name was removed." + ) def test_gradient_boosting_regressor_lad_loss(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=3, loss="absolute_error")) + GradientBoostingRegressor(n_estimators=3, loss="absolute_error") + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressionLadLoss") + X, model, model_onnx, basename="SklearnGradientBoostingRegressionLadLoss" + ) def test_gradient_boosting_regressor_huber_loss(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=3, loss="huber")) + GradientBoostingRegressor(n_estimators=3, loss="huber") + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressionHuberLoss") + X, model, model_onnx, basename="SklearnGradientBoostingRegressionHuberLoss" + ) def test_gradient_boosting_regressor_quantile_loss(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=3, loss="quantile")) + GradientBoostingRegressor(n_estimators=3, loss="quantile") + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, 
model_onnx, - basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4") + X, + model, + model_onnx, + basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4", + ) def test_gradient_boosting_regressor_int(self): model, X = fit_regression_model( - GradientBoostingRegressor(random_state=42), is_int=True) + GradientBoostingRegressor(random_state=42), is_int=True + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressionInt-Dec3") + X, model, model_onnx, basename="SklearnGradientBoostingRegressionInt-Dec3" + ) def test_gradient_boosting_regressor_zero_init(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=30, init="zero", - random_state=42)) + GradientBoostingRegressor(n_estimators=30, init="zero", random_state=42) + ) model_onnx = convert_sklearn( - model, "gradient boosting regression", + model, + "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressionZeroInit-Dec4") + X, + model, + model_onnx, + basename="SklearnGradientBoostingRegressionZeroInit-Dec4", + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.5.0"), - reason="Depends on PR #1015 onnxruntime.") + reason="Depends on PR #1015 onnxruntime.", + ) def test_gradient_boosting_regressor_learning_rate(self): X, y = make_classification( - n_features=100, n_samples=1000, n_classes=2, n_informative=8) + n_features=100, n_samples=1000, n_classes=2, n_informative=8 + ) X_train, X_test, y_train, _ = train_test_split( - X, y, test_size=0.5, random_state=42) + X, y, test_size=0.5, random_state=42 + ) model = GradientBoostingClassifier().fit(X_train, y_train) onnx_model = convert_sklearn( - model, 'lr2', [('input', FloatTensorType(X_test.shape))], - target_opset=TARGET_OPSET) + model, + "lr2", + [("input", FloatTensorType(X_test.shape))], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, input_feed={'input': X_test.astype(np.float32)}) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, input_feed={"input": X_test.astype(np.float32)}) r1 = np.mean( - np.isclose(model.predict_proba(X_test), - list(map(lambda x: list(map(lambda y: x[y], x)), - res[1])), atol=1e-4)) + np.isclose( + model.predict_proba(X_test), + list(map(lambda x: list(map(lambda y: x[y], x)), res[1])), + atol=1e-4, + ) + ) r2 = np.mean(res[0] == model.predict(X_test)) assert r1 == r2 def test_gradient_boosting_regressor_bool(self): model, X = fit_regression_model( - GradientBoostingRegressor(random_state=42), is_bool=True) + GradientBoostingRegressor(random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "gradient boosting regressor", + model, + "gradient boosting regressor", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGradientBoostingRegressorBool-Dec4") + X, model, model_onnx, 
basename="SklearnGradientBoostingRegressorBool-Dec4" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_grid_search_cv_converter.py b/tests/test_sklearn_grid_search_cv_converter.py index 5239b5d68..1e29191d8 100644 --- a/tests/test_sklearn_grid_search_cv_converter.py +++ b/tests/test_sklearn_grid_search_cv_converter.py @@ -15,168 +15,216 @@ from sklearn.datasets import load_iris from skl2onnx import convert_sklearn, to_onnx from skl2onnx.common.data_types import ( - DoubleTensorType, FloatTensorType, Int64TensorType) + DoubleTensorType, + FloatTensorType, + Int64TensorType, +) from test_utils import ( - dump_data_and_model, fit_classification_model, + dump_data_and_model, + fit_classification_model, fit_clustering_model, - fit_regression_model, TARGET_OPSET) + fit_regression_model, + TARGET_OPSET, +) class TestSklearnGridSearchCVModels(unittest.TestCase): def test_grid_search_binary_float(self): - tuned_parameters = [{'C': np.logspace(-1, 0, 4)}] + tuned_parameters = [{"C": np.logspace(-1, 0, 4)}] clf = GridSearchCV( - LogisticRegression(random_state=42, max_iter=100, solver='lbfgs', - multi_class='ovr'), - tuned_parameters, cv=5) + LogisticRegression( + random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr" + ), + tuned_parameters, + cv=5, + ) model, X = fit_classification_model(clf, n_classes=2) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchBinaryFloat-Dec4") + X, model, model_onnx, basename="SklearnGridSearchBinaryFloat-Dec4" + ) def test_grid_search_multiclass_float(self): - tuned_parameters = [{'C': np.logspace(-1, 0, 4)}] + tuned_parameters = [{"C": np.logspace(-1, 0, 4)}] clf = GridSearchCV( - SVC(random_state=42, probability=True, gamma='auto'), - tuned_parameters, cv=5) + SVC(random_state=42, probability=True, gamma="auto"), tuned_parameters, cv=5 + ) model, X = fit_classification_model(clf, n_classes=5) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchMulticlassFloat") + X, model, model_onnx, basename="SklearnGridSearchMulticlassFloat" + ) def test_grid_search_binary_int(self): - tuned_parameters = [{'C': np.logspace(-1, 0, 4)}] + tuned_parameters = [{"C": np.logspace(-1, 0, 4)}] clf = GridSearchCV( - LogisticRegression(random_state=42, max_iter=100, solver='lbfgs', - multi_class='ovr'), - tuned_parameters, cv=5) + LogisticRegression( + random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr" + ), + tuned_parameters, + cv=5, + ) model, X = fit_classification_model(clf, n_classes=2, is_int=True) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchBinaryInt-Dec4") + X, model, model_onnx, basename="SklearnGridSearchBinaryInt-Dec4" + ) def test_grid_search_multiclass_int(self): - tuned_parameters = [{'C': np.logspace(-1, 0, 4)}] + tuned_parameters = [{"C": np.logspace(-1, 0, 4)}] clf = GridSearchCV( - 
LogisticRegression(random_state=42, max_iter=100, solver='lbfgs', - multi_class='multinomial'), - tuned_parameters, cv=5) + LogisticRegression( + random_state=42, max_iter=100, solver="lbfgs", multi_class="multinomial" + ), + tuned_parameters, + cv=5, + ) model, X = fit_classification_model(clf, n_classes=4, is_int=True) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchMulticlassInt-Dec4") + X, model, model_onnx, basename="SklearnGridSearchMulticlassInt-Dec4" + ) def test_grid_search_regression_int(self): - tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}] - clf = GridSearchCV(Lasso(max_iter=100), - tuned_parameters, cv=5) + tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}] + clf = GridSearchCV(Lasso(max_iter=100), tuned_parameters, cv=5) model, X = fit_regression_model(clf, is_int=True) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSerachRegressionInt-OneOffArray-Dec4") + X, + model, + model_onnx, + basename="SklearnGridSerachRegressionInt-OneOffArray-Dec4", + ) def test_grid_search_regressor_float(self): - tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}] - clf = GridSearchCV(LassoLars(max_iter=100), - tuned_parameters, cv=5) + tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}] + clf = GridSearchCV(LassoLars(max_iter=100), tuned_parameters, cv=5) model, X = fit_regression_model(clf) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchRegressionFloat-OneOffArray-Dec4") + X, + model, + model_onnx, + basename="SklearnGridSearchRegressionFloat-OneOffArray-Dec4", + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime %s" % '0.4.0') + pv.Version(ort_version) <= pv.Version("0.4.0"), + reason="onnxruntime %s" % "0.4.0", + ) def test_grid_search_gaussian_regressor_float(self): - tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}] - clf = GridSearchCV(GaussianProcessRegressor(), - tuned_parameters, cv=5) + tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}] + clf = GridSearchCV(GaussianProcessRegressor(), tuned_parameters, cv=5) model, X = fit_regression_model(clf) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnGridSearchGaussianRegressionFloat" - "-OneOffArray-Dec4") + X, + model, + model_onnx, + basename="SklearnGridSearchGaussianRegressionFloat" "-OneOffArray-Dec4", + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime %s" % '0.4.0') + pv.Version(ort_version) <= pv.Version("0.4.0"), + reason="onnxruntime %s" % "0.4.0", + ) def test_grid_search_gaussian_regressor_double(self): - tuned_parameters = 
[{'alpha': np.logspace(-4, -0.5, 4)}] - clf = GridSearchCV(GaussianProcessRegressor(), - tuned_parameters, cv=3) + tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}] + clf = GridSearchCV(GaussianProcessRegressor(), tuned_parameters, cv=3) model, X = fit_regression_model(clf) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float64), model, model_onnx, - basename="SklearnGridSearchGaussianRegressionDouble" - "-OneOffArray-Dec4") + X.astype(np.float64), + model, + model_onnx, + basename="SklearnGridSearchGaussianRegressionDouble" "-OneOffArray-Dec4", + ) def test_grid_search_binary_float_nozipmap(self): - tuned_parameters = [{'C': np.logspace(-1, 0, 30)}] + tuned_parameters = [{"C": np.logspace(-1, 0, 30)}] clf = GridSearchCV( - LogisticRegression(random_state=42, max_iter=100, solver='lbfgs', - multi_class='ovr'), - tuned_parameters, cv=5) + LogisticRegression( + random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr" + ), + tuned_parameters, + cv=5, + ) model, X = fit_classification_model(clf, n_classes=2) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(clf): {'zipmap': False, 'raw_scores': True}}, - target_opset=TARGET_OPSET) + options={id(clf): {"zipmap": False, "raw_scores": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) assert "zipmap" not in str(model_onnx).lower() assert '"LOGISTIC"' not in str(model_onnx).lower() dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnGridSearchBinaryFloat-Out0", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) def test_grid_search_svm(self): rand_seed = 0 np.random.seed(rand_seed) def convert_to_onnx(sklearn_model, X, model_savename): - onnx_model = to_onnx(sklearn_model, X[:1].astype(np.float32), - target_opset=TARGET_OPSET) + onnx_model = to_onnx( + sklearn_model, X[:1].astype(np.float32), target_opset=TARGET_OPSET + ) onnx.checker.check_model(onnx_model) return onnx_model @@ -185,14 +233,19 @@ def load_train_test(): X = iris.data y = iris.target X_train, X_test, y_train, y_test = train_test_split( - X, y, train_size=0.8, random_state=rand_seed) + X, y, train_size=0.8, random_state=rand_seed + ) return X_train, X_test, y_train, y_test def train_svc_gs(X_train, y_train, apply_fix=False): - param_grid = {'C': [0.1, 1, 1e1], 'gamma': [1e-3, 1e-2, 1e-1]} - clf_est = SVC(kernel='rbf', coef0=0.0, degree=3, - decision_function_shape='ovr', - probability=True) + param_grid = {"C": [0.1, 1, 1e1], "gamma": [1e-3, 1e-2, 1e-1]} + clf_est = SVC( + kernel="rbf", + coef0=0.0, + degree=3, + decision_function_shape="ovr", + probability=True, + ) clf = GridSearchCV(clf_est, param_grid) clf.fit(X_train, y_train) return clf @@ -206,22 +259,27 @@ def run(): x_test, model, model_onnx = run() dump_data_and_model( - x_test.astype(np.float32), model, model_onnx, - basename="SklearnGridSearchSVC-Out0") + x_test.astype(np.float32), + model, + model_onnx, + basename="SklearnGridSearchSVC-Out0", + ) def test_grid_search_binary_kmeans(self): - tuned_parameters = [{'n_clusters': [2, 3]}] + tuned_parameters = [{"n_clusters": [2, 3]}] clf = GridSearchCV(KMeans(), tuned_parameters, cv=5) model, X = fit_clustering_model(clf, n_classes=2) X = 
X.astype(np.float32) model_onnx = convert_sklearn( - model, "GridSearchCV", + model, + "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model.best_estimator_, model_onnx, - basename="SklearnGridSearchKMeans") + X, model.best_estimator_, model_onnx, basename="SklearnGridSearchKMeans" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_imputer_converter.py b/tests/test_sklearn_imputer_converter.py index 96eabbe7e..c514651c9 100644 --- a/tests/test_sklearn_imputer_converter.py +++ b/tests/test_sklearn_imputer_converter.py @@ -9,6 +9,7 @@ import pandas as pd from numpy.testing import assert_almost_equal import sklearn + try: from sklearn.preprocessing import Imputer except ImportError: @@ -22,48 +23,51 @@ from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, StringTensorType) + FloatTensorType, + Int64TensorType, + StringTensorType, +) from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -skl_ver = '.'.join(sklearn.__version__.split('.')[:2]) +skl_ver = ".".join(sklearn.__version__.split(".")[:2]) class TestSklearnImputerConverter(unittest.TestCase): - def _check_outputs_ints(self, model, model_onnx, data): sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - idata = {'input': np.array(data).astype(np.int64)} + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + idata = {"input": np.array(data).astype(np.int64)} res = sess.run(None, idata)[0] exp = model.transform(data) assert_almost_equal(res, exp) - def _check_outputs_strings(self, model, model_onnx, data, - verbose=0): - idata = {'input': np.array(data).astype(np.str_)} + def _check_outputs_strings(self, model, model_onnx, data, verbose=0): + idata = {"input": np.array(data).astype(np.str_)} sess = InferenceSession( model_onnx.SerializeToString(), providers=["CPUExecutionProvider"], - verbose=verbose) + verbose=verbose, + ) res = sess.run(None, idata)[0] exp = model.transform(data) if list(exp.ravel()) != list(res.ravel()): - raise AssertionError( - "Unexpected output expected %r != %r." % (exp, res)) + raise AssertionError("Unexpected output expected %r != %r." 
% (exp, res)) - @unittest.skipIf(Imputer is None, - reason="Imputer removed in 0.21") + @unittest.skipIf(Imputer is None, reason="Imputer removed in 0.21") def test_imputer_float_inputs(self): model = Imputer(missing_values="NaN", strategy="mean", axis=0) data = [[1, 2], [np.nan, 3], [7, 6]] model.fit(data) - model_onnx = convert_sklearn(model, "scikit-learn imputer", - [("input", FloatTensorType([None, 2]))]) + model_onnx = convert_sklearn( + model, "scikit-learn imputer", [("input", FloatTensorType([None, 2]))] + ) self.assertTrue(model_onnx.graph.node is not None) # should contain only node @@ -72,8 +76,7 @@ def test_imputer_float_inputs(self): # last node should contain the Imputer outputs = model_onnx.graph.output self.assertEqual(len(outputs), 1) - self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value, - 2) + self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2) dump_data_and_model( np.array(data, dtype=np.float32), model, @@ -81,8 +84,7 @@ def test_imputer_float_inputs(self): basename="SklearnImputerMeanFloat32", ) - @unittest.skipIf(SimpleImputer is None, - reason="SimpleImputer changed in 0.20") + @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20") def test_simple_imputer_float_inputs(self): model = SimpleImputer(strategy="mean", fill_value="nan") data = [[1, 2], [np.nan, 3], [7, 6]] @@ -92,7 +94,8 @@ def test_simple_imputer_float_inputs(self): model, "scikit-learn simple imputer", [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx.graph.node is not None) # should contain only node @@ -101,15 +104,15 @@ def test_simple_imputer_float_inputs(self): # last node should contain the Imputer outputs = model_onnx.graph.output self.assertEqual(len(outputs), 1) - self.assertEqual( - outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2) + self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2) dump_data_and_model( np.array(data, dtype=np.float32), - model, model_onnx, - basename="SklearnSimpleImputerMeanFloat32") + model, + model_onnx, + basename="SklearnSimpleImputerMeanFloat32", + ) - @unittest.skipIf(SimpleImputer is None, - reason="SimpleImputer changed in 0.20") + @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20") def test_simple_imputer_float_inputs_int_mostf(self): model = SimpleImputer(strategy="most_frequent", fill_value="nan") data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]] @@ -119,7 +122,8 @@ def test_simple_imputer_float_inputs_int_mostf(self): model, "scikit-learn simple imputer", [("input", Int64TensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx.graph.node is not None) # should contain only node @@ -130,8 +134,7 @@ def test_simple_imputer_float_inputs_int_mostf(self): self.assertEqual(len(outputs), 1) self._check_outputs_ints(model, model_onnx, data) - @unittest.skipIf(SimpleImputer is None, - reason="SimpleImputer changed in 0.20") + @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20") def test_simple_imputer_float_inputs_int_mean(self): model = SimpleImputer(strategy="mean", fill_value="nan") data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]] @@ -142,43 +145,50 @@ def test_simple_imputer_float_inputs_int_mean(self): model, "scikit-learn simple imputer", [("input", Int64TensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) except RuntimeError as e: assert "nan values 
are replaced by float" in str(e) - @unittest.skipIf(SimpleImputer is None, - reason="SimpleImputer changed in 0.20") + @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20") @unittest.skipIf( - pv.Version(skl_ver) < pv.Version('0.24'), - reason="SimpleImputer does not support strings") + pv.Version(skl_ver) < pv.Version("0.24"), + reason="SimpleImputer does not support strings", + ) def test_simple_imputer_string_inputs_int_mostf(self): model = SimpleImputer( - strategy="most_frequent", fill_value="nan", missing_values="") + strategy="most_frequent", fill_value="nan", missing_values="" + ) data = [["s1", "s2"], ["", "s3"], ["s7", "s6"], ["s8", ""]] model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn simple imputer", + model, + "scikit-learn simple imputer", [("input", StringTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIn("ai.onnx.ml", str(model_onnx)) self.assertTrue(model_onnx.graph.node is not None) self.assertEqual(len(model_onnx.graph.output), 1) self._check_outputs_strings(model, model_onnx, data) - @unittest.skipIf(SimpleImputer is None, - reason="SimpleImputer changed in 0.20") + @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20") @unittest.skipIf( - pv.Version(skl_ver) < pv.Version('0.24'), - reason="SimpleImputer does not support strings") + pv.Version(skl_ver) < pv.Version("0.24"), + reason="SimpleImputer does not support strings", + ) def test_simple_imputer_string_inputs_int_mostf_default(self): - model = SimpleImputer(strategy="most_frequent", missing_values='') - data = pd.DataFrame([["s1", "s2"], ["s1", "s2"], ["", "s3"], - ["s7", "s6"], ["s8", ""]]) + model = SimpleImputer(strategy="most_frequent", missing_values="") + data = pd.DataFrame( + [["s1", "s2"], ["s1", "s2"], ["", "s3"], ["s7", "s6"], ["s8", ""]] + ) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn simple imputer", + model, + "scikit-learn simple imputer", [("input", StringTensorType([None, 2]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIn("ai.onnx.ml", str(model_onnx)) self.assertTrue(model_onnx.graph.node is not None) self.assertEqual(len(model_onnx.graph.output), 1) diff --git a/tests/test_sklearn_isolation_forest.py b/tests/test_sklearn_isolation_forest.py index 2108f8cb5..3c1bbf199 100644 --- a/tests/test_sklearn_isolation_forest.py +++ b/tests/test_sklearn_isolation_forest.py @@ -9,57 +9,64 @@ from numpy.testing import assert_almost_equal from onnxruntime import InferenceSession from sklearn import __version__ as sklv + try: from sklearn.ensemble import IsolationForest except ImportError: IsolationForest = None from skl2onnx import to_onnx from test_utils import dump_data_and_model, TARGET_OPSET, TARGET_OPSET_ML + try: from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented except ImportError: NotImplemented = RuntimeError -sklv2 = '.'.join(sklv.split('.')[:2]) +sklv2 = ".".join(sklv.split(".")[:2]) class TestSklearnIsolationForest(unittest.TestCase): - @unittest.skipIf(IsolationForest is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'), - reason="tree structure is different.") + @unittest.skipIf( + pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different." 
+ ) @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_isolation_forest(self): isol = IsolationForest(n_estimators=3, random_state=0) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = isol.fit(data) model_onnx = to_onnx( - model, data, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) self.assertIsNotNone(model_onnx) - dump_data_and_model(data, model, model_onnx, - basename="IsolationForest") + dump_data_and_model(data, model, model_onnx, basename="IsolationForest") @unittest.skipIf(IsolationForest is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'), - reason="tree structure is different.") + @unittest.skipIf( + pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different." + ) @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_isolation_forest_score_samples(self): isol = IsolationForest(n_estimators=3, random_state=0) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = isol.fit(data) model_onnx = to_onnx( - model, data, options={'score_samples': True}, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, + data, + options={"score_samples": True}, + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores', 'score_samples']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores", "score_samples"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 3) expected_label = isol.predict(data) expected_decif = isol.decision_function(data) @@ -69,36 +76,37 @@ def test_isolation_forest_score_samples(self): assert_almost_equal(expected_score, got[2].ravel()) @unittest.skipIf(IsolationForest is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'), - reason="tree structure is different.") + @unittest.skipIf( + pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different." + ) @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_isolation_forest_op1(self): isol = IsolationForest(n_estimators=3, random_state=0) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = isol.fit(data) with self.assertRaises(RuntimeError): - to_onnx(model, data, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': 1}) + to_onnx(model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": 1}) @unittest.skipIf(IsolationForest is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'), - reason="tree structure is different.") + @unittest.skipIf( + pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different." 
+ ) @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_isolation_forest_rnd(self): isol = IsolationForest(n_estimators=2, random_state=0) rs = np.random.RandomState(0) data = rs.randn(100, 4).astype(np.float32) - data[-1, 2:] = 99. - data[-2, :2] = -99. + data[-1, 2:] = 99.0 + data[-2, :2] = -99.0 model = isol.fit(data) model_onnx = to_onnx( - model, data, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) self.assertIsNotNone(model_onnx) - dump_data_and_model(data, model, model_onnx, - basename="IsolationForestRnd") + dump_data_and_model(data, model, model_onnx, basename="IsolationForestRnd") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_sklearn_k_bins_discretiser_converter.py b/tests/test_sklearn_k_bins_discretiser_converter.py index c3313d9bd..079527eb1 100644 --- a/tests/test_sklearn_k_bins_discretiser_converter.py +++ b/tests/test_sklearn_k_bins_discretiser_converter.py @@ -6,6 +6,7 @@ import unittest import numpy as np + try: from sklearn.preprocessing import KBinsDiscretizer except ImportError: @@ -22,144 +23,190 @@ class TestSklearnKBinsDiscretiser(unittest.TestCase): reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_ordinal_uniform(self): - X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9]]) - model = KBinsDiscretizer(n_bins=3, - encode="ordinal", - strategy="uniform").fit(X) + X = np.array( + [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]] + ) + model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="uniform").fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalUniform") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalUniform", + ) @unittest.skipIf( KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_ordinal_quantile(self): - X = np.array([ - [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4], - [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4], - [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4], - [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4], - ]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="ordinal", - strategy="quantile").fit(X) + X = np.array( + [ + [1.2, 3.2, 1.3, -5.6], + [4.3, -3.2, 5.7, 1.0], + [0, 3.2, 4.7, -8.9], + [0.2, 1.3, 0.6, -9.4], + [0.8, 4.2, -14.7, -28.9], + [8.2, 1.9, 2.6, -5.4], + [4.8, -9.2, 33.7, 3.9], + [81.2, 1.0, 0.6, 12.4], + [6.8, 11.2, -1.7, -2.9], + [11.2, 12.9, 4.3, -1.4], + ] + ) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalQuantile") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalQuantile", + ) @unittest.skipIf( KBinsDiscretizer is None, 
reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_ordinal_kmeans(self): - X = np.array([ - [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4], - [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4], - [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4], - [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4], - ]) - model = KBinsDiscretizer(n_bins=3, encode="ordinal", - strategy="kmeans").fit(X) + X = np.array( + [ + [1.2, 3.2, 1.3, -5.6], + [4.3, -3.2, 5.7, 1.0], + [0, 3.2, 4.7, -8.9], + [0.2, 1.3, 0.6, -9.4], + [0.8, 4.2, -14.7, -28.9], + [8.2, 1.9, 2.6, -5.4], + [4.8, -9.2, 33.7, 3.9], + [81.2, 1.0, 0.6, 12.4], + [6.8, 11.2, -1.7, -2.9], + [11.2, 12.9, 4.3, -1.4], + ] + ) + model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="kmeans").fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalKMeans") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalKMeans", + ) @unittest.skipIf( KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_onehot_dense_uniform(self): - X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9]]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="onehot-dense", - strategy="uniform").fit(X) + X = np.array( + [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]] + ) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnKBinsDiscretiserOneHotDenseUniform") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOneHotDenseUniform", + ) @unittest.skipIf( KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_onehot_dense_quantile(self): - X = np.array([ - [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4], - [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4], - [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4], - [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4], - ]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="onehot-dense", - strategy="quantile").fit(X) + X = np.array( + [ + [1.2, 3.2, 1.3, -5.6], + [4.3, -3.2, 5.7, 1.0], + [0, 3.2, 4.7, -8.9], + [0.2, 1.3, 0.6, -9.4], + [0.8, 4.2, -14.7, -28.9], + [8.2, 1.9, 2.6, -5.4], + [4.8, -9.2, 33.7, 3.9], + [81.2, 1.0, 0.6, 12.4], + [6.8, 11.2, -1.7, -2.9], + [11.2, 12.9, 4.3, -1.4], + ] + ) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnKBinsDiscretiserOneHotDenseQuantile") + X.astype(np.float32), + model, + model_onnx, + 
basename="SklearnKBinsDiscretiserOneHotDenseQuantile", + ) @unittest.skipIf( - KBinsDiscretizer is None, - reason="KBinsDiscretizer available since 0.20") + KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20" + ) def test_model_k_bins_discretiser_onehot_dense_kmeans(self): - X = np.array([ - [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4], - [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4], - [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4], - [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4], - ]) - model = KBinsDiscretizer(n_bins=3, - encode="onehot-dense", - strategy="kmeans").fit(X) + X = np.array( + [ + [1.2, 3.2, 1.3, -5.6], + [4.3, -3.2, 5.7, 1.0], + [0, 3.2, 4.7, -8.9], + [0.2, 1.3, 0.6, -9.4], + [0.8, 4.2, -14.7, -28.9], + [8.2, 1.9, 2.6, -5.4], + [4.8, -9.2, 33.7, 3.9], + [81.2, 1.0, 0.6, 12.4], + [6.8, 11.2, -1.7, -2.9], + [11.2, 12.9, 4.3, -1.4], + ] + ) + model = KBinsDiscretizer( + n_bins=3, encode="onehot-dense", strategy="kmeans" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, verbose=0, - basename="SklearnKBinsDiscretiserOneHotDenseKMeans") + X.astype(np.float32), + model, + model_onnx, + verbose=0, + basename="SklearnKBinsDiscretiserOneHotDenseKMeans", + ) @unittest.skipIf( KBinsDiscretizer is None, @@ -167,65 +214,85 @@ def test_model_k_bins_discretiser_onehot_dense_kmeans(self): ) def test_model_k_bins_discretiser_ordinal_uniform_int(self): X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]]) - model = KBinsDiscretizer(n_bins=3, - encode="ordinal", - strategy="uniform").fit(X) + model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="uniform").fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalUniformInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalUniformInt", + ) @unittest.skipIf( KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_ordinal_quantile_int(self): - X = np.array([ - [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9], - [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19], - [12, 13, 31, -16], [0, -21, 15, 30], [10, 22, 71, -91] - ]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="ordinal", - strategy="quantile").fit(X) + X = np.array( + [ + [1, 3, 3, -6], + [3, -2, 5, 0], + [0, 2, 7, -9], + [-1, 0, 1, -16], + [31, -5, 15, 10], + [12, -2, 8, -19], + [12, 13, 31, -16], + [0, -21, 15, 30], + [10, 22, 71, -91], + ] + ) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalQuantileInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalQuantileInt", + ) @unittest.skipIf( 
KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_ordinal_kmeans_int(self): - X = np.array([ - [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9], - [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19] - ]) - model = KBinsDiscretizer(n_bins=3, encode="ordinal", - strategy="kmeans").fit(X) + X = np.array( + [ + [1, 3, 3, -6], + [3, -2, 5, 0], + [0, 2, 7, -9], + [-1, 0, 1, -16], + [31, -5, 15, 10], + [12, -2, 8, -19], + ] + ) + model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="kmeans").fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOrdinalKMeansInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOrdinalKMeansInt", + ) @unittest.skipIf( KBinsDiscretizer is None, @@ -233,19 +300,22 @@ def test_model_k_bins_discretiser_ordinal_kmeans_int(self): ) def test_model_k_bins_discretiser_onehot_dense_uniform_int(self): X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="onehot-dense", - strategy="uniform").fit(X) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOneHotDenseUniformInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOneHotDenseUniformInt", + ) @unittest.skipIf( KBinsDiscretizer is None, @@ -253,43 +323,57 @@ def test_model_k_bins_discretiser_onehot_dense_uniform_int(self): ) def test_model_k_bins_discretiser_onehot_dense_quantile_int(self): X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]]) - model = KBinsDiscretizer(n_bins=[3, 2, 3, 4], - encode="onehot-dense", - strategy="quantile").fit(X) + model = KBinsDiscretizer( + n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile" + ).fit(X) model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt", + ) @unittest.skipIf( KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20", ) def test_model_k_bins_discretiser_onehot_dense_kmeans_int(self): - X = np.array([ - [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9], - [-1, 12, 32, -16], [31, -20, 51, 7], [10, 23, 73, -90], - [1, 23, 36, -61], [93, -12, 15, 10], [20, 12, 17, -19] - ]) - model = KBinsDiscretizer(n_bins=3, - encode="onehot-dense", - strategy="kmeans").fit(X) + X = np.array( + [ + [1, 3, 3, -6], + [3, -2, 5, 0], + [0, 2, 7, -9], + [-1, 12, 32, -16], + [31, -20, 51, 7], + [10, 23, 73, -90], + [1, 23, 36, -61], + [93, -12, 15, 10], + [20, 12, 17, -19], + ] + ) + model = KBinsDiscretizer( + n_bins=3, encode="onehot-dense", strategy="kmeans" + ).fit(X) 
model_onnx = convert_sklearn( model, "scikit-learn KBinsDiscretiser", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnKBinsDiscretiserOneHotDenseKMeansInt") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnKBinsDiscretiserOneHotDenseKMeansInt", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_k_means_converter.py b/tests/test_sklearn_k_means_converter.py index 6e9466bdd..41651b8db 100644 --- a/tests/test_sklearn_k_means_converter.py +++ b/tests/test_sklearn_k_means_converter.py @@ -16,43 +16,54 @@ def test_kmeans_clustering(self): X = data.data model = KMeans(n_clusters=3, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "kmeans", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[40:60], - model, model_onnx, - basename="SklearnKMeans-Dec4") + model, + model_onnx, + basename="SklearnKMeans-Dec4", + ) def test_kmeans_clustering_noshape(self): data = load_iris() X = data.data model = KMeans(n_clusters=3, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, "kmeans", [("input", FloatTensorType([]))], target_opset=TARGET_OPSET + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[40:60], - model, model_onnx, - basename="SklearnKMeans-Dec4") + model, + model_onnx, + basename="SklearnKMeans-Dec4", + ) def test_batchkmeans_clustering(self): data = load_iris() X = data.data model = MiniBatchKMeans(n_clusters=3, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "kmeans", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[40:60], model, model_onnx, - basename="SklearnKMeans-Dec4") + basename="SklearnKMeans-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_batchkmeans_clustering_opset9(self): @@ -60,15 +71,16 @@ def test_batchkmeans_clustering_opset9(self): X = data.data model = MiniBatchKMeans(n_clusters=3, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=9) + model_onnx = convert_sklearn( + model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=9 + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[40:60], model, model_onnx, - basename="SklearnKMeansOp9-Dec4") + basename="SklearnKMeansOp9-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not available") def test_batchkmeans_clustering_opset11(self): @@ -76,15 +88,16 @@ def test_batchkmeans_clustering_opset11(self): X = data.data model = MiniBatchKMeans(n_clusters=3, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=11) + model_onnx = convert_sklearn( + model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=11 + ) self.assertIsNotNone(model_onnx) dump_data_and_model( 
X.astype(numpy.float32)[40:60], model, model_onnx, - basename="SklearnKMeansOp9-Dec4") + basename="SklearnKMeansOp9-Dec4", + ) def test_batchkmeans_clustering_opset1(self): data = load_iris() @@ -92,9 +105,9 @@ def test_batchkmeans_clustering_opset1(self): model = MiniBatchKMeans(n_clusters=3, n_init=3) model.fit(X) try: - convert_sklearn(model, "kmeans", - [("input", FloatTensorType([None, 4]))], - target_opset=1) + convert_sklearn( + model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=1 + ) except RuntimeError as e: assert "Node 'OnnxAdd' has been changed since version" in str(e) @@ -103,32 +116,38 @@ def test_kmeans_clustering_int(self): X = data.data model = KMeans(n_clusters=4, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", Int64TensorType([None, - X.shape[1]]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "kmeans", + [("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.int64)[40:60], model, model_onnx, - basename="SklearnKMeansInt-Dec4") + basename="SklearnKMeansInt-Dec4", + ) def test_batchkmeans_clustering_int(self): data = load_digits() X = data.data model = MiniBatchKMeans(n_clusters=4, n_init=3) model.fit(X) - model_onnx = convert_sklearn(model, "kmeans", - [("input", Int64TensorType([None, - X.shape[1]]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "kmeans", + [("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.int64)[40:60], model, model_onnx, - basename="SklearnBatchKMeansInt-Dec4") + basename="SklearnBatchKMeansInt-Dec4", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_kernel_pca_converter.py b/tests/test_sklearn_kernel_pca_converter.py index ed64b05e2..c9b33df9f 100644 --- a/tests/test_sklearn_kernel_pca_converter.py +++ b/tests/test_sklearn_kernel_pca_converter.py @@ -8,6 +8,7 @@ from sklearn.datasets import load_diabetes from sklearn.decomposition import KernelPCA from sklearn.model_selection import train_test_split + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -18,94 +19,91 @@ from test_utils import dump_data_and_model, TARGET_OPSET -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnKernelPCAConverter(unittest.TestCase): - def _fit_model(self, model, dtype=np.float32): data = load_diabetes() X_train, X_test, *_ = train_test_split( - data.data, data.target, test_size=0.2, random_state=42) + data.data, data.target, test_size=0.2, random_state=42 + ) model.fit(X_train) return model, X_test.astype(np.float32) - @unittest.skipIf(TARGET_OPSET < 11, - reason="all needed operators not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'), - reason="discrepancies") + @unittest.skipIf(TARGET_OPSET < 11, reason="all needed operators not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_kernel_pca_default_float(self): - model, X_test = self._fit_model( - KernelPCA(random_state=42)) + model, X_test = self._fit_model(KernelPCA(random_state=42)) model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET) - dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKernelPCA32") + 
dump_data_and_model(X_test, model, model_onnx, basename="SklearnKernelPCA32") - @unittest.skipIf(TARGET_OPSET < 11, - reason="all needed operators not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'), - reason="discrepancies") + @unittest.skipIf(TARGET_OPSET < 11, reason="all needed operators not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_kernel_pca_default_double(self): model, X_test = self._fit_model( - KernelPCA(random_state=42, n_components=2), dtype=np.float64) + KernelPCA(random_state=42, n_components=2), dtype=np.float64 + ) model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET) - dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKernelPCA64") + dump_data_and_model(X_test, model, model_onnx, basename="SklearnKernelPCA64") - @unittest.skipIf(TARGET_OPSET < 13, - reason="all needed operators not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'), - reason="discrepancies") + @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_kernel_pca_float(self): - for kernel in ['rbf', 'cosine', 'sigmoid', 'poly', 'linear']: + for kernel in ["rbf", "cosine", "sigmoid", "poly", "linear"]: with self.subTest(kernel=kernel): model, X_test = self._fit_model( - KernelPCA(random_state=42, kernel=kernel, - n_components=4)) + KernelPCA(random_state=42, kernel=kernel, n_components=4) + ) model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKernelPCA%s32" % kernel) + X_test, model, model_onnx, basename="SklearnKernelPCA%s32" % kernel + ) - @unittest.skipIf(TARGET_OPSET < 13, - reason="all needed operators not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'), - reason="discrepancies") + @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_kernel_pca_double(self): - for kernel in ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']: + for kernel in ["linear", "poly", "rbf", "sigmoid", "cosine"]: with self.subTest(kernel=kernel): model, X_test = self._fit_model( - KernelPCA(random_state=42, kernel=kernel, - n_components=4), - dtype=np.float64) + KernelPCA(random_state=42, kernel=kernel, n_components=4), + dtype=np.float64, + ) model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKernelPCA%s64" % kernel) + X_test, model, model_onnx, basename="SklearnKernelPCA%s64" % kernel + ) - @unittest.skipIf(TARGET_OPSET < 13, - reason="all needed operators not available") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'), - reason="discrepancies") + @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies" + ) @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_kernel_pca_double_cdist(self): - for kernel in ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']: + for kernel in ["linear", "poly", "rbf", 
"sigmoid", "cosine"]: with self.subTest(kernel=kernel): model, X_test = self._fit_model( - KernelPCA(random_state=42, kernel=kernel, - n_components=4), - dtype=np.float64) - model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET, - options={'optim': 'cdist'}) + KernelPCA(random_state=42, kernel=kernel, n_components=4), + dtype=np.float64, + ) + model_onnx = to_onnx( + model, X_test, target_opset=TARGET_OPSET, options={"optim": "cdist"} + ) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKernelPCA%s64" % kernel) + X_test, model, model_onnx, basename="SklearnKernelPCA%s64" % kernel + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_label_binariser_converter.py b/tests/test_sklearn_label_binariser_converter.py index cd329f2d6..e08914199 100644 --- a/tests/test_sklearn_label_binariser_converter.py +++ b/tests/test_sklearn_label_binariser_converter.py @@ -22,14 +22,15 @@ def test_model_label_binariser_default(self): model, "scikit-learn label binariser", [("input", Int64TensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnLabelBinariserDefault") + basename="SklearnLabelBinariserDefault", + ) def test_model_label_binariser_neg_label(self): X = np.array([1, 2, 6, 4, 2]) @@ -38,14 +39,15 @@ def test_model_label_binariser_neg_label(self): model, "scikit-learn label binariser", [("input", Int64TensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnLabelBinariserNegLabel") + basename="SklearnLabelBinariserNegLabel", + ) def test_model_label_binariser_pos_label(self): X = np.array([1, 2, 6, 4, 2]) @@ -54,14 +56,15 @@ def test_model_label_binariser_pos_label(self): model, "scikit-learn label binariser", [("input", Int64TensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnLabelBinariserPosLabel") + basename="SklearnLabelBinariserPosLabel", + ) def test_model_label_binariser_neg_pos_label(self): X = np.array([1, 2, 6, 4, 2]) @@ -70,14 +73,15 @@ def test_model_label_binariser_neg_pos_label(self): model, "scikit-learn label binariser", [("input", Int64TensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnLabelBinariserNegPosLabel") + basename="SklearnLabelBinariserNegPosLabel", + ) def test_model_label_binariser_binary_labels(self): X = np.array([1, 0, 0, 0, 1]) @@ -86,27 +90,30 @@ def test_model_label_binariser_binary_labels(self): model, "scikit-learn label binariser", [("input", Int64TensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnLabelBinariserBinaryLabels") + basename="SklearnLabelBinariserBinaryLabels", + ) def test_model_label_binariser_2d(self): X1 = np.array([[0, 1, 1], [1, 0, 0]], dtype=np.int64) model = LabelBinarizer().fit(X1) onnx_fs = convert_sklearn( - model, 'lb', - [('float_input', Int64TensorType([None, X1.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "lb", + [("float_input", Int64TensorType([None, X1.shape[1]]))], + 
target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onnx_fs.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_fs.SerializeToString(), providers=["CPUExecutionProvider"] + ) - res = sess.run(None, input_feed={'float_input': X1}) + res = sess.run(None, input_feed={"float_input": X1}) exp = model.transform(X1) got = res[0] assert_almost_equal(exp, got) diff --git a/tests/test_sklearn_label_encoder_converter.py b/tests/test_sklearn_label_encoder_converter.py index ebedef95a..9abe444bd 100644 --- a/tests/test_sklearn_label_encoder_converter.py +++ b/tests/test_sklearn_label_encoder_converter.py @@ -16,14 +16,13 @@ from test_utils import dump_data_and_model, TARGET_OPSET -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnLabelEncoderConverter(unittest.TestCase): - @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.3.0"), - reason="onnxruntime too old") + pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old" + ) def test_model_label_encoder(self): model = LabelEncoder() data = ["str3", "str2", "str0", "str1", "str3"] @@ -32,21 +31,19 @@ def test_model_label_encoder(self): model, "scikit-learn label encoder", [("input", StringTensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) self.assertTrue(model_onnx.graph.node is not None) if model_onnx.ir_version >= 7 and TARGET_OPSET < 12: raise AssertionError("Incompatbilities") dump_data_and_model( - np.array(data), - model, - model_onnx, - basename="SklearnLabelEncoder") + np.array(data), model, model_onnx, basename="SklearnLabelEncoder" + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.3.0"), - reason="onnxruntime too old") + pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old" + ) def test_model_label_encoder_float(self): model = LabelEncoder() data = np.array([1.2, 3.4, 5.4, 1.2], dtype=np.float32) @@ -55,22 +52,20 @@ def test_model_label_encoder_float(self): model, "scikit-learn label encoder", [("input", FloatTensorType([None]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) self.assertTrue(model_onnx.graph.node is not None) if model_onnx.ir_version >= 7 and TARGET_OPSET < 12: raise AssertionError("Incompatbilities") dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnLabelEncoderFloat") + data, model, model_onnx, basename="SklearnLabelEncoderFloat" + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.3.0"), - reason="onnxruntime too old") - @unittest.skipIf(TARGET_OPSET < 12, reason='not available') + pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old" + ) + @unittest.skipIf(TARGET_OPSET < 12, reason="not available") def test_model_label_encoder_int(self): model = LabelEncoder() data = np.array([10, 3, 5, -34, 0], dtype=np.int64) @@ -83,16 +78,15 @@ def test_model_label_encoder_int(self): model, "scikit-learn label encoder", [("input", Int64TensorType([None]))], - target_opset=op) + target_opset=op, + ) self.assertTrue(model_onnx is not None) self.assertTrue(model_onnx.graph.node is not None) if model_onnx.ir_version >= 7 and TARGET_OPSET < 12: raise AssertionError("Incompatbilities") dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnLabelEncoderInt") + data, model, model_onnx, basename="SklearnLabelEncoderInt" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_local_outlier_factor.py 
b/tests/test_sklearn_local_outlier_factor.py index 8ed3edb40..f833a3bb2 100644 --- a/tests/test_sklearn_local_outlier_factor.py +++ b/tests/test_sklearn_local_outlier_factor.py @@ -9,6 +9,7 @@ from numpy.testing import assert_almost_equal from onnxruntime import __version__ as ort_version from onnxruntime import InferenceSession + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph except ImportError: @@ -19,35 +20,36 @@ LocalOutlierFactor = None from skl2onnx import to_onnx from test_utils import TARGET_OPSET + try: from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented except ImportError: NotImplemented = RuntimeError -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnLocalOutlierForest(unittest.TestCase): - @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") def test_local_outlier_factor(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) - self.assertNotIn('CDist', str(model_onnx)) + self.assertNotIn("CDist", str(model_onnx)) data = data.copy() data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -57,21 +59,22 @@ def test_local_outlier_factor(self): @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") def test_local_outlier_factor_n_neighbors_greater_than_observations(self): lof = LocalOutlierFactor(n_neighbors=25, novelty=True) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) - self.assertNotIn('CDist', str(model_onnx)) + self.assertNotIn("CDist", str(model_onnx)) data = data.copy() data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -79,26 +82,27 @@ def test_local_outlier_factor_n_neighbors_greater_than_observations(self): assert_almost_equal(expected_decif, got[1].ravel(), decimal=5) @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), - reason="CDist") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist") def test_local_outlier_factor_cdist(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True) - data = np.array([[-1.1, -1.2], 
[0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) - model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET, - options={'optim': 'cdist'}) - self.assertIn('CDist', str(model_onnx)) + model_onnx = to_onnx( + model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"} + ) + self.assertIn("CDist", str(model_onnx)) data = data.copy() data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -106,25 +110,25 @@ def test_local_outlier_factor_cdist(self): assert_almost_equal(expected_decif, got[1].ravel()) @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), - reason="CDist") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist") def test_local_outlier_factor_p3(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True, p=3) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) - self.assertNotIn('CDist', str(model_onnx)) + self.assertNotIn("CDist", str(model_onnx)) data = data.copy() data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -132,32 +136,33 @@ def test_local_outlier_factor_p3(self): assert_almost_equal(expected_decif, got[1].ravel(), decimal=5) @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), - reason="CDist") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist") def test_local_outlier_factor_cdist_p3(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True, p=3) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) - model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET, - options={'optim': 'cdist'}) - self.assertIn('CDist', str(model_onnx)) + model_onnx = to_onnx( + model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"} + ) + self.assertIn("CDist", str(model_onnx)) data = data.copy() data[:, 0] += 0.1 try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidGraph as e: if "Unrecognized attribute: p for operator 
CDist" in str(e): return raise e names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -166,12 +171,13 @@ def test_local_outlier_factor_cdist_p3(self): @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") def test_local_outlier_factor_metric(self): - for metric in ['cityblock', 'euclidean', 'manhattan', 'sqeuclidean']: + for metric in ["cityblock", "euclidean", "manhattan", "sqeuclidean"]: with self.subTest(metric=metric): - lof = LocalOutlierFactor(n_neighbors=2, novelty=True, - metric=metric) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + lof = LocalOutlierFactor(n_neighbors=2, novelty=True, metric=metric) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], + dtype=np.float32, + ) model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) @@ -179,11 +185,11 @@ def test_local_outlier_factor_metric(self): data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -191,28 +197,29 @@ def test_local_outlier_factor_metric(self): assert_almost_equal(expected_decif, got[1].ravel(), decimal=4) @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), - reason="CDist") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist") def test_local_outlier_factor_metric_cdist(self): - for metric in ['euclidean', 'sqeuclidean']: + for metric in ["euclidean", "sqeuclidean"]: with self.subTest(metric=metric): - lof = LocalOutlierFactor(n_neighbors=2, novelty=True, - metric=metric) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + lof = LocalOutlierFactor(n_neighbors=2, novelty=True, metric=metric) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], + dtype=np.float32, + ) model = lof.fit(data) - model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET, - options={'optim': 'cdist'}) + model_onnx = to_onnx( + model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"} + ) data = data.copy() data[:, 0] += 0.1 sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -221,21 +228,21 @@ def test_local_outlier_factor_metric_cdist(self): @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") @unittest.skipIf(TARGET_OPSET < 13, reason="TopK") - 
@unittest.skipIf(pv.Version(ort_version) < pv.Version("1.7.0"), - reason="TopK") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.7.0"), reason="TopK") def test_local_outlier_factor_double(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float64) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float64 + ) model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -245,17 +252,19 @@ def test_local_outlier_factor_double(self): @unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn") def test_local_outlier_factor_score_samples(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True) - data = np.array([[-1.1, -1.2], [0.3, 0.2], - [0.5, 0.4], [100., 99.]], dtype=np.float32) + data = np.array( + [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32 + ) model = lof.fit(data) - model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET, - options={'score_samples': True}) + model_onnx = to_onnx( + model, data, target_opset=TARGET_OPSET, options={"score_samples": True} + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores', 'score_samples']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores", "score_samples"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 3) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -269,17 +278,17 @@ def test_local_outlier_factor_rnd(self): lof = LocalOutlierFactor(n_neighbors=2, novelty=True) rs = np.random.RandomState(0) data = rs.randn(100, 4).astype(np.float32) - data[-1, 2:] = 99. - data[-2, :2] = -99. 
+ data[-1, 2:] = 99.0 + data[-2, :2] = -99.0 model = lof.fit(data) model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [o.name for o in sess.get_outputs()] - self.assertEqual(names, ['label', 'scores']) - got = sess.run(None, {'X': data}) + self.assertEqual(names, ["label", "scores"]) + got = sess.run(None, {"X": data}) self.assertEqual(len(got), 2) expected_label = lof.predict(data) expected_decif = lof.decision_function(data) @@ -287,5 +296,5 @@ def test_local_outlier_factor_rnd(self): assert_almost_equal(expected_decif, got[1].ravel(), decimal=5) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_sklearn_mlp_converter.py b/tests/test_sklearn_mlp_converter.py index 3555cf065..8aa18ed6d 100644 --- a/tests/test_sklearn_mlp_converter.py +++ b/tests/test_sklearn_mlp_converter.py @@ -10,15 +10,18 @@ from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.model_selection import train_test_split from sklearn.datasets import make_multilabel_classification + try: from sklearn.utils._testing import ignore_warnings except ImportError: try: from sklearn.utils.testing import ignore_warnings except ImportError: + def ignore_warnings(category=Warning): return lambda x: x + from sklearn.exceptions import ConvergenceWarning from onnxruntime import InferenceSession, __version__ as ort_version from skl2onnx import convert_sklearn @@ -32,275 +35,311 @@ def ignore_warnings(category=Warning): fit_classification_model, fit_multilabel_classification_model, fit_regression_model, - TARGET_OPSET + TARGET_OPSET, ) -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnMLPConverters(unittest.TestCase): @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_binary(self): - model, X_test = fit_classification_model( - MLPClassifier(random_state=42), 2) + model, X_test = fit_classification_model(MLPClassifier(random_state=42), 2) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierBinary") + X_test, model, model_onnx, basename="SklearnMLPClassifierBinary" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_default(self): - model, X_test = fit_classification_model( - MLPClassifier(random_state=42), 4) + model, X_test = fit_classification_model(MLPClassifier(random_state=42), 4) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClass") + X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClass" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_default_uint8(self): model, X_test = fit_classification_model( - MLPClassifier(random_state=42), 4, cls_dtype=np.uint8) + MLPClassifier(random_state=42), 4, cls_dtype=np.uint8 + ) model_onnx = convert_sklearn( 
model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClassU8") + X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClassU8" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_default_uint64(self): model, X_test = fit_classification_model( - MLPClassifier(random_state=42), 4, cls_dtype=np.uint64) + MLPClassifier(random_state=42), 4, cls_dtype=np.uint64 + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClassU64") + X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClassU64" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multilabel_default(self): model, X_test = fit_multilabel_classification_model( - MLPClassifier(random_state=42)) + MLPClassifier(random_state=42) + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiLabel") + X_test, model, model_onnx, basename="SklearnMLPClassifierMultiLabel" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_regressor_default(self): - model, X_test = fit_regression_model( - MLPRegressor(random_state=42)) + model, X_test = fit_regression_model(MLPRegressor(random_state=42)) model_onnx = convert_sklearn( model, "scikit-learn MLPRegressor", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPRegressor-Dec4") + X_test, model, model_onnx, basename="SklearnMLPRegressor-Dec4" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_identity(self): model, X_test = fit_classification_model( - MLPClassifier(random_state=42, activation="identity"), 3, - is_int=True) + MLPClassifier(random_state=42, activation="identity"), 3, is_int=True + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", Int64TensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClassIdentityActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiClassIdentityActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multilabel_identity(self): model, X_test = fit_multilabel_classification_model( - MLPClassifier(random_state=42, activation="identity"), - is_int=True) + MLPClassifier(random_state=42, activation="identity"), is_int=True + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", Int64TensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, 
) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiLabelIdentityActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiLabelIdentityActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_regressor_identity(self): model, X_test = fit_regression_model( - MLPRegressor(random_state=42, activation="identity"), is_int=True) + MLPRegressor(random_state=42, activation="identity"), is_int=True + ) model_onnx = convert_sklearn( model, "scikit-learn MLPRegressor", [("input", Int64TensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPRegressorIdentityActivation-Dec4") + X_test, + model, + model_onnx, + basename="SklearnMLPRegressorIdentityActivation-Dec4", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_logistic(self): model, X_test = fit_classification_model( - MLPClassifier(random_state=42, activation="logistic"), 5) + MLPClassifier(random_state=42, activation="logistic"), 5 + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClassLogisticActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiClassLogisticActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multilabel_logistic(self): model, X_test = fit_multilabel_classification_model( - MLPClassifier(random_state=42, activation="logistic"), n_classes=4) + MLPClassifier(random_state=42, activation="logistic"), n_classes=4 + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiLabelLogisticActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiLabelLogisticActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_regressor_logistic(self): model, X_test = fit_regression_model( - MLPRegressor(random_state=42, activation="logistic")) + MLPRegressor(random_state=42, activation="logistic") + ) model_onnx = convert_sklearn( model, "scikit-learn MLPRegressor", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPRegressorLogisticActivation-Dec4") + X_test, + model, + model_onnx, + basename="SklearnMLPRegressorLogisticActivation-Dec4", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multiclass_tanh(self): model, X_test = fit_classification_model( - MLPClassifier(random_state=42, activation="tanh"), 3) + MLPClassifier(random_state=42, activation="tanh"), 3 + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + 
target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiClassTanhActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiClassTanhActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_multilabel_tanh(self): model, X_test = fit_multilabel_classification_model( - MLPClassifier(random_state=42, activation="tanh"), n_labels=3) + MLPClassifier(random_state=42, activation="tanh"), n_labels=3 + ) model_onnx = convert_sklearn( model, "scikit-learn MLPClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPClassifierMultiLabelTanhActivation") + X_test, + model, + model_onnx, + basename="SklearnMLPClassifierMultiLabelTanhActivation", + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_regressor_tanh(self): model, X_test = fit_regression_model( - MLPRegressor(random_state=42, activation="tanh")) + MLPRegressor(random_state=42, activation="tanh") + ) model_onnx = convert_sklearn( model, "scikit-learn MLPRegressor", [("input", FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPRegressorTanhActivation-Dec4") + X_test, model, model_onnx, basename="SklearnMLPRegressorTanhActivation-Dec4" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_regressor_bool(self): model, X_test = fit_regression_model( - MLPRegressor(random_state=42), is_bool=True) + MLPRegressor(random_state=42), is_bool=True + ) model_onnx = convert_sklearn( model, "scikit-learn MLPRegressor", [("input", BooleanTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnMLPRegressorBool") + X_test, model, model_onnx, basename="SklearnMLPRegressorBool" + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version('1.0.0'), - reason="onnxruntime %s" % '1.0.0') + pv.Version(ort_version) < pv.Version("1.0.0"), reason="onnxruntime %s" % "1.0.0" + ) @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) def test_model_mlp_classifier_nozipmap(self): X, y = make_multilabel_classification(n_labels=5, n_classes=10) X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=42) + X, y, test_size=0.5, random_state=42 + ) model = MLPClassifier().fit(X_train, y_train) - options = {id(model): {'zipmap': False}} + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, 'mlp', - [('input', FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + model, + "mlp", + [("input", FloatTensorType([None, X_test.shape[1]]))], + options=options, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, input_feed={'input': X_test}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, input_feed={"input": X_test}) assert_almost_equal(res[1], model.predict_proba(X_test), decimal=5) 
assert_almost_equal(res[0], model.predict(X_test), decimal=5) diff --git a/tests/test_sklearn_multi_output.py b/tests/test_sklearn_multi_output.py index 3970d9ae8..8c65c1220 100644 --- a/tests/test_sklearn_multi_output.py +++ b/tests/test_sklearn_multi_output.py @@ -9,6 +9,7 @@ from sklearn.datasets import load_linnerud, make_multilabel_classification from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier from sklearn.linear_model import Ridge, LogisticRegression + try: from sklearn.utils._testing import ignore_warnings except ImportError: @@ -18,14 +19,13 @@ from test_utils import dump_data_and_model, TARGET_OPSET -skl_ver = ".".join(skl_ver.split('.')[:2]) +skl_ver = ".".join(skl_ver.split(".")[:2]) class TestMultiOutputConverter(unittest.TestCase): - def setUp(self): if __name__ == "__main__": - log = getLogger('skl2onnx') + log = getLogger("skl2onnx") log.disabled = True # log.setLevel(logging.DEBUG) # logging.basicConfig(level=logging.DEBUG) @@ -34,28 +34,24 @@ def setUp(self): def test_multi_output_regressor(self): X, y = load_linnerud(return_X_y=True) clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y) - onx = to_onnx(clf, X[:1].astype(numpy.float32), - target_opset=TARGET_OPSET) + onx = to_onnx(clf, X[:1].astype(numpy.float32), target_opset=TARGET_OPSET) dump_data_and_model( - X.astype(numpy.float32), clf, onx, - basename="SklearnMultiOutputRegressor") + X.astype(numpy.float32), clf, onx, basename="SklearnMultiOutputRegressor" + ) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available.") - @ignore_warnings(category=(FutureWarning, - DeprecationWarning)) + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.") + @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_multi_output_classifier(self): X, y = make_multilabel_classification(n_classes=3, random_state=0) X = X.astype(numpy.float32) clf = MultiOutputClassifier(LogisticRegression()).fit(X, y) - onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, - options={'zipmap': False}) + onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, options={"zipmap": False}) self.assertNotIn("ZipMap", str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X}) exp_lab = clf.predict(X) exp_prb = clf.predict_proba(X) assert_almost_equal(exp_lab, res[0]) @@ -64,14 +60,18 @@ def test_multi_output_classifier(self): assert_almost_equal(e, g, decimal=5) # check option nocl=True - onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, - options={id(clf): {'nocl': True, 'zipmap': False}}) + onx = to_onnx( + clf, + X[:1], + target_opset=TARGET_OPSET, + options={id(clf): {"nocl": True, "zipmap": False}}, + ) self.assertNotIn("ZipMap", str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X}) exp_lab = clf.predict(X) exp_prb = clf.predict_proba(X) assert_almost_equal(exp_lab, res[0]) @@ -80,14 +80,18 @@ def test_multi_output_classifier(self): assert_almost_equal(e, g, decimal=5) # check option nocl=False - onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, - options={id(clf): {'nocl': False, 'zipmap': False}}) + onx = to_onnx( + clf, + X[:1], + target_opset=TARGET_OPSET, + options={id(clf): {"nocl": False, "zipmap": 
False}}, + ) self.assertNotIn("ZipMap", str(onx)) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X}) exp_lab = clf.predict(X) exp_prb = clf.predict_proba(X) assert_almost_equal(exp_lab, res[0]) @@ -95,38 +99,44 @@ def test_multi_output_classifier(self): for e, g in zip(exp_prb, res[1]): assert_almost_equal(e, g, decimal=5) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available.") - @unittest.skipIf(pv.Version(skl_ver) < pv.Version("0.22"), - reason="classes_ attribute is missing") - @ignore_warnings(category=(FutureWarning, - DeprecationWarning)) + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.") + @unittest.skipIf( + pv.Version(skl_ver) < pv.Version("0.22"), reason="classes_ attribute is missing" + ) + @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_multi_output_classifier_exc(self): X, y = make_multilabel_classification(n_classes=3, random_state=0) X = X.astype(numpy.float32) clf = MultiOutputClassifier(LogisticRegression()).fit(X, y) clf.classes_ = numpy.array(clf.classes_) with self.assertRaises(RuntimeError): - to_onnx(clf, X[:1], target_opset=TARGET_OPSET, - options={'zipmap': False, 'output_class_labels': True}) - - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available.") - @unittest.skipIf(pv.Version(skl_ver) < pv.Version("0.22"), - reason="classes_ attribute is missing") - @ignore_warnings(category=(FutureWarning, - DeprecationWarning)) + to_onnx( + clf, + X[:1], + target_opset=TARGET_OPSET, + options={"zipmap": False, "output_class_labels": True}, + ) + + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.") + @unittest.skipIf( + pv.Version(skl_ver) < pv.Version("0.22"), reason="classes_ attribute is missing" + ) + @ignore_warnings(category=(FutureWarning, DeprecationWarning)) def test_multi_output_classifier_fallback(self): X, y = make_multilabel_classification(n_classes=3, random_state=0) X = X.astype(numpy.float32) clf = MultiOutputClassifier(LogisticRegression()).fit(X, y) del clf.classes_ - onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, - options={'zipmap': False, 'output_class_labels': True}) + onx = to_onnx( + clf, + X[:1], + target_opset=TARGET_OPSET, + options={"zipmap": False, "output_class_labels": True}, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'X': X}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"X": X}) exp_lab = clf.predict(X) exp_prb = clf.predict_proba(X) assert_almost_equal(exp_lab, res[0]) diff --git a/tests/test_sklearn_naive_bayes_converter.py b/tests/test_sklearn_naive_bayes_converter.py index 1920bd0c1..4b95f26de 100644 --- a/tests/test_sklearn_naive_bayes_converter.py +++ b/tests/test_sklearn_naive_bayes_converter.py @@ -7,6 +7,7 @@ GaussianNB, MultinomialNB, ) + try: from sklearn.naive_bayes import CategoricalNB except ImportError: @@ -23,72 +24,60 @@ FloatTensorType, Int64TensorType, ) -from test_utils import ( - dump_data_and_model, - fit_classification_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET class TestNaiveBayesConverter(unittest.TestCase): - def test_model_multinomial_nb_binary_classification(self): - model, X = fit_classification_model( - MultinomialNB(), 2, 
pos_features=True)
+        model, X = fit_classification_model(MultinomialNB(), 2, pos_features=True)
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(np.float32),
             model,
             model_onnx,
-            basename="SklearnBinMultinomialNB-Dec4")
+            basename="SklearnBinMultinomialNB-Dec4",
+        )

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_binary_classification(self):
-        model, X = fit_classification_model(
-            BernoulliNB(), 2)
+        model, X = fit_classification_model(BernoulliNB(), 2)
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinBernoulliNB")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNB")

     def test_model_multinomial_nb_multiclass(self):
-        model, X = fit_classification_model(
-            MultinomialNB(), 5, pos_features=True)
+        model, X = fit_classification_model(MultinomialNB(), 5, pos_features=True)
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclMultinomialNB-Dec4")
+            X, model, model_onnx, basename="SklearnMclMultinomialNB-Dec4"
+        )

     def test_model_multinomial_nb_multiclass_params(self):
         model, X = fit_classification_model(
-            MultinomialNB(alpha=0.5, fit_prior=False), 5, pos_features=True)
+            MultinomialNB(alpha=0.5, fit_prior=False), 5, pos_features=True
+        )
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         pp = model.predict_proba(X)
@@ -97,64 +86,56 @@ def test_model_multinomial_nb_multiclass_params(self):
         diff = pps[:, col - 1] - pps[:, col - 2]
         ind = diff >= 1e-4
         dump_data_and_model(
-            X[ind],
-            model,
-            model_onnx,
-            basename="SklearnMclMultinomialNBParams-Dec4")
+            X[ind], model, model_onnx, basename="SklearnMclMultinomialNBParams-Dec4"
+        )

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_multiclass(self):
-        model, X = fit_classification_model(
-            BernoulliNB(), 4)
+        model, X = fit_classification_model(BernoulliNB(), 4)
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclBernoulliNB")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnMclBernoulliNB")

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_multiclass_params(self):
         model, X = fit_classification_model(
-            BernoulliNB(alpha=0, binarize=1.0, fit_prior=False), 4)
+            BernoulliNB(alpha=0, binarize=1.0, fit_prior=False), 4
+        )
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclBernoulliNBParams")
+            X, model, model_onnx, basename="SklearnMclBernoulliNBParams"
+        )

     def test_model_multinomial_nb_binary_classification_int(self):
         model, X = fit_classification_model(
-            MultinomialNB(), 2, is_int=True, pos_features=True)
+            MultinomialNB(), 2, is_int=True, pos_features=True
+        )
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinMultinomialNBInt-Dec4")
+            X, model, model_onnx, basename="SklearnBinMultinomialNBInt-Dec4"
+        )

     def test_model_multinomial_nb_binary_classification_bool(self):
         model, X = fit_classification_model(
-            MultinomialNB(), 2, is_bool=True, pos_features=True)
+            MultinomialNB(), 2, is_bool=True, pos_features=True
+        )
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
@@ -163,32 +144,24 @@ def test_model_multinomial_nb_binary_classification_bool(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinMultinomialNBBool-Dec4")
+            X, model, model_onnx, basename="SklearnBinMultinomialNBBool-Dec4"
+        )

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_binary_classification_int(self):
-        model, X = fit_classification_model(
-            BernoulliNB(), 2, is_int=True)
+        model, X = fit_classification_model(BernoulliNB(), 2, is_int=True)
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinBernoulliNBInt")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNBInt")

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_binary_classification_bool(self):
-        model, X = fit_classification_model(
-            BernoulliNB(), 2, is_bool=True)
+        model, X = fit_classification_model(BernoulliNB(), 2, is_bool=True)
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
@@ -196,112 +169,85 @@ def test_model_bernoulli_nb_binary_classification_bool(self):
             target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinBernoulliNBBool")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNBBool")

     def test_model_multinomial_nb_multiclass_int(self):
         model, X = fit_classification_model(
-            MultinomialNB(), 5, is_int=True, pos_features=True)
+            MultinomialNB(), 5, is_int=True, pos_features=True
+        )
         model_onnx = convert_sklearn(
             model,
             "multinomial naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclMultinomialNBInt-Dec4")
+            X, model, model_onnx, basename="SklearnMclMultinomialNBInt-Dec4"
+        )

     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     def test_model_bernoulli_nb_multiclass_int(self):
-        model, X = fit_classification_model(
-            BernoulliNB(), 4, is_int=True)
+        model, X = fit_classification_model(BernoulliNB(), 4, is_int=True)
         model_onnx = convert_sklearn(
             model,
             "bernoulli naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclBernoulliNBInt-Dec4")
+            X, model, model_onnx, basename="SklearnMclBernoulliNBInt-Dec4"
+        )

     def test_model_gaussian_nb_binary_classification(self):
-        model, X = fit_classification_model(
-            GaussianNB(), 2)
+        model, X = fit_classification_model(GaussianNB(), 2)
         model_onnx = convert_sklearn(
             model,
             "gaussian naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinGaussianNB")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnBinGaussianNB")

     def test_model_gaussian_nb_multiclass(self):
-        model, X = fit_classification_model(
-            GaussianNB(), 4)
+        model, X = fit_classification_model(GaussianNB(), 4)
         model_onnx = convert_sklearn(
             model,
             "gaussian naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclGaussianNB")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnMclGaussianNB")

     def test_model_gaussian_nb_binary_classification_int(self):
-        model, X = fit_classification_model(
-            GaussianNB(), 2, is_int=True)
+        model, X = fit_classification_model(GaussianNB(), 2, is_int=True)
         model_onnx = convert_sklearn(
             model,
             "gaussian naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinGaussianNBInt")
+        dump_data_and_model(X, model, model_onnx, basename="SklearnBinGaussianNBInt")

     def test_model_gaussian_nb_multiclass_int(self):
-        model, X = fit_classification_model(
-            GaussianNB(), 5, is_int=True)
+        model, X = fit_classification_model(GaussianNB(), 5, is_int=True)
         model_onnx = convert_sklearn(
             model,
             "gaussian naive bayes",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclGaussianNBInt-Dec4")
+            X, model, model_onnx, basename="SklearnMclGaussianNBInt-Dec4"
+        )

     def test_model_gaussian_nb_multiclass_bool(self):
-        model, X = fit_classification_model(
-            GaussianNB(), 5, is_bool=True)
+        model, X = fit_classification_model(GaussianNB(), 5, is_bool=True)
         model_onnx = convert_sklearn(
             model,
             "gaussian naive bayes",
@@ -310,136 +256,121 @@ def test_model_gaussian_nb_multiclass_bool(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnMclGaussianNBBool-Dec4")
+            X, model, model_onnx, basename="SklearnMclGaussianNBBool-Dec4"
+        )

-    @unittest.skipIf(ComplementNB is None,
-                     reason="new in scikit version 0.20")
+    @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
     def test_model_complement_nb_binary_classification(self):
-        model, X = fit_classification_model(
-            ComplementNB(), 2, pos_features=True)
+        model, X = fit_classification_model(ComplementNB(), 2, pos_features=True)
         model_onnx = convert_sklearn(
             model,
             "complement naive bayes",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET
+            target_opset=TARGET_OPSET,
        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnBinComplementNB-Dec4")
basename="SklearnBinComplementNB-Dec4") + X, model, model_onnx, basename="SklearnBinComplementNB-Dec4" + ) - @unittest.skipIf(ComplementNB is None, - reason="new in scikit version 0.20") + @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20") def test_model_complement_nb_multiclass(self): - model, X = fit_classification_model( - ComplementNB(), 4, pos_features=True) + model, X = fit_classification_model(ComplementNB(), 4, pos_features=True) model_onnx = convert_sklearn( model, "complement naive bayes", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnMclComplementNB-Dec4") + X, model, model_onnx, basename="SklearnMclComplementNB-Dec4" + ) - @unittest.skipIf(ComplementNB is None, - reason="new in scikit version 0.20") + @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20") def test_model_complement_nb_binary_classification_int(self): model, X = fit_classification_model( - ComplementNB(), 2, is_int=True, pos_features=True) + ComplementNB(), 2, is_int=True, pos_features=True + ) model_onnx = convert_sklearn( model, "complement naive bayes", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnBinComplementNBInt-Dec4") + X, model, model_onnx, basename="SklearnBinComplementNBInt-Dec4" + ) - @unittest.skipIf(ComplementNB is None, - reason="new in scikit version 0.20") + @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20") def test_model_complement_nb_multiclass_int(self): model, X = fit_classification_model( - ComplementNB(), 5, is_int=True, pos_features=True) + ComplementNB(), 5, is_int=True, pos_features=True + ) model_onnx = convert_sklearn( model, "complement naive bayes", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnMclComplementNBInt-Dec4") + X, model, model_onnx, basename="SklearnMclComplementNBInt-Dec4" + ) - @unittest.skipIf(ComplementNB is None, - reason="new in scikit version 0.20") + @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20") def test_model_complement_nb_multiclass_bool(self): model, X = fit_classification_model( - ComplementNB(), 5, is_bool=True, pos_features=True) + ComplementNB(), 5, is_bool=True, pos_features=True + ) model_onnx = convert_sklearn( model, "complement naive bayes", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnMclComplementNBBool-Dec4") + X, model, model_onnx, basename="SklearnMclComplementNBBool-Dec4" + ) - @unittest.skipIf(CategoricalNB is None, - reason="new in scikit version 0.22") + @unittest.skipIf(CategoricalNB is None, reason="new in scikit version 0.22") def test_model_categorical_nb(self): model, X = fit_classification_model( - CategoricalNB(), 3, is_int=True, pos_features=True) + CategoricalNB(), 3, is_int=True, pos_features=True + ) model_onnx = convert_sklearn( model, "categorical naive bayes", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) 
self.assertIsNotNone(model_onnx) dump_data_and_model( - X[10:13], - model, - model_onnx, - basename="SklearnCategoricalNB") + X[10:13], model, model_onnx, basename="SklearnCategoricalNB" + ) def test_model_gaussian_nb_multi_class_nocl(self): - model, X = fit_classification_model( - GaussianNB(), - 2, label_string=True) + model, X = fit_classification_model(GaussianNB(), 2, label_string=True) model_onnx = convert_sklearn( model, "GaussianNB multi-class nocl", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx dump_data_and_model( - X, model, model_onnx, classes=model.classes_, - basename="SklearnGaussianNBMultiNoCl") + X, + model, + model_onnx, + classes=model.classes_, + basename="SklearnGaussianNBMultiNoCl", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index 761f55191..d924e5ba5 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -12,6 +12,7 @@ from numpy.testing import assert_almost_equal from onnxruntime import __version__ as ort_version from pandas import DataFrame + try: from sklearn.utils._testing import ignore_warnings except ImportError: @@ -20,14 +21,16 @@ from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.neighbors import ( - KNeighborsRegressor, RadiusNeighborsRegressor, - KNeighborsClassifier, RadiusNeighborsClassifier, - NearestNeighbors) + KNeighborsRegressor, + RadiusNeighborsRegressor, + KNeighborsClassifier, + RadiusNeighborsClassifier, + NearestNeighbors, +) + try: from sklearn.impute import KNNImputer - from sklearn.neighbors import ( - KNeighborsTransformer, - NeighborhoodComponentsAnalysis) + from sklearn.neighbors import KNeighborsTransformer, NeighborhoodComponentsAnalysis except ImportError: # New in 0.22 KNNImputer = None @@ -35,9 +38,9 @@ NeighborhoodComponentsAnalysis = None from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler + try: - from onnxruntime.capi.onnxruntime_pybind11_state import ( - NotImplemented as OrtImpl) + from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented as OrtImpl except ImportError: OrtImpl = RuntimeError from skl2onnx import convert_sklearn, to_onnx @@ -46,28 +49,27 @@ FloatTensorType, Int64TensorType, ) -from skl2onnx.common.data_types import onnx_built_with_ml from skl2onnx.helpers.onnx_helper import ( - enumerate_model_node_outputs, select_model_inputs_outputs) + enumerate_model_node_outputs, + select_model_inputs_outputs, +) from test_utils import ( dump_data_and_model, fit_classification_model, fit_multilabel_classification_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + InferenceSessionEx as InferenceSession, +) def dont_test_radius(): - return ( - pv.Version(ort_version) <= pv.Version("1.3.0") or - TARGET_OPSET <= 11) + return pv.Version(ort_version) <= pv.Version("1.3.0") or TARGET_OPSET <= 11 -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestNearestNeighbourConverter(unittest.TestCase): - @functools.lru_cache(maxsize=1) def _get_iris(self): iris = 
datasets.load_iris() @@ -91,20 +93,22 @@ def _fit_model_multiclass_classification(self, model, use_string=False): @functools.lru_cache(maxsize=20) def _get_reg_data(self, n, n_features, n_targets, n_informative=10): X, y = datasets.make_regression( - n, n_features=n_features, random_state=0, - n_targets=n_targets, n_informative=n_informative) + n, + n_features=n_features, + random_state=0, + n_targets=n_targets, + n_informative=n_informative, + ) return X, y - def _fit_model(self, model, n_targets=1, label_int=False, - n_informative=10): + def _fit_model(self, model, n_targets=1, label_int=False, n_informative=10): X, y = self._get_reg_data(20, 4, n_targets, n_informative) if label_int: y = y.astype(numpy.int64) model.fit(X, y) return model, X - def _fit_model_simple(self, model, n_targets=1, label_int=False, - n_informative=3): + def _fit_model_simple(self, model, n_targets=1, label_int=False, n_informative=3): X, y = self._get_reg_data(20, 2, n_targets, n_informative) y /= 100 if label_int: @@ -113,174 +117,206 @@ def _fit_model_simple(self, model, n_targets=1, label_int=False, return model, X @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor(self): model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2)) - model_onnx = convert_sklearn(model, "KNN regressor", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "KNN regressor", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[:7], - model, model_onnx, - basename="SklearnKNeighborsRegressor-Dec4") + model, + model_onnx, + basename="SklearnKNeighborsRegressor-Dec4", + ) dump_data_and_model( (X + 0.1).astype(numpy.float32)[:7], - model, model_onnx, - basename="SklearnKNeighborsRegressor-Dec4") + model, + model_onnx, + basename="SklearnKNeighborsRegressor-Dec4", + ) @unittest.skipIf(dont_test_radius(), reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.8.0"), - reason="produces nan values") + pv.Version(ort_version) < pv.Version("1.8.0"), reason="produces nan values" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor_radius(self): model, X = self._fit_model(RadiusNeighborsRegressor()) - model_onnx = convert_sklearn(model, "KNN regressor", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET, - options={id(model): {'optim': 'cdist'}}) + model_onnx = convert_sklearn( + model, + "KNN regressor", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + options={id(model): {"optim": "cdist"}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) X = X[:5] - got = sess.run(None, {'input': X.astype(numpy.float32)})[0] + got = sess.run(None, {"input": X.astype(numpy.float32)})[0] exp = model.predict(X.astype(numpy.float32)) if any(numpy.isnan(got.ravel())): # The model is unexpectedly producing nan values # not on all platforms. 
-            rows = ['--EXP--', str(exp), '--GOT--', str(got),
-                    '--EVERY-OUTPUT--']
-            for out in enumerate_model_node_outputs(
-                    model_onnx, add_node=False):
+            rows = ["--EXP--", str(exp), "--GOT--", str(got), "--EVERY-OUTPUT--"]
+            for out in enumerate_model_node_outputs(model_onnx, add_node=False):
                 onx = select_model_inputs_outputs(model_onnx, out)
                 sess = InferenceSession(
-                    onx.SerializeToString(),
-                    providers=["CPUExecutionProvider"])
-                res = sess.run(
-                    None, {'input': X.astype(numpy.float32)})
-                rows.append('--{}--'.format(out))
+                    onx.SerializeToString(), providers=["CPUExecutionProvider"]
+                )
+                res = sess.run(None, {"input": X.astype(numpy.float32)})
+                rows.append("--{}--".format(out))
                 rows.append(str(res))
-            if (pv.Version(ort_version) <
-                    pv.Version("1.4.0")):
+            if pv.Version(ort_version) < pv.Version("1.4.0"):
                 return
-            raise AssertionError('\n'.join(rows))
+            raise AssertionError("\n".join(rows))
         assert_almost_equal(exp.ravel(), got.ravel(), decimal=3)

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @unittest.skipIf(TARGET_OPSET < 11, reason="not available")
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_double(self):
         model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2))
         model_onnx = convert_sklearn(
-            model, "KNN regressor",
+            model,
+            "KNN regressor",
             [("input", DoubleTensorType([None, 4]))],
             target_opset=TARGET_OPSET,
-            options={id(model): {'optim': 'cdist'}})
+            options={id(model): {"optim": "cdist"}},
+        )
         self.assertIsNotNone(model_onnx)
         try:
             InferenceSession(
-                model_onnx.SerializeToString(),
-                providers=["CPUExecutionProvider"])
+                model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+            )
         except OrtImpl as e:
-            if ("Could not find an implementation for the node "
-                    "To_TopK:TopK(11)") in str(e):
+            if (
+                "Could not find an implementation for the node " "To_TopK:TopK(11)"
+            ) in str(e):
                 # onnxruntime does not declare TopK(11) for double
                 return
             raise e
         dump_data_and_model(
             X.astype(numpy.float64)[:7],
-            model, model_onnx,
-            basename="SklearnKNeighborsRegressor64")
+            model,
+            model_onnx,
+            basename="SklearnKNeighborsRegressor64",
+        )

     @unittest.skipIf(dont_test_radius(), reason="not available")
     @unittest.skipIf(
         pv.Version(ort_version) < pv.Version("1.7.0"),
-        reason="nan may happen during computation")
+        reason="nan may happen during computation",
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_double_radius(self):
         model, X = self._fit_model(RadiusNeighborsRegressor())
         model_onnx = convert_sklearn(
-            model, "KNN regressor",
+            model,
+            "KNN regressor",
             [("input", DoubleTensorType([None, 4]))],
             target_opset=TARGET_OPSET,
-            options={id(model): {'optim': 'cdist'}})
+            options={id(model): {"optim": "cdist"}},
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float64)[:7],
-            model, model_onnx,
-            basename="SklearnRadiusNeighborsRegressor64")
+            model,
+            model_onnx,
+            basename="SklearnRadiusNeighborsRegressor64",
+        )
         dump_data_and_model(
-            (X + 10.).astype(numpy.float64)[:7],
-            model, model_onnx,
-            basename="SklearnRadiusNeighborsRegressor64")
+            (X + 10.0).astype(numpy.float64)[:7],
+            model,
+            model_onnx,
+            basename="SklearnRadiusNeighborsRegressor64",
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_yint(self):
-        model, X = self._fit_model(
-            KNeighborsRegressor(n_neighbors=2), label_int=True)
-        model_onnx = convert_sklearn(model, "KNN regressor",
-                                     [("input", FloatTensorType([None, 4]))],
-                                     target_opset=TARGET_OPSET)
+        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2), label_int=True)
+        model_onnx = convert_sklearn(
+            model,
+            "KNN regressor",
+            [("input", FloatTensorType([None, 4]))],
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float32)[:7],
-            model, model_onnx,
-            basename="SklearnKNeighborsRegressorYInt")
+            model,
+            model_onnx,
+            basename="SklearnKNeighborsRegressorYInt",
+        )

     @unittest.skipIf(dont_test_radius(), reason="not available")
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_yint_radius(self):
-        model, X = self._fit_model(
-            RadiusNeighborsRegressor(), label_int=True)
-        model_onnx = convert_sklearn(model, "KNN regressor",
-                                     [("input", FloatTensorType([None, 4]))],
-                                     target_opset=TARGET_OPSET)
+        model, X = self._fit_model(RadiusNeighborsRegressor(), label_int=True)
+        model_onnx = convert_sklearn(
+            model,
+            "KNN regressor",
+            [("input", FloatTensorType([None, 4]))],
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float32)[:7],
-            model, model_onnx,
-            basename="SklearnRadiusNeighborsRegressorYInt")
+            model,
+            model_onnx,
+            basename="SklearnRadiusNeighborsRegressorYInt",
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor2_1(self):
-        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1),
-                                   n_targets=2)
-        model_onnx = convert_sklearn(model, "KNN regressor",
-                                     [("input", FloatTensorType([None, 4]))],
-                                     target_opset=TARGET_OPSET)
+        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1), n_targets=2)
+        model_onnx = convert_sklearn(
+            model,
+            "KNN regressor",
+            [("input", FloatTensorType([None, 4]))],
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float32)[:3],
-            model, model_onnx,
-            basename="SklearnKNeighborsRegressor2")
+            model,
+            model_onnx,
+            basename="SklearnKNeighborsRegressor2",
+        )

     @unittest.skipIf(dont_test_radius(), reason="not available")
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor2_1_radius(self):
         model, X = self._fit_model_simple(
-            RadiusNeighborsRegressor(algorithm="brute"),
-            n_targets=2)
+            RadiusNeighborsRegressor(algorithm="brute"), n_targets=2
+        )
         X = X[:-1]
         model_onnx = convert_sklearn(
-            model, "KNN regressor",
+            model,
+            "KNN regressor",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         sess = InferenceSession(
-            model_onnx.SerializeToString(),
-            providers=["CPUExecutionProvider"])
-        got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
+            model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+        )
+        got = sess.run(None, {"input": X.astype(numpy.float32)})[0]
         exp = model.predict(X.astype(numpy.float32))
         if any(numpy.isnan(got.ravel())):
             # The model is unexpectedly producing nan values
@@ -290,299 +326,344 @@ def test_model_knn_regressor2_1_radius(self):
             # and contains only 0 or 1 values.
             # The output contains nan values on the first row
             # but not on the second one.
-            rows = ['--EXP--', str(exp), '--GOT--', str(got),
-                    '--EVERY-OUTPUT--']
-            for out in enumerate_model_node_outputs(
-                    model_onnx, add_node=False):
+            rows = ["--EXP--", str(exp), "--GOT--", str(got), "--EVERY-OUTPUT--"]
+            for out in enumerate_model_node_outputs(model_onnx, add_node=False):
                 onx = select_model_inputs_outputs(model_onnx, out)
                 sess = InferenceSession(
-                    onx.SerializeToString(),
-                    providers=["CPUExecutionProvider"])
-                res = sess.run(
-                    None, {'input': X.astype(numpy.float32)})
-                rows.append('--{}--'.format(out))
+                    onx.SerializeToString(), providers=["CPUExecutionProvider"]
+                )
+                res = sess.run(None, {"input": X.astype(numpy.float32)})
+                rows.append("--{}--".format(out))
                 rows.append(str(res))
-            if (ort_version.startswith('1.4.') or
-                    ort_version.startswith('1.5.')):
+            if ort_version.startswith("1.4.") or ort_version.startswith("1.5."):
                 # TODO: investigate the regression in onnxruntime 1.4
                 # One broadcasted multiplication unexpectedly produces nan.
-                whole = '\n'.join(rows)
+                whole = "\n".join(rows)
                 if "[ nan" in whole:
                     warnings.warn(whole)
                     return
                 raise AssertionError(whole)
-            if (ort_version.startswith('1.3.') and
-                    sys.platform == 'win32'):
+            if ort_version.startswith("1.3.") and sys.platform == "win32":
                 # Same error but different line number for further
                 # investigation.
                 raise AssertionError(whole)
-            raise AssertionError('\n'.join(rows))
+            raise AssertionError("\n".join(rows))
         assert_almost_equal(exp, got, decimal=5)

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @unittest.skipIf(TARGET_OPSET < 9, reason="not available")
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor2_1_opset(self):
-        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1),
-                                   n_targets=2)
+        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1), n_targets=2)
         for op in [TARGET_OPSET, 12, 11, 10, 9]:
             if op > TARGET_OPSET:
                 continue
             with self.subTest(opset=op):
                 model_onnx = convert_sklearn(
-                    model, "KNN regressor",
+                    model,
+                    "KNN regressor",
                     [("input", FloatTensorType([None, 4]))],
-                    target_opset=op)
+                    target_opset=op,
+                )
                 self.assertIsNotNone(model_onnx)
                 dump_data_and_model(
                     X.astype(numpy.float32)[:3],
-                    model, model_onnx,
-                    basename="SklearnKNeighborsRegressor2%d" % op)
+                    model,
+                    model_onnx,
+                    basename="SklearnKNeighborsRegressor2%d" % op,
+                )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor2_2(self):
-        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2),
-                                   n_targets=2)
-        model_onnx = convert_sklearn(model, "KNN regressor",
-                                     [("input", FloatTensorType([None, 4]))],
-                                     target_opset=TARGET_OPSET)
+        model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2), n_targets=2)
+        model_onnx = convert_sklearn(
+            model,
+            "KNN regressor",
+            [("input", FloatTensorType([None, 4]))],
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float32)[:2],
-            model, model_onnx,
-            basename="SklearnKNeighborsRegressor2")
+            model,
+            model_onnx,
+            basename="SklearnKNeighborsRegressor2",
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
-    @unittest.skipIf(TARGET_OPSET < 9,
-                     reason="needs higher target_opset")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
available" + ) + @unittest.skipIf(TARGET_OPSET < 9, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor_weights_distance_11(self): model, X = self._fit_model( - KNeighborsRegressor( - weights="distance", algorithm="brute", n_neighbors=1)) + KNeighborsRegressor(weights="distance", algorithm="brute", n_neighbors=1) + ) for op in sorted(set([9, 10, 11, 12, TARGET_OPSET])): if op > TARGET_OPSET: continue with self.subTest(opset=op): model_onnx = convert_sklearn( - model, "KNN regressor", + model, + "KNN regressor", [("input", FloatTensorType([None, 4]))], - target_opset=op) + target_opset=op, + ) if op < 12 and model_onnx.ir_version > 6: raise AssertionError( "ir_version ({}, op={}) must be <= 6.".format( - model_onnx.ir_version, op)) + model_onnx.ir_version, op + ) + ) if op < 11 and model_onnx.ir_version > 5: raise AssertionError( "ir_version ({}, op={}) must be <= 5.".format( - model_onnx.ir_version, op)) + model_onnx.ir_version, op + ) + ) if op < 10 and model_onnx.ir_version > 4: raise AssertionError( "ir_version ({}, op={}) must be <= 4.".format( - model_onnx.ir_version, op)) + model_onnx.ir_version, op + ) + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[:3], - model, model_onnx, - basename="SklearnKNeighborsRegressorWDist%d-Dec3" % op) + model, + model_onnx, + basename="SklearnKNeighborsRegressorWDist%d-Dec3" % op, + ) @unittest.skipIf(dont_test_radius(), reason="not available") @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor_weights_distance_11_radius(self): model, X = self._fit_model_simple( - RadiusNeighborsRegressor( - weights="distance", algorithm="brute", radius=100)) + RadiusNeighborsRegressor(weights="distance", algorithm="brute", radius=100) + ) for op in sorted(set([TARGET_OPSET, 12, 11])): if op > TARGET_OPSET: continue with self.subTest(opset=op): model_onnx = convert_sklearn( - model, "KNN regressor", + model, + "KNN regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=op) + target_opset=op, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': X.astype(numpy.float32)})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": X.astype(numpy.float32)})[0] exp = model.predict(X.astype(numpy.float32)) assert_almost_equal(exp, got.ravel(), decimal=3) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor_metric_cityblock(self): model, X = self._fit_model(KNeighborsRegressor(metric="cityblock")) - model_onnx = convert_sklearn(model, "KNN regressor", - [("input", FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "KNN regressor", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[:7], - model, model_onnx, - basename="SklearnKNeighborsRegressorMetricCityblock") + model, + model_onnx, + basename="SklearnKNeighborsRegressorMetricCityblock", + ) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - 
@unittest.skipIf(TARGET_OPSET < TARGET_OPSET, - reason="needs higher target_opset") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @unittest.skipIf(TARGET_OPSET < TARGET_OPSET, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_binary_class(self): - model, X = self._fit_model_binary_classification( - KNeighborsClassifier()) + model, X = self._fit_model_binary_classification(KNeighborsClassifier()) model_onnx = convert_sklearn( model, "KNN classifier binary", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, - basename="SklearnKNeighborsClassifierBinary") + model, + model_onnx, + basename="SklearnKNeighborsClassifierBinary", + ) @unittest.skipIf(dont_test_radius(), reason="not available") - @unittest.skipIf(TARGET_OPSET < 12, - reason="needs higher target_opset") + @unittest.skipIf(TARGET_OPSET < 12, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_binary_class_radius(self): - model, X = self._fit_model_binary_classification( - RadiusNeighborsClassifier()) + model, X = self._fit_model_binary_classification(RadiusNeighborsClassifier()) model_onnx = convert_sklearn( - model, "KNN classifier binary", + model, + "KNN classifier binary", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, - basename="SklearnRadiusNeighborsClassifierBinary") + model, + model_onnx, + basename="SklearnRadiusNeighborsClassifierBinary", + ) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_multi_class(self): - model, X = self._fit_model_multiclass_classification( - KNeighborsClassifier()) + model, X = self._fit_model_multiclass_classification(KNeighborsClassifier()) model_onnx = convert_sklearn( model, "KNN classifier multi-class", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, - basename="SklearnKNeighborsClassifierMulti") + model, + model_onnx, + basename="SklearnKNeighborsClassifierMulti", + ) @unittest.skipIf(dont_test_radius(), reason="not available") - @unittest.skipIf(TARGET_OPSET < 12, - reason="needs higher target_opset") + @unittest.skipIf(TARGET_OPSET < 12, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_multi_class_radius(self): model, X = self._fit_model_multiclass_classification( - RadiusNeighborsClassifier()) + RadiusNeighborsClassifier() + ) model_onnx = convert_sklearn( - model, "KNN classifier multi-class", + model, + "KNN classifier multi-class", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'optim': 'cdist'}}) + options={id(model): {"optim": "cdist"}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32)[:5], - model, model_onnx, - 
basename="SklearnRadiusNeighborsClassifierMulti") + model, + model_onnx, + basename="SklearnRadiusNeighborsClassifierMulti", + ) - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_multi_class_string(self): model, X = self._fit_model_multiclass_classification( - KNeighborsClassifier(), use_string=True) + KNeighborsClassifier(), use_string=True + ) model_onnx = convert_sklearn( model, "KNN classifier multi-class", [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, - basename="SklearnKNeighborsClassifierMulti") + model, + model_onnx, + basename="SklearnKNeighborsClassifierMulti", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_weights_distance(self): model, X = self._fit_model_multiclass_classification( - KNeighborsClassifier(weights='distance')) + KNeighborsClassifier(weights="distance") + ) model_onnx = convert_sklearn( - model, 'KNN classifier', [('input', FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "KNN classifier", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(numpy.float32)[:7], model, model_onnx, - basename="SklearnKNeighborsClassifierWeightsDistance") + X.astype(numpy.float32)[:7], + model, + model_onnx, + basename="SklearnKNeighborsClassifierWeightsDistance", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_metric_cityblock(self): model, X = self._fit_model_multiclass_classification( - KNeighborsClassifier(metric='cityblock')) + KNeighborsClassifier(metric="cityblock") + ) model_onnx = convert_sklearn( - model, 'KNN classifier', [('input', FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "KNN classifier", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(numpy.float32)[:7], model, model_onnx, - basename="SklearnKNeighborsClassifierMetricCityblock") + X.astype(numpy.float32)[:7], + model, + model_onnx, + basename="SklearnKNeighborsClassifierMetricCityblock", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_classifier_multilabel(self): model, X_test = fit_multilabel_classification_model( - KNeighborsClassifier(), n_classes=7, n_labels=3, - n_samples=100, n_features=10) - options = {id(model): {'zipmap': False}} + KNeighborsClassifier(), + n_classes=7, + n_labels=3, + n_samples=100, + n_features=10, + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( model, "scikit-learn KNN Classifier", [("input", FloatTensorType([None, 
-            options=options, target_opset=TARGET_OPSET)
+            options=options,
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
-        assert 'zipmap' not in str(model_onnx).lower()
+        assert "zipmap" not in str(model_onnx).lower()
         dump_data_and_model(
-            X_test[:10], model, model_onnx,
-            basename="SklearnKNNClassifierMultiLabel-Out0")
+            X_test[:10],
+            model,
+            model_onnx,
+            basename="SklearnKNNClassifierMultiLabel-Out0",
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_int(self):
         model, X = self._fit_model(KNeighborsRegressor())
@@ -591,37 +672,38 @@ def test_model_knn_regressor_int(self):
             model,
             "KNN regressor",
             [("input", Int64TensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X,
-            model,
-            model_onnx,
-            basename="SklearnKNNRegressorInt-Dec4"
+            X, model, model_onnx, basename="SklearnKNNRegressorInt-Dec4"
         )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_equal(self):
-        X, y = datasets.make_regression(
-            n_samples=1000, n_features=100, random_state=42)
+        X, y = datasets.make_regression(n_samples=1000, n_features=100, random_state=42)
         X = X.astype(numpy.int64)
         X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.5, random_state=42)
-        model = KNeighborsRegressor(
-            algorithm='brute', metric='manhattan').fit(X_train, y_train)
+            X, y, test_size=0.5, random_state=42
+        )
+        model = KNeighborsRegressor(algorithm="brute", metric="manhattan").fit(
+            X_train, y_train
+        )
         model_onnx = convert_sklearn(
-            model, 'knn',
-            [('input', Int64TensorType([None, X_test.shape[1]]))],
-            target_opset=TARGET_OPSET)
+            model,
+            "knn",
+            [("input", Int64TensorType([None, X_test.shape[1]]))],
+            target_opset=TARGET_OPSET,
+        )
         exp = model.predict(X_test)
         sess = InferenceSession(
-            model_onnx.SerializeToString(),
-            providers=["CPUExecutionProvider"])
-        res = sess.run(None, {'input': numpy.array(X_test)})[0].ravel()
+            model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+        )
+        res = sess.run(None, {"input": numpy.array(X_test)})[0].ravel()

         # The conversion has discrepencies when
         # neighbours are at the exact same distance.
@@ -637,47 +719,57 @@ def test_model_knn_regressor_equal(self):
         # assert_almost_equal(exp, res)

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_multi_class_nocl(self):
         model, X = fit_classification_model(
-            KNeighborsClassifier(),
-            2, label_string=True)
+            KNeighborsClassifier(), 2, label_string=True
+        )
         model_onnx = convert_sklearn(
-            model, "KNN multi-class nocl",
+            model,
+            "KNN multi-class nocl",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            options={id(model): {'nocl': True}},
-            target_opset=TARGET_OPSET)
+            options={id(model): {"nocl": True}},
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         sonx = str(model_onnx)
-        assert 'classlabels_strings' not in sonx
-        assert 'cl0' not in sonx
+        assert "classlabels_strings" not in sonx
+        assert "cl0" not in sonx
         dump_data_and_model(
-            X, model, model_onnx, classes=model.classes_,
-            basename="SklearnKNNMultiNoCl", verbose=False)
+            X,
+            model,
+            model_onnx,
+            classes=model.classes_,
+            basename="SklearnKNNMultiNoCl",
+            verbose=False,
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor2_2_pipee(self):
-        pipe = make_pipeline(StandardScaler(),
-                             KNeighborsClassifier())
+        pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())
         model, X = self._fit_model_binary_classification(pipe)
         model_onnx = convert_sklearn(
-            model, "KNN pipe",
+            model,
+            "KNN pipe",
             [("input", FloatTensorType([None, X.shape[1]]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
             X.astype(numpy.float32)[:2],
-            model, model_onnx,
-            basename="SklearnKNeighborsRegressorPipe2")
+            model,
+            model_onnx,
+            basename="SklearnKNeighborsRegressorPipe2",
+        )

     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("0.5.0"),
-        reason="not available")
+        pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_onnx_test_knn_transform(self):
         iris = datasets.load_iris()
@@ -690,179 +782,200 @@ def test_onnx_test_knn_transform(self):
         for to in (9, 10, 11):
             if to > TARGET_OPSET:
                 break
-            model_def = to_onnx(clr, X_train.astype(numpy.float32),
-                                target_opset=to)
+            model_def = to_onnx(clr, X_train.astype(numpy.float32), target_opset=to)
             oinf = InferenceSession(
-                model_def.SerializeToString(),
-                providers=["CPUExecutionProvider"])
+                model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+            )
             X_test = X_test[:3]
-            y = oinf.run(None, {'X': X_test.astype(numpy.float32)})
+            y = oinf.run(None, {"X": X_test.astype(numpy.float32)})
             dist, ind = clr.kneighbors(X_test)
             assert_almost_equal(dist, DataFrame(y[1]).values, decimal=5)
             assert_almost_equal(ind, y[0])

-    @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
-                     reason="new in 0.22")
+    @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_default(self):
         model, X_test = fit_classification_model(
-            NeighborhoodComponentsAnalysis(random_state=42), 3)
+            NeighborhoodComponentsAnalysis(random_state=42), 3
+        )
         model_onnx = convert_sklearn(
-            model, "NCA",
+            model,
+            "NCA",
             [("input", FloatTensorType((None, X_test.shape[1])))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X_test, model, model_onnx,
-            basename="SklearnNCADefault")
+        dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADefault")

-    @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
-                     reason="new in 0.22")
+    @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_identity(self):
         model, X_test = fit_classification_model(
             NeighborhoodComponentsAnalysis(
-                init='identity', max_iter=4, random_state=42), 3)
+                init="identity", max_iter=4, random_state=42
+            ),
+            3,
+        )
         model_onnx = convert_sklearn(
-            model, "NCA",
+            model,
+            "NCA",
             [("input", FloatTensorType((None, X_test.shape[1])))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X_test, model,
-            model_onnx, basename="SklearnNCAIdentity")
+        dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAIdentity")

-    @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
-                     reason="new in 0.22")
+    @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
    @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_double(self):
         model, X_test = fit_classification_model(
-            NeighborhoodComponentsAnalysis(
-                n_components=2, max_iter=4, random_state=42), 3)
+            NeighborhoodComponentsAnalysis(n_components=2, max_iter=4, random_state=42),
+            3,
+        )
         X_test = X_test.astype(numpy.float64)
         model_onnx = convert_sklearn(
-            model, "NCA",
+            model,
+            "NCA",
             [("input", DoubleTensorType((None, X_test.shape[1])))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X_test, model, model_onnx,
-            basename="SklearnNCADouble")
+        dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADouble")

-    @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
-                     reason="new in 0.22")
+    @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_int(self):
         model, X_test = fit_classification_model(
-            NeighborhoodComponentsAnalysis(
-                init='pca', max_iter=4, random_state=42), 3, is_int=True)
+            NeighborhoodComponentsAnalysis(init="pca", max_iter=4, random_state=42),
+            3,
+            is_int=True,
+        )
         model_onnx = convert_sklearn(
-            model, "NCA",
+            model,
+            "NCA",
             [("input", Int64TensorType((None, X_test.shape[1])))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
-        dump_data_and_model(
-            X_test, model, model_onnx,
-            basename="SklearnNCAInt")
+        dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAInt")

-    @unittest.skipIf(KNeighborsTransformer is None,
-                     reason="new in 0.22")
+    @unittest.skipIf(KNeighborsTransformer is None, reason="new in 0.22")
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_k_neighbours_transformer_distance(self):
         model, X_test = fit_classification_model(
-            KNeighborsTransformer(
-                n_neighbors=4, mode='distance'), 2)
+            KNeighborsTransformer(n_neighbors=4, mode="distance"), 2
+        )
         model_onnx = convert_sklearn(
-            model, "KNN transformer",
+            model,
+            "KNN transformer",
             [("input", FloatTensorType((None, X_test.shape[1])))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X_test, model, model_onnx,
-            basename="SklearnKNNTransformerDistance")
basename="SklearnKNNTransformerDistance") + X_test, model, model_onnx, basename="SklearnKNNTransformerDistance" + ) - @unittest.skipIf(KNeighborsTransformer is None, - reason="new in 0.22") + @unittest.skipIf(KNeighborsTransformer is None, reason="new in 0.22") @ignore_warnings(category=DeprecationWarning) def test_sklearn_k_neighbours_transformer_connectivity(self): model, X_test = fit_classification_model( - KNeighborsTransformer( - n_neighbors=3, mode='connectivity'), 3) + KNeighborsTransformer(n_neighbors=3, mode="connectivity"), 3 + ) model_onnx = convert_sklearn( - model, "KNN transformer", + model, + "KNN transformer", [("input", FloatTensorType((None, X_test.shape[1])))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnKNNTransformerConnectivity") + X_test, model, model_onnx, basename="SklearnKNNTransformerConnectivity" + ) - @unittest.skipIf(KNNImputer is None, - reason="new in 0.22") + @unittest.skipIf(KNNImputer is None, reason="new in 0.22") @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sklearn_knn_imputer(self): x_train = numpy.array( - [[1, 2, numpy.nan, 12], [3, numpy.nan, 3, 13], - [1, 4, numpy.nan, 1], [numpy.nan, 4, 3, 12]], dtype=numpy.float32) + [ + [1, 2, numpy.nan, 12], + [3, numpy.nan, 3, 13], + [1, 4, numpy.nan, 1], + [numpy.nan, 4, 3, 12], + ], + dtype=numpy.float32, + ) x_test = numpy.array( [[1.3, 2.4, numpy.nan, 1], [-1.3, numpy.nan, 3.1, numpy.nan]], - dtype=numpy.float32) - model = KNNImputer(n_neighbors=3, metric='nan_euclidean').fit(x_train) + dtype=numpy.float32, + ) + model = KNNImputer(n_neighbors=3, metric="nan_euclidean").fit(x_train) for opset in [TARGET_OPSET, 9, 10, 11, 12]: if opset > TARGET_OPSET: continue model_onnx = convert_sklearn( - model, "KNN imputer", + model, + "KNN imputer", [("input", FloatTensorType((None, x_test.shape[1])))], - target_opset=opset) + target_opset=opset, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - x_test, model, model_onnx, - basename="SklearnKNNImputer%d" % opset) + x_test, model, model_onnx, basename="SklearnKNNImputer%d" % opset + ) - @unittest.skipIf(KNNImputer is None, - reason="new in 0.22") + @unittest.skipIf(KNNImputer is None, reason="new in 0.22") @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=DeprecationWarning) def test_sklearn_knn_imputer_cdist(self): x_train = numpy.array( - [[1, 2, numpy.nan, 12], [3, numpy.nan, 3, 13], - [1, 4, numpy.nan, 1], [numpy.nan, 4, 3, 12]], dtype=numpy.float32) + [ + [1, 2, numpy.nan, 12], + [3, numpy.nan, 3, 13], + [1, 4, numpy.nan, 1], + [numpy.nan, 4, 3, 12], + ], + dtype=numpy.float32, + ) x_test = numpy.array( [[1.3, 2.4, numpy.nan, 1], [-1.3, numpy.nan, 3.1, numpy.nan]], - dtype=numpy.float32) - model = KNNImputer(n_neighbors=3, metric='nan_euclidean').fit(x_train) + dtype=numpy.float32, + ) + model = KNNImputer(n_neighbors=3, metric="nan_euclidean").fit(x_train) with self.assertRaises(NameError): convert_sklearn( - model, "KNN imputer", + model, + "KNN imputer", [("input", FloatTensorType((None, x_test.shape[1])))], target_opset=TARGET_OPSET, - options={id(model): {'optim2': 'cdist'}}) + options={id(model): {"optim2": "cdist"}}, + ) for opset in [TARGET_OPSET, 12, 11, 10, 9]: if opset > TARGET_OPSET: continue model_onnx = convert_sklearn( - model, "KNN imputer", + model, + "KNN imputer", [("input", FloatTensorType((None, 
x_test.shape[1])))], target_opset=opset, - options={id(model): {'optim': 'cdist'}}) + options={id(model): {"optim": "cdist"}}, + ) self.assertIsNotNone(model_onnx) self.assertIn('op_type: "cdist"', str(model_onnx).lower()) - self.assertNotIn('scan', str(model_onnx).lower()) + self.assertNotIn("scan", str(model_onnx).lower()) dump_data_and_model( - x_test, model, model_onnx, - basename="SklearnKNNImputer%dcdist" % opset) + x_test, model, model_onnx, basename="SklearnKNNImputer%dcdist" % opset + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @unittest.skipIf(TARGET_OPSET < 11, - reason="needs higher target_opset") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_iris_regressor_multi_reg(self): iris = datasets.load_iris() @@ -870,15 +983,21 @@ def test_model_knn_iris_regressor_multi_reg(self): y = iris.target.astype(numpy.float32) y = numpy.vstack([y, 1 - y, y + 10]).T model = KNeighborsRegressor( - algorithm='brute', weights='distance', n_neighbors=7) + algorithm="brute", weights="distance", n_neighbors=7 + ) model.fit(X[:13], y[:13]) - onx = to_onnx(model, X[:1], - options={id(model): {'optim': 'cdist'}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model, + X[:1], + options={id(model): {"optim": "cdist"}}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( X.astype(numpy.float32)[:7], - model, onx, - basename="SklearnKNeighborsRegressorMReg") + model, + onx, + basename="SklearnKNeighborsRegressorMReg", + ) @unittest.skipIf(dont_test_radius(), reason="not available") @ignore_warnings(category=DeprecationWarning) @@ -887,26 +1006,31 @@ def test_model_knn_iris_regressor_multi_reg_radius(self): X = iris.data.astype(numpy.float32) y = iris.target.astype(numpy.float32) y = numpy.vstack([y, 1 - y, y + 10]).T - model = KNeighborsRegressor( - algorithm='brute', weights='distance') + model = KNeighborsRegressor(algorithm="brute", weights="distance") model.fit(X[:13], y[:13]) - onx = to_onnx(model, X[:1], - options={id(model): {'optim': 'cdist'}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model, + X[:1], + options={id(model): {"optim": "cdist"}}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( X.astype(numpy.float32)[:7], - model, onx, - basename="SklearnRadiusNeighborsRegressorMReg") + model, + onx, + basename="SklearnRadiusNeighborsRegressorMReg", + ) dump_data_and_model( (X + 0.1).astype(numpy.float32)[:7], - model, onx, - basename="SklearnRadiusNeighborsRegressorMReg") + model, + onx, + basename="SklearnRadiusNeighborsRegressorMReg", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @unittest.skipIf(TARGET_OPSET < 11, - reason="needs higher target_opset") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_iris_classifier_multi_reg2_weight(self): iris = datasets.load_iris() @@ -914,16 +1038,21 @@ def test_model_knn_iris_classifier_multi_reg2_weight(self): y = iris.target.astype(numpy.int64) y = numpy.vstack([(y + 1) % 2, y % 2]).T model = KNeighborsClassifier( - algorithm='brute', weights='distance', n_neighbors=7) + algorithm="brute", weights="distance", n_neighbors=7 + ) model.fit(X[:13], y[:13]) - onx = to_onnx(model, X[:1], - options={id(model): {'optim': 
'cdist', - 'zipmap': False}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model, + X[:1], + options={id(model): {"optim": "cdist", "zipmap": False}}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( X.astype(numpy.float32)[:11], - model, onx, - basename="SklearnKNeighborsClassifierMReg2-Out0") + model, + onx, + basename="SklearnKNeighborsClassifierMReg2-Out0", + ) @unittest.skipIf(dont_test_radius(), reason="not available") @ignore_warnings(category=DeprecationWarning) @@ -932,23 +1061,25 @@ def test_model_knn_iris_classifier_multi_reg2_weight_radius(self): X = iris.data.astype(numpy.float32) y = iris.target.astype(numpy.int64) y = numpy.vstack([(y + 1) % 2, y % 2]).T - model = RadiusNeighborsClassifier( - algorithm='brute', weights='distance') + model = RadiusNeighborsClassifier(algorithm="brute", weights="distance") model.fit(X[:13], y[:13]) - onx = to_onnx(model, X[:1], - options={id(model): {'optim': 'cdist', - 'zipmap': False}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model, + X[:1], + options={id(model): {"optim": "cdist", "zipmap": False}}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( X.astype(numpy.float32)[:11], - model, onx, - basename="SklearnRadiusNeighborsClassifierMReg2-Out0") + model, + onx, + basename="SklearnRadiusNeighborsClassifierMReg2-Out0", + ) @unittest.skipIf( - pv.Version(ort_version) < pv.Version("0.5.0"), - reason="not available") - @unittest.skipIf(TARGET_OPSET < 11, - reason="needs higher target_opset") + pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available" + ) + @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset") @ignore_warnings(category=DeprecationWarning) def test_model_knn_iris_classifier_multi_reg3_weight(self): iris = datasets.load_iris() @@ -956,17 +1087,21 @@ def test_model_knn_iris_classifier_multi_reg3_weight(self): y = iris.target.astype(numpy.int64) y = numpy.vstack([y % 2, y % 2, (y + 1) % 2]).T model = KNeighborsClassifier( - algorithm='brute', weights='distance', - n_neighbors=7) + algorithm="brute", weights="distance", n_neighbors=7 + ) model.fit(X[:13], y[:13]) - onx = to_onnx(model, X[:1], - options={id(model): {'optim': 'cdist', - 'zipmap': False}}, - target_opset=TARGET_OPSET) + onx = to_onnx( + model, + X[:1], + options={id(model): {"optim": "cdist", "zipmap": False}}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( X.astype(numpy.float32)[:11], - model, onx, - basename="SklearnKNeighborsClassifierMReg3-Out0") + model, + onx, + basename="SklearnKNeighborsClassifierMReg3-Out0", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_normalizer_converter.py b/tests/test_sklearn_normalizer_converter.py index 9559871f3..5975da08b 100644 --- a/tests/test_sklearn_normalizer_converter.py +++ b/tests/test_sklearn_normalizer_converter.py @@ -8,7 +8,10 @@ from sklearn.preprocessing import Normalizer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - Int64TensorType, FloatTensorType, DoubleTensorType) + Int64TensorType, + FloatTensorType, + DoubleTensorType, +) from test_utils import dump_data_and_model, TARGET_OPSET @@ -18,9 +21,11 @@ def test_model_normalizer(self): x = numpy.random.randn(10, 1).astype(numpy.int64) model.fit(x) model_onnx = convert_sklearn( - model, "scikit-learn normalizer", + model, + "scikit-learn normalizer", [("input", Int64TensorType([None, 1]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) self.assertTrue(len(model_onnx.graph.node) == 1) @@ -29,88 +34,112 @@ def 
         x = numpy.random.randn(10, 3).astype(numpy.float32)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", FloatTensorType([None, 3]))],
             target_opset=TARGET_OPSET,
-            black_op={"Normalizer"})
+            black_op={"Normalizer"},
+        )
         self.assertNotIn('op_type: "Normalizer', str(model_onnx))
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
-            model, model_onnx,
-            basename="SklearnNormalizerL1BlackOp-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL1BlackOp-SkipDim1",
+        )

     def test_model_normalizer_float_l1(self):
         model = Normalizer(norm="l1")
         x = numpy.random.randn(10, 3).astype(numpy.float32)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", FloatTensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         self.assertTrue(len(model_onnx.graph.node) == 1)
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
-            model, model_onnx,
-            basename="SklearnNormalizerL1-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL1-SkipDim1",
+        )

     def test_model_normalizer_float_l2(self):
         model = Normalizer(norm="l2")
         x = numpy.random.randn(10, 3).astype(numpy.float32)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", FloatTensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         self.assertTrue(len(model_onnx.graph.node) == 1)
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
-            model, model_onnx,
-            basename="SklearnNormalizerL2-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL2-SkipDim1",
+        )

     def test_model_normalizer_double_l1(self):
         model = Normalizer(norm="l1")
         x = numpy.random.randn(10, 3).astype(numpy.float64)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", DoubleTensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float64),
-            model, model_onnx,
-            basename="SklearnNormalizerL1Double-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL1Double-SkipDim1",
+        )

     def test_model_normalizer_double_l2(self):
         model = Normalizer(norm="l2")
         x = numpy.random.randn(10, 3).astype(numpy.float64)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", DoubleTensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float64),
-            model, model_onnx,
-            basename="SklearnNormalizerL2Double-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL2Double-SkipDim1",
+        )

     def test_model_normalizer_float_noshape(self):
         model = Normalizer(norm="l2")
         x = numpy.random.randn(10, 3).astype(numpy.float32)
         model.fit(x)
         model_onnx = convert_sklearn(
-            model, "scikit-learn normalizer",
+            model,
+            "scikit-learn normalizer",
             [("input", FloatTensorType([]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         self.assertTrue(len(model_onnx.graph.node) == 1)
         dump_data_and_model(
             numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
-            model, model_onnx,
-            basename="SklearnNormalizerL2NoShape-SkipDim1")
+            model,
+            model_onnx,
+            basename="SklearnNormalizerL2NoShape-SkipDim1",
+        )


 if __name__ == "__main__":
diff --git a/tests/test_sklearn_one_hot_encoder_converter.py b/tests/test_sklearn_one_hot_encoder_converter.py
index 36ac8a191..60d5fd36d 100644
--- a/tests/test_sklearn_one_hot_encoder_converter.py
+++ b/tests/test_sklearn_one_hot_encoder_converter.py
@@ -20,84 +20,102 @@

 def one_hot_encoder_supports_string():
     # pv.Version does not work with development versions
-    vers = '.'.join(sklearn_version.split('.')[:2])
+    vers = ".".join(sklearn_version.split(".")[:2])
     return pv.Version(vers) >= pv.Version("0.20.0")


 def one_hot_encoder_supports_drop():
     # pv.Version does not work with development versions
-    vers = '.'.join(sklearn_version.split('.')[:2])
+    vers = ".".join(sklearn_version.split(".")[:2])
     return pv.Version(vers) >= pv.Version("0.21.0")


 class TestSklearnOneHotEncoderConverter(unittest.TestCase):
-    @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
-                     reason="issues with shapes")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+    )
     @unittest.skipIf(
         not one_hot_encoder_supports_string(),
-        reason="OneHotEncoder did not have categories_ before 0.20")
+        reason="OneHotEncoder did not have categories_ before 0.20",
+    )
     def test_model_one_hot_encoder(self):
-        model = OneHotEncoder(categories='auto')
-        data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
-                           dtype=numpy.int64)
+        model = OneHotEncoder(categories="auto")
+        data = numpy.array(
+            [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
+        )
         model.fit(data)
         model_onnx = convert_sklearn(
-            model, "scikit-learn one-hot encoder",
+            model,
+            "scikit-learn one-hot encoder",
             [("input", Int64TensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
-            data, model, model_onnx,
-            basename="SklearnOneHotEncoderInt64-SkipDim1")
+            data, model, model_onnx, basename="SklearnOneHotEncoderInt64-SkipDim1"
+        )

-    @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
-                     reason="issues with shapes")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+    )
     @unittest.skipIf(
         not one_hot_encoder_supports_string(),
-        reason="OneHotEncoder did not have categories_ before 0.20")
+        reason="OneHotEncoder did not have categories_ before 0.20",
+    )
     def test_model_one_hot_encoder_int32(self):
-        model = OneHotEncoder(categories='auto')
-        data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
-                           dtype=numpy.int32)
+        model = OneHotEncoder(categories="auto")
+        data = numpy.array(
+            [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
+        )
         model.fit(data)
         model_onnx = convert_sklearn(
-            model, "scikit-learn one-hot encoder",
+            model,
+            "scikit-learn one-hot encoder",
             [("input", Int32TensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         str_model_onnx = str(model_onnx)
         assert "int64_data" in str_model_onnx
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
-            data, model, model_onnx,
-            basename="SklearnOneHotEncoderInt32-SkipDim1")
+            data, model, model_onnx, basename="SklearnOneHotEncoderInt32-SkipDim1"
+        )

-    @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
-                     reason="issues with shapes")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+    )
     @unittest.skipIf(
         not one_hot_encoder_supports_string(),
-        reason="OneHotEncoder did not have categories_ before 0.20")
+        reason="OneHotEncoder did not have categories_ before 0.20",
+    )
     def test_model_one_hot_encoder_int32_scaler(self):
-        model = make_pipeline(OneHotEncoder(categories='auto', sparse=False),
-                              RobustScaler())
-        data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
-                           dtype=numpy.int32)
+        model = make_pipeline(
+            OneHotEncoder(categories="auto", sparse=False), RobustScaler()
+        )
+        data = numpy.array(
+            [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
+        )
         model.fit(data)
         model_onnx = convert_sklearn(
-            model, "scikit-learn one-hot encoder",
+            model,
+            "scikit-learn one-hot encoder",
             [("input", Int32TensorType([None, 3]))],
-            target_opset=TARGET_OPSET)
+            target_opset=TARGET_OPSET,
+        )
         str_model_onnx = str(model_onnx)
         assert "int64_data" in str_model_onnx
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
-            data, model, model_onnx,
-            basename="SklearnOneHotEncoderInt32Scaler-SkipDim1")
+            data, model, model_onnx, basename="SklearnOneHotEncoderInt32Scaler-SkipDim1"
+        )

-    @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
-                     reason="issues with shapes")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+    )
     @unittest.skipIf(
         not one_hot_encoder_supports_drop(),
-        reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+        reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+    )
     def test_one_hot_encoder_mixed_string_int_drop(self):
         data = [
             ["c0.4", "c0.2", 3],
@@ -108,124 +126,147 @@ def test_one_hot_encoder_mixed_string_int_drop(self):
             ["c0.2", "c2.2", 1],
         ]
         test = [["c0.2", "c2.2", 1]]
-        model = OneHotEncoder(categories="auto", drop=['c0.4', 'c0.2', 3])
+        model = OneHotEncoder(categories="auto", drop=["c0.4", "c0.2", 3])
         model.fit(data)
         inputs = [
             ("input1", StringTensorType([None, 2])),
             ("input2", Int64TensorType([None, 1])),
         ]
         model_onnx = convert_sklearn(
-            model, "one-hot encoder", inputs, target_opset=TARGET_OPSET)
+            model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+        )
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
-            test, model, model_onnx, verbose=False,
-            basename="SklearnOneHotEncoderMixedStringIntDrop")
+            test,
+            model,
+            model_onnx,
+            verbose=False,
+            basename="SklearnOneHotEncoderMixedStringIntDrop",
+        )

-    @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
-                     reason="issues with shapes")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+    )
     @unittest.skipIf(
         not one_hot_encoder_supports_string(),
-        reason="OneHotEncoder does not support strings in 0.19")
+        reason="OneHotEncoder does not support strings in 0.19",
+    )
     def test_one_hot_encoder_onecat(self):
         data = [["cat"], ["cat"]]
         model = OneHotEncoder(categories="auto")
         model.fit(data)
         inputs = [("input1", StringTensorType([None, 1]))]
-        model_onnx = convert_sklearn(model, "one-hot encoder one string cat",
-                                     inputs, target_opset=TARGET_OPSET)
+        model_onnx = convert_sklearn(
+            model, "one-hot encoder one string cat", inputs, target_opset=TARGET_OPSET
+        )
         self.assertTrue(model_onnx is not None)
         dump_data_and_model(
-            data, model, model_onnx,
-            basename="SklearnOneHotEncoderOneStringCat")
+            data, model, model_onnx, basename="SklearnOneHotEncoderOneStringCat"
+        )

-
@unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_string(), - reason="OneHotEncoder does not support strings in 0.19") + reason="OneHotEncoder does not support strings in 0.19", + ) def test_one_hot_encoder_twocats(self): data = [["cat2"], ["cat1"]] model = OneHotEncoder(categories="auto") model.fit(data) inputs = [("input1", StringTensorType([None, 1]))] - model_onnx = convert_sklearn(model, "one-hot encoder two string cats", - inputs, target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, "one-hot encoder two string cats", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOneHotEncoderTwoStringCat") + data, model, model_onnx, basename="SklearnOneHotEncoderTwoStringCat" + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_drop(), - reason="OneHotEncoder does not support drop in scikit versions < 0.21") + reason="OneHotEncoder does not support drop in scikit versions < 0.21", + ) def test_one_hot_encoder_string_drop_first(self): - data = [['Male', 'First'], ['Female', 'First'], ['Female', 'Second']] - test_data = [['Male', 'Second']] - model = OneHotEncoder(drop='first', - categories='auto') + data = [["Male", "First"], ["Female", "First"], ["Female", "Second"]] + test_data = [["Male", "Second"]] + model = OneHotEncoder(drop="first", categories="auto") model.fit(data) inputs = [ ("input1", StringTensorType([None, 1])), ("input2", StringTensorType([None, 1])), ] model_onnx = convert_sklearn( - model, "one-hot encoder", inputs, target_opset=TARGET_OPSET) + model, "one-hot encoder", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - test_data, model, model_onnx, - basename="SklearnOneHotEncoderStringDropFirst") + test_data, model, model_onnx, basename="SklearnOneHotEncoderStringDropFirst" + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_string(), - reason="OneHotEncoder does not support this in 0.19") + reason="OneHotEncoder does not support this in 0.19", + ) def test_model_one_hot_encoder_list_sparse(self): - model = OneHotEncoder(categories=[[0, 1, 4, 5], - [1, 2, 3, 5], - [0, 3, 4, 6]], - sparse=True) - data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], - dtype=numpy.int64) + model = OneHotEncoder( + categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=True + ) + data = numpy.array( + [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64 + ) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn one-hot encoder", + model, + "scikit-learn one-hot encoder", [("input1", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOneHotEncoderCatSparse-SkipDim1") + data, model, model_onnx, basename="SklearnOneHotEncoderCatSparse-SkipDim1" + ) - 
@unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_string(), - reason="OneHotEncoder does not support this in 0.19") + reason="OneHotEncoder does not support this in 0.19", + ) def test_model_one_hot_encoder_list_dense(self): - model = OneHotEncoder(categories=[[0, 1, 4, 5], - [1, 2, 3, 5], - [0, 3, 4, 6]], - sparse=False) - data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], - dtype=numpy.int64) + model = OneHotEncoder( + categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=False + ) + data = numpy.array( + [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64 + ) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn one-hot encoder", + model, + "scikit-learn one-hot encoder", [("input", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOneHotEncoderCatDense-SkipDim1") + data, model, model_onnx, basename="SklearnOneHotEncoderCatDense-SkipDim1" + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_drop(), - reason="OneHotEncoder does not support drop in scikit versions < 0.21") + reason="OneHotEncoder does not support drop in scikit versions < 0.21", + ) def test_one_hot_encoder_int_drop(self): data = [ [1, 2, 3], @@ -236,25 +277,26 @@ def test_one_hot_encoder_int_drop(self): [0, 3, 3], ] test = numpy.array([[2, 2, 1], [4, 2, 1]], dtype=numpy.int64) - model = OneHotEncoder(categories="auto", drop=[0, 1, 3], - dtype=numpy.float32) + model = OneHotEncoder(categories="auto", drop=[0, 1, 3], dtype=numpy.float32) model.fit(data) inputs = [ ("input1", Int64TensorType([None, 3])), ] model_onnx = convert_sklearn( - model, "one-hot encoder", inputs, - target_opset=TARGET_OPSET) + model, "one-hot encoder", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - test, model, model_onnx, - basename="SklearnOneHotEncoderIntDrop") + test, model, model_onnx, basename="SklearnOneHotEncoderIntDrop" + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_drop(), - reason="OneHotEncoder does not support drop in scikit versions < 0.21") + reason="OneHotEncoder does not support drop in scikit versions < 0.21", + ) def test_one_hot_encoder_int_drop_first(self): data = [ [1, 2, 3], @@ -265,37 +307,40 @@ def test_one_hot_encoder_int_drop_first(self): [0, 3, 3], ] test = numpy.array([[2, 2, 1], [1, 3, 3]], dtype=numpy.int64) - model = OneHotEncoder(categories="auto", drop='first', - dtype=numpy.int64) + model = OneHotEncoder(categories="auto", drop="first", dtype=numpy.int64) model.fit(data) inputs = [ ("input1", Int64TensorType([None, 3])), ] model_onnx = convert_sklearn( - model, "one-hot encoder", inputs, target_opset=TARGET_OPSET) + model, "one-hot encoder", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - test, model, model_onnx, - 
basename="SklearnOneHotEncoderIntDropFirst") + test, model, model_onnx, basename="SklearnOneHotEncoderIntDropFirst" + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @unittest.skipIf( not one_hot_encoder_supports_drop(), - reason="OneHotEncoder does not support drop in scikit versions < 0.21") + reason="OneHotEncoder does not support drop in scikit versions < 0.21", + ) def test_one_hot_encoder_string_drop_first_2(self): - data = [['Male', 'First'], ['Female', 'First'], ['Female', 'Second']] - model = OneHotEncoder(drop='first') + data = [["Male", "First"], ["Female", "First"], ["Female", "Second"]] + model = OneHotEncoder(drop="first") model.fit(data) inputs = [ ("input", StringTensorType([None, 2])), ] model_onnx = convert_sklearn( - model, "one-hot encoder", inputs, target_opset=TARGET_OPSET) + model, "one-hot encoder", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOneHotEncoderStringDropFirst2") + data, model, model_onnx, basename="SklearnOneHotEncoderStringDropFirst2" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_one_vs_one_classifier_converter.py b/tests/test_sklearn_one_vs_one_classifier_converter.py index c2c19081b..2cebe87cf 100644 --- a/tests/test_sklearn_one_vs_one_classifier_converter.py +++ b/tests/test_sklearn_one_vs_one_classifier_converter.py @@ -12,113 +12,127 @@ from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC from skl2onnx import convert_sklearn -from skl2onnx.common.data_types import ( - DoubleTensorType, - FloatTensorType) +from skl2onnx.common.data_types import DoubleTensorType, FloatTensorType from test_utils import TARGET_OPSET warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning) -ort_version = '.'.join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestOneVsOneClassifierConverter(unittest.TestCase): - def test_one_vs_one_classifier_converter_linearsvc(self): X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.33, shuffle=True, random_state=0) - model = OneVsOneClassifier(LinearSVC(random_state=0)).fit( - X_train, y_train) + X, y, test_size=0.33, shuffle=True, random_state=0 + ) + model = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train) exp_label = model.predict(X_test[:10]) exp_prob = model.decision_function(X_test[:10]) model_onnx = convert_sklearn( - model, "scikit-learn OneVsOne Classifier", + model, + "scikit-learn OneVsOne Classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) XI = X_test[:10].astype(np.float32) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': XI}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": XI}) assert_almost_equal(exp_label.ravel(), got[0].ravel()) assert_almost_equal(exp_prob, got[1]) def test_one_vs_one_classifier_converter_logisticregression(self): X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.33, shuffle=True, random_state=0) + X, y, test_size=0.33, shuffle=True, random_state=0 + 
) model = OneVsOneClassifier(LogisticRegression(random_state=0)).fit( - X_train, y_train) + X_train, y_train + ) exp_label = model.predict(X_test[:10]) exp_prob = model.decision_function(X_test[:10]) model_onnx = convert_sklearn( - model, "scikit-learn OneVsOne Classifier", + model, + "scikit-learn OneVsOne Classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) XI = X_test[:10].astype(np.float32) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': XI}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": XI}) assert_almost_equal(exp_label.ravel(), got[0].ravel()) assert_almost_equal(exp_prob, got[1]) def test_one_vs_one_classifier_converter_logisticregression_double(self): X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.33, shuffle=True, random_state=0) + X, y, test_size=0.33, shuffle=True, random_state=0 + ) model = OneVsOneClassifier(LogisticRegression(random_state=0)).fit( - X_train, y_train) + X_train, y_train + ) exp_label = model.predict(X_test[:10]) exp_prob = model.decision_function(X_test[:10]) model_onnx = convert_sklearn( - model, "scikit-learn OneVsOne Classifier", + model, + "scikit-learn OneVsOne Classifier", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) XI = X_test[:10].astype(np.float64) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': XI}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": XI}) assert_almost_equal(exp_label.ravel(), got[0].ravel()) assert_almost_equal(exp_prob, got[1]) def test_one_vs_one_classifier_converter_decisiontree(self): X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.33, shuffle=True, random_state=0) + X, y, test_size=0.33, shuffle=True, random_state=0 + ) model = OneVsOneClassifier(DecisionTreeClassifier(max_depth=3)).fit( - X_train, y_train) + X_train, y_train + ) limit = 10 exp_label = model.predict(X_test[:limit]) exp_prob = model.decision_function(X_test[:limit]) model_onnx = convert_sklearn( - model, "scikit-learn OneVsOne Classifier", + model, + "scikit-learn OneVsOne Classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) XI = X_test[:limit].astype(np.float32) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': XI}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": XI}) assert_almost_equal(exp_label.ravel(), got[0].ravel()) assert_almost_equal(exp_prob, got[1]) if __name__ == "__main__": - # TestOneVsOneClassifierConverter().test_one_vs_one_classifier_converter_logisticregression() unittest.main() diff --git a/tests/test_sklearn_one_vs_rest_classifier_converter.py b/tests/test_sklearn_one_vs_rest_classifier_converter.py index 3032ae8d4..10c6bafbd 100644 --- a/tests/test_sklearn_one_vs_rest_classifier_converter.py +++ 
b/tests/test_sklearn_one_vs_rest_classifier_converter.py @@ -5,8 +5,7 @@ import numpy as np from numpy.testing import assert_almost_equal from onnxruntime import InferenceSession, __version__ as ort_version -from sklearn.ensemble import ( - GradientBoostingClassifier, GradientBoostingRegressor) +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.multiclass import OneVsRestClassifier from sklearn.neural_network import MLPClassifier, MLPRegressor @@ -15,25 +14,25 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.exceptions import ConvergenceWarning from sklearn.svm import LinearSVC + try: from sklearn.utils._testing import ignore_warnings except ImportError: from sklearn.utils.testing import ignore_warnings from skl2onnx import convert_sklearn -from skl2onnx.common.data_types import ( - FloatTensorType, - Int64TensorType) +from skl2onnx.common.data_types import FloatTensorType, Int64TensorType from test_utils import ( dump_data_and_model, dump_multiple_classification, fit_classification_model, fit_multilabel_classification_model, - TARGET_OPSET) + TARGET_OPSET, +) warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning) -ort_version = '.'.join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestOneVsRestClassifierConverter(unittest.TestCase): @@ -41,35 +40,32 @@ class TestOneVsRestClassifierConverter(unittest.TestCase): @ignore_warnings(category=warnings_to_skip) def test_ovr_linear_svc(self): model = OneVsRestClassifier(LinearSVC()) - dump_multiple_classification( - model, target_opset=TARGET_OPSET, verbose=False) + dump_multiple_classification(model, target_opset=TARGET_OPSET, verbose=False) @ignore_warnings(category=warnings_to_skip) def test_ovr_logistic_regression(self): model = OneVsRestClassifier(LogisticRegression()) - dump_multiple_classification( - model, target_opset=TARGET_OPSET) + dump_multiple_classification(model, target_opset=TARGET_OPSET) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('1.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("1.4.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_rf(self): - model = OneVsRestClassifier( - RandomForestClassifier(n_estimators=2, max_depth=2)) - model, X = fit_classification_model( - model, 3, is_int=True, n_features=3) + model = OneVsRestClassifier(RandomForestClassifier(n_estimators=2, max_depth=2)) + model, X = fit_classification_model(model, 3, is_int=True, n_features=3) model_onnx = convert_sklearn( - model, initial_types=[ - ('input', Int64TensorType([None, X.shape[1]]))], + model, + initial_types=[("input", Int64TensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) XI = X.astype(np.int64) - got = sess.run(None, {'input': XI}) + got = sess.run(None, {"input": XI}) exp_label = model.predict(XI) exp_proba = model.predict_proba(XI) assert_almost_equal(exp_proba, got[1], decimal=5) @@ -85,8 +81,8 @@ def test_ovr_rf(self): assert_almost_equal(exp_label, got[0]) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('1.3.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= 
pv.Version("1.3.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_rf_multilabel_float(self): for opset in [12, TARGET_OPSET]: @@ -94,20 +90,26 @@ def test_ovr_rf_multilabel_float(self): continue with self.subTest(opset=opset): model = OneVsRestClassifier( - RandomForestClassifier(n_estimators=2, max_depth=3)) + RandomForestClassifier(n_estimators=2, max_depth=3) + ) model, X = fit_multilabel_classification_model( - model, 3, is_int=False, n_features=5) + model, 3, is_int=False, n_features=5 + ) model_onnx = convert_sklearn( - model, initial_types=[ - ('input', FloatTensorType([None, X.shape[1]]))], - target_opset=opset) + model, + initial_types=[("input", FloatTensorType([None, X.shape[1]]))], + target_opset=opset, + ) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnOVRRFMultiLabelFloat%d" % opset) + X.astype(np.float32), + model, + model_onnx, + basename="SklearnOVRRFMultiLabelFloat%d" % opset, + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('1.3.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_rf_multilabel_float_11(self): for opset in [9, 10, 11]: @@ -115,21 +117,27 @@ def test_ovr_rf_multilabel_float_11(self): continue with self.subTest(opset=opset): model = OneVsRestClassifier( - RandomForestClassifier(n_estimators=2, max_depth=3)) + RandomForestClassifier(n_estimators=2, max_depth=3) + ) model, X = fit_multilabel_classification_model( - model, 3, is_int=False, n_features=5) + model, 3, is_int=False, n_features=5 + ) model_onnx = convert_sklearn( - model, initial_types=[ - ('input', FloatTensorType([None, X.shape[1]]))], - target_opset=opset) + model, + initial_types=[("input", FloatTensorType([None, X.shape[1]]))], + target_opset=opset, + ) self.assertNotIn('"Clip"', str(model_onnx)) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnOVRRFMultiLabelFloat%d" % opset) + X.astype(np.float32), + model, + model_onnx, + basename="SklearnOVRRFMultiLabelFloat%d" % opset, + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('1.3.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_rf_multilabel_int(self): for opset in [12, TARGET_OPSET]: @@ -137,20 +145,26 @@ def test_ovr_rf_multilabel_int(self): continue with self.subTest(opset=opset): model = OneVsRestClassifier( - RandomForestClassifier(n_estimators=2, max_depth=3)) + RandomForestClassifier(n_estimators=2, max_depth=3) + ) model, X = fit_multilabel_classification_model( - model, 3, is_int=True, n_features=5) + model, 3, is_int=True, n_features=5 + ) model_onnx = convert_sklearn( - model, initial_types=[ - ('input', Int64TensorType([None, X.shape[1]]))], - target_opset=opset) + model, + initial_types=[("input", Int64TensorType([None, X.shape[1]]))], + target_opset=opset, + ) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnOVRRFMultiLabelInt64%d" % opset) + X.astype(np.int64), + model, + model_onnx, + basename="SklearnOVRRFMultiLabelInt64%d" % opset, + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('1.3.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_rf_multilabel_int_11(self): for opset in 
[9, 10, 11]: @@ -158,26 +172,29 @@ def test_ovr_rf_multilabel_int_11(self): continue with self.subTest(opset=opset): model = OneVsRestClassifier( - RandomForestClassifier(n_estimators=2, max_depth=3)) + RandomForestClassifier(n_estimators=2, max_depth=3) + ) model, X = fit_multilabel_classification_model( - model, 3, is_int=True, n_features=5) + model, 3, is_int=True, n_features=5 + ) model_onnx = convert_sklearn( - model, initial_types=[ - ('input', Int64TensorType([None, X.shape[1]]))], - target_opset=opset) + model, + initial_types=[("input", Int64TensorType([None, X.shape[1]]))], + target_opset=opset, + ) self.assertNotIn('"Clip"', str(model_onnx)) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnOVRRFMultiLabelInt64%d" % opset) + X.astype(np.int64), + model, + model_onnx, + basename="SklearnOVRRFMultiLabelInt64%d" % opset, + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_02(self): model = OneVsRestClassifier(LogisticRegression()) dump_multiple_classification( - model, - first_class=2, - suffix="F2", - target_opset=TARGET_OPSET + model, first_class=2, suffix="F2", target_opset=TARGET_OPSET ) @ignore_warnings(category=warnings_to_skip) @@ -188,37 +205,37 @@ def test_ovr_string(self): verbose=False, label_string=True, suffix="String", - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_float(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression(solver='liblinear')), 3) + OneVsRestClassifier(LogisticRegression(solver="liblinear")), 3 + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationFloat") + X, model, model_onnx, basename="SklearnOVRClassificationFloat" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_decision_function(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 4) - options = {id(model): {'raw_scores': True}} + OneVsRestClassifier(LogisticRegression()), 4 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], options=options, - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( @@ -226,32 +243,37 @@ def test_ovr_classification_decision_function(self): model, model_onnx, basename="SklearnOVRClassificationDecisionFunction", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) if pv.Version(ort_version) < pv.Version("1.0.0"): return - options = {id(model): {'raw_scores': True, 'zipmap': False}} + options = {id(model): {"raw_scores": True, "zipmap": False}} model_onnx = convert_sklearn( - model, "ovr classification", + model, + "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': X})[1] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": X})[1] dec = model.decision_function(X) assert_almost_equal(got, dec, decimal=4) 
@ignore_warnings(category=warnings_to_skip) def test_ovr_classification_decision_function_binary(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 2) - options = {id(model): {'raw_scores': True}} + OneVsRestClassifier(LogisticRegression()), 2 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], options=options, - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( @@ -259,18 +281,22 @@ def test_ovr_classification_decision_function_binary(self): model, model_onnx, basename="SklearnOVRClassificationDecisionFunctionBinary", - methods=['predict', 'decision_function_binary']) + methods=["predict", "decision_function_binary"], + ) if pv.Version(ort_version) < pv.Version("1.0.0"): return - options = {id(model): {'raw_scores': True, 'zipmap': False}} + options = {id(model): {"raw_scores": True, "zipmap": False}} model_onnx = convert_sklearn( - model, "ovr classification", + model, + "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': X})[1] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": X})[1] dec = model.decision_function(X) assert_almost_equal(got[:, 1], dec, decimal=4) assert_almost_equal(-got[:, 0], dec, decimal=4) @@ -278,135 +304,129 @@ def test_ovr_classification_decision_function_binary(self): @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_int(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 5, is_int=True) + OneVsRestClassifier(LogisticRegression()), 5, is_int=True + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationInt") + X, model, model_onnx, basename="SklearnOVRClassificationInt" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_float_binary(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 2) + OneVsRestClassifier(LogisticRegression()), 2 + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationFloatBin") + X, model, model_onnx, basename="SklearnOVRClassificationFloatBin" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_float_binary_nozipmap(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 2) + OneVsRestClassifier(LogisticRegression()), 2 + ) model_onnx = convert_sklearn( - model, "ovr classification", + model, + "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnOVRClassificationFloatBinNoZipMap") 
+ X, model, model_onnx, basename="SklearnOVRClassificationFloatBinNoZipMap" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_int_binary(self): model, X = fit_classification_model( - OneVsRestClassifier(LogisticRegression()), 2, is_int=True) + OneVsRestClassifier(LogisticRegression()), 2, is_int=True + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationIntBin") + X, model, model_onnx, basename="SklearnOVRClassificationIntBin" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_float_mlp(self): - model, X = fit_classification_model( - OneVsRestClassifier(MLPClassifier()), 4) + model, X = fit_classification_model(OneVsRestClassifier(MLPClassifier()), 4) model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationFloatMLP") + X, model, model_onnx, basename="SklearnOVRClassificationFloatMLP" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_int_ensemble(self): model, X = fit_classification_model( - OneVsRestClassifier(GradientBoostingClassifier()), 5, is_int=True) + OneVsRestClassifier(GradientBoostingClassifier()), 5, is_int=True + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationIntEnsemble") + X, model, model_onnx, basename="SklearnOVRClassificationIntEnsemble" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_float_binary_ensemble(self): model, X = fit_classification_model( - OneVsRestClassifier(GradientBoostingClassifier()), 2) + OneVsRestClassifier(GradientBoostingClassifier()), 2 + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationFloatBinEnsemble") + X, model, model_onnx, basename="SklearnOVRClassificationFloatBinEnsemble" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_classification_int_binary_mlp(self): model, X = fit_classification_model( - OneVsRestClassifier(MLPClassifier()), 2, is_int=True) + OneVsRestClassifier(MLPClassifier()), 2, is_int=True + ) model_onnx = convert_sklearn( model, "ovr classification", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRClassificationIntBinMLP") + X, model, model_onnx, basename="SklearnOVRClassificationIntBinMLP" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_regression_float(self): @@ -416,94 +436,93 @@ def test_ovr_regression_float(self): check only probabilities.""" rs = 11 model, X = fit_classification_model( - OneVsRestClassifier( - LinearRegression()), 3, random_state=rs) + OneVsRestClassifier(LinearRegression()), 3, 
random_state=rs + ) model_onnx = convert_sklearn( model, "ovr regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X[:5], - model, - model_onnx, - basename="SklearnOVRRegressionFloat-Out0") + X[:5], model, model_onnx, basename="SklearnOVRRegressionFloat-Out0" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_regression_int(self): model, X = fit_classification_model( - OneVsRestClassifier(LinearRegression()), 10, is_int=True) + OneVsRestClassifier(LinearRegression()), 10, is_int=True + ) model_onnx = convert_sklearn( model, "ovr regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRRegressionInt-Out0") + X, model, model_onnx, basename="SklearnOVRRegressionInt-Out0" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_regression_float_mlp(self): - model, X = fit_classification_model( - OneVsRestClassifier(MLPRegressor()), 5) + model, X = fit_classification_model(OneVsRestClassifier(MLPRegressor()), 5) model_onnx = convert_sklearn( model, "ovr regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRRegressionFloatMLP-Out0") + X, model, model_onnx, basename="SklearnOVRRegressionFloatMLP-Out0" + ) @ignore_warnings(category=warnings_to_skip) def test_ovr_regression_int_ensemble(self): model, X = fit_classification_model( - OneVsRestClassifier(GradientBoostingRegressor()), 4, is_int=True) + OneVsRestClassifier(GradientBoostingRegressor()), 4, is_int=True + ) model_onnx = convert_sklearn( model, "ovr regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnOVRRegressionIntEnsemble-Out0") + X, model, model_onnx, basename="SklearnOVRRegressionIntEnsemble-Out0" + ) - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.2.0"), - reason="fails to load the model") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), reason="fails to load the model" + ) def test_ovr_raw_scores(self): X, y = make_classification( - n_classes=2, n_samples=100, random_state=42, - n_features=100, n_informative=7) + n_classes=2, n_samples=100, random_state=42, n_features=100, n_informative=7 + ) X_train, X_test, y_train, _ = train_test_split( - X, y, test_size=0.5, random_state=42) + X, y, test_size=0.5, random_state=42 + ) model = OneVsRestClassifier( - estimator=GradientBoostingClassifier(random_state=42)) + estimator=GradientBoostingClassifier(random_state=42) + ) model.fit(X_train, y_train) - options = {id(model): {'raw_scores': True, 'zipmap': False}} + options = {id(model): {"raw_scores": True, "zipmap": False}} onnx_model = convert_sklearn( - model, 'lr', - [('input', FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + model, + "lr", + [("input", FloatTensorType([None, X_test.shape[1]]))], + options=options, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, input_feed={'input': X_test.astype(np.float32)}) + 
onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, input_feed={"input": X_test.astype(np.float32)}) exp = model.predict(X_test) assert_almost_equal(exp, res[0]) exp = model.decision_function(X_test) diff --git a/tests/test_sklearn_ordinal_encoder.py b/tests/test_sklearn_ordinal_encoder.py index 145c17cfe..33b818bd4 100644 --- a/tests/test_sklearn_ordinal_encoder.py +++ b/tests/test_sklearn_ordinal_encoder.py @@ -6,6 +6,7 @@ import numpy as np import onnxruntime from sklearn import __version__ as sklearn_version + try: from sklearn.preprocessing import OrdinalEncoder except ImportError: @@ -20,7 +21,7 @@ def ordinal_encoder_support(): # pv.Version does not work with development versions - vers = '.'.join(sklearn_version.split('.')[:2]) + vers = ".".join(sklearn_version.split(".")[:2]) if pv.Version(vers) < pv.Version("0.20.0"): return False if pv.Version(onnxruntime.__version__) < pv.Version("0.3.0"): @@ -31,25 +32,27 @@ def ordinal_encoder_support(): class TestSklearnOrdinalEncoderConverter(unittest.TestCase): @unittest.skipIf( not ordinal_encoder_support(), - reason="OrdinalEncoder was not available before 0.20") + reason="OrdinalEncoder was not available before 0.20", + ) def test_model_ordinal_encoder(self): model = OrdinalEncoder(dtype=np.int64) - data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], - dtype=np.int64) + data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.int64) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn ordinal encoder", + model, + "scikit-learn ordinal encoder", [("input", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOrdinalEncoderInt64-SkipDim1") + data, model, model_onnx, basename="SklearnOrdinalEncoderInt64-SkipDim1" + ) @unittest.skipIf( not ordinal_encoder_support(), - reason="OrdinalEncoder was not available before 0.20") + reason="OrdinalEncoder was not available before 0.20", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_ordinal_encoder_mixed_string_int_drop(self): data = [ @@ -68,61 +71,65 @@ def test_ordinal_encoder_mixed_string_int_drop(self): ("input2", Int64TensorType([None, 1])), ] model_onnx = convert_sklearn( - model, "ordinal encoder", inputs, target_opset=TARGET_OPSET) + model, "ordinal encoder", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - test, model, model_onnx, - basename="SklearnOrdinalEncoderMixedStringIntDrop") + test, model, model_onnx, basename="SklearnOrdinalEncoderMixedStringIntDrop" + ) @unittest.skipIf( not ordinal_encoder_support(), - reason="OrdinalEncoder was not available before 0.20") + reason="OrdinalEncoder was not available before 0.20", + ) def test_ordinal_encoder_onecat(self): data = [["cat"], ["cat"]] model = OrdinalEncoder(categories="auto") model.fit(data) inputs = [("input1", StringTensorType([None, 1]))] - model_onnx = convert_sklearn(model, "ordinal encoder one string cat", - inputs, target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, "ordinal encoder one string cat", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOrdinalEncoderOneStringCat") + data, model, model_onnx, basename="SklearnOrdinalEncoderOneStringCat" + ) @unittest.skipIf( not ordinal_encoder_support(), - 
reason="OrdinalEncoder was not available before 0.20") + reason="OrdinalEncoder was not available before 0.20", + ) def test_ordinal_encoder_twocats(self): data = [["cat2"], ["cat1"]] model = OrdinalEncoder(categories="auto") model.fit(data) inputs = [("input1", StringTensorType([None, 1]))] - model_onnx = convert_sklearn(model, "ordinal encoder two string cats", - inputs, target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, "ordinal encoder two string cats", inputs, target_opset=TARGET_OPSET + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOrdinalEncoderTwoStringCat") + data, model, model_onnx, basename="SklearnOrdinalEncoderTwoStringCat" + ) @unittest.skipIf( not ordinal_encoder_support(), - reason="OrdinalEncoder was not available before 0.20") + reason="OrdinalEncoder was not available before 0.20", + ) def test_model_ordinal_encoder_cat_list(self): - model = OrdinalEncoder(categories=[[0, 1, 4, 5], - [1, 2, 3, 5], - [0, 3, 4, 6]]) - data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], - dtype=np.int64) + model = OrdinalEncoder(categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]]) + data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.int64) model.fit(data) model_onnx = convert_sklearn( - model, "scikit-learn ordinal encoder", + model, + "scikit-learn ordinal encoder", [("input", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnOrdinalEncoderCatList") + data, model, model_onnx, basename="SklearnOrdinalEncoderCatList" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_passive_aggressive_classifier_converter.py b/tests/test_sklearn_passive_aggressive_classifier_converter.py index 35d4aedd6..45874a842 100644 --- a/tests/test_sklearn_passive_aggressive_classifier_converter.py +++ b/tests/test_sklearn_passive_aggressive_classifier_converter.py @@ -5,78 +5,81 @@ from sklearn.linear_model import PassiveAggressiveClassifier from skl2onnx import convert_sklearn from skl2onnx.common.data_types import FloatTensorType, Int64TensorType -from test_utils import ( - dump_data_and_model, - fit_classification_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET class TestPassiveAggressiveClassifierConverter(unittest.TestCase): - def test_model_passive_aggressive_classifier_binary_class(self): model, X = fit_classification_model( - PassiveAggressiveClassifier(random_state=42), 2) + PassiveAggressiveClassifier(random_state=42), 2 + ) model_onnx = convert_sklearn( model, "scikit-learn PassiveAggressiveClassifier binary", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, model_onnx, - basename="SklearnPassiveAggressiveClassifierBinary-Out0") + basename="SklearnPassiveAggressiveClassifierBinary-Out0", + ) def test_model_passive_aggressive_classifier_multi_class(self): model, X = fit_classification_model( - PassiveAggressiveClassifier(random_state=42), 5) + PassiveAggressiveClassifier(random_state=42), 5 + ) model_onnx = convert_sklearn( model, "scikit-learn PassiveAggressiveClassifier multi-class", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, 
model_onnx, - basename="SklearnPassiveAggressiveClassifierMulti-Out0") + basename="SklearnPassiveAggressiveClassifierMulti-Out0", + ) def test_model_passive_aggressive_classifier_binary_class_int(self): model, X = fit_classification_model( - PassiveAggressiveClassifier(random_state=42), 2, is_int=True) + PassiveAggressiveClassifier(random_state=42), 2, is_int=True + ) model_onnx = convert_sklearn( model, "scikit-learn PassiveAggressiveClassifier binary", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, model_onnx, - basename="SklearnPassiveAggressiveClassifierBinaryInt-Out0") + basename="SklearnPassiveAggressiveClassifierBinaryInt-Out0", + ) def test_model_passive_aggressive_classifier_multi_class_int(self): model, X = fit_classification_model( - PassiveAggressiveClassifier(random_state=42), 5, is_int=True) + PassiveAggressiveClassifier(random_state=42), 5, is_int=True + ) model_onnx = convert_sklearn( model, "scikit-learn PassiveAggressiveClassifier multi-class", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X, model, model_onnx, - basename="SklearnPassiveAggressiveClassifierMultiInt-Out0") + basename="SklearnPassiveAggressiveClassifierMultiInt-Out0", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_pca_converter.py b/tests/test_sklearn_pca_converter.py index 8a1c9d588..4acfebf9f 100644 --- a/tests/test_sklearn_pca_converter.py +++ b/tests/test_sklearn_pca_converter.py @@ -14,7 +14,8 @@ def _fit_model_pca(model): data = load_diabetes() X_train, X_test, *_ = train_test_split( - data.data, data.target, test_size=0.2, random_state=42) + data.data, data.target, test_size=0.2, random_state=42 + ) model.fit(X_train) return model, X_test.astype(np.float32) @@ -24,81 +25,93 @@ def test_pca_default(self): model, X_test = _fit_model_pca(PCA(random_state=42)) model_onnx = convert_sklearn( model, - initial_types=[("input", - FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnPCADefault") + dump_data_and_model(X_test, model, model_onnx, basename="SklearnPCADefault") def test_incrementalpca_default(self): model, X_test = _fit_model_pca(IncrementalPCA()) model_onnx = convert_sklearn( model, - initial_types=[("input", - FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnIncrementalPCADefault") + X_test, model, model_onnx, basename="SklearnIncrementalPCADefault" + ) def test_pca_parameters_auto(self): - model, X_test = _fit_model_pca(PCA( - random_state=42, copy=False, tol=0.1, whiten=True, - n_components=0.9005263157894737, svd_solver="auto")) + model, X_test = _fit_model_pca( + PCA( + random_state=42, + copy=False, + tol=0.1, + whiten=True, + n_components=0.9005263157894737, + svd_solver="auto", + ) + ) model_onnx = convert_sklearn( model, - initial_types=[("input", - FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", FloatTensorType([None, 
X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnPCAParametersAuto") + X_test, model, model_onnx, basename="SklearnPCAParametersAuto" + ) def test_pca_parameters_arpack(self): - model, X_test = _fit_model_pca(PCA( - random_state=42, n_components=4, svd_solver='arpack')) + model, X_test = _fit_model_pca( + PCA(random_state=42, n_components=4, svd_solver="arpack") + ) model_onnx = convert_sklearn( model, - initial_types=[("input", - FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnPCAParametersArpack") + X_test, model, model_onnx, basename="SklearnPCAParametersArpack" + ) def test_pca_parameters_full(self): - model, X_test = _fit_model_pca(PCA( - random_state=42, n_components=5, svd_solver='full', whiten=True)) + model, X_test = _fit_model_pca( + PCA(random_state=42, n_components=5, svd_solver="full", whiten=True) + ) model_onnx = convert_sklearn( model, - initial_types=[("input", - FloatTensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnPCAParametersFull") + X_test, model, model_onnx, basename="SklearnPCAParametersFull" + ) def test_pca_default_int_randomised(self): data = load_digits() X_train, X_test, *_ = train_test_split( - data.data, data.target, test_size=0.2, random_state=42) - model = PCA(random_state=42, svd_solver='randomized', - iterated_power=3).fit(X_train) + data.data, data.target, test_size=0.2, random_state=42 + ) + model = PCA(random_state=42, svd_solver="randomized", iterated_power=3).fit( + X_train + ) model_onnx = convert_sklearn( model, - initial_types=[("input", - Int64TensorType([None, X_test.shape[1]]))], - target_opset=TARGET_OPSET) + initial_types=[("input", Int64TensorType([None, X_test.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X_test.astype(np.int64), model, model_onnx, - basename="SklearnPCADefaultIntRandomised") + X_test.astype(np.int64), + model, + model_onnx, + basename="SklearnPCADefaultIntRandomised", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_perceptron_converter.py b/tests/test_sklearn_perceptron_converter.py index dfd806bde..343e6822b 100644 --- a/tests/test_sklearn_perceptron_converter.py +++ b/tests/test_sklearn_perceptron_converter.py @@ -7,79 +7,74 @@ from sklearn.linear_model import Perceptron from skl2onnx import convert_sklearn from skl2onnx.common.data_types import FloatTensorType, Int64TensorType -from test_utils import ( - dump_data_and_model, - fit_classification_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET class TestPerceptronClassifierConverter(unittest.TestCase): - def test_model_perceptron_binary_class(self): - model, X = fit_classification_model( - Perceptron(random_state=42), 2) + model, X = fit_classification_model(Perceptron(random_state=42), 2) model_onnx = convert_sklearn( model, "scikit-learn Perceptron binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + 
target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32), model, model_onnx, - basename="SklearnPerceptronClassifierBinary-Out0") + basename="SklearnPerceptronClassifierBinary-Out0", + ) def test_model_perceptron_multi_class(self): - model, X = fit_classification_model( - Perceptron(random_state=42), 5) + model, X = fit_classification_model(Perceptron(random_state=42), 5) model_onnx = convert_sklearn( model, "scikit-learn Perceptron multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32), model, model_onnx, - basename="SklearnPerceptronClassifierMulti-Out0") + basename="SklearnPerceptronClassifierMulti-Out0", + ) def test_model_perceptron_binary_class_int(self): - model, X = fit_classification_model( - Perceptron(random_state=42), 2, is_int=True) + model, X = fit_classification_model(Perceptron(random_state=42), 2, is_int=True) model_onnx = convert_sklearn( model, "scikit-learn Perceptron binary classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.int64), model, model_onnx, - basename="SklearnPerceptronClassifierBinaryInt-Out0") + basename="SklearnPerceptronClassifierBinaryInt-Out0", + ) def test_model_perceptron_multi_class_int(self): - model, X = fit_classification_model( - Perceptron(random_state=42), 5, is_int=True) + model, X = fit_classification_model(Perceptron(random_state=42), 5, is_int=True) model_onnx = convert_sklearn( model, "scikit-learn Perceptron multi-class classifier", [("input", Int64TensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={'zipmap': False}, + options={"zipmap": False}, ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.int64)[:10], model, model_onnx, - basename="SklearnPerceptronClassifierMultiInt-Out0") + basename="SklearnPerceptronClassifierMultiInt-Out0", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_pipeline.py b/tests/test_sklearn_pipeline.py index 732f69b3f..6c4747696 100644 --- a/tests/test_sklearn_pipeline.py +++ b/tests/test_sklearn_pipeline.py @@ -23,8 +23,7 @@ from sklearn.utils.testing import ignore_warnings try: from sklearn.compose import ColumnTransformer - from sklearn.compose import ( - make_column_transformer, make_column_selector) + from sklearn.compose import make_column_transformer, make_column_selector except ImportError: # not available in 0.19 ColumnTransformer = None @@ -40,8 +39,11 @@ from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.preprocessing import ( - OneHotEncoder, StandardScaler, MinMaxScaler, - MaxAbsScaler) + OneHotEncoder, + StandardScaler, + MinMaxScaler, + MaxAbsScaler, +) from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer from sklearn.ensemble import VotingClassifier, RandomForestClassifier from sklearn.naive_bayes import MultinomialNB @@ -54,15 +56,18 @@ ) from sklearn.multioutput import MultiOutputClassifier from test_utils import ( - dump_data_and_model, fit_classification_model, TARGET_OPSET, + dump_data_and_model, + fit_classification_model, + TARGET_OPSET, InferenceSessionEx as InferenceSession, - ReferenceEvaluatorEx) + ReferenceEvaluatorEx, +) from onnxruntime import __version__ as ort_version # pv.Version does not work with development 
versions -ort_version = ".".join(ort_version.split('.')[:2]) -skl_version = ".".join(skl_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) +skl_version = ".".join(skl_version.split(".")[:2]) def check_scikit_version(): @@ -85,32 +90,31 @@ def transform(self, inp): res = self.pipe.transform(x2) return res else: - raise TypeError("Unable to predict with type {0}".format( - type(inp))) + raise TypeError("Unable to predict with type {0}".format(type(inp))) class TestSklearnPipeline(unittest.TestCase): - @ignore_warnings(category=FutureWarning) def test_pipeline(self): - data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]], - dtype=numpy.float32) + data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=numpy.float32) scaler = StandardScaler() scaler.fit(data) model = Pipeline([("scaler1", scaler), ("scaler2", scaler)]) - model_onnx = convert_sklearn(model, "pipeline", - [("input", FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "pipeline", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model(data, model, model_onnx, - basename="SklearnPipelineScaler") + dump_data_and_model(data, model, model_onnx, basename="SklearnPipelineScaler") @ignore_warnings(category=FutureWarning) def test_combine_inputs(self): data = numpy.array( - [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], - dtype=numpy.float32) + [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32 + ) scaler = StandardScaler() scaler.fit(data) model = Pipeline([("scaler1", scaler), ("scaler2", scaler)]) @@ -122,7 +126,8 @@ def test_combine_inputs(self): ("input1", FloatTensorType([None, 1])), ("input2", FloatTensorType([None, 1])), ], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(len(model_onnx.graph.node[-1].output) == 1) self.assertTrue(model_onnx is not None) data = { @@ -130,12 +135,15 @@ def test_combine_inputs(self): "input2": data[:, 1].reshape((-1, 1)), } dump_data_and_model( - data, PipeConcatenateInput(model), - model_onnx, basename="SklearnPipelineScaler11") + data, + PipeConcatenateInput(model), + model_onnx, + basename="SklearnPipelineScaler11", + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=FutureWarning) def test_combine_inputs_union_in_pipeline(self): from sklearn.preprocessing import StandardScaler @@ -145,16 +153,20 @@ def test_combine_inputs_union_in_pipeline(self): [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32, ) - model = Pipeline([ - ("scaler1", StandardScaler()), - ( - "union", - FeatureUnion([ - ("scaler2", StandardScaler()), - ("scaler3", MinMaxScaler()), - ]), - ), - ]) + model = Pipeline( + [ + ("scaler1", StandardScaler()), + ( + "union", + FeatureUnion( + [ + ("scaler2", StandardScaler()), + ("scaler3", MinMaxScaler()), + ] + ), + ), + ] + ) model.fit(data) model_onnx = convert_sklearn( model, @@ -163,7 +175,8 @@ def test_combine_inputs_union_in_pipeline(self): ("input1", FloatTensorType([None, 1])), ("input2", FloatTensorType([None, 1])), ], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(len(model_onnx.graph.node[-1].output) == 1) self.assertTrue(model_onnx is not None) data = { @@ -171,14 +184,18 @@ def test_combine_inputs_union_in_pipeline(self): "input2": data[:, 1].reshape((-1, 
1)), } dump_data_and_model( - data, PipeConcatenateInput(model), - model_onnx, basename="SklearnPipelineScaler11Union") + data, + PipeConcatenateInput(model), + model_onnx, + basename="SklearnPipelineScaler11Union", + ) + TARGET_OPSET @unittest.skipIf(TARGET_OPSET < 15, reason="uses CastLike") @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) @ignore_warnings(category=FutureWarning) def test_combine_inputs_floats_ints(self): data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]] @@ -193,7 +210,8 @@ def test_combine_inputs_floats_ints(self): ("input1", Int64TensorType([None, 1])), ("input2", FloatTensorType([None, 1])), ], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(len(model_onnx.graph.node[-1].output) == 1) self.assertTrue(model_onnx is not None) data = numpy.array(data) @@ -202,25 +220,26 @@ def test_combine_inputs_floats_ints(self): "input2": data[:, 1].reshape((-1, 1)).astype(numpy.float32), } dump_data_and_model( - data, PipeConcatenateInput(model), - model_onnx, basename="SklearnPipelineScalerMixed") + data, + PipeConcatenateInput(model), + model_onnx, + basename="SklearnPipelineScalerMixed", + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="issues with shapes") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes" + ) @ignore_warnings(category=(RuntimeWarning, FutureWarning)) def test_pipeline_column_transformer(self): - iris = datasets.load_iris() X = iris.data[:, :3] y = iris.target X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"]) - X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" - if x > 0.5 else "cat2") - X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" - if x > 0.5 else "cat4") + X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" if x > 0.5 else "cat2") + X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" if x > 0.5 else "cat4") y_train = y % 2 numeric_features = [0, 1, 2] # ["vA", "vB", "vC"] categorical_features = [3, 4] # ["vcat", "vcat2"] @@ -228,31 +247,42 @@ def test_pipeline_column_transformer(self): classifier = LogisticRegression( C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), - n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3) + n_jobs=1, + max_iter=10, + solver="lbfgs", + tol=1e-3, + ) - numeric_transformer = Pipeline(steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("scaler", StandardScaler()), - ]) + numeric_transformer = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ] + ) - categorical_transformer = Pipeline(steps=[ - ( - "onehot", - OneHotEncoder(sparse=True, handle_unknown="ignore"), - ), - ( - "tsvd", - TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4), - ), - ]) + categorical_transformer = Pipeline( + steps=[ + ( + "onehot", + OneHotEncoder(sparse=True, handle_unknown="ignore"), + ), + ( + "tsvd", + TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4), + ), + ] + ) - preprocessor = ColumnTransformer(transformers=[ - ("num", numeric_transformer, numeric_features), - ("cat", categorical_transformer, categorical_features), - ]) + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + 
("cat", categorical_transformer, categorical_features), + ] + ) - model = Pipeline(steps=[("precprocessor", - preprocessor), ("classifier", classifier)]) + model = Pipeline( + steps=[("precprocessor", preprocessor), ("classifier", classifier)] + ) model.fit(X_train, y_train) initial_type = [ @@ -261,17 +291,20 @@ def test_pipeline_column_transformer(self): ] X_train = X_train[:11] - model_onnx = convert_sklearn(model, initial_types=initial_type, - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, initial_types=initial_type, target_opset=TARGET_OPSET + ) dump_data_and_model( - X_train, model, model_onnx, - basename="SklearnPipelineColumnTransformerPipeliner") + X_train, + model, + model_onnx, + basename="SklearnPipelineColumnTransformerPipeliner", + ) if __name__ == "__main__": try: - from onnx.tools.net_drawer import ( - GetPydotGraph, GetOpNodeProducer) + from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer except ImportError: return @@ -279,7 +312,8 @@ def test_pipeline_column_transformer(self): model_onnx.graph, name=model_onnx.graph.name, rankdir="TP", - node_producer=GetOpNodeProducer("docstring")) + node_producer=GetOpNodeProducer("docstring"), + ) pydot_graph.write_dot("graph.dot") import os @@ -287,19 +321,19 @@ def test_pipeline_column_transformer(self): os.system("dot -O -G=300 -Tpng graph.dot") @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.20 causes some mismatches") + not check_scikit_version(), reason="Scikit 0.20 causes some mismatches" + ) @ignore_warnings(category=FutureWarning) def test_pipeline_column_transformer_titanic(self): - # fit try: titanic_url = ( "https://raw.githubusercontent.com/amueller/" - "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv") + "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv" + ) data = pandas.read_csv(titanic_url) except url_error.URLError: # Do not fail the test if the data cannot be fetched. @@ -314,33 +348,40 @@ def test_pipeline_column_transformer_titanic(self): for cat in ["embarked", "sex", "pclass"]: X[cat].fillna("missing", inplace=True) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) numeric_features = ["age", "fare"] - numeric_transformer = Pipeline(steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("scaler", StandardScaler()), - ]) + numeric_transformer = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ] + ) categorical_features = ["embarked", "sex", "pclass"] - categorical_transformer = Pipeline(steps=[ - # --- SimpleImputer on string is not available - # for string in ONNX-ML specifications. - # ('imputer', - # SimpleImputer(strategy='constant', fill_value='missing')), - ("onehot", OneHotEncoder(handle_unknown="ignore")) - ]) - - preprocessor = ColumnTransformer(transformers=[ - ("num", numeric_transformer, numeric_features), - ("cat", categorical_transformer, categorical_features), - ]) - - clf = Pipeline(steps=[ - ("preprocessor", preprocessor), - # ("classifier", LogisticRegression(solver="lbfgs")), - ]) + categorical_transformer = Pipeline( + steps=[ + # --- SimpleImputer on string is not available + # for string in ONNX-ML specifications. 
+ # ('imputer', + # SimpleImputer(strategy='constant', fill_value='missing')), + ("onehot", OneHotEncoder(handle_unknown="ignore")) + ] + ) + + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] + ) + + clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + # ("classifier", LogisticRegression(solver="lbfgs")), + ] + ) # inputs @@ -349,7 +390,7 @@ def convert_dataframe_schema(df, drop=None): for k, v in zip(df.columns, df.dtypes): if drop is not None and k in drop: continue - if v == 'int64': + if v == "int64": t = Int64TensorType([None, 1]) elif v == "float64": t = FloatTensorType([None, 1]) @@ -371,135 +412,174 @@ def convert_dataframe_schema(df, drop=None): X_train = X_train.copy() X_test = X_test.copy() - X_train['pclass'] = X_train['pclass'].astype(numpy.int64) - X_test['pclass'] = X_test['pclass'].astype(numpy.int64) + X_train["pclass"] = X_train["pclass"].astype(numpy.int64) + X_test["pclass"] = X_test["pclass"].astype(numpy.int64) X_train = X_train.drop(to_drop, axis=1) X_test = X_test.drop(to_drop, axis=1) # Step 1: without classifier clf.fit(X_train, y_train) initial_inputs = convert_dataframe_schema(X_train, to_drop) - model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs, - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + clf, "pipeline_titanic", initial_inputs, target_opset=TARGET_OPSET + ) data = X_test pred = clf.transform(data) data_types = { - 'pclass': numpy.int64, - 'age': numpy.float32, - 'sex': numpy.str_, - 'fare': numpy.float32, - 'embarked': numpy.str_, + "pclass": numpy.int64, + "age": numpy.float32, + "sex": numpy.str_, + "fare": numpy.float32, + "embarked": numpy.str_, + } + inputs = { + k: data[k].values.astype(data_types[k]).reshape(-1, 1) for k in data.columns } - inputs = {k: data[k].values.astype(data_types[k]).reshape(-1, 1) - for k in data.columns} sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) run = sess.run(None, inputs) got = run[-1] assert_almost_equal(pred, got, decimal=5) # Step 2: with classifier - clf = Pipeline(steps=[ - ("preprocessor", preprocessor), - ("classifier", LogisticRegression(solver="lbfgs")), - ]).fit(X_train, y_train) + clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", LogisticRegression(solver="lbfgs")), + ] + ).fit(X_train, y_train) pred = clf.predict_proba(data) - model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs, - target_opset=TARGET_OPSET, - options={id(clf): {'zipmap': False}}) + model_onnx = convert_sklearn( + clf, + "pipeline_titanic", + initial_inputs, + target_opset=TARGET_OPSET, + options={id(clf): {"zipmap": False}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) run = sess.run(None, inputs) got = run[-1] assert_almost_equal(pred, got, decimal=5) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @ignore_warnings(category=FutureWarning) def test_column_transformer_weights(self): model, X = fit_classification_model( ColumnTransformer( - [('pca', PCA(n_components=5), slice(0, 10)), - ('svd', TruncatedSVD(n_components=5), slice(10, 100))], - 
transformer_weights={'pca': 2, 'svd': 3}), 3, n_features=100) + [ + ("pca", PCA(n_components=5), slice(0, 10)), + ("svd", TruncatedSVD(n_components=5), slice(10, 100)), + ], + transformer_weights={"pca": 2, "svd": 3}, + ), + 3, + n_features=100, + ) model_onnx = convert_sklearn( model, "column transformer weights", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnColumnTransformerWeights-Dec4") + X, model, model_onnx, basename="SklearnColumnTransformerWeights-Dec4" + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @ignore_warnings(category=FutureWarning) def test_column_transformer_drop(self): model, X = fit_classification_model( ColumnTransformer( - [('pca', PCA(n_components=5), slice(0, 10)), - ('svd', TruncatedSVD(n_components=5), slice(80, 100))], - remainder='drop'), 3, n_features=100) + [ + ("pca", PCA(n_components=5), slice(0, 10)), + ("svd", TruncatedSVD(n_components=5), slice(80, 100)), + ], + remainder="drop", + ), + 3, + n_features=100, + ) model_onnx = convert_sklearn( model, "column transformer drop", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnColumnTransformerDrop") + X, model, model_onnx, basename="SklearnColumnTransformerDrop" + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @ignore_warnings(category=FutureWarning) def test_column_transformer_passthrough(self): model, X = fit_classification_model( ColumnTransformer( - [('pca', PCA(n_components=5), slice(0, 10)), - ('svd', TruncatedSVD(n_components=5), slice(80, 100))], - transformer_weights={'pca': 2, 'svd': 3}, - remainder='passthrough'), 3, n_features=100) + [ + ("pca", PCA(n_components=5), slice(0, 10)), + ("svd", TruncatedSVD(n_components=5), slice(80, 100)), + ], + transformer_weights={"pca": 2, "svd": 3}, + remainder="passthrough", + ), + 3, + n_features=100, + ) model_onnx = convert_sklearn( model, "column transformer passthrough", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnColumnTransformerPassthrough") + X, model, model_onnx, basename="SklearnColumnTransformerPassthrough" + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @ignore_warnings(category=FutureWarning) def test_column_transformer_passthrough_no_weights(self): model, X = fit_classification_model( ColumnTransformer( - [('pca', PCA(n_components=5), slice(0, 10)), - ('svd', TruncatedSVD(n_components=5), slice(70, 80))], - remainder='passthrough'), 3, n_features=100) + [ + ("pca", PCA(n_components=5), slice(0, 10)), + ("svd", TruncatedSVD(n_components=5), slice(70, 80)), + ], + remainder="passthrough", + ), + 3, + n_features=100, + ) model_onnx = convert_sklearn( model, "column transformer passthrough", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) 
+ target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnColumnTransformerPassthroughNoWeights") + X, + model, + model_onnx, + basename="SklearnColumnTransformerPassthroughNoWeights", + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @ignore_warnings(category=FutureWarning) def test_pipeline_dataframe(self): text = """ @@ -508,46 +588,66 @@ def test_pipeline_dataframe(self): 7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red 7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red 11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red - """.replace(" ", "") + """.replace( + " ", "" + ) X_train = pandas.read_csv(StringIO(text)) for c in X_train.columns: - if c != 'color': + if c != "color": X_train[c] = X_train[c].astype(numpy.float32) - numeric_features = [c for c in X_train if c != 'color'] - - pipe = Pipeline([ - ("prep", ColumnTransformer([ - ("color", Pipeline([ - ('one', OneHotEncoder()), - ('select', ColumnTransformer( - [('sel1', 'passthrough', [0])])) - ]), ['color']), - ("others", "passthrough", numeric_features) - ])), - ]) + numeric_features = [c for c in X_train if c != "color"] + + pipe = Pipeline( + [ + ( + "prep", + ColumnTransformer( + [ + ( + "color", + Pipeline( + [ + ("one", OneHotEncoder()), + ( + "select", + ColumnTransformer( + [("sel1", "passthrough", [0])] + ), + ), + ] + ), + ["color"], + ), + ("others", "passthrough", numeric_features), + ] + ), + ), + ] + ) init_types = [ - ('fixed_acidity', FloatTensorType(shape=[None, 1])), - ('volatile_acidity', FloatTensorType(shape=[None, 1])), - ('citric_acid', FloatTensorType(shape=[None, 1])), - ('residual_sugar', FloatTensorType(shape=[None, 1])), - ('chlorides', FloatTensorType(shape=[None, 1])), - ('free_sulfur_dioxide', FloatTensorType(shape=[None, 1])), - ('total_sulfur_dioxide', FloatTensorType(shape=[None, 1])), - ('density', FloatTensorType(shape=[None, 1])), - ('pH', FloatTensorType(shape=[None, 1])), - ('sulphates', FloatTensorType(shape=[None, 1])), - ('alcohol', FloatTensorType(shape=[None, 1])), - ('quality', FloatTensorType(shape=[None, 1])), - ('color', StringTensorType(shape=[None, 1])) + ("fixed_acidity", FloatTensorType(shape=[None, 1])), + ("volatile_acidity", FloatTensorType(shape=[None, 1])), + ("citric_acid", FloatTensorType(shape=[None, 1])), + ("residual_sugar", FloatTensorType(shape=[None, 1])), + ("chlorides", FloatTensorType(shape=[None, 1])), + ("free_sulfur_dioxide", FloatTensorType(shape=[None, 1])), + ("total_sulfur_dioxide", FloatTensorType(shape=[None, 1])), + ("density", FloatTensorType(shape=[None, 1])), + ("pH", FloatTensorType(shape=[None, 1])), + ("sulphates", FloatTensorType(shape=[None, 1])), + ("alcohol", FloatTensorType(shape=[None, 1])), + ("quality", FloatTensorType(shape=[None, 1])), + ("color", StringTensorType(shape=[None, 1])), ] pipe.fit(X_train) model_onnx = convert_sklearn( - pipe, initial_types=init_types, target_opset=TARGET_OPSET) + pipe, initial_types=init_types, target_opset=TARGET_OPSET + ) oinf = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) pred = pipe.transform(X_train) inputs = {c: X_train[c].values for c in X_train.columns} @@ -558,28 +658,39 @@ def test_pipeline_dataframe(self): @ignore_warnings(category=(FutureWarning, 
UserWarning)) def test_pipeline_tfidf_svc(self): - pipe = Pipeline([ - ('tfidf', TfidfVectorizer()), - ('clf_svc', SVC(probability=True, kernel='linear'))]) - data = numpy.array(["first sentance", "second sentence", - "many sentances", "dummy sentance", - "no sentance at all"]) + pipe = Pipeline( + [ + ("tfidf", TfidfVectorizer()), + ("clf_svc", SVC(probability=True, kernel="linear")), + ] + ) + data = numpy.array( + [ + "first sentance", + "second sentence", + "many sentances", + "dummy sentance", + "no sentance at all", + ] + ) y = numpy.array([0, 0, 1, 0, 1]) pipe.fit(data, y) expected_label = pipe.predict(data) expected_proba = pipe.predict_proba(data) df = pandas.DataFrame(data) - df.columns = ['text'] + df.columns = ["text"] # first conversion if shape=[None, 1] model_onnx = convert_sklearn( - pipe, initial_types=[('text', StringTensorType([None, 1]))], + pipe, + initial_types=[("text", StringTensorType([None, 1]))], target_opset=TARGET_OPSET, - options={id(pipe): {'zipmap': False}}) + options={id(pipe): {"zipmap": False}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'text': data.reshape((-1, 1))}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"text": data.reshape((-1, 1))}) assert_almost_equal(expected_proba, got[1]) assert_almost_equal(expected_label, got[0]) # sess.run(None, {'text': df}) --> failures @@ -587,194 +698,286 @@ def test_pipeline_tfidf_svc(self): # second conversion with shape=[None] model_onnx = convert_sklearn( - pipe, initial_types=[('text', StringTensorType([None]))], + pipe, + initial_types=[("text", StringTensorType([None]))], target_opset=TARGET_OPSET, - options={id(pipe): {'zipmap': False}}) + options={id(pipe): {"zipmap": False}}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'text': data}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"text": data}) assert_almost_equal(expected_proba, got[1]) assert_almost_equal(expected_label, got[0]) # sess.run(None, {'text': df}) failure # sess.run(None, {'text': df["text"]}) failure - sess.run(None, {'text': df["text"].values}) # success + sess.run(None, {"text": df["text"].values}) # success @ignore_warnings(category=(FutureWarning, UserWarning)) def test_pipeline_voting_tfidf_svc(self): - pipe1 = Pipeline([ - ('tfidf1', TfidfVectorizer()), - ('svc', SVC(probability=True, kernel='linear'))]) - pipe2 = Pipeline([ - ('tfidf2', TfidfVectorizer(norm='l2', use_idf=False)), - ('sgd', SGDClassifier(alpha=0.0001, penalty='l2', - loss='modified_huber'))]) - pipe3 = Pipeline([ - ('tfidf3', TfidfVectorizer()), - ('mnb', MultinomialNB())]) + pipe1 = Pipeline( + [ + ("tfidf1", TfidfVectorizer()), + ("svc", SVC(probability=True, kernel="linear")), + ] + ) + pipe2 = Pipeline( + [ + ("tfidf2", TfidfVectorizer(norm="l2", use_idf=False)), + ( + "sgd", + SGDClassifier(alpha=0.0001, penalty="l2", loss="modified_huber"), + ), + ] + ) + pipe3 = Pipeline([("tfidf3", TfidfVectorizer()), ("mnb", MultinomialNB())]) voting = VotingClassifier( - [('p1', pipe1), ('p2', pipe2), ('p3', pipe3)], - voting='soft', flatten_transform=False) - data = numpy.array(["first sentance", "second sentence", - "many sentances", "dummy sentance", - "no sentance at all"]) + [("p1", pipe1), ("p2", pipe2), ("p3", pipe3)], + voting="soft", + flatten_transform=False, + ) + data = numpy.array( + [ + "first 
sentance", + "second sentence", + "many sentances", + "dummy sentance", + "no sentance at all", + ] + ) y = numpy.array([0, 0, 1, 0, 1]) voting.fit(data, y) expected_label = voting.predict(data) expected_proba = voting.predict_proba(data) df = pandas.DataFrame(data) - df.columns = ['text'] + df.columns = ["text"] model_onnx = convert_sklearn( - voting, initial_types=[('text', StringTensorType([None, 1]))], + voting, + initial_types=[("text", StringTensorType([None, 1]))], target_opset=TARGET_OPSET, - options={id(voting): {'zipmap': False}}) + options={id(voting): {"zipmap": False}}, + ) # with open("debug.onnx", "wb") as f: # f.write(model_onnx.SerializeToString()) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'text': data.reshape((-1, 1))}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"text": data.reshape((-1, 1))}) assert_almost_equal(expected_proba, got[1], decimal=5) assert_almost_equal(expected_label, got[0]) @ignore_warnings(category=(FutureWarning, UserWarning)) def test_pipeline_pipeline_voting_tfidf_svc(self): - pipe1 = Pipeline([ - ('ntfidf1', Pipeline([ - ('tfidf1', TfidfVectorizer()), - ('scaler', FeatureUnion([ - ('scaler2', StandardScaler(with_mean=False)), - ('mm', MaxAbsScaler())]))])), - ('svc', SVC(probability=True, kernel='linear'))]) - pipe2 = Pipeline([ - ('tfidf2', TfidfVectorizer(norm='l2', use_idf=False)), - ('sgd', SGDClassifier(alpha=0.0001, penalty='l2', - loss='modified_huber'))]) - pipe3 = Pipeline([ - ('tfidf3', TfidfVectorizer()), - ('mnb', MultinomialNB())]) + pipe1 = Pipeline( + [ + ( + "ntfidf1", + Pipeline( + [ + ("tfidf1", TfidfVectorizer()), + ( + "scaler", + FeatureUnion( + [ + ("scaler2", StandardScaler(with_mean=False)), + ("mm", MaxAbsScaler()), + ] + ), + ), + ] + ), + ), + ("svc", SVC(probability=True, kernel="linear")), + ] + ) + pipe2 = Pipeline( + [ + ("tfidf2", TfidfVectorizer(norm="l2", use_idf=False)), + ( + "sgd", + SGDClassifier(alpha=0.0001, penalty="l2", loss="modified_huber"), + ), + ] + ) + pipe3 = Pipeline([("tfidf3", TfidfVectorizer()), ("mnb", MultinomialNB())]) voting = VotingClassifier( - [('p1', pipe1), ('p2', pipe2), ('p3', pipe3)], - voting='soft', flatten_transform=False) - data = numpy.array(["first sentance", "second sentence", - "many sentances", "dummy sentance", - "no sentance at all"]) + [("p1", pipe1), ("p2", pipe2), ("p3", pipe3)], + voting="soft", + flatten_transform=False, + ) + data = numpy.array( + [ + "first sentance", + "second sentence", + "many sentances", + "dummy sentance", + "no sentance at all", + ] + ) y = numpy.array([0, 0, 1, 0, 1]) voting.fit(data, y) expected_label = voting.predict(data) expected_proba = voting.predict_proba(data) df = pandas.DataFrame(data) - df.columns = ['text'] + df.columns = ["text"] model_onnx = convert_sklearn( - voting, initial_types=[('text', StringTensorType([None, 1]))], + voting, + initial_types=[("text", StringTensorType([None, 1]))], target_opset=TARGET_OPSET, - options={id(voting): {'zipmap': False}}) + options={id(voting): {"zipmap": False}}, + ) # with open("debug.onnx", "wb") as f: # f.write(model_onnx.SerializeToString()) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'text': data.reshape((-1, 1))}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"text": data.reshape((-1, 1))}) assert_almost_equal(expected_proba, got[1]) 
assert_almost_equal(expected_label, got[0]) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") @ignore_warnings(category=(FutureWarning, UserWarning)) def test_pipeline_pipeline_rf(self): - cat_feat = ['A', 'B'] - text_feat = 'TEXT' - - pipe = Pipeline(steps=[ - ('preprocessor', ColumnTransformer( - transformers=[ - ('cat_tr', OneHotEncoder(handle_unknown='ignore'), - cat_feat), - ('count_vect', Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]), - text_feat)])), - ('classifier', MultiOutputClassifier( - estimator=RandomForestClassifier( - n_estimators=5, max_depth=5)))]) - - data = numpy.array([ - ["cat1", "cat2", "cat3", "cat1", "cat2"], - ["C1", "C2", "C3", "C3", "C4"], - ["first sentance", "second sentence", - "many sentances", "dummy sentance", - "no sentance at all"]]).T + cat_feat = ["A", "B"] + text_feat = "TEXT" + + pipe = Pipeline( + steps=[ + ( + "preprocessor", + ColumnTransformer( + transformers=[ + ( + "cat_tr", + OneHotEncoder(handle_unknown="ignore"), + cat_feat, + ), + ( + "count_vect", + Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer( + max_df=0.8, + min_df=0.05, + max_features=1000, + ), + ) + ] + ), + text_feat, + ), + ] + ), + ), + ( + "classifier", + MultiOutputClassifier( + estimator=RandomForestClassifier(n_estimators=5, max_depth=5) + ), + ), + ] + ) + + data = numpy.array( + [ + ["cat1", "cat2", "cat3", "cat1", "cat2"], + ["C1", "C2", "C3", "C3", "C4"], + [ + "first sentance", + "second sentence", + "many sentances", + "dummy sentance", + "no sentance at all", + ], + ] + ).T y = numpy.array([[0, 1], [0, 1], [1, 0], [0, 1], [1, 1]]) - df = pandas.DataFrame(data, columns=['A', 'B', 'TEXT']) + df = pandas.DataFrame(data, columns=["A", "B", "TEXT"]) pipe.fit(df, y) expected_label = pipe.predict(df) expected_proba = pipe.predict_proba(df) model_onnx = convert_sklearn( - pipe, initial_types=[ - ('A', StringTensorType([None, 1])), - ('B', StringTensorType([None, 1])), - ('TEXT', StringTensorType([None, 1]))], + pipe, + initial_types=[ + ("A", StringTensorType([None, 1])), + ("B", StringTensorType([None, 1])), + ("TEXT", StringTensorType([None, 1])), + ], target_opset=TARGET_OPSET, - options={MultiOutputClassifier: {'zipmap': False}}) + options={MultiOutputClassifier: {"zipmap": False}}, + ) # with open("debug.onnx", "wb") as f: # f.write(model_onnx.SerializeToString()) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'A': data[:, :1], 'B': data[:, 1:2], - 'TEXT': data[:, 2:]}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"A": data[:, :1], "B": data[:, 1:2], "TEXT": data[:, 2:]}) self.assertEqual(len(expected_proba), len(got[1])) for e, g in zip(expected_proba, got[1]): assert_almost_equal(e, g, decimal=5) assert_almost_equal(expected_label, got[0]) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") @ignore_warnings(category=(DeprecationWarning, FutureWarning, 
UserWarning)) def test_issue_712_multio(self): dfx = pandas.DataFrame( - {'CAT1': ['985332', '985333', '985334', '985335', '985336'], - 'CAT2': ['1985332', '1985333', '1985334', '1985335', '1985336'], - 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"]}) - dfy = pandas.DataFrame( - {'REAL': [5, 6, 7, 6, 5], - 'CATY': [0, 1, 0, 1, 0]}) - - cat_features = ['CAT1', 'CAT2'] - categorical_transformer = OneHotEncoder(handle_unknown='ignore') - textual_feature = 'TEXT' - count_vect_transformer = Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]) + { + "CAT1": ["985332", "985333", "985334", "985335", "985336"], + "CAT2": ["1985332", "1985333", "1985334", "1985335", "1985336"], + "TEXT": ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"], + } + ) + dfy = pandas.DataFrame({"REAL": [5, 6, 7, 6, 5], "CATY": [0, 1, 0, 1, 0]}) + + cat_features = ["CAT1", "CAT2"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore") + textual_feature = "TEXT" + count_vect_transformer = Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer(max_df=0.8, min_df=0.05, max_features=1000), + ) + ] + ) preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', categorical_transformer, cat_features), - ('count_vector', count_vect_transformer, textual_feature)]) + ("cat_transform", categorical_transformer, cat_features), + ("count_vector", count_vect_transformer, textual_feature), + ] + ) model_RF = RandomForestClassifier(random_state=42, max_depth=50) - rf_clf = Pipeline(steps=[ - ('preprocessor', preprocessor), - ('classifier', MultiOutputClassifier(estimator=model_RF))]) + rf_clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", MultiOutputClassifier(estimator=model_RF)), + ] + ) rf_clf.fit(dfx, dfy) expected_label = rf_clf.predict(dfx) expected_proba = rf_clf.predict_proba(dfx) - inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx['TEXT'].values.reshape((-1, 1))} - onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET, - options={MultiOutputClassifier: {'zipmap': False}}) + inputs = { + "CAT1": dfx["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx["TEXT"].values.reshape((-1, 1)), + } + onx = to_onnx( + rf_clf, + dfx, + target_opset=TARGET_OPSET, + options={MultiOutputClassifier: {"zipmap": False}}, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) got = sess.run(None, inputs) assert_almost_equal(expected_label, got[0]) @@ -782,62 +985,107 @@ def test_issue_712_multio(self): for e, g in zip(expected_proba, got[1]): assert_almost_equal(e, g, decimal=5) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning)) def test_issue_712_svc_multio(self): for sub_model in [LinearSVC(), SVC()]: for method in ["sigmoid", "isotonic"]: with self.subTest(sub_model=sub_model, method=method): dfx = pandas.DataFrame( - {'CAT1': ['985332', '985333', '985334', '985335', - '985336', '985332', '985333', '985334', - '985335', '985336', '985336'], - 'CAT2': ['1985332', '1985333', '1985334', 
'1985335', - '1985336', '1985332', '1985333', '1985334', - '1985335', '1985336', '1985336'], - 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", - "abc ii", "abc abc", "abc def", "def ghj", - "abcdef", "abc ii", "abc abc"]}) + { + "CAT1": [ + "985332", + "985333", + "985334", + "985335", + "985336", + "985332", + "985333", + "985334", + "985335", + "985336", + "985336", + ], + "CAT2": [ + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985336", + ], + "TEXT": [ + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + ], + } + ) dfy = pandas.DataFrame( - {'REAL': [5, 6, 7, 6, 5, 5, 6, 7, 5, 6, 7], - 'CATY': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]}) - - cat_features = ['CAT1', 'CAT2'] - categorical_transformer = OneHotEncoder( - handle_unknown='ignore') - textual_feature = 'TEXT' - count_vect_transformer = Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]) + { + "REAL": [5, 6, 7, 6, 5, 5, 6, 7, 5, 6, 7], + "CATY": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], + } + ) + + cat_features = ["CAT1", "CAT2"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore") + textual_feature = "TEXT" + count_vect_transformer = Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer( + max_df=0.8, min_df=0.05, max_features=1000 + ), + ) + ] + ) preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', categorical_transformer, - cat_features), - ('count_vector', count_vect_transformer, - textual_feature)]) - model_SVC = CalibratedClassifierCV( - sub_model, cv=2, method=method) - rf_clf = Pipeline(steps=[ - ('preprocessor', preprocessor), - ('classifier', MultiOutputClassifier( - estimator=model_SVC))]) + ("cat_transform", categorical_transformer, cat_features), + ("count_vector", count_vect_transformer, textual_feature), + ] + ) + model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method) + rf_clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", MultiOutputClassifier(estimator=model_SVC)), + ] + ) rf_clf.fit(dfx, dfy) expected_label = rf_clf.predict(dfx) expected_proba = rf_clf.predict_proba(dfx) - inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx['TEXT'].values.reshape((-1, 1))} + inputs = { + "CAT1": dfx["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx["TEXT"].values.reshape((-1, 1)), + } onx = to_onnx( - rf_clf, dfx, target_opset=TARGET_OPSET, - options={MultiOutputClassifier: {'zipmap': False}}) + rf_clf, + dfx, + target_opset=TARGET_OPSET, + options={MultiOutputClassifier: {"zipmap": False}}, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) got = sess.run(None, inputs) assert_almost_equal(expected_label, got[0]) self.assertEqual(len(expected_proba), len(got[1])) @@ -848,154 +1096,239 @@ def test_issue_712_svc_multio(self): else: assert_almost_equal(e, g, decimal=5) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") @ignore_warnings(category=(DeprecationWarning, 
FutureWarning, UserWarning)) def test_issue_712_svc_binary0(self): for sub_model in [LinearSVC(), SVC()]: for method in ["sigmoid", "isotonic"]: with self.subTest(sub_model=sub_model, method=method): dfx = pandas.DataFrame( - {'CAT1': ['985332', '985333', '985334', '985335', - '985336', '985332', '985333', '985334', - '985335', '985336', '985336'], - 'CAT2': ['1985332', '1985333', '1985334', '1985335', - '1985336', '1985332', '1985333', '1985334', - '1985335', '1985336', '1985336'], - 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", - "abc ii", "abc abc", "abc def", "def ghj", - "abcdef", "abc ii", "abc abc"]}) + { + "CAT1": [ + "985332", + "985333", + "985334", + "985335", + "985336", + "985332", + "985333", + "985334", + "985335", + "985336", + "985336", + ], + "CAT2": [ + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985336", + ], + "TEXT": [ + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + ], + } + ) dfy = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]) - cat_features = ['CAT1', 'CAT2'] - categorical_transformer = OneHotEncoder( - handle_unknown='ignore') - textual_feature = 'TEXT' - count_vect_transformer = Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]) + cat_features = ["CAT1", "CAT2"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore") + textual_feature = "TEXT" + count_vect_transformer = Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer( + max_df=0.8, min_df=0.05, max_features=1000 + ), + ) + ] + ) preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', categorical_transformer, - cat_features), - ('count_vector', count_vect_transformer, - textual_feature)]) - model_SVC = CalibratedClassifierCV( - sub_model, cv=2, method=method) - rf_clf = Pipeline(steps=[ - ('preprocessor', preprocessor), - ('classifier', model_SVC)]) + ("cat_transform", categorical_transformer, cat_features), + ("count_vector", count_vect_transformer, textual_feature), + ] + ) + model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method) + rf_clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", model_SVC), + ] + ) rf_clf.fit(dfx, dfy) expected_label = rf_clf.predict(dfx) expected_proba = rf_clf.predict_proba(dfx) - inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx['TEXT'].values.reshape((-1, 1))} - onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET, - options={'zipmap': False}) + inputs = { + "CAT1": dfx["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx["TEXT"].values.reshape((-1, 1)), + } + onx = to_onnx( + rf_clf, + dfx, + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) got = sess.run(None, inputs) assert_almost_equal(expected_label, got[0]) assert_almost_equal(expected_proba, got[1], decimal=5) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") 
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning)) def test_issue_712_svc_multi(self): for sub_model in [SVC(), LinearSVC()]: for method in ["isotonic", "sigmoid"]: with self.subTest(sub_model=sub_model, method=method): dfx = pandas.DataFrame( - {'CAT1': ['985332', '985333', '985334', '985335', - '985336', '985332', '985333', '985334', - '985335', '985336', '985336'], - 'CAT2': ['1985332', '1985333', '1985334', '1985335', - '1985336', '1985332', '1985333', '1985334', - '1985335', '1985336', '1985336'], - 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", - "abc ii", "abc abc", "abc def", "def ghj", - "abcdef", "abc ii", "abc abc"]}) + { + "CAT1": [ + "985332", + "985333", + "985334", + "985335", + "985336", + "985332", + "985333", + "985334", + "985335", + "985336", + "985336", + ], + "CAT2": [ + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985332", + "1985333", + "1985334", + "1985335", + "1985336", + "1985336", + ], + "TEXT": [ + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + "abc def", + "def ghj", + "abcdef", + "abc ii", + "abc abc", + ], + } + ) dfy = numpy.array([5, 6, 7, 6, 5, 5, 8, 7, 5, 6, 8]) - cat_features = ['CAT1', 'CAT2'] - categorical_transformer = OneHotEncoder( - handle_unknown='ignore') - textual_feature = 'TEXT' - count_vect_transformer = Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]) + cat_features = ["CAT1", "CAT2"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore") + textual_feature = "TEXT" + count_vect_transformer = Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer( + max_df=0.8, min_df=0.05, max_features=1000 + ), + ) + ] + ) preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', categorical_transformer, - cat_features), - ('count_vector', count_vect_transformer, - textual_feature)]) - model_SVC = CalibratedClassifierCV( - sub_model, cv=2, method=method) - rf_clf = Pipeline(steps=[ - ('preprocessor', preprocessor), - ('classifier', model_SVC)]) + ("cat_transform", categorical_transformer, cat_features), + ("count_vector", count_vect_transformer, textual_feature), + ] + ) + model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method) + rf_clf = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("classifier", model_SVC), + ] + ) rf_clf.fit(dfx, dfy) expected_label = rf_clf.predict(dfx) expected_proba = rf_clf.predict_proba(dfx) - inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx['TEXT'].values.reshape((-1, 1))} - onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET, - options={'zipmap': False}) + inputs = { + "CAT1": dfx["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx["TEXT"].values.reshape((-1, 1)), + } + onx = to_onnx( + rf_clf, + dfx, + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) got = sess.run(None, inputs) assert_almost_equal(expected_label, got[0]) if method == "isotonic": # float/double issues - assert_almost_equal( - expected_proba[2:4], got[1][2:4], decimal=3) + assert_almost_equal(expected_proba[2:4], got[1][2:4], decimal=3) else: assert_almost_equal(expected_proba, got[1], decimal=5) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") - @unittest.skipIf( - not 
check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") @ignore_warnings(category=(FutureWarning, UserWarning)) def test_pipeline_make_column_selector(self): - X = pandas.DataFrame({ - 'city': ['London', 'London', 'Paris', 'Sallisaw'], - 'rating': [5, 3, 4, 5]}) - X['rating'] = X['rating'].astype(numpy.float32) + X = pandas.DataFrame( + {"city": ["London", "London", "Paris", "Sallisaw"], "rating": [5, 3, 4, 5]} + ) + X["rating"] = X["rating"].astype(numpy.float32) ct = make_column_transformer( - (StandardScaler(), make_column_selector( - dtype_include=numpy.number)), - (OneHotEncoder(), make_column_selector( - dtype_include=object))) + (StandardScaler(), make_column_selector(dtype_include=numpy.number)), + (OneHotEncoder(), make_column_selector(dtype_include=object)), + ) expected = ct.fit_transform(X) onx = to_onnx(ct, X, target_opset=TARGET_OPSET) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [i.name for i in sess.get_inputs()] - got = sess.run(None, {names[0]: X[names[0]].values.reshape((-1, 1)), - names[1]: X[names[1]].values.reshape((-1, 1))}) + got = sess.run( + None, + { + names[0]: X[names[0]].values.reshape((-1, 1)), + names[1]: X[names[1]].values.reshape((-1, 1)), + }, + ) assert_almost_equal(expected, got[0]) - @unittest.skipIf( - not check_scikit_version(), - reason="Scikit 0.21 too old") + @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old") def test_feature_selector_no_converter(self): - class ColumnSelector(TransformerMixin, BaseEstimator): def __init__(self, cols): if not isinstance(cols, list): @@ -1014,24 +1347,26 @@ def transform(self, X): # https://github.com/databricks/automl/blob/main/ # runtime/tests/automl_runtime/sklearn/column_selector_test.py X_in = pandas.DataFrame( - numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - dtype=numpy.float32), - columns=["a", "b", "c"]) - y = pandas.DataFrame(numpy.array([[1], [0], [1]]), - columns=["label"]) + numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=numpy.float32), + columns=["a", "b", "c"], + ) + y = pandas.DataFrame(numpy.array([[1], [0], [1]]), columns=["label"]) X_out_expected = numpy.array([1, 0, 1]) standardizer = StandardScaler() selected_cols = ["a", "b"] col_selector = ColumnSelector(selected_cols) preprocessor = ColumnTransformer( - [("standardizer", standardizer, selected_cols)], remainder="drop") + [("standardizer", standardizer, selected_cols)], remainder="drop" + ) - model = Pipeline([ - ("column_selector", col_selector), - ("preprocessor", preprocessor), - ("decision_tree", DecisionTreeClassifier()) - ]) + model = Pipeline( + [ + ("column_selector", col_selector), + ("preprocessor", preprocessor), + ("decision_tree", DecisionTreeClassifier()), + ] + ) model.fit(X=X_in, y=y) # Add one column so that the dataframe for prediction is # different with the data for training @@ -1041,7 +1376,7 @@ def transform(self, X): with self.assertRaises(RuntimeError) as e: to_onnx(model, X_in) - self.assertIn('ColumnTransformer', str(e)) + self.assertIn("ColumnTransformer", str(e)) @unittest.skipIf(TARGET_OPSET < 15, reason="use CastLike") def test_feature_vectorizer_double(self): @@ -1050,18 +1385,18 @@ def test_feature_vectorizer_double(self): X["sexi"] = X["sex"].astype(numpy.int64) X = X.drop("sex", axis=1) X_train, X_test, 
y_train, y_test = train_test_split(X, y) - regr = Pipeline([("std", StandardScaler()), - ("reg", LinearRegression())]) + regr = Pipeline([("std", StandardScaler()), ("reg", LinearRegression())]) regr = regr.fit(X_train, y_train) onnx_model = to_onnx(regr, X=X_train) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) expected = regr.predict(X_test) names = [i.name for i in sess.get_inputs()] - feeds = {n: X_test[c].values.reshape((-1, 1)) - for n, c in zip(names, X_test.columns)} + feeds = { + n: X_test[c].values.reshape((-1, 1)) for n, c in zip(names, X_test.columns) + } got = sess.run(None, feeds) assert_almost_equal(expected.ravel(), got[0].ravel(), decimal=4) if ReferenceEvaluatorEx is None: diff --git a/tests/test_sklearn_pipeline_concat_tfidf.py b/tests/test_sklearn_pipeline_concat_tfidf.py index 8a20c953e..464a5ea89 100644 --- a/tests/test_sklearn_pipeline_concat_tfidf.py +++ b/tests/test_sklearn_pipeline_concat_tfidf.py @@ -7,6 +7,7 @@ from onnxruntime import InferenceSession from onnxruntime.capi.onnxruntime_pybind11_state import Fail import pandas + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -27,50 +28,232 @@ class TestSklearnPipelineConcatTfIdf(unittest.TestCase): - - words = ['ability', 'able', 'about', 'above', 'abroad', - 'absence', 'absolute', 'absolutely', 'absorb', - 'academic', 'accept', 'access', 'accident', 'accompany', - 'accomplish', 'according', 'account', 'accurate', 'achieve', - 'achievement', 'acid', 'acknowledge', 'acquire', 'across', - 'act', 'action', 'active', 'activity', 'actor', 'actress', - 'actual', 'actually', 'ad', 'adapt', 'add', 'addition', - 'additional', 'address', 'adequate', 'adjust', - 'adjustment', 'administration', 'administrator', 'admire', - 'admission', 'admit', 'adolescent', 'adopt', 'adult', - 'advance', 'advanced', 'advantage', 'adventure', - 'advice', 'advise', 'adviser', 'advocate', 'affair', - 'afford', 'afraid', 'after', 'afternoon', 'again', 'against', - 'age', 'agency', 'agenda', 'agent', 'aggressive', 'ago', - 'agree', 'agreement', 'agricultural', 'ah', 'ahead', 'aid', - 'aide', 'aim', 'air', 'aircraft', 'airline', 'airport', - 'alive', 'all', 'alliance', 'allow', 'ally', 'almost', - 'along', 'already', 'also', 'alter', 'alternative', - 'always', 'AM', 'amazing', 'among', 'amount', 'analysis', - 'analyze', 'ancient', 'and', 'anger', 'angle', 'angry', - 'anniversary', 'announce', 'annual', 'another', 'answer', - 'anticipate', 'anxiety', 'any', 'anybody', 'anymore', - 'anything', 'anyway', 'anywhere', 'apart', 'apartment', - 'apparently', 'appeal', 'appear', 'appearance', 'apple', - 'application', 'apply', 'appoint', 'appointment', - 'approach', 'appropriate', 'approval', 'approve', - 'architect', 'area', 'argue', 'argument', 'arise', 'arm', - 'around', 'arrange', 'arrangement', 'arrest', - 'arrival', 'arrive', 'art', 'article', 'artist', 'artistic', - 'as', 'aside', 'ask', 'asleep', 'aspect', 'assert', - 'assess', 'assessment', 'asset', 'assign', 'assignment', - 'assist', 'assistance', 'assistant', 'associate', - 'association', 'assume', 'assumption', 'assure', 'at', - 'athlete', 'athletic', 'atmosphere', 'attach', 'attack', - 'attempt', 'attend', 'attention', 'attitude', 'attract', - 'attractive', 'attribute', 'audience', 'author', 'auto', - 'available', 'average', 'avoid', 'award', 'aware', - 'away', 'baby', 'back', 'background', 'bag', 'bake', - 'balance', 'ball', 'band', 
'bank', 'bar', 'barrel', - 'barrier', 'base', 'baseball', 'basic', 'basically', - 'a', 'to', 'the', 'an', 'than', 'of', 'off', 'us', - 'who', 'which', 'what', 'why', 'whom', 'at', 'from', - 'for', 'to', 'towards'] + words = [ + "ability", + "able", + "about", + "above", + "abroad", + "absence", + "absolute", + "absolutely", + "absorb", + "academic", + "accept", + "access", + "accident", + "accompany", + "accomplish", + "according", + "account", + "accurate", + "achieve", + "achievement", + "acid", + "acknowledge", + "acquire", + "across", + "act", + "action", + "active", + "activity", + "actor", + "actress", + "actual", + "actually", + "ad", + "adapt", + "add", + "addition", + "additional", + "address", + "adequate", + "adjust", + "adjustment", + "administration", + "administrator", + "admire", + "admission", + "admit", + "adolescent", + "adopt", + "adult", + "advance", + "advanced", + "advantage", + "adventure", + "advice", + "advise", + "adviser", + "advocate", + "affair", + "afford", + "afraid", + "after", + "afternoon", + "again", + "against", + "age", + "agency", + "agenda", + "agent", + "aggressive", + "ago", + "agree", + "agreement", + "agricultural", + "ah", + "ahead", + "aid", + "aide", + "aim", + "air", + "aircraft", + "airline", + "airport", + "alive", + "all", + "alliance", + "allow", + "ally", + "almost", + "along", + "already", + "also", + "alter", + "alternative", + "always", + "AM", + "amazing", + "among", + "amount", + "analysis", + "analyze", + "ancient", + "and", + "anger", + "angle", + "angry", + "anniversary", + "announce", + "annual", + "another", + "answer", + "anticipate", + "anxiety", + "any", + "anybody", + "anymore", + "anything", + "anyway", + "anywhere", + "apart", + "apartment", + "apparently", + "appeal", + "appear", + "appearance", + "apple", + "application", + "apply", + "appoint", + "appointment", + "approach", + "appropriate", + "approval", + "approve", + "architect", + "area", + "argue", + "argument", + "arise", + "arm", + "around", + "arrange", + "arrangement", + "arrest", + "arrival", + "arrive", + "art", + "article", + "artist", + "artistic", + "as", + "aside", + "ask", + "asleep", + "aspect", + "assert", + "assess", + "assessment", + "asset", + "assign", + "assignment", + "assist", + "assistance", + "assistant", + "associate", + "association", + "assume", + "assumption", + "assure", + "at", + "athlete", + "athletic", + "atmosphere", + "attach", + "attack", + "attempt", + "attend", + "attention", + "attitude", + "attract", + "attractive", + "attribute", + "audience", + "author", + "auto", + "available", + "average", + "avoid", + "award", + "aware", + "away", + "baby", + "back", + "background", + "bag", + "bake", + "balance", + "ball", + "band", + "bank", + "bar", + "barrel", + "barrier", + "base", + "baseball", + "basic", + "basically", + "a", + "to", + "the", + "an", + "than", + "of", + "off", + "us", + "who", + "which", + "what", + "why", + "whom", + "at", + "from", + "for", + "to", + "towards", + ] @staticmethod def random_cats(n=10000, start=1000, end=9000): @@ -83,60 +266,66 @@ def random_sentance(n=10000, length=7): ls = numpy.random.randint(1, length, n) text = [] for size in ls: - sentance = [words[random.randint(0, len(words) - 1)] - for i in range(size)] + sentance = [words[random.randint(0, len(words) - 1)] for i in range(size)] text.append(" ".join(sentance)) return numpy.array(text) @staticmethod def get_pipeline(N=10000): dfx = pandas.DataFrame( - {'CAT1': TestSklearnPipelineConcatTfIdf.random_cats(N, 10, 20), - 'CAT2': 
TestSklearnPipelineConcatTfIdf.random_cats(N, 30, 40), - 'TEXT': TestSklearnPipelineConcatTfIdf.random_sentance(N)}) + { + "CAT1": TestSklearnPipelineConcatTfIdf.random_cats(N, 10, 20), + "CAT2": TestSklearnPipelineConcatTfIdf.random_cats(N, 30, 40), + "TEXT": TestSklearnPipelineConcatTfIdf.random_sentance(N), + } + ) dfy = numpy.random.randint(0, 2, N) dfx_train, dfx_test, dfy_train, dfy_test = train_test_split(dfx, dfy) - cat_features = ['CAT1', 'CAT2'] - categorical_transformer = OneHotEncoder( - handle_unknown='ignore', sparse=True) - textual_feature = 'TEXT' - count_vect_transformer = Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.02, max_features=1000))]) + cat_features = ["CAT1", "CAT2"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=True) + textual_feature = "TEXT" + count_vect_transformer = Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer(max_df=0.8, min_df=0.02, max_features=1000), + ) + ] + ) preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', categorical_transformer, - cat_features), - ('count_vector', count_vect_transformer, - textual_feature)]) - pipe = Pipeline(steps=[('preprocessor', preprocessor)]) + ("cat_transform", categorical_transformer, cat_features), + ("count_vector", count_vect_transformer, textual_feature), + ] + ) + pipe = Pipeline(steps=[("preprocessor", preprocessor)]) pipe.fit(dfx_train, dfy_train) dfx_test = dfx_test.reset_index(drop=True).copy() - dfx_test.loc[0, 'TEXT'] = 'about' - dfx_test.loc[1, 'TEXT'] = 'the' + dfx_test.loc[0, "TEXT"] = "about" + dfx_test.loc[1, "TEXT"] = "the" return pipe, dfx_test - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning)) def test_issue_712_svc_binary(self): - pipe, dfx_test = TestSklearnPipelineConcatTfIdf.get_pipeline() expected = pipe.transform(dfx_test) - inputs = {'CAT1': dfx_test['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx_test['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx_test['TEXT'].values.reshape((-1, 1))} + inputs = { + "CAT1": dfx_test["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx_test["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx_test["TEXT"].values.reshape((-1, 1)), + } onx = to_onnx(pipe, dfx_test, target_opset=TARGET_OPSET) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) expected_dense = expected.todense() for i in range(dfx_test.shape[0]): - row_inputs = {k: v[i: i + 1] for k, v in inputs.items()} + row_inputs = {k: v[i : i + 1] for k, v in inputs.items()} got = sess.run(None, row_inputs) assert_almost_equal(expected_dense[i], got[0]) @@ -155,28 +344,32 @@ def test_issue_712_svc_binary(self): got = sess.run(None, inputs) # assert_almost_equal(expected.todense(), got[0]) - @unittest.skipIf(TARGET_OPSET < 11, - reason="SequenceConstruct not available") + @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available") @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning)) def test_issue_712_svc_binary_empty(self): - pipe, dfx_test = TestSklearnPipelineConcatTfIdf.get_pipeline() expected = pipe.transform(dfx_test) - inputs = {'CAT1': dfx_test['CAT1'].values.reshape((-1, 1)), - 'CAT2': dfx_test['CAT2'].values.reshape((-1, 1)), - 'TEXT': dfx_test['TEXT'].values.reshape((-1, 1))} - onx = to_onnx(pipe, 
dfx_test, target_opset=TARGET_OPSET, - options={CountVectorizer: {'keep_empty_string': True}}) + inputs = { + "CAT1": dfx_test["CAT1"].values.reshape((-1, 1)), + "CAT2": dfx_test["CAT2"].values.reshape((-1, 1)), + "TEXT": dfx_test["TEXT"].values.reshape((-1, 1)), + } + onx = to_onnx( + pipe, + dfx_test, + target_opset=TARGET_OPSET, + options={CountVectorizer: {"keep_empty_string": True}}, + ) with open("debug.onnx", "wb") as f: f.write(onx.SerializeToString()) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) expected_dense = expected.todense() for i in range(dfx_test.shape[0]): - row_inputs = {k: v[i: i + 1] for k, v in inputs.items()} + row_inputs = {k: v[i : i + 1] for k, v in inputs.items()} got = sess.run(None, row_inputs) assert_almost_equal(expected_dense[i], got[0]) diff --git a/tests/test_sklearn_pipeline_within_pipeline.py b/tests/test_sklearn_pipeline_within_pipeline.py index 87cfffbe5..e45de2305 100644 --- a/tests/test_sklearn_pipeline_within_pipeline.py +++ b/tests/test_sklearn_pipeline_within_pipeline.py @@ -8,6 +8,7 @@ from io import StringIO import numpy as np import pandas + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -22,7 +23,11 @@ from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import Pipeline from sklearn.preprocessing import ( - MinMaxScaler, RobustScaler, StandardScaler, OneHotEncoder) + MinMaxScaler, + RobustScaler, + StandardScaler, + OneHotEncoder, +) from sklearn.feature_extraction.text import CountVectorizer from skl2onnx import convert_sklearn, to_onnx from skl2onnx.common.data_types import FloatTensorType, StringTensorType @@ -30,7 +35,6 @@ class TestSklearnPipelineWithinPipeline(unittest.TestCase): - def test_pipeline_pca_pipeline_minmax(self): model = Pipeline( memory=None, @@ -51,13 +55,15 @@ def test_pipeline_pca_pipeline_minmax(self): "Pipeline", Pipeline( memory=None, - steps=[( - "MinMax scaler", - MinMaxScaler( - copy=True, - feature_range=(0, 3.7209871159509307), - ), - )], + steps=[ + ( + "MinMax scaler", + MinMaxScaler( + copy=True, + feature_range=(0, 3.7209871159509307), + ), + ) + ], ), ), ], @@ -70,14 +76,12 @@ def test_pipeline_pca_pipeline_minmax(self): model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnPipelinePcaPipelineMinMax") + data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMax" + ) def test_pipeline_pca_pipeline_none_lin(self): model = Pipeline( @@ -121,14 +125,12 @@ def test_pipeline_pca_pipeline_none_lin(self): model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnPipelinePcaPipelineMinMaxLogReg") + data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxLogReg" + ) def test_pipeline_pca_pipeline_multinomial(self): model = Pipeline( @@ -182,14 +184,12 @@ def test_pipeline_pca_pipeline_multinomial(self): model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnPipelinePcaPipelineMinMaxNB2") + 
data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNB2" + ) def test_pipeline_pca_pipeline_multinomial_none(self): model = Pipeline( @@ -240,102 +240,130 @@ def test_pipeline_pca_pipeline_multinomial_none(self): model, "pipelinewithinpipeline", [("input", FloatTensorType(data.shape))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, - model, - model_onnx, - basename="SklearnPipelinePcaPipelineMinMaxNBNone") + data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNBNone" + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) def test_pipeline_column_transformer_pipeline_imputer_scaler_lr(self): X = np.array([[1, 2], [3, np.nan], [3, 0]], dtype=np.float32) y = np.array([1, 0, 1]) - model = Pipeline([ - ( - "ct", - ColumnTransformer([ - ( - "pipeline1", - Pipeline([ - ("imputer", SimpleImputer()), - ("scaler", StandardScaler()), - ]), - [0], - ), - ( - "pipeline2", - Pipeline([ - ("imputer", SimpleImputer()), - ("scaler", RobustScaler()), - ]), - [1], + model = Pipeline( + [ + ( + "ct", + ColumnTransformer( + [ + ( + "pipeline1", + Pipeline( + [ + ("imputer", SimpleImputer()), + ("scaler", StandardScaler()), + ] + ), + [0], + ), + ( + "pipeline2", + Pipeline( + [ + ("imputer", SimpleImputer()), + ("scaler", RobustScaler()), + ] + ), + [1], + ), + ] ), - ]), - ), - ("lr", LogisticRegression(solver="liblinear")), - ]) + ), + ("lr", LogisticRegression(solver="liblinear")), + ] + ) model.fit(X, y) model_onnx = convert_sklearn( model, "pipelinewithinpipeline", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, - model, - model_onnx, - basename="SklearnPipelineCTPipelineImputerScalerLR") + X, model, model_onnx, basename="SklearnPipelineCTPipelineImputerScalerLR" + ) @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) def test_complex_pipeline(self): - - df = pandas.read_csv(StringIO(dedent(""" + df = pandas.read_csv( + StringIO( + dedent( + """ CAT1,CAT2,TEXT A,M,clean B,N,text A,M,cleaning - B,N,normalizing"""))) + B,N,normalizing""" + ) + ) + ) X_train = df y_train = np.array([[1, 0, 1, 0], [1, 0, 1, 0]]).T - categorical_features = ['CAT1', 'CAT2'] - textual_feature = 'TEXT' + categorical_features = ["CAT1", "CAT2"] + textual_feature = "TEXT" preprocessor = ColumnTransformer( transformers=[ - ('cat_transform', OneHotEncoder(handle_unknown='ignore'), - categorical_features), - ('count_vector', Pipeline(steps=[ - ('count_vect', CountVectorizer( - max_df=0.8, min_df=0.05, max_features=1000))]), - textual_feature)]) + ( + "cat_transform", + OneHotEncoder(handle_unknown="ignore"), + categorical_features, + ), + ( + "count_vector", + Pipeline( + steps=[ + ( + "count_vect", + CountVectorizer( + max_df=0.8, min_df=0.05, max_features=1000 + ), + ) + ] + ), + textual_feature, + ), + ] + ) preprocessor.fit(X_train, y_train) - initial_type = [('CAT1', StringTensorType([None, 1])), - ('CAT2', StringTensorType([None, 1])), - ('TEXTs', StringTensorType([None, 1]))] + initial_type = [ + ("CAT1", StringTensorType([None, 1])), + ("CAT2", StringTensorType([None, 1])), + ("TEXTs", StringTensorType([None, 1])), + ] with 
self.assertRaises(RuntimeError): - to_onnx(preprocessor, initial_types=initial_type, - target_opset=TARGET_OPSET) + to_onnx(preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET) - initial_type = [('CAT1', StringTensorType([None, 1])), - ('CAT2', StringTensorType([None, 1])), - ('TEXT', StringTensorType([None, 1]))] - onx = to_onnx(preprocessor, initial_types=initial_type, - target_opset=TARGET_OPSET) + initial_type = [ + ("CAT1", StringTensorType([None, 1])), + ("CAT2", StringTensorType([None, 1])), + ("TEXT", StringTensorType([None, 1])), + ] + onx = to_onnx( + preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET + ) dump_data_and_model( - X_train, preprocessor, onx, - basename="SklearnPipelineComplex") + X_train, preprocessor, onx, basename="SklearnPipelineComplex" + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_pls_regression.py b/tests/test_sklearn_pls_regression.py index 981ebd718..1d9200129 100644 --- a/tests/test_sklearn_pls_regression.py +++ b/tests/test_sklearn_pls_regression.py @@ -9,65 +9,85 @@ from sklearn.cross_decomposition import PLSRegression from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, DoubleTensorType + FloatTensorType, + Int64TensorType, + DoubleTensorType, ) from test_utils import dump_data_and_model, TARGET_OPSET class TestSklearnPLSRegressionConverters(unittest.TestCase): def test_model_pls_regression(self): - X = numpy.array([[0., 0., 1.], [1., 0., 0.], - [2., 2., 2.], [2., 5., 4.]], - numpy.float32) - Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], - [11.9, 12.3]], - numpy.float32) + X = numpy.array( + [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]], + numpy.float32, + ) + Y = numpy.array( + [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.float32 + ) pls2 = PLSRegression(n_components=2) pls2.fit(X, Y) model_onnx = convert_sklearn( - pls2, "scikit-learn pls", + pls2, + "scikit-learn pls", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, pls2, model_onnx, methods=['predict'], - basename="SklearnPLSRegression", verbose=10) + X, + pls2, + model_onnx, + methods=["predict"], + basename="SklearnPLSRegression", + verbose=10, + ) def test_model_pls_regression64(self): - X = numpy.array([[0., 0., 1.], [1., 0., 0.], - [2., 2., 2.], [2., 5., 4.]], - numpy.float64) - Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], - [11.9, 12.3]], - numpy.float64) + X = numpy.array( + [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]], + numpy.float64, + ) + Y = numpy.array( + [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.float64 + ) pls2 = PLSRegression(n_components=2) pls2.fit(X, Y) model_onnx = convert_sklearn( - pls2, "scikit-learn pls64", + pls2, + "scikit-learn pls64", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, pls2, model_onnx, methods=['predict'], - basename="SklearnPLSRegression64") + X, pls2, model_onnx, methods=["predict"], basename="SklearnPLSRegression64" + ) def test_model_pls_regressionInt64(self): - X = numpy.array([[0., 0., 1.], [1., 0., 0.], - [2., 2., 2.], [2., 5., 4.]], - numpy.int64) - Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], - [11.9, 12.3]], - numpy.int64) + X = numpy.array( + [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], 
[2.0, 2.0, 2.0], [2.0, 5.0, 4.0]], + numpy.int64, + ) + Y = numpy.array( + [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.int64 + ) pls2 = PLSRegression(n_components=2) pls2.fit(X, Y) model_onnx = convert_sklearn( - pls2, "scikit-learn plsint64", + pls2, + "scikit-learn plsint64", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X, pls2, model_onnx, methods=['predict'], - basename="SklearnPLSRegressionInt64") + X, + pls2, + model_onnx, + methods=["predict"], + basename="SklearnPLSRegressionInt64", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_polynomial_features_converter.py b/tests/test_sklearn_polynomial_features_converter.py index db8eec54d..af7f39129 100644 --- a/tests/test_sklearn_polynomial_features_converter.py +++ b/tests/test_sklearn_polynomial_features_converter.py @@ -5,6 +5,7 @@ """ import unittest import numpy as np + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -18,41 +19,53 @@ class TestSklearnPolynomialFeatures(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) def test_model_polynomial_features_float_degree_2(self): - X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], - [0, 3.2, 4.7, -8.9]]) + X = np.array( + [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]] + ) model = PolynomialFeatures(degree=2).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnPolynomialFeaturesFloatDegree2") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnPolynomialFeaturesFloatDegree2", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) def test_model_polynomial_features_int_degree_2(self): - X = np.array([ - [1, 3, 4, 0], - [2, 3, 4, 1], - [1, -4, 3, 7], - [3, 10, -9, 5], - [1, 0, 10, 5], - ]) + X = np.array( + [ + [1, 3, 4, 0], + [2, 3, 4, 1], + [1, -4, 3, 7], + [3, 10, -9, 5], + [1, 0, 10, 5], + ] + ) model = PolynomialFeatures(degree=2).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnPolynomialFeaturesIntDegree2") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnPolynomialFeaturesIntDegree2", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) @@ -60,49 +73,65 @@ def test_model_polynomial_features_float_degree_3(self): X = np.array([[1.2, 3.2, 1.2], [4.3, 3.2, 4.5], [3.2, 4.7, 1.1]]) model = PolynomialFeatures(degree=3).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - 
basename="SklearnPolynomialFeaturesFloatDegree3") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnPolynomialFeaturesFloatDegree3", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) def test_model_polynomial_features_int_degree_3(self): - X = np.array([ - [1, 3, 33], - [4, 1, -11], - [3, 7, -3], - [3, 5, 4], - [1, 0, 3], - [5, 4, 9], - ]) + X = np.array( + [ + [1, 3, 33], + [4, 1, -11], + [3, 7, -3], + [3, 5, 4], + [1, 0, 3], + [5, 4, 9], + ] + ) model = PolynomialFeatures(degree=3).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnPolynomialFeaturesIntDegree3") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnPolynomialFeaturesIntDegree3", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) def test_model_polynomial_features_float_degree_4(self): - X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3], - [3.2, 4.7, 5.4, 7.1]]) + X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3], [3.2, 4.7, 5.4, 7.1]]) model = PolynomialFeatures(degree=4).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnPolynomialFeaturesFloatDegree4-Dec4") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnPolynomialFeaturesFloatDegree4-Dec4", + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @ignore_warnings(category=FutureWarning) @@ -110,13 +139,18 @@ def test_model_polynomial_features_int_degree_4(self): X = np.array([[1, 3, 4, 1], [3, 7, 3, 5], [1, 0, 5, 4]]) model = PolynomialFeatures(degree=4).fit(X) model_onnx = convert_sklearn( - model, "scikit-learn polynomial features", + model, + "scikit-learn polynomial features", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - X.astype(np.int64), model, model_onnx, - basename="SklearnPolynomialFeaturesIntDegree4") + X.astype(np.int64), + model, + model_onnx, + basename="SklearnPolynomialFeaturesIntDegree4", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_power_transformer.py b/tests/test_sklearn_power_transformer.py index 1f02af244..2f9d3141e 100644 --- a/tests/test_sklearn_power_transformer.py +++ b/tests/test_sklearn_power_transformer.py @@ -39,9 +39,7 @@ def test_powertransformer_yeo_johnson_positive_without_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_yeo_johnson_negative_without_scaler(self): @@ -55,9 +53,7 @@ def test_powertransformer_yeo_johnson_negative_without_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - 
dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_yeo_johnson_combined_without_scaler(self): @@ -71,9 +67,7 @@ def test_powertransformer_yeo_johnson_combined_without_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_box_cox_without_scaler(self): @@ -87,9 +81,7 @@ def test_powertransformer_box_cox_without_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_yeo_johnson_positive_with_scaler(self): @@ -103,9 +95,7 @@ def test_powertransformer_yeo_johnson_positive_with_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_with_scaler_blacklist(self): @@ -136,9 +126,7 @@ def test_powertransformer_yeo_johnson_negative_with_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_yeo_johnson_combined_with_scaler(self): @@ -152,9 +140,7 @@ def test_powertransformer_yeo_johnson_combined_with_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_box_cox_with_scaler(self): @@ -168,9 +154,7 @@ def test_powertransformer_box_cox_with_scaler(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") @unittest.skipIf(PowerTransformer is None, "Problems with import occurred") def test_powertransformer_zeros(self): @@ -184,9 +168,7 @@ def test_powertransformer_zeros(self): target_opset=TARGET_OPSET, ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - data, model, model_onnx, basename="PowerTransformer" - ) + dump_data_and_model(data, model, model_onnx, basename="PowerTransformer") if __name__ == "__main__": diff --git a/tests/test_sklearn_quadratic_discriminant_analysis_converter.py b/tests/test_sklearn_quadratic_discriminant_analysis_converter.py index 881dbe6ee..d9b7df667 100644 --- a/tests/test_sklearn_quadratic_discriminant_analysis_converter.py +++ b/tests/test_sklearn_quadratic_discriminant_analysis_converter.py @@ -10,25 +10,21 @@ from 
onnxruntime import __version__ as ort_version from onnx import __version__ as onnx_version from skl2onnx import convert_sklearn -from skl2onnx.common.data_types import ( - FloatTensorType, - DoubleTensorType -) +from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType -from test_utils import ( - dump_data_and_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, TARGET_OPSET ort_version = ".".join(ort_version.split(".")[:2]) -onnx_version = ".".join(onnx_version.split('.')[:2]) +onnx_version = ".".join(onnx_version.split(".")[:2]) class TestQuadraticDiscriminantAnalysisConverter(unittest.TestCase): - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_2c2f_float(self): # 2 classes, 2 features X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) @@ -42,23 +38,30 @@ def test_model_qda_2c2f_float(self): skl_model, "scikit-learn QDA", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model, - basename="SklearnQDA_2c2f_Float") - - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + dump_data_and_model( + X_test.astype(np.float32), + skl_model, + onnx_model, + basename="SklearnQDA_2c2f_Float", + ) + + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_2c3f_float(self): # 2 classes, 3 features - X = np.array([[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], - [1, 1, 0], [2, 1, 1], [3, 2, 1]]) + X = np.array( + [[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], [1, 1, 0], [2, 1, 1], [3, 2, 1]] + ) y = np.array([1, 1, 1, 2, 2, 2]) - X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], - [1, 1.5, 1], [3.1, 2.1, 1]]) + X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], [1, 1.5, 1], [3.1, 2.1, 1]]) skl_model = QuadraticDiscriminantAnalysis() skl_model.fit(X, y) @@ -67,20 +70,38 @@ def test_model_qda_2c3f_float(self): skl_model, "scikit-learn QDA", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model, - basename="SklearnQDA_2c3f_Float") - - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + dump_data_and_model( + X_test.astype(np.float32), + skl_model, + onnx_model, + basename="SklearnQDA_2c3f_Float", + ) + + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_3c2f_float(self): # 3 classes, 2 features - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], - [2, 1], [3, 
2], [-1, 2], [-2, 3], [-2, 2]]) + X = np.array( + [ + [-1, -1], + [-2, -1], + [-3, -2], + [1, 1], + [2, 1], + [3, 2], + [-1, 2], + [-2, 3], + [-2, 2], + ] + ) y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) X_test = np.array([[-0.8, -1], [0.8, 1], [-0.8, 1]]) @@ -91,20 +112,28 @@ def test_model_qda_3c2f_float(self): skl_model, "scikit-learn QDA", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model, - basename="SklearnQDA_3c2f_Float") - - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + dump_data_and_model( + X_test.astype(np.float32), + skl_model, + onnx_model, + basename="SklearnQDA_3c2f_Float", + ) + + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_2c2f_double(self): # 2 classes, 2 features - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], - [2, 1], [3, 2]]).astype(np.double) + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).astype( + np.double + ) y = np.array([1, 1, 1, 2, 2, 2]) X_test = np.array([[-0.8, -1], [0.8, 1]]) @@ -115,23 +144,31 @@ def test_model_qda_2c2f_double(self): skl_model, "scikit-learn QDA", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model, - basename="SklearnQDA_2c2f_Double") - - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + dump_data_and_model( + X_test.astype(np.double), + skl_model, + onnx_model, + basename="SklearnQDA_2c2f_Double", + ) + + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_2c3f_double(self): # 2 classes, 3 features - X = np.array([[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], - [1, 1, 0], [2, 1, 1], [3, 2, 1]]).astype(np.double) + X = np.array( + [[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], [1, 1, 0], [2, 1, 1], [3, 2, 1]] + ).astype(np.double) y = np.array([1, 1, 1, 2, 2, 2]) - X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], - [1, 1.5, 1], [3.1, 2.1, 1]]) + X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], [1, 1.5, 1], [3.1, 2.1, 1]]) skl_model = QuadraticDiscriminantAnalysis() skl_model.fit(X, y) @@ -140,20 +177,39 @@ def test_model_qda_2c3f_double(self): skl_model, "scikit-learn QDA", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model, - basename="SklearnQDA_2c3f_Double") - - @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'), - reason="scikit-learn<1.0") - @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'), - reason="fails with onnx 1.10") + dump_data_and_model( + 
X_test.astype(np.double), + skl_model, + onnx_model, + basename="SklearnQDA_2c3f_Double", + ) + + @unittest.skipIf( + pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0" + ) + @unittest.skipIf( + pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10" + ) def test_model_qda_3c2f_double(self): # 3 classes, 2 features - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], - [-1, 2], [-2, 3], [-2, 2]]).astype(np.double) + X = np.array( + [ + [-1, -1], + [-2, -1], + [-3, -2], + [1, 1], + [2, 1], + [3, 2], + [-1, 2], + [-2, 3], + [-2, 2], + ] + ).astype(np.double) y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) X_test = np.array([[-0.8, -1], [0.8, 1], [-0.8, 1]]) @@ -164,11 +220,17 @@ def test_model_qda_3c2f_double(self): skl_model, "scikit-learn QDA", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) self.assertIsNotNone(onnx_model) - dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model, - basename="SklearnQDA_3c2f_Double") + dump_data_and_model( + X_test.astype(np.double), + skl_model, + onnx_model, + basename="SklearnQDA_3c2f_Double", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py index 7e369a9b6..d0ea0a6f4 100644 --- a/tests/test_sklearn_random_forest_converters.py +++ b/tests/test_sklearn_random_forest_converters.py @@ -7,15 +7,17 @@ from numpy.testing import assert_almost_equal from onnxruntime import InferenceSession, __version__ as ort_version import sklearn -from sklearn.datasets import ( - load_iris, make_regression, make_classification) +from sklearn.datasets import load_iris, make_regression, make_classification from sklearn.model_selection import train_test_split from sklearn.ensemble import ( - RandomForestClassifier, RandomForestRegressor, - ExtraTreesClassifier, ExtraTreesRegressor + RandomForestClassifier, + RandomForestRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, ) from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -45,10 +47,11 @@ path_to_leaf, TARGET_OPSET, ) + try: from sklearn.ensemble import ( HistGradientBoostingClassifier, - HistGradientBoostingRegressor + HistGradientBoostingRegressor, ) except ImportError: HistGradientBoostingClassifier = None @@ -57,11 +60,11 @@ def _sklearn_version(): # Remove development version 0.22.dev0 becomes 0.22. 
- v = ".".join(sklearn.__version__.split('.')[:2]) + v = ".".join(sklearn.__version__.split(".")[:2]) return pv.Version(v) -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnTreeEnsembleModels(unittest.TestCase): @@ -70,8 +73,7 @@ def test_random_forest_classifier(self): dump_one_class_classification(model) dump_binary_classification(model) dump_binary_classification(model, label_string=False) - dump_binary_classification( - model, label_string=False, label_bool=True) + dump_binary_classification(model, label_string=False, label_bool=True) dump_multiple_classification(model) @ignore_warnings(category=FutureWarning) @@ -79,38 +81,51 @@ def test_random_forest_classifier_mismatched_estimator_counts(self): model = RandomForestClassifier(n_estimators=3) X = [[0, 1], [1, 1], [2, 0]] X = numpy.array(X, dtype=numpy.float32) - y = ['A', 'B', 'A'] + y = ["A", "B", "A"] model.fit(X, y) # Training code can manipulate n_estimators causing # n_estimators != len(estimators_). So simulate that here. model.n_estimators += 1 - model_onnx, prefix = convert_model(model, 'binary classifier', - [('input', - FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) - dump_data_and_model(X, model, model_onnx, - basename=prefix + "Bin" + - model.__class__.__name__ + - '_mismatched_estimator_counts') + model_onnx, prefix = convert_model( + model, + "binary classifier", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) + dump_data_and_model( + X, + model, + model_onnx, + basename=prefix + + "Bin" + + model.__class__.__name__ + + "_mismatched_estimator_counts", + ) @ignore_warnings(category=FutureWarning) def test_random_forest_regressor_mismatches(self): iris = load_iris() X, y = iris.data, iris.target - X_train, X_test, y_train, _ = train_test_split( - X, y, random_state=13) + X_train, X_test, y_train, _ = train_test_split(X, y, random_state=13) X_test = X_test.astype(numpy.float32) clr = RandomForestRegressor(n_jobs=1, n_estimators=100) clr.fit(X_train, y_train) clr.fit(X, y) - model_onnx, prefix = convert_model(clr, 'reg', - [('input', - FloatTensorType([None, 4]))], - target_opset=TARGET_OPSET) - dump_data_and_model(X_test, clr, model_onnx, - basename=prefix + "RegMis" + - clr.__class__.__name__ + - '_mismatched_estimator_counts') + model_onnx, prefix = convert_model( + clr, + "reg", + [("input", FloatTensorType([None, 4]))], + target_opset=TARGET_OPSET, + ) + dump_data_and_model( + X_test, + clr, + model_onnx, + basename=prefix + + "RegMis" + + clr.__class__.__name__ + + "_mismatched_estimator_counts", + ) @ignore_warnings(category=FutureWarning) def test_random_forest_regressor(self): @@ -128,14 +143,21 @@ def test_random_forest_regressor_mismatched_estimator_counts(self): # Training code can manipulate n_estimators causing # n_estimators != len(estimators_). So simulate that here. 
model.n_estimators += 1 - model_onnx, prefix = convert_model(model, 'single regressor', - [('input', - FloatTensorType([None, 2]))], - target_opset=TARGET_OPSET) - dump_data_and_model(X, model, model_onnx, - basename=prefix + "Reg" + - model.__class__.__name__ + - "_mismatched_estimator_counts") + model_onnx, prefix = convert_model( + model, + "single regressor", + [("input", FloatTensorType([None, 2]))], + target_opset=TARGET_OPSET, + ) + dump_data_and_model( + X, + model, + model_onnx, + basename=prefix + + "Reg" + + model.__class__.__name__ + + "_mismatched_estimator_counts", + ) @ignore_warnings(category=FutureWarning) def test_extra_trees_classifier(self): @@ -153,41 +175,49 @@ def test_extra_trees_regressor(self): @ignore_warnings(category=FutureWarning) def test_model_multi_class_nocl(self): model, X = fit_classification_model( - RandomForestClassifier(random_state=42), - 2, label_string=True) + RandomForestClassifier(random_state=42), 2, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class nocl", + model, + "multi-class nocl", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx dump_data_and_model( - X[:5], model, model_onnx, classes=model.classes_, - basename="SklearnRFMultiNoCl") + X[:5], + model, + model_onnx, + classes=model.classes_, + basename="SklearnRFMultiNoCl", + ) @ignore_warnings(category=FutureWarning) def test_model_multi_class_nocl_all(self): model, X = fit_classification_model( - RandomForestClassifier(random_state=42), - 2, label_string=True) + RandomForestClassifier(random_state=42), 2, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class nocl", + model, + "multi-class nocl", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx exp_label = model.predict(X) exp_proba = model.predict_proba(X) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": X.astype(numpy.float32)}) exp_label = numpy.array([int(cl[2:]) for cl in exp_label]) assert_almost_equal(exp_proba, got[1], decimal=5) diff = numpy.abs(exp_label - got[0]).sum() @@ -204,96 +234,113 @@ def test_model_multi_class_nocl_all(self): @ignore_warnings(category=FutureWarning) def test_random_forest_classifier_int(self): model, X = fit_classification_model( - RandomForestClassifier(n_estimators=5, random_state=42), - 3, is_int=True) + RandomForestClassifier(n_estimators=5, random_state=42), 3, is_int=True + ) model_onnx = convert_sklearn( - model, "random forest classifier", + model, + "random forest classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) 
self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRandomForestClassifierInt") + X, model, model_onnx, basename="SklearnRandomForestClassifierInt" + ) @ignore_warnings(category=FutureWarning) def test_extra_trees_classifier_int(self): model, X = fit_classification_model( - ExtraTreesClassifier(n_estimators=5, random_state=42), - 4, is_int=True) + ExtraTreesClassifier(n_estimators=5, random_state=42), 4, is_int=True + ) model_onnx = convert_sklearn( - model, "extra trees classifier", + model, + "extra trees classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnExtraTreesClassifierInt") + X, model, model_onnx, basename="SklearnExtraTreesClassifierInt" + ) @ignore_warnings(category=FutureWarning) def test_random_forest_classifier_bool(self): model, X = fit_classification_model( - RandomForestClassifier(n_estimators=5, random_state=42), - 3, is_bool=True) + RandomForestClassifier(n_estimators=5, random_state=42), 3, is_bool=True + ) model_onnx = convert_sklearn( - model, "random forest classifier", + model, + "random forest classifier", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRandomForestClassifierBool") + X, model, model_onnx, basename="SklearnRandomForestClassifierBool" + ) @ignore_warnings(category=FutureWarning) def test_extra_trees_classifier_bool(self): model, X = fit_classification_model( - ExtraTreesClassifier(n_estimators=5, random_state=42), - 2, is_bool=True) + ExtraTreesClassifier(n_estimators=5, random_state=42), 2, is_bool=True + ) model_onnx = convert_sklearn( - model, "extra trees regression", + model, + "extra trees regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnExtraTreesClassifierBool") + X, model, model_onnx, basename="SklearnExtraTreesClassifierBool" + ) @ignore_warnings(category=FutureWarning) def test_random_forest_classifier_double(self): model, X = fit_classification_model( - RandomForestClassifier(n_estimators=5, random_state=42), - 3, is_double=True) + RandomForestClassifier(n_estimators=5, random_state=42), 3, is_double=True + ) for opv in [1, 2, 3]: model_onnx = convert_sklearn( - model, "random forest classifier", + model, + "random forest classifier", [("input", DoubleTensorType([None, X.shape[1]]))], - target_opset={'ai.onnx.ml': opv, - '': TARGET_OPSET}) + target_opset={"ai.onnx.ml": opv, "": TARGET_OPSET}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRandomForestClassifierDouble") + X, model, model_onnx, basename="SklearnRandomForestClassifierDouble" + ) @ignore_warnings(category=FutureWarning) def test_model_random_forest_classifier_multi_output_int(self): model, X_test = fit_multi_output_classification_model( - RandomForestClassifier(random_state=42, n_estimators=20)) - options = {id(model): {'zipmap': False}} + RandomForestClassifier(random_state=42, n_estimators=20) + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "random forest classifier", + model, + "random forest classifier", [("input", Int64TensorType([None, 
X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test.astype(numpy.int64), model, model_onnx, - basename="SklearnRandomForestClassifierMultiOutputInt") + X_test.astype(numpy.int64), + model, + model_onnx, + basename="SklearnRandomForestClassifierMultiOutputInt", + ) @ignore_warnings(category=FutureWarning) def common_test_model_hgb_regressor(self, add_nan=False): rng = numpy.random.RandomState(12345) model = HistGradientBoostingRegressor(max_iter=4, max_depth=2) - X, y = make_regression(n_features=10, n_samples=1000, - n_targets=1, random_state=42) + X, y = make_regression( + n_features=10, n_samples=1000, n_targets=1, random_state=42 + ) if add_nan: rows = rng.randint(0, X.shape[0] - 1, X.shape[0] // 3) cols = rng.randint(0, X.shape[1] - 1, X.shape[0] // 3) @@ -301,117 +348,171 @@ def common_test_model_hgb_regressor(self, add_nan=False): X = X.astype(numpy.float32) y = y.astype(numpy.float32) - X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, - random_state=42) + X_train, X_test, y_train, _ = train_test_split( + X, y, test_size=0.5, random_state=42 + ) model.fit(X_train, y_train) model_onnx = convert_sklearn( - model, "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "unused", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) X_test = X_test.astype(numpy.float32)[:10] dump_data_and_model( - X_test, model, model_onnx, - basename=f"SklearnHGBRegressor{add_nan}", verbose=False) - - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="missing_go_to_left is missing") - @unittest.skipIf(HistGradientBoostingRegressor is None, - reason="scikit-learn 0.22 + manual activation") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'), - reason="issue with nan for earlier ort") + X_test, + model, + model_onnx, + basename=f"SklearnHGBRegressor{add_nan}", + verbose=False, + ) + + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), + reason="missing_go_to_left is missing", + ) + @unittest.skipIf( + HistGradientBoostingRegressor is None, + reason="scikit-learn 0.22 + manual activation", + ) + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), + reason="issue with nan for earlier ort", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_regressor_nonan(self): self.common_test_model_hgb_regressor(False) - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="NaN not allowed") - @unittest.skipIf(HistGradientBoostingRegressor is None, - reason="scikit-learn 0.22 + manual activation") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'), - reason="issue with nan for earlier ort") + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed" + ) + @unittest.skipIf( + HistGradientBoostingRegressor is None, + reason="scikit-learn 0.22 + manual activation", + ) + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), + reason="issue with nan for earlier ort", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_regressor_nan(self): self.common_test_model_hgb_regressor(True) def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2): model = HistGradientBoostingClassifier(max_iter=5, max_depth=2) - 
X, y = make_classification(n_features=10, n_samples=1000, - n_informative=4, n_classes=n_classes, - random_state=42) + X, y = make_classification( + n_features=10, + n_samples=1000, + n_informative=4, + n_classes=n_classes, + random_state=42, + ) if add_nan: rows = numpy.random.randint(0, X.shape[0] - 1, X.shape[0] // 3) cols = numpy.random.randint(0, X.shape[1] - 1, X.shape[0] // 3) X[rows, cols] = numpy.nan - X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, - random_state=42) + X_train, X_test, y_train, _ = train_test_split( + X, y, test_size=0.5, random_state=42 + ) model.fit(X_train, y_train) model_onnx = convert_sklearn( - model, "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "unused", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) X_test = X_test.astype(numpy.float32)[:5] dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnHGBClassifier%s%d" % ( - "nan" if add_nan else '', n_classes), - verbose=False) + X_test, + model, + model_onnx, + basename="SklearnHGBClassifier%s%d" % ("nan" if add_nan else "", n_classes), + verbose=False, + ) if n_classes == 2: model_onnx = convert_sklearn( - model, "unused", + model, + "unused", [("input", FloatTensorType([None, X.shape[1]]))], - options={model.__class__: {'raw_scores': True}}, - target_opset=TARGET_OPSET) + options={model.__class__: {"raw_scores": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) X_test = X_test.astype(numpy.float32)[:5] # There is a bug in onnxruntime <= 1.1.0. # Raw scores are always positive. dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnHGBClassifierRaw%s%d" % ( - "nan" if add_nan else '', n_classes), + X_test, + model, + model_onnx, + basename="SklearnHGBClassifierRaw%s%d" + % ("nan" if add_nan else "", n_classes), verbose=False, - methods=['predict', 'decision_function_binary']) + methods=["predict", "decision_function_binary"], + ) - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="missing_go_to_left is missing") - @unittest.skipIf(HistGradientBoostingClassifier is None, - reason="scikit-learn 0.22 + manual activation") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'), - reason="issue with nan for earlier ort") + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), + reason="missing_go_to_left is missing", + ) + @unittest.skipIf( + HistGradientBoostingClassifier is None, + reason="scikit-learn 0.22 + manual activation", + ) + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), + reason="issue with nan for earlier ort", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_classifier_nonan(self): self.common_test_model_hgb_classifier(False) - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="NaN not allowed") - @unittest.skipIf(HistGradientBoostingClassifier is None, - reason="scikit-learn 0.22 + manual activation") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'), - reason="issue with nan for earlier ort") + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed" + ) + @unittest.skipIf( + HistGradientBoostingClassifier is None, + reason="scikit-learn 0.22 + manual activation", + ) + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), + reason="issue with nan for earlier ort", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_classifier_nan(self): 
self.common_test_model_hgb_classifier(True) - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="missing_go_to_left is missing") - @unittest.skipIf(HistGradientBoostingClassifier is None, - reason="scikit-learn 0.22 + manual activation") - @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'), - reason="issue with nan for earlier ort") + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), + reason="missing_go_to_left is missing", + ) + @unittest.skipIf( + HistGradientBoostingClassifier is None, + reason="scikit-learn 0.22 + manual activation", + ) + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.2.0"), + reason="issue with nan for earlier ort", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_classifier_nonan_multi(self): self.common_test_model_hgb_classifier(False, n_classes=3) - @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'), - reason="NaN not allowed") - @unittest.skipIf(HistGradientBoostingClassifier is None, - reason="scikit-learn 0.22 + manual activation") + @unittest.skipIf( + _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed" + ) + @unittest.skipIf( + HistGradientBoostingClassifier is None, + reason="scikit-learn 0.22 + manual activation", + ) @ignore_warnings(category=FutureWarning) def test_model_hgb_classifier_nan_multi(self): self.common_test_model_hgb_classifier(True, n_classes=3) @@ -419,135 +520,178 @@ def test_model_hgb_classifier_nan_multi(self): @ignore_warnings(category=FutureWarning) def test_model_random_forest_classifier_multilabel(self): model, X_test = fit_multilabel_classification_model( - RandomForestClassifier(random_state=42, n_estimators=5)) - options = {id(model): {'zipmap': False}} + RandomForestClassifier(random_state=42, n_estimators=5) + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn RandomForestClassifier", + model, + "scikit-learn RandomForestClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnRandomForestClassifierMultiLabel-Out0") + X_test, + model, + model_onnx, + basename="SklearnRandomForestClassifierMultiLabel-Out0", + ) @ignore_warnings(category=FutureWarning) def test_model_random_forest_classifier_multilabel_low_samples(self): model, X_test = fit_multilabel_classification_model( - RandomForestClassifier(random_state=42, n_estimators=5), - n_samples=4) - options = {id(model): {'zipmap': False}} + RandomForestClassifier(random_state=42, n_estimators=5), n_samples=4 + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn RandomForestClassifier", + model, + "scikit-learn RandomForestClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnRandomForestClassifierMultiLabelLowSamples-Out0") + X_test, + model, + model_onnx, + basename="SklearnRandomForestClassifierMultiLabelLowSamples-Out0", + ) @ignore_warnings(category=FutureWarning) def 
test_model_extra_trees_classifier_multilabel(self): model, X_test = fit_multilabel_classification_model( - ExtraTreesClassifier(random_state=42, n_estimators=5)) - options = {id(model): {'zipmap': False}} + ExtraTreesClassifier(random_state=42, n_estimators=5) + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn ExtraTreesClassifier", + model, + "scikit-learn ExtraTreesClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnExtraTreesClassifierMultiLabel-Out0") + X_test, + model, + model_onnx, + basename="SklearnExtraTreesClassifierMultiLabel-Out0", + ) @ignore_warnings(category=FutureWarning) def test_model_extra_trees_classifier_multilabel_low_samples(self): model, X_test = fit_multilabel_classification_model( - ExtraTreesClassifier(random_state=42, n_estimators=5), - n_samples=10) - options = {id(model): {'zipmap': False}} + ExtraTreesClassifier(random_state=42, n_estimators=5), n_samples=10 + ) + options = {id(model): {"zipmap": False}} model_onnx = convert_sklearn( - model, "scikit-learn ExtraTreesClassifier", + model, + "scikit-learn ExtraTreesClassifier", [("input", FloatTensorType([None, X_test.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - assert 'zipmap' not in str(model_onnx).lower() + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_test, model, model_onnx, - basename="SklearnExtraTreesClassifierMultiLabelLowSamples-Out0") + X_test, + model, + model_onnx, + basename="SklearnExtraTreesClassifierMultiLabelLowSamples-Out0", + ) @ignore_warnings(category=FutureWarning) def test_boston_pca_rf(self): X, y = make_regression(100, n_features=10) - X_train, X_test, y_train, y_test = train_test_split( - X, y, random_state=0) - pipe = Pipeline([ - ('acp', PCA(n_components=3)), - ('rf', RandomForestRegressor(n_estimators=100))]) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + pipe = Pipeline( + [ + ("acp", PCA(n_components=3)), + ("rf", RandomForestRegressor(n_estimators=100)), + ] + ) pipe.fit(X_train, y_train) X32 = X_test.astype(numpy.float32) model_onnx = to_onnx(pipe, X32[:1], target_opset=TARGET_OPSET) dump_data_and_model( - X32, pipe, model_onnx, methods=['predict'], - basename="SklearnBostonPCARF-Dec4") + X32, + pipe, + model_onnx, + methods=["predict"], + basename="SklearnBostonPCARF-Dec4", + ) @ignore_warnings(category=FutureWarning) def test_random_forest_regressor_int(self): model, X = fit_regression_model( - RandomForestRegressor(n_estimators=5, random_state=42), - is_int=True) + RandomForestRegressor(n_estimators=5, random_state=42), is_int=True + ) model_onnx = convert_sklearn( - model, "random forest regression", + model, + "random forest regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRandomForestRegressorInt-Dec4",) + X, + model, + model_onnx, + basename="SklearnRandomForestRegressorInt-Dec4", + ) @ignore_warnings(category=FutureWarning) def test_extra_trees_regressor_int(self): model, X 
= fit_regression_model( - ExtraTreesRegressor(n_estimators=5, random_state=42), - is_int=True) + ExtraTreesRegressor(n_estimators=5, random_state=42), is_int=True + ) model_onnx = convert_sklearn( - model, "extra trees regression", + model, + "extra trees regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnExtraTreesRegressorInt-Dec4") + X, model, model_onnx, basename="SklearnExtraTreesRegressorInt-Dec4" + ) @ignore_warnings(category=FutureWarning) def test_random_forest_regressor_bool(self): model, X = fit_regression_model( - RandomForestRegressor(n_estimators=5, random_state=42), - is_bool=True) + RandomForestRegressor(n_estimators=5, random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "random forest regression", + model, + "random forest regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnRandomForestRegressorBool-Dec4") + X, model, model_onnx, basename="SklearnRandomForestRegressorBool-Dec4" + ) @ignore_warnings(category=FutureWarning) def test_extra_trees_regressor_bool(self): model, X = fit_regression_model( - ExtraTreesRegressor(n_estimators=5, random_state=42), - is_bool=True) + ExtraTreesRegressor(n_estimators=5, random_state=42), is_bool=True + ) model_onnx = convert_sklearn( - model, "extra trees regression", + model, + "extra trees regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnExtraTreesRegressorBool-Dec4") + X, model, model_onnx, basename="SklearnExtraTreesRegressorBool-Dec4" + ) @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") @ignore_warnings(category=FutureWarning) @@ -556,20 +700,22 @@ def test_randomforestregressor_decision_path(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) exp = binary_array_to_string(dec[0].todense()) - got = numpy.array([''.join(row) for row in res[1]]) + got = numpy.array(["".join(row) for row in res[1]]) assert exp == got.ravel().tolist() @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") @@ -579,20 +725,22 @@ def test_extratreesregressor_decision_path(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] 
model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) exp = binary_array_to_string(dec[0].todense()) - got = numpy.array([''.join(row) for row in res[1]]) + got = numpy.array(["".join(row) for row in res[1]]) assert exp == got.ravel().tolist() @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") @@ -602,22 +750,24 @@ def test_randomforestclassifier_decision_path(self): X, y = make_classification(3, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) prob = model.predict_proba(X) assert_almost_equal(prob, res[1]) dec = model.decision_path(X) exp = binary_array_to_string(dec[0].todense()) - got = numpy.array([''.join(row) for row in res[2]]) + got = numpy.array(["".join(row) for row in res[2]]) assert exp == got.ravel().tolist() @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") @@ -627,22 +777,24 @@ def test_extratreesclassifier_decision_path(self): X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_path': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_path": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) prob = model.predict_proba(X) assert_almost_equal(prob, res[1]) dec = model.decision_path(X) exp = binary_array_to_string(dec[0].todense()) - got = numpy.array([''.join(row) for row in res[2]]) + got = numpy.array(["".join(row) for row in res[2]]) assert exp == got.ravel().tolist() @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") @@ -652,16 +804,18 @@ def test_rf_regressor_decision_leaf(self): X, 
y = make_regression(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel(), decimal=4) dec = model.decision_path(X) @@ -675,22 +829,23 @@ def test_rf_regressor_decision_path_leaf(self): X, y = make_regression(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True, - 'decision_path': True}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True, "decision_path": True}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel(), decimal=4) dec = model.decision_path(X) exp_leaf = path_to_leaf(model.estimators_, dec[0].todense(), dec[1]) exp_path = binary_array_to_string(dec[0].todense()) - got_path = numpy.array([''.join(row) for row in res[1]]) + got_path = numpy.array(["".join(row) for row in res[1]]) assert exp_path == got_path.ravel().tolist() assert exp_leaf.tolist() == res[2].tolist() @@ -701,15 +856,17 @@ def test_rf_classifier_decision_leaf(self): X, y = make_classification(3, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - options={id(model): {'decision_leaf': True, 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={id(model): {"decision_leaf": True, "zipmap": False}}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) @@ -723,23 +880,29 @@ def test_rf_classifier_decision_path_leaf(self): X, y = make_classification(3, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) - initial_types = [('input', FloatTensorType((None, X.shape[1])))] + initial_types = [("input", FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( - model, initial_types=initial_types, - 
options={id(model): {'decision_leaf': True, - 'decision_path': True, - 'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + initial_types=initial_types, + options={ + id(model): { + "decision_leaf": True, + "decision_path": True, + "zipmap": False, + } + }, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(numpy.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(numpy.float32)}) pred = model.predict(X) assert_almost_equal(pred, res[0].ravel()) dec = model.decision_path(X) exp_leaf = path_to_leaf(model.estimators_, dec[0].todense(), dec[1]) exp_path = binary_array_to_string(dec[0].todense()) - got_path = numpy.array([''.join(row) for row in res[2]]) + got_path = numpy.array(["".join(row) for row in res[2]]) assert exp_path == got_path.ravel().tolist() assert exp_leaf.tolist() == res[3].tolist() diff --git a/tests/test_sklearn_random_projection.py b/tests/test_sklearn_random_projection.py index 67b0de17e..707b6e5a2 100644 --- a/tests/test_sklearn_random_projection.py +++ b/tests/test_sklearn_random_projection.py @@ -10,11 +10,10 @@ from skl2onnx.common.data_types import FloatTensorType from test_utils import dump_data_and_model, TARGET_OPSET -nort = pv.Version(onnxruntime.__version__) < pv.Version('0.5.0') +nort = pv.Version(onnxruntime.__version__) < pv.Version("0.5.0") class TestSklearnRandomProjection(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available") def test_gaussian_random_projection_float32(self): rng = np.random.RandomState(42) @@ -23,12 +22,15 @@ def test_gaussian_random_projection_float32(self): model = pt.fit(X) assert model.transform(X).shape[1] == 4 model_onnx = convert_sklearn( - model, "scikit-learn GaussianRandomProjection", + model, + "scikit-learn GaussianRandomProjection", [("inputs", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model(X.astype(np.float32), model, - model_onnx, basename="GaussianRandomProjection") + dump_data_and_model( + X.astype(np.float32), model, model_onnx, basename="GaussianRandomProjection" + ) @unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available") def test_gaussian_random_projection_float64(self): @@ -38,8 +40,7 @@ def test_gaussian_random_projection_float64(self): model = pt.fit(X) model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET) self.assertIsNotNone(model_onnx) - dump_data_and_model(X, model, - model_onnx, basename="GaussianRandomProjection64") + dump_data_and_model(X, model, model_onnx, basename="GaussianRandomProjection64") if __name__ == "__main__": diff --git a/tests/test_sklearn_random_trees_embedding.py b/tests/test_sklearn_random_trees_embedding.py index c1a52db48..7d68c5272 100644 --- a/tests/test_sklearn_random_trees_embedding.py +++ b/tests/test_sklearn_random_trees_embedding.py @@ -3,6 +3,7 @@ import unittest import numpy from onnxruntime import InferenceSession + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -11,48 +12,48 @@ from sklearn.utils.testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.datasets import make_regression -from sklearn.ensemble import ( - RandomTreesEmbedding) +from sklearn.ensemble import RandomTreesEmbedding from skl2onnx import to_onnx from test_utils import 
TARGET_OPSET, dump_data_and_model class TestSklearnRandomTreeEmbeddings(unittest.TestCase): - - def check_model(self, model, X, name='X'): + def check_model(self, model, X, name="X"): try: sess = InferenceSession( - model.SerializeToString(), - providers=["CPUExecutionProvider"]) + model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except Exception as e: - raise AssertionError( - "Unable to load model\n%s" % str(model)) from e + raise AssertionError("Unable to load model\n%s" % str(model)) from e try: return sess.run(None, {name: X[:7]}) except Exception as e: raise AssertionError( - "Unable to run model X.shape=%r X.dtype=%r\n%s" % ( - X[:7].shape, X.dtype, str(model))) from e + "Unable to run model X.shape=%r X.dtype=%r\n%s" + % (X[:7].shape, X.dtype, str(model)) + ) from e - @ignore_warnings(category=(FutureWarning, ConvergenceWarning, - DeprecationWarning)) + @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) def test_random_trees_embedding(self): X, _ = make_regression( - n_features=5, n_samples=100, n_targets=1, random_state=42, - n_informative=3) + n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3 + ) X = X.astype(numpy.float32) model = RandomTreesEmbedding( - n_estimators=3, max_depth=2, sparse_output=False).fit(X) + n_estimators=3, max_depth=2, sparse_output=False + ).fit(X) model.transform(X) - model_onnx = to_onnx( - model, X[:1], target_opset=TARGET_OPSET) + model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET) with open("model.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) self.check_model(model_onnx, X) dump_data_and_model( - X.astype(numpy.float32), model, model_onnx, - basename="SklearnRandomTreesEmbedding") + X.astype(numpy.float32), + model, + model_onnx, + basename="SklearnRandomTreesEmbedding", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_replace_transformer.py b/tests/test_sklearn_replace_transformer.py index 5bc980660..9f8559d5b 100644 --- a/tests/test_sklearn_replace_transformer.py +++ b/tests/test_sklearn_replace_transformer.py @@ -6,6 +6,7 @@ import unittest import numpy from sklearn.pipeline import Pipeline + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -17,30 +18,35 @@ class TestSklearnCastTransformerConverter(unittest.TestCase): - def common_test_replace_transformer(self, dtype, input_type): - model = Pipeline([ - ('replace', ReplaceTransformer(dtype=numpy.float32)), - ]) - data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0], - [0, 2, 1], [1, 0, 2]], - dtype=numpy.float32) + model = Pipeline( + [ + ("replace", ReplaceTransformer(dtype=numpy.float32)), + ] + ) + data = numpy.array( + [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32 + ) model.fit(data) pred = model.steps[0][1].transform(data) assert pred.dtype == dtype model_onnx = convert_sklearn( - model, "cast", [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model, + "cast", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - data, model, model_onnx, - basename="SklearnCastTransformer{}".format( - input_type.__class__.__name__)) + data, + model, + model_onnx, + basename="SklearnCastTransformer{}".format(input_type.__class__.__name__), + ) @unittest.skipIf(TARGET_OPSET < 11, reason="not supported") def test_replace_transformer(self): - self.common_test_replace_transformer( - numpy.float32, FloatTensorType) + 
self.common_test_replace_transformer(numpy.float32, FloatTensorType) if __name__ == "__main__": diff --git a/tests/test_sklearn_scaler_converter.py b/tests/test_sklearn_scaler_converter.py index 553731f2e..cde0231a5 100644 --- a/tests/test_sklearn_scaler_converter.py +++ b/tests/test_sklearn_scaler_converter.py @@ -8,7 +8,12 @@ import numpy from onnxruntime import __version__ as ort_version from sklearn.preprocessing import ( - StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler) + StandardScaler, + RobustScaler, + MinMaxScaler, + MaxAbsScaler, +) + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -17,72 +22,93 @@ from sklearn.utils.testing import ignore_warnings from skl2onnx import convert_sklearn from skl2onnx.common.data_types import ( - Int64TensorType, FloatTensorType, DoubleTensorType) + Int64TensorType, + FloatTensorType, + DoubleTensorType, +) from test_utils import dump_data_and_model, TARGET_OPSET -ort_version = ".".join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnScalerConverter(unittest.TestCase): - @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_int(self): model = StandardScaler() data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", Int64TensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.int64), - model, model_onnx, - basename="SklearnStandardScalerInt64") + model, + model_onnx, + basename="SklearnStandardScalerInt64", + ) @ignore_warnings(category=DeprecationWarning) def test_min_max_scaler_int(self): model = MinMaxScaler() data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", Int64TensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", Int64TensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.int64), - model, model_onnx, - basename="SklearnMinMaxScalerInt64") + model, + model_onnx, + basename="SklearnMinMaxScalerInt64", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_double(self): model = StandardScaler() data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", DoubleTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", DoubleTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float64), - model, model_onnx, - basename="SklearnStandardScalerDouble") + model, + model_onnx, + basename="SklearnStandardScalerDouble", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_blacklist(self): model = StandardScaler() - data = numpy.array([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]], - dtype=numpy.float32) + data = numpy.array( + [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32 + ) model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET, - black_op={'Normalizer', 'Scaler'}) - 
self.assertNotIn('Normalizer', str(model_onnx)) - self.assertNotIn('Scaler', str(model_onnx)) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + black_op={"Normalizer", "Scaler"}, + ) + self.assertNotIn("Normalizer", str(model_onnx)) + self.assertNotIn("Scaler", str(model_onnx)) dump_data_and_model( - data, model, model_onnx, - basename="SklearnStandardScalerBlackList") + data, model, model_onnx, basename="SklearnStandardScalerBlackList" + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats(self): @@ -94,13 +120,18 @@ def test_standard_scaler_floats(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32") + model, + basename="SklearnStandardScalerFloat32", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats_div(self): @@ -113,13 +144,18 @@ def test_standard_scaler_floats_div(self): ] model.fit(data) model_onnx = convert_sklearn( - model, "scaler", [("input", FloatTensorType([None, 3]))], - options={id(model): {'div': 'div'}}) + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + options={id(model): {"div": "div"}}, + ) assert 'op_type: "Div"' in str(model_onnx) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32Div") + model, + basename="SklearnStandardScalerFloat32Div", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats_div_cast(self): @@ -132,16 +168,21 @@ def test_standard_scaler_floats_div_cast(self): ] model.fit(data) model_onnx = convert_sklearn( - model, "cast", [("input", FloatTensorType([None, 3]))], - options={id(model): {'div': 'div_cast'}}, - target_opset=TARGET_OPSET) + model, + "cast", + [("input", FloatTensorType([None, 3]))], + options={id(model): {"div": "div_cast"}}, + target_opset=TARGET_OPSET, + ) assert 'op_type: "Div"' in str(model_onnx) assert 'caler"' not in str(model_onnx) assert "double_data:" in str(model_onnx) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32DivCast") + model, + basename="SklearnStandardScalerFloat32DivCast", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats_no_std(self): @@ -153,13 +194,18 @@ def test_standard_scaler_floats_no_std(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32NoStd") + model, + basename="SklearnStandardScalerFloat32NoStd", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats_no_mean(self): @@ -171,13 +217,18 @@ def test_standard_scaler_floats_no_mean(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", 
- [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32NoMean") + model, + basename="SklearnStandardScalerFloat32NoMean", + ) @ignore_warnings(category=DeprecationWarning) def test_standard_scaler_floats_no_mean_std(self): @@ -189,13 +240,18 @@ def test_standard_scaler_floats_no_mean_std(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnStandardScalerFloat32NoMeanStd") + model, + basename="SklearnStandardScalerFloat32NoMeanStd", + ) @ignore_warnings(category=DeprecationWarning) def test_robust_scaler_floats(self): @@ -207,13 +263,18 @@ def test_robust_scaler_floats(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnRobustScalerFloat32") + model, + basename="SklearnRobustScalerFloat32", + ) @ignore_warnings(category=DeprecationWarning) def test_robust_scaler_doubles(self): @@ -225,13 +286,19 @@ def test_robust_scaler_doubles(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", DoubleTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", DoubleTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float64), - model, model_onnx, basename="SklearnRobustScalerFloat64") + model, + model_onnx, + basename="SklearnRobustScalerFloat64", + ) @ignore_warnings(category=DeprecationWarning) def test_robust_scaler_floats_no_bias(self): @@ -243,14 +310,18 @@ def test_robust_scaler_floats_no_bias(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), model, - basename="SklearnRobustScalerWithCenteringFloat32") + basename="SklearnRobustScalerWithCenteringFloat32", + ) @ignore_warnings(category=DeprecationWarning) def test_robust_scaler_floats_no_scaling(self): @@ -262,13 +333,18 @@ def test_robust_scaler_floats_no_scaling(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) 
dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnRobustScalerNoScalingFloat32") + model, + basename="SklearnRobustScalerNoScalingFloat32", + ) @ignore_warnings(category=DeprecationWarning) def test_robust_scaler_floats_no_centering_scaling(self): @@ -280,14 +356,18 @@ def test_robust_scaler_floats_no_centering_scaling(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), model, - basename="SklearnRobustScalerNoCenteringScalingFloat32") + basename="SklearnRobustScalerNoCenteringScalingFloat32", + ) @ignore_warnings(category=DeprecationWarning) def test_min_max_scaler(self): @@ -299,13 +379,18 @@ def test_min_max_scaler(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnMinMaxScaler") + model, + basename="SklearnMinMaxScaler", + ) @ignore_warnings(category=DeprecationWarning) def test_min_max_scaler_double(self): @@ -317,18 +402,26 @@ def test_min_max_scaler_double(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", DoubleTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", DoubleTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float64), - model, model_onnx, basename="SklearnMinMaxScalerDouble") + model, + model_onnx, + basename="SklearnMinMaxScalerDouble", + ) @ignore_warnings(category=DeprecationWarning) @unittest.skipIf(TARGET_OPSET < 15, reason="old signature for clip") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.9.0"), - reason="Operator clip not fully implemented") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.9.0"), + reason="Operator clip not fully implemented", + ) def test_min_max_scaler_clip(self): model = MinMaxScaler(clip=True) data = [ @@ -338,21 +431,29 @@ def test_min_max_scaler_clip(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) data[0][0] = 1e6 data[0][1] = 5 data[0][2] = -1.0 dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, model_onnx, basename="SklearnMinMaxScalerClip") + model, + model_onnx, + basename="SklearnMinMaxScalerClip", + ) @ignore_warnings(category=DeprecationWarning) @unittest.skipIf(TARGET_OPSET < 15, reason="old signature for clip") - @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.9.0"), - reason="Operator clip not fully implemented") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.9.0"), + reason="Operator clip not fully implemented", + ) def 
test_min_max_scaler_double_clip(self): model = MinMaxScaler(clip=True) data = [ @@ -362,16 +463,22 @@ def test_min_max_scaler_double_clip(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", DoubleTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", DoubleTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) data[0][0] = 1e6 data[0][1] = 5 data[0][2] = -1.0 dump_data_and_model( numpy.array(data, dtype=numpy.float64), - model, model_onnx, basename="SklearnMinMaxScalerDouble") + model, + model_onnx, + basename="SklearnMinMaxScalerDouble", + ) @ignore_warnings(category=DeprecationWarning) def test_max_abs_scaler(self): @@ -383,13 +490,18 @@ def test_max_abs_scaler(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", FloatTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", FloatTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float32), - model, basename="SklearnMaxAbsScaler") + model, + basename="SklearnMaxAbsScaler", + ) @ignore_warnings(category=DeprecationWarning) def test_max_abs_scaler_double(self): @@ -401,13 +513,19 @@ def test_max_abs_scaler_double(self): [1.0, 0.0, 2.0], ] model.fit(data) - model_onnx = convert_sklearn(model, "scaler", - [("input", DoubleTensorType([None, 3]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "scaler", + [("input", DoubleTensorType([None, 3]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( numpy.array(data, dtype=numpy.float64), - model, model_onnx, basename="SklearnMaxAbsScalerDouble") + model, + model_onnx, + basename="SklearnMaxAbsScalerDouble", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_sgd_classifier_converter.py b/tests/test_sklearn_sgd_classifier_converter.py index 7db1d99d4..a9d291917 100644 --- a/tests/test_sklearn_sgd_classifier_converter.py +++ b/tests/test_sklearn_sgd_classifier_converter.py @@ -14,329 +14,465 @@ FloatTensorType, Int64TensorType, ) -from test_utils import ( - dump_data_and_model, - fit_classification_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET ort_version = ".".join(ort_version.split(".")[:2]) -LOG_LOSS = ("log_loss" if pv.Version(skl_version) >= pv.Version("1.1") - else "log") +LOG_LOSS = "log_loss" if pv.Version(skl_version) >= pv.Version("1.1") else "log" class TestSGDClassifierConverter(unittest.TestCase): - def test_model_sgd_binary_class_hinge(self): model, X = fit_classification_model( - SGDClassifier(loss='hinge', random_state=42), 2) + SGDClassifier(loss="hinge", random_state=42), 2 + ) model_onnx = convert_sklearn( model, "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierBinaryHinge-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinaryHinge-Out0", + ) def test_model_sgd_multi_class_hinge(self): model, X = fit_classification_model( - SGDClassifier(loss='hinge', random_state=42), 5) + SGDClassifier(loss="hinge", 
random_state=42), 5 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiHinge-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiHinge-Out0", + ) def test_model_sgd_multi_class_hinge_string(self): model, X = fit_classification_model( - SGDClassifier(loss='hinge', random_state=42), 5, label_string=True) + SGDClassifier(loss="hinge", random_state=42), 5, label_string=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiHinge-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiHinge-Out0", + ) - @unittest.skipIf(TARGET_OPSET < 13, - reason="duplicated test") + @unittest.skipIf(TARGET_OPSET < 13, reason="duplicated test") def test_model_sgd_binary_class_log_sigmoid(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 2, n_features=2) + SGDClassifier(loss=LOG_LOSS, random_state=42), 2, n_features=2 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=10, options={'zipmap': False}) + target_opset=10, + options={"zipmap": False}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32)[:5], model, model_onnx, + X.astype(np.float32)[:5], + model, + model_onnx, basename="SklearnSGDClassifierBinaryLog-Dec4", - verbose=False) + verbose=False, + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32)[:5], model, model_onnx, + X.astype(np.float32)[:5], + model, + model_onnx, basename="SklearnSGDClassifierBinaryLog13-Dec4", - verbose=False) + verbose=False, + ) def test_model_sgd_binary_class_log(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 2) + SGDClassifier(loss=LOG_LOSS, random_state=42), 2 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=min(TARGET_OPSET, 10)) + target_opset=min(TARGET_OPSET, 10), + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierBinaryLog-Dec4") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinaryLog-Dec4", + ) def test_model_sgd_binary_class_log_decision_function(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 2) - options = {id(model): {'raw_scores': True}} + 
SGDClassifier(loss=LOG_LOSS, random_state=42), 2 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], options=options, - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, + X.astype(np.float32), + model, + model_onnx, basename="SklearnSGDClassifierBinaryLogDecisionFunction-Dec3", - methods=['predict', 'decision_function_binary']) + methods=["predict", "decision_function_binary"], + ) def test_model_sgd_multi_class_log(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 5) + SGDClassifier(loss=LOG_LOSS, random_state=42), 5 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=min(12, TARGET_OPSET)) + target_opset=min(12, TARGET_OPSET), + ) X = np.array([X[1], X[1]]) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiLog") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiLog", + ) @unittest.skipIf(TARGET_OPSET < 13, reason="duplicated test") def test_model_sgd_multi_class_log_sigmoid(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 5) + SGDClassifier(loss=LOG_LOSS, random_state=42), 5 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET, options={'zipmap': False}) + target_opset=TARGET_OPSET, + options={"zipmap": False}, + ) X = np.array([X[1], X[1]]) dump_data_and_model( - X.astype(np.float32), model, model_onnx, verbose=False, - basename="SklearnSGDClassifierMultiLog13") + X.astype(np.float32), + model, + model_onnx, + verbose=False, + basename="SklearnSGDClassifierMultiLog13", + ) def test_model_sgd_multi_class_log_decision_function(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 3) - options = {id(model): {'raw_scores': True}} + SGDClassifier(loss=LOG_LOSS, random_state=42), 3 + ) + options = {id(model): {"raw_scores": True}} model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options=options, target_opset=TARGET_OPSET) + options=options, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, + X.astype(np.float32), + model, + model_onnx, basename="SklearnSGDClassifierMultiLogDecisionFunction-Dec3", - methods=['predict', 'decision_function']) + methods=["predict", "decision_function"], + ) def test_model_sgd_binary_class_log_l1_no_intercept(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, penalty='l1', fit_intercept=False, - random_state=42), 2) + SGDClassifier( + loss=LOG_LOSS, penalty="l1", fit_intercept=False, random_state=42 + ), + 2, + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - 
target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierBinaryLogL1NoIntercept-Dec4") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinaryLogL1NoIntercept-Dec4", + ) - @unittest.skipIf(pv.Version(ort_version) <= pv.Version("1.0.0"), - reason="discrepencies") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.0.0"), reason="discrepencies" + ) def test_model_sgd_multi_class_log_l1_no_intercept(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, penalty='l1', fit_intercept=False, - random_state=43), 3, n_features=7) + SGDClassifier( + loss=LOG_LOSS, penalty="l1", fit_intercept=False, random_state=43 + ), + 3, + n_features=7, + ) X = np.array([X[4], X[4]]) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(np.float32), - model, model_onnx, verbose=False, - basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4") + model, + model_onnx, + verbose=False, + basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4", + ) def test_model_sgd_binary_class_elasticnet_power_t(self): model, X = fit_classification_model( - SGDClassifier(penalty='elasticnet', l1_ratio=0.3, - power_t=2, random_state=42), 2) + SGDClassifier( + penalty="elasticnet", l1_ratio=0.3, power_t=2, random_state=42 + ), + 2, + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierBinaryElasticnetPowerT-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinaryElasticnetPowerT-Out0", + ) def test_model_sgd_multi_class_elasticnet_power_t(self): model, X = fit_classification_model( - SGDClassifier(penalty='elasticnet', l1_ratio=0.3, - power_t=2, random_state=42), 5) + SGDClassifier( + penalty="elasticnet", l1_ratio=0.3, power_t=2, random_state=42 + ), + 5, + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiElasticnetPowerT-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiElasticnetPowerT-Out0", + ) def test_model_sgd_binary_class_squared_hinge(self): model, X = fit_classification_model( - SGDClassifier(loss='squared_hinge', random_state=42), 2) + SGDClassifier(loss="squared_hinge", random_state=42), 2 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - 
basename="SklearnSGDClassifierBinarySquaredHinge-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinarySquaredHinge-Out0", + ) def test_model_sgd_multi_class_squared_hinge(self): model, X = fit_classification_model( - SGDClassifier(loss='squared_hinge', random_state=42), 5) + SGDClassifier(loss="squared_hinge", random_state=42), 5 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiSquaredHinge-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiSquaredHinge-Out0", + ) def test_model_sgd_binary_class_perceptron(self): model, X = fit_classification_model( - SGDClassifier(loss='perceptron', random_state=42), 2) + SGDClassifier(loss="perceptron", random_state=42), 2 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierBinaryPerceptron-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierBinaryPerceptron-Out0", + ) def test_model_sgd_multi_class_perceptron(self): model, X = fit_classification_model( - SGDClassifier(loss='perceptron', random_state=42), 5) + SGDClassifier(loss="perceptron", random_state=42), 5 + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X.astype(np.float32), model, model_onnx, - basename="SklearnSGDClassifierMultiPerceptron-Out0") + X.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDClassifierMultiPerceptron-Out0", + ) def test_model_sgd_binary_class_hinge_int(self): model, X = fit_classification_model( - SGDClassifier(loss='hinge', random_state=42), 2, is_int=True) + SGDClassifier(loss="hinge", random_state=42), 2, is_int=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDClassifierBinaryHingeInt-Out0") + X, model, model_onnx, basename="SklearnSGDClassifierBinaryHingeInt-Out0" + ) def test_model_sgd_multi_class_hinge_int(self): model, X = fit_classification_model( - SGDClassifier(loss='hinge', random_state=42), 5, is_int=True) + SGDClassifier(loss="hinge", random_state=42), 5, is_int=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDClassifierMultiHingeInt-Out0") + 
X, model, model_onnx, basename="SklearnSGDClassifierMultiHingeInt-Out0" + ) def test_model_sgd_binary_class_log_int(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_int=True) + SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_int=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDClassifierBinaryLogInt") + X, model, model_onnx, basename="SklearnSGDClassifierBinaryLogInt" + ) def test_model_sgd_binary_class_log_bool(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_bool=True) + SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_bool=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD binary classifier", + model, + "scikit-learn SGD binary classifier", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDClassifierBinaryLogBool") + X, model, model_onnx, basename="SklearnSGDClassifierBinaryLogBool" + ) def test_model_sgd_multi_class_log_int(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), 5, is_int=True) + SGDClassifier(loss=LOG_LOSS, random_state=42), 5, is_int=True + ) model_onnx = convert_sklearn( - model, "scikit-learn SGD multi-class classifier", + model, + "scikit-learn SGD multi-class classifier", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) X = X[6:8] self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, - basename="SklearnSGDClassifierMultiLogInt") + X, model, model_onnx, basename="SklearnSGDClassifierMultiLogInt" + ) def test_model_multi_class_nocl(self): model, X = fit_classification_model( - SGDClassifier(loss=LOG_LOSS, random_state=42), - 2, label_string=True) + SGDClassifier(loss=LOG_LOSS, random_state=42), 2, label_string=True + ) model_onnx = convert_sklearn( - model, "multi-class nocl", + model, + "multi-class nocl", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'nocl': True}}, - target_opset=TARGET_OPSET) + options={id(model): {"nocl": True}}, + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) sonx = str(model_onnx) - assert 'classlabels_strings' not in sonx - assert 'cl0' not in sonx + assert "classlabels_strings" not in sonx + assert "cl0" not in sonx dump_data_and_model( - X[6:8], model, model_onnx, classes=model.classes_, - basename="SklearnSGDMultiNoCl", verbose=False) + X[6:8], + model, + model_onnx, + classes=model.classes_, + basename="SklearnSGDMultiNoCl", + verbose=False, + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_sgd_oneclass_svm_converter.py b/tests/test_sklearn_sgd_oneclass_svm_converter.py index e4c69a35b..5837ae1da 100644 --- a/tests/test_sklearn_sgd_oneclass_svm_converter.py +++ b/tests/test_sklearn_sgd_oneclass_svm_converter.py @@ -4,6 +4,7 @@ import unittest import numpy as np + try: from sklearn.linear_model import SGDOneClassSVM except ImportError: @@ -15,21 +16,15 @@ FloatTensorType, ) -from test_utils import ( - dump_data_and_model, - TARGET_OPSET -) +from test_utils import dump_data_and_model, 
TARGET_OPSET ort_version = ".".join(ort_version.split(".")[:2]) class TestSGDOneClassSVMConverter(unittest.TestCase): - @unittest.skipIf(SGDOneClassSVM is None, - reason="scikit-learn<1.0") + @unittest.skipIf(SGDOneClassSVM is None, reason="scikit-learn<1.0") def test_model_sgd_oneclass_svm(self): - X = np.array([ - [-1, -1], [-2, -1], [1, 1], [2, 1] - ]) + X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) model = SGDOneClassSVM(random_state=42) model.fit(X) test_x = np.array([[0, 0], [-1, -1], [1, 1]]).astype(np.float32) @@ -39,11 +34,16 @@ def test_model_sgd_oneclass_svm(self): model, "scikit-learn SGD OneClass SVM", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model(test_x.astype(np.float32), model, model_onnx, - basename="SklearnSGDOneClassSVMBinaryHinge") + dump_data_and_model( + test_x.astype(np.float32), + model, + model_onnx, + basename="SklearnSGDOneClassSVMBinaryHinge", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_stacking.py b/tests/test_sklearn_stacking.py index 734f856a4..531c1d702 100644 --- a/tests/test_sklearn_stacking.py +++ b/tests/test_sklearn_stacking.py @@ -14,8 +14,8 @@ from sklearn.pipeline import make_pipeline, Pipeline from sklearn.preprocessing import OneHotEncoder, Normalizer from sklearn.neighbors import KNeighborsClassifier -from sklearn.ensemble import ( - RandomForestClassifier, GradientBoostingClassifier) +from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier + try: from sklearn.ensemble import StackingRegressor, StackingClassifier except ImportError: @@ -27,262 +27,359 @@ except ImportError: from sklearn.utils.testing import ignore_warnings from skl2onnx import ( - convert_sklearn, to_onnx, update_registered_converter, - get_model_alias) + convert_sklearn, + to_onnx, + update_registered_converter, + get_model_alias, +) from skl2onnx.common.data_types import FloatTensorType from test_utils import ( - dump_data_and_model, fit_regression_model, - fit_classification_model, TARGET_OPSET) + dump_data_and_model, + fit_regression_model, + fit_classification_model, + TARGET_OPSET, +) def model_to_test_reg(passthrough=False): - estimators = [ - ('dt', DecisionTreeRegressor()), - ('las', LinearRegression())] + estimators = [("dt", DecisionTreeRegressor()), ("las", LinearRegression())] stacking_regressor = StackingRegressor( - estimators=estimators, final_estimator=LinearRegression(), - passthrough=passthrough) + estimators=estimators, + final_estimator=LinearRegression(), + passthrough=passthrough, + ) return stacking_regressor def model_to_test_cl(passthrough=False): - estimators = [ - ('dt', DecisionTreeClassifier()), - ('las', LogisticRegression())] + estimators = [("dt", DecisionTreeClassifier()), ("las", LogisticRegression())] stacking_regressor = StackingClassifier( - estimators=estimators, final_estimator=LogisticRegression(), - passthrough=passthrough) + estimators=estimators, + final_estimator=LogisticRegression(), + passthrough=passthrough, + ) return stacking_regressor class TestStackingConverter(unittest.TestCase): - - @unittest.skipIf(StackingRegressor is None, - reason="new in 0.22") + @unittest.skipIf(StackingRegressor is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_regression(self): model, X = fit_regression_model(model_to_test_reg()) model_onnx = convert_sklearn( - model, "stacking regressor", + model, + "stacking regressor", [("input", 
FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingRegressor-Dec4", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingRegressor is None, - reason="new in 0.22") + @unittest.skipIf(StackingRegressor is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_regression_passthrough(self): - model, X = fit_regression_model(model_to_test_reg(passthrough=True), - factor=0.1) + model, X = fit_regression_model(model_to_test_reg(passthrough=True), factor=0.1) model_onnx = convert_sklearn( - model, "stacking regressor", + model, + "stacking regressor", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingRegressorPassthrough", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier(self): - model, X = fit_classification_model( - model_to_test_cl(), n_classes=2) + model, X = fit_classification_model(model_to_test_cl(), n_classes=2) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifier", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier_passthrough(self): model, X = fit_classification_model( - model_to_test_cl(passthrough=True), n_classes=2) + model_to_test_cl(passthrough=True), n_classes=2 + ) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifierPassthrough", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier_nozipmap(self): - model, X = fit_classification_model( - model_to_test_cl(), n_classes=2) + model, X = fit_classification_model(model_to_test_cl(), n_classes=2) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifierNoZipMap", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - 
@unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier_nozipmap_passthrough(self): model, X = fit_classification_model( - model_to_test_cl(passthrough=True), n_classes=2) + model_to_test_cl(passthrough=True), n_classes=2 + ) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, - options={id(model): {'zipmap': False}}) + options={id(model): {"zipmap": False}}, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifierNoZipMapPassthrough", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_issue_786_exc(self): pipeline = make_pipeline( - OneHotEncoder(handle_unknown='ignore', sparse=False), - StackingClassifier(estimators=[ - ("rf", RandomForestClassifier(n_estimators=10, - random_state=42)), - ("gb", GradientBoostingClassifier(n_estimators=10, - random_state=42)), - ("knn", KNeighborsClassifier(n_neighbors=2)) - ], final_estimator=LogisticRegression(), cv=2)) + OneHotEncoder(handle_unknown="ignore", sparse=False), + StackingClassifier( + estimators=[ + ("rf", RandomForestClassifier(n_estimators=10, random_state=42)), + ( + "gb", + GradientBoostingClassifier(n_estimators=10, random_state=42), + ), + ("knn", KNeighborsClassifier(n_neighbors=2)), + ], + final_estimator=LogisticRegression(), + cv=2, + ), + ) X_train = pandas.DataFrame( - dict(text=['A', 'B', 'A', 'B', 'AA', 'B', - 'A', 'B', 'A', 'AA', 'B', 'B'], - val=[0.5, 0.6, 0.7, 0.61, 0.51, 0.67, - 0.51, 0.61, 0.71, 0.611, 0.511, 0.671])) - X_train['val'] = X_train.val.astype(numpy.float32) - y_train = numpy.array([0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1]) + dict( + text=["A", "B", "A", "B", "AA", "B", "A", "B", "A", "AA", "B", "B"], + val=[ + 0.5, + 0.6, + 0.7, + 0.61, + 0.51, + 0.67, + 0.51, + 0.61, + 0.71, + 0.611, + 0.511, + 0.671, + ], + ) + ) + X_train["val"] = X_train.val.astype(numpy.float32) + y_train = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) pipeline.fit(X_train, y_train) with self.assertRaises(RuntimeError): to_onnx(pipeline, X=X_train[:1], target_opset=TARGET_OPSET) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_issue_786(self): pipeline = make_pipeline( - OneHotEncoder(handle_unknown='ignore', sparse=False), - StackingClassifier(estimators=[ - ("rf", RandomForestClassifier(n_estimators=10, - random_state=42)), - ("gb", GradientBoostingClassifier(n_estimators=10, - random_state=42)), - ("knn", KNeighborsClassifier(n_neighbors=2)) - ], final_estimator=LogisticRegression(), cv=2)) + OneHotEncoder(handle_unknown="ignore", sparse=False), + StackingClassifier( + estimators=[ + ("rf", RandomForestClassifier(n_estimators=10, random_state=42)), + ( + "gb", + GradientBoostingClassifier(n_estimators=10, random_state=42), + ), + ("knn", KNeighborsClassifier(n_neighbors=2)), + ], + final_estimator=LogisticRegression(), + cv=2, + ), + ) X_train = pandas.DataFrame( - dict(text=['A', 'B', 'A', 'B', 'AA', 'B', - 'A', 'B', 'A', 'AA', 'B', 'B'], - 
val=[0.5, 0.6, 0.7, 0.61, 0.51, 0.67, - 0.51, 0.61, 0.71, 0.611, 0.511, 0.671])) - X_train['val'] = (X_train.val * 1000).astype(numpy.float32) - y_train = numpy.array([0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1]) + dict( + text=["A", "B", "A", "B", "AA", "B", "A", "B", "A", "AA", "B", "B"], + val=[ + 0.5, + 0.6, + 0.7, + 0.61, + 0.51, + 0.67, + 0.51, + 0.61, + 0.71, + 0.611, + 0.511, + 0.671, + ], + ) + ) + X_train["val"] = (X_train.val * 1000).astype(numpy.float32) + y_train = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) pipeline.fit(X_train, y_train) - onx = to_onnx(pipeline, X=X_train[:1], - options={'zipmap': False}, - target_opset=TARGET_OPSET) + onx = to_onnx( + pipeline, + X=X_train[:1], + options={"zipmap": False}, + target_opset=TARGET_OPSET, + ) # with open("ohe_debug.onnx", "wb") as f: # f.write(onx.SerializeToString()) sess = InferenceSession( - onx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'text': X_train.text.values.reshape((-1, 1)), - 'val': X_train.val.values.reshape((-1, 1))}) + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run( + None, + { + "text": X_train.text.values.reshape((-1, 1)), + "val": X_train.val.values.reshape((-1, 1)), + }, + ) assert_almost_equal(pipeline.predict(X_train), res[0]) assert_almost_equal(pipeline.predict_proba(X_train), res[1]) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier_column_transformer(self): classifiers = { - 'A': RandomForestClassifier(n_estimators=5, random_state=42), - 'B': GradientBoostingClassifier(n_estimators=5, random_state=42) + "A": RandomForestClassifier(n_estimators=5, random_state=42), + "B": GradientBoostingClassifier(n_estimators=5, random_state=42), } - model_to_test = Pipeline(steps=[ - ('cbe', ColumnTransformer([ - ("norm1", Normalizer(norm='l1'), [0, 1]), - ("norm2", Normalizer(norm='l2'), [2, 3])])), - ('sc', StackingClassifier( - estimators=list(map(tuple, classifiers.items())), - stack_method='predict_proba', - passthrough=False - )) - ]) - model, X = fit_classification_model( - model_to_test, n_classes=2) + model_to_test = Pipeline( + steps=[ + ( + "cbe", + ColumnTransformer( + [ + ("norm1", Normalizer(norm="l1"), [0, 1]), + ("norm2", Normalizer(norm="l2"), [2, 3]), + ] + ), + ), + ( + "sc", + StackingClassifier( + estimators=list(map(tuple, classifiers.items())), + stack_method="predict_proba", + passthrough=False, + ), + ), + ] + ) + model, X = fit_classification_model(model_to_test, n_classes=2) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifierPipe", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_model_stacking_classifier_column_transformer_passthrough(self): classifiers = { - 'A': RandomForestClassifier(n_estimators=5, random_state=42), - 'B': GradientBoostingClassifier(n_estimators=5, random_state=42) + "A": RandomForestClassifier(n_estimators=5, random_state=42), + "B": 
GradientBoostingClassifier(n_estimators=5, random_state=42), } - model_to_test = Pipeline(steps=[ - ('cbe', ColumnTransformer([ - ("norm1", Normalizer(norm='l1'), [0, 1]), - ("norm2", Normalizer(norm='l2'), [2, 3])])), - ('sc', StackingClassifier( - estimators=list(map(tuple, classifiers.items())), - stack_method='predict_proba', - passthrough=True - )) - ]) - model, X = fit_classification_model( - model_to_test, n_classes=2) + model_to_test = Pipeline( + steps=[ + ( + "cbe", + ColumnTransformer( + [ + ("norm1", Normalizer(norm="l1"), [0, 1]), + ("norm2", Normalizer(norm="l2"), [2, 3]), + ] + ), + ), + ( + "sc", + StackingClassifier( + estimators=list(map(tuple, classifiers.items())), + stack_method="predict_proba", + passthrough=True, + ), + ), + ] + ) + model, X = fit_classification_model(model_to_test, n_classes=2) model_onnx = convert_sklearn( - model, "stacking classifier", + model, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnStackingClassifierPipePassthrough", - comparable_outputs=[0]) + comparable_outputs=[0], + ) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_concat_stacking(self): - class CustomTransformer: - def fit(self, X, y=None): return self @@ -296,10 +393,10 @@ def parser(scope, model, inputs, custom_parsers=None): alias = get_model_alias(type(model)) op = scope.declare_local_operator(alias, model) op.inputs = inputs - n_features = sum( - list(map(lambda x: x.type.shape[1], op.inputs))) + n_features = sum(list(map(lambda x: x.type.shape[1], op.inputs))) variable = scope.declare_local_variable( - "c_outputs", FloatTensorType([None, n_features])) + "c_outputs", FloatTensorType([None, n_features]) + ) op.outputs.append(variable) return op.outputs @@ -308,60 +405,70 @@ def converter(scope, operator, container): for index in range(operator.inputs[0].type.shape[1]): index_name = scope.get_unique_variable_name("ind%d" % index) - container.add_initializer( - index_name, TensorProto.INT64, [], [index]) - feature_column_name = scope.get_unique_variable_name( - "fc%d" % index) + container.add_initializer(index_name, TensorProto.INT64, [], [index]) + feature_column_name = scope.get_unique_variable_name("fc%d" % index) container.add_node( "ArrayFeatureExtractor", [operator.inputs[0].full_name, index_name], - feature_column_name, op_domain="ai.onnx.ml", - name=scope.get_unique_operator_name("AFE%d" % index)) + feature_column_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("AFE%d" % index), + ) output_cols.append(feature_column_name) container.add_node( - "Concat", output_cols, + "Concat", + output_cols, operator.outputs[0].full_name, name=scope.get_unique_operator_name("CUSTOMCONCAT"), - axis=-1) + axis=-1, + ) update_registered_converter( - CustomTransformer, "CustomTransformerUT", - shape_calculator, converter, parser=parser, overwrite=True) + CustomTransformer, + "CustomTransformerUT", + shape_calculator, + converter, + parser=parser, + overwrite=True, + ) clf1 = RandomForestClassifier(n_estimators=5) clf2 = RandomForestClassifier(n_estimators=5) - classifiers = {'clf1': clf1, 'clf2': clf2} + classifiers = {"clf1": clf1, "clf2": clf2} stacking_ensemble = StackingClassifier( estimators=list(map(tuple, 
classifiers.items())), - n_jobs=1, stack_method='predict_proba', - passthrough=False) + n_jobs=1, + stack_method="predict_proba", + passthrough=False, + ) - pipe = Pipeline(steps=[ - ('ct', CustomTransformer()), ('sc', stacking_ensemble)]) + pipe = Pipeline(steps=[("ct", CustomTransformer()), ("sc", stacking_ensemble)]) x = numpy.random.randn(20, 4).astype(numpy.float32) y = numpy.random.randint(2, size=20).astype(numpy.int64) pipe.fit(x, y) input_types = [("X", FloatTensorType([None, x.shape[1]]))] model_onnx = convert_sklearn( - pipe, 'bug', input_types, target_opset=TARGET_OPSET, - verbose=0, options={'zipmap': False}) + pipe, + "bug", + input_types, + target_opset=TARGET_OPSET, + verbose=0, + options={"zipmap": False}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] self.assertEqual(got.shape[0], x.shape[0]) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") @ignore_warnings(category=FutureWarning) def test_concat_stacking_passthrough(self): - class CustomTransformer: - def fit(self, X, y=None): return self @@ -375,10 +482,10 @@ def parser(scope, model, inputs, custom_parsers=None): alias = get_model_alias(type(model)) op = scope.declare_local_operator(alias, model) op.inputs = inputs - n_features = sum( - list(map(lambda x: x.type.shape[1], op.inputs))) + n_features = sum(list(map(lambda x: x.type.shape[1], op.inputs))) variable = scope.declare_local_variable( - "c_outputs", FloatTensorType([None, n_features])) + "c_outputs", FloatTensorType([None, n_features]) + ) op.outputs.append(variable) return op.outputs @@ -387,51 +494,64 @@ def converter(scope, operator, container): for index in range(operator.inputs[0].type.shape[1]): index_name = scope.get_unique_variable_name("ind%d" % index) - container.add_initializer( - index_name, TensorProto.INT64, [], [index]) - feature_column_name = scope.get_unique_variable_name( - "fc%d" % index) + container.add_initializer(index_name, TensorProto.INT64, [], [index]) + feature_column_name = scope.get_unique_variable_name("fc%d" % index) container.add_node( "ArrayFeatureExtractor", [operator.inputs[0].full_name, index_name], - feature_column_name, op_domain="ai.onnx.ml", - name=scope.get_unique_operator_name("AFE%d" % index)) + feature_column_name, + op_domain="ai.onnx.ml", + name=scope.get_unique_operator_name("AFE%d" % index), + ) output_cols.append(feature_column_name) container.add_node( - "Concat", output_cols, + "Concat", + output_cols, operator.outputs[0].full_name, name=scope.get_unique_operator_name("CUSTOMCONCAT"), - axis=-1) + axis=-1, + ) update_registered_converter( - CustomTransformer, "CustomTransformerUT", - shape_calculator, converter, parser=parser, overwrite=True) + CustomTransformer, + "CustomTransformerUT", + shape_calculator, + converter, + parser=parser, + overwrite=True, + ) clf1 = RandomForestClassifier(n_estimators=5) clf2 = RandomForestClassifier(n_estimators=5) - classifiers = {'clf1': clf1, 'clf2': clf2} + classifiers = {"clf1": clf1, "clf2": clf2} stacking_ensemble = StackingClassifier( estimators=list(map(tuple, classifiers.items())), - n_jobs=1, stack_method='predict_proba', - passthrough=True) + n_jobs=1, + stack_method="predict_proba", + passthrough=True, + ) - pipe = Pipeline(steps=[ - ('ct', CustomTransformer()), ('sc', 
stacking_ensemble)]) + pipe = Pipeline(steps=[("ct", CustomTransformer()), ("sc", stacking_ensemble)]) x = numpy.random.randn(20, 4).astype(numpy.float32) y = numpy.random.randint(2, size=20).astype(numpy.int64) pipe.fit(x, y) input_types = [("X", FloatTensorType([None, x.shape[1]]))] model_onnx = convert_sklearn( - pipe, 'bug', input_types, target_opset=TARGET_OPSET, - verbose=0, options={'zipmap': False}) + pipe, + "bug", + input_types, + target_opset=TARGET_OPSET, + verbose=0, + options={"zipmap": False}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] self.assertEqual(got.shape[0], x.shape[0]) diff --git a/tests/test_sklearn_svm_converters.py b/tests/test_sklearn_svm_converters.py index 0f0eb3fc8..da6db75eb 100644 --- a/tests/test_sklearn_svm_converters.py +++ b/tests/test_sklearn_svm_converters.py @@ -9,6 +9,7 @@ from numpy.testing import assert_almost_equal from sklearn.datasets import load_iris from sklearn.svm import SVC, SVR, NuSVC, NuSVR, OneClassSVM, LinearSVC + try: from skl2onnx.common._apply_operation import apply_less except ImportError: @@ -23,15 +24,17 @@ from skl2onnx.operator_converters.ada_boost import _scikit_learn_before_022 from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, fit_regression_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + fit_regression_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestSklearnSVM(unittest.TestCase): - def _fit_binary_classification(self, model): iris = load_iris() X = iris.data[:, :3] @@ -86,12 +89,15 @@ def _check_attributes(self, node, attribute_test): def test_convert_svc_binary_linear_pfalse(self): model, X = self._fit_binary_classification( - SVC(kernel="linear", probability=False, - decision_function_shape='ovo')) + SVC(kernel="linear", probability=False, decision_function_shape="ovo") + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -108,26 +114,32 @@ def test_convert_svc_binary_linear_pfalse(self): }, ) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinSVCLinearPF-NoProbOpp") + X, model, model_onnx, basename="SklearnBinSVCLinearPF-NoProbOpp" + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'zipmap': False}}, - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + options={id(model): {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinSVCLinearPF-NoProbOpp") + X, model, model_onnx, basename="SklearnBinSVCLinearPF-NoProbOpp" + ) def test_convert_svc_binary_linear_ptrue(self): model, X = self._fit_binary_classification( - SVC(kernel="linear", probability=True)) + SVC(kernel="linear", probability=True) + ) model_onnx = convert_sklearn( - model, "SVC", [("input", - FloatTensorType([None, X.shape[1]]))], - 
target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -143,72 +155,89 @@ def test_convert_svc_binary_linear_ptrue(self): "vectors_per_class": None, }, ) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinSVCLinearPT") + dump_data_and_model(X, model, model_onnx, basename="SklearnBinSVCLinearPT") def test_convert_svc_multi_linear_pfalse(self): model, X = self._fit_multi_classification( - SVC(kernel="linear", probability=False, - decision_function_shape="ovo")) + SVC(kernel="linear", probability=False, decision_function_shape="ovo") + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] self._check_attributes( - svc_node, { - "coefficients": None, "kernel_params": None, - "kernel_type": "LINEAR", "post_transform": None, - "rho": None, "support_vectors": None, - "vectors_per_class": None}) + svc_node, + { + "coefficients": None, + "kernel_params": None, + "kernel_type": "LINEAR", + "post_transform": None, + "rho": None, + "support_vectors": None, + "vectors_per_class": None, + }, + ) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclSVCLinearPF-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCLinearPF-Dec4") @unittest.skipIf(apply_less is None, reason="onnxconverter-common old") def test_convert_svc_multi_linear_pfalse_ovr(self): model, X = self._fit_multi_classification( - SVC(kernel="linear", probability=False, - decision_function_shape='ovr')) + SVC(kernel="linear", probability=False, decision_function_shape="ovr") + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclSVCOVR-Dec4") + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) + dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCOVR-Dec4") def test_convert_svc_multi_linear_ptrue(self): model, X = self._fit_multi_classification( - SVC(kernel="linear", probability=True), - nbclass=3) + SVC(kernel="linear", probability=True), nbclass=3 + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] self._check_attributes( - svc_node, { - "coefficients": None, "kernel_params": None, - "kernel_type": "LINEAR", "post_transform": None, - "rho": None, "support_vectors": None, - "vectors_per_class": None}) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclSVCLinearPT-Dec2") + svc_node, + { + "coefficients": None, + "kernel_params": None, + "kernel_type": "LINEAR", + "post_transform": None, + "rho": None, + "support_vectors": None, + "vectors_per_class": None, + }, + ) + dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCLinearPT-Dec2") @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="use of recent Cast operator") + reason="use of recent Cast operator", + ) def 
test_convert_svr_linear(self): model, X = self._fit_binary_classification(SVR(kernel="linear")) model_onnx = convert_sklearn( - model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVR", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) self._check_attributes( @@ -222,15 +251,18 @@ def test_convert_svr_linear(self): "support_vectors": None, }, ) - dump_data_and_model(X, model, model_onnx, - basename="SklearnRegSVRLinear-Dec3") + dump_data_and_model(X, model, model_onnx, basename="SklearnRegSVRLinear-Dec3") def test_convert_nusvc_binary_pfalse(self): model, X = self._fit_binary_classification( - NuSVC(probability=False, decision_function_shape='ovo')) + NuSVC(probability=False, decision_function_shape="ovo") + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -247,17 +279,21 @@ def test_convert_nusvc_binary_pfalse(self): }, ) dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinNuSVCPF-NoProbOpp") + X, model, model_onnx, basename="SklearnBinNuSVCPF-NoProbOpp" + ) @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="use of recent Cast operator") + reason="use of recent Cast operator", + ) def test_convert_nusvc_binary_ptrue(self): model, X = self._fit_binary_classification(NuSVC(probability=True)) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -273,17 +309,18 @@ def test_convert_nusvc_binary_ptrue(self): "vectors_per_class": None, }, ) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinNuSVCPT") + dump_data_and_model(X, model, model_onnx, basename="SklearnBinNuSVCPT") def test_convert_nusvc_multi_pfalse(self): model, X = self._fit_multi_classification( - NuSVC(probability=False, nu=0.1, - decision_function_shape='ovo')) + NuSVC(probability=False, nu=0.1, decision_function_shape="ovo") + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -299,55 +336,64 @@ def test_convert_nusvc_multi_pfalse(self): "vectors_per_class": None, }, ) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclNuSVCPF-Dec1") + dump_data_and_model(X, model, model_onnx, basename="SklearnMclNuSVCPF-Dec1") def test_convert_svc_multi_pfalse_4(self): model, X = self._fit_multi_classification( - SVC(probability=False, - decision_function_shape='ovo'), 4) + SVC(probability=False, decision_function_shape="ovo"), 4 + ) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) - dump_data_and_model( - X, model, model_onnx, - 
basename="SklearnMcSVCPF") + dump_data_and_model(X, model, model_onnx, basename="SklearnMcSVCPF") - @unittest.skipIf(_scikit_learn_before_022(), - reason="break_ties introduced after 0.22") + @unittest.skipIf( + _scikit_learn_before_022(), reason="break_ties introduced after 0.22" + ) def test_convert_svc_multi_pfalse_4_break_ties(self): model, X = self._fit_multi_classification( - SVC(probability=True, break_ties=True), 4) + SVC(probability=True, break_ties=True), 4 + ) model_onnx = convert_sklearn( - model, "unused", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "unused", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, - basename="SklearnMcSVCPFBTF-Dec4") + model, + model_onnx, + basename="SklearnMcSVCPFBTF-Dec4", + ) def test_convert_svc_multi_ptrue_4(self): model, X = self._fit_multi_classification(SVC(probability=True), 4) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMcSVCPF4-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnMcSVCPF4-Dec4") def test_convert_nusvc_multi_ptrue(self): - model, X = self._fit_multi_classification( - NuSVC(probability=True, nu=0.1)) + model, X = self._fit_multi_classification(NuSVC(probability=True, nu=0.1)) model_onnx = convert_sklearn( - model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVC", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) nodes = model_onnx.graph.node self.assertIsNotNone(nodes) svc_node = nodes[0] @@ -363,18 +409,20 @@ def test_convert_nusvc_multi_ptrue(self): "vectors_per_class": None, }, ) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnMclNuSVCPT-Dec3") + dump_data_and_model(X, model, model_onnx, basename="SklearnMclNuSVCPT-Dec3") @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="use of recent Cast operator") + reason="use of recent Cast operator", + ) def test_convert_nusvr(self): model, X = self._fit_binary_classification(NuSVR()) model_onnx = convert_sklearn( - model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVR", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) node = model_onnx.graph.node[0] self.assertIsNotNone(node) self._check_attributes( @@ -388,89 +436,90 @@ def test_convert_nusvr(self): "support_vectors": None, }, ) - dump_data_and_model(X, model, model_onnx, - basename="SklearnRegNuSVR") + dump_data_and_model(X, model, model_onnx, basename="SklearnRegNuSVR") @unittest.skipIf( pv.Version(ort_version) <= pv.Version("0.4.0"), - reason="use of recent Cast operator") + reason="use of recent Cast operator", + ) def test_convert_nusvr_default(self): model, X = self._fit_binary_classification(NuSVR()) model_onnx = convert_sklearn( - model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "SVR", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model(X, 
model, model_onnx, basename="SklearnRegNuSVR2") def test_convert_svr_int(self): - model, X = fit_regression_model( - SVR(), is_int=True) + model, X = fit_regression_model(SVR(), is_int=True) model_onnx = convert_sklearn( - model, "SVR", + model, + "SVR", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSVRInt-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnSVRInt-Dec4") def test_convert_nusvr_int(self): - model, X = fit_regression_model( - NuSVR(), is_int=True) + model, X = fit_regression_model(NuSVR(), is_int=True) model_onnx = convert_sklearn( - model, "NuSVR", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + model, + "NuSVR", + [("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnNuSVRInt-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnNuSVRInt-Dec4") def test_convert_svr_bool(self): - model, X = fit_regression_model( - SVR(), is_bool=True) + model, X = fit_regression_model(SVR(), is_bool=True) model_onnx = convert_sklearn( - model, "SVR", + model, + "SVR", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnSVRBool-Dec4") + dump_data_and_model(X, model, model_onnx, basename="SklearnSVRBool-Dec4") def test_convert_nusvr_bool(self): - model, X = fit_regression_model( - NuSVR(), is_bool=True) + model, X = fit_regression_model(NuSVR(), is_bool=True) model_onnx = convert_sklearn( - model, "NuSVR", + model, + "NuSVR", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnNuSVRBool") + dump_data_and_model(X, model, model_onnx, basename="SklearnNuSVRBool") - @unittest.skipIf( - TARGET_OPSET < 9, - reason="operator sign available since opset 9") + @unittest.skipIf(TARGET_OPSET < 9, reason="operator sign available since opset 9") def test_convert_oneclasssvm(self): model, X = self._fit_one_class_svm(OneClassSVM()) model_onnx = convert_sklearn( - model, "OCSVM", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) - dump_data_and_model( - X, model, model_onnx, - basename="SklearnBinOneClassSVM") + model, + "OCSVM", + [("input", FloatTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) + dump_data_and_model(X, model, model_onnx, basename="SklearnBinOneClassSVM") def test_model_linear_svc_binary_class(self): model, X = self._fit_binary_classification(LinearSVC(max_iter=10000)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1].ravel(), decimal=5) @@ -479,13 +528,15 @@ def test_model_linear_svc_binary_class(self): def 
test_model_linear_svc_multi_class(self): model, X = self._fit_multi_classification(LinearSVC(max_iter=10000)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1], decimal=5) @@ -494,13 +545,15 @@ def test_model_linear_svc_multi_class(self): def test_model_svc_binary_class_false(self): model, X = self._fit_binary_classification(SVC(max_iter=10000)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1][:, 0], decimal=5) @@ -510,13 +563,15 @@ def test_model_svc_binary_class_false(self): def test_model_svc_multi_class_false(self): model, X = self._fit_multi_classification(SVC(max_iter=10000)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1], decimal=5) @@ -524,31 +579,37 @@ def test_model_svc_multi_class_false(self): def test_model_svc_binary_class_true(self): model, X = self._fit_binary_classification( - SVC(max_iter=10000, probability=True)) + SVC(max_iter=10000, probability=True) + ) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - options={'zipmap': False}, target_opset=TARGET_OPSET) + options={"zipmap": False}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.predict_proba(X) assert_almost_equal(proba, res[1], decimal=5) assert_almost_equal(label, res[0]) def test_model_svc_multi_class_true(self): - model, X = self._fit_multi_classification( - SVC(max_iter=10000, probability=True)) + model, X = self._fit_multi_classification(SVC(max_iter=10000, probability=True)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - options={'zipmap': False}, target_opset=TARGET_OPSET) + options={"zipmap": False}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + 
model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.predict_proba(X) assert_almost_equal(proba, res[1], decimal=5) @@ -557,13 +618,15 @@ def test_model_svc_multi_class_true(self): def test_model_nusvc_binary_class_false(self): model, X = self._fit_binary_classification(NuSVC(max_iter=10000)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1][:, 0], decimal=5) @@ -571,16 +634,17 @@ def test_model_nusvc_binary_class_false(self): @unittest.skipIf(TARGET_OPSET < 12, reason="operator Less") def test_model_nusvc_multi_class_false(self): - model, X = self._fit_multi_classification( - NuSVC(max_iter=10000, nu=0.1)) + model, X = self._fit_multi_classification(NuSVC(max_iter=10000, nu=0.1)) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.decision_function(X) assert_almost_equal(proba, res[1], decimal=4) @@ -588,15 +652,19 @@ def test_model_nusvc_multi_class_false(self): def test_model_nusvc_binary_class_true(self): model, X = self._fit_binary_classification( - NuSVC(max_iter=10000, probability=True)) + NuSVC(max_iter=10000, probability=True) + ) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - options={'zipmap': False}, target_opset=TARGET_OPSET) + options={"zipmap": False}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.predict_proba(X) assert_almost_equal(proba, res[1], decimal=5) @@ -604,15 +672,19 @@ def test_model_nusvc_binary_class_true(self): def test_model_nusvc_multi_class_true(self): model, X = self._fit_multi_classification( - NuSVC(max_iter=10000, probability=True, nu=0.1)) + NuSVC(max_iter=10000, probability=True, nu=0.1) + ) model_onnx = convert_sklearn( - model, "linear SVC", + model, + "linear SVC", [("input", FloatTensorType([None, X.shape[1]]))], - options={'zipmap': False}, target_opset=TARGET_OPSET) + options={"zipmap": False}, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X}) label = model.predict(X) proba = model.predict_proba(X) assert_almost_equal(proba, res[1], decimal=3) diff --git a/tests/test_sklearn_text.py 
b/tests/test_sklearn_text.py index ea1e68d99..ae8f33ea0 100644 --- a/tests/test_sklearn_text.py +++ b/tests/test_sklearn_text.py @@ -15,16 +15,16 @@ class TestSklearnText(unittest.TestCase): - def test_count_vectorizer(self): - - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]: mod1 = CountVectorizer(ngram_range=ng) @@ -42,22 +42,22 @@ def test_count_vectorizer(self): self.assertIsInstance(k, tuple) def test_count_vectorizer_regex(self): - - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for pattern in ["[a-zA-Z ]{1,4}", "[a-zA-Z]{1,4}"]: for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]: mod1 = CountVectorizer(ngram_range=ng, token_pattern=pattern) mod1.fit(corpus) - mod2 = TraceableCountVectorizer(ngram_range=ng, - token_pattern=pattern) + mod2 = TraceableCountVectorizer(ngram_range=ng, token_pattern=pattern) mod2.fit(corpus) pred1 = mod1.transform(corpus) @@ -72,19 +72,20 @@ def test_count_vectorizer_regex(self): for k in voc: self.assertIsInstance(k, tuple) for i in k: - if ' ' in i: + if " " in i: spaces += 1 self.assertGreater(spaces, 1) def test_tfidf_vectorizer(self): - - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]: mod1 = TfidfVectorizer(ngram_range=ng) @@ -102,26 +103,27 @@ def test_tfidf_vectorizer(self): self.assertIsInstance(k, tuple) def test_tfidf_vectorizer_english(self): - - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]: with self.subTest(ngram_range=ng): mod1 = TfidfVectorizer(ngram_range=ng, stop_words="english") mod1.fit(corpus) - mod2 = TraceableTfidfVectorizer( - ngram_range=ng, stop_words="english") + mod2 = TraceableTfidfVectorizer(ngram_range=ng, stop_words="english") mod2.fit(corpus) if len(mod1.vocabulary_) != len(mod2.vocabulary_): raise AssertionError( - f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}") + f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}" + ) pred1 = mod1.transform(corpus) pred2 = mod2.transform(corpus) @@ -132,14 +134,15 @@ def test_tfidf_vectorizer_english(self): self.assertIsInstance(k, tuple) def test_count_vectorizer_english2(self): - - corpus = numpy.array([ - "This is the first 
document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for ng in [(1, 1), (1, 2), (1, 3)]: with self.subTest(ngram_range=ng): @@ -149,7 +152,8 @@ def test_count_vectorizer_english2(self): token_pattern="[\\w_]{2,}", lowercase=True, min_df=2, - max_features=100000) + max_features=100000, + ) mod1.fit(corpus) mod2 = TraceableCountVectorizer( @@ -158,19 +162,21 @@ def test_count_vectorizer_english2(self): token_pattern="[\\w_]{2,}", lowercase=True, min_df=2, - max_features=100000) + max_features=100000, + ) mod2.fit(corpus) if mod1.token_pattern != mod2.token_pattern: raise AssertionError( - f"{mod1.token_pattern!r} != {mod2.token_pattern!r}") + f"{mod1.token_pattern!r} != {mod2.token_pattern!r}" + ) if len(mod1.stop_words_) != len(mod2.stop_words_): - raise AssertionError( - f"{mod1.stop_words_} != {mod2.stop_words_}") + raise AssertionError(f"{mod1.stop_words_} != {mod2.stop_words_}") if len(mod1.vocabulary_) != len(mod2.vocabulary_): raise AssertionError( f"skl_version={skl_version!r}, " f"skl_file={skl_file!r},\n" - f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}") + f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}" + ) pred1 = mod1.transform(corpus) pred2 = mod2.transform(corpus) @@ -181,14 +187,15 @@ def test_count_vectorizer_english2(self): self.assertIsInstance(k, tuple) def test_tfidf_vectorizer_english2(self): - - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for ng in [(1, 1), (1, 2), (1, 3)]: with self.subTest(ngram_range=ng): @@ -198,7 +205,8 @@ def test_tfidf_vectorizer_english2(self): token_pattern="[\\w_]{2,}", lowercase=True, min_df=2, - max_features=100000) + max_features=100000, + ) mod1.fit(corpus) mod2 = TraceableTfidfVectorizer( @@ -207,19 +215,21 @@ def test_tfidf_vectorizer_english2(self): token_pattern="[\\w_]{2,}", lowercase=True, min_df=2, - max_features=100000) + max_features=100000, + ) mod2.fit(corpus) if mod1.token_pattern != mod2.token_pattern: raise AssertionError( - f"{mod1.token_pattern!r} != {mod2.token_pattern!r}") + f"{mod1.token_pattern!r} != {mod2.token_pattern!r}" + ) if len(mod1.stop_words_) != len(mod2.stop_words_): - raise AssertionError( - f"{mod1.stop_words_} != {mod2.stop_words_}") + raise AssertionError(f"{mod1.stop_words_} != {mod2.stop_words_}") if len(mod1.vocabulary_) != len(mod2.vocabulary_): raise AssertionError( f"skl_version={skl_version!r}, " f"skl_file={skl_file!r},\n" - f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}") + f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}" + ) pred1 = mod1.transform(corpus) pred2 = mod2.transform(corpus) @@ -230,33 +240,34 @@ def test_tfidf_vectorizer_english2(self): self.assertIsInstance(k, tuple) def test_tfidf_vectorizer_regex(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "", - ]).reshape((5, )) + corpus = numpy.array( + [ + "This is the first 
document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "", + ] + ).reshape((5,)) for pattern in ["[a-zA-Z ]{1,4}", "[a-zA-Z]{1,4}"]: for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]: mod1 = TfidfVectorizer(ngram_range=ng, token_pattern=pattern) mod1.fit(corpus) - mod2 = TraceableTfidfVectorizer(ngram_range=ng, - token_pattern=pattern) + mod2 = TraceableTfidfVectorizer(ngram_range=ng, token_pattern=pattern) mod2.fit(corpus) pred1 = mod1.transform(corpus) pred2 = mod2.transform(corpus) - if ' ]' in pattern: + if " ]" in pattern: voc = mod2.vocabulary_ spaces = 0 for k in voc: self.assertIsInstance(k, tuple) for i in k: - if ' ' in i: + if " " in i: spaces += 1 self.assertGreater(spaces, 1) assert_almost_equal(pred1.todense(), pred2.todense()) @@ -264,21 +275,29 @@ def test_tfidf_vectorizer_regex(self): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer_issue(self): register() - corpus = numpy.array([ - 'the-first document.', - 'this-is the-third-one.', - 'this-the first-document?', - ]).reshape((3, 1)) + corpus = numpy.array( + [ + "the-first document.", + "this-is the-third-one.", + "this-the first-document?", + ] + ).reshape((3, 1)) vect = TraceableTfidfVectorizer( - ngram_range=(1, 2), - token_pattern=r"\b[a-z ]+\b") + ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b" + ) vect.fit(corpus.ravel()) - model_onnx = to_onnx(vect, 'TfidfVectorizer', - initial_types=[('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = to_onnx( + vect, + "TfidfVectorizer", + initial_types=[("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizerIssue-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizerIssue-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_transformer_converter.py b/tests/test_sklearn_tfidf_transformer_converter.py index 1dd3ba9c1..f848da48d 100644 --- a/tests/test_sklearn_tfidf_transformer_converter.py +++ b/tests/test_sklearn_tfidf_transformer_converter.py @@ -13,17 +13,19 @@ class TestSklearnTfidfTransformerConverter(unittest.TestCase): - def test_model_tfidf_transform(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - "Troisième document en français", - ]).reshape((5, 1)) - data = (CountVectorizer(ngram_range=(1, 1)).fit_transform( - corpus.ravel()).todense()) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + "Troisième document en français", + ] + ).reshape((5, 1)) + data = ( + CountVectorizer(ngram_range=(1, 1)).fit_transform(corpus.ravel()).todense() + ) data = numpy.array(data.astype(numpy.float32)) for sublinear_tf in (False, True): @@ -44,9 +46,8 @@ def test_model_tfidf_transform(self): model_onnx = convert_sklearn( model, "TfidfTransformer", - [("input", - FloatTensorType([None, data.shape[1]]))], - target_opset=TARGET_OPSET + [("input", FloatTensorType([None, data.shape[1]]))], + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) suffix = norm.upper() if norm else "" @@ -57,7 +58,8 @@ def test_model_tfidf_transform(self): data, model, model_onnx, - basename="SklearnTfidfTransform" + suffix) + basename="SklearnTfidfTransform" + suffix, + ) if 
__name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_transformer_converter_sparse.py b/tests/test_sklearn_tfidf_transformer_converter_sparse.py index 3c0805f05..cb3353a7b 100644 --- a/tests/test_sklearn_tfidf_transformer_converter_sparse.py +++ b/tests/test_sklearn_tfidf_transformer_converter_sparse.py @@ -19,9 +19,12 @@ class TestSklearnTfidfVectorizerSparse(unittest.TestCase): @unittest.skipIf( TARGET_OPSET < 9, # issue with encoding - reason="https://github.com/onnx/onnx/pull/1734") - @unittest.skipIf(pv.Version(ort.__version__) <= pv.Version("0.2.1"), - reason="sparse not supported") + reason="https://github.com/onnx/onnx/pull/1734", + ) + @unittest.skipIf( + pv.Version(ort.__version__) <= pv.Version("0.2.1"), + reason="sparse not supported", + ) def test_model_tfidf_transform_bug(self): categories = [ "alt.atheism", @@ -29,25 +32,26 @@ def test_model_tfidf_transform_bug(self): "comp.graphics", "sci.med", ] - twenty_train = fetch_20newsgroups(subset="train", - categories=categories, - shuffle=True, - random_state=0) - text_clf = Pipeline([("vect", CountVectorizer()), - ("tfidf", TfidfTransformer())]) + twenty_train = fetch_20newsgroups( + subset="train", categories=categories, shuffle=True, random_state=0 + ) + text_clf = Pipeline( + [("vect", CountVectorizer()), ("tfidf", TfidfTransformer())] + ) twenty_train.data[0] = "bruît " + twenty_train.data[0] text_clf.fit(twenty_train.data, twenty_train.target) model_onnx = convert_sklearn( text_clf, name="DocClassifierCV-Tfidf", initial_types=[("input", StringTensorType([5]))], - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) dump_data_and_model( twenty_train.data[5:10], text_clf, model_onnx, - basename="SklearnPipelineTfidfTransformer") + basename="SklearnPipelineTfidfTransformer", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_vectorizer_converter.py b/tests/test_sklearn_tfidf_vectorizer_converter.py index 09ff5ead6..69c2eb445 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter.py @@ -10,6 +10,7 @@ from numpy.testing import assert_almost_equal from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.svm import SVC + try: from sklearn.compose import ColumnTransformer except ImportError: @@ -24,314 +25,372 @@ from skl2onnx.common.data_types import StringTensorType, FloatTensorType from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, TARGET_OPSET, - InferenceSessionEx as InferenceSession) + dump_data_and_model, + TARGET_OPSET, + InferenceSessionEx as InferenceSession, +) -ort_version = '.'.join(ort_version.split('.')[:2]) +ort_version = ".".join(ort_version.split(".")[:2]) class TestSklearnTfidfVectorizer(unittest.TestCase): - def get_options(self): return {TfidfVectorizer: {"tokenexp": None}} @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) <= pv.Version("0.3.0"), - reason="Requires opset 9.") + pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9." 
+ ) def test_model_tfidf_vectorizer11(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType())], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnTfidfVectorizer11-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnTfidfVectorizer11-OneOff-SklCol" + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': corpus.ravel()})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus.ravel()})[0] assert res.shape == (4, 9) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) <= pv.Version("0.3.0"), - reason="Requires opset 9.") + pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9." + ) def test_model_tfidf_vectorizer11_nolowercase(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, lowercase=False) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType())], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol") + basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol", + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': corpus.ravel()})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus.ravel()})[0] assert res.shape == (4, 11) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf( - ColumnTransformer is None, - reason="Requires newer scikit-learn") + @unittest.skipIf(ColumnTransformer is None, reason="Requires newer scikit-learn") def test_model_tfidf_vectorizer11_compose(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first 
document?", + ] + ).reshape((4, 1)) corpus = numpy.hstack([corpus, corpus]) y = numpy.array([0, 1, 0, 1]) - model = ColumnTransformer([ - ('a', TfidfVectorizer(), 0), - ('b', TfidfVectorizer(), 1), - ]) + model = ColumnTransformer( + [ + ("a", TfidfVectorizer(), 0), + ("b", TfidfVectorizer(), 1), + ] + ) model.fit(corpus, y) - model_onnx = convert_sklearn(model, "TfIdfcomp", - [("input", StringTensorType([4, 2]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + model, + "TfIdfcomp", + [("input", StringTensorType([4, 2]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': corpus})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus})[0] exp = model.transform(corpus) assert_almost_equal(res, exp) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_empty_string_case1(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - ' ', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + " ", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus[:3].ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) # TfidfVectorizer in onnxruntime fails with empty strings, # which was fixed in version 0.3.0 afterward dump_data_and_model( - corpus[2:], vect, model_onnx, - basename="SklearnTfidfVectorizer11EmptyStringSepCase1-" - "OneOff-SklCol") + corpus[2:], + vect, + model_onnx, + basename="SklearnTfidfVectorizer11EmptyStringSepCase1-" "OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_empty_string_case2(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) # onnxruntime fails with empty strings dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol") + basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_out_vocabulary(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the 
first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - corpus = numpy.array([ - "AZZ ZZ This is the first document.", - "BZZ ZZ This document is the second document.", - "ZZZ ZZ And this is the third one.", - "WZZ ZZ Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "AZZ ZZ This is the first document.", + "BZZ ZZ This document is the second document.", + "ZZZ ZZ And this is the third one.", + "WZZ ZZ Is this the first document?", + ] + ).reshape((4, 1)) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11OutVocab-OneOff-SklCol") + basename="SklearnTfidfVectorizer11OutVocab-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer22(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(2, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnTfidfVectorizer22-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer21(self): corpus = numpy.array(["AA AA", "AA AA BB"]).reshape((2, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnTfidfVectorizer22S-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22S-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", 
+ "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnTfidfVectorizer22-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_normL1(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer22L1-OneOff-SklCol") + basename="SklearnTfidfVectorizer22L1-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_normL2(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm="l2") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer22L2-OneOff-SklCol") + basename="SklearnTfidfVectorizer22L2-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer13(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 3), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = 
convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, - vect, - model_onnx, - basename="SklearnTfidfVectorizer13-OneOff-SklCol") + corpus, vect, model_onnx, basename="SklearnTfidfVectorizer13-OneOff-SklCol" + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11parenthesis_class(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the (first) document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the (first) document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) extra = { TfidfVectorizer: { - "separators": [ - " ", "\\.", "\\?", ",", ";", ":", "\\!", "\\(", "\\)" - ] + "separators": [" ", "\\.", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"] } } model_onnx = convert_sklearn( @@ -339,7 +398,7 @@ def test_model_tfidf_vectorizer11parenthesis_class(self): "TfidfVectorizer", [("input", StringTensorType([1]))], options=extra, - target_opset=TARGET_OPSET + target_opset=TARGET_OPSET, ) self.assertTrue(model_onnx is not None) # This test depends on this issue: @@ -348,105 +407,122 @@ def test_model_tfidf_vectorizer11parenthesis_class(self): corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11ParenthesisClass-OneOff-SklCol") + basename="SklearnTfidfVectorizer11ParenthesisClass-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_idparenthesis_id(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the (first) document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the (first) document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - extra = { - id(vect): {"sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"]} - } + extra = {id(vect): {"sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"]}} try: convert_sklearn( vect, "TfidfVectorizer", [("input", StringTensorType([None, 1]))], - options=extra, target_opset=TARGET_OPSET) + options=extra, + target_opset=TARGET_OPSET, + ) except (RuntimeError, NameError): pass extra = { id(vect): { - "separators": [ - " ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)" - ] + "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"] } } model_onnx = convert_sklearn( vect, "TfidfVectorizer", [("input", StringTensorType([1]))], - options=extra, target_opset=TARGET_OPSET) + options=extra, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11ParenthesisId-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11ParenthesisId-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer_binary(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first 
document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(binary=True) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizerBinary-OneOff-SklCol") + basename="SklearnTfidfVectorizerBinary-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) <= pv.Version("0.3.0"), - reason="Requires opset 9.") + pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9." + ) def test_model_tfidf_vectorizer11_64(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType())], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer1164-OneOff-SklCol") + basename="SklearnTfidfVectorizer1164-OneOff-SklCol", + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': corpus.ravel()})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus.ravel()})[0] assert res.shape == (4, 9) - @unittest.skipIf( - apply_less is None, reason="onnxconverter-common too old") + @unittest.skipIf(apply_less is None, reason="onnxconverter-common too old") @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.3.0"), - reason="Requires opset 9.") + pv.Version(ort_version) < pv.Version("1.3.0"), reason="Requires opset 9." 
+ ) def test_tfidf_svm(self): data = [ ["schedule a meeting", 0], @@ -454,7 +530,7 @@ def test_tfidf_svm(self): ["slot in a meeting", 0], ["call ron", 1], ["make a phone call", 1], - ["call in on the phone", 2] + ["call in on the phone", 2], ] docs = [doc for (doc, _) in data] labels = [label for (_, label) in data] @@ -469,63 +545,75 @@ def test_tfidf_svm(self): embeddings = embeddings.astype(numpy.float32).todense() exp = clf.predict(embeddings) - initial_type = [('input', FloatTensorType([None, dim]))] - model_onnx = convert_sklearn(clf, initial_types=initial_type, - target_opset=TARGET_OPSET) + initial_type = [("input", FloatTensorType([None, dim]))] + model_onnx = convert_sklearn( + clf, initial_types=initial_type, target_opset=TARGET_OPSET + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': embeddings})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": embeddings})[0] assert_almost_equal(exp, res) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( - pv.Version(ort_version) <= pv.Version("1.0.0"), - reason="Requires opset 10.") + pv.Version(ort_version) <= pv.Version("1.0.0"), reason="Requires opset 10." + ) def test_model_tfidf_vectorizer_nan(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) options = copy.deepcopy(self.get_options()) - options[TfidfVectorizer]['nan'] = True - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType())], - options=options, - target_opset=TARGET_OPSET) + options[TfidfVectorizer]["nan"] = True + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=options, + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': corpus.ravel()})[0] + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus.ravel()})[0] assert res.shape == (4, 9) assert numpy.isnan(res[0, 0]) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_model_tfidf_vectorizer11_custom_vocabulary(self): - corpus = numpy.array([ - "This is the first document.", - "This document is the second document.", - "And this is the third one.", - "Is this the first document?", - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vc = ["first", "second", "third", "document", "this"] vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, vocabulary=vc) vect.fit(corpus.ravel()) self.assertFalse(hasattr(vect, "stop_words_")) - model_onnx = convert_sklearn(vect, "TfidfVectorizer", - [("input", StringTensorType())], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) 
self.assertTrue(model_onnx is not None) dump_data_and_model( corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol") + basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_char.py b/tests/test_sklearn_tfidf_vectorizer_converter_char.py index 6b1149817..f0096b18e 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_char.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_char.py @@ -12,128 +12,176 @@ class TestSklearnTfidfVectorizerRegex(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_short_word(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, - analyzer='word', token_pattern=".{1,2}") + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer( + ngram_range=(1, 1), norm=None, analyzer="word", token_pattern=".{1,2}" + ) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer22_short_word(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm=None, - analyzer='word', token_pattern=".{1,5}") + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer( + ngram_range=(1, 2), norm=None, analyzer="word", token_pattern=".{1,5}" + ) vect.fit(corpus.ravel()) try: - convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) except RuntimeError as e: assert ("Unable to split n-grams 'e fir st do'") in str(e) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_char(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, - analyzer='char') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, analyzer="char") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11Char-OneOff-SklCol") + corpus, + vect, + 
model_onnx, + basename="SklearnTfidfVectorizer11Char-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf(True, reason="expected failure") def test_model_tfidf_vectorizer11_char_doublespace(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, - analyzer='char') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, analyzer="char") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11CharSpace-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11CharSpace-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_char(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm=None, - analyzer='char') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), norm=None, analyzer="char") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12Char-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12Char-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_normL1_char(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm='l1', analyzer='char') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1", analyzer="char") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12L1Char-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12L1Char-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_short_word_spaces(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the 
second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm=None, - analyzer='word', token_pattern=".{1,3}") + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer( + ngram_range=(1, 2), norm=None, analyzer="word", token_pattern=".{1,3}" + ) vect.fit(corpus.ravel()) try: model_onnx = convert_sklearn( - vect, 'TfidfVectorizer', - [('input', StringTensorType([None, 1]))], - target_opset=TARGET_OPSET) + vect, + "TfidfVectorizer", + [("input", StringTensorType([None, 1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) except RuntimeError as e: if "Unable to split n-grams 't i s t'" not in str(e): @@ -141,21 +189,30 @@ def test_model_tfidf_vectorizer12_short_word_spaces(self): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_short_word_spaces(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - ]).reshape((2, 1)) - vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, - analyzer='word', token_pattern=".{1,3}") + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + ] + ).reshape((2, 1)) + vect = TfidfVectorizer( + ngram_range=(1, 1), norm=None, analyzer="word", token_pattern=".{1,3}" + ) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py index e4504cd20..9ed7306ae 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py @@ -14,36 +14,46 @@ class TestSklearnTfidfVectorizerDataSet(unittest.TestCase): - @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_tfidf_20newsgroups(self): data = fetch_20newsgroups() X, y = np.array(data.data)[:100], np.array(data.target)[:100] X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=42) + X, y, test_size=0.5, random_state=42 + ) model = TfidfVectorizer().fit(X_train) onnx_model = convert_sklearn( - model, 'cv', [('input', StringTensorType(X_test.shape))], - target_opset=TARGET_OPSET) + model, + "cv", + [("input", StringTensorType(X_test.shape))], + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X_test, model, onnx_model, - basename="SklearnTfidfVectorizer20newsgroups") + X_test, model, onnx_model, basename="SklearnTfidfVectorizer20newsgroups" + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_tfidf_20newsgroups_nolowercase(self): data = fetch_20newsgroups() X, y = np.array(data.data)[:100], np.array(data.target)[:100] X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=42) + X, y, test_size=0.5, random_state=42 + ) model = TfidfVectorizer(lowercase=False).fit(X_train) onnx_model = convert_sklearn( - model, 'cv', [('input', 
StringTensorType(X_test.shape))], - target_opset=TARGET_OPSET) + model, + "cv", + [("input", StringTensorType(X_test.shape))], + target_opset=TARGET_OPSET, + ) dump_data_and_model( - X_test, model, onnx_model, - basename="SklearnTfidfVectorizer20newsgroupsNOLower") + X_test, + model, + onnx_model, + basename="SklearnTfidfVectorizer20newsgroupsNOLower", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py index 634bc23c7..5872d0f09 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py @@ -18,77 +18,123 @@ class TestSklearnTfidfVectorizerPipeline(unittest.TestCase): - - def common_test_model_tfidf_vectorizer_pipeline_cls( - self, kind=None, verbose=False): - if kind == 'stop': - if pv.Version(ort_version) >= pv.Version('1.4.0'): + def common_test_model_tfidf_vectorizer_pipeline_cls(self, kind=None, verbose=False): + if kind == "stop": + if pv.Version(ort_version) >= pv.Version("1.4.0"): # regression with stopwords in onnxruntime 1.4+ - stopwords = ['theh'] + stopwords = ["theh"] else: - stopwords = ['the', 'and', 'is'] + stopwords = ["the", "and", "is"] else: stopwords = None - X_train = numpy.array([ - "This is the first document", - "This document is the second document.", - "And this is the third one", - "Is this the first document?", - ]).reshape((4, 1)) + X_train = numpy.array( + [ + "This is the first document", + "This document is the second document.", + "And this is the third one", + "Is this the first document?", + ] + ).reshape((4, 1)) y_train = numpy.array([0, 1, 0, 1]) if kind is None: - model_pipeline = Pipeline([ - ('vectorizer', TfidfVectorizer( - stop_words=stopwords, lowercase=True, use_idf=True, - ngram_range=(1, 3), max_features=30000)), - ]) - elif kind == 'cls': - model_pipeline = Pipeline([ - ('vectorizer', TfidfVectorizer( - stop_words=stopwords, lowercase=True, use_idf=True, - ngram_range=(1, 3), max_features=30000)), - ('feature_selector', SelectKBest(k=10)), - ('classifier', SVC( - class_weight='balanced', kernel='rbf', gamma='scale', - probability=True)) - ]) - elif kind == 'stop': - model_pipeline = Pipeline([ - ('vectorizer', CountVectorizer( - stop_words=stopwords, lowercase=True, - ngram_range=(1, 2), max_features=30000)), - ]) - elif kind == 'reg': - model_pipeline = Pipeline([ - ('vectorizer', TfidfVectorizer( - stop_words=stopwords, lowercase=True, use_idf=True, - ngram_range=(1, 3), max_features=30000)), - ('feature_selector', SelectKBest(k=10)), - ('classifier', SVR(kernel='rbf', gamma='scale')) - ]) + model_pipeline = Pipeline( + [ + ( + "vectorizer", + TfidfVectorizer( + stop_words=stopwords, + lowercase=True, + use_idf=True, + ngram_range=(1, 3), + max_features=30000, + ), + ), + ] + ) + elif kind == "cls": + model_pipeline = Pipeline( + [ + ( + "vectorizer", + TfidfVectorizer( + stop_words=stopwords, + lowercase=True, + use_idf=True, + ngram_range=(1, 3), + max_features=30000, + ), + ), + ("feature_selector", SelectKBest(k=10)), + ( + "classifier", + SVC( + class_weight="balanced", + kernel="rbf", + gamma="scale", + probability=True, + ), + ), + ] + ) + elif kind == "stop": + model_pipeline = Pipeline( + [ + ( + "vectorizer", + CountVectorizer( + stop_words=stopwords, + lowercase=True, + ngram_range=(1, 2), + max_features=30000, + ), + ), + ] + ) + elif kind == "reg": + model_pipeline = Pipeline( + [ + ( + "vectorizer", + TfidfVectorizer( + stop_words=stopwords, + 
lowercase=True, + use_idf=True, + ngram_range=(1, 3), + max_features=30000, + ), + ), + ("feature_selector", SelectKBest(k=10)), + ("classifier", SVR(kernel="rbf", gamma="scale")), + ] + ) else: raise AssertionError(kind) model_pipeline.fit(X_train.ravel(), y_train) - initial_type = [('input', StringTensorType([None, 1]))] + initial_type = [("input", StringTensorType([None, 1]))] model_onnx = convert_sklearn( - model_pipeline, "cv", initial_types=initial_type, - options={SVC: {'zipmap': False}}, - target_opset=TARGET_OPSET) + model_pipeline, + "cv", + initial_types=initial_type, + options={SVC: {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) - if kind in (None, 'stop'): + if kind in (None, "stop"): exp = [model_pipeline.transform(X_train.ravel()).toarray()] - elif kind == 'cls': - exp = [model_pipeline.predict(X_train.ravel()), - model_pipeline.predict_proba(X_train.ravel())] - elif kind == 'reg': + elif kind == "cls": + exp = [ + model_pipeline.predict(X_train.ravel()), + model_pipeline.predict_proba(X_train.ravel()), + ] + elif kind == "reg": exp = [model_pipeline.predict(X_train.ravel()).reshape((-1, 1))] sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'input': X_train}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"input": X_train}) if verbose: voc = model_pipeline.steps[0][-1].vocabulary_ voc = list(sorted([(v, k) for k, v in voc.items()])) @@ -102,23 +148,23 @@ def common_test_model_tfidf_vectorizer_pipeline_cls( assert_almost_equal(a, b) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") - @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.0.0"), - reason="Too old") + @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.0.0"), reason="Too old") def test_model_tfidf_vectorizer_pipeline(self): - for kind in [None, 'cls', 'reg']: + for kind in [None, "cls", "reg"]: with self.subTest(kind=kind): self.common_test_model_tfidf_vectorizer_pipeline_cls(kind) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @unittest.skipIf( pv.Version(ort_version) < pv.Version("1.4.0"), - reason="Wrong handling of stopwods and n-grams") + reason="Wrong handling of stopwords and n-grams", + ) def test_model_tfidf_vectorizer_pipeline_stop_words(self): - for kind in ['stop']: + for kind in ["stop"]: with self.subTest(kind=kind): self.common_test_model_tfidf_vectorizer_pipeline_cls( - kind, verbose=False) + kind, verbose=False + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py index 4e383d492..df94705f4 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py @@ -12,45 +12,57 @@ class TestSklearnTfidfVectorizerRegex(unittest.TestCase): - def get_options(self): return {TfidfVectorizer: {"tokenexp": ""}} @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = 
convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_opset(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) for opset in range(8, TARGET_OPSET + 1): try: model_onnx = convert_sklearn( - vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), target_opset=opset) + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=opset, + ) except RuntimeError as e: if "only works for opset" in str(e): continue @@ -62,284 +74,390 @@ def test_model_tfidf_vectorizer11_opset(self): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_word4(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) - vect = TfidfVectorizer(ngram_range=( - 1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}") + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = TfidfVectorizer( + ngram_range=(1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}" + ) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_empty_string(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - '', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + 
[("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) # TfidfVectorizer in onnxruntime fails with empty strings dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_out_vocabulary(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - corpus = numpy.array([ - 'AZZ ZZ This is the first document.', - 'BZZ ZZ This document is the second document.', - 'ZZZ ZZ And this is the third one.', - 'WZZ ZZ Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "AZZ ZZ This is the first document.", + "BZZ ZZ This document is the second document.", + "ZZZ ZZ And this is the third one.", + "WZZ ZZ Is this the first document?", + ] + ).reshape((4, 1)) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer22(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(2, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer22Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer22Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12(self): - corpus = numpy.array([ - 'AA AA', - 'AA AA BB', - ]).reshape((2, 1)) + corpus = numpy.array( + [ + "AA AA", + "AA AA BB", + ] + ).reshape((2, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - 
options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12SRegex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12SRegex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer122(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 2), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_normL1(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm='l1') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12L1Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12L1Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer12_normL2(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) - vect = TfidfVectorizer(ngram_range=(1, 2), norm='l2') + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), norm="l2") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + 
[("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer12L2Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer12L2Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer13(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the first document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 3), norm=None) vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer13Regex-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer13Regex-OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11parenthesis_class(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the (first) document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the (first) document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - extra = {TfidfVectorizer: {'separators': [ - ' ', '[.]', '\\?', ',', ';', - ':', '\\!', '\\(', '\\)' - ], - 'tokenexp': None}} - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=extra, - target_opset=TARGET_OPSET) + extra = { + TfidfVectorizer: { + "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"], + "tokenexp": None, + } + } + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=extra, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) # This test depends on this issue: # https://github.com/Microsoft/onnxruntime/issues/957. 
dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11ParenthesisClassRegex-" - "OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11ParenthesisClassRegex-" "OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_idparenthesis_id(self): - corpus = numpy.array([ - 'This is the first document.', - 'This document is the second document.', - 'And this is the third one.', - 'Is this the (first) document?', - ]).reshape((4, 1)) + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the (first) document?", + ] + ).reshape((4, 1)) vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) vect.fit(corpus.ravel()) - extra = {id(vect): {"sep2": [' ', '.', '?', ',', ';', ':', - '!', '(', ')'], - 'regex': None}} + extra = { + id(vect): { + "sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"], + "regex": None, + } + } try: - convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=extra, - target_opset=TARGET_OPSET) + convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=extra, + target_opset=TARGET_OPSET, + ) except (RuntimeError, NameError): pass - extra = {id(vect): {"separators": [ - ' ', '[.]', '\\?', ',', ';', ':', - '\\!', '\\(', '\\)' - ], - "tokenexp": None}} - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=extra, - target_opset=TARGET_OPSET) + extra = { + id(vect): { + "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"], + "tokenexp": None, + } + } + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=extra, + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) # This test depends on this issue: # https://github.com/Microsoft/onnxruntime/issues/957. 
dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizer11ParenthesisIdRegex-" - "OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11ParenthesisIdRegex-" "OneOff-SklCol", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer_issue(self): - corpus = numpy.array([ - 'the-first document.', - 'this-is the-third-one.', - 'this-the first-document?', - ]).reshape((3, 1)) - vect = TfidfVectorizer( - ngram_range=(1, 2), - token_pattern=r"\b[a-z ]+\b") + corpus = numpy.array( + [ + "the-first document.", + "this-is the-third-one.", + "this-the first-document?", + ] + ).reshape((3, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b") vect.fit(corpus.ravel()) with self.assertRaises(RuntimeError) as e: - convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertIn("More one decomposition in tokens", str(e)) self.assertIn( - "Unable to split n-grams 'the first document' into tokens.", - str(e)) + "Unable to split n-grams 'the first document' into tokens.", str(e) + ) - corpus = numpy.array([ - 'first document.', - 'this-is the-third-one.', - 'the first document', - ]).reshape((3, 1)) - vect = TfidfVectorizer( - ngram_range=(1, 2), - token_pattern=r"\b[a-z ]+\b") + corpus = numpy.array( + [ + "first document.", + "this-is the-third-one.", + "the first document", + ] + ).reshape((3, 1)) + vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b") vect.fit(corpus.ravel()) - model_onnx = convert_sklearn(vect, 'TfidfVectorizer', - [('input', StringTensorType([1]))], - options=self.get_options(), - target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType([1]))], + options=self.get_options(), + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, - basename="SklearnTfidfVectorizerIssue-OneOff-SklCol") + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizerIssue-OneOff-SklCol", + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_truncated_svd.py b/tests/test_sklearn_truncated_svd.py index 46baeba9e..b6f46afbf 100644 --- a/tests/test_sklearn_truncated_svd.py +++ b/tests/test_sklearn_truncated_svd.py @@ -22,39 +22,37 @@ def test_truncated_svd(self): svd = TruncatedSVD(n_components=K) svd.fit(x) - model_onnx = convert_sklearn(svd, - initial_types=[ - ("input", - FloatTensorType(shape=[None, C])) - ], target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + svd, + initial_types=[("input", FloatTensorType(shape=[None, C]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) dump_data_and_model(x, svd, model_onnx, basename="SklearnTruncatedSVD") def test_truncated_svd_arpack(self): X = create_tensor(10, 10) - svd = TruncatedSVD(n_components=5, algorithm='arpack', n_iter=10, - tol=0.1, random_state=42).fit(X) - model_onnx = convert_sklearn(svd, - initial_types=[ - ("input", - FloatTensorType(shape=X.shape)) - ], target_opset=TARGET_OPSET) + svd = TruncatedSVD( + n_components=5, algorithm="arpack", n_iter=10, tol=0.1, random_state=42 + ).fit(X) + model_onnx = convert_sklearn( + svd, + initial_types=[("input", FloatTensorType(shape=X.shape))], + target_opset=TARGET_OPSET, 
+ ) self.assertTrue(model_onnx is not None) - dump_data_and_model(X, svd, model_onnx, - basename="SklearnTruncatedSVDArpack") + dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDArpack") def test_truncated_svd_int(self): X = create_tensor(5, 5).astype(np.int64) svd = TruncatedSVD(n_iter=20, random_state=42).fit(X) - model_onnx = convert_sklearn(svd, - initial_types=[ - ("input", - Int64TensorType([None, X.shape[1]])) - ], target_opset=TARGET_OPSET) + model_onnx = convert_sklearn( + svd, + initial_types=[("input", Int64TensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET, + ) self.assertTrue(model_onnx is not None) - dump_data_and_model( - X, svd, model_onnx, - basename="SklearnTruncatedSVDInt") + dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDInt") if __name__ == "__main__": diff --git a/tests/test_sklearn_voting_classifier_converter.py b/tests/test_sklearn_voting_classifier_converter.py index 0faf8c141..cc571f782 100644 --- a/tests/test_sklearn_voting_classifier_converter.py +++ b/tests/test_sklearn_voting_classifier_converter.py @@ -14,7 +14,7 @@ dump_multiple_classification, dump_binary_classification, dump_data_and_model, - TARGET_OPSET + TARGET_OPSET, ) @@ -43,28 +43,27 @@ def custom_tranform_converter(scope, operator, container): weights = [0.5, 0.1, 10] shape = [len(weights), 1] container.add_initializer(weights_name, atype, shape, weights) - apply_mul(scope, [input.full_name, weights_name], output.full_name, - container) + apply_mul(scope, [input.full_name, weights_name], output.full_name, container) class TestVotingClassifierConverter(unittest.TestCase): def test_operator_mul(self): - model = CustomTransform() Xd = numpy.array([[1, 2], [3, 4], [4, 5]]) model_onnx = convert_sklearn( - model, "CustomTransform", + model, + "CustomTransform", [("input", FloatTensorType([None, Xd.shape[1]]))], custom_shape_calculators={ CustomTransform: custom_transform_shape_calculator }, - custom_conversion_functions={ - CustomTransform: custom_tranform_converter - }, target_opset=TARGET_OPSET) + custom_conversion_functions={CustomTransform: custom_tranform_converter}, + target_opset=TARGET_OPSET, + ) dump_data_and_model( - Xd.astype(numpy.float32), model, model_onnx, - basename="CustomTransformerMul") + Xd.astype(numpy.float32), model, model_onnx, basename="CustomTransformerMul" + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_voting_hard_binary(self): @@ -78,8 +77,8 @@ def test_voting_hard_binary(self): ) # predict_proba is not defined when voting is hard. dump_binary_classification( - model, suffix="Hard", comparable_outputs=[0], - target_opset=TARGET_OPSET) + model, suffix="Hard", comparable_outputs=[0], target_opset=TARGET_OPSET + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_voting_hard_binary_weights(self): @@ -94,8 +93,11 @@ def test_voting_hard_binary_weights(self): ) # predict_proba is not defined when voting is hard. 
dump_binary_classification( - model, suffix="WeightsHard", comparable_outputs=[0], - target_opset=TARGET_OPSET) + model, + suffix="WeightsHard", + comparable_outputs=[0], + target_opset=TARGET_OPSET, + ) def test_voting_soft_binary(self): model = VotingClassifier( @@ -107,8 +109,8 @@ def test_voting_soft_binary(self): ], ) dump_binary_classification( - model, suffix="Soft", comparable_outputs=[0, 1], - target_opset=TARGET_OPSET) + model, suffix="Soft", comparable_outputs=[0, 1], target_opset=TARGET_OPSET + ) def test_voting_soft_binary_weighted(self): model = VotingClassifier( @@ -121,8 +123,8 @@ def test_voting_soft_binary_weighted(self): ], ) dump_binary_classification( - model, suffix="WeightedSoft", - target_opset=TARGET_OPSET) + model, suffix="WeightedSoft", target_opset=TARGET_OPSET + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_voting_hard_multi(self): @@ -136,8 +138,8 @@ def test_voting_hard_multi(self): ], ) dump_multiple_classification( - model, suffix="Hard", comparable_outputs=[0], - target_opset=TARGET_OPSET) + model, suffix="Hard", comparable_outputs=[0], target_opset=TARGET_OPSET + ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") def test_voting_hard_multi_weighted(self): @@ -152,8 +154,11 @@ def test_voting_hard_multi_weighted(self): ], ) dump_multiple_classification( - model, suffix="WeightedHard", comparable_outputs=[0], - target_opset=TARGET_OPSET) + model, + suffix="WeightedHard", + comparable_outputs=[0], + target_opset=TARGET_OPSET, + ) def test_voting_soft_multi(self): model = VotingClassifier( @@ -164,8 +169,7 @@ def test_voting_soft_multi(self): ("lr2", LogisticRegression()), ], ) - dump_multiple_classification( - model, suffix="Soft", target_opset=TARGET_OPSET) + dump_multiple_classification(model, suffix="Soft", target_opset=TARGET_OPSET) def test_voting_soft_multi_string(self): model = VotingClassifier( @@ -177,8 +181,8 @@ def test_voting_soft_multi_string(self): ], ) dump_multiple_classification( - model, label_string=True, suffix="Soft", - target_opset=TARGET_OPSET) + model, label_string=True, suffix="Soft", target_opset=TARGET_OPSET + ) def test_voting_soft_multi_weighted(self): model = VotingClassifier( @@ -191,8 +195,8 @@ def test_voting_soft_multi_weighted(self): ], ) dump_multiple_classification( - model, suffix="WeightedSoft", - target_opset=TARGET_OPSET) + model, suffix="WeightedSoft", target_opset=TARGET_OPSET + ) def test_voting_soft_multi_weighted4(self): model = VotingClassifier( @@ -207,8 +211,8 @@ def test_voting_soft_multi_weighted4(self): ], ) dump_multiple_classification( - model, suffix="Weighted4Soft", - target_opset=TARGET_OPSET) + model, suffix="Weighted4Soft", target_opset=TARGET_OPSET + ) def test_voting_soft_multi_weighted42(self): model = VotingClassifier( @@ -223,8 +227,8 @@ def test_voting_soft_multi_weighted42(self): ], ) dump_multiple_classification( - model, suffix="Weighted42Soft", - target_opset=TARGET_OPSET) + model, suffix="Weighted42Soft", target_opset=TARGET_OPSET + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_voting_regressor_converter.py b/tests/test_sklearn_voting_regressor_converter.py index e4f6639ac..005a1d336 100644 --- a/tests/test_sklearn_voting_regressor_converter.py +++ b/tests/test_sklearn_voting_regressor_converter.py @@ -5,6 +5,7 @@ import unittest import numpy from sklearn.linear_model import LinearRegression + try: from sklearn.ensemble import VotingRegressor except ImportError: @@ -17,58 +18,72 @@ FloatTensorType, Int64TensorType, ) -from test_utils import 
( - dump_data_and_model, fit_regression_model, TARGET_OPSET) +from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET def model_to_test(): - return VotingRegressor([ - ('lr', LinearRegression()), - ('dt', DecisionTreeRegressor()), - ]) + return VotingRegressor( + [ + ("lr", LinearRegression()), + ("dt", DecisionTreeRegressor()), + ] + ) class TestVotingRegressorConverter(unittest.TestCase): - @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression(self): model, X = fit_regression_model(model_to_test()) model_onnx = convert_sklearn( - model, "voting regression", + model, + "voting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( X.astype(numpy.float32), - model, model_onnx, + model, + model_onnx, basename="SklearnVotingRegressor-Dec4", - comparable_outputs=[0]) + comparable_outputs=[0], + ) @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression_int(self): model, X = fit_regression_model(model_to_test(), is_int=True) model_onnx = convert_sklearn( - model, "voting regression", + model, + "voting regression", [("input", Int64TensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnVotingRegressorInt-Dec4", - comparable_outputs=[0]) + comparable_outputs=[0], + ) @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression_bool(self): model, X = fit_regression_model(model_to_test(), is_bool=True) model_onnx = convert_sklearn( - model, "voting regression", + model, + "voting regression", [("input", BooleanTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, + X, + model, + model_onnx, basename="SklearnVotingRegressorBool", - comparable_outputs=[0]) + comparable_outputs=[0], + ) if __name__ == "__main__": diff --git a/tests/test_sklearn_woe_transformer.py b/tests/test_sklearn_woe_transformer.py index 398d89f47..e54a9c5f6 100644 --- a/tests/test_sklearn_woe_transformer.py +++ b/tests/test_sklearn_woe_transformer.py @@ -6,6 +6,7 @@ import unittest import numpy from numpy.testing import assert_almost_equal + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument except ImportError: @@ -22,116 +23,139 @@ class TestSklearnWOETransformerConverter(unittest.TestCase): - - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer(self): x = numpy.array( - [[0.5, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 0.5, 0.92]], - dtype=numpy.float32) - woe = WOETransformer(intervals=[ - [(0.5, 0.7, False, False), - (0.5, 0.7, True, False), - (0.5, 0.7, False, True), - (0.5, 0.7, True, True)], - [(0.9, numpy.inf), - (-numpy.inf, 0.9)]]) + [[0.5, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 0.5, 0.92]], dtype=numpy.float32 + ) + woe = WOETransformer( + intervals=[ + [ + (0.5, 0.7, False, False), + (0.5, 0.7, True, False), + (0.5, 0.7, False, True), + (0.5, 0.7, True, True), + ], + [(0.9, numpy.inf), (-numpy.inf, 0.9)], + ] + ) woe.fit(x) self.assertEqual(woe.indices_, [(0, 4), (4, 6), (6, 7)]) self.assertEqual(woe.n_dims_, 7) - self.assertEqual(woe.intervals_, [ - [(0.5, 0.7, 
False, False), - (0.5, 0.7, True, False), - (0.5, 0.7, False, True), - (0.5, 0.7, True, True)], - [(0.9, numpy.inf, False, True), - (-numpy.inf, 0.9, False, True)], - None]) - self.assertEqual(woe.weights_, [ - [1, 1, 1, 1], [1, 1], None]) + self.assertEqual( + woe.intervals_, + [ + [ + (0.5, 0.7, False, False), + (0.5, 0.7, True, False), + (0.5, 0.7, False, True), + (0.5, 0.7, True, True), + ], + [(0.9, numpy.inf, False, True), (-numpy.inf, 0.9, False, True)], + None, + ], + ) + self.assertEqual(woe.weights_, [[1, 1, 1, 1], [1, 1], None]) names = woe.get_feature_names() self.assertEqual( names, - [']0.5,0.7[', '[0.5,0.7[', ']0.5,0.7]', '[0.5,0.7]', - ']0.9,inf]', ']-inf,0.9]', 'X2']) + [ + "]0.5,0.7[", + "[0.5,0.7[", + "]0.5,0.7]", + "[0.5,0.7]", + "]0.9,inf]", + "]-inf,0.9]", + "X2", + ], + ) x2 = woe.transform(x) expected = numpy.array( - [[0, 1, 0, 1, 0, 1, 0.9], - [1, 1, 1, 1, 0, 1, 0.91], - [0, 0, 1, 1, 0, 1, 0.92]], - dtype=numpy.float32) + [ + [0, 1, 0, 1, 0, 1, 0.9], + [1, 1, 1, 1, 0, 1, 0.91], + [0, 0, 1, 1, 0, 1, 0.92], + ], + dtype=numpy.float32, + ) assert_almost_equal(expected, x2) - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv_ext(self): x = numpy.array( - [[0.4, 1.4, 2.4, 3.4], - [0.5, 1.5, 2.5, 3.5], - [0.6, 1.6, 2.6, 3.6], - [0.7, 1.7, 2.7, 3.7]], - dtype=numpy.float32) - woe = WOETransformer(intervals=[ - [(0.4, 0.6, False, False)], - [(1.4, 1.6, False, True)], - [(2.4, 2.6, True, False)], - [(3.4, 3.6, True, True)]]) + [ + [0.4, 1.4, 2.4, 3.4], + [0.5, 1.5, 2.5, 3.5], + [0.6, 1.6, 2.6, 3.6], + [0.7, 1.7, 2.7, 3.7], + ], + dtype=numpy.float32, + ) + woe = WOETransformer( + intervals=[ + [(0.4, 0.6, False, False)], + [(1.4, 1.6, False, True)], + [(2.4, 2.6, True, False)], + [(3.4, 3.6, True, True)], + ] + ) woe.fit(x) expected = woe.transform(x) onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv_ext2(self): - for inca, incb in [(False, False), (True, True), - (False, True), (True, False)]: + for inca, incb in [(False, False), (True, True), (False, True), (True, False)]: with self.subTest(inca=inca, incb=incb): x = numpy.array([[0.45], [0.5], [0.55]], dtype=numpy.float32) - woe = WOETransformer(intervals=[ - [(0.4, 0.5, False, inca), (0.5, 0.6, incb, False)]]) + woe = WOETransformer( + intervals=[[(0.4, 0.5, False, inca), (0.5, 0.6, incb, False)]] + ) woe.fit(x) expected = woe.transform(x) - onnx_model = to_onnx( - woe, x, target_opset=TARGET_OPSET, verbose=0) + onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET, verbose=0) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv_ext3(self): x = numpy.array( - [[0.4, 1.4, 2.4, 3.4], - [0.5, 1.5, 2.5, 3.5], - [0.6, 1.6, 
2.6, 3.6]], - dtype=numpy.float32) - woe = WOETransformer(intervals=[ - [(0.4, 0.5, False, False), (0.5, 0.6, False, False)], - [(1.4, 1.5, False, True), (1.5, 1.6, False, True)], - [(2.4, 2.5, True, False), (2.5, 2.6, True, False)], - [(3.4, 3.5, True, True), (3.5, 3.6, True, True)]]) + [[0.4, 1.4, 2.4, 3.4], [0.5, 1.5, 2.5, 3.5], [0.6, 1.6, 2.6, 3.6]], + dtype=numpy.float32, + ) + woe = WOETransformer( + intervals=[ + [(0.4, 0.5, False, False), (0.5, 0.6, False, False)], + [(1.4, 1.5, False, True), (1.5, 1.6, False, True)], + [(2.4, 2.5, True, False), (2.5, 2.6, True, False)], + [(3.4, 3.5, True, True), (3.5, 3.6, True, True)], + ] + ) woe.fit(x) expected = woe.transform(x) onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv(self): x = numpy.array( - [[0.2, 0.7, 0.9], - [0.51, 0.71, 0.91], - [0.7, 1.5, 0.92]], - dtype=numpy.float32) - woe = WOETransformer(intervals=[ - [(0.4, 0.6, False, True)], - [(0.9, numpy.inf), (-numpy.inf, 0.9)]]) + [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32 + ) + woe = WOETransformer( + intervals=[[(0.4, 0.6, False, True)], [(0.9, numpy.inf), (-numpy.inf, 0.9)]] + ) woe.fit(x) expected = woe.transform(x) @@ -141,131 +165,138 @@ def test_woe_transformer_conv(self): f.write(onnx_model.SerializeToString()) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv_weights(self): x = numpy.array( - [[0.2, 0.7, 0.9], - [0.51, 0.71, 0.91], - [0.7, 1.5, 0.92]], - dtype=numpy.float32) + [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32 + ) woe = WOETransformer( - intervals=[[(0.4, 0.6, False, True)], - [(0.9, numpy.inf), (-numpy.inf, 0.9)]], - weights=[[2.7], [3.5, 6.7]]) + intervals=[ + [(0.4, 0.6, False, True)], + [(0.9, numpy.inf), (-numpy.inf, 0.9)], + ], + weights=[[2.7], [3.5, 6.7]], + ) woe.fit(x) expected = woe.transform(x) onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(InvalidArgument is None, - reason='onnxruntime is too old') - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old") + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_conv_weights_onnx(self): x = numpy.array( - [[0.2, 0.7, 0.9], - [0.51, 0.71, 0.91], - [0.7, 1.5, 0.92]], - dtype=numpy.float32) + [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32 + ) woe = WOETransformer( - intervals=[[(0.4, 0.6, False, 
True)], - [(0.9, numpy.inf), (-numpy.inf, 0.9)]], - weights=[[2.7], [3.5, 6.7]]) + intervals=[ + [(0.4, 0.6, False, True)], + [(0.9, numpy.inf), (-numpy.inf, 0.9)], + ], + weights=[[2.7], [3.5, 6.7]], + ) woe.fit(x) expected = woe.transform(x) onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET) try: sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Cannot load model:\n%s" % str(onnx_model)) from e - got = sess.run(None, {'X': x})[0] + raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(InvalidArgument is None, - reason='onnxruntime is too old') + @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old") def test_woe_transformer_conv_weights_onnx_noonehot(self): x = numpy.array( - [[0.2, 0.7, 0.9], - [0.51, 0.71, 0.91], - [0.7, 1.5, 0.92]], - dtype=numpy.float32) + [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32 + ) woe = WOETransformer( - intervals=[[(0.4, 0.6, False, True)], - [(0.9, numpy.inf), (-numpy.inf, 0.9)]], + intervals=[ + [(0.4, 0.6, False, True)], + [(0.9, numpy.inf), (-numpy.inf, 0.9)], + ], weights=[[2.7], [3.5, 6.7]], - onehot=False) + onehot=False, + ) woe.fit(x) expected = woe.transform(x) - manual = numpy.array([[0., 6.7, 0.9], - [2.7, 6.7, 0.91], - [0., 3.5, 0.92]], dtype=numpy.float32) + manual = numpy.array( + [[0.0, 6.7, 0.9], [2.7, 6.7, 0.91], [0.0, 3.5, 0.92]], dtype=numpy.float32 + ) assert_almost_equal(manual, expected) - with self.subTest(way='skl2onnx'): + with self.subTest(way="skl2onnx"): onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET, verbose=0) sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'X': x})[0] + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - with self.subTest(way='onnx'): + with self.subTest(way="onnx"): onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET) try: sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Cannot load model:\n%s" % str(onnx_model)) from e - got = sess.run(None, {'X': x})[0] + raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) - @unittest.skipIf(InvalidArgument is None, - reason='onnxruntime is too old') - @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder') + @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old") + @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder") def test_woe_transformer_bigger(self): x = numpy.array([[0, 1, 2, 3, 4, 5, 6, -1]], dtype=numpy.float32).T - intervals = [[(0.0, 1.0, False, True), (1.0, 2.0, False, True), - (2.0, 3.0, False, True), (3.0, 4.0, False, True)]] - weights = [[-1.4057124469769924, -1.7241661780955269, - 2.545531271604435, 0.9614111671546247]] - woe = WOETransformer(intervals=intervals, weights=weights, - onehot=False) + intervals = [ + [ + (0.0, 1.0, False, True), + (1.0, 2.0, False, True), + (2.0, 3.0, False, True), + (3.0, 4.0, False, True), + ] + ] + weights = [ + [ + 
-1.4057124469769924, + -1.7241661780955269, + 2.545531271604435, + 0.9614111671546247, + ] + ] + woe = WOETransformer(intervals=intervals, weights=weights, onehot=False) woe.fit(x) expected = woe.transform(x) onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET) try: sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Cannot load model:\n%s" % str(onnx_model)) from e - got = sess.run(None, {'X': x})[0] + raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET) try: sess = InferenceSession( - onnx_model.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Cannot load model:\n%s" % str(onnx_model)) from e - got = sess.run(None, {'X': x})[0] + raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e + got = sess.run(None, {"X": x})[0] assert_almost_equal(expected, got) diff --git a/tests/test_supported_converters.py b/tests/test_supported_converters.py index 2dc3bf9bd..a59657f7c 100644 --- a/tests/test_supported_converters.py +++ b/tests/test_supported_converters.py @@ -12,12 +12,17 @@ from sklearn.preprocessing import StandardScaler from skl2onnx.common.data_types import FloatTensorType from skl2onnx import ( - supported_converters, convert_sklearn, to_onnx, - update_registered_converter) + supported_converters, + convert_sklearn, + to_onnx, + update_registered_converter, +) from skl2onnx.operator_converters.linear_classifier import ( - convert_sklearn_linear_classifier) + convert_sklearn_linear_classifier, +) from skl2onnx.shape_calculators.linear_classifier import ( - calculate_linear_classifier_output_shapes) + calculate_linear_classifier_output_shapes, +) from test_utils import fit_regression_model, TARGET_OPSET @@ -38,61 +43,76 @@ def test_sklearn_converters(self): def test_ir_version(self): model, X = fit_regression_model( - GradientBoostingRegressor(n_estimators=3, loss="huber")) + GradientBoostingRegressor(n_estimators=3, loss="huber") + ) model_onnx = convert_sklearn( model, "gradient boosting regression", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sub = "ir_version: " if sub not in str(model_onnx): raise AssertionError( "Unable to find '{}' (opset={}) in\n{}".format( - sub, TARGET_OPSET, str(model_onnx))) + sub, TARGET_OPSET, str(model_onnx) + ) + ) def test_register_classifier(self): update_registered_converter( - DummyClassifier, 'DummyClassifierAlias', + DummyClassifier, + "DummyClassifierAlias", calculate_linear_classifier_output_shapes, convert_sklearn_linear_classifier, - options={'nocl': [True, False], - 'zipmap': [True, False, 'columns'], - 'output_class_labels': [False, True], - 'raw_scores': [True, False]}) - pipe = Pipeline([('st', StandardScaler()), ('d', DummyClassifier())]) + options={ + "nocl": [True, False], + "zipmap": [True, False, "columns"], + "output_class_labels": [False, True], + "raw_scores": [True, False], + }, + ) + pipe = Pipeline([("st", StandardScaler()), ("d", DummyClassifier())]) X = np.array([[0, 1], [1, 0], [0.5, 0.5]], dtype=np.float64) y = np.array([1, 0, 1], dtype=np.int64) pipe.fit(X, y) model_onnx = to_onnx(pipe, 
X.astype(np.float32)) assert "zipmap" in str(model_onnx).lower() - model_onnx = to_onnx(pipe, X.astype(np.float32), - options={'d__zipmap': False}) + model_onnx = to_onnx(pipe, X.astype(np.float32), options={"d__zipmap": False}) assert "zipmap" not in str(model_onnx).lower() model_onnx = to_onnx( - pipe, X.astype(np.float32), - options={DummyClassifier: {'zipmap': False, - 'output_class_labels': True}}) + pipe, + X.astype(np.float32), + options={DummyClassifier: {"zipmap": False, "output_class_labels": True}}, + ) assert "zipmap" not in str(model_onnx).lower() self.assertEqual(3, len(model_onnx.graph.output)) model_onnx = to_onnx( - pipe, X.astype(np.float32), - options={id(pipe.steps[-1][-1]): { - 'zipmap': False, 'output_class_labels': True}}) + pipe, + X.astype(np.float32), + options={ + id(pipe.steps[-1][-1]): {"zipmap": False, "output_class_labels": True} + }, + ) assert "zipmap" not in str(model_onnx).lower() self.assertEqual(3, len(model_onnx.graph.output)) model_onnx = to_onnx( - pipe, X.astype(np.float32), - options={'d__zipmap': False, 'd__output_class_labels': True}) + pipe, + X.astype(np.float32), + options={"d__zipmap": False, "d__output_class_labels": True}, + ) assert "zipmap" not in str(model_onnx).lower() self.assertEqual(3, len(model_onnx.graph.output)) model_onnx = to_onnx( - pipe, X.astype(np.float32), - options={'zipmap': False, 'output_class_labels': True}) + pipe, + X.astype(np.float32), + options={"zipmap": False, "output_class_labels": True}, + ) assert "zipmap" not in str(model_onnx).lower() self.assertEqual(3, len(model_onnx.graph.output)) diff --git a/tests/test_topology_prune.py b/tests/test_topology_prune.py index b27ab1260..a5372dcfd 100644 --- a/tests/test_topology_prune.py +++ b/tests/test_topology_prune.py @@ -9,6 +9,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.pipeline import make_pipeline from sklearn import datasets + try: # scikit-learn >= 0.22 from sklearn.utils._testing import ignore_warnings @@ -35,7 +36,6 @@ def transform(self, X): class identity(IdentityTransformer): - def __init__(self): IdentityTransformer.__init__(self) @@ -50,16 +50,13 @@ def dummy_converter(scope, operator, container): out = operator.outputs id1 = OnnxIdentity(X, op_version=TARGET_OPSET) - id2 = OnnxIdentity(id1, output_names=out[:1], - op_version=TARGET_OPSET) + id2 = OnnxIdentity(id1, output_names=out[:1], op_version=TARGET_OPSET) id2.add_to(scope, container) class TestTopologyPrune(unittest.TestCase): - @ignore_warnings(category=DeprecationWarning) def test_dummy_identity(self): - digits = datasets.load_digits(n_class=6) Xd = digits.data[:20] yd = digits.target[:20] @@ -68,67 +65,74 @@ def test_dummy_identity(self): idtr = make_pipeline(IdentityTransformer(), identity()) idtr.fit(Xd, yd) - update_registered_converter(IdentityTransformer, "IdentityTransformer", - dummy_shape_calculator, dummy_converter) - update_registered_converter(identity, "identity", - dummy_shape_calculator, dummy_converter) + update_registered_converter( + IdentityTransformer, + "IdentityTransformer", + dummy_shape_calculator, + dummy_converter, + ) + update_registered_converter( + identity, "identity", dummy_shape_calculator, dummy_converter + ) model_onnx = convert_sklearn( - idtr, "idtr", + idtr, + "idtr", [("input", FloatTensorType([None, Xd.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) - idnode = [node for node in model_onnx.graph.node - if node.op_type == "Identity"] + idnode = [node for node in model_onnx.graph.node if node.op_type == 
"Identity"] self.assertEqual(len(idnode), 1) @ignore_warnings(category=DeprecationWarning) def test_onnx_subgraphs1(self): - x = numpy.array([1, 2, 4, 5, 5, 4]).astype( - numpy.float32).reshape((3, 2)) + x = numpy.array([1, 2, 4, 5, 5, 4]).astype(numpy.float32).reshape((3, 2)) cop = OnnxAdd( - OnnxIdentity('input', op_version=TARGET_OPSET), - 'input', op_version=TARGET_OPSET) - cdist = onnx_squareform_pdist( - cop, dtype=numpy.float32, op_version=TARGET_OPSET) - cop2 = OnnxIdentity(cdist, output_names=['cdist'], - op_version=TARGET_OPSET) + OnnxIdentity("input", op_version=TARGET_OPSET), + "input", + op_version=TARGET_OPSET, + ) + cdist = onnx_squareform_pdist(cop, dtype=numpy.float32, op_version=TARGET_OPSET) + cop2 = OnnxIdentity(cdist, output_names=["cdist"], op_version=TARGET_OPSET) model_def = cop2.to_onnx( - {'input': FloatTensorType([None, None])}, - outputs=[('cdist', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + {"input": FloatTensorType([None, None])}, + outputs=[("cdist", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) self.assertEqual(len(res), 1) @ignore_warnings(category=DeprecationWarning) def test_onnx_subgraphs2(self): - x = numpy.array([1, 2, 4, 5, 5, 4]).astype( - numpy.float32).reshape((3, 2)) + x = numpy.array([1, 2, 4, 5, 5, 4]).astype(numpy.float32).reshape((3, 2)) cop = OnnxAdd( - OnnxIdentity('input', op_version=TARGET_OPSET), - 'input', op_version=TARGET_OPSET) - cdist = onnx_squareform_pdist( - cop, dtype=numpy.float32, op_version=TARGET_OPSET) - id1 = [id(a) for a in cdist.onx_op.graph_algebra['body']] + OnnxIdentity("input", op_version=TARGET_OPSET), + "input", + op_version=TARGET_OPSET, + ) + cdist = onnx_squareform_pdist(cop, dtype=numpy.float32, op_version=TARGET_OPSET) + id1 = [id(a) for a in cdist.onx_op.graph_algebra["body"]] cdist2 = onnx_squareform_pdist( - cop, dtype=numpy.float32, op_version=TARGET_OPSET) - id2 = [id(a) for a in cdist2.onx_op.graph_algebra['body']] + cop, dtype=numpy.float32, op_version=TARGET_OPSET + ) + id2 = [id(a) for a in cdist2.onx_op.graph_algebra["body"]] self.assertNotEqual(id1, id2) - cop2 = OnnxAdd(cdist, cdist2, output_names=['cdist'], - op_version=TARGET_OPSET) + cop2 = OnnxAdd(cdist, cdist2, output_names=["cdist"], op_version=TARGET_OPSET) model_def = cop2.to_onnx( - {'input': FloatTensorType([None, None])}, - outputs=[('cdist', FloatTensorType([None, None]))], - target_opset=TARGET_OPSET) + {"input": FloatTensorType([None, None])}, + outputs=[("cdist", FloatTensorType([None, None]))], + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_def.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': x}) + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": x}) self.assertEqual(len(res), 1) diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index 736d910e9..a336ebf56 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -26,24 +26,27 @@ binary_array_to_string, path_to_leaf, ) + try: from .utils_backend_onnx import ReferenceEvaluatorEx except ImportError: + def ReferenceEvaluatorEx(*args, **kwargs): raise NotImplementedError( "onnx package does not implement class ReferenceEvaluator. 
" - "Update to onnx>=1.13.0.") + "Update to onnx>=1.13.0." + ) def InferenceSessionEx(onx, *args, verbose=0, **kwargs): from onnxruntime import InferenceSession + if "providers" not in kwargs: kwargs["providers"] = ["CPUExecutionProvider"] try: return InferenceSession(onx, *args, **kwargs) except Exception as e: - if (TARGET_OPSET >= 18 and - "support for domain ai.onnx is till opset" in str(e)): + if TARGET_OPSET >= 18 and "support for domain ai.onnx is till opset" in str(e): return ReferenceEvaluatorEx(onx, verbose=verbose) raise e diff --git a/tests/test_utils/reference_implementation_afe.py b/tests/test_utils/reference_implementation_afe.py index 437f2ec56..2275ea67a 100644 --- a/tests/test_utils/reference_implementation_afe.py +++ b/tests/test_utils/reference_implementation_afe.py @@ -28,8 +28,7 @@ def _array_feature_extrator(data, indices): try: tem = data[..., index] except IndexError as e: - raise RuntimeError( - f"data.shape={data.shape}, indices={indices}") from e + raise RuntimeError(f"data.shape={data.shape}, indices={indices}") from e res = tem.reshape(new_shape) return res @@ -38,7 +37,6 @@ def _array_feature_extrator(data, indices): from onnx.reference.op_run import OpRun class ArrayFeatureExtractor(OpRun): - op_domain = "ai.onnx.ml" def _run(self, data, indices): diff --git a/tests/test_utils/reference_implementation_helper.py b/tests/test_utils/reference_implementation_helper.py index e703d3831..676a3ffbe 100644 --- a/tests/test_utils/reference_implementation_helper.py +++ b/tests/test_utils/reference_implementation_helper.py @@ -3,19 +3,19 @@ def ErfInv(x): - sgn = -1. if x < 0 else 1. - x = (1. - x) * (1 + x) + sgn = -1.0 if x < 0 else 1.0 + x = (1.0 - x) * (1 + x) log = np.log(x) - v = 2. / (3.14159 * 0.147) + 0.5 * log - v2 = 1. / 0.147 * log + v = 2.0 / (3.14159 * 0.147) + 0.5 * log + v2 = 1.0 / 0.147 * log v3 = -v + np.sqrt(v * v - v2) x = sgn * np.sqrt(v3) return x def ComputeLogistic(val): - v = 1. / (1. + np.exp(-np.abs(val))) - return (1. 
- v) if val < 0 else v + v = 1.0 / (1.0 + np.exp(-np.abs(val))) + return (1.0 - v) if val < 0 else v def ComputeProbit(val): @@ -55,8 +55,8 @@ def sigmoid_probability(score, proba, probb): def multiclass_probability(k, R): max_iter = max(100, k) Q = np.empty((k, k), dtype=R.dtype) - Qp = np.empty((k, ), dtype=R.dtype) - P = np.empty((k, ), dtype=R.dtype) + Qp = np.empty((k,), dtype=R.dtype) + P = np.empty((k,), dtype=R.dtype) eps = 0.005 / k for t in range(0, k): @@ -89,11 +89,10 @@ def multiclass_probability(k, R): for t in range(k): diff = (-Qp[t] + pQp) / Q[t, t] P[t] += diff - pQp = ((pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / - (1 + diff) ** 2) + pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) ** 2 for j in range(k): Qp[j] = (Qp[j] + diff * Q[t, j]) / (1 + diff) - P[j] /= (1 + diff) + P[j] /= 1 + diff return P @@ -123,17 +122,26 @@ def write_scores(n_classes, scores, post_transform, add_second_class): res = np.array([1 - scores[0], scores[0]], dtype=scores.dtype) elif add_second_class in (2, 3): if post_transform == "LOGISTIC": - return np.array([ComputeLogistic(-scores[0]), - ComputeLogistic(scores[0])], - dtype=scores.dtype) + return np.array( + [ComputeLogistic(-scores[0]), ComputeLogistic(scores[0])], + dtype=scores.dtype, + ) return np.array([-scores[0], scores[0]], dtype=scores.dtype) return np.array([scores[0]], dtype=scores.dtype) raise NotImplementedError(f"n_classes={n_classes} not supported.") -def set_score_svm(max_weight, maxclass, n, post_transform, - has_proba, weights_are_all_positive_, - classlabels, posclass, negclass): +def set_score_svm( + max_weight, + maxclass, + n, + post_transform, + has_proba, + weights_are_all_positive_, + classlabels, + posclass, + negclass, +): write_additional_scores = -1 if len(classlabels) == 2: write_additional_scores = 2 if post_transform == "NONE" else 0 diff --git a/tests/test_utils/reference_implementation_ml.py b/tests/test_utils/reference_implementation_ml.py index ea9cc7f2a..d25daec33 100644 --- a/tests/test_utils/reference_implementation_ml.py +++ b/tests/test_utils/reference_implementation_ml.py @@ -9,7 +9,6 @@ from onnx.reference.op_run import OpRun class FusedMatMul(OpRun): - @staticmethod def _fmatmul00(a, b, alpha): return np.matmul(a, b) * alpha @@ -38,15 +37,20 @@ def _transpose(x, trans, transBatch): x = np.transpose(x, perm) return x - def _run(self, a, b, alpha=None, transA=None, transB=None, - transBatchA=None, transBatchB=None): - + def _run( + self, + a, + b, + alpha=None, + transA=None, + transB=None, + transBatchA=None, + transBatchB=None, + ): if transA: - _meth = (FusedMatMul._fmatmul11 if transB - else FusedMatMul._fmatmul10) + _meth = FusedMatMul._fmatmul11 if transB else FusedMatMul._fmatmul10 else: - _meth = (FusedMatMul._fmatmul01 if transB - else FusedMatMul._fmatmul00) + _meth = FusedMatMul._fmatmul01 if transB else FusedMatMul._fmatmul00 _meth = lambda a, b: _meth(a, b, alpha) # noqa # more recent versions of the operator if transBatchA is None: @@ -54,12 +58,11 @@ def _run(self, a, b, alpha=None, transA=None, transB=None, if transBatchB is None: transBatchB = 0 - if (transBatchA or transBatchB or - len(a.shape) != 2 or len(b.shape) != 2): + if transBatchA or transBatchB or len(a.shape) != 2 or len(b.shape) != 2: ta = self._transpose(a, transA, transBatchA) tb = self._transpose(b, transB, transBatchB) try: - return (np.matmul(ta, tb) * alpha, ) + return (np.matmul(ta, tb) * alpha,) except ValueError as e: raise ValueError( f"Unable to multiply shape {a.shape}x{b.shape} " @@ -68,9 +71,10 
@@ def _run(self, a, b, alpha=None, transA=None, transB=None, f"transB={transB}, " f"transBatchA={transBatchA}, " f"transBatchB={transBatchB}, " - f"meth={_meth}.") from e + f"meth={_meth}." + ) from e try: - return (_meth(a, b), ) + return (_meth(a, b),) except ValueError as e: raise ValueError( f"Unable to multiply shape {a.shape}x{b.shape} " @@ -78,10 +82,10 @@ def _run(self, a, b, alpha=None, transA=None, transB=None, f"transB={transB}, " f"transBatchA={transBatchA}, " f"transBatchB={transBatchB}, " - f"meth={_meth}.") from e + f"meth={_meth}." + ) from e class Scaler(OpRun): - op_domain = "ai.onnx.ml" def _run(self, x, offset=None, scale=None): @@ -89,12 +93,10 @@ def _run(self, x, offset=None, scale=None): return (dx * scale,) class LinearClassifier(OpRun): - op_domain = "ai.onnx.ml" @staticmethod - def _post_process_predicted_label(label, scores, - classlabels_ints_string): + def _post_process_predicted_label(label, scores, classlabels_ints_string): """ Replaces int64 predicted labels by the corresponding strings. @@ -104,21 +106,21 @@ def _post_process_predicted_label(label, scores, return label, scores def _run( - self, - x, - classlabels_ints=None, - classlabels_strings=None, - coefficients=None, - intercepts=None, - multi_class=None, - post_transform=None): + self, + x, + classlabels_ints=None, + classlabels_strings=None, + coefficients=None, + intercepts=None, + multi_class=None, + post_transform=None, + ): dtype = x.dtype if dtype != np.float64: x = x.astype(np.float32) coefficients = np.array(coefficients).astype(x.dtype) intercepts = np.array(intercepts).astype(x.dtype) - n_class = max( - len(classlabels_ints or []), len(classlabels_strings or [])) + n_class = max(len(classlabels_ints or []), len(classlabels_strings or [])) n = coefficients.shape[0] // n_class coefficients = coefficients.reshape(n_class, n).T scores = np.dot(x, coefficients) @@ -130,13 +132,13 @@ def _run( elif post_transform == "LOGISTIC": scores = expit(scores) elif post_transform == "SOFTMAX": - np.subtract( - scores, scores.max(axis=1)[:, np.newaxis], out=scores) + np.subtract(scores, scores.max(axis=1)[:, np.newaxis], out=scores) scores = np.exp(scores) scores = np.divide(scores, scores.sum(axis=1)[:, np.newaxis]) else: raise NotImplementedError( # pragma: no cover - f"Unknown post_transform: '{post_transform}'.") + f"Unknown post_transform: '{post_transform}'." + ) if coefficients.shape[1] == 1: labels = np.zeros((scores.shape[0],), dtype=x.dtype) @@ -144,23 +146,17 @@ def _run( else: labels = np.argmax(scores, axis=1) if classlabels_ints is not None: - labels = np.array( - [classlabels_ints[i] for i in labels], dtype=np.int64) + labels = np.array([classlabels_ints[i] for i in labels], dtype=np.int64) elif classlabels_strings is not None: labels = np.array([classlabels_strings[i] for i in labels]) return (labels, scores) class LinearRegressor(OpRun): - op_domain = "ai.onnx.ml" def _run( - self, - x, - coefficients=None, - intercepts=None, - targets=1, - post_transform=None): + self, x, coefficients=None, intercepts=None, targets=1, post_transform=None + ): coefficients = np.array(coefficients).astype(x.dtype) intercepts = np.array(intercepts).astype(x.dtype) n = coefficients.shape[0] // targets @@ -172,11 +168,11 @@ def _run( pass else: raise NotImplementedError( - f"Unknown post_transform: '{self.post_transform}'.") + f"Unknown post_transform: '{self.post_transform}'." 
+ ) return (score,) class Normalizer(OpRun): - op_domain = "ai.onnx.ml" @staticmethod @@ -213,7 +209,6 @@ def _run(self, x, norm=None): return (_norm(x),) class OneHotEncoder(OpRun): - op_domain = "ai.onnx.ml" def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None): @@ -240,8 +235,8 @@ def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None): res[a, i, j] = 1.0 else: raise RuntimeError( - f"This operator is not implemented " - f"for " f"shape {x.shape}.") + f"This operator is not implemented " f"for " f"shape {x.shape}." + ) if not self.zeros: red = res.sum(axis=len(res.shape) - 1) @@ -267,7 +262,6 @@ def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None): return (res,) class Binarizer(OpRun): - op_domain = "ai.onnx.ml" def _run(self, x, threshold=None): @@ -279,40 +273,39 @@ def _run(self, x, threshold=None): return (X,) class FeatureVectorizer(OpRun): - op_domain = "ai.onnx.ml" def _preprocess(self, a, axis): if axis >= len(a.shape): - new_shape = a.shape + (1, ) * (axis + 1 - len(a.shape)) + new_shape = a.shape + (1,) * (axis + 1 - len(a.shape)) return a.reshape(new_shape) return a def _run(self, *args, inputdimensions=None): - args = [self._preprocess(a, axis) - for a, axis in zip(args, inputdimensions)] + args = [self._preprocess(a, axis) for a, axis in zip(args, inputdimensions)] dimensions = set(inputdimensions) if len(set(dimensions)) == 1: res = np.concatenate(args, axis=inputdimensions[0]) - return (res, ) + return (res,) raise RuntimeError( - f"inputdimensions={inputdimensions} is not supported yet.") + f"inputdimensions={inputdimensions} is not supported yet." + ) class Imputer(OpRun): - op_domain = "ai.onnx.ml" - def _run(self, x, - imputed_value_floats=None, - imputed_value_int64s=None, - replaced_value_float=None, - replaced_value_int64=None): - if (imputed_value_floats is not None and - len(imputed_value_floats) > 0): + def _run( + self, + x, + imputed_value_floats=None, + imputed_value_int64s=None, + replaced_value_float=None, + replaced_value_int64=None, + ): + if imputed_value_floats is not None and len(imputed_value_floats) > 0: values = imputed_value_floats replace = replaced_value_float - elif (imputed_value_int64s is not None and - len(imputed_value_int64s) > 0): + elif imputed_value_int64s is not None and len(imputed_value_int64s) > 0: values = imputed_value_int64s replace = replaced_value_int64 else: @@ -321,11 +314,11 @@ def _run(self, x, if isinstance(values, list): values = np.array(values) if len(x.shape) != 2: - raise TypeError( - f"x must be a matrix but shape is {x.shape}") + raise TypeError(f"x must be a matrix but shape is {x.shape}") if values.shape[0] not in (x.shape[1], 1): raise TypeError( # pragma: no cover - f"Dimension mismatch {values.shape[0]} != {x.shape[1]}") + f"Dimension mismatch {values.shape[0]} != {x.shape[1]}" + ) x = x.copy() if np.isnan(replace): for i in range(0, x.shape[1]): @@ -336,22 +329,24 @@ def _run(self, x, val = values[min(i, values.shape[0] - 1)] x[x[:, i] == replace, i] = val - return (x, ) + return (x,) class LabelEncoder(OpRun): - op_domain = "ai.onnx.ml" - def _run(self, x, - default_float=None, - default_int64=None, - default_string=None, - keys_floats=None, - keys_int64s=None, - keys_strings=None, - values_floats=None, - values_int64s=None, - values_strings=None): + def _run( + self, + x, + default_float=None, + default_int64=None, + default_string=None, + keys_floats=None, + keys_int64s=None, + keys_strings=None, + values_floats=None, + values_int64s=None, + values_strings=None, + ): 
keys = keys_floats or keys_int64s or keys_strings values = values_floats or values_int64s or values_strings classes = {k: v for k, v in zip(keys, values)} @@ -370,7 +365,7 @@ def _run(self, x, else: defval = default_string if not isinstance(defval, str): - defval = '' + defval = "" dtype = np.str_ shape = x.shape if len(x.shape) > 1: @@ -379,16 +374,13 @@ def _run(self, x, for i in range(0, x.shape[0]): v = classes.get(cast(x[i]), defval) res.append(v) - return (np.array(res, dtype=dtype).reshape(shape), ) + return (np.array(res, dtype=dtype).reshape(shape),) class DictVectorizer(OpRun): - op_domain = "ai.onnx.ml" def _run(self, x, int64_vocabulary=None, string_vocabulary=None): - if isinstance(x, (np.ndarray, list)): - dict_labels = {} if int64_vocabulary: for i, v in enumerate(int64_vocabulary): @@ -399,7 +391,8 @@ def _run(self, x, int64_vocabulary=None, string_vocabulary=None): if len(dict_labels) == 0: raise RuntimeError( "int64_vocabulary and string_vocabulary " - "cannot be both empty.") + "cannot be both empty." + ) values = [] rows = [] @@ -412,16 +405,17 @@ def _run(self, x, int64_vocabulary=None, string_vocabulary=None): values = np.array(values) rows = np.array(rows) cols = np.array(cols) - return (coo_matrix( - (values, (rows, cols)), - shape=(len(x), len(dict_labels))).todense(), ) + return ( + coo_matrix( + (values, (rows, cols)), shape=(len(x), len(dict_labels)) + ).todense(), + ) if isinstance(x, dict): keys = int64_vocabulary or string_vocabulary res = [] for k in keys: res.append(x.get(k, 0)) - return (np.array(res), ) + return (np.array(res),) - raise TypeError( # pragma: no cover - f"x must be iterable not {type(x)}.") + raise TypeError(f"x must be iterable not {type(x)}.") # pragma: no cover diff --git a/tests/test_utils/reference_implementation_svm.py b/tests/test_utils/reference_implementation_svm.py index 8f6d263d5..77692d8f4 100644 --- a/tests/test_utils/reference_implementation_svm.py +++ b/tests/test_utils/reference_implementation_svm.py @@ -28,8 +28,7 @@ def _attribute_value(attr): return list(attr.ints) if attr.strings: return list(map(_to_str, attr.strings)) - raise NotImplementedError( - "Unable to return a value for attribute %r." % attr) + raise NotImplementedError("Unable to return a value for attribute %r." % attr) class SVMAttributes: @@ -37,8 +36,8 @@ def __init__(self): self._names = [] def add(self, name, value): - if isinstance(value, list) and name not in {'kernel_params'}: - if name in {'vectors_per_class'}: + if isinstance(value, list) and name not in {"kernel_params"}: + if name in {"vectors_per_class"}: value = np.array(value, dtype=np.int64) else: value = np.array(value, dtype=np.float32) @@ -67,13 +66,12 @@ def __init__(self, **kwargs): self.coef0_ = self.atts.kernel_params[1] self.degree_ = int(self.atts.kernel_params[2]) else: - self.gamma_ = 0. - self.coef0_ = 0. 
+ self.gamma_ = 0.0 + self.coef0_ = 0.0 self.degree_ = 0 def __str__(self): - rows = ["TreeEnsemble", - f"root_index={self.root_index}", str(self.atts)] + rows = ["TreeEnsemble", f"root_index={self.root_index}", str(self.atts)] return "\n".join(rows) def kernel_dot(self, pA, pB, kernel): @@ -81,7 +79,7 @@ def kernel_dot(self, pA, pB, kernel): if k == "poly": s = np.dot(pA, pB) s = s * self.gamma_ + self.coef0_ - return s ** self.degree_ + return s**self.degree_ if k == "sigmoid": s = np.dot(pA, pB) s = s * self.gamma_ + self.coef0_ @@ -95,7 +93,6 @@ def kernel_dot(self, pA, pB, kernel): raise ValueError(f"Unexpected kernel={kernel!r}.") def run(self, X): - if self.atts.n_supports > 0: # length of each support vector mode_ = "SVM_SVC" @@ -107,7 +104,7 @@ def run(self, X): z = np.empty((X.shape[0], 1), dtype=X.dtype) for n in range(X.shape[0]): - s = 0. + s = 0.0 if mode_ == "SVM_SVC": for j in range(self.atts.n_supports): @@ -127,31 +124,38 @@ def run(self, X): if onnx_opset_version() >= 18: from onnx.reference.op_run import OpRun + try: from .reference_implementation_helper import ( - write_scores, set_score_svm, multiclass_probability, - sigmoid_probability) + write_scores, + set_score_svm, + multiclass_probability, + sigmoid_probability, + ) except ImportError: from reference_implementation_helper import ( - write_scores, set_score_svm, multiclass_probability, - sigmoid_probability) + write_scores, + set_score_svm, + multiclass_probability, + sigmoid_probability, + ) class SVMRegressor(OpRun): - op_domain = "ai.onnx.ml" def _run( - self, - X, - coefficients=None, - kernel_params=None, - kernel_type=None, - n_targets=None, - n_supports=None, - one_class=None, - post_transform=None, - rho=None, - support_vectors=None): + self, + X, + coefficients=None, + kernel_params=None, + kernel_type=None, + n_targets=None, + n_supports=None, + one_class=None, + post_transform=None, + rho=None, + support_vectors=None, + ): svm = SVMCommon( coefficients=coefficients, kernel_params=kernel_params, @@ -161,17 +165,18 @@ def _run( one_class=one_class, post_transform=post_transform, rho=rho, - support_vectors=support_vectors) + support_vectors=support_vectors, + ) self._svm = svm res = svm.run(X) if post_transform in (None, "NONE"): return (res,) raise NotImplementedError( - f"post_transform={post_transform!r} not implemented.") + f"post_transform={post_transform!r} not implemented." 
+ ) class SVMClassifier(OpRun): - op_domain = "ai.onnx.ml" def _run_linear(self, X, coefs, class_count_, kernel_type_): @@ -182,8 +187,16 @@ def _run_linear(self, X, coefs, class_count_, kernel_type_): scores.append(score) return np.array(scores, dtype=X.dtype) - def _run_svm(self, X, sv, vector_count_, kernel_type_, - class_count_, starting_vector_, coefs): + def _run_svm( + self, + X, + sv, + vector_count_, + kernel_type_, + class_count_, + starting_vector_, + coefs, + ): evals = 0 kernels = [] @@ -201,10 +214,14 @@ def _run_svm(self, X, sv, vector_count_, kernel_type_, si_j = starting_vector_[j] class_j_sc = self._svm.atts.vectors_per_class[j] - s1 = np.dot(coefs[j - 1, si_i: si_i+class_i_sc], - kernels[si_i: si_i+class_i_sc]) - s2 = np.dot(coefs[i, si_j: si_j+class_j_sc], - kernels[si_j: si_j+class_j_sc]) + s1 = np.dot( + coefs[j - 1, si_i : si_i + class_i_sc], + kernels[si_i : si_i + class_i_sc], + ) + s2 = np.dot( + coefs[i, si_j : si_j + class_j_sc], + kernels[si_j : si_j + class_j_sc], + ) s = self._svm.atts.rho[evals] + s1 + s2 scores.append(s) @@ -216,15 +233,16 @@ def _run_svm(self, X, sv, vector_count_, kernel_type_, return votes, np.array(scores, dtype=X.dtype) def _probabilities(self, scores, class_count_): - probsp2 = np.zeros((class_count_, class_count_), - dtype=scores.dtype) + probsp2 = np.zeros((class_count_, class_count_), dtype=scores.dtype) index = 0 for i in range(class_count_): for j in range(i + 1, class_count_): - val1 = sigmoid_probability(scores[index], - self._svm.atts.prob_a[index], - self._svm.atts.prob_b[index]) + val1 = sigmoid_probability( + scores[index], + self._svm.atts.prob_a[index], + self._svm.atts.prob_b[index], + ) val2 = max(val1, 1.0e-7) val2 = min(val2, (1 - 1.0e-7)) probsp2[i, j] = val2 @@ -232,10 +250,9 @@ def _probabilities(self, scores, class_count_): index += 1 return multiclass_probability(class_count_, probsp2) - def _compute_final_scores(self, votes, scores, - weights_are_all_positive_, - has_proba, classlabels_ints): - + def _compute_final_scores( + self, votes, scores, weights_are_all_positive_, has_proba, classlabels_ints + ): max_weight = 0 if len(votes): max_class = np.argmax(votes) @@ -247,33 +264,44 @@ def _compute_final_scores(self, votes, scores, write_additional_scores = -1 if self._svm.atts.rho.size == 1: label, write_additional_scores = set_score_svm( - max_weight, max_class, 0, - self._svm.atts.post_transform, has_proba, - weights_are_all_positive_, classlabels_ints, 1, 0) + max_weight, + max_class, + 0, + self._svm.atts.post_transform, + has_proba, + weights_are_all_positive_, + classlabels_ints, + 1, + 0, + ) elif classlabels_ints is not None and len(classlabels_ints) > 0: label = classlabels_ints[max_class] else: label = max_class - new_scores = write_scores(scores.size, scores, - self._svm.atts.post_transform, - write_additional_scores) + new_scores = write_scores( + scores.size, + scores, + self._svm.atts.post_transform, + write_additional_scores, + ) return label, new_scores def _run( - self, - X, - classlabels_ints=None, - classlabels_strings=None, - coefficients=None, - kernel_params=None, - kernel_type=None, - post_transform=None, - prob_a=None, - prob_b=None, - rho=None, - support_vectors=None, - vectors_per_class=None): + self, + X, + classlabels_ints=None, + classlabels_strings=None, + coefficients=None, + kernel_params=None, + kernel_type=None, + post_transform=None, + prob_a=None, + prob_b=None, + rho=None, + support_vectors=None, + vectors_per_class=None, + ): svm = SVMCommon( coefficients=coefficients, 
kernel_params=kernel_params, @@ -283,7 +311,8 @@ def _run( prob_b=prob_b, rho=rho, support_vectors=support_vectors, - vectors_per_class=vectors_per_class) + vectors_per_class=vectors_per_class, + ) self._svm = svm vector_count_ = 0 @@ -293,8 +322,7 @@ def _run( starting_vector_.append(vector_count_) vector_count_ += vc - class_count_ = max(len(classlabels_ints or - classlabels_strings or []), 1) + class_count_ = max(len(classlabels_ints or classlabels_strings or []), 1) if vector_count_ > 0: # length of each support vector mode_ = "SVM_SVC" @@ -313,25 +341,32 @@ def _run( if vector_count_ == 0 and mode_ == "SVM_LINEAR": res = np.empty((X.shape[0], class_count_), dtype=X.dtype) for n in range(X.shape[0]): - scores = self._run_linear( - X[n], coefs, class_count_, kernel_type_) + scores = self._run_linear(X[n], coefs, class_count_, kernel_type_) res[n, :] = scores else: res = np.empty( - (X.shape[0], class_count_ * (class_count_ - 1) // 2), - dtype=X.dtype) + (X.shape[0], class_count_ * (class_count_ - 1) // 2), dtype=X.dtype + ) votes = np.empty((X.shape[0], class_count_), dtype=X.dtype) for n in range(X.shape[0]): vote, scores = self._run_svm( - X[n], sv, vector_count_, kernel_type_, class_count_, - starting_vector_, coefs) + X[n], + sv, + vector_count_, + kernel_type_, + class_count_, + starting_vector_, + coefs, + ) res[n, :] = scores votes[n, :] = vote # proba - if (self._svm.atts.prob_a is not None and - len(self._svm.atts.prob_a) > 0 and - mode_ == "SVM_SVC"): + if ( + self._svm.atts.prob_a is not None + and len(self._svm.atts.prob_a) > 0 + and mode_ == "SVM_SVC" + ): scores = np.empty((res.shape[0], class_count_), dtype=X.dtype) for n in range(scores.shape[0]): s = self._probabilities(res[n], class_count_) @@ -346,18 +381,23 @@ def _run( labels = [] for n in range(scores.shape[0]): label, new_scores = self._compute_final_scores( - votes[n], scores[n], weights_are_all_positive_, - has_proba, classlabels_ints) + votes[n], + scores[n], + weights_are_all_positive_, + has_proba, + classlabels_ints, + ) if final_scores is None: - final_scores = np.empty((X.shape[0], new_scores.size), - dtype=X.dtype) + final_scores = np.empty( + (X.shape[0], new_scores.size), dtype=X.dtype + ) final_scores[n, :] = new_scores labels.append(label) # labels - if (classlabels_strings is not None and - len(classlabels_strings) > 0): - return (np.array([classlabels_strings[i] - for i in labels]), - final_scores) + if classlabels_strings is not None and len(classlabels_strings) > 0: + return ( + np.array([classlabels_strings[i] for i in labels]), + final_scores, + ) return (np.array(labels, dtype=np.int64), final_scores) diff --git a/tests/test_utils/reference_implementation_text.py b/tests/test_utils/reference_implementation_text.py index f459dc4a7..0ff6497e0 100644 --- a/tests/test_utils/reference_implementation_text.py +++ b/tests/test_utils/reference_implementation_text.py @@ -13,46 +13,49 @@ from onnx.reference.ops.op_tfidf_vectorizer import ( WeightingCriteria, NgramPart, - populate_grams) + populate_grams, + ) class Tokenizer(OpRun): - op_domain = "com.microsoft" def _run( - self, - text, - mark=None, - mincharnum=None, - pad_value=None, - separators=None, - tokenexp=None, - tokenexpsplit=None, - stopwords=None): - char_tokenization_ = ( - tokenexp == "." or list(separators or []) == [""]) + self, + text, + mark=None, + mincharnum=None, + pad_value=None, + separators=None, + tokenexp=None, + tokenexpsplit=None, + stopwords=None, + ): + char_tokenization_ = tokenexp == "." 
or list(separators or []) == [""] stops_ = set(stopwords or []) try: str_separators_ = set(_ for _ in (separators or "")) except AttributeError as e: # pragma: no cover raise TypeError( - f"Unable to interpret separators {separators!r}.") from e + f"Unable to interpret separators {separators!r}." + ) from e if tokenexp not in (None, ""): tokenexp_ = re.compile(tokenexp) if char_tokenization_: - return self._run_char_tokenization( - text, stops_, mark, pad_value) + return self._run_char_tokenization(text, stops_, mark, pad_value) if str_separators_ is not None and len(str_separators_) > 0: str_separators = [re.compile(s) for s in str_separators_] return self._run_sep_tokenization( - text, stops_, str_separators, mark, pad_value) + text, stops_, str_separators, mark, pad_value + ) if tokenexp not in (None, ""): return self._run_regex_tokenization( - text, stops_, tokenexp_, tokenexpsplit, mark, pad_value) + text, stops_, tokenexp_, tokenexpsplit, mark, pad_value + ) raise RuntimeError( # pragma: no cover "Unable to guess which tokenization to use, sep={}, " - "tokenexp='{}'.".format(separators, tokenexp)) + "tokenexp='{}'.".format(separators, tokenexp) + ) @staticmethod def _run_tokenization(text, stops, split, mark, pad_value): @@ -96,8 +99,8 @@ def _run_tokenization(text, stops, split, mark, pad_value): res = np.array(res) else: raise RuntimeError( # pragma: no cover - f"Only vector or matrices are supported " - f"not shape {text.shape}.") + f"Only vector or matrices are supported " f"not shape {text.shape}." + ) return (res,) @staticmethod @@ -110,8 +113,7 @@ def split(t): for c in t: yield c - return Tokenizer._run_tokenization( - text, stops, split, mark, pad_value) + return Tokenizer._run_tokenization(text, stops, split, mark, pad_value) @staticmethod def _run_sep_tokenization(text, stops, separators, mark, pad_value): @@ -126,8 +128,10 @@ def split(t): while pos < len(t): for sep in separators: if isinstance(sep, str): - if (pos + len(sep) <= len(t) and - sep == t[pos: pos + len(sep)]): + if ( + pos + len(sep) <= len(t) + and sep == t[pos : pos + len(sep)] + ): word = t[begin:pos] yield word begin = pos + len(sep) @@ -145,12 +149,10 @@ def split(t): word = t[begin:pos] yield word - return Tokenizer._run_tokenization( - text, stops, split, mark, pad_value) + return Tokenizer._run_tokenization(text, stops, split, mark, pad_value) @staticmethod - def _run_regex_tokenization(text, stops, exp, tokenexpsplit, - mark, pad_value): + def _run_regex_tokenization(text, stops, exp, tokenexpsplit, mark, pad_value): """ Tokenizes using a regular expression. """ @@ -164,8 +166,7 @@ def split(t): def split(t): return filter(lambda x: x, exp.findall(t)) - return Tokenizer._run_tokenization( - text, stops, split, mark, pad_value) + return Tokenizer._run_tokenization(text, stops, split, mark, pad_value) class TfIdfVectorizer(OpRun): def __init__(self, onnx_node, run_params): # type: ignore @@ -178,7 +179,8 @@ def __init__(self, onnx_node, run_params): # type: ignore if value is None: raise ValueError( f"Unexpected mode={mode!r}, " - f"not found in {dir(WeightingCriteria)}.") + f"not found in {dir(WeightingCriteria)}." 
+ ) self.weighting_criteria_ = value # type: ignore self.min_gram_length_ = self.min_gram_length # type: ignore @@ -215,39 +217,41 @@ def __init__(self, onnx_node, run_params): # type: ignore # Load into dictionary only required gram sizes ngram_size = 1 for i in range(len(self.ngram_counts_)): - start_idx = self.ngram_counts_[i] end_idx = ( self.ngram_counts_[i + 1] if (i + 1) < len(self.ngram_counts_) - else total_items) + else total_items + ) items = end_idx - start_idx if items > 0: ngrams = items // ngram_size - if (ngram_size >= self.min_gram_length_ and - ngram_size <= self.max_gram_length_): + if ( + ngram_size >= self.min_gram_length_ + and ngram_size <= self.max_gram_length_ + ): ngram_id = populate_grams( self.pool_int64s_, start_idx, ngrams, ngram_size, ngram_id, - self.int64_map_) + self.int64_map_, + ) else: ngram_id += ngrams ngram_size += 1 - def increment_count(self, ngram_id: int, row_num: int, - frequencies: List[int]) -> None: + def increment_count( + self, ngram_id: int, row_num: int, frequencies: List[int] + ) -> None: ngram_id -= 1 # assert(ngram_id < ngram_indexes_.size()); - output_idx = ( - row_num * self.output_size_ + self.ngram_indexes_[ngram_id]) + output_idx = row_num * self.output_size_ + self.ngram_indexes_[ngram_id] # assert(static_cast(output_idx) < frequencies.size()); frequencies[output_idx] += 1 def output_result(self, B: int, frequencies: List[int]) -> np.ndarray: - def _getattr(cls, name): try: return getattr(cls, name) @@ -269,14 +273,12 @@ def _getattr(cls, name): Y = np.empty((total_dims,), dtype=np.float32) w = self.weights_ - if self.weighting_criteria_ == _getattr( - WeightingCriteria, "TF"): + if self.weighting_criteria_ == _getattr(WeightingCriteria, "TF"): i = 0 for f in frequencies: Y[i] = f i += 1 - elif self.weighting_criteria_ == _getattr( - WeightingCriteria, "IDF"): + elif self.weighting_criteria_ == _getattr(WeightingCriteria, "IDF"): if len(w) > 0: p = 0 for _batch in range(B): @@ -288,8 +290,7 @@ def _getattr(cls, name): for f in frequencies: Y[p] = 1 if f > 0 else 0 p += 1 - elif self.weighting_criteria_ == _getattr( - WeightingCriteria, "TFIDF"): + elif self.weighting_criteria_ == _getattr(WeightingCriteria, "TFIDF"): if len(w) > 0: p = 0 for _batch in range(B): @@ -306,21 +307,21 @@ def _getattr(cls, name): return Y.reshape(output_dims) def compute_impl( # type: ignore - self, - X: np.ndarray, - row_num: int, - row_size: int, - frequencies: List[int], - max_gram_length=None, - max_skip_count=None, - min_gram_length=None, - mode=None, - ngram_counts=None, - ngram_indexes=None, - pool_int64s=None, - pool_strings=None, - weights=None) -> None: - + self, + X: np.ndarray, + row_num: int, + row_size: int, + frequencies: List[int], + max_gram_length=None, + max_skip_count=None, + min_gram_length=None, + mode=None, + ngram_counts=None, + ngram_indexes=None, + pool_int64s=None, + pool_strings=None, + weights=None, + ) -> None: if len(X.shape) > 1: X_flat = X[row_num] else: @@ -338,17 +339,18 @@ def compute_impl( # type: ignore while ngram_start < ngram_row_end: # We went far enough so no n-grams of any size can be # gathered - at_least_this = ngram_start + skip_distance * ( - start_ngram_size - 1) + at_least_this = ngram_start + skip_distance * (start_ngram_size - 1) if at_least_this >= ngram_row_end: break ngram_item = ngram_start int_map = self.int64_map_ ngram_size = 1 - while (int_map.has_leaves() and - ngram_size <= max_gram_length and - ngram_item < ngram_row_end): + while ( + int_map.has_leaves() + and ngram_size <= max_gram_length 
+ and ngram_item < ngram_row_end + ): val = X_flat[ngram_item] hit = int_map.find(val) if hit is None: @@ -370,17 +372,18 @@ def compute_impl( # type: ignore break def _run( # type: ignore - self, - X, - max_gram_length=None, - max_skip_count=None, - min_gram_length=None, - mode=None, - ngram_counts=None, - ngram_indexes=None, - pool_int64s=None, - pool_strings=None, - weights=None): + self, + X, + max_gram_length=None, + max_skip_count=None, + min_gram_length=None, + mode=None, + ngram_counts=None, + ngram_indexes=None, + pool_int64s=None, + pool_strings=None, + weights=None, + ): if self.mapping_ is not None: xi = np.empty(X.shape, dtype=np.int64) for i in range(0, X.shape[0]): @@ -406,8 +409,7 @@ def _run( # type: ignore num_rows = 1 C = 1 if total_items != 1: - raise ValueError( - f"Unexpected total of items {total_items}.") + raise ValueError(f"Unexpected total of items {total_items}.") elif len(input_dims) == 1: num_rows = 1 C = input_dims[0] @@ -418,22 +420,24 @@ def _run( # type: ignore if B < 1: raise ValueError( f"Input shape must have either [C] or [B,C] " - f"dimensions with B > 0, B={B}, C={C}.") + f"dimensions with B > 0, B={B}, C={C}." + ) else: raise ValueError( f"Input shape must have either [C] or [B,C] " - f"dimensions with B > 0, B={B}, C={C}.") + f"dimensions with B > 0, B={B}, C={C}." + ) if num_rows * C != total_items: raise ValueError( f"Unexpected total of items, num_rows * C = " - f"{num_rows * C} != total_items = {total_items}.") + f"{num_rows * C} != total_items = {total_items}." + ) # Frequency holder allocate [B..output_size_] and init all to zero - frequencies = np.zeros( - (num_rows * self.output_size_,), dtype=np.int64) + frequencies = np.zeros((num_rows * self.output_size_,), dtype=np.int64) if total_items == 0 or self.int64_map_.empty(): - return (self.output_result(B, frequencies), ) + return (self.output_result(B, frequencies),) def fn(row_num): self.compute_impl( @@ -449,7 +453,8 @@ def fn(row_num): ngram_indexes=ngram_indexes, pool_int64s=pool_int64s, pool_strings=pool_strings, - weights=weights) + weights=weights, + ) # can be parallelized. for i in range(num_rows): diff --git a/tests/test_utils/reference_implementation_tree.py b/tests/test_utils/reference_implementation_tree.py index 1793c1c3a..6cf7abbf0 100644 --- a/tests/test_utils/reference_implementation_tree.py +++ b/tests/test_utils/reference_implementation_tree.py @@ -28,8 +28,7 @@ def _attribute_value(attr): return list(attr.ints) if attr.strings: return list(map(_to_str, attr.strings)) - raise NotImplementedError( - "Unable to return a value for attribute %r." % attr) + raise NotImplementedError("Unable to return a value for attribute %r." 
% attr) class TreeEnsembleAttributes: @@ -41,10 +40,11 @@ def add(self, name, value): self._names.append(name) if isinstance(value, list): if name in { - "base_values", - "class_weights", - "nodes_values", - "nodes_hitrates"}: + "base_values", + "class_weights", + "nodes_values", + "nodes_hitrates", + }: value = np.array(value, dtype=np.float32) elif name.endswith("as_tensor"): value = np.array(value) @@ -71,18 +71,18 @@ def __init__(self, **kwargs): self.atts.add(name, value) self.tree_ids = list(sorted(set(self.atts.nodes_treeids))) - self.root_index = {tid: len(self.atts.nodes_treeids) - for tid in self.tree_ids} + self.root_index = {tid: len(self.atts.nodes_treeids) for tid in self.tree_ids} for index, tree_id in enumerate(self.atts.nodes_treeids): self.root_index[tree_id] = min(self.root_index[tree_id], index) self.node_index = { (tid, nid): i for i, (tid, nid) in enumerate( - zip(self.atts.nodes_treeids, self.atts.nodes_nodeids))} + zip(self.atts.nodes_treeids, self.atts.nodes_nodeids) + ) + } def __str__(self): - rows = ["TreeEnsemble", - f"root_index={self.root_index}", str(self.atts)] + rows = ["TreeEnsemble", f"root_index={self.root_index}", str(self.atts)] return "\n".join(rows) def leaf_index_tree(self, X, tree_id): @@ -111,9 +111,13 @@ def leaf_index_tree(self, X, tree_id): r = x != th else: raise ValueError( - f"Unexpected rule {rule!r} for node index {index}.") - nid = (self.atts.nodes_truenodeids[index] - if r else self.atts.nodes_falsenodeids[index]) + f"Unexpected rule {rule!r} for node index {index}." + ) + nid = ( + self.atts.nodes_truenodeids[index] + if r + else self.atts.nodes_falsenodeids[index] + ) index = self.node_index[tree_id, nid] return index @@ -134,41 +138,40 @@ def leave_index_tree(self, X): if onnx_opset_version() >= 18: from onnx.reference.op_run import OpRun + try: - from .reference_implementation_helper import ( - ComputeProbit, write_scores) + from .reference_implementation_helper import ComputeProbit, write_scores except ImportError: - from reference_implementation_helper import ( - ComputeProbit, write_scores) + from reference_implementation_helper import ComputeProbit, write_scores class TreeEnsembleRegressor(OpRun): - op_domain = "ai.onnx.ml" def _run( - self, - X, - aggregate_function=None, - base_values=None, - base_values_as_tensor=None, - n_targets=None, - nodes_falsenodeids=None, - nodes_featureids=None, - nodes_hitrates=None, - nodes_hitrates_as_tensor=None, - nodes_missing_value_tracks_true=None, - nodes_modes=None, - nodes_nodeids=None, - nodes_treeids=None, - nodes_truenodeids=None, - nodes_values=None, - nodes_values_as_tensor=None, - post_transform=None, - target_ids=None, - target_nodeids=None, - target_treeids=None, - target_weights=None, - target_weights_as_tensor=None): + self, + X, + aggregate_function=None, + base_values=None, + base_values_as_tensor=None, + n_targets=None, + nodes_falsenodeids=None, + nodes_featureids=None, + nodes_hitrates=None, + nodes_hitrates_as_tensor=None, + nodes_missing_value_tracks_true=None, + nodes_modes=None, + nodes_nodeids=None, + nodes_treeids=None, + nodes_truenodeids=None, + nodes_values=None, + nodes_values_as_tensor=None, + post_transform=None, + target_ids=None, + target_nodeids=None, + target_treeids=None, + target_weights=None, + target_weights_as_tensor=None, + ): nmv = nodes_missing_value_tracks_true tr = TreeEnsemble( base_values=base_values, @@ -185,69 +188,70 @@ def _run( nodes_values=nodes_values, nodes_values_as_tensor=nodes_values_as_tensor, target_weights=target_weights, - 
target_weights_as_tensor=target_weights_as_tensor) + target_weights_as_tensor=target_weights_as_tensor, + ) self._tree = tr leaves_index = tr.leave_index_tree(X) - res = np.empty( - (leaves_index.shape[0], n_targets), dtype=X.dtype) + res = np.empty((leaves_index.shape[0], n_targets), dtype=X.dtype) if base_values is None: res[:, :] = 0 else: res[:, :] = np.array(base_values).reshape((1, -1)) target_index = {} - for i, (tid, nid) in enumerate( - zip(target_treeids, target_nodeids)): + for i, (tid, nid) in enumerate(zip(target_treeids, target_nodeids)): if (tid, nid) not in target_index: target_index[tid, nid] = [] target_index[tid, nid].append(i) for i in range(res.shape[0]): indices = leaves_index[i] - t_index = [target_index[nodes_treeids[i], nodes_nodeids[i]] - for i in indices] + t_index = [ + target_index[nodes_treeids[i], nodes_nodeids[i]] for i in indices + ] if aggregate_function == "SUM": for its in t_index: for it in its: - res[i, target_ids[it]] += ( - tr.atts.target_weights[it]) + res[i, target_ids[it]] += tr.atts.target_weights[it] else: raise NotImplementedError( f"aggregate_transform={aggregate_function!r} " - f"not supported yet.") + f"not supported yet." + ) if post_transform in (None, "NONE"): return (res,) raise NotImplementedError( - f"post_transform={post_transform!r} not implemented.") + f"post_transform={post_transform!r} not implemented." + ) class TreeEnsembleClassifier(OpRun): - op_domain = "ai.onnx.ml" def _run( - self, - X, - base_values=None, - base_values_as_tensor=None, - class_ids=None, - class_nodeids=None, - class_treeids=None, - class_weights=None, - class_weights_as_tensor=None, - classlabels_int64s=None, - classlabels_strings=None, - nodes_falsenodeids=None, - nodes_featureids=None, - nodes_hitrates=None, - nodes_hitrates_as_tensor=None, - nodes_missing_value_tracks_true=None, - nodes_modes=None, - nodes_nodeids=None, - nodes_treeids=None, - nodes_truenodeids=None, - nodes_values=None, - nodes_values_as_tensor=None, - post_transform=None): + self, + X, + base_values=None, + base_values_as_tensor=None, + class_ids=None, + class_nodeids=None, + class_treeids=None, + class_weights=None, + class_weights_as_tensor=None, + classlabels_int64s=None, + classlabels_strings=None, + nodes_falsenodeids=None, + nodes_featureids=None, + nodes_hitrates=None, + nodes_hitrates_as_tensor=None, + nodes_missing_value_tracks_true=None, + nodes_modes=None, + nodes_nodeids=None, + nodes_treeids=None, + nodes_truenodeids=None, + nodes_values=None, + nodes_values_as_tensor=None, + post_transform=None, + ): nmv = nodes_missing_value_tracks_true tr = TreeEnsemble( nodes_falsenodeids=nodes_falsenodeids, @@ -262,15 +266,16 @@ def _run( nodes_values=nodes_values, nodes_values_as_tensor=nodes_values_as_tensor, class_weights=class_weights, - class_weights_as_tensor=class_weights_as_tensor) + class_weights_as_tensor=class_weights_as_tensor, + ) self._tree = tr if X.dtype not in (np.float32, np.float64): X = X.astype(np.float32) leaves_index = tr.leave_index_tree(X) n_classes = max( - len(classlabels_int64s or []), len(classlabels_strings or [])) - res = np.empty( - (leaves_index.shape[0], n_classes), dtype=np.float32) + len(classlabels_int64s or []), len(classlabels_strings or []) + ) + res = np.empty((leaves_index.shape[0], n_classes), dtype=np.float32) if base_values is None: res[:, :] = 0 else: @@ -283,8 +288,9 @@ def _run( class_index[tid, nid].append(i) for i in range(res.shape[0]): indices = leaves_index[i] - t_index = [class_index[nodes_treeids[i], nodes_nodeids[i]] - for i in 
indices] + t_index = [ + class_index[nodes_treeids[i], nodes_nodeids[i]] for i in indices + ] for its in t_index: for it in its: res[i, class_ids[it]] += tr.atts.class_weights[it] @@ -317,8 +323,8 @@ def _run( new_scores = np.empty((res.shape[0], nc), dtype=res.dtype) for i in range(res.shape[0]): new_scores[i, :] = write_scores( - res.shape[1], res[i], post_transform, - add_second_class) + res.shape[1], res[i], post_transform, add_second_class + ) # labels labels = np.argmax(new_scores, axis=1).astype(np.int64) @@ -326,21 +332,21 @@ def _run( if len(classlabels_int64s) == 1: if classlabels_int64s[0] == 1: d = {1: 1} - labels = np.array( - [d.get(i, 0) for i in labels], dtype=np.int64) + labels = np.array([d.get(i, 0) for i in labels], dtype=np.int64) else: raise NotImplementedError( f"classlabels_int64s={classlabels_int64s}, " - f"not supported.") + f"not supported." + ) else: labels = np.array( - [classlabels_int64s[i] for i in labels], - dtype=np.int64) + [classlabels_int64s[i] for i in labels], dtype=np.int64 + ) elif classlabels_strings is not None: if len(classlabels_strings) == 1: raise NotImplementedError( - f"classlabels_strings={classlabels_strings}, " - f"not supported.") + f"classlabels_strings={classlabels_strings}, " f"not supported." + ) labels = np.array([classlabels_strings[i] for i in labels]) return labels, new_scores @@ -352,27 +358,26 @@ def _run( from sklearn.ensemble import ( RandomForestRegressor, RandomForestClassifier, - BaggingClassifier) + BaggingClassifier, + ) from skl2onnx import to_onnx from reference_implementation_afe import ArrayFeatureExtractor class ArgMax(_ArgMax): - def _run(self, data, axis=None, keepdims=None, - select_last_index=None): + def _run(self, data, axis=None, keepdims=None, select_last_index=None): if select_last_index == 0: # type: ignore - return _ArgMax._run( - self, data, axis=axis, keepdims=keepdims) + return _ArgMax._run(self, data, axis=axis, keepdims=keepdims) raise NotImplementedError("Unused in sklearn-onnx.") # classification 1 X, y = make_classification( - 100, n_features=6, n_classes=3, n_informative=3, n_redundant=0) + 100, n_features=6, n_classes=3, n_informative=3, n_redundant=0 + ) model = BaggingClassifier().fit(X, y) - onx = to_onnx(model, X.astype(np.float32), - options={"zipmap": False}) + onx = to_onnx(model, X.astype(np.float32), options={"zipmap": False}) tr = ReferenceEvaluator( - onx, new_ops=[TreeEnsembleClassifier, - ArrayFeatureExtractor, ArgMax]) + onx, new_ops=[TreeEnsembleClassifier, ArrayFeatureExtractor, ArgMax] + ) print("-----------------------") print(tr.run(None, {"X": X[:10].astype(np.float32)})) print("--") @@ -382,8 +387,7 @@ def _run(self, data, axis=None, keepdims=None, # classification 2 model = RandomForestClassifier(max_depth=3, n_estimators=2).fit(X, y) - onx = to_onnx(model, X.astype(np.float32), - options={"zipmap": False}) + onx = to_onnx(model, X.astype(np.float32), options={"zipmap": False}) tr = ReferenceEvaluator(onx, new_ops=[TreeEnsembleClassifier]) print(tr.run(None, {"X": X[:5].astype(np.float32)})) print(model.predict(X[:5].astype(np.float32))) diff --git a/tests/test_utils/reference_implementation_zipmap.py b/tests/test_utils/reference_implementation_zipmap.py index 33aa66981..002328a4d 100644 --- a/tests/test_utils/reference_implementation_zipmap.py +++ b/tests/test_utils/reference_implementation_zipmap.py @@ -46,17 +46,13 @@ def __init__(self, rev_keys, values, mat=None): self._mat = mat def __eq__(self, o): - raise NotImplementedError( - "__eq__ not available for 
ZipMapDictionary." - ) + raise NotImplementedError("__eq__ not available for ZipMapDictionary.") def __getstate__(self): """ For pickle. """ - return dict( - _rev_keys=self._rev_keys, _values=self._values, _mat=self._mat - ) + return dict(_rev_keys=self._rev_keys, _values=self._values, _mat=self._mat) def __setstate__(self, state): """ @@ -84,9 +80,7 @@ def __len__(self): """ Returns the number of items. """ - return ( - len(self._values) if self._mat is None else self._mat.shape[1] - ) + return len(self._values) if self._mat is None else self._mat.shape[1] def __iter__(self): for k in self._rev_keys: @@ -153,9 +147,7 @@ def __init__(self, rev_keys, mat): self._mat = mat def __eq__(self, o): - raise NotImplementedError( - "__eq__ not available for ArrayZipMapDictionary." - ) + raise NotImplementedError("__eq__ not available for ArrayZipMapDictionary.") @property def dtype(self): @@ -172,9 +164,7 @@ def __getitem__(self, i): return ZipMapDictionary(self._rev_keys, i, self._mat) def __setitem__(self, pos, value): - raise LookupError( - f"Changing an element is not supported (pos=[{pos}])." - ) + raise LookupError(f"Changing an element is not supported (pos=[{pos}]).") @property def values(self): @@ -217,15 +207,13 @@ def __str__(self): return f"ZipMaps[{', '.join(map(str, self))}]" class ZipMap(OpRun): - op_domain = "ai.onnx.ml" def _run(self, x, classlabels_int64s=None, classlabels_strings=None): if classlabels_int64s: rev_keys_ = ZipMapDictionary.build_rev_keys(classlabels_int64s) elif classlabels_strings: - rev_keys_ = ZipMapDictionary.build_rev_keys( - classlabels_strings) + rev_keys_ = ZipMapDictionary.build_rev_keys(classlabels_strings) else: rev_keys_ = {} res = ArrayZipMapDictionary(rev_keys_, x) diff --git a/tests/test_utils/tests_helper.py b/tests/test_utils/tests_helper.py index 2c75b5552..733064f0c 100644 --- a/tests/test_utils/tests_helper.py +++ b/tests/test_utils/tests_helper.py @@ -42,9 +42,7 @@ def _has_decision_function(model): return hasattr(model, "decision_function") -disable_dump = ( - os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined" -) +disable_dump = os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined" def _has_transform_model(model): @@ -86,9 +84,7 @@ def fit_classification_model( X = numpy.abs(X) if is_bool: X = X.astype(bool) - X_train, X_test, y_train, _ = train_test_split( - X, y, test_size=0.5, random_state=42 - ) + X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42) model.fit(X_train, y_train) return model, X_test @@ -137,9 +133,7 @@ def fit_multilabel_classification_model( random_state=42, ) X = X.astype(numpy.int64) if is_int else X.astype(numpy.float32) - X_train, X_test, y_train, _ = train_test_split( - X, y, test_size=0.5, random_state=42 - ) + X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42) model.fit(X_train, y_train) return model, X_test @@ -153,9 +147,7 @@ def fit_multi_output_classification_model( n_outputs=2, ): numpy.random.seed(0) - X_train = numpy.random.randint( - 0, n_informative, size=(n_samples, n_features) - ) + X_train = numpy.random.randint(0, n_informative, size=(n_samples, n_features)) y_train = numpy.random.randint(0, n_classes, size=(n_samples, n_outputs)) model = RandomForestClassifier() model.fit(X_train, y_train) @@ -184,9 +176,7 @@ def fit_regression_model( X = X.astype(numpy.int64) if is_int or is_bool else X.astype(numpy.float32) if is_bool: X = X.astype(bool) - X_train, X_test, y_train, _ = train_test_split( - X, y, 
test_size=0.5, random_state=42 - ) + X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42) model.fit(X_train, y_train) return model, X_test @@ -332,9 +322,7 @@ def _raw_score_binary_classification(model, X): scores = scores.reshape(-1, 1) if len(scores.shape) != 2 or scores.shape[1] != 1: raise RuntimeError( - "Unexpected shape {} for a binary classifiation".format( - scores.shape - ) + "Unexpected shape {} for a binary classifiation".format(scores.shape) ) return numpy.hstack([-scores, scores]) @@ -351,9 +339,10 @@ def call(X, model=model): call = getattr(model, method) except AttributeError as e: if method == "decision_function_binary": + def call(X, model=model): - return _raw_score_binary_classification( - model, X) + return _raw_score_binary_classification(model, X) + else: raise e if callable(call): @@ -364,9 +353,7 @@ def lambda_original(): return call(dataone) # noqa else: - raise RuntimeError( - "Method '{0}' is not callable.".format(method) - ) + raise RuntimeError("Method '{0}' is not callable.".format(method)) else: if hasattr(model, "predict"): if _has_predict_proba(model): @@ -383,9 +370,9 @@ def lambda_original(): model.decision_function(data), ] - def lambda_original(): return model.decision_function( - dataone - ) # noqa + def lambda_original(): + return model.decision_function(dataone) # noqa + elif _has_transform_model(model): # clustering try: @@ -433,9 +420,7 @@ def lambda_original(): else: raise TypeError( - "Model has no predict or transform method: {0}".format( - type(model) - ) + "Model has no predict or transform method: {0}".format(type(model)) ) runtime_test["expected"] = prediction @@ -547,9 +532,7 @@ def lambda_original(): if output is not None: if not disable_dump: - dest = os.path.join( - folder, basename + ".backend.{0}.pkl".format(b) - ) + dest = os.path.join(folder, basename + ".backend.{0}.pkl".format(b)) names.append(dest) with open(dest, "wb") as f: pickle.dump(output, f) @@ -560,8 +543,8 @@ def lambda_original(): ): # run a benchmark obs = compute_benchmark( - {"onnxrt": lambda_onnx, - "original": lambda_original}) + {"onnxrt": lambda_onnx, "original": lambda_original} + ) df = pandas.DataFrame(obs) df["input_size"] = sys.getsizeof(dataone) dest = os.path.join(folder, basename + ".bench") @@ -589,9 +572,7 @@ def convert_model(model, name, input_types, target_opset=None): "Sklearn", ) if model is None: - raise RuntimeError( - "Unable to convert model of type '{0}'.".format(type(model)) - ) + raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model))) return model, prefix @@ -726,9 +707,7 @@ def dump_multiple_classification( y = [i + first_class for i in y] if label_string: if label_uint8: - raise AssertionError( - "label_string and label_uint8 cannot be both True" - ) + raise AssertionError("label_string and label_uint8 cannot be both True") y = ["l%d" % i for i in y] suffix += "String" elif label_uint8: @@ -737,9 +716,7 @@ def dump_multiple_classification( model.fit(X, y) if verbose: print( - "[dump_multiple_classification] model '{}'".format( - model.__class__.__name__ - ) + "[dump_multiple_classification] model '{}'".format(model.__class__.__name__) ) model_onnx, prefix = convert_model( model, @@ -767,9 +744,7 @@ def dump_multiple_classification( model.fit(X, y) if verbose: print( - "[dump_multiple_classification] model '{}'".format( - model.__class__.__name__ - ) + "[dump_multiple_classification] model '{}'".format(model.__class__.__name__) ) model_onnx, prefix = convert_model( model, @@ -862,8 
+837,7 @@ def dump_multilabel_classification( ) ) model_onnx, prefix = convert_model( - model, "multi-class classifier", [("input", - FloatTensorType([None, 2]))] + model, "multi-class classifier", [("input", FloatTensorType([None, 2]))] ) if verbose: print("[make_multilabel_classification] model was converted") @@ -880,13 +854,14 @@ def dump_multilabel_classification( def dump_multiple_regression( - model, - suffix="", - folder=None, - allow_failure=None, - comparable_outputs=None, - verbose=False, - target_opset=None): + model, + suffix="", + folder=None, + allow_failure=None, + comparable_outputs=None, + verbose=False, + target_opset=None, +): """ Trains and dumps a model for a multi regression problem. The function trains a model and calls @@ -903,7 +878,8 @@ def dump_multiple_regression( model, "multi-regressor", [("input", FloatTensorType([None, 2]))], - target_opset=target_opset) + target_opset=target_opset, + ) dump_data_and_model( X, model, @@ -912,16 +888,18 @@ def dump_multiple_regression( allow_failure=allow_failure, basename=prefix + "MRg" + model.__class__.__name__ + suffix, verbose=verbose, - comparable_outputs=comparable_outputs) + comparable_outputs=comparable_outputs, + ) def dump_single_regression( - model, - suffix="", - folder=None, - allow_failure=None, - comparable_outputs=None, - target_opset=None): + model, + suffix="", + folder=None, + allow_failure=None, + comparable_outputs=None, + target_opset=None, +): """ Trains and dumps a model for a regression problem. The function trains a model and calls @@ -938,7 +916,8 @@ def dump_single_regression( model, "single regressor", [("input", FloatTensorType([None, 2]))], - target_opset=target_opset) + target_opset=target_opset, + ) dump_data_and_model( X, model, @@ -946,7 +925,8 @@ def dump_single_regression( folder=folder, allow_failure=allow_failure, basename=prefix + "Reg" + model.__class__.__name__ + suffix, - comparable_outputs=comparable_outputs) + comparable_outputs=comparable_outputs, + ) def timeit_repeat(fct, number, repeat): @@ -994,7 +974,8 @@ def timeexec(fct, number, repeat): repeat=repeat, min5=mini, max5=maxi, - run=number) + run=number, + ) def compute_benchmark(fcts, number=10, repeat=100): @@ -1125,8 +1106,8 @@ def make_report_backend(folder, as_df=False, verbose=0): if benched == 0: raise RuntimeError( - "No benchmark files in '{0}', found:\n{1}".format( - folder, "\n".join(files))) + "No benchmark files in '{0}', found:\n{1}".format(folder, "\n".join(files)) + ) def dict_update(d, u): d.update(u) @@ -1170,8 +1151,7 @@ def dict_update(d, u): # execution failed pass try: - row["ratio_nodes"] = ( - row["nb_onnx_nodes"] / row["nb_estimators"]) + row["ratio_nodes"] = row["nb_onnx_nodes"] / row["nb_estimators"] except KeyError: # execution failed pass @@ -1195,8 +1175,7 @@ def binary_array_to_string(mat): def path_to_leaf(tree, mat, tree_indices=None): if tree_indices is None: # single tree - leave = set([i for i in range(tree.node_count) - if tree.children_left[i] <= i]) + leave = set([i for i in range(tree.node_count) if tree.children_left[i] <= i]) res = [] for row in range(mat.shape[0]): leaf = None @@ -1211,7 +1190,7 @@ def path_to_leaf(tree, mat, tree_indices=None): leaves = [] for i in range(0, len(tree)): - mm = mat[:, tree_indices[i]: tree_indices[i + 1]] + mm = mat[:, tree_indices[i] : tree_indices[i + 1]] tt = tree[i].tree_ if hasattr(tree[i], "tree_") else tree[i] res = path_to_leaf(tt, mm) leaves.append(numpy.array(res, dtype=numpy.int64)) diff --git a/tests/test_utils/utils_backend.py 
b/tests/test_utils/utils_backend.py index 841b6f4c0..1e91c38b3 100644 --- a/tests/test_utils/utils_backend.py +++ b/tests/test_utils/utils_backend.py @@ -75,9 +75,7 @@ def is_backend_enabled(backend): return False if backend == "onnx": return onnx_opset_version() >= 18 - raise NotImplementedError( - "Not implemented for backend '{0}'".format(backend) - ) + raise NotImplementedError("Not implemented for backend '{0}'".format(backend)) def compare_backend( @@ -279,17 +277,12 @@ def compare_outputs(expected, output, verbose=False, **kwargs): kwargs["decimal"] = min(kwargs["decimal"], 2) if Dec1: kwargs["decimal"] = min(kwargs["decimal"], 1) - if isinstance(expected, numpy.ndarray) and isinstance( - output, numpy.ndarray - ): + if isinstance(expected, numpy.ndarray) and isinstance(output, numpy.ndarray): if SkipDim1: # Arrays like (2, 1, 2, 3) becomes (2, 2, 3) # as one dimension is useless. - expected = expected.reshape( - tuple([d for d in expected.shape if d > 1]) - ) - output = output.reshape( - tuple([d for d in expected.shape if d > 1])) + expected = expected.reshape(tuple([d for d in expected.shape if d > 1])) + output = output.reshape(tuple([d for d in expected.shape if d > 1])) if NoProb or NoProbOpp: # One vector is (N,) with scores, negative for class 0 # positive for class 1 @@ -315,21 +308,11 @@ def compare_outputs(expected, output, verbose=False, **kwargs): output = -output elif expected.shape != output.shape: raise NotImplementedError( - "Shape mismatch: {0} != {1}".format( - expected.shape, output.shape - ) + "Shape mismatch: {0} != {1}".format(expected.shape, output.shape) ) - if ( - len(expected.shape) == 1 - and len(output.shape) == 2 - and output.shape[1] == 1 - ): + if len(expected.shape) == 1 and len(output.shape) == 2 and output.shape[1] == 1: output = output.ravel() - if ( - len(output.shape) == 3 - and output.shape[0] == 1 - and len(expected.shape) == 2 - ): + if len(output.shape) == 3 and output.shape[0] == 1 and len(expected.shape) == 2: output = output.reshape(output.shape[1:]) if expected.dtype in ( numpy.str_, @@ -346,14 +329,10 @@ def compare_outputs(expected, output, verbose=False, **kwargs): return OnnxRuntimeAssertionError(str(e)) else: try: - assert_array_almost_equal( - expected, output, verbose=verbose, **kwargs - ) + assert_array_almost_equal(expected, output, verbose=verbose, **kwargs) except Exception as e: longer = ( - "\n--EXPECTED--\n{0}\n--OUTPUT--\n{1}".format( - expected, output - ) + "\n--EXPECTED--\n{0}\n--OUTPUT--\n{1}".format(expected, output) if verbose else "" ) @@ -364,8 +343,7 @@ def compare_outputs(expected, output, verbose=False, **kwargs): diff = numpy.abs(expected_ - output_).max() else: diff = max( - (1 if ci != cj else 0) - for ci, cj in zip(expected_, output_) + (1 if ci != cj else 0) for ci, cj in zip(expected_, output_) ) if diff == 0: return None diff --git a/tests/test_utils/utils_backend_onnx.py b/tests/test_utils/utils_backend_onnx.py index 1df2ae22f..ff9cb5763 100644 --- a/tests/test_utils/utils_backend_onnx.py +++ b/tests/test_utils/utils_backend_onnx.py @@ -13,6 +13,7 @@ from onnx import AttributeProto, numpy_helper import onnx as onnx_package from onnx.defs import onnx_opset_version + try: from onnx.helper import tensor_dtype_to_string except ImportError: @@ -20,7 +21,8 @@ from skl2onnx.helpers.onnx_helper import ( select_model_inputs_outputs, enumerate_model_node_outputs, - enumerate_model_initializers) + enumerate_model_initializers, +) from skl2onnx.algebra.type_helper import _guess_type from scipy.spatial.distance 
import cdist from .utils_backend import ( @@ -29,7 +31,8 @@ ExpectedAssertionError, OnnxRuntimeAssertionError, OnnxRuntimeMissingNewOnnxOperatorException, - compare_outputs) + compare_outputs, +) if onnx_opset_version() >= 18: @@ -58,8 +61,7 @@ def _run(self, x, y, metric="euclidean"): from onnx.reference.op_run import RuntimeContextError from onnx.reference.ops.op_argmin import _ArgMin, _argmin from onnx.reference.ops.op_argmax import _ArgMax, _argmax - from onnx.reference.ops.op_reduce_log_sum_exp import ( - compute_log_sum_exp) + from onnx.reference.ops.op_reduce_log_sum_exp import compute_log_sum_exp from onnx.reference.ops.op_scan import Scan as _Scan from .reference_implementation_ml import ( Binarizer, @@ -78,35 +80,30 @@ def _run(self, x, y, metric="euclidean"): from .reference_implementation_afe import ArrayFeatureExtractor from .reference_implementation_tree import ( TreeEnsembleClassifier, - TreeEnsembleRegressor) - from .reference_implementation_svm import ( - SVMClassifier, - SVMRegressor) + TreeEnsembleRegressor, + ) + from .reference_implementation_svm import SVMClassifier, SVMRegressor from .reference_implementation_text import TfIdfVectorizer class ArgMin(_ArgMin): - def _run(self, data, axis=None, keepdims=None, - select_last_index=None): + def _run(self, data, axis=None, keepdims=None, select_last_index=None): if select_last_index == 0: if keepdims == 0: - return _ArgMin._run( - self, data, axis=axis, keepdims=keepdims) + return _ArgMin._run(self, data, axis=axis, keepdims=keepdims) return (_argmin(data, axis=axis, keepdims=keepdims),) raise NotImplementedError("Unused in sklearn-onnx.") class ArgMax(_ArgMax): - def _run(self, data, axis=None, keepdims=None, - select_last_index=None): + def _run(self, data, axis=None, keepdims=None, select_last_index=None): if select_last_index == 0: if keepdims == 0: - return _ArgMax._run( - self, data, axis=axis, keepdims=keepdims) + return _ArgMax._run(self, data, axis=axis, keepdims=keepdims) try: return (_argmax(data, axis=axis, keepdims=keepdims),) except Exception as e: raise RuntimeError( - f"Issue with shape={data.shape} " - f"and axis={axis}.") from e + f"Issue with shape={data.shape} " f"and axis={axis}." 
+ ) from e raise NotImplementedError("Unused in sklearn-onnx.") class ReduceLogSumExp_1(OpRunReduceNumpy): @@ -115,8 +112,7 @@ def _run(self, data, axes=None, keepdims=None, **kwargs): return compute_log_sum_exp(data, tax, keepdims) class ReduceLogSumExp_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 tax = tuple(axes) if axes is not None else None return compute_log_sum_exp(data, tax, keepdims) @@ -126,91 +122,88 @@ def _run(self, data, axes=None, keepdims=1, **kwargs): axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore return ( - np.sqrt(np.sum(np.square(data), axis=axes, - keepdims=keepdims)).astype( - dtype=data.dtype),) + np.sqrt( + np.sum(np.square(data), axis=axes, keepdims=keepdims) + ).astype(dtype=data.dtype), + ) class ReduceL2_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore return ( - np.sqrt(np.sum(np.square(data), axis=axes, - keepdims=keepdims)).astype( - dtype=data.dtype),) + np.sqrt( + np.sum(np.square(data), axis=axes, keepdims=keepdims) + ).astype(dtype=data.dtype), + ) class ReduceMean_1(OpRunReduceNumpy): def _run(self, data, axes=None, keepdims=None, **kwargs): axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.mean(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.mean(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceMean_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.mean(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.mean(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceMax_1(OpRunReduceNumpy): def _run(self, data, axes=None, keepdims=None, **kwargs): axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.max(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.max(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceMax_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.max(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.max(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceProd_1(OpRunReduceNumpy): def _run(self, data, axes=None, keepdims=None, **kwargs): axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.prod(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.prod(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceProd_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, 
keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.prod(data, axis=axes, - keepdims=keepdims).astype(data.dtype),) + return (np.prod(data, axis=axes, keepdims=keepdims).astype(data.dtype),) class ReduceSumSquare_1(OpRunReduceNumpy): def _run(self, data, axes=None, keepdims=None, **kwargs): axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.sum(np.square(data), axis=axes, - keepdims=keepdims).astype(data.dtype),) + return ( + np.sum(np.square(data), axis=axes, keepdims=keepdims).astype( + data.dtype + ), + ) class ReduceSumSquare_18(OpRunReduceNumpy): - def _run(self, data, axes=None, keepdims=None, - noop_with_empty_axes=None): + def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None): assert noop_with_empty_axes != 1 axes = tuple(axes) if axes is not None else None keepdims = keepdims != 0 # type: ignore - return (np.sum(np.square(data), axis=axes, - keepdims=keepdims).astype(data.dtype),) + return ( + np.sum(np.square(data), axis=axes, keepdims=keepdims).astype( + data.dtype + ), + ) class ConstantOfShape(OpRun): def __init__(self, onnx_node, run_params): # type: ignore OpRun.__init__(self, onnx_node, run_params) self.cst = ( - self.value[0] if isinstance(self.value, np.ndarray) - else self.value) + self.value[0] if isinstance(self.value, np.ndarray) else self.value + ) if isinstance(self.cst, int): self.cst = np.int64(self.cst) elif isinstance(self.cst, float): @@ -218,8 +211,9 @@ def __init__(self, onnx_node, run_params): # type: ignore elif self.cst is None: self.cst = np.float32(0) if not isinstance( - self.cst, (np.float32, np.float64, np.int64, - np.int32, np.bool_, np.float16)): + self.cst, + (np.float32, np.float64, np.int64, np.int32, np.bool_, np.float16), + ): raise TypeError(f"cst must be a real not {type(self.cst)}") def _run(self, data, value=None): @@ -229,18 +223,21 @@ def _run(self, data, value=None): raise RuntimeError( f"Unable to create a constant of shape {data!r} " f"with value {self.cst!r} " - f"(raw value={value!r}).") from e + f"(raw value={value!r})." 
+ ) from e return (res,) class Where(OpRun): def _run(self, condition, x, y): # type: ignore - if (x.dtype != y.dtype and - x.dtype not in (np.object_,) and - not (x.dtype.type is np.str_ and - y.dtype.type is np.str_)): + if ( + x.dtype != y.dtype + and x.dtype not in (np.object_,) + and not (x.dtype.type is np.str_ and y.dtype.type is np.str_) + ): raise RuntimeError( f"x and y should share the same dtype " - f"{x.dtype} != {y.dtype}") + f"{x.dtype} != {y.dtype}" + ) return (np.where(condition, x, y).astype(x.dtype),) class Scan(_Scan): @@ -251,42 +248,51 @@ def _extract_attribute_value(self, att, ref_att=None): att.g, opsets=self.run_params["opsets"], verbose=max(0, self.run_params.get("verbose", 0) - 2), - new_ops=None if new_ops is None else new_ops.values()) + new_ops=None if new_ops is None else new_ops.values(), + ) return super()._extract_attribute_value(att, ref_att) - additional_implementations.extend([ - # ai.onnx - ArgMax, - ArgMin, - ConstantOfShape, - ReduceL2_1, ReduceL2_18, - ReduceLogSumExp_1, ReduceLogSumExp_18, - ReduceMax_1, ReduceMax_18, - ReduceMean_1, ReduceMean_18, - ReduceProd_1, ReduceProd_18, - ReduceSumSquare_1, ReduceSumSquare_18, - Where, - # ai.onnx.ml - ArrayFeatureExtractor, - Binarizer, - DictVectorizer, - FeatureVectorizer, - FusedMatMul, - Imputer, - LabelEncoder, - LinearClassifier, - LinearRegressor, - Normalizer, - OneHotEncoder, - TfIdfVectorizer, - TreeEnsembleClassifier, - TreeEnsembleRegressor, - Scaler, - Scan, - SVMClassifier, - SVMRegressor, - ZipMap, - ]) + additional_implementations.extend( + [ + # ai.onnx + ArgMax, + ArgMin, + ConstantOfShape, + ReduceL2_1, + ReduceL2_18, + ReduceLogSumExp_1, + ReduceLogSumExp_18, + ReduceMax_1, + ReduceMax_18, + ReduceMean_1, + ReduceMean_18, + ReduceProd_1, + ReduceProd_18, + ReduceSumSquare_1, + ReduceSumSquare_18, + Where, + # ai.onnx.ml + ArrayFeatureExtractor, + Binarizer, + DictVectorizer, + FeatureVectorizer, + FusedMatMul, + Imputer, + LabelEncoder, + LinearClassifier, + LinearRegressor, + Normalizer, + OneHotEncoder, + TfIdfVectorizer, + TreeEnsembleClassifier, + TreeEnsembleRegressor, + Scaler, + Scan, + SVMClassifier, + SVMRegressor, + ZipMap, + ] + ) class ReferenceEvaluatorEx(ReferenceEvaluator): def __init__(self, *args, new_ops=None, **kwargs): @@ -303,7 +309,7 @@ def __init__(self, *args, new_ops=None, **kwargs): raise TypeError(f"Not implemented for {type(args[0])}.") main_domain = None for dom in model.opset_import: - if dom.domain == '': + if dom.domain == "": main_domain = dom.version if main_domain is None: main_domain = 1 @@ -316,7 +322,7 @@ def __init__(self, *args, new_ops=None, **kwargs): new_new_ops = [] many = {} for op in new_ops: - if op.op_domain != '': + if op.op_domain != "": new_new_ops.append(op) continue name = op.__name__ @@ -401,22 +407,24 @@ def _log_arg(self, a): elements = a.ravel().tolist() if len(elements) > 5: elements = elements[:5] - return ( - f"{a.dtype}:{a.shape}:" - f"{','.join(map(str, elements))}...") + return f"{a.dtype}:{a.shape}:" f"{','.join(map(str, elements))}..." 
return f"{a.dtype}:{a.shape}:{elements}" if hasattr(a, "append"): return ", ".join(map(self._log_arg, a)) return a def get_inputs(self): - res = [InputDef(n, list(get_shape(t, True)), get_type(t)) - for n, t in zip(self.input_names, self.input_types)] + res = [ + InputDef(n, list(get_shape(t, True)), get_type(t)) + for n, t in zip(self.input_names, self.input_types) + ] return res def get_outputs(self): - res = [InputDef(n, list(get_shape(t, True)), get_type(t)) - for n, t in zip(self.output_names, self.output_types)] + res = [ + InputDef(n, list(get_shape(t, True)), get_type(t)) + for n, t in zip(self.output_names, self.output_types) + ] return res def run(self, *args, **kwargs): @@ -431,20 +439,23 @@ def replay_run(self, verbose=10): args, kwargs = self.last_inputs with contextlib.redirect_stdout(st): self.run(*args, **kwargs) - classes = [st.getvalue(), - "--", - f"main_domain={self._main_domain}", - "--", - "\n".join(sorted(map(str, self._new_ops))), - "--", - "\n".join(map(str, self._opset_import)), - "--"] + classes = [ + st.getvalue(), + "--", + f"main_domain={self._main_domain}", + "--", + "\n".join(sorted(map(str, self._new_ops))), + "--", + "\n".join(map(str, self._opset_import)), + "--", + ] for rt in self.rt_nodes_: classes.append(str(type(rt))) if hasattr(rt, "body"): for rt2 in rt.body.rt_nodes_: classes.append(f" {str(type(rt2))}") return "\n".join(classes) + else: ReferenceEvaluatorEx = None @@ -491,8 +502,7 @@ def __init__(self, name, shape, dtype): def get_shape(t, use_none=False): if t.tensor_type: - dims = [getattr(d, 'dim_value', None) - for d in t.tensor_type.shape.dim] + dims = [getattr(d, "dim_value", None) for d in t.tensor_type.shape.dim] if use_none: return tuple(r if r != 0 else None for r in dims) return tuple(dims) @@ -506,33 +516,35 @@ def get_type(t): else: res = tensor_dtype_to_string(t.tensor_type.elem_type) maps = { - 'TensorProto.STRING': 'tensor(string)', - 'TensorProto.INT64': 'tensor(int64)', - 'TensorProto.INT32': 'tensor(int32)', - 'TensorProto.DOUBLE': 'tensor(double)', - 'TensorProto.FLOAT': 'tensor(float)', - 'TensorProto.BOOL': 'tensor(bool)', + "TensorProto.STRING": "tensor(string)", + "TensorProto.INT64": "tensor(int64)", + "TensorProto.INT32": "tensor(int32)", + "TensorProto.DOUBLE": "tensor(double)", + "TensorProto.FLOAT": "tensor(float)", + "TensorProto.BOOL": "tensor(bool)", } return maps[res] return None def get_inputs(sess): - return [InputDef(n, get_shape(t), get_type(t)) - for n, t in zip(sess.input_names, - sess.input_types)] + return [ + InputDef(n, get_shape(t), get_type(t)) + for n, t in zip(sess.input_names, sess.input_types) + ] def compare_runtime( - test, - decimal=5, - options=None, - verbose=0, - context=None, - comparable_outputs=None, - intermediate_steps=False, - classes=None, - disable_optimisation=False): + test, + decimal=5, + options=None, + verbose=0, + context=None, + comparable_outputs=None, + intermediate_steps=False, + classes=None, + disable_optimisation=False, +): """ The function compares the expected output (computed with the model before being converted to ONNX) and the ONNX output @@ -586,24 +598,29 @@ def compare_runtime( _display_intermediate_steps(onx, None, disable_optimisation) if verbose: import onnx + model = onnx.load(onx) smodel = "\nJSON ONNX\n" + str(model) else: smodel = "" - if ("NOT_IMPLEMENTED : Could not find an implementation " - "for the node" in str(e)): + if "NOT_IMPLEMENTED : Could not find an implementation " "for the node" in str( + e + ): # onnxruntime does not implement a specific 
node yet. raise OnnxRuntimeMissingNewOnnxOperatorException( "ReferenceEvaluator does not implement a new operator " - "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel)) + "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel) + ) if "is not a registered function/op" in str(e): content = onnx_package.load(onx) raise OnnxRuntimeAssertionError( "Missing op? '{0}'\nONNX\n{1}\n{2}\n---\n{3}".format( - onx, smodel, e, content)) + onx, smodel, e, content + ) + ) raise OnnxRuntimeAssertionError( - "Unable to load onnx '{0}'\nONNX\n{1}\n{2}" - ".".format(onx, smodel, e)) + "Unable to load onnx '{0}'\nONNX\n{1}\n{2}" ".".format(onx, smodel, e) + ) input = load["data"] DF = options.pop("DF", False) @@ -624,24 +641,26 @@ def compare_runtime( inputs = {inp[0].name: input} elif isinstance(input, np.ndarray): shape = sum( - i.shape[1] if len(i.shape) == 2 - else i.shape[0] for i in inp) + i.shape[1] if len(i.shape) == 2 else i.shape[0] for i in inp + ) if shape == input.shape[1]: inputs = {n.name: input[:, i] for i, n in enumerate(inp)} else: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " "original shape {1}, onnx='{2}'".format( - len(inp), input.shape, onx)) + len(inp), input.shape, onx + ) + ) elif isinstance(input, list): try: array_input = np.array(input) except Exception: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " - "original {1}, onnx='{2}'".format( - len(inp), len(input), onx)) - if hasattr(inp[0], 'shape'): + "original {1}, onnx='{2}'".format(len(inp), len(input), onx) + ) + if hasattr(inp[0], "shape"): shape = sum(i.shape[1] for i in inp) if shape == array_input.shape[1]: inputs = {} @@ -649,13 +668,16 @@ def compare_runtime( for i, n in enumerate(inp): d = c + n.shape[1] inputs[n.name] = _create_column( - [row[c:d] for row in input], n.type) + [row[c:d] for row in input], n.type + ) c = d else: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " "original shape {1}, onnx='{2}'*".format( - len(inp), array_input.shape, onx)) + len(inp), array_input.shape, onx + ) + ) else: array_input = array_input.reshape((-1, len(inp))) inputs = {i.name: r for i, r in zip(inp, array_input.T)} @@ -665,33 +687,35 @@ def compare_runtime( except Exception: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " - "original {1}, onnx='{2}'".format( - len(inp), len(input), onx)) - if hasattr(inp[0], 'shape'): + "original {1}, onnx='{2}'".format(len(inp), len(input), onx) + ) + if hasattr(inp[0], "shape"): shape = sum(i.shape[1] for i in inp) if shape == array_input.shape[1]: inputs = {} c = 0 for i, n in enumerate(inp): d = c + n.shape[1] - inputs[n.name] = _create_column( - input.iloc[:, c:d], n.type) + inputs[n.name] = _create_column(input.iloc[:, c:d], n.type) c = d else: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0}={1} columns != " "original shape {2}, onnx='{3}'*".format( - len(inp), shape, array_input.shape, onx)) + len(inp), shape, array_input.shape, onx + ) + ) else: array_input = array_input.reshape((-1, len(inp))) inputs = {i.name: r for i, r in zip(inp, array_input.T)} else: raise OnnxRuntimeAssertionError( - "Wrong type of inputs onnx {0}, onnx='{1}'".format( - type(input), onx)) + "Wrong type of inputs onnx {0}, onnx='{1}'".format(type(input), onx) + ) else: raise OnnxRuntimeAssertionError( - "Dict or list is expected, not {0}".format(type(input))) + "Dict or list is expected, not {0}".format(type(input)) + ) for k in inputs: if isinstance(inputs[k], list): @@ -704,8 +728,8 @@ def compare_runtime( if verbose: 
print( "[compare_runtime] OneOff: type(inputs)={} " - "len={} OneOffArray={}".format( - type(input), len(inputs), OneOffArray)) + "len={} OneOffArray={}".format(type(input), len(inputs), OneOffArray) + ) if len(inputs) == 1 and not OneOffArray: name, values = list(inputs.items())[0] res = [] @@ -713,28 +737,31 @@ def compare_runtime( try: one = sess.run(None, {name: input}) if lambda_onnx is None: - lambda_onnx = ( - lambda sess=sess, input=input: sess.run( # noqa - None, {name: input})) + lambda_onnx = lambda sess=sess, input=input: sess.run( # noqa + None, {name: input} + ) if verbose: import pprint + pprint.pprint(one) except ExpectedAssertionError as expe: raise expe except Exception as e: if intermediate_steps: _display_intermediate_steps( - onx, {name: input}, disable_optimisation) - if hasattr(sess, 'replay_run'): + onx, {name: input}, disable_optimisation + ) + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unable to run model\n---\n{res}\n----\n{e}") + f"Unable to run model\n---\n{res}\n----\n{e}" + ) if verbose: raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}\n{onx}") - raise OnnxRuntimeAssertionError( - f"Unable to run onnx model {e}") + f"Unable to run model due to {e}\n{onx}" + ) + raise OnnxRuntimeAssertionError(f"Unable to run onnx model {e}") res.append(one) if verbose: @@ -743,8 +770,7 @@ def compare_runtime( else: def to_array(vv): - if isinstance( - vv, (np.ndarray, np.int64, np.float32, str)): + if isinstance(vv, (np.ndarray, np.int64, np.float32, str)): return np.array([vv]) return np.array([vv], dtype=np.float32) @@ -755,18 +781,18 @@ def to_array(vv): try: one = sess.run(None, iii) if lambda_onnx is None: - lambda_onnx = ( - lambda sess=sess, iii=iii: sess.run( # noqa - None, iii)) + lambda_onnx = lambda sess=sess, iii=iii: sess.run( # noqa + None, iii + ) if verbose: import pprint + pprint.pprint(one) except ExpectedAssertionError as expe: raise expe except Exception as e: if intermediate_steps: - _display_intermediate_steps( - onx, iii, disable_optimisation) + _display_intermediate_steps(onx, iii, disable_optimisation) if verbose: import onnx @@ -774,16 +800,17 @@ def to_array(vv): smodel = "\nJSON ONNX\n" + str(model) else: smodel = "" - if hasattr(sess, 'replay_run'): + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unable to run\n---\n{res}\n----\n{e}") + f"Unable to run\n---\n{res}\n----\n{e}" + ) if verbose: raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}{smodel}") - raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}") + f"Unable to run model due to {e}{smodel}" + ) + raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}") res.append(one) if verbose: print("[compare_runtime] OneOff: _post_process_output2") @@ -794,14 +821,17 @@ def to_array(vv): pass elif not isinstance(output, np.ndarray): raise TypeError( - "output must be an array, not {}".format(type(output))) + "output must be an array, not {}".format(type(output)) + ) else: output = [output] else: if verbose: print( "[compare_runtime] type(inputs)={} len={} names={}".format( - type(input), len(inputs), list(sorted(inputs)))) + type(input), len(inputs), list(sorted(inputs)) + ) + ) try: output = sess.run(None, inputs) @@ -810,6 +840,7 @@ def lambda_onnx(): if verbose: import pprint + pprint.pprint(output) except ExpectedAssertionError as expe: raise expe @@ -818,31 +849,33 @@ def lambda_onnx(): 
_display_intermediate_steps(onx, inputs, disable_optimisation) if "-Fail" in onx: raise ExpectedAssertionError( - "onnxruntime cannot compute the " - "prediction for '{0}'".format(onx)) + "onnxruntime cannot compute the " "prediction for '{0}'".format(onx) + ) else: if verbose: import onnx + model = onnx.load(onx) smodel = "\nJSON ONNX\n" + str(model) else: smodel = "" - ops = "\n".join(map(lambda x: str(x.__class__), - sess.rt_nodes_)) + ops = "\n".join(map(lambda x: str(x.__class__), sess.rt_nodes_)) raise OnnxRuntimeAssertionError( f"ReferenceEvaluator cannot compute the prediction" - f" for {onx!r} due to {e}\nops={ops}\n{smodel}") + f" for {onx!r} due to {e}\nops={ops}\n{smodel}" + ) except Exception as e: - if hasattr(sess, 'replay_run'): + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unable to run model\n---\n{res}\n----\n{e}") + f"Unable to run model\n---\n{res}\n----\n{e}" + ) if verbose: raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}\n{onx}") - raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}") + f"Unable to run model due to {e}\n{onx}" + ) + raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}") if verbose: print("[compare_runtime] done type={}".format(type(output))) @@ -864,7 +897,8 @@ def lambda_onnx(): decimal=decimal, verbose=verbose, classes=classes, - **options) + **options, + ) except OnnxRuntimeAssertionError as de: if isinstance(onx, str): import onnx @@ -893,7 +927,8 @@ def lambda_onnx(): smodel = "" raise OnnxRuntimeAssertionError( "Model '{0}' has discrepencies with backend=" - "'onnx'.\n{1}: {2}{3}".format(onx, type(e), e, smodel)) + "'onnx'.\n{1}: {2}{3}".format(onx, type(e), e, smodel) + ) return output0, lambda_onnx @@ -919,7 +954,8 @@ def _post_process_output(res): if mi != max(ls): raise NotImplementedError( "Unable to postprocess various number of " - "outputs in [{0}, {1}]".format(min(ls), max(ls))) + "outputs in [{0}, {1}]".format(min(ls), max(ls)) + ) if mi > 1: output = [] for i in range(mi): @@ -935,7 +971,8 @@ def _post_process_output(res): return res if len(res[0]) != 1: raise NotImplementedError( - "Not conversion implemented for {0}".format(res)) + "Not conversion implemented for {0}".format(res) + ) st = [r[0] for r in res] return np.vstack(st) return res @@ -951,18 +988,13 @@ def _create_column(values, dtype): if str(dtype) == "tensor(string)": return np.array(values, dtype=np.str_) raise OnnxRuntimeAssertionError( - "Unable to create one column from dtype '{0}'".format(dtype)) + "Unable to create one column from dtype '{0}'".format(dtype) + ) def _compare_expected( - expected, - output, - sess, - onnx, - decimal=5, - verbose=False, - classes=None, - **kwargs): + expected, output, sess, onnx, decimal=5, verbose=False, classes=None, **kwargs +): """ Compares the expected output against the runtime outputs. 
This is specific to *ReferenceEvaluator* due to variable *sess* @@ -983,12 +1015,13 @@ def _compare_expected( del kwargs["Reshape"] output = np.hstack(output).ravel() output = output.reshape( - (len(expected), len(output.ravel()) // len(expected))) + (len(expected), len(output.ravel()) // len(expected)) + ) if len(expected) != len(output): raise OnnxRuntimeAssertionError( "Unexpected number of outputs '{0}', " - "expected={1}, got={2}".format( - onnx, len(expected), len(output))) + "expected={1}, got={2}".format(onnx, len(expected), len(output)) + ) for exp, out in zip(expected, output): _compare_expected( exp, @@ -998,37 +1031,38 @@ def _compare_expected( decimal=5, verbose=verbose, classes=classes, - **kwargs) + **kwargs, + ) tested += 1 else: raise OnnxRuntimeAssertionError( - "Type mismatch for '{0}', output type is {1}".format( - onnx, type(output))) + "Type mismatch for '{0}', output type is {1}".format(onnx, type(output)) + ) elif isinstance(expected, dict): if not isinstance(output, dict): - raise OnnxRuntimeAssertionError( - "Type mismatch for '{0}'".format(onnx)) + raise OnnxRuntimeAssertionError("Type mismatch for '{0}'".format(onnx)) for k, v in output.items(): if k not in expected: continue msg = compare_outputs( - expected[k], v, decimal=decimal, verbose=verbose, **kwargs) + expected[k], v, decimal=decimal, verbose=verbose, **kwargs + ) if msg: - if hasattr(sess, 'replay_run'): + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unexpected output '{k}'\n---\n{res}\n----\n{msg}") + f"Unexpected output '{k}'\n---\n{res}\n----\n{msg}" + ) elif verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output {k!r} in model {onnx}\n{msg}") - raise OnnxRuntimeAssertionError( - f"Unexpected output {k!r}\n{msg}") + f"Unexpected output {k!r} in model {onnx}\n{msg}" + ) + raise OnnxRuntimeAssertionError(f"Unexpected output {k!r}\n{msg}") tested += 1 elif isinstance(expected, np.ndarray): if isinstance(output, list): - if (expected.shape[0] == len(output) and - isinstance(output[0], dict)): + if expected.shape[0] == len(output) and isinstance(output[0], dict): import pandas output = pandas.DataFrame(output) @@ -1041,36 +1075,43 @@ def _compare_expected( ex = ex[:170] + "..." 
raise OnnxRuntimeAssertionError( "More than one output when 1 is expected " - "for onnx '{0}'\n{1}".format(onnx, ex)) + "for onnx '{0}'\n{1}".format(onnx, ex) + ) output = output[-1] if not isinstance(output, np.ndarray): raise OnnxRuntimeAssertionError( "output must be an array for onnx '{0}' not {1}".format( - onnx, type(output))) - if (classes is not None and ( - expected.dtype == np.str_ or - expected.dtype.char == "U")): + onnx, type(output) + ) + ) + if classes is not None and ( + expected.dtype == np.str_ or expected.dtype.char == "U" + ): try: output = np.array([classes[cl] for cl in output]) except IndexError as e: raise RuntimeError( - "Unable to handle\n{}\n{}\n{}".format( - expected, output, classes)) from e + "Unable to handle\n{}\n{}\n{}".format(expected, output, classes) + ) from e msg = compare_outputs( - expected, output, decimal=decimal, verbose=verbose, **kwargs) + expected, output, decimal=decimal, verbose=verbose, **kwargs + ) if isinstance(msg, ExpectedAssertionError): raise msg if msg: - if hasattr(sess, 'replay_run'): + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unexpected output\n---\n{res}\n----\n{msg}") + f"Unexpected output\n---\n{res}\n----\n{msg}" + ) elif verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output in model {onnx}\n{msg}") + f"Unexpected output in model {onnx}\n{msg}" + ) raise OnnxRuntimeAssertionError( - f"Unexpected output ({type(sess)} - {dir(sess)})\n{msg}") + f"Unexpected output ({type(sess)} - {dir(sess)})\n{msg}" + ) tested += 1 else: from scipy.sparse import csr_matrix @@ -1080,21 +1121,25 @@ def _compare_expected( one_array = np.array(output) dense = np.asarray(expected.todense()) msg = compare_outputs( - dense, one_array, decimal=decimal, verbose=verbose, **kwargs) + dense, one_array, decimal=decimal, verbose=verbose, **kwargs + ) if msg: - if hasattr(sess, 'replay_run'): + if hasattr(sess, "replay_run"): # ReferenceEvaluator res = sess.replay_run() raise OnnxRuntimeAssertionError( - f"Unexpected output\n---\n{res}\n----\n{msg}") + f"Unexpected output\n---\n{res}\n----\n{msg}" + ) elif verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output in model '{onnx}'\n{msg}") + f"Unexpected output in model '{onnx}'\n{msg}" + ) raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}") tested += 1 else: raise OnnxRuntimeAssertionError( "Unexpected type for expected output ({1}) " - "and onnx '{0}'".format(onnx, type(expected))) + "and onnx '{0}'".format(onnx, type(expected)) + ) if tested == 0: raise OnnxRuntimeAssertionError("No test for onnx '{0}'".format(onnx)) diff --git a/tests/test_utils/utils_backend_onnxruntime.py b/tests/test_utils/utils_backend_onnxruntime.py index a191ed7b1..38150a6f3 100644 --- a/tests/test_utils/utils_backend_onnxruntime.py +++ b/tests/test_utils/utils_backend_onnxruntime.py @@ -40,9 +40,7 @@ def _display_intermediate_steps(model_onnx, inputs, disable_optimisation): print("-") print("OUTPUT: {} from {}".format(out, node.name)) step = select_model_inputs_outputs(model_onnx, out) - if disable_optimisation and hasattr( - onnxruntime, "GraphOptimizationLevel" - ): + if disable_optimisation and hasattr(onnxruntime, "GraphOptimizationLevel"): opts = onnxruntime.SessionOptions() opts.graph_optimization_level = ( onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL @@ -53,13 +51,15 @@ def _display_intermediate_steps(model_onnx, inputs, disable_optimisation): step_sess = onnxruntime.InferenceSession( step.SerializeToString(), 
sess_options=opts, - providers=["CPUExecutionProvider"]) + providers=["CPUExecutionProvider"], + ) except Exception as e: if "support for domain ai.onnx is till opset 17" in str(e): return raise RuntimeError( "Unable to load ONNX model with onnxruntime. " - "Last added node is:\n{}".format(node)) from e + "Last added node is:\n{}".format(node) + ) from e for o in step_sess.get_inputs(): print("IN :", o) for o in step_sess.get_outputs(): @@ -145,13 +145,15 @@ def compare_runtime( onx = onx.SerializeToString() try: sess = onnxruntime.InferenceSession( - onx, sess_options=opts, providers=["CPUExecutionProvider"]) + onx, sess_options=opts, providers=["CPUExecutionProvider"] + ) except ExpectedAssertionError as expe: raise expe except Exception as e: if "CannotLoad" in options: raise ExpectedAssertionError( - "Unable to load onnx '{0}' due to\n{1}".format(onx, e)) + "Unable to load onnx '{0}' due to\n{1}".format(onx, e) + ) else: if intermediate_steps: _display_intermediate_steps(onx, None, disable_optimisation) @@ -162,24 +164,29 @@ def compare_runtime( smodel = "\nJSON ONNX\n" + str(model) else: smodel = "" - if ("NOT_IMPLEMENTED : Could not find an implementation " - "for the node" in str(e)): + if ( + "NOT_IMPLEMENTED : Could not find an implementation " + "for the node" in str(e) + ): # onnxruntime does not implement a specific node yet. raise OnnxRuntimeMissingNewOnnxOperatorException( "onnxruntime does not implement a new operator " - "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel)) + "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel) + ) if "is not a registered function/op" in str(e): content = onnx_package.load(onx) raise OnnxRuntimeAssertionError( "Missing op? '{0}'\nONNX\n{1}\n{2}\n---\n{3}".format( - onx, smodel, e, content)) + onx, smodel, e, content + ) + ) msg = "Current official support for domain ai.onnx is till opset" if msg in str(e): # ReferenceEvaluator must work on this one. 
return None, None raise OnnxRuntimeAssertionError( - "Unable to load onnx '{0}'\nONNX\n{1}\n{2}".format( - onx, smodel, e)) + "Unable to load onnx '{0}'\nONNX\n{1}\n{2}".format(onx, smodel, e) + ) input = load["data"] DF = options.pop("DF", False) @@ -200,8 +207,8 @@ def compare_runtime( inputs = {inp[0].name: input} elif isinstance(input, numpy.ndarray): shape = sum( - i.shape[1] if len(i.shape) == 2 - else i.shape[0] for i in inp) + i.shape[1] if len(i.shape) == 2 else i.shape[0] for i in inp + ) if shape == input.shape[1]: inputs = {n.name: input[:, i] for i, n in enumerate(inp)} else: @@ -217,8 +224,8 @@ def compare_runtime( except Exception: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " - "original {1}, onnx='{2}'".format( - len(inp), len(input), onx)) + "original {1}, onnx='{2}'".format(len(inp), len(input), onx) + ) shape = sum(i.shape[1] for i in inp) if shape == array_input.shape[1]: inputs = {} @@ -226,43 +233,47 @@ def compare_runtime( for i, n in enumerate(inp): d = c + n.shape[1] inputs[n.name] = _create_column( - [row[c:d] for row in input], n.type) + [row[c:d] for row in input], n.type + ) c = d else: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " "original shape {1}, onnx='{2}'*".format( - len(inp), array_input.shape, onx)) + len(inp), array_input.shape, onx + ) + ) elif isinstance(input, pandas.DataFrame): try: array_input = numpy.array(input) except Exception: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0} != " - "original {1}, onnx='{2}'".format( - len(inp), len(input), onx)) + "original {1}, onnx='{2}'".format(len(inp), len(input), onx) + ) shape = sum(i.shape[1] for i in inp) if shape == array_input.shape[1]: inputs = {} c = 0 for i, n in enumerate(inp): d = c + n.shape[1] - inputs[n.name] = _create_column( - input.iloc[:, c:d], n.type - ) + inputs[n.name] = _create_column(input.iloc[:, c:d], n.type) c = d else: raise OnnxRuntimeAssertionError( "Wrong number of inputs onnx {0}={1} columns != " "original shape {2}, onnx='{3}'*".format( - len(inp), shape, array_input.shape, onx)) + len(inp), shape, array_input.shape, onx + ) + ) else: raise OnnxRuntimeAssertionError( - "Wrong type of inputs onnx {0}, onnx='{1}'".format( - type(input), onx)) + "Wrong type of inputs onnx {0}, onnx='{1}'".format(type(input), onx) + ) else: raise OnnxRuntimeAssertionError( - "Dict or list is expected, not {0}".format(type(input))) + "Dict or list is expected, not {0}".format(type(input)) + ) for k in inputs: if isinstance(inputs[k], list): @@ -275,8 +286,8 @@ def compare_runtime( if verbose: print( "[compare_runtime] OneOff: type(inputs)={} " - "len={} OneOffArray={}".format( - type(input), len(inputs), OneOffArray)) + "len={} OneOffArray={}".format(type(input), len(inputs), OneOffArray) + ) if len(inputs) == 1 and not OneOffArray: name, values = list(inputs.items())[0] res = [] @@ -297,12 +308,13 @@ def lambda_onnx(): except Exception as e: if intermediate_steps: _display_intermediate_steps( - onx, {name: input}, disable_optimisation) + onx, {name: input}, disable_optimisation + ) if verbose: raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}\n{onx}") - raise OnnxRuntimeAssertionError( - f"Unable to run model due to {e}") + f"Unable to run model due to {e}\n{onx}" + ) + raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}") res.append(one) if verbose: print("[compare_runtime] OneOff: _post_process_output1") @@ -310,8 +322,7 @@ def lambda_onnx(): else: def to_array(vv): - if isinstance( - vv, 
(numpy.ndarray, numpy.int64, numpy.float32, str)): + if isinstance(vv, (numpy.ndarray, numpy.int64, numpy.float32, str)): return numpy.array([vv]) else: return numpy.array([vv], dtype=numpy.float32) @@ -335,9 +346,7 @@ def lambda_onnx(): raise expe except Exception as e: if intermediate_steps: - _display_intermediate_steps( - onx, iii, disable_optimisation - ) + _display_intermediate_steps(onx, iii, disable_optimisation) if verbose: import onnx @@ -347,9 +356,11 @@ def lambda_onnx(): smodel = "" if verbose: raise OnnxRuntimeAssertionError( - f"Unable to run onnx due to {e}{smodel}\n{onx}") + f"Unable to run onnx due to {e}{smodel}\n{onx}" + ) raise OnnxRuntimeAssertionError( - f"Unable to run onnx due to {e}{smodel}") + f"Unable to run onnx due to {e}{smodel}" + ) res.append(one) if verbose: print("[compare_runtime] OneOff: _post_process_output2") @@ -360,14 +371,17 @@ def lambda_onnx(): pass elif not isinstance(output, numpy.ndarray): raise TypeError( - "output must be an array, not {}".format(type(output))) + "output must be an array, not {}".format(type(output)) + ) else: output = [output] else: if verbose: print( "[compare_runtime] type(inputs)={} len={} names={}".format( - type(input), len(inputs), list(sorted(inputs)))) + type(input), len(inputs), list(sorted(inputs)) + ) + ) if verbose: run_options = onnxruntime.RunOptions() if hasattr(run_options, "run_log_verbosity_level"): @@ -393,8 +407,8 @@ def lambda_onnx(): _display_intermediate_steps(onx, inputs, disable_optimisation) if "-Fail" in onx: raise ExpectedAssertionError( - "onnxruntime cannot compute the " - "prediction for '{0}'".format(onx)) + "onnxruntime cannot compute the " "prediction for '{0}'".format(onx) + ) else: if verbose: import onnx @@ -405,13 +419,12 @@ def lambda_onnx(): smodel = "" raise OnnxRuntimeAssertionError( "onnxruntime cannot compute the prediction" - " for '{0}' due to {1}{2}".format(onx, e, smodel)) + " for '{0}' due to {1}{2}".format(onx, e, smodel) + ) except Exception as e: if verbose: - raise OnnxRuntimeAssertionError( - f"Unable to run onnx due to {e}\n{onx}") - raise OnnxRuntimeAssertionError( - f"Unable to run onnx due to {e}") + raise OnnxRuntimeAssertionError(f"Unable to run onnx due to {e}\n{onx}") + raise OnnxRuntimeAssertionError(f"Unable to run onnx due to {e}") if verbose: print("[compare_runtime] done type={}".format(type(output))) @@ -433,7 +446,8 @@ def lambda_onnx(): decimal=decimal, verbose=verbose, classes=classes, - **options) + **options, + ) except ExpectedAssertionError as expe: raise expe except Exception as e: @@ -446,7 +460,9 @@ def lambda_onnx(): smodel = "" raise OnnxRuntimeAssertionError( "Model '{0}' has discrepencies.\n{1}: {2}{3}".format( - onx, type(e), e, smodel)) + onx, type(e), e, smodel + ) + ) return output0, lambda_onnx @@ -473,7 +489,8 @@ def _post_process_output(res): if mi != max(ls): raise NotImplementedError( "Unable to postprocess various number of " - "outputs in [{0}, {1}]".format(min(ls), max(ls))) + "outputs in [{0}, {1}]".format(min(ls), max(ls)) + ) if mi > 1: output = [] for i in range(mi): @@ -490,7 +507,8 @@ def _post_process_output(res): else: if len(res[0]) != 1: raise NotImplementedError( - "Not conversion implemented for {0}".format(res)) + "Not conversion implemented for {0}".format(res) + ) st = [r[0] for r in res] return numpy.vstack(st) else: @@ -508,18 +526,13 @@ def _create_column(values, dtype): if str(dtype) == "tensor(string)": return numpy.array(values, dtype=numpy.str_) raise OnnxRuntimeAssertionError( - "Unable to create one column 
from dtype '{0}'".format(dtype)) + "Unable to create one column from dtype '{0}'".format(dtype) + ) def _compare_expected( - expected, - output, - sess, - onnx, - decimal=5, - verbose=False, - classes=None, - **kwargs): + expected, output, sess, onnx, decimal=5, verbose=False, classes=None, **kwargs +): """ Compares the expected output against the runtime outputs. This is specific to *onnxruntime* due to variable *sess* @@ -540,12 +553,13 @@ def _compare_expected( del kwargs["Reshape"] output = numpy.hstack(output).ravel() output = output.reshape( - (len(expected), len(output.ravel()) // len(expected))) + (len(expected), len(output.ravel()) // len(expected)) + ) if len(expected) != len(output): raise OnnxRuntimeAssertionError( "Unexpected number of outputs '{0}', " - "expected={1}, got={2}".format( - onnx, len(expected), len(output))) + "expected={1}, got={2}".format(onnx, len(expected), len(output)) + ) for exp, out in zip(expected, output): _compare_expected( exp, @@ -555,32 +569,32 @@ def _compare_expected( decimal=5, verbose=verbose, classes=classes, - **kwargs) + **kwargs, + ) tested += 1 else: raise OnnxRuntimeAssertionError( - "Type mismatch for '{0}', output type is {1}".format( - onnx, type(output))) + "Type mismatch for '{0}', output type is {1}".format(onnx, type(output)) + ) elif isinstance(expected, dict): if not isinstance(output, dict): - raise OnnxRuntimeAssertionError( - "Type mismatch for '{0}'".format(onnx)) + raise OnnxRuntimeAssertionError("Type mismatch for '{0}'".format(onnx)) for k, v in output.items(): if k not in expected: continue msg = compare_outputs( - expected[k], v, decimal=decimal, verbose=verbose, **kwargs) + expected[k], v, decimal=decimal, verbose=verbose, **kwargs + ) if msg: if verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output {k!r} in model {onnx}\n{msg}") - raise OnnxRuntimeAssertionError( - f"Unexpected output {k!r}\n{msg}") + f"Unexpected output {k!r} in model {onnx}\n{msg}" + ) + raise OnnxRuntimeAssertionError(f"Unexpected output {k!r}\n{msg}") tested += 1 elif isinstance(expected, numpy.ndarray): if isinstance(output, list): - if (expected.shape[0] == len(output) and - isinstance(output[0], dict)): + if expected.shape[0] == len(output) and isinstance(output[0], dict): import pandas output = pandas.DataFrame(output) @@ -593,31 +607,35 @@ def _compare_expected( ex = ex[:170] + "..." 
raise OnnxRuntimeAssertionError( "More than one output when 1 is expected " - "for onnx '{0}'\n{1}".format(onnx, ex)) + "for onnx '{0}'\n{1}".format(onnx, ex) + ) output = output[-1] if not isinstance(output, numpy.ndarray): raise OnnxRuntimeAssertionError( "output must be an array for onnx '{0}' not {1}".format( - onnx, type(output))) - if (classes is not None and ( - expected.dtype == numpy.str_ or - expected.dtype.char == "U")): + onnx, type(output) + ) + ) + if classes is not None and ( + expected.dtype == numpy.str_ or expected.dtype.char == "U" + ): try: output = numpy.array([classes[cl] for cl in output]) except IndexError as e: raise RuntimeError( - "Unable to handle\n{}\n{}\n{}".format( - expected, output, classes)) from e + "Unable to handle\n{}\n{}\n{}".format(expected, output, classes) + ) from e msg = compare_outputs( - expected, output, decimal=decimal, verbose=verbose, **kwargs) + expected, output, decimal=decimal, verbose=verbose, **kwargs + ) if isinstance(msg, ExpectedAssertionError): raise msg if msg: if verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output in model {onnx}\n{msg}") - raise OnnxRuntimeAssertionError( - f"Unexpected output\n{msg}") + f"Unexpected output in model {onnx}\n{msg}" + ) + raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}") tested += 1 else: from scipy.sparse import csr_matrix @@ -627,17 +645,19 @@ def _compare_expected( one_array = numpy.array(output) dense = numpy.asarray(expected.todense()) msg = compare_outputs( - dense, one_array, decimal=decimal, verbose=verbose, **kwargs) + dense, one_array, decimal=decimal, verbose=verbose, **kwargs + ) if msg: if verbose: raise OnnxRuntimeAssertionError( - f"Unexpected output in model {onnx}\n{msg}") - raise OnnxRuntimeAssertionError( - f"Unexpected output\n{msg}") + f"Unexpected output in model {onnx}\n{msg}" + ) + raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}") tested += 1 else: raise OnnxRuntimeAssertionError( "Unexpected type for expected output ({1}) " - "and onnx '{0}'".format(onnx, type(expected))) + "and onnx '{0}'".format(onnx, type(expected)) + ) if tested == 0: raise OnnxRuntimeAssertionError("No test for onnx '{0}'".format(onnx)) diff --git a/tests/test_utils_sklearn.py b/tests/test_utils_sklearn.py index 5390c6096..be9209334 100644 --- a/tests/test_utils_sklearn.py +++ b/tests/test_utils_sklearn.py @@ -16,6 +16,7 @@ from sklearn.tree import DecisionTreeRegressor from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler + try: from sklearn.ensemble import VotingRegressor except ImportError: @@ -32,29 +33,24 @@ from sklearn.preprocessing import Imputer as SimpleImputer from skl2onnx.common.utils_sklearn import enumerate_model_names from skl2onnx import convert_sklearn, to_onnx -from skl2onnx.common.data_types import ( - FloatTensorType, StringTensorType) -from skl2onnx.common.utils_sklearn import ( - _process_options, _process_pipeline_options) -from test_utils import ( - dump_data_and_model, fit_regression_model, TARGET_OPSET) +from skl2onnx.common.data_types import FloatTensorType, StringTensorType +from skl2onnx.common.utils_sklearn import _process_options, _process_pipeline_options +from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET -ort_version = ort_version.split('+')[0] +ort_version = ort_version.split("+")[0] class TestUtilsSklearn(unittest.TestCase): - - @unittest.skipIf(VotingRegressor is None, - reason="new in 0.21") + @unittest.skipIf(VotingRegressor 
is None, reason="new in 0.21") def test_voting_regression(self): - model = VotingRegressor([ - ('lr', LinearRegression()), - ('dt', DecisionTreeRegressor())]) + model = VotingRegressor( + [("lr", LinearRegression()), ("dt", DecisionTreeRegressor())] + ) model, _ = fit_regression_model(model) names = list(enumerate_model_names(model)) assert len(names) == 3 - assert [_[0] for _ in names] == ['', 'lr', 'dt'] + assert [_[0] for _ in names] == ["", "lr", "dt"] assert all(map(lambda x: isinstance(x, tuple), names)) assert all(map(lambda x: len(x) == 2, names)) @@ -70,62 +66,67 @@ def test_pipeline(self): [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32, ) - model = Pipeline([ - ("scaler1", StandardScaler()), - ( - "union", - FeatureUnion([ - ("scaler2", StandardScaler()), - ("scaler3", MinMaxScaler()), - ]), - ), - ]) + model = Pipeline( + [ + ("scaler1", StandardScaler()), + ( + "union", + FeatureUnion( + [ + ("scaler2", StandardScaler()), + ("scaler3", MinMaxScaler()), + ] + ), + ), + ] + ) model.fit(data) names = list(enumerate_model_names(model)) - assert [_[0] for _ in names] == ['', 'scaler1', 'union', - 'union__scaler2', 'union__scaler3'] + assert [_[0] for _ in names] == [ + "", + "scaler1", + "union", + "union__scaler2", + "union__scaler3", + ] def test_pipeline_lr(self): data = numpy.array( - [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], - dtype=numpy.float32) + [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32 + ) yd = numpy.array([0, 1, 0, 2], dtype=numpy.float32) - pipe = Pipeline([ - ('norm', MinMaxScaler()), - ('clr', LogisticRegression()) - ]) + pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())]) pipe.fit(data, yd) - options = {'clr__raw_scores': True, 'clr__zipmap': False} + options = {"clr__raw_scores": True, "clr__zipmap": False} new_options = _process_options(pipe, options) - exp = {'raw_scores': True, 'zipmap': False} + exp = {"raw_scores": True, "zipmap": False} op = pipe.steps[1][1] self.assertIn(id(op), new_options) self.assertEqual(new_options[id(op)], exp) model_def = to_onnx( - pipe, data, - options={'clr__raw_scores': True, 'clr__zipmap': False}, - target_opset=TARGET_OPSET) + pipe, + data, + options={"clr__raw_scores": True, "clr__zipmap": False}, + target_opset=TARGET_OPSET, + ) sonx = str(model_def) assert "SOFTMAX" not in sonx @unittest.skipIf( - ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19") + ColumnTransformer is None, reason="ColumnTransformer not available in 0.19" + ) @unittest.skipIf( - pv.Version(ort_version) <= pv.Version('0.4.0'), - reason="onnxruntime too old") + pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old" + ) def test_pipeline_column_transformer(self): - iris = load_iris() X = iris.data[:, :3] y = iris.target X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"]) - X_train["vcat"] = X_train["vA"].apply( - lambda x: "cat1" if x > 0.5 else "cat2") - X_train["vcat2"] = X_train["vB"].apply( - lambda x: "cat3" if x > 0.5 else "cat4") + X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" if x > 0.5 else "cat2") + X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" if x > 0.5 else "cat4") y_train = y % 2 numeric_features = [0, 1, 2] # ["vA", "vB", "vC"] categorical_features = [3, 4] # ["vcat", "vcat2"] @@ -133,26 +134,36 @@ def test_pipeline_column_transformer(self): classifier = LogisticRegression( C=0.01, class_weight=dict(zip([False, True], [0.2, 0.8])), - n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3) + n_jobs=1, 
+ max_iter=10, + solver="lbfgs", + tol=1e-3, + ) - numeric_transformer = Pipeline(steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("scaler", StandardScaler())]) + numeric_transformer = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ] + ) - categorical_transformer = Pipeline(steps=[ - ( - "onehot", - OneHotEncoder(sparse=True, handle_unknown="ignore")), - ( - "tsvd", - TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4))]) + categorical_transformer = Pipeline( + steps=[ + ("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore")), + ("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)), + ] + ) - preprocessor = ColumnTransformer(transformers=[ - ("num", numeric_transformer, numeric_features), - ("cat", categorical_transformer, categorical_features)]) + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] + ) - model = Pipeline(steps=[("precprocessor", - preprocessor), ("classifier", classifier)]) + model = Pipeline( + steps=[("precprocessor", preprocessor), ("classifier", classifier)] + ) model.fit(X_train, y_train) names = list(enumerate_model_names(model, short=False)) @@ -162,59 +173,86 @@ def test_pipeline_column_transformer(self): simple2 = [_[0] for _ in names] assert len(simple2) == len(simple) exp = [ - '', 'precprocessor', 'precprocessor__num', - 'precprocessor__num__imputer', 'precprocessor__num__scaler', - 'precprocessor__cat', 'precprocessor__cat__onehot', - 'precprocessor__cat__onehot__categories___0', - 'precprocessor__cat__onehot__categories___1', - 'precprocessor__cat__tsvd', 'classifier'] - self.assertEqual(simple2[:len(exp) - 2], exp[:-2]) + "", + "precprocessor", + "precprocessor__num", + "precprocessor__num__imputer", + "precprocessor__num__scaler", + "precprocessor__cat", + "precprocessor__cat__onehot", + "precprocessor__cat__onehot__categories___0", + "precprocessor__cat__onehot__categories___1", + "precprocessor__cat__tsvd", + "classifier", + ] + self.assertEqual(simple2[: len(exp) - 2], exp[:-2]) initial_type = [ ("numfeat", FloatTensorType([None, 3])), - ("strfeat", StringTensorType([None, 2]))] - model_onnx = convert_sklearn(model, initial_types=initial_type, - target_opset=TARGET_OPSET) + ("strfeat", StringTensorType([None, 2])), + ] + model_onnx = convert_sklearn( + model, initial_types=initial_type, target_opset=TARGET_OPSET + ) dump_data_and_model( - X_train, model, model_onnx, - basename="SklearnPipelineColumnTransformerPipelinerOptions1") + X_train, + model, + model_onnx, + basename="SklearnPipelineColumnTransformerPipelinerOptions1", + ) - options = {'classifier': {'zipmap': False}} + options = {"classifier": {"zipmap": False}} new_options = _process_options(model, options) assert len(new_options) == 2 model_onnx = convert_sklearn( - model, initial_types=initial_type, - options={'classifier': {'zipmap': False}}, - target_opset=TARGET_OPSET) - assert 'zipmap' not in str(model_onnx).lower() + model, + initial_types=initial_type, + options={"classifier": {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_train, model, model_onnx, - basename="SklearnPipelineColumnTransformerPipelinerOptions2") + X_train, + model, + model_onnx, + basename="SklearnPipelineColumnTransformerPipelinerOptions2", + ) - options = {'classifier__zipmap': False} + options = {"classifier__zipmap": False} new_options 
= _process_options(model, options) assert len(new_options) == 2 model_onnx = convert_sklearn( - model, initial_types=initial_type, - options=options, target_opset=TARGET_OPSET) - assert 'zipmap' not in str(model_onnx).lower() + model, + initial_types=initial_type, + options=options, + target_opset=TARGET_OPSET, + ) + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_train, model, model_onnx, - basename="SklearnPipelineColumnTransformerPipelinerOptions2") + X_train, + model, + model_onnx, + basename="SklearnPipelineColumnTransformerPipelinerOptions2", + ) - options = {id(model): {'zipmap': False}} + options = {id(model): {"zipmap": False}} new_options = _process_pipeline_options(model, options) model_onnx = convert_sklearn( - model, initial_types=initial_type, - options={id(model): {'zipmap': False}}, - target_opset=TARGET_OPSET) - assert 'zipmap' not in str(model_onnx).lower() + model, + initial_types=initial_type, + options={id(model): {"zipmap": False}}, + target_opset=TARGET_OPSET, + ) + assert "zipmap" not in str(model_onnx).lower() dump_data_and_model( - X_train, model, model_onnx, - basename="SklearnPipelineColumnTransformerPipelinerOptions2") + X_train, + model, + model_onnx, + basename="SklearnPipelineColumnTransformerPipelinerOptions2", + ) if __name__ == "__main__": diff --git a/tests/test_variable_names.py b/tests/test_variable_names.py index e8e32b976..ab4630be8 100644 --- a/tests/test_variable_names.py +++ b/tests/test_variable_names.py @@ -14,6 +14,7 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.linear_model import LinearRegression from onnxruntime import InferenceSession + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument except ImportError: @@ -22,12 +23,14 @@ from skl2onnx.algebra.onnx_ops import OnnxIdentity from skl2onnx import convert_sklearn, to_onnx from onnxconverter_common.data_types import ( - FloatTensorType, Int64TensorType, StringTensorType) + FloatTensorType, + Int64TensorType, + StringTensorType, +) from test_utils import fit_regression_model, TARGET_OPSET class Passthrough: - def fit(self, X, y=None): return self @@ -41,21 +44,20 @@ def parser(scope, model, inputs, custom_parsers=None): operator.inputs = inputs for op_input in inputs: op_output = scope.declare_local_variable( - op_input.raw_name, copy.deepcopy(op_input.type)) + op_input.raw_name, copy.deepcopy(op_input.type) + ) operator.outputs.append(op_output) return operator.outputs def shape_calculator(operator): - op_input_map = {op_input.raw_name: op_input - for op_input in operator.inputs} + op_input_map = {op_input.raw_name: op_input for op_input in operator.inputs} for op_output in operator.outputs: op_output.type.shape = op_input_map[op_output.raw_name].type.shape def converter(scope, operator, container): - op_input_map = {op_input.raw_name: op_input - for op_input in operator.inputs} + op_input_map = {op_input.raw_name: op_input for op_input in operator.inputs} for op_output in operator.outputs: op_input = op_input_map[op_output.raw_name] OnnxIdentity( @@ -66,25 +68,23 @@ def converter(scope, operator, container): class TestVariableNames(unittest.TestCase): - @classmethod def setUpClass(cls): update_registered_converter( - Passthrough, "Passthrough", - shape_calculator, converter, - parser=parser) + Passthrough, "Passthrough", shape_calculator, converter, parser=parser + ) def test_variable_names(self): pipeline = Pipeline([("passthrough", Passthrough())]) initial_types = [("input", FloatTensorType([None, 2]))] - 
model_onnx = convert_sklearn(pipeline, initial_types=initial_types, - target_opset=TARGET_OPSET, - verbose=0) - self.assertIn('Identity', str(model_onnx)) + model_onnx = convert_sklearn( + pipeline, initial_types=initial_types, target_opset=TARGET_OPSET, verbose=0 + ) + self.assertIn("Identity", str(model_onnx)) x = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2)) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) name = sess.get_inputs()[0].name got = sess.run(None, {name: x}) assert_almost_equal(x, got[0]) @@ -93,15 +93,18 @@ def test_variable_names_distinct(self): pipeline = Pipeline([("passthrough", Passthrough())]) initial_types = [("INPUTA", FloatTensorType([None, 2]))] final_types = [("OUTPUTA", FloatTensorType([None, 2]))] - model_onnx = convert_sklearn(pipeline, initial_types=initial_types, - target_opset=TARGET_OPSET, - final_types=final_types, - verbose=0) + model_onnx = convert_sklearn( + pipeline, + initial_types=initial_types, + target_opset=TARGET_OPSET, + final_types=final_types, + verbose=0, + ) x = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2)) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - got = sess.run(None, {'INPUTA': x}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"INPUTA": x}) assert_almost_equal(x, got[0]) def test_variable_names_output(self): @@ -109,26 +112,31 @@ def test_variable_names_output(self): initial_types = [("input", FloatTensorType([None, 2]))] final_types = initial_types with self.assertRaises(RuntimeError): - convert_sklearn(pipeline, initial_types=initial_types, - target_opset=TARGET_OPSET, - final_types=final_types) + convert_sklearn( + pipeline, + initial_types=initial_types, + target_opset=TARGET_OPSET, + final_types=final_types, + ) def _test_non_ascii_variable_name(self): model, X = fit_regression_model(LinearRegression()) model_onnx = to_onnx( - model, name="linear regression", + model, + name="linear regression", initial_types=[("年齢", FloatTensorType([None, X.shape[1]]))], - target_opset=TARGET_OPSET) + target_opset=TARGET_OPSET, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) # Invalid Feed Input Name:\u5e74\u9f62 # sess.run(None, {'年齢': X}) self.assertTrue(sess is not None) def test_non_ascii_variable_name_pipeline(self): - - data = dedent(""" + data = dedent( + """ pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest 1,1,"A",female,29.0,0,0,24160,211.3375,B5,S,2,,"MO" 1,1,"B",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Can" @@ -150,39 +158,48 @@ def test_non_ascii_variable_name_pipeline(self): 1,1,"Q",female,50.0,0,1,PC 17558,247.5208,B58 B60,C,6,,"PQ" 1,1,"R",female,32.0,0,0,11813,76.2917,D15,C,8,, 1,0,"S",male,36.0,0,0,13050,75.2417,C6,C,A,,"MN" - """).strip(" \n") + """ + ).strip(" \n") data = pd.read_csv(StringIO(data)) data.rename(columns={"age": "年齢"}, inplace=True) - X = data.drop('survived', axis=1) + X = data.drop("survived", axis=1) # y = data['survived'] - cols = ['embarked', 'sex', 'pclass', '年齢', 'fare'] + cols = ["embarked", "sex", "pclass", "年齢", "fare"] X = X[cols] - for cat in ['embarked', 'sex', 'pclass']: - X[cat].fillna('missing', inplace=True) - numeric_features = ['年齢', 'fare'] - numeric_transformer = Pipeline(steps=[ - 
('imputer', SimpleImputer(strategy='median')), - ('scaler', StandardScaler())]) - categorical_features = ['embarked', 'sex', 'pclass'] - categorical_transformer = Pipeline(steps=[ - ('onehot', OneHotEncoder(handle_unknown='ignore'))]) + for cat in ["embarked", "sex", "pclass"]: + X[cat].fillna("missing", inplace=True) + numeric_features = ["年齢", "fare"] + numeric_transformer = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ] + ) + categorical_features = ["embarked", "sex", "pclass"] + categorical_transformer = Pipeline( + steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))] + ) preprocessor = ColumnTransformer( transformers=[ - ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features)]) + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] + ) preprocessor.fit_transform(X) - initial_type = [('pclass', Int64TensorType(shape=[None, 1])), - ('sex', StringTensorType(shape=[None, 1])), - ('年齢', FloatTensorType(shape=[None, 1])), - ('fare', FloatTensorType(shape=[None, 1])), - ('embarked', StringTensorType(shape=[None, 1]))] + initial_type = [ + ("pclass", Int64TensorType(shape=[None, 1])), + ("sex", StringTensorType(shape=[None, 1])), + ("年齢", FloatTensorType(shape=[None, 1])), + ("fare", FloatTensorType(shape=[None, 1])), + ("embarked", StringTensorType(shape=[None, 1])), + ] onnx_object = convert_sklearn( - preprocessor, initial_types=initial_type, - target_opset=TARGET_OPSET) + preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET + ) sess = InferenceSession( - onnx_object.SerializeToString(), - providers=["CPUExecutionProvider"]) + onnx_object.SerializeToString(), providers=["CPUExecutionProvider"] + ) self.assertTrue(sess is not None) # Invalid Feed Input Name:\u5e74\u9f62 # onx_data = {} diff --git a/tests_onnxmltools/test_columns.py b/tests_onnxmltools/test_columns.py index 48db6a082..6c7f28815 100644 --- a/tests_onnxmltools/test_columns.py +++ b/tests_onnxmltools/test_columns.py @@ -11,69 +11,77 @@ from skl2onnx.common.data_types import FloatTensorType from skl2onnx import update_registered_converter, convert_sklearn from skl2onnx.common.shape_calculator import ( - calculate_linear_classifier_output_shapes) # noqa + calculate_linear_classifier_output_shapes, +) # noqa from skl2onnx._parse import _parse_sklearn_classifier from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( - convert_lightgbm) # noqa + convert_lightgbm, +) # noqa from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( - convert_xgboost) # noqa + convert_xgboost, +) # noqa try: from test_utils import fit_classification_model except ImportError: import os import sys - sys.path.append( - os.path.join( - os.path.dirname(__file__), "..", "tests")) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests")) from test_utils import fit_classification_model from test_utils import TARGET_OPSET, TARGET_OPSET_ML class TestOptionColumns(unittest.TestCase): - @classmethod def setUpClass(self): - update_registered_converter( - LGBMClassifier, 'LightGbmLGBMClassifier', + LGBMClassifier, + "LightGbmLGBMClassifier", calculate_linear_classifier_output_shapes, - convert_lightgbm, options={ - 'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) + convert_lightgbm, + options={"zipmap": [True, False, "columns"], "nocl": [True, False]}, + ) def custom_parser(scope, model, inputs, custom_parsers=None): if 
custom_parsers is not None and model in custom_parsers: return custom_parsers[model]( - scope, model, inputs, custom_parsers=custom_parsers) - if not all(isinstance(i, (numbers.Real, bool, np.bool_)) - for i in model.classes_): + scope, model, inputs, custom_parsers=custom_parsers + ) + if not all( + isinstance(i, (numbers.Real, bool, np.bool_)) for i in model.classes_ + ): raise NotImplementedError( - "Current converter does not support string labels.") + "Current converter does not support string labels." + ) return _parse_sklearn_classifier(scope, model, inputs) update_registered_converter( - XGBClassifier, 'XGBClassifier', + XGBClassifier, + "XGBClassifier", calculate_linear_classifier_output_shapes, - convert_xgboost, parser=custom_parser, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False]}) + convert_xgboost, + parser=custom_parser, + options={"zipmap": [True, False, "columns"], "nocl": [True, False]}, + ) def c_test_model(self, model): - model, X = fit_classification_model( - model, 3, n_features=4, label_string=False) + model, X = fit_classification_model(model, 3, n_features=4, label_string=False) model_onnx = convert_sklearn( - model, "multi-class ridge classifier", + model, + "multi-class ridge classifier", [("input", FloatTensorType([None, X.shape[1]]))], - options={id(model): {'zipmap': 'columns'}}, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + options={id(model): {"zipmap": "columns"}}, + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) self.assertIsNotNone(model_onnx) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [_.name for _ in sess.get_outputs()] - self.assertEqual(['output_label', 'i0', 'i1', 'i2'], names) + self.assertEqual(["output_label", "i0", "i1", "i2"], names) xt = X[:10].astype(np.float32) - got = sess.run(None, {'input': xt}) + got = sess.run(None, {"input": xt}) prob = model.predict_proba(xt) for i in range(prob.shape[1]): assert_almost_equal(prob[:, i], got[i + 1]) diff --git a/tests_onnxmltools/test_lightgbm.py b/tests_onnxmltools/test_lightgbm.py index ad53aa339..dc9e7a94e 100644 --- a/tests_onnxmltools/test_lightgbm.py +++ b/tests_onnxmltools/test_lightgbm.py @@ -5,6 +5,7 @@ import numpy from numpy.testing import assert_almost_equal from onnxruntime import InferenceSession + try: from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument except ImportError: @@ -19,79 +20,85 @@ calculate_linear_regressor_output_shapes, ) from onnxmltools.convert.lightgbm.operator_converters.LightGbm import ( - convert_lightgbm # noqa + convert_lightgbm, # noqa ) import onnxmltools from onnxmltools.convert.lightgbm._parse import WrappedBooster # noqa from skl2onnx import to_onnx -from skl2onnx._parse import ( - _parse_sklearn_classifier, _parse_sklearn_simple_model) +from skl2onnx._parse import _parse_sklearn_classifier, _parse_sklearn_simple_model try: from test_utils import dump_single_regression except ImportError: import os import sys - sys.path.append( - os.path.join( - os.path.dirname(__file__), "..", "tests")) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests")) from test_utils import dump_single_regression from test_utils import ( - dump_binary_classification, dump_multiple_classification, - TARGET_OPSET, TARGET_OPSET_ML) + dump_binary_classification, + dump_multiple_classification, + TARGET_OPSET, + TARGET_OPSET_ML, +) def 
calculate_lightgbm_output_shapes(operator): op = operator.raw_operator if hasattr(op, "_model_dict"): - objective = op._model_dict['objective'] - elif hasattr(op, 'objective_'): + objective = op._model_dict["objective"] + elif hasattr(op, "objective_"): objective = op.objective_ else: raise RuntimeError( # pragma: no cover "Unable to find attributes '_model_dict' or 'objective_' in " - "instance of type %r (list of attributes=%r)." % ( - type(op), dir(op))) - if objective.startswith('binary') or objective.startswith('multiclass'): + "instance of type %r (list of attributes=%r)." % (type(op), dir(op)) + ) + if objective.startswith("binary") or objective.startswith("multiclass"): return calculate_linear_classifier_output_shapes(operator) - if objective.startswith('regression'): # pragma: no cover + if objective.startswith("regression"): # pragma: no cover return calculate_linear_regressor_output_shapes(operator) raise NotImplementedError( # pragma: no cover - "Objective '{}' is not implemented yet.".format(objective)) + "Objective '{}' is not implemented yet.".format(objective) + ) def lightgbm_parser(scope, model, inputs, custom_parsers=None): if hasattr(model, "fit"): raise TypeError( # pragma: no cover - "This converter does not apply on type '{}'." - "".format(type(model))) + "This converter does not apply on type '{}'." "".format(type(model)) + ) if len(inputs) == 1: wrapped = WrappedBooster(model) objective = wrapped.get_objective() - if objective.startswith('binary'): + if objective.startswith("binary"): wrapped = WrappedLightGbmBoosterClassifier(wrapped) return _parse_sklearn_classifier( - scope, wrapped, inputs, custom_parsers=custom_parsers) - if objective.startswith('multiclass'): + scope, wrapped, inputs, custom_parsers=custom_parsers + ) + if objective.startswith("multiclass"): wrapped = WrappedLightGbmBoosterClassifier(wrapped) return _parse_sklearn_classifier( - scope, wrapped, inputs, custom_parsers=custom_parsers) - if objective.startswith('regression'): # pragma: no cover + scope, wrapped, inputs, custom_parsers=custom_parsers + ) + if objective.startswith("regression"): # pragma: no cover return _parse_sklearn_simple_model( - scope, wrapped, inputs, custom_parsers=custom_parsers) + scope, wrapped, inputs, custom_parsers=custom_parsers + ) raise NotImplementedError( # pragma: no cover - "Objective '{}' is not implemented yet.".format(objective)) + "Objective '{}' is not implemented yet.".format(objective) + ) # Multiple columns - this_operator = scope.declare_local_operator('LightGBMConcat') + this_operator = scope.declare_local_operator("LightGBMConcat") this_operator.raw_operator = model this_operator.inputs = inputs - var = scope.declare_local_variable( - 'Xlgbm', inputs[0].type.__class__([None, None])) + var = scope.declare_local_variable("Xlgbm", inputs[0].type.__class__([None, None])) this_operator.outputs.append(var) - return lightgbm_parser(scope, model, this_operator.outputs, - custom_parsers=custom_parsers) + return lightgbm_parser( + scope, model, this_operator.outputs, custom_parsers=custom_parsers + ) class WrappedLightGbmBoosterClassifier(ClassifierMixin): @@ -100,111 +107,147 @@ class WrappedLightGbmBoosterClassifier(ClassifierMixin): """ def __init__(self, wrapped): # pylint: disable=W0231 - for k in {'boosting_type', '_model_dict', '_model_dict_info', - 'operator_name', 'classes_', 'booster_', 'n_features_', - 'objective_', 'boosting_type', 'n_features_in_', - 'n_features_out_'}: + for k in { + "boosting_type", + "_model_dict", + "_model_dict_info", + 
"operator_name", + "classes_", + "booster_", + "n_features_", + "objective_", + "boosting_type", + "n_features_in_", + "n_features_out_", + }: if hasattr(wrapped, k): setattr(self, k, getattr(wrapped, k)) class TestLightGbmTreeEnsembleModels(unittest.TestCase): - @classmethod def setUpClass(self): - update_registered_converter( - LGBMClassifier, 'LightGbmLGBMClassifier', + LGBMClassifier, + "LightGbmLGBMClassifier", calculate_linear_classifier_output_shapes, - convert_lightgbm, options={ - 'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) + convert_lightgbm, + options={"zipmap": [True, False, "columns"], "nocl": [True, False]}, + ) update_registered_converter( - LGBMRegressor, 'LgbmRegressor', + LGBMRegressor, + "LgbmRegressor", calculate_linear_regressor_output_shapes, - convert_lightgbm) + convert_lightgbm, + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for lightgbm is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for lightgbm is too old", + ) def test_lightgbm_classifier(self): model = LGBMClassifier(n_estimators=3, min_child_samples=1) dump_binary_classification( - model, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) dump_multiple_classification( - model, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for lightgbm is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for lightgbm is too old", + ) def test_lightgbm_regressor(self): model = LGBMRegressor(n_estimators=3, min_child_samples=1) dump_single_regression( - model, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for lightgbm is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for lightgbm is too old", + ) def test_lightgbm_regressor1(self): model = LGBMRegressor(n_estimators=1, min_child_samples=1) dump_single_regression( - model, suffix="1", - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, + suffix="1", + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for lightgbm is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for lightgbm is too old", + ) def test_lightgbm_regressor2(self): model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1) dump_single_regression( - model, suffix="2", - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, + suffix="2", + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for lightgbm is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for lightgbm is too old", + ) def test_lightgbm_booster_multi_classifier(self): X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]] X = numpy.array(X, dtype=numpy.float32) y = [0, 1, 0, 1, 2, 2] data = Dataset(X, label=y) model = train( - 
{'boosting_type': 'gbdt', 'objective': 'multiclass', - 'n_estimators': 3, 'min_child_samples': 1, 'num_class': 3}, - data) + { + "boosting_type": "gbdt", + "objective": "multiclass", + "n_estimators": 3, + "min_child_samples": 1, + "num_class": 3, + }, + data, + ) update_registered_converter( WrappedLightGbmBoosterClassifier, - 'WrappedLightGbmBoosterClassifier', + "WrappedLightGbmBoosterClassifier", calculate_lightgbm_output_shapes, - convert_lightgbm, parser=lightgbm_parser, - options={'zipmap': [False, True], 'nocl': [False, True]}) + convert_lightgbm, + parser=lightgbm_parser, + options={"zipmap": [False, True], "nocl": [False, True]}, + ) update_registered_converter( - WrappedBooster, 'WrappedBooster', + WrappedBooster, + "WrappedBooster", calculate_lightgbm_output_shapes, - convert_lightgbm, parser=lightgbm_parser, - options={'zipmap': [False, True], 'nocl': [False, True]}) + convert_lightgbm, + parser=lightgbm_parser, + options={"zipmap": [False, True], "nocl": [False, True]}, + ) update_registered_converter( - Booster, 'LightGbmBooster', calculate_lightgbm_output_shapes, - convert_lightgbm, parser=lightgbm_parser) + Booster, + "LightGbmBooster", + calculate_lightgbm_output_shapes, + convert_lightgbm, + parser=lightgbm_parser, + ) model_onnx = to_onnx( - model, initial_types=[('X', FloatTensorType([None, 2]))], - options={WrappedLightGbmBoosterClassifier: {'zipmap': False}}, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + model, + initial_types=[("X", FloatTensorType([None, 2]))], + options={WrappedLightGbmBoosterClassifier: {"zipmap": False}}, + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) try: sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) except InvalidArgument as e: - raise AssertionError( - "Cannot load model\n%r" % str(model_onnx)) from e + raise AssertionError("Cannot load model\n%r" % str(model_onnx)) from e expected = model.predict(X) - res = sess.run(None, {'X': X}) + res = sess.run(None, {"X": X}) assert_almost_equal(expected, res[1]) diff --git a/tests_onnxmltools/test_xgboost_converters.py b/tests_onnxmltools/test_xgboost_converters.py index 4a8201f0a..404796a52 100644 --- a/tests_onnxmltools/test_xgboost_converters.py +++ b/tests_onnxmltools/test_xgboost_converters.py @@ -12,6 +12,7 @@ from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.preprocessing import Normalizer + try: from sklearn.ensemble import StackingClassifier except ImportError: @@ -21,12 +22,13 @@ from skl2onnx.common.data_types import FloatTensorType from skl2onnx.common.shape_calculator import ( calculate_linear_classifier_output_shapes, # noqa - calculate_linear_regressor_output_shapes) + calculate_linear_regressor_output_shapes, +) from skl2onnx._parse import _parse_sklearn_classifier from xgboost import XGBRegressor, XGBClassifier import onnxmltools from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( - convert_xgboost # noqa + convert_xgboost, # noqa ) try: @@ -34,43 +36,47 @@ except ImportError: import os import sys - sys.path.append( - os.path.join( - os.path.dirname(__file__), "..", "tests")) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests")) from test_utils import dump_single_regression -from test_utils import ( - dump_multiple_classification, TARGET_OPSET, TARGET_OPSET_ML) +from test_utils import dump_multiple_classification, TARGET_OPSET, 
TARGET_OPSET_ML class TestXGBoostModels(unittest.TestCase): - @classmethod def setUpClass(self): - def custom_parser(scope, model, inputs, custom_parsers=None): if custom_parsers is not None and model in custom_parsers: return custom_parsers[model]( - scope, model, inputs, custom_parsers=custom_parsers) - if not all(isinstance(i, (numbers.Real, bool, np.bool_)) - for i in model.classes_): + scope, model, inputs, custom_parsers=custom_parsers + ) + if not all( + isinstance(i, (numbers.Real, bool, np.bool_)) for i in model.classes_ + ): raise NotImplementedError( - "Current converter does not support string labels.") + "Current converter does not support string labels." + ) return _parse_sklearn_classifier(scope, model, inputs) update_registered_converter( - XGBClassifier, 'XGBClassifier', + XGBClassifier, + "XGBClassifier", calculate_linear_classifier_output_shapes, - convert_xgboost, parser=custom_parser, - options={'zipmap': [True, False, 'columns'], - 'nocl': [True, False]}) + convert_xgboost, + parser=custom_parser, + options={"zipmap": [True, False, "columns"], "nocl": [True, False]}, + ) update_registered_converter( - XGBRegressor, 'XGBRegressor', + XGBRegressor, + "XGBRegressor", calculate_linear_regressor_output_shapes, - convert_xgboost) + convert_xgboost, + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for xgboost is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for xgboost is too old", + ) def test_xgb_regressor(self): iris = load_iris() X = iris.data[:, :2] @@ -80,13 +86,15 @@ def test_xgb_regressor(self): xgb.fit(X, y) conv_model = convert_sklearn( xgb, - initial_types=[ - ('input', FloatTensorType(shape=[None, X.shape[1]]))], - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))], + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) self.assertTrue(conv_model is not None) dump_single_regression( - xgb, suffix="-Dec4", - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, + suffix="-Dec4", + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) def test_xgb_classifier(self): xgb = XGBClassifier(n_estimators=2, max_depth=2) @@ -96,20 +104,22 @@ def test_xgb_classifier(self): y[y == 2] = 0 xgb.fit(X, y) conv_model = convert_sklearn( - xgb, initial_types=[ - ('input', FloatTensorType(shape=[None, X.shape[1]]))], - options={id(xgb): {'zipmap': False}}, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, + initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))], + options={id(xgb): {"zipmap": False}}, + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) sess = InferenceSession( - conv_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + conv_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) assert_almost_equal(xgb.predict_proba(X), res[1]) assert_almost_equal(xgb.predict(X), res[0]) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for xgboost is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for xgboost is too old", + ) def test_xgb_classifier_multi(self): iris = load_iris() X = iris.data[:, :2] @@ -119,123 +129,151 @@ def test_xgb_classifier_multi(self): xgb.fit(X, y) 
conv_model = convert_sklearn( xgb, - initial_types=[ - ('input', FloatTensorType(shape=[None, X.shape[1]]))], - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))], + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) self.assertTrue(conv_model is not None) dump_multiple_classification( - xgb, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML} + ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version('1.11'), - reason="converter for xgboost is too old") + pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + reason="converter for xgboost is too old", + ) def test_xgb_classifier_multi_reglog(self): iris = load_iris() X = iris.data[:, :2] y = iris.target - xgb = XGBClassifier(objective='reg:logistic') + xgb = XGBClassifier(objective="reg:logistic") xgb.fit(X, y) conv_model = convert_sklearn( - xgb, initial_types=[ - ('input', FloatTensorType(shape=[None, X.shape[1]]))], - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, + initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))], + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) self.assertTrue(conv_model is not None) dump_multiple_classification( - xgb, suffix="RegLog", - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, + suffix="RegLog", + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) def test_xgb_classifier_reglog(self): iris = load_iris() X = iris.data[:, :2] y = iris.target y[y == 2] = 0 - xgb = XGBClassifier(objective='binary:logistic') + xgb = XGBClassifier(objective="binary:logistic") xgb.fit(X, y) conv_model = convert_sklearn( - xgb, initial_types=[ - ('input', FloatTensorType(shape=[None, X.shape[1]]))], - options={id(xgb): {'zipmap': False}}, - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}) + xgb, + initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))], + options={id(xgb): {"zipmap": False}}, + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + ) self.assertTrue(conv_model is not None) sess = InferenceSession( - conv_model.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + conv_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) assert_almost_equal(xgb.predict_proba(X), res[1]) assert_almost_equal(xgb.predict(X), res[0]) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") def test_model_stacking_classifier_column_transformer(self): classifiers = { - 'A': XGBClassifier(n_estimators=5, random_state=42), - 'B': XGBClassifier(n_estimators=5, random_state=42) + "A": XGBClassifier(n_estimators=5, random_state=42), + "B": XGBClassifier(n_estimators=5, random_state=42), } - model_to_test = Pipeline(steps=[ - ('cbe', ColumnTransformer([ - ("norm1", Normalizer(norm='l1'), [0, 1]), - ("norm2", Normalizer(norm='l2'), [2, 3])])), - ('sc', StackingClassifier( - estimators=list(map(tuple, classifiers.items())), - stack_method='predict_proba', - passthrough=False - )) - ]) + model_to_test = Pipeline( + steps=[ + ( + "cbe", + ColumnTransformer( + [ + ("norm1", Normalizer(norm="l1"), [0, 1]), + ("norm2", Normalizer(norm="l2"), [2, 3]), + ] + ), + ), + ( + "sc", + StackingClassifier( + 
estimators=list(map(tuple, classifiers.items())), + stack_method="predict_proba", + passthrough=False, + ), + ), + ] + ) iris = load_iris() X = iris.data.astype(np.float32) y = (iris.target == 0).astype(np.int32) model_to_test.fit(X, y) model_onnx = convert_sklearn( - model_to_test, "stacking classifier", + model_to_test, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}, - options={'zipmap': False}) + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + options={"zipmap": False}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) assert_almost_equal(model_to_test.predict_proba(X), res[1]) assert_almost_equal(model_to_test.predict(X), res[0]) - @unittest.skipIf(StackingClassifier is None, - reason="new in 0.22") + @unittest.skipIf(StackingClassifier is None, reason="new in 0.22") def test_model_stacking_classifier_column_transformer_custom(self): - classifiers = { - 'A': XGBClassifier(n_estimators=5, random_state=42), - 'B': XGBClassifier(n_estimators=5, random_state=42) + "A": XGBClassifier(n_estimators=5, random_state=42), + "B": XGBClassifier(n_estimators=5, random_state=42), } - model_to_test = Pipeline(steps=[ - ('cbe', ColumnTransformer([ - ("norm1", Normalizer(norm='l1'), [0, 1]), - ("norm2", Normalizer(norm='l2'), [2, 3])])), - ('sc', StackingClassifier( - estimators=list(map(tuple, classifiers.items())), - stack_method='predict_proba', - passthrough=False - )) - ]) + model_to_test = Pipeline( + steps=[ + ( + "cbe", + ColumnTransformer( + [ + ("norm1", Normalizer(norm="l1"), [0, 1]), + ("norm2", Normalizer(norm="l2"), [2, 3]), + ] + ), + ), + ( + "sc", + StackingClassifier( + estimators=list(map(tuple, classifiers.items())), + stack_method="predict_proba", + passthrough=False, + ), + ), + ] + ) iris = load_iris() X = iris.data.astype(np.float32) df = pandas.DataFrame(X) - df.columns = ['A', 'B', 'C', 'D'] + df.columns = ["A", "B", "C", "D"] X[:, 0] = X[:, 0].astype(np.int64).astype(X.dtype) - df['A'] = df.A.astype(np.int64) - df['B'] = df.B.astype(np.float32) - df['C'] = df.C.astype(np.str_) + df["A"] = df.A.astype(np.int64) + df["B"] = df.B.astype(np.float32) + df["C"] = df.C.astype(np.str_) y = (iris.target == 0).astype(np.int32) model_to_test.fit(df, y) model_onnx = convert_sklearn( - model_to_test, "stacking classifier", + model_to_test, + "stacking classifier", [("input", FloatTensorType([None, X.shape[1]]))], - target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML}, - options={'zipmap': False}) + target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}, + options={"zipmap": False}, + ) sess = InferenceSession( - model_onnx.SerializeToString(), - providers=["CPUExecutionProvider"]) - res = sess.run(None, {'input': X.astype(np.float32)}) + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": X.astype(np.float32)}) assert_almost_equal(model_to_test.predict_proba(df), res[1]) assert_almost_equal(model_to_test.predict(df), res[0])
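
The xgboost tests above all rely on the same registration pattern: the onnxmltools converter is registered once with update_registered_converter, after which convert_sklearn treats XGBClassifier like any scikit-learn estimator. The standalone sketch below condenses that pattern; it is illustrative only — the iris subset, the hyperparameters, and leaving target_opset at its default are arbitrary choices, not taken from the tests.

    # Minimal sketch of the registration pattern exercised by the xgboost tests
    # (illustrative; dataset, hyperparameters and default opset are assumptions).
    import numpy as np
    from sklearn.datasets import load_iris
    from xgboost import XGBClassifier
    from onnxruntime import InferenceSession
    from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
    from skl2onnx import convert_sklearn, update_registered_converter
    from skl2onnx.common.data_types import FloatTensorType
    from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes

    # Tell skl2onnx how to handle XGBClassifier by reusing the onnxmltools converter.
    update_registered_converter(
        XGBClassifier,
        "XGBClassifier",
        calculate_linear_classifier_output_shapes,
        convert_xgboost,
        options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
    )

    iris = load_iris()
    X = iris.data[:, :2].astype(np.float32)
    y = iris.target
    clf = XGBClassifier(n_estimators=3, max_depth=2)
    clf.fit(X, y)

    # Convert the fitted model; zipmap=False keeps probabilities as a plain matrix.
    onx = convert_sklearn(
        clf,
        "xgboost classifier",
        [("input", FloatTensorType([None, X.shape[1]]))],
        options={id(clf): {"zipmap": False}},
    )

    sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
    label, proba = sess.run(None, {"input": X[:5]})
    print(label, proba)

With zipmap disabled, the session returns the predicted labels and a dense probability matrix, which is what the assertions in the tests compare against predict and predict_proba.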