diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml
index 0c5f44de7..40d9ff887 100644
--- a/.azure-pipelines/linux-conda-CI.yml
+++ b/.azure-pipelines/linux-conda-CI.yml
@@ -243,10 +243,6 @@ jobs:
fi
displayName: 'install onnx'
- - script: |
- pip install flake8
- displayName: 'install flake8'
-
- script: |
pip install $(onnxrt.version)
displayName: 'install onnxruntime'
@@ -334,11 +330,10 @@ jobs:
displayName: 'pytest-onnxmltools'
condition: eq(variables['run.example'], '1')
- # Check flake8 after the tests to get more feedback.
- # It is checked before the tests on the windows build.
- script: |
- flake8 skl2onnx tests tests_onnxmltools
- displayName: 'flake8'
+ python -m pip install ruff
+ ruff skl2onnx tests tests_onnxmltools
+ displayName: 'ruff'
- script: |
if [ '$(onnx.target_opset)' != '' ]
diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml
index 9fb907347..21b7e8cd6 100644
--- a/.azure-pipelines/win32-conda-CI.yml
+++ b/.azure-pipelines/win32-conda-CI.yml
@@ -157,13 +157,9 @@ jobs:
- script: |
call activate skl2onnxEnvironment
- pip install flake8
- displayName: 'install flake8'
-
- - script: |
- call activate skl2onnxEnvironment
- flake8 skl2onnx tests tests_onnxmltools
- displayName: 'flake8'
+ python -m pip install ruff
+ ruff skl2onnx tests tests_onnxmltools
+ displayName: 'ruff'
- script: |
call activate skl2onnxEnvironment
diff --git a/.github/workflows/black-ruff.yml b/.github/workflows/black-ruff.yml
new file mode 100644
index 000000000..c48dc25fd
--- /dev/null
+++ b/.github/workflows/black-ruff.yml
@@ -0,0 +1,16 @@
+name: Black and Ruff Format Checker
+on: [push, pull_request]
+jobs:
+ black-format-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: psf/black@stable
+ with:
+ options: "--diff --check"
+ src: "."
+ ruff-format-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: chartboost/ruff-action@v1
diff --git a/README.md b/README.md
index 437f56a92..6f79fd921 100644
--- a/README.md
+++ b/README.md
@@ -2,14 +2,18 @@
-| Linux | Windows |
-|-------|---------|
-| [![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status/sklearn-onnx-linux-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=5?branchName=master) | [![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status/sklearn-onnx-win32-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=5?branchName=master)|
+[![Build Status Linux](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1009%2Fmerge)
+
+[![Build Status Windows](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1009%2Fmerge)
+
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
## Introduction
*sklearn-onnx* converts [scikit-learn](https://scikit-learn.org/stable/) models to [ONNX](https://github.com/onnx/onnx).
Once in the ONNX format, you can use tools like [ONNX Runtime](https://github.com/Microsoft/onnxruntime) for high performance scoring.
All converters are tested with [onnxruntime](https://onnxruntime.ai/).
+Any external converter can be registered to convert scikit-learn pipelines
+that include models or transformers coming from external libraries.
## Documentation
Full documentation including tutorials is available at [https://onnx.ai/sklearn-onnx/](https://onnx.ai/sklearn-onnx/).
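
For context, a minimal sketch of the conversion flow the README describes: fit a scikit-learn model, convert it with `convert_sklearn`, then score it with onnxruntime. The model and dataset below are illustrative choices, not part of this diff.

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

X, y = load_iris(return_X_y=True)
model = LogisticRegression(max_iter=500).fit(X, y)

# Declare the input signature, then convert to ONNX.
initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(model, initial_types=initial_types)

# Run the converted model with onnxruntime.
sess = rt.InferenceSession(
    onx.SerializeToString(), providers=["CPUExecutionProvider"]
)
pred = sess.run(None, {"X": X.astype(np.float32)})[0]
print(pred[:5])
```

Converters for models coming from external libraries can be plugged in through `skl2onnx.update_registered_converter` before calling `convert_sklearn`.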
diff --git a/benchmarks/bench_plot_onnxruntime_decision_tree.py b/benchmarks/bench_plot_onnxruntime_decision_tree.py
index 0d06b1bb5..8b08aca8b 100644
--- a/benchmarks/bench_plot_onnxruntime_decision_tree.py
+++ b/benchmarks/bench_plot_onnxruntime_decision_tree.py
@@ -14,6 +14,7 @@
import pandas
from sklearn import config_context
from sklearn.tree import DecisionTreeClassifier
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -29,14 +30,18 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, max_depth):
"DecisionTreeClassifier."
rf = DecisionTreeClassifier(max_depth=max_depth)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
- onx = convert_sklearn(rf, initial_types=initial_types,
- options={DecisionTreeClassifier: {'zipmap': False}})
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
+ onx = convert_sklearn(
+ rf,
+ initial_types=initial_types,
+ options={DecisionTreeClassifier: {"zipmap": False}},
+ )
f = BytesIO()
f.write(onx.SerializeToString())
content = f.getvalue()
@@ -51,30 +56,29 @@ def predict_skl_predict_proba(X, model=rf):
return rf.predict_proba(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
def predict_onnxrt_predict_proba(X, sess=sess):
- return sess.run(outputs[1:], {'X': X})[0]
+ return sess.run(outputs[1:], {"X": X})[0]
- return {'predict': (predict_skl_predict,
- predict_onnxrt_predict),
- 'predict_proba': (predict_skl_predict_proba,
- predict_onnxrt_predict_proba)}
+ return {
+ "predict": (predict_skl_predict, predict_onnxrt_predict),
+ "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba),
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, max_depths, methods,
- repeat=10, verbose=False):
+def bench(n_obs, n_features, max_depths, methods, repeat=10, verbose=False):
res = []
for nfeat in n_features:
-
ntrain = 100000
X_train = np.empty((ntrain, nfeat))
X_train[:, :] = rand(ntrain, nfeat)[:, :].astype(np.float32)
@@ -88,15 +92,12 @@ def bench(n_obs, n_features, max_depths, methods,
for n in n_obs:
for method in methods:
-
fct1, fct2 = fcts[method]
- if not allow_configuration(
- n=n, nfeat=nfeat, max_depth=max_depth):
+ if not allow_configuration(n=n, nfeat=nfeat, max_depth=max_depth):
continue
- obs = dict(n_obs=n, nfeat=nfeat,
- max_depth=max_depth, method=method)
+ obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth, method=method)
# creates different inputs to avoid caching in any ways
Xs = []
@@ -143,11 +144,11 @@ def bench(n_obs, n_features, max_depths, methods,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2)
ncols = max(len(set(df.method)), 2)
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -156,31 +157,49 @@ def plot_results(df, verbose=False):
for method in sorted(set(df.method)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
- "Time (s) n_obs={}\nmax_depth={}".format(
- n_obs, max_depth),
- fontsize='x-small')
-
- color = 'b'
- subset = df[(df.method == method) & (df.n_obs == n_obs) &
- (df.max_depth == max_depth)]
+ "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth),
+ fontsize="x-small",
+ )
+
+ color = "b"
+ subset = df[
+ (df.method == method)
+ & (df.n_obs == n_obs)
+ & (df.max_depth == max_depth)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
if verbose:
print(subset)
label = "skl"
- subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--')
+ subset.plot(
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ )
label = "ort"
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color)
-
- a.legend(loc=0, fontsize='x-small')
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("method={}".format(method), fontsize='x-small')
+ a.set_title("method={}".format(method), fontsize="x-small")
pos += 1
row += 1
@@ -190,13 +209,14 @@ def plot_results(df, verbose=False):
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict', 'predict_proba']
+ methods = ["predict", "predict_proba"]
n_features = [1, 5, 10, 20, 50, 100, 200]
max_depths = [2, 5, 10, 20]
start = time()
- results = bench(n_obs, n_features, max_depths, methods,
- repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs, n_features, max_depths, methods, repeat=repeat, verbose=verbose
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -207,21 +227,24 @@ def run_bench(repeat=100, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_decision_tree.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_hgb.py b/benchmarks/bench_plot_onnxruntime_hgb.py
index 8a2523120..5c2d0f23d 100644
--- a/benchmarks/bench_plot_onnxruntime_hgb.py
+++ b/benchmarks/bench_plot_onnxruntime_hgb.py
@@ -24,13 +24,13 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, max_depth, n_estimators):
"RandomForestClassifier."
- rf = HistGradientBoostingRegressor(
- max_depth=max_depth, max_iter=n_estimators)
+ rf = HistGradientBoostingRegressor(max_depth=max_depth, max_iter=n_estimators)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(rf, initial_types=initial_types)
f = BytesIO()
f.write(onx.SerializeToString())
@@ -42,28 +42,31 @@ def predict_skl_predict(X, model=rf):
return rf.predict(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
- return {'predict': (
- predict_skl_predict,
- predict_onnxrt_predict,
- None,
- )}
+ return {
+ "predict": (
+ predict_skl_predict,
+ predict_onnxrt_predict,
+ None,
+ )
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, max_depths, n_estimatorss,
- methods, repeat=10, verbose=False):
+def bench(
+ n_obs, n_features, max_depths, n_estimatorss, methods, repeat=10, verbose=False
+):
res = []
for nfeat in n_features:
-
ntrain = 100000
X_train = np.empty((ntrain, nfeat)).astype(np.float32)
X_train[:, :] = rand(ntrain, nfeat)[:, :]
@@ -76,17 +79,24 @@ def bench(n_obs, n_features, max_depths, n_estimatorss,
for n in n_obs:
for method in methods:
-
fct1, fct2, fct3 = fcts[method]
- if not allow_configuration(n=n, nfeat=nfeat,
- max_depth=max_depth,
- n_estimator=n_estimators,
- method=method):
+ if not allow_configuration(
+ n=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimator=n_estimators,
+ method=method,
+ ):
continue
- obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth,
- n_estimators=n_estimators, method=method)
+ obs = dict(
+ n_obs=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimators=n_estimators,
+ method=method,
+ )
# creates different inputs to avoid caching in any ways
Xs = []
@@ -128,8 +138,7 @@ def bench(n_obs, n_features, max_depths, n_estimatorss,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
- assert_almost_equal(
- p1.ravel(), p2.ravel(), decimal=5)
+ assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -139,11 +148,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2)
ncols = 2
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -152,17 +161,19 @@ def plot_results(df, verbose=False):
for n_jobs in [1]:
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
- "Time (s) n_obs={}\nmax_depth={}".format(
- n_obs, max_depth), fontsize='x-small')
-
- for color, n_estimators in zip(
- 'brgyc', sorted(set(df.n_estimators))):
- subset = df[(df.n_obs == n_obs)
- & (df.max_depth == max_depth)
- & (df.n_estimators == n_estimators)]
+ "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth),
+ fontsize="x-small",
+ )
+
+ for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))):
+ subset = df[
+ (df.n_obs == n_obs)
+ & (df.max_depth == max_depth)
+ & (df.n_estimators == n_estimators)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
@@ -171,19 +182,43 @@ def plot_results(df, verbose=False):
label = "skl ne={}".format(n_estimators)
subset.plot(
- x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--', lw=5)
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ lw=5,
+ )
label = "ort ne={}".format(n_estimators)
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color, lw=3)
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ lw=3,
+ )
label = "lite ne={}".format(n_estimators)
subset.plot(
- x="nfeat", y="time_lite", label=label, ax=a,
- logx=True, logy=True, c=color, style='-.', lw=3)
-
- a.legend(loc=0, fontsize='x-small')
+ x="nfeat",
+ y="time_lite",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="-.",
+ lw=3,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("---", fontsize='x-small')
+ a.set_title("---", fontsize="x-small")
pos += 1
row += 1
@@ -193,14 +228,21 @@ def plot_results(df, verbose=False):
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict']
+ methods = ["predict"]
n_features = [30, 100]
max_depths = [10]
n_estimatorss = [100, 200]
start = time()
- results = bench(n_obs, n_features, max_depths, n_estimatorss,
- methods, repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs,
+ n_features,
+ max_depths,
+ n_estimatorss,
+ methods,
+ repeat=repeat,
+ verbose=verbose,
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -211,21 +253,24 @@ def run_bench(repeat=100, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_hgb.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_linreg.py b/benchmarks/bench_plot_onnxruntime_linreg.py
index aa180f71e..82490c22d 100644
--- a/benchmarks/bench_plot_onnxruntime_linreg.py
+++ b/benchmarks/bench_plot_onnxruntime_linreg.py
@@ -14,6 +14,7 @@
import pandas
from sklearn import config_context
from sklearn.linear_model import LinearRegression
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -29,12 +30,13 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, fit_intercept):
"LinearRegression."
rf = LinearRegression(fit_intercept=fit_intercept)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(rf, initial_types=initial_types)
f = BytesIO()
f.write(onx.SerializeToString())
@@ -47,25 +49,23 @@ def predict_skl_predict(X, model=rf):
return rf.predict(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
- return {'predict': (predict_skl_predict,
- predict_onnxrt_predict)}
+ return {"predict": (predict_skl_predict, predict_onnxrt_predict)}
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, fit_intercepts, methods,
- repeat=10, verbose=False):
+def bench(n_obs, n_features, fit_intercepts, methods, repeat=10, verbose=False):
res = []
for nfeat in n_features:
-
ntrain = 10000
X_train = np.empty((ntrain, nfeat))
X_train[:, :] = rand(ntrain, nfeat)[:, :]
@@ -83,16 +83,20 @@ def bench(n_obs, n_features, fit_intercepts, methods,
else:
loop_repeat = repeat
for method in methods:
-
fct1, fct2 = fcts[method]
if not allow_configuration(
- n=n, nfeat=nfeat, fit_intercept=fit_intercept):
+ n=n, nfeat=nfeat, fit_intercept=fit_intercept
+ ):
continue
- obs = dict(n_obs=n, nfeat=nfeat,
- fit_intercept=fit_intercept, method=method,
- repeat=loop_repeat)
+ obs = dict(
+ n_obs=n,
+ nfeat=nfeat,
+ fit_intercept=fit_intercept,
+ method=method,
+ repeat=loop_repeat,
+ )
# creates different inputs to avoid caching in any ways
Xs = []
@@ -128,8 +132,7 @@ def bench(n_obs, n_features, fit_intercepts, methods,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
- assert_almost_equal(
- p1.ravel(), p2.ravel(), decimal=5)
+ assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -139,11 +142,11 @@ def bench(n_obs, n_features, fit_intercepts, methods,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.fit_intercept)) * len(set(df.n_obs)), 2)
ncols = max(len(set(df.method)), 2)
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -152,48 +155,68 @@ def plot_results(df, verbose=False):
for method in sorted(set(df.method)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
"Time (s) n_obs={}\nfit_intercept={}".format(
- n_obs, fit_intercept),
- fontsize='x-small')
-
- color = 'b'
- subset = df[(df.method == method) & (df.n_obs == n_obs) &
- (df.fit_intercept == fit_intercept)]
+ n_obs, fit_intercept
+ ),
+ fontsize="x-small",
+ )
+
+ color = "b"
+ subset = df[
+ (df.method == method)
+ & (df.n_obs == n_obs)
+ & (df.fit_intercept == fit_intercept)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
if verbose:
print(subset)
label = "skl"
- subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--')
+ subset.plot(
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ )
label = "ort"
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color)
-
- a.legend(loc=0, fontsize='x-small')
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("method={}".format(method), fontsize='x-small')
+ a.set_title("method={}".format(method), fontsize="x-small")
pos += 1
row += 1
- plt.suptitle(
- "Benchmark for LinearRegression sklearn/onnxruntime", fontsize=16)
+ plt.suptitle("Benchmark for LinearRegression sklearn/onnxruntime", fontsize=16)
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=2000, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict']
+ methods = ["predict"]
n_features = [10, 50, 100]
fit_intercepts = [True]
start = time()
- results = bench(n_obs, n_features, fit_intercepts, methods,
- repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs, n_features, fit_intercepts, methods, repeat=repeat, verbose=verbose
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -204,21 +227,24 @@ def run_bench(repeat=2000, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_linreg.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_logreg.py b/benchmarks/bench_plot_onnxruntime_logreg.py
index f1a2511e6..dc2f1ec37 100644
--- a/benchmarks/bench_plot_onnxruntime_logreg.py
+++ b/benchmarks/bench_plot_onnxruntime_logreg.py
@@ -14,6 +14,7 @@
import pandas
from sklearn import config_context
from sklearn.linear_model import LogisticRegression
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -29,14 +30,16 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, fit_intercept):
"LogisticRegression."
rf = LogisticRegression(fit_intercept=fit_intercept)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
- onx = convert_sklearn(rf, initial_types=initial_types,
- options={LogisticRegression: {'zipmap': False}})
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
+ onx = convert_sklearn(
+ rf, initial_types=initial_types, options={LogisticRegression: {"zipmap": False}}
+ )
f = BytesIO()
f.write(onx.SerializeToString())
content = f.getvalue()
@@ -51,30 +54,29 @@ def predict_skl_predict_proba(X, model=rf):
return rf.predict_proba(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
def predict_onnxrt_predict_proba(X, sess=sess):
- return sess.run(outputs[1:], {'X': X})[0]
+ return sess.run(outputs[1:], {"X": X})[0]
- return {'predict': (predict_skl_predict,
- predict_onnxrt_predict),
- 'predict_proba': (predict_skl_predict_proba,
- predict_onnxrt_predict_proba)}
+ return {
+ "predict": (predict_skl_predict, predict_onnxrt_predict),
+ "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba),
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, fit_intercepts, methods,
- repeat=10, verbose=False):
+def bench(n_obs, n_features, fit_intercepts, methods, repeat=10, verbose=False):
res = []
for nfeat in n_features:
-
ntrain = 10000
X_train = np.empty((ntrain, nfeat))
X_train[:, :] = rand(ntrain, nfeat)[:, :].astype(np.float32)
@@ -94,16 +96,20 @@ def bench(n_obs, n_features, fit_intercepts, methods,
else:
loop_repeat = repeat
for method in methods:
-
fct1, fct2 = fcts[method]
if not allow_configuration(
- n=n, nfeat=nfeat, fit_intercept=fit_intercept):
+ n=n, nfeat=nfeat, fit_intercept=fit_intercept
+ ):
continue
- obs = dict(n_obs=n, nfeat=nfeat,
- fit_intercept=fit_intercept, method=method,
- repeat=loop_repeat)
+ obs = dict(
+ n_obs=n,
+ nfeat=nfeat,
+ fit_intercept=fit_intercept,
+ method=method,
+ repeat=loop_repeat,
+ )
# creates different inputs to avoid caching in any ways
Xs = []
@@ -146,11 +152,11 @@ def bench(n_obs, n_features, fit_intercepts, methods,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.fit_intercept)) * len(set(df.n_obs)), 2)
ncols = max(len(set(df.method)), 2)
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -159,48 +165,68 @@ def plot_results(df, verbose=False):
for method in sorted(set(df.method)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
"Time (s) n_obs={}\nfit_intercept={}".format(
- n_obs, fit_intercept),
- fontsize='x-small')
-
- color = 'b'
- subset = df[(df.method == method) & (df.n_obs == n_obs) &
- (df.fit_intercept == fit_intercept)]
+ n_obs, fit_intercept
+ ),
+ fontsize="x-small",
+ )
+
+ color = "b"
+ subset = df[
+ (df.method == method)
+ & (df.n_obs == n_obs)
+ & (df.fit_intercept == fit_intercept)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
if verbose:
print(subset)
label = "skl"
- subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--')
+ subset.plot(
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ )
label = "ort"
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color)
-
- a.legend(loc=0, fontsize='x-small')
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("method={}".format(method), fontsize='x-small')
+ a.set_title("method={}".format(method), fontsize="x-small")
pos += 1
row += 1
- plt.suptitle(
- "Benchmark for LogisticRegression sklearn/onnxruntime", fontsize=16)
+ plt.suptitle("Benchmark for LogisticRegression sklearn/onnxruntime", fontsize=16)
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=1000, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict_proba'] # ['predict', 'predict_proba']
+ methods = ["predict_proba"] # ['predict', 'predict_proba']
n_features = [10, 50]
fit_intercepts = [True]
start = time()
- results = bench(n_obs, n_features, fit_intercepts, methods,
- repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs, n_features, fit_intercepts, methods, repeat=repeat, verbose=verbose
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -211,21 +237,24 @@ def run_bench(repeat=1000, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_logreg.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_random_forest.py b/benchmarks/bench_plot_onnxruntime_random_forest.py
index 7fdef4d15..c968b5457 100644
--- a/benchmarks/bench_plot_onnxruntime_random_forest.py
+++ b/benchmarks/bench_plot_onnxruntime_random_forest.py
@@ -14,6 +14,7 @@
import pandas
from sklearn import config_context
from sklearn.ensemble import RandomForestClassifier
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -29,12 +30,13 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, max_depth, n_estimators):
"RandomForestClassifier."
rf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(rf, initial_types=initial_types)
f = BytesIO()
f.write(onx.SerializeToString())
@@ -50,10 +52,10 @@ def predict_skl_predict_proba(X, model=rf):
return rf.predict_proba(X)
def predict_onnxrt_predict(X, sess=sess):
- return numpy.array(sess.run(outputs[:1], {'X': X.astype(np.float32)}))
+ return numpy.array(sess.run(outputs[:1], {"X": X.astype(np.float32)}))
def predict_onnxrt_predict_proba(X, sess=sess):
- res = sess.run(outputs[1:], {'X': X.astype(np.float32)})[0]
+ res = sess.run(outputs[1:], {"X": X.astype(np.float32)})[0]
# do not use DataFrame to convert the output into array,
# it takes too much time
out = numpy.empty((len(res), len(res[0])), dtype=numpy.float32)
@@ -62,25 +64,26 @@ def predict_onnxrt_predict_proba(X, sess=sess):
out[i, k] = v
return out
- return {'predict': (predict_skl_predict,
- predict_onnxrt_predict),
- 'predict_proba': (predict_skl_predict_proba,
- predict_onnxrt_predict_proba)}
+ return {
+ "predict": (predict_skl_predict, predict_onnxrt_predict),
+ "predict_proba": (predict_skl_predict_proba, predict_onnxrt_predict_proba),
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, max_depths, n_estimatorss, methods,
- repeat=10, verbose=False):
+def bench(
+ n_obs, n_features, max_depths, n_estimatorss, methods, repeat=10, verbose=False
+):
res = []
for nfeat in n_features:
-
ntrain = 100000
X_train = np.empty((ntrain, nfeat))
X_train[:, :] = rand(ntrain, nfeat)[:, :]
@@ -95,16 +98,23 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, methods,
for n in n_obs:
for method in methods:
-
fct1, fct2 = fcts[method]
if not allow_configuration(
- n=n, nfeat=nfeat,
- max_depth=max_depth, n_estimator=n_estimators):
+ n=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimator=n_estimators,
+ ):
continue
- obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth,
- n_estimators=n_estimators, method=method)
+ obs = dict(
+ n_obs=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimators=n_estimators,
+ method=method,
+ )
# creates different inputs to avoid caching in any ways
Xs = []
@@ -151,11 +161,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, methods,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2)
ncols = max(len(set(df.method)), 2)
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -164,33 +174,50 @@ def plot_results(df, verbose=False):
for method in sorted(set(df.method)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
- "Time (s) n_obs={}\nmax_depth={}".format(
- n_obs, max_depth),
- fontsize='x-small')
-
- for color, n_estimators in zip(
- 'brgyc', sorted(set(df.n_estimators))):
- subset = df[(df.method == method) & (df.n_obs == n_obs)
- & (df.max_depth == max_depth)
- & (df.n_estimators == n_estimators)]
+ "Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth),
+ fontsize="x-small",
+ )
+
+ for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))):
+ subset = df[
+ (df.method == method)
+ & (df.n_obs == n_obs)
+ & (df.max_depth == max_depth)
+ & (df.n_estimators == n_estimators)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
if verbose:
print(subset)
label = "skl ne={}".format(n_estimators)
- subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--')
+ subset.plot(
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ )
label = "ort ne={}".format(n_estimators)
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color)
-
- a.legend(loc=0, fontsize='x-small')
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("method={}".format(method), fontsize='x-small')
+ a.set_title("method={}".format(method), fontsize="x-small")
pos += 1
row += 1
@@ -200,14 +227,21 @@ def plot_results(df, verbose=False):
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
n_obs = [1, 100]
- methods = ['predict', 'predict_proba']
+ methods = ["predict", "predict_proba"]
n_features = [1, 5, 10, 20, 50, 100]
max_depths = [2, 5, 10]
n_estimatorss = [1, 10, 100]
start = time()
- results = bench(n_obs, n_features, max_depths, n_estimatorss, methods,
- repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs,
+ n_features,
+ max_depths,
+ n_estimatorss,
+ methods,
+ repeat=repeat,
+ verbose=verbose,
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -218,21 +252,24 @@ def run_bench(repeat=100, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_random_forest.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_random_forest_reg.py b/benchmarks/bench_plot_onnxruntime_random_forest_reg.py
index ffee920b0..c942e72e5 100644
--- a/benchmarks/bench_plot_onnxruntime_random_forest_reg.py
+++ b/benchmarks/bench_plot_onnxruntime_random_forest_reg.py
@@ -25,13 +25,15 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, max_depth, n_estimators, n_jobs):
"RandomForestClassifier."
- rf = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators,
- n_jobs=n_jobs)
+ rf = RandomForestRegressor(
+ max_depth=max_depth, n_estimators=n_estimators, n_jobs=n_jobs
+ )
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(rf, initial_types=initial_types)
f = BytesIO()
f.write(onx.SerializeToString())
@@ -42,48 +44,62 @@ def fcts_model(X, y, max_depth, n_estimators, n_jobs):
if False:
import treelite.sklearn
import treelite_runtime
+
try:
lite = treelite.sklearn.import_model(rf)
name = "lite{}.dll".format(id(rf))
lite.export_lib(
- toolchain='msvc' if sys.platform == "win32" else "gcc",
- libpath=name, verbose=False)
+ toolchain="msvc" if sys.platform == "win32" else "gcc",
+ libpath=name,
+ verbose=False,
+ )
lite_predictor = treelite_runtime.Predictor(name, verbose=False)
- except (treelite.util.TreeliteError, PermissionError,
- UnicodeDecodeError):
+ except (treelite.util.TreeliteError, PermissionError, UnicodeDecodeError):
lite_predictor = None
def predict_skl_predict(X, model=rf):
return rf.predict(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
def predict_treelite_predict(X, sess=sess):
return numpy.array(
lite_predictor.predict(
- treelite_runtime.Batch.from_npy2d(X.astype(np.float32))))
+ treelite_runtime.Batch.from_npy2d(X.astype(np.float32))
+ )
+ )
- return {'predict': (
- predict_skl_predict,
- predict_onnxrt_predict,
- None,
- )}
+ return {
+ "predict": (
+ predict_skl_predict,
+ predict_onnxrt_predict,
+ None,
+ )
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
- methods, repeat=10, verbose=False):
+def bench(
+ n_obs,
+ n_features,
+ max_depths,
+ n_estimatorss,
+ n_jobss,
+ methods,
+ repeat=10,
+ verbose=False,
+):
res = []
for nfeat in n_features:
-
ntrain = 100000
X_train = np.empty((ntrain, nfeat)).astype(np.float32)
X_train[:, :] = rand(ntrain, nfeat)[:, :]
@@ -93,25 +109,30 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
for n_jobs in n_jobss:
for max_depth in max_depths:
for n_estimators in n_estimatorss:
- fcts = fcts_model(X_train, y_train,
- max_depth, n_estimators, n_jobs)
+ fcts = fcts_model(X_train, y_train, max_depth, n_estimators, n_jobs)
for n in n_obs:
for method in methods:
-
fct1, fct2, fct3 = fcts[method]
if not allow_configuration(
- n=n, nfeat=nfeat,
- max_depth=max_depth,
- n_estimator=n_estimators,
- n_jobs=n_jobs, method=method):
+ n=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimator=n_estimators,
+ n_jobs=n_jobs,
+ method=method,
+ ):
continue
obs = dict(
- n_obs=n, nfeat=nfeat, max_depth=max_depth,
- n_estimators=n_estimators, method=method,
- n_jobs=n_jobs)
+ n_obs=n,
+ nfeat=nfeat,
+ max_depth=max_depth,
+ n_estimators=n_estimators,
+ method=method,
+ n_jobs=n_jobs,
+ )
# creates different inputs to avoid caching
# in any ways
@@ -167,7 +188,8 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
p2 = p2.ravel()
try:
assert_almost_equal(
- p1.ravel(), p2.ravel(), decimal=5)
+ p1.ravel(), p2.ravel(), decimal=5
+ )
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -177,11 +199,11 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.max_depth)) * len(set(df.n_obs)), 2)
ncols = max(len(set(df.n_jobs)), 2)
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
@@ -190,17 +212,22 @@ def plot_results(df, verbose=False):
for n_jobs in sorted(set(df.n_jobs)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
a.set_ylabel(
"Time (s) n_obs={}\nmax_depth={} n_jobs={}".format(
- n_obs, max_depth, n_jobs), fontsize='x-small')
-
- for color, n_estimators in zip(
- 'brgyc', sorted(set(df.n_estimators))):
- subset = df[(df.n_jobs == n_jobs) & (df.n_obs == n_obs)
- & (df.max_depth == max_depth)
- & (df.n_estimators == n_estimators)]
+ n_obs, max_depth, n_jobs
+ ),
+ fontsize="x-small",
+ )
+
+ for color, n_estimators in zip("brgyc", sorted(set(df.n_estimators))):
+ subset = df[
+ (df.n_jobs == n_jobs)
+ & (df.n_obs == n_obs)
+ & (df.max_depth == max_depth)
+ & (df.n_estimators == n_estimators)
+ ]
if subset.shape[0] == 0:
continue
subset = subset.sort_values("nfeat")
@@ -209,20 +236,43 @@ def plot_results(df, verbose=False):
label = "skl ne={}".format(n_estimators)
subset.plot(
- x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--', lw=5)
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ lw=5,
+ )
label = "ort ne={}".format(n_estimators)
subset.plot(
- x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color, lw=3)
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ lw=3,
+ )
label = "lite ne={}".format(n_estimators)
subset.plot(
- x="nfeat", y="time_lite", label=label, ax=a,
- logx=True, logy=True, c=color, style='-.', lw=3)
-
- a.legend(loc=0, fontsize='x-small')
+ x="nfeat",
+ y="time_lite",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="-.",
+ lw=3,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
- a.set_title("n_jobs={}".format(n_jobs), fontsize='x-small')
+ a.set_title("n_jobs={}".format(n_jobs), fontsize="x-small")
pos += 1
row += 1
@@ -232,15 +282,23 @@ def plot_results(df, verbose=False):
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict']
+ methods = ["predict"]
n_features = [30, 100]
max_depths = [10]
n_estimatorss = [100, 200]
n_jobss = [4]
start = time()
- results = bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
- methods, repeat=repeat, verbose=verbose)
+ results = bench(
+ n_obs,
+ n_features,
+ max_depths,
+ n_estimatorss,
+ n_jobss,
+ methods,
+ repeat=repeat,
+ verbose=verbose,
+ )
end = time()
results_df = pandas.DataFrame(results)
@@ -251,7 +309,7 @@ def run_bench(repeat=100, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
@@ -260,16 +318,19 @@ def run_bench(repeat=100, verbose=False):
import skl2onnx
import treelite
import treelite_runtime
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- {"name": "treelite", "version": treelite.__version__},
- {"name": "treelite_runtime", "version": treelite_runtime.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ {"name": "treelite", "version": treelite.__version__},
+ {"name": "treelite_runtime", "version": treelite_runtime.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_random_forest_reg.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/bench_plot_onnxruntime_svm_reg.py b/benchmarks/bench_plot_onnxruntime_svm_reg.py
index c1fa9b84c..2f38010c6 100644
--- a/benchmarks/bench_plot_onnxruntime_svm_reg.py
+++ b/benchmarks/bench_plot_onnxruntime_svm_reg.py
@@ -26,12 +26,13 @@
# Implementations to benchmark.
##############################
+
def fcts_model(X, y, kernel):
"SVR."
rf = SVR(kernel=kernel)
rf.fit(X, y)
- initial_types = [('X', FloatTensorType([None, X.shape[1]]))]
+ initial_types = [("X", FloatTensorType([None, X.shape[1]]))]
onx = convert_sklearn(rf, initial_types=initial_types)
f = BytesIO()
f.write(onx.SerializeToString())
@@ -43,27 +44,28 @@ def predict_skl_predict(X, model=rf):
return rf.predict(X)
def predict_onnxrt_predict(X, sess=sess):
- return sess.run(outputs[:1], {'X': X})[0]
+ return sess.run(outputs[:1], {"X": X})[0]
- return {'predict': (
- predict_skl_predict,
- predict_onnxrt_predict,
- )}
+ return {
+ "predict": (
+ predict_skl_predict,
+ predict_onnxrt_predict,
+ )
+ }
##############################
# Benchmarks
##############################
+
def allow_configuration(**kwargs):
return True
-def bench(n_obs, n_features, kernels,
- methods, repeat=10, verbose=False):
+def bench(n_obs, n_features, kernels, methods, repeat=10, verbose=False):
res = []
for nfeat in n_features:
-
ntrain = 1000
X_train = np.empty((ntrain, nfeat)).astype(np.float32)
X_train[:, :] = rand(ntrain, nfeat)[:, :]
@@ -75,11 +77,9 @@ def bench(n_obs, n_features, kernels,
for n in n_obs:
for method in methods:
-
fct1, fct2 = fcts[method]
- if not allow_configuration(n=n, nfeat=nfeat,
- kernel=kernel):
+ if not allow_configuration(n=n, nfeat=nfeat, kernel=kernel):
continue
obs = dict(n_obs=n, nfeat=nfeat, kernel=kernel)
@@ -124,8 +124,7 @@ def bench(n_obs, n_features, kernels,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
- assert_almost_equal(
- p1.ravel(), p2.ravel(), decimal=3)
+ assert_almost_equal(p1.ravel(), p2.ravel(), decimal=3)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -135,22 +134,21 @@ def bench(n_obs, n_features, kernels,
# Plots.
##############################
+
def plot_results(df, verbose=False):
nrows = max(len(set(df.n_obs)), 2)
ncols = 2
- fig, ax = plt.subplots(nrows, ncols,
- figsize=(ncols * 4, nrows * 4))
+ fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * 4, nrows * 4))
pos = 0
row = 0
for n_obs in sorted(set(df.n_obs)):
a = ax[row, pos]
if row == ax.shape[0] - 1:
- a.set_xlabel("N features", fontsize='x-small')
+ a.set_xlabel("N features", fontsize="x-small")
if pos == 0:
- a.set_ylabel(
- "Time (s) n_obs={}".format(n_obs), fontsize='x-small')
+ a.set_ylabel("Time (s) n_obs={}".format(n_obs), fontsize="x-small")
- for color, kernel in zip('brgyc', sorted(set(df.kernel))):
+ for color, kernel in zip("brgyc", sorted(set(df.kernel))):
subset = df[(df.kernel == kernel)]
if subset.shape[0] == 0:
continue
@@ -159,13 +157,30 @@ def plot_results(df, verbose=False):
print(subset)
label = "skl %s" % kernel
- subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
- logx=True, logy=True, c=color, style='--', lw=5)
+ subset.plot(
+ x="nfeat",
+ y="time_skl",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ style="--",
+ lw=5,
+ )
label = "ort %s" % kernel
- subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
- logx=True, logy=True, c=color, lw=3)
-
- a.legend(loc=0, fontsize='x-small')
+ subset.plot(
+ x="nfeat",
+ y="time_ort",
+ label=label,
+ ax=a,
+ logx=True,
+ logy=True,
+ c=color,
+ lw=3,
+ )
+
+ a.legend(loc=0, fontsize="x-small")
if row == 0:
pass # a.set_title("n_jobs={}".format(n_jobs), fontsize='x-small')
row += 1
@@ -176,13 +191,12 @@ def plot_results(df, verbose=False):
@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):
n_obs = [1, 10, 100, 1000, 10000, 100000]
- methods = ['predict']
+ methods = ["predict"]
n_features = [10, 50]
kernels = ["linear", "poly", "rbf", "sigmoid"]
start = time()
- results = bench(n_obs, n_features, kernels,
- methods, repeat=repeat, verbose=verbose)
+ results = bench(n_obs, n_features, kernels, methods, repeat=repeat, verbose=verbose)
end = time()
results_df = pandas.DataFrame(results)
@@ -193,21 +207,24 @@ def run_bench(repeat=100, verbose=False):
return results_df
-if __name__ == '__main__':
+if __name__ == "__main__":
from datetime import datetime
import sklearn
import numpy
import onnx
import onnxruntime
import skl2onnx
- df = pandas.DataFrame([
- {"name": "date", "version": str(datetime.now())},
- {"name": "numpy", "version": numpy.__version__},
- {"name": "scikit-learn", "version": sklearn.__version__},
- {"name": "onnx", "version": onnx.__version__},
- {"name": "onnxruntime", "version": onnxruntime.__version__},
- {"name": "skl2onnx", "version": skl2onnx.__version__},
- ])
+
+ df = pandas.DataFrame(
+ [
+ {"name": "date", "version": str(datetime.now())},
+ {"name": "numpy", "version": numpy.__version__},
+ {"name": "scikit-learn", "version": sklearn.__version__},
+ {"name": "onnx", "version": onnx.__version__},
+ {"name": "onnxruntime", "version": onnxruntime.__version__},
+ {"name": "skl2onnx", "version": skl2onnx.__version__},
+ ]
+ )
df.to_csv("bench_plot_onnxruntime_svm_reg.time.csv", index=False)
print(df)
df = run_bench(verbose=True)
diff --git a/benchmarks/post_graph.py b/benchmarks/post_graph.py
index ff4e64671..2d3276e72 100644
--- a/benchmarks/post_graph.py
+++ b/benchmarks/post_graph.py
@@ -11,17 +11,20 @@
def autolabel(ax, rects):
for rect in rects:
height = rect.get_height()
- ax.annotate('%1.1fx' % height,
- xy=(rect.get_x() + rect.get_width() / 2, height),
- xytext=(0, 3), # 3 points vertical offset
- textcoords="offset points",
- ha='center', va='bottom',
- fontsize=8)
+ ax.annotate(
+ "%1.1fx" % height,
+ xy=(rect.get_x() + rect.get_width() / 2, height),
+ xytext=(0, 3), # 3 points vertical offset
+ textcoords="offset points",
+ ha="center",
+ va="bottom",
+ fontsize=8,
+ )
def linear_models():
- filename1 = os.path.join(HERE, 'bench_plot_onnxruntime_linreg.csv')
- filename2 = os.path.join(HERE, 'bench_plot_onnxruntime_logreg.csv')
+ filename1 = os.path.join(HERE, "bench_plot_onnxruntime_linreg.csv")
+ filename2 = os.path.join(HERE, "bench_plot_onnxruntime_logreg.csv")
if not os.path.exists(filename1) or not os.path.exists(filename2):
return
dfr = read_csv(filename1)
@@ -45,12 +48,12 @@ def linear_models():
x = numpy.arange(len(labels))
width = 0.90
- rects1 = ax.bar(x, means, width, label='Speedup')
+ rects1 = ax.bar(x, means, width, label="Speedup")
if pos == 0:
- ax.set_ylabel('Speedup')
- ax.set_title('%s %d features' % (name, nf))
- ax.set_xlabel('batch size')
+ ax.set_ylabel("Speedup")
+ ax.set_title("%s %d features" % (name, nf))
+ ax.set_xlabel("batch size")
ax.set_xticks(x)
ax.set_xticklabels(labels)
autolabel(ax, rects1)
@@ -65,20 +68,20 @@ def linear_models():
def svm_models():
- filename = os.path.join(HERE, 'bench_plot_onnxruntime_svm_reg.csv')
+ filename = os.path.join(HERE, "bench_plot_onnxruntime_svm_reg.csv")
if not os.path.exists(filename):
return
dfr = read_csv(filename)
dfr["speedup"] = dfr["time_skl"] / dfr["time_ort"]
print(dfr.tail())
- ncols = len(set(dfr['kernel']))
+ ncols = len(set(dfr["kernel"]))
fig, axs = plt.subplots(1, ncols, figsize=(14, 4), sharey=True)
name = "SVR"
nf = 50
pos = 0
- for kernel in sorted(set(dfr['kernel'])):
+ for kernel in sorted(set(dfr["kernel"])):
sub = dfr[(dfr.kernel == kernel) & (dfr.nfeat == nf)]
ax = axs[pos]
labels = sub.n_obs
@@ -87,12 +90,12 @@ def svm_models():
x = numpy.arange(len(labels))
width = 0.90
- rects1 = ax.bar(x, means, width, label='Speedup')
+ rects1 = ax.bar(x, means, width, label="Speedup")
if pos == 0:
- ax.set_ylabel('Speedup')
- ax.set_title('%s %s - %d features' % (name, kernel, nf))
- ax.set_xlabel('batch size')
+ ax.set_ylabel("Speedup")
+ ax.set_title("%s %s - %d features" % (name, kernel, nf))
+ ax.set_xlabel("batch size")
ax.set_xticks(x)
ax.set_xticklabels(labels)
autolabel(ax, rects1)
@@ -107,8 +110,7 @@ def svm_models():
def rf_models():
- filename = os.path.join(
- HERE, 'bench_plot_onnxruntime_random_forest_reg.csv')
+ filename = os.path.join(HERE, "bench_plot_onnxruntime_random_forest_reg.csv")
if not os.path.exists(filename):
return
dfr = read_csv(filename)
@@ -125,8 +127,11 @@ def rf_models():
for est in [100, 200]:
for n_jobs in [4]:
sub = dfr[
- (dfr.max_depth == max_depth) & (dfr.nfeat == nf) &
- (dfr.n_estimators == est) & (dfr.n_jobs == n_jobs)]
+ (dfr.max_depth == max_depth)
+ & (dfr.nfeat == nf)
+ & (dfr.n_estimators == est)
+ & (dfr.n_jobs == n_jobs)
+ ]
ax = axs[pos]
labels = sub.n_obs
means = sub.speedup
@@ -134,17 +139,18 @@ def rf_models():
x = numpy.arange(len(labels))
width = 0.90
- rects1 = ax.bar(x, means, width, label='Speedup')
+ rects1 = ax.bar(x, means, width, label="Speedup")
if pos == 0:
- ax.set_yscale('log')
- ax.set_ylim([0.1, max(dfr['speedup'])])
+ ax.set_yscale("log")
+ ax.set_ylim([0.1, max(dfr["speedup"])])
if pos == 0:
- ax.set_ylabel('Speedup')
+ ax.set_ylabel("Speedup")
ax.set_title(
- '%s\ndepth %d - %d features\n %d estimators %d jobs'
- '' % (name, max_depth, nf, est, n_jobs))
- ax.set_xlabel('batch size')
+ "%s\ndepth %d - %d features\n %d estimators %d jobs"
+ "" % (name, max_depth, nf, est, n_jobs)
+ )
+ ax.set_xlabel("batch size")
ax.set_xticks(x)
ax.set_xticklabels(labels)
autolabel(ax, rects1)
diff --git a/docs/conf.py b/docs/conf.py
index cbad4d184..a730d3b02 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -7,56 +7,55 @@
import sys
import warnings
import skl2onnx
-import pydata_sphinx_theme
-sys.path.append(os.path.abspath('exts'))
+sys.path.append(os.path.abspath("exts"))
from github_link import make_linkcode_resolve # noqa
# -- Project information -----------------------------------------------------
-project = 'sklearn-onnx'
-copyright = '2018-2023, Microsoft'
-author = 'Microsoft'
+project = "sklearn-onnx"
+copyright = "2018-2023, Microsoft"
+author = "Microsoft"
version = skl2onnx.__version__
release = version
# -- General configuration ---------------------------------------------------
extensions = [
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.imgmath',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.viewcode',
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.imgmath",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.viewcode",
"sphinx.ext.autodoc",
- 'sphinx.ext.githubpages',
+ "sphinx.ext.githubpages",
"sphinx_gallery.gen_gallery",
- 'sphinx.ext.autodoc',
- 'sphinx.ext.graphviz',
- 'sphinx_skl2onnx_extension',
- 'matplotlib.sphinxext.plot_directive',
- 'pyquickhelper.sphinxext.sphinx_cmdref_extension',
- 'pyquickhelper.sphinxext.sphinx_collapse_extension',
- 'pyquickhelper.sphinxext.sphinx_docassert_extension',
- 'pyquickhelper.sphinxext.sphinx_epkg_extension',
- 'pyquickhelper.sphinxext.sphinx_exref_extension',
- 'pyquickhelper.sphinxext.sphinx_faqref_extension',
- 'pyquickhelper.sphinxext.sphinx_gdot_extension',
- 'pyquickhelper.sphinxext.sphinx_runpython_extension',
+ "sphinx.ext.autodoc",
+ "sphinx.ext.graphviz",
+ "sphinx_skl2onnx_extension",
+ "matplotlib.sphinxext.plot_directive",
+ "pyquickhelper.sphinxext.sphinx_cmdref_extension",
+ "pyquickhelper.sphinxext.sphinx_collapse_extension",
+ "pyquickhelper.sphinxext.sphinx_docassert_extension",
+ "pyquickhelper.sphinxext.sphinx_epkg_extension",
+ "pyquickhelper.sphinxext.sphinx_exref_extension",
+ "pyquickhelper.sphinxext.sphinx_faqref_extension",
+ "pyquickhelper.sphinxext.sphinx_gdot_extension",
+ "pyquickhelper.sphinxext.sphinx_runpython_extension",
"sphinxcontrib.blockdiag",
]
-templates_path = ['_templates']
-source_suffix = ['.rst']
+templates_path = ["_templates"]
+source_suffix = [".rst"]
-master_doc = 'index'
+master_doc = "index"
language = "en"
exclude_patterns = []
-pygments_style = 'default'
+pygments_style = "default"
# -- Options for HTML output -------------------------------------------------
-html_static_path = ['_static']
+html_static_path = ["_static"]
html_theme = "furo"
html_logo = "logo_main.png"
@@ -67,42 +66,37 @@
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
+intersphinx_mapping = {"https://docs.python.org/": None}
# -- Options for Sphinx Gallery ----------------------------------------------
linkcode_resolve = make_linkcode_resolve(
- 'skl2onnx',
- 'https://github.com/onnx/skl2onnx/blob/{revision}/'
- '{package}/{path}#L{lineno}')
+ "skl2onnx",
+ "https://github.com/onnx/skl2onnx/blob/{revision}/" "{package}/{path}#L{lineno}",
+)
intersphinx_mapping = {
- 'joblib': ('https://joblib.readthedocs.io/en/latest/', None),
- 'python': ('https://docs.python.org/{.major}'.format(
- sys.version_info), None),
- 'matplotlib': ('https://matplotlib.org/', None),
- 'mlinsights': (
- 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/', None),
- 'numpy': ('https://docs.scipy.org/doc/numpy/', None),
- 'pyquickhelper': (
- 'http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/', None),
- 'onnxruntime': ('https://onnxruntime.ai/docs/api/python/', None),
- 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
- 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
- 'seaborn': ('https://seaborn.pydata.org/', None),
- 'scikit-learn': (
- 'https://scikit-learn.org/stable/',
- None),
- 'sklearn': ('https://scikit-learn.org/stable/', None),
- 'skl2onnx': ('https://onnx.ai/sklearn-onnx/', None),
- 'sklearn-onnx': ('https://onnx.ai/sklearn-onnx/', None),
+ "joblib": ("https://joblib.readthedocs.io/en/latest/", None),
+ "python": ("https://docs.python.org/{.major}".format(sys.version_info), None),
+ "matplotlib": ("https://matplotlib.org/", None),
+ "mlinsights": ("http://www.xavierdupre.fr/app/mlinsights/helpsphinx/", None),
+ "numpy": ("https://docs.scipy.org/doc/numpy/", None),
+ "pyquickhelper": ("http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/", None),
+ "onnxruntime": ("https://onnxruntime.ai/docs/api/python/", None),
+ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+ "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
+ "seaborn": ("https://seaborn.pydata.org/", None),
+ "scikit-learn": ("https://scikit-learn.org/stable/", None),
+ "sklearn": ("https://scikit-learn.org/stable/", None),
+ "skl2onnx": ("https://onnx.ai/sklearn-onnx/", None),
+ "sklearn-onnx": ("https://onnx.ai/sklearn-onnx/", None),
}
sphinx_gallery_conf = {
- 'examples_dirs': ['examples', 'tutorial'],
- 'gallery_dirs': ['auto_examples', 'auto_tutorial'],
- 'capture_repr': ('_repr_html_', '__repr__'),
- 'ignore_repr_types': r'matplotlib.text|matplotlib.axes',
+ "examples_dirs": ["examples", "tutorial"],
+ "gallery_dirs": ["auto_examples", "auto_tutorial"],
+ "capture_repr": ("_repr_html_", "__repr__"),
+ "ignore_repr_types": r"matplotlib.text|matplotlib.axes",
# 'binder': {
# 'org': 'onnx',
# 'repo': 'onnx.ai/sklearn-onnx/',
@@ -114,35 +108,32 @@
}
epkg_dictionary = {
- 'C': 'https://en.wikipedia.org/wiki/C_(programming_language)',
- 'C++': 'https://en.wikipedia.org/wiki/C%2B%2B',
- 'cython': 'https://cython.org/',
- 'DOT': 'https://www.graphviz.org/doc/info/lang.html',
- 'ImageNet': 'http://www.image-net.org/',
- 'LightGBM': 'https://lightgbm.readthedocs.io/en/latest/',
- 'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/',
- 'NMF':
- 'https://scikit-learn.org/stable/modules/generated/'
- 'sklearn.decomposition.NMF.html',
- 'numpy': 'https://numpy.org/',
- 'onnx': 'https://github.com/onnx/onnx',
- 'ONNX': 'https://onnx.ai/',
- 'ONNX operators':
- 'https://github.com/onnx/onnx/blob/master/docs/Operators.md',
- 'ONNX ML operators':
- 'https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md',
- 'onnxmltools': 'https://github.com/onnx/onnxmltools',
- 'onnxruntime': 'https://microsoft.github.io/onnxruntime/',
- 'openmp': 'https://en.wikipedia.org/wiki/OpenMP',
- 'pyinstrument': 'https://github.com/joerick/pyinstrument',
- 'python': 'https://www.python.org/',
- 'pytorch': 'https://pytorch.org/',
- 'scikit-learn': 'https://scikit-learn.org/stable/',
- 'skorch': 'https://skorch.readthedocs.io/en/stable/',
- 'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx',
- 'sphinx-gallery': 'https://github.com/sphinx-gallery/sphinx-gallery',
- 'xgboost': 'https://xgboost.readthedocs.io/en/latest/',
- 'XGBoost': 'https://xgboost.readthedocs.io/en/latest/',
+ "C": "https://en.wikipedia.org/wiki/C_(programming_language)",
+ "C++": "https://en.wikipedia.org/wiki/C%2B%2B",
+ "cython": "https://cython.org/",
+ "DOT": "https://www.graphviz.org/doc/info/lang.html",
+ "ImageNet": "http://www.image-net.org/",
+ "LightGBM": "https://lightgbm.readthedocs.io/en/latest/",
+ "lightgbm": "https://lightgbm.readthedocs.io/en/latest/",
+ "NMF": "https://scikit-learn.org/stable/modules/generated/"
+ "sklearn.decomposition.NMF.html",
+ "numpy": "https://numpy.org/",
+ "onnx": "https://github.com/onnx/onnx",
+ "ONNX": "https://onnx.ai/",
+ "ONNX operators": "https://github.com/onnx/onnx/blob/master/docs/Operators.md",
+ "ONNX ML operators": "https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md",
+ "onnxmltools": "https://github.com/onnx/onnxmltools",
+ "onnxruntime": "https://microsoft.github.io/onnxruntime/",
+ "openmp": "https://en.wikipedia.org/wiki/OpenMP",
+ "pyinstrument": "https://github.com/joerick/pyinstrument",
+ "python": "https://www.python.org/",
+ "pytorch": "https://pytorch.org/",
+ "scikit-learn": "https://scikit-learn.org/stable/",
+ "skorch": "https://skorch.readthedocs.io/en/stable/",
+ "sklearn-onnx": "https://github.com/onnx/sklearn-onnx",
+ "sphinx-gallery": "https://github.com/sphinx-gallery/sphinx-gallery",
+ "xgboost": "https://xgboost.readthedocs.io/en/latest/",
+ "XGBoost": "https://xgboost.readthedocs.io/en/latest/",
}
warnings.filterwarnings("ignore", category=FutureWarning)
diff --git a/docs/examples/plot_backend.py b/docs/examples/plot_backend.py
index dd127c252..251399c02 100644
--- a/docs/examples/plot_backend.py
+++ b/docs/examples/plot_backend.py
@@ -44,11 +44,11 @@
# Let's use ONNX backend API to test it.
model = onnx.load(name)
-rep = backend.prepare(model, 'CPU')
-x = np.array([[-1.0, -2.0, 5.0, 6.0],
- [-1.0, -2.0, -3.0, -4.0],
- [-1.0, -2.0, 7.0, 8.0]],
- dtype=np.float32)
+rep = backend.prepare(model, "CPU")
+x = np.array(
+ [[-1.0, -2.0, 5.0, 6.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, 7.0, 8.0]],
+ dtype=np.float32,
+)
label, proba = rep.run(x)
print("label={}".format(label))
print("probabilities={}".format(proba))
@@ -62,11 +62,11 @@
# The backend can also directly load the model
# without using *onnx*.
-rep = backend.prepare(name, 'CPU')
-x = np.array([[-1.0, -2.0, -3.0, -4.0],
- [-1.0, -2.0, -3.0, -4.0],
- [-1.0, -2.0, -3.0, -4.0]],
- dtype=np.float32)
+rep = backend.prepare(name, "CPU")
+x = np.array(
+ [[-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0]],
+ dtype=np.float32,
+)
label, proba = rep.run(x)
print("label={}".format(label))
print("probabilities={}".format(proba))
diff --git a/docs/examples/plot_benchmark_cdist.py b/docs/examples/plot_benchmark_cdist.py
index b02074b36..9247d7a93 100644
--- a/docs/examples/plot_benchmark_cdist.py
+++ b/docs/examples/plot_benchmark_cdist.py
@@ -34,15 +34,13 @@
X = np.ones((2, 4), dtype=np.float32)
Y = np.ones((3, 4), dtype=np.float32)
Y *= 2
-print(cdist(X, Y, metric='euclidean'))
+print(cdist(X, Y, metric="euclidean"))
####################################
# ONNX
-op = OnnxCDist('X', 'Y', op_version=12, output_names=['Z'],
- metric='euclidean')
-onx = op.to_onnx({'X': X, 'Y': Y},
- outputs=[('Z', FloatTensorType())])
+op = OnnxCDist("X", "Y", op_version=12, output_names=["Z"], metric="euclidean")
+onx = op.to_onnx({"X": X, "Y": Y}, outputs=[("Z", FloatTensorType())])
print(onx)
@@ -53,9 +51,8 @@
# We compute the output of CDist operator
# with onnxruntime.
-sess = InferenceSession(onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
-res = sess.run(None, {'X': X, 'Y': Y})
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
+res = sess.run(None, {"X": X, "Y": Y})
print(res)
#####################################
@@ -67,25 +64,30 @@
def measure_time(name, stmt, context, repeat=100, number=20):
tim = Timer(stmt, globals=context)
- res = np.array(
- tim.repeat(repeat=repeat, number=number))
+ res = np.array(tim.repeat(repeat=repeat, number=number))
res /= number
mean = np.mean(res)
- dev = np.mean(res ** 2)
+ dev = np.mean(res**2)
dev = (dev - mean**2) ** 0.5
return dict(
- average=mean, deviation=dev, min_exec=np.min(res),
- max_exec=np.max(res), repeat=repeat, number=number,
- nrows=context['X'].shape[0], ncols=context['Y'].shape[1],
- name=name)
+ average=mean,
+ deviation=dev,
+ min_exec=np.min(res),
+ max_exec=np.max(res),
+ repeat=repeat,
+ number=number,
+ nrows=context["X"].shape[0],
+ ncols=context["Y"].shape[1],
+ name=name,
+ )
##############################
# scipy
time_scipy = measure_time(
- "scipy", "cdist(X, Y)",
- context={'cdist': cdist, 'X': X, 'Y': Y})
+ "scipy", "cdist(X, Y)", context={"cdist": cdist, "X": X, "Y": Y}
+)
pprint(time_scipy)
@@ -93,8 +95,8 @@ def measure_time(name, stmt, context, repeat=100, number=20):
# onnxruntime
time_ort = measure_time(
- "ort", "sess.run(None, {'X': X, 'Y': Y})",
- context={'sess': sess, 'X': X, 'Y': Y})
+ "ort", "sess.run(None, {'X': X, 'Y': Y})", context={"sess": sess, "X": X, "Y": Y}
+)
pprint(time_ort)
############################################
@@ -108,20 +110,21 @@ def measure_time(name, stmt, context, repeat=100, number=20):
Y = np.random.randn(10, 4).astype(np.float32)
time_scipy = measure_time(
- "scipy", "cdist(X, Y)",
- context={'cdist': cdist, 'X': X, 'Y': Y})
+ "scipy", "cdist(X, Y)", context={"cdist": cdist, "X": X, "Y": Y}
+ )
time_ort = measure_time(
- "ort", "sess.run(None, {'X': X, 'Y': Y})",
- context={'sess': sess, 'X': X, 'Y': Y})
- metric = dict(N=dim, scipy=time_scipy['average'],
- ort=time_ort['average'])
+ "ort",
+ "sess.run(None, {'X': X, 'Y': Y})",
+ context={"sess": sess, "X": X, "Y": Y},
+ )
+ metric = dict(N=dim, scipy=time_scipy["average"], ort=time_ort["average"])
metrics.append(metric)
df = DataFrame(metrics)
-df['scipy/ort'] = df['scipy'] / df['ort']
+df["scipy/ort"] = df["scipy"] / df["ort"]
print(df)
-df.plot(x='N', y=['scipy/ort'])
+df.plot(x="N", y=["scipy/ort"])
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_benchmark_pipeline.py b/docs/examples/plot_benchmark_pipeline.py
index 8db485cc8..5634cfcf2 100644
--- a/docs/examples/plot_benchmark_pipeline.py
+++ b/docs/examples/plot_benchmark_pipeline.py
@@ -40,7 +40,7 @@
logistic = LogisticRegression()
pca = PCA()
-pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
+pipe = Pipeline(steps=[("pca", pca), ("logistic", logistic)])
digits = datasets.load_digits()
X_digits = digits.data[:1000]
@@ -53,15 +53,15 @@
# ++++++++++++++++++
-initial_types = [('input', FloatTensorType((None, X_digits.shape[1])))]
-model_onnx = convert_sklearn(pipe, initial_types=initial_types,
- target_opset=12)
+initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))]
+model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12)
-sess = rt.InferenceSession(model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+sess = rt.InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
print("skl predict_proba")
print(pipe.predict_proba(X_digits[:2]))
-onx_pred = sess.run(None, {'input': X_digits[:2].astype(np.float32)})[1]
+onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1]
df = pd.DataFrame(onx_pred)
print("onnx predict_proba")
print(df.values)
@@ -78,11 +78,15 @@
# ++++++++++
print("scikit-learn")
-print(timeit("pipe.predict_proba(X_digits[:1])",
- number=10000, globals=globals()))
+print(timeit("pipe.predict_proba(X_digits[:1])", number=10000, globals=globals()))
print("onnxruntime")
-print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]",
- number=10000, globals=globals()))
+print(
+ timeit(
+ "sess.run(None, {'input': X_digits[:1].astype(np.float32)})[1]",
+ number=10000,
+ globals=globals(),
+ )
+)
###############################################
# Intermediate steps
@@ -95,34 +99,47 @@
# an smaller ONNX graph for every operator.
-steps = collect_intermediate_steps(
- pipe, "pipeline", initial_types)
+steps = collect_intermediate_steps(pipe, "pipeline", initial_types)
assert len(steps) == 2
pipe.predict_proba(X_digits[:2])
for i, step in enumerate(steps):
- onnx_step = step['onnx_step']
- sess = rt.InferenceSession(onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)})
- skl_outputs = step['model']._debug.outputs
- if 'transform' in skl_outputs:
- compare_objects(skl_outputs['transform'], onnx_outputs[0])
- print("benchmark", step['model'].__class__)
+ onnx_step = step["onnx_step"]
+ sess = rt.InferenceSession(
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)})
+ skl_outputs = step["model"]._debug.outputs
+ if "transform" in skl_outputs:
+ compare_objects(skl_outputs["transform"], onnx_outputs[0])
+ print("benchmark", step["model"].__class__)
print("scikit-learn")
- print(timeit("step['model'].transform(X_digits[:1])",
- number=10000, globals=globals()))
+ print(
+ timeit(
+ "step['model'].transform(X_digits[:1])", number=10000, globals=globals()
+ )
+ )
else:
- compare_objects(skl_outputs['predict_proba'], onnx_outputs[1])
- print("benchmark", step['model'].__class__)
+ compare_objects(skl_outputs["predict_proba"], onnx_outputs[1])
+ print("benchmark", step["model"].__class__)
print("scikit-learn")
- print(timeit("step['model'].predict_proba(X_digits[:1])",
- number=10000, globals=globals()))
+ print(
+ timeit(
+ "step['model'].predict_proba(X_digits[:1])",
+ number=10000,
+ globals=globals(),
+ )
+ )
print("onnxruntime")
- print(timeit("sess.run(None, {'input': X_digits[:1].astype(np.float32)})",
- number=10000, globals=globals()))
+ print(
+ timeit(
+ "sess.run(None, {'input': X_digits[:1].astype(np.float32)})",
+ number=10000,
+ globals=globals(),
+ )
+ )
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_black_op.py b/docs/examples/plot_black_op.py
index 5969a7406..d5c46d639 100644
--- a/docs/examples/plot_black_op.py
+++ b/docs/examples/plot_black_op.py
@@ -43,32 +43,39 @@
# ++++++++++++++++++
model_onnx = to_onnx(
- model, X_train[:1].astype(np.float32),
- options={id(model): {'score_samples': True}},
- target_opset=12)
-sess = InferenceSession(model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model,
+ X_train[:1].astype(np.float32),
+ options={id(model): {"score_samples": True}},
+ target_opset=12,
+)
+sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
xt = X_test[:5].astype(np.float32)
print(model.score_samples(xt))
-print(sess.run(None, {'X': xt})[2])
+print(sess.run(None, {"X": xt})[2])
##################################
# Display the ONNX graph.
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("mixture.dot")
-os.system('dot -O -Gdpi=300 -Tpng mixture.dot')
+os.system("dot -O -Gdpi=300 -Tpng mixture.dot")
image = plt.imread("mixture.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
###################################
@@ -80,43 +87,58 @@
# produces in that case.
model_onnx2 = to_onnx(
- model, X_train[:1].astype(np.float32),
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp'},
- target_opset=12)
-sess2 = InferenceSession(model_onnx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model,
+ X_train[:1].astype(np.float32),
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp"},
+ target_opset=12,
+)
+sess2 = InferenceSession(
+ model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"]
+)
xt = X_test[:5].astype(np.float32)
print(model.score_samples(xt))
-print(sess2.run(None, {'X': xt})[2])
+print(sess2.run(None, {"X": xt})[2])
##################################
# Display the ONNX graph.
pydot_graph = GetPydotGraph(
- model_onnx2.graph, name=model_onnx2.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ model_onnx2.graph,
+ name=model_onnx2.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("mixture2.dot")
-os.system('dot -O -Gdpi=300 -Tpng mixture2.dot')
+os.system("dot -O -Gdpi=300 -Tpng mixture2.dot")
image = plt.imread("mixture2.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#######################################
# Processing time
# +++++++++++++++
-print(timeit(stmt="sess.run(None, {'X': xt})",
- number=10000, globals={'sess': sess, 'xt': xt}))
+print(
+ timeit(
+ stmt="sess.run(None, {'X': xt})", number=10000, globals={"sess": sess, "xt": xt}
+ )
+)
-print(timeit(stmt="sess2.run(None, {'X': xt})",
- number=10000, globals={'sess2': sess2, 'xt': xt}))
+print(
+ timeit(
+ stmt="sess2.run(None, {'X': xt})",
+ number=10000,
+ globals={"sess2": sess2, "xt": xt},
+ )
+)
#################################
# The model using ReduceLogSumExp is much faster.
@@ -132,21 +154,25 @@
try:
to_onnx(
- model, X_train[:1].astype(np.float32),
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp', 'Add'},
- target_opset=12)
+ model,
+ X_train[:1].astype(np.float32),
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp", "Add"},
+ target_opset=12,
+ )
except RuntimeError as e:
- print('Error:', e)
+ print("Error:", e)
#################################
# **Versions used for this example**
import sklearn # noqa
+
print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
import skl2onnx # noqa
+
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
diff --git a/docs/examples/plot_cast_transformer.py b/docs/examples/plot_cast_transformer.py
index 11449b808..34efc74f8 100644
--- a/docs/examples/plot_cast_transformer.py
+++ b/docs/examples/plot_cast_transformer.py
@@ -50,38 +50,32 @@
# The weird data.
X, y = make_regression(10000, 10, random_state=3)
-X_train, X_test, y_train, _ = train_test_split(
- X, y, random_state=3)
+X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3)
Xi_train, yi_train = X_train.copy(), y_train.copy()
Xi_test = X_test.copy()
for i in range(X.shape[1]):
- Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype(
- np.int64)
- Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype(
- np.int64)
+ Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(np.int64)
+ Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(np.int64)
max_depth = 10
Xi_test = Xi_test.astype(np.float32)
#################################
# A simple model.
-model1 = Pipeline([
- ('scaler', StandardScaler()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
-])
+model1 = Pipeline(
+ [("scaler", StandardScaler()), ("dt", DecisionTreeRegressor(max_depth=max_depth))]
+)
model1.fit(Xi_train, yi_train)
exp1 = model1.predict(Xi_test)
#################################
# Conversion into ONNX.
-onx1 = to_onnx(model1, X_train[:1].astype(np.float32),
- target_opset=15)
-sess1 = InferenceSession(onx1.SerializeToString(),
- providers=["CPUExecutionProvider"])
+onx1 = to_onnx(model1, X_train[:1].astype(np.float32), target_opset=15)
+sess1 = InferenceSession(onx1.SerializeToString(), providers=["CPUExecutionProvider"])
###################################
# And the maximum difference.
-got1 = sess1.run(None, {'X': Xi_test})[0]
+got1 = sess1.run(None, {"X": Xi_test})[0]
def maxdiff(a1, a2):
@@ -96,17 +90,21 @@ def maxdiff(a1, a2):
# The graph.
pydot_graph = GetPydotGraph(
- onx1.graph, name=onx1.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ onx1.graph,
+ name=onx1.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("cast1.dot")
-os.system('dot -O -Gdpi=300 -Tpng cast1.dot')
+os.system("dot -O -Gdpi=300 -Tpng cast1.dot")
image = plt.imread("cast1.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
########################################
# New pipeline
@@ -124,23 +122,27 @@ def maxdiff(a1, a2):
# `'div'`) and to use double by inserting an explicit
# Cast.
-model2 = Pipeline([
- ('cast64', CastTransformer(dtype=np.float64)),
- ('scaler', StandardScaler()),
- ('cast', CastTransformer()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
-])
+model2 = Pipeline(
+ [
+ ("cast64", CastTransformer(dtype=np.float64)),
+ ("scaler", StandardScaler()),
+ ("cast", CastTransformer()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+)
model2.fit(Xi_train, yi_train)
exp2 = model2.predict(Xi_test)
-onx2 = to_onnx(model2, X_train[:1].astype(np.float32),
- options={StandardScaler: {'div': 'div_cast'}},
- target_opset=15)
+onx2 = to_onnx(
+ model2,
+ X_train[:1].astype(np.float32),
+ options={StandardScaler: {"div": "div_cast"}},
+ target_opset=15,
+)
-sess2 = InferenceSession(onx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
-got2 = sess2.run(None, {'X': Xi_test})[0]
+sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"])
+got2 = sess2.run(None, {"X": Xi_test})[0]
md2 = maxdiff(exp2, got2)
print(md2)
@@ -149,25 +151,31 @@ def maxdiff(a1, a2):
# The graph.
pydot_graph = GetPydotGraph(
- onx2.graph, name=onx2.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ onx2.graph,
+ name=onx2.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("cast2.dot")
-os.system('dot -O -Gdpi=300 -Tpng cast2.dot')
+os.system("dot -O -Gdpi=300 -Tpng cast2.dot")
image = plt.imread("cast2.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
import sklearn # noqa
+
print("numpy:", np.__version__)
print("scikit-learn:", sklearn.__version__)
import skl2onnx # noqa
+
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
diff --git a/docs/examples/plot_complex_pipeline.py b/docs/examples/plot_complex_pipeline.py
index 1e4c58a2c..f9404d944 100644
--- a/docs/examples/plot_complex_pipeline.py
+++ b/docs/examples/plot_complex_pipeline.py
@@ -50,40 +50,50 @@
from skl2onnx.common.data_types import FloatTensorType, StringTensorType
from skl2onnx.common.data_types import Int64TensorType
-titanic_url = ('https://raw.githubusercontent.com/amueller/'
- 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')
+titanic_url = (
+ "https://raw.githubusercontent.com/amueller/"
+ "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
+)
data = pd.read_csv(titanic_url)
-X = data.drop('survived', axis=1)
-y = data['survived']
+X = data.drop("survived", axis=1)
+y = data["survived"]
print(data.dtypes)
# SimpleImputer on string is not available for
# string in ONNX-ML specifications.
# So we do it beforehand.
-for cat in ['embarked', 'sex', 'pclass']:
- X[cat].fillna('missing', inplace=True)
+for cat in ["embarked", "sex", "pclass"]:
+ X[cat].fillna("missing", inplace=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-numeric_features = ['age', 'fare']
-numeric_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='median')),
- ('scaler', StandardScaler())])
+numeric_features = ["age", "fare"]
+numeric_transformer = Pipeline(
+ steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
+)
-categorical_features = ['embarked', 'sex', 'pclass']
-categorical_transformer = Pipeline(steps=[
- # --- SimpleImputer is not available for strings in ONNX-ML specifications.
- # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
- ('onehot', OneHotEncoder(handle_unknown='ignore'))])
+categorical_features = ["embarked", "sex", "pclass"]
+categorical_transformer = Pipeline(
+ steps=[
+ # --- SimpleImputer is not available for strings in ONNX-ML specifications.
+ # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+ ("onehot", OneHotEncoder(handle_unknown="ignore"))
+ ]
+)
preprocessor = ColumnTransformer(
transformers=[
- ('num', numeric_transformer, numeric_features),
- ('cat', categorical_transformer, categorical_features),
- ])
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+)
-clf = Pipeline(steps=[('preprocessor', preprocessor),
- ('classifier', LogisticRegression(solver='lbfgs'))])
+clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", LogisticRegression(solver="lbfgs")),
+ ]
+)
clf.fit(X_train, y_train)
@@ -106,9 +116,9 @@ def convert_dataframe_schema(df, drop=None):
for k, v in zip(df.columns, df.dtypes):
if drop is not None and k in drop:
continue
- if v == 'int64':
+ if v == "int64":
t = Int64TensorType([None, 1])
- elif v == 'float64':
+ elif v == "float64":
t = FloatTensorType([None, 1])
else:
t = StringTensorType([None, 1])
@@ -130,8 +140,9 @@ def convert_dataframe_schema(df, drop=None):
# ++++++++++++++++++++++++++++++
try:
- model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs,
- target_opset=12)
+ model_onnx = convert_sklearn(
+ clf, "pipeline_titanic", initial_inputs, target_opset=12
+ )
except Exception as e:
print(e)
@@ -140,12 +151,12 @@ def convert_dataframe_schema(df, drop=None):
# That's why the converter checks that there is no unused input.
# They need to be removed from the graph inputs.
-to_drop = {'parch', 'sibsp', 'cabin', 'ticket',
- 'name', 'body', 'home.dest', 'boat'}
+to_drop = {"parch", "sibsp", "cabin", "ticket", "name", "body", "home.dest", "boat"}
initial_inputs = convert_dataframe_schema(X_train, to_drop)
try:
- model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs,
- target_opset=12)
+ model_onnx = convert_sklearn(
+ clf, "pipeline_titanic", initial_inputs, target_opset=12
+ )
except Exception as e:
print(e)
@@ -156,8 +167,7 @@ def convert_dataframe_schema(df, drop=None):
initial_inputs = convert_dataframe_schema(X_train, to_drop)
-model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs,
- target_opset=12)
+model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs, target_opset=12)
# And save.
@@ -196,8 +206,7 @@ def convert_dataframe_schema(df, drop=None):
################################
# We are ready to run *onnxruntime*.
-sess = rt.InferenceSession("pipeline_titanic.onnx",
- providers=["CPUExecutionProvider"])
+sess = rt.InferenceSession("pipeline_titanic.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, inputs)
print("predict", pred_onx[0][:5])
print("predict_proba", pred_onx[1][:2])
@@ -207,14 +216,19 @@ def convert_dataframe_schema(df, drop=None):
# Let's swith to an array but that requires to convert again with
# an additional option zipmap.
-model_onnx = convert_sklearn(clf, 'pipeline_titanic', initial_inputs,
- target_opset=12,
- options={id(clf): {'zipmap': False}})
+model_onnx = convert_sklearn(
+ clf,
+ "pipeline_titanic",
+ initial_inputs,
+ target_opset=12,
+ options={id(clf): {"zipmap": False}},
+)
with open("pipeline_titanic_nozipmap.onnx", "wb") as f:
f.write(model_onnx.SerializeToString())
-sess = rt.InferenceSession("pipeline_titanic_nozipmap.onnx",
- providers=["CPUExecutionProvider"])
+sess = rt.InferenceSession(
+ "pipeline_titanic_nozipmap.onnx", providers=["CPUExecutionProvider"]
+)
pred_onx = sess.run(None, inputs)
print("predict", pred_onx[0][:5])
print("predict_proba", pred_onx[1][:2])
@@ -231,20 +245,22 @@ def convert_dataframe_schema(df, drop=None):
#
# Finally, let's see the graph converted with *sklearn-onnx*.
-pydot_graph = GetPydotGraph(model_onnx.graph, name=model_onnx.graph.name,
- rankdir="TB",
- node_producer=GetOpNodeProducer("docstring",
- color="yellow",
- fillcolor="yellow",
- style="filled"))
+pydot_graph = GetPydotGraph(
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_titanic.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic.dot")
image = plt.imread("pipeline_titanic.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_convert_decision_function.py b/docs/examples/plot_convert_decision_function.py
index 8c15a70fb..886501325 100644
--- a/docs/examples/plot_convert_decision_function.py
+++ b/docs/examples/plot_convert_decision_function.py
@@ -37,9 +37,8 @@
clr.fit(X_train, y_train)
print(clr)
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
############################
# Output type
@@ -48,9 +47,8 @@
# Let's confirm the output type of the probabilities
# is a list of dictionaries with onnxruntime.
-sess = rt.InferenceSession(onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
-res = sess.run(None, {'float_input': X_test.astype(numpy.float32)})
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
+res = sess.run(None, {"float_input": X_test.astype(numpy.float32)})
print("skl", clr.predict_proba(X_test[:1]))
print("onnx", res[1][:2])
@@ -59,14 +57,16 @@
# ++++++++++++++++++++++++++++++++
#
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-options = {id(clr): {'raw_scores': True}}
-onx2 = convert_sklearn(clr, initial_types=initial_type, options=options,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+options = {id(clr): {"raw_scores": True}}
+onx2 = convert_sklearn(
+ clr, initial_types=initial_type, options=options, target_opset=12
+)
-sess2 = rt.InferenceSession(onx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
-res2 = sess2.run(None, {'float_input': X_test.astype(numpy.float32)})
+sess2 = rt.InferenceSession(
+ onx2.SerializeToString(), providers=["CPUExecutionProvider"]
+)
+res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)})
print("skl", clr.decision_function(X_test[:1]))
print("onnx", res2[1][:2])
diff --git a/docs/examples/plot_convert_model.py b/docs/examples/plot_convert_model.py
index 2ccbf5eb8..af00277ef 100644
--- a/docs/examples/plot_convert_model.py
+++ b/docs/examples/plot_convert_model.py
@@ -32,6 +32,7 @@
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
+
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
@@ -43,9 +44,8 @@
# Convert a model into ONNX
# +++++++++++++++++++++++++
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
with open("rf_iris.onnx", "wb") as f:
f.write(onx.SerializeToString())
@@ -56,8 +56,7 @@
sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
-pred_onx = sess.run(
- [label_name], {input_name: X_test.astype(numpy.float32)})[0]
+pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
#######################################
@@ -65,17 +64,15 @@
clr = LogisticRegression()
clr.fit(X_train, y_train)
-initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
-onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))]
+onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
with open("logreg_iris.onnx", "wb") as f:
f.write(onx.SerializeToString())
sess = rt.InferenceSession("logreg_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
-pred_onx = sess.run([label_name],
- {input_name: X_test.astype(numpy.float32)})[0]
+pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
diff --git a/docs/examples/plot_convert_syntax.py b/docs/examples/plot_convert_syntax.py
index 2eced5ec4..bdff42218 100644
--- a/docs/examples/plot_convert_syntax.py
+++ b/docs/examples/plot_convert_syntax.py
@@ -36,6 +36,7 @@ def predict_with_onnxruntime(onx, X):
res = sess.run(None, {input_name: X.astype(np.float32)})
return res[0]
+
#################################
# Simple KMeans
# +++++++++++++
@@ -48,8 +49,8 @@ def predict_with_onnxruntime(onx, X):
tr.fit(X)
onx = convert_sklearn(
- tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
- target_opset=12)
+ tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12
+)
print(predict_with_onnxruntime(onx, X))
#################################
@@ -83,8 +84,7 @@ def predict_with_onnxruntime(onx, X):
# before fitting the model.
X = np.arange(20).reshape(10, 2)
-tr = wrap_as_onnx_mixin(KMeans(n_clusters=2),
- target_opset=12)
+tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=12)
tr.fit(X)
onx = tr.to_onnx(X.astype(np.float32))
@@ -97,9 +97,7 @@ def predict_with_onnxruntime(onx, X):
# This is a simple scaler.
-class CustomOpTransformer(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
-
+class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -116,20 +114,22 @@ def transform(self, X):
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = operator.inputs[0].type
+
return shape_calculator
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
- raise RuntimeError("Parameter inputs should contain at least "
- "one name.")
+ raise RuntimeError("Parameter inputs should contain at least " "one name.")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
W = self.W_.astype(np.float32)
S = self.S_.astype(np.float32)
- return OnnxDiv(OnnxSub(i0, W, op_version=12), S,
- output_names=outputs,
- op_version=opv)
+ return OnnxDiv(
+ OnnxSub(i0, W, op_version=12), S, output_names=outputs, op_version=opv
+ )
+
#############################
# Way 1
@@ -140,8 +140,8 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ),
tr.fit(X)
onx = convert_sklearn(
- tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
- target_opset=12)
+ tr, initial_types=[("X", FloatTensorType((None, X.shape[1])))], target_opset=12
+)
print(predict_with_onnxruntime(onx, X))
#############################
@@ -171,8 +171,8 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ),
X = np.arange(20).reshape(10, 2)
tr = wrap_as_onnx_mixin(
- make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)),
- target_opset=12)
+ make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2)), target_opset=12
+)
tr.fit(X)
@@ -186,28 +186,37 @@ def to_onnx_operator(self, inputs=None, outputs=('Y', ),
# Finally, let's see the graph converted with *sklearn-onnx*.
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer # noqa
-pydot_graph = GetPydotGraph(onx.graph, name=onx.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer(
- "docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+
+pydot_graph = GetPydotGraph(
+ onx.graph,
+ name=onx.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_onnx_mixin.dot")
import os # noqa
-os.system('dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot')
+
+os.system("dot -O -Gdpi=300 -Tpng pipeline_onnx_mixin.dot")
import matplotlib.pyplot as plt # noqa
+
image = plt.imread("pipeline_onnx_mixin.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
import sklearn # noqa
+
print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
import skl2onnx # noqa
+
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
diff --git a/docs/examples/plot_convert_zipmap.py b/docs/examples/plot_convert_zipmap.py
index 1b0eaa48e..fbe48207b 100644
--- a/docs/examples/plot_convert_zipmap.py
+++ b/docs/examples/plot_convert_zipmap.py
@@ -38,9 +38,8 @@
clr.fit(X_train, y_train)
print(clr)
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
############################
# Output type
@@ -50,7 +49,7 @@
# is a list of dictionaries with onnxruntime.
sess = rt.InferenceSession(onx.SerializeToString())
-res = sess.run(None, {'float_input': X_test.astype(numpy.float32)})
+res = sess.run(None, {"float_input": X_test.astype(numpy.float32)})
print(res[1][:2])
print("probabilities type:", type(res[1]))
print("type for the first observations:", type(res[1][0]))
@@ -61,13 +60,14 @@
#
# Let's remove the ZipMap operator.
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-options = {id(clr): {'zipmap': False}}
-onx2 = convert_sklearn(clr, initial_types=initial_type, options=options,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+options = {id(clr): {"zipmap": False}}
+onx2 = convert_sklearn(
+ clr, initial_types=initial_type, options=options, target_opset=12
+)
sess2 = rt.InferenceSession(onx2.SerializeToString())
-res2 = sess2.run(None, {'float_input': X_test.astype(numpy.float32)})
+res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)})
print(res2[1][:2])
print("probabilities type:", type(res2[1]))
print("type for the first observations:", type(res2[1][0]))
@@ -80,15 +80,19 @@
# the probabilities into columns. The final model produces
# one output for the label, and one output per class.
-options = {id(clr): {'zipmap': 'columns'}}
-onx3 = convert_sklearn(clr, initial_types=initial_type, options=options,
- target_opset=12)
+options = {id(clr): {"zipmap": "columns"}}
+onx3 = convert_sklearn(
+ clr, initial_types=initial_type, options=options, target_opset=12
+)
sess3 = rt.InferenceSession(onx3.SerializeToString())
-res3 = sess3.run(None, {'float_input': X_test.astype(numpy.float32)})
+res3 = sess3.run(None, {"float_input": X_test.astype(numpy.float32)})
for i, out in enumerate(sess3.get_outputs()):
- print("output: '{}' shape={} values={}...".format(
- out.name, res3[i].shape, res3[i][:2]))
+ print(
+ "output: '{}' shape={} values={}...".format(
+ out.name, res3[i].shape, res3[i][:2]
+ )
+ )
###################################
@@ -98,16 +102,13 @@
X32 = X_test.astype(numpy.float32)
print("Time with ZipMap:")
-print(repeat(lambda: sess.run(None, {'float_input': X32}),
- number=100, repeat=10))
+print(repeat(lambda: sess.run(None, {"float_input": X32}), number=100, repeat=10))
print("Time without ZipMap:")
-print(repeat(lambda: sess2.run(None, {'float_input': X32}),
- number=100, repeat=10))
+print(repeat(lambda: sess2.run(None, {"float_input": X32}), number=100, repeat=10))
print("Time without ZipMap but with columns:")
-print(repeat(lambda: sess3.run(None, {'float_input': X32}),
- number=100, repeat=10))
+print(repeat(lambda: sess3.run(None, {"float_input": X32}), number=100, repeat=10))
# The prediction is much faster without ZipMap
# on this example.
diff --git a/docs/examples/plot_custom_model.py b/docs/examples/plot_custom_model.py
index a2a687ad5..ac14d66c3 100644
--- a/docs/examples/plot_custom_model.py
+++ b/docs/examples/plot_custom_model.py
@@ -63,9 +63,14 @@
class PredictableTSNE(BaseEstimator, TransformerMixin):
-
- def __init__(self, transformer=None, estimator=None,
- normalize=True, keep_tsne_outputs=False, **kwargs):
+ def __init__(
+ self,
+ transformer=None,
+ estimator=None,
+ normalize=True,
+ keep_tsne_outputs=False,
+ **kwargs
+ ):
"""
:param transformer: `TSNE` by default
:param estimator: `MLPRegressor` by default
@@ -90,11 +95,12 @@ def __init__(self, transformer=None, estimator=None,
if not hasattr(transformer, "fit_transform"):
raise AttributeError(
"Transformer {} does not have a 'fit_transform' "
- "method.".format(type(transformer)))
+ "method.".format(type(transformer))
+ )
if not hasattr(estimator, "predict"):
raise AttributeError(
- "Estimator {} does not have a 'predict' method.".format(
- type(estimator)))
+ "Estimator {} does not have a 'predict' method.".format(type(estimator))
+ )
self.normalize = normalize
if kwargs:
self.set_params(**kwargs)
@@ -132,21 +138,22 @@ def fit(self, X, y, sample_weight=None):
sig = inspect.signature(self.transformer.fit_transform)
pars = {}
- for p in ['sample_weight', 'y']:
+ for p in ["sample_weight", "y"]:
if p in sig.parameters and p in params:
pars[p] = params[p]
target = self.transformer_.fit_transform(X, **pars)
sig = inspect.signature(self.estimator.fit)
- if 'sample_weight' in sig.parameters:
+ if "sample_weight" in sig.parameters:
self.estimator_ = clone(self.estimator).fit(
- X, target, sample_weight=sample_weight)
+ X, target, sample_weight=sample_weight
+ )
else:
self.estimator_ = clone(self.estimator).fit(X, target)
mean = target.mean(axis=0)
var = target.std(axis=0)
self.mean_ = mean
- self.inv_std_ = 1. / var
+ self.inv_std_ = 1.0 / var
exp = (target - mean) * self.inv_std_
got = (self.estimator_.predict(X) - mean) * self.inv_std_
self.loss_ = mean_squared_error(exp, got)
@@ -191,11 +198,11 @@ def set_params(self, **values):
"""
pt, pe, pn = {}, {}, {}
for k, v in values.items():
- if k.startswith('e_'):
+ if k.startswith("e_"):
pe[k[2:]] = v
- elif k.startswith('t_'):
+ elif k.startswith("t_"):
pt[k[2:]] = v
- elif k.startswith('n_'):
+ elif k.startswith("n_"):
pn[k[2:]] = v
else:
raise ValueError("Unexpected parameter name '{0}'.".format(k))
@@ -217,10 +224,9 @@ def set_params(self, **values):
n_samples, n_features = Xd.shape
n_samples, n_features
-X_train, X_test, y_train, y_test, imgs_train, imgs_test = train_test_split(
- Xd, yd, imgs)
+X_train, X_test, y_train, y_test, imgs_train, imgs_test = train_test_split(Xd, yd, imgs)
-tsne = TSNE(n_components=2, init='pca', random_state=0)
+tsne = TSNE(n_components=2, init="pca", random_state=0)
def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)):
@@ -229,13 +235,17 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)):
fig, ax = plt.subplots(1, 2, figsize=figsize)
for i in range(X.shape[0]):
- ax[0].text(X[i, 0], X[i, 1], str(y[i]),
- color=plt.cm.Set1(y[i] / 10.),
- fontdict={'weight': 'bold', 'size': 9})
-
- if hasattr(offsetbox, 'AnnotationBbox'):
+ ax[0].text(
+ X[i, 0],
+ X[i, 1],
+ str(y[i]),
+ color=plt.cm.Set1(y[i] / 10.0),
+ fontdict={"weight": "bold", "size": 9},
+ )
+
+ if hasattr(offsetbox, "AnnotationBbox"):
# only print thumbnails with matplotlib > 1.0
- shown_images = numpy.array([[1., 1.]]) # just something big
+ shown_images = numpy.array([[1.0, 1.0]]) # just something big
for i in range(X.shape[0]):
dist = numpy.sum((X[i] - shown_images) ** 2, 1)
if numpy.min(dist) < 4e-3:
@@ -243,19 +253,18 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)):
continue
shown_images = numpy.r_[shown_images, [X[i]]]
imagebox = offsetbox.AnnotationBbox(
- offsetbox.OffsetImage(imgs[i], cmap=plt.cm.gray_r),
- X[i])
+ offsetbox.OffsetImage(imgs[i], cmap=plt.cm.gray_r), X[i]
+ )
ax[0].add_artist(imagebox)
ax[0].set_xticks([]), ax[0].set_yticks([])
- ax[1].plot(Xp[:, 0], Xp[:, 1], '.')
+ ax[1].plot(Xp[:, 0], Xp[:, 1], ".")
if title is not None:
ax[0].set_title(title)
return ax
X_train_tsne = tsne.fit_transform(X_train)
-plot_embedding(X_train_tsne, y_train, imgs_train,
- "t-SNE embedding of the digits")
+plot_embedding(X_train_tsne, y_train, imgs_train, "t-SNE embedding of the digits")
#######################################
# Repeatable t-SNE
@@ -267,18 +276,24 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)):
ptsne_knn.fit(X_train, y_train)
X_train_tsne2 = ptsne_knn.transform(X_train)
-plot_embedding(X_train_tsne2, y_train, imgs_train,
- "Predictable t-SNE of the digits\n"
- "StandardScaler+KNeighborsRegressor")
+plot_embedding(
+ X_train_tsne2,
+ y_train,
+ imgs_train,
+ "Predictable t-SNE of the digits\n" "StandardScaler+KNeighborsRegressor",
+)
################################
# We check on test set.
X_test_tsne2 = ptsne_knn.transform(X_test)
-plot_embedding(X_test_tsne2, y_test, imgs_test,
- "Predictable t-SNE of the digits\n"
- "StandardScaler+KNeighborsRegressor")
+plot_embedding(
+ X_test_tsne2,
+ y_test,
+ imgs_test,
+ "Predictable t-SNE of the digits\n" "StandardScaler+KNeighborsRegressor",
+)
#######################################
# ONNX - shape_calculator, converter
@@ -292,13 +307,12 @@ def plot_embedding(Xp, y, imgs, title=None, figsize=(12, 4)):
def predictable_tsne_shape_calculator(operator):
-
- input = operator.inputs[0] # inputs in ONNX graph
+ input = operator.inputs[0] # inputs in ONNX graph
# output = operator.outputs[0] # output in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+    op = operator.raw_operator  # scikit-learn model (must be fitted)
- N = input.type.shape[0] # number of observations
- C = op.estimator_._y.shape[1] # dimension of outputs
+ N = input.type.shape[0] # number of observations
+ C = op.estimator_._y.shape[1] # dimension of outputs
# new output definition
operator.outputs[0].type = FloatTensorType([N, C])
@@ -317,8 +331,8 @@ def predictable_tsne_converter(scope, operator, container):
:param container: contains the ONNX graph
"""
# input = operator.inputs[0] # input in ONNX graph
- output = operator.outputs[0] # output in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+ output = operator.outputs[0] # output in ONNX graph
+    op = operator.raw_operator  # scikit-learn model (must be fitted)
# First step is the k nearest-neighbours,
# we reuse existing converter and declare it as local
@@ -329,7 +343,7 @@ def predictable_tsne_converter(scope, operator, container):
knn_op.inputs = operator.inputs
# We add an intermediate outputs.
- knn_output = scope.declare_local_variable('knn_output', FloatTensorType())
+ knn_output = scope.declare_local_variable("knn_output", FloatTensorType())
knn_op.outputs.append(knn_output)
# We adjust the output of the submodel.
@@ -337,27 +351,38 @@ def predictable_tsne_converter(scope, operator, container):
shape_calc(knn_op)
# We add the normalizer which needs a unique node name.
- name = scope.get_unique_operator_name('Scaler')
+ name = scope.get_unique_operator_name("Scaler")
# The parameter follows the specifications of ONNX
# https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md#ai.onnx.ml.Scaler
- attrs = dict(name=name,
- scale=op.inv_std_.ravel().astype(numpy.float32),
- offset=op.mean_.ravel().astype(numpy.float32))
+ attrs = dict(
+ name=name,
+ scale=op.inv_std_.ravel().astype(numpy.float32),
+ offset=op.mean_.ravel().astype(numpy.float32),
+ )
# Let's finally add the scaler which connects the output
# of the k-nearest neighbours model to output of the whole model
# declared in ONNX graph
- container.add_node('Scaler', [knn_output.onnx_name], [output.full_name],
- op_domain='ai.onnx.ml', **attrs)
+ container.add_node(
+ "Scaler",
+ [knn_output.onnx_name],
+ [output.full_name],
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
+
##################################
# We now need to declare the new converter.
-update_registered_converter(PredictableTSNE, 'CustomPredictableTSNE',
- predictable_tsne_shape_calculator,
- predictable_tsne_converter)
+update_registered_converter(
+ PredictableTSNE,
+ "CustomPredictableTSNE",
+ predictable_tsne_shape_calculator,
+ predictable_tsne_converter,
+)
####################################
# Conversion to ONNX
@@ -367,9 +392,11 @@ def predictable_tsne_converter(scope, operator, container):
# to convert.
model_onnx = convert_sklearn(
- ptsne_knn, 'predictable_tsne',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- target_opset=12)
+ ptsne_knn,
+ "predictable_tsne",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=12,
+)
# And save.
with open("predictable_tsne.onnx", "wb") as f:
@@ -401,17 +428,21 @@ def predictable_tsne_converter(scope, operator, container):
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_tsne.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_tsne.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_tsne.dot")
image = plt.imread("pipeline_tsne.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_custom_parser.py b/docs/examples/plot_custom_parser.py
index d79b32bad..ecbe99f95 100644
--- a/docs/examples/plot_custom_parser.py
+++ b/docs/examples/plot_custom_parser.py
@@ -33,9 +33,7 @@
import os
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnxruntime as rt
-from onnxconverter_common.onnx_ops import (
- apply_identity, apply_cast, apply_greater
-)
+from onnxconverter_common.onnx_ops import apply_identity, apply_cast, apply_greater
from skl2onnx import to_onnx, get_model_alias
from skl2onnx.proto import onnx_proto
from skl2onnx.common._registration import get_shape_calculator
@@ -44,20 +42,20 @@
class ValidatorClassifier(BaseEstimator, ClassifierMixin):
-
def __init__(self, estimator=None, threshold=0.75):
ClassifierMixin.__init__(self)
BaseEstimator.__init__(self)
if estimator is None:
- estimator = LogisticRegression(solver='liblinear')
+ estimator = LogisticRegression(solver="liblinear")
self.estimator = estimator
self.threshold = threshold
def fit(self, X, y, sample_weight=None):
sig = inspect.signature(self.estimator.fit)
- if 'sample_weight' in sig.parameters:
+ if "sample_weight" in sig.parameters:
self.estimator_ = clone(self.estimator).fit(
- X, y, sample_weight=sample_weight)
+ X, y, sample_weight=sample_weight
+ )
else:
self.estimator_ = clone(self.estimator).fit(X, y)
return self
@@ -97,8 +95,7 @@ def validate(self, X):
# to this new model.
try:
- to_onnx(model, X_train[:1].astype(np.float32),
- target_opset=12)
+ to_onnx(model, X_train[:1].astype(np.float32), target_opset=12)
except RuntimeError as e:
print(e)
@@ -112,27 +109,27 @@ def validate(self, X):
def validator_classifier_shape_calculator(operator):
-
input0 = operator.inputs[0] # inputs in ONNX graph
outputs = operator.outputs # outputs in ONNX graph
op = operator.raw_operator # scikit-learn model (mmust be fitted)
if len(outputs) != 3:
raise RuntimeError("3 outputs expected not {}.".format(len(outputs)))
- N = input0.type.shape[0] # number of observations
- C = op.estimator_.classes_.shape[0] # dimension of outputs
+ N = input0.type.shape[0] # number of observations
+ C = op.estimator_.classes_.shape[0] # dimension of outputs
+
+ outputs[0].type = Int64TensorType([N]) # label
+ outputs[1].type = FloatTensorType([N, C]) # probabilities
+ outputs[2].type = Int64TensorType([C]) # validation
- outputs[0].type = Int64TensorType([N]) # label
- outputs[1].type = FloatTensorType([N, C]) # probabilities
- outputs[2].type = Int64TensorType([C]) # validation
#############################
# Then the converter.
def validator_classifier_converter(scope, operator, container):
- outputs = operator.outputs # outputs in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+ outputs = operator.outputs # outputs in ONNX graph
+    op = operator.raw_operator  # scikit-learn model (must be fitted)
# We reuse existing converter and declare it
# as a local operator.
@@ -142,8 +139,8 @@ def validator_classifier_converter(scope, operator, container):
val_op.inputs = operator.inputs
# We add an intermediate outputs.
- val_label = scope.declare_local_variable('val_label', Int64TensorType())
- val_prob = scope.declare_local_variable('val_prob', FloatTensorType())
+ val_label = scope.declare_local_variable("val_label", Int64TensorType())
+ val_prob = scope.declare_local_variable("val_prob", FloatTensorType())
val_op.outputs.append(val_label)
val_op.outputs.append(val_prob)
@@ -152,30 +149,36 @@ def validator_classifier_converter(scope, operator, container):
shape_calc(val_op)
# We now handle the validation.
- val_max = scope.get_unique_variable_name('val_max')
+ val_max = scope.get_unique_variable_name("val_max")
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceMax', [val_prob.full_name, axis_name], val_max,
- name=scope.get_unique_operator_name('ReduceMax'),
- keepdims=0)
+ "ReduceMax",
+ [val_prob.full_name, axis_name],
+ val_max,
+ name=scope.get_unique_operator_name("ReduceMax"),
+ keepdims=0,
+ )
else:
container.add_node(
- 'ReduceMax', val_prob.full_name, val_max,
- name=scope.get_unique_operator_name('ReduceMax'),
- axes=[1], keepdims=0)
-
- th_name = scope.get_unique_variable_name('threshold')
+ "ReduceMax",
+ val_prob.full_name,
+ val_max,
+ name=scope.get_unique_operator_name("ReduceMax"),
+ axes=[1],
+ keepdims=0,
+ )
+
+ th_name = scope.get_unique_variable_name("threshold")
container.add_initializer(
- th_name, onnx_proto.TensorProto.FLOAT, [1], [op.threshold])
- val_bin = scope.get_unique_variable_name('val_bin')
+ th_name, onnx_proto.TensorProto.FLOAT, [1], [op.threshold]
+ )
+ val_bin = scope.get_unique_variable_name("val_bin")
apply_greater(scope, [val_max, th_name], val_bin, container)
- val_val = scope.get_unique_variable_name('validate')
- apply_cast(scope, val_bin, val_val, container,
- to=onnx_proto.TensorProto.INT64)
+ val_val = scope.get_unique_variable_name("validate")
+ apply_cast(scope, val_bin, val_val, container, to=onnx_proto.TensorProto.INT64)
# We finally link the intermediate output to the shared converter.
apply_identity(scope, val_label.full_name, outputs[0].full_name, container)
@@ -187,16 +190,18 @@ def validator_classifier_converter(scope, operator, container):
# Then the registration.
-update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier',
- validator_classifier_shape_calculator,
- validator_classifier_converter)
+update_registered_converter(
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
+ validator_classifier_shape_calculator,
+ validator_classifier_converter,
+)
########################
# And conversion...
try:
- to_onnx(model, X_test[:1].astype(np.float32),
- target_opset=12)
+ to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)
except RuntimeError as e:
print(e)
@@ -218,9 +223,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
this_operator.inputs.append(inputs[0])
# outputs
- val_label = scope.declare_local_variable('val_label', Int64TensorType())
- val_prob = scope.declare_local_variable('val_prob', FloatTensorType())
- val_val = scope.declare_local_variable('val_val', Int64TensorType())
+ val_label = scope.declare_local_variable("val_label", Int64TensorType())
+ val_prob = scope.declare_local_variable("val_prob", FloatTensorType())
+ val_val = scope.declare_local_variable("val_val", Int64TensorType())
this_operator.outputs.append(val_label)
this_operator.outputs.append(val_prob)
this_operator.outputs.append(val_val)
@@ -228,20 +233,23 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
# end
return this_operator.outputs
+
###############################
# Registration.
-update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier',
- validator_classifier_shape_calculator,
- validator_classifier_converter,
- parser=validator_classifier_parser)
+update_registered_converter(
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
+ validator_classifier_shape_calculator,
+ validator_classifier_converter,
+ parser=validator_classifier_parser,
+)
#############################
# And conversion again.
-model_onnx = to_onnx(model, X_test[:1].astype(np.float32),
- target_opset=12)
+model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)
#######################################
# Final test
@@ -252,7 +260,7 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
X32 = X_test[:5].astype(np.float32)
sess = rt.InferenceSession(model_onnx.SerializeToString())
-results = sess.run(None, {'X': X32})
+results = sess.run(None, {"X": X32})
print("--labels--")
print("sklearn", model.predict(X32))
@@ -271,17 +279,21 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("validator_classifier.dot")
-os.system('dot -O -Gdpi=300 -Tpng validator_classifier.dot')
+os.system("dot -O -Gdpi=300 -Tpng validator_classifier.dot")
image = plt.imread("validator_classifier.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_custom_parser_alternative.py b/docs/examples/plot_custom_parser_alternative.py
index 617d4b838..48b41a0b1 100644
--- a/docs/examples/plot_custom_parser_alternative.py
+++ b/docs/examples/plot_custom_parser_alternative.py
@@ -42,27 +42,30 @@
from skl2onnx.proto import onnx_proto
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx.algebra.onnx_ops import (
- OnnxGreater, OnnxCast, OnnxReduceMaxApi18, OnnxIdentity
+ OnnxGreater,
+ OnnxCast,
+ OnnxReduceMaxApi18,
+ OnnxIdentity,
)
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
import matplotlib.pyplot as plt
class ValidatorClassifier(BaseEstimator, ClassifierMixin):
-
def __init__(self, estimator=None, threshold=0.75):
ClassifierMixin.__init__(self)
BaseEstimator.__init__(self)
if estimator is None:
- estimator = LogisticRegression(solver='liblinear')
+ estimator = LogisticRegression(solver="liblinear")
self.estimator = estimator
self.threshold = threshold
def fit(self, X, y, sample_weight=None):
sig = inspect.signature(self.estimator.fit)
- if 'sample_weight' in sig.parameters:
+ if "sample_weight" in sig.parameters:
self.estimator_ = clone(self.estimator).fit(
- X, y, sample_weight=sample_weight)
+ X, y, sample_weight=sample_weight
+ )
else:
self.estimator_ = clone(self.estimator).fit(X, y)
return self
@@ -102,8 +105,7 @@ def validate(self, X):
# to this new model.
try:
- to_onnx(model, X_train[:1].astype(np.float32),
- target_opset=12)
+ to_onnx(model, X_train[:1].astype(np.float32), target_opset=12)
except RuntimeError as e:
print(e)
@@ -117,48 +119,44 @@ def validate(self, X):
def validator_classifier_shape_calculator(operator):
-
- input0 = operator.inputs[0] # first input in ONNX graph
- outputs = operator.outputs # outputs in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+ input0 = operator.inputs[0] # first input in ONNX graph
+ outputs = operator.outputs # outputs in ONNX graph
+    op = operator.raw_operator  # scikit-learn model (must be fitted)
if len(outputs) != 3:
raise RuntimeError("3 outputs expected not {}.".format(len(outputs)))
- N = input0.type.shape[0] # number of observations
- C = op.estimator_.classes_.shape[0] # dimension of outputs
+ N = input0.type.shape[0] # number of observations
+ C = op.estimator_.classes_.shape[0] # dimension of outputs
+
+ outputs[0].type = Int64TensorType([N]) # label
+ outputs[1].type = FloatTensorType([N, C]) # probabilities
+ outputs[2].type = Int64TensorType([C]) # validation
- outputs[0].type = Int64TensorType([N]) # label
- outputs[1].type = FloatTensorType([N, C]) # probabilities
- outputs[2].type = Int64TensorType([C]) # validation
#############################
# Then the converter.
def validator_classifier_converter(scope, operator, container):
- input0 = operator.inputs[0] # first input in ONNX graph
- outputs = operator.outputs # outputs in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+ input0 = operator.inputs[0] # first input in ONNX graph
+ outputs = operator.outputs # outputs in ONNX graph
+    op = operator.raw_operator  # scikit-learn model (must be fitted)
opv = container.target_opset
# The model calls another one. The class `OnnxSubEstimator`
# calls the converter for this operator.
model = op.estimator_
- onnx_op = OnnxSubEstimator(model, input0, op_version=opv,
- options={'zipmap': False})
+ onnx_op = OnnxSubEstimator(model, input0, op_version=opv, options={"zipmap": False})
rmax = OnnxReduceMaxApi18(onnx_op[1], axes=[1], keepdims=0, op_version=opv)
- great = OnnxGreater(rmax, np.array([op.threshold], dtype=np.float32),
- op_version=opv)
- valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64,
- op_version=opv)
-
- r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name],
- op_version=opv)
- r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name],
- op_version=opv)
- r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name],
- op_version=opv)
+ great = OnnxGreater(
+ rmax, np.array([op.threshold], dtype=np.float32), op_version=opv
+ )
+ valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, op_version=opv)
+
+ r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], op_version=opv)
+ r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], op_version=opv)
+ r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], op_version=opv)
r1.add_to(scope, container)
r2.add_to(scope, container)
@@ -169,16 +167,18 @@ def validator_classifier_converter(scope, operator, container):
# Then the registration.
-update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier',
- validator_classifier_shape_calculator,
- validator_classifier_converter)
+update_registered_converter(
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
+ validator_classifier_shape_calculator,
+ validator_classifier_converter,
+)
########################
# And conversion...
try:
- to_onnx(model, X_test[:1].astype(np.float32),
- target_opset=12)
+ to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)
except RuntimeError as e:
print(e)
@@ -200,9 +200,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
this_operator.inputs.append(inputs[0])
# outputs
- val_label = scope.declare_local_variable('val_label', Int64TensorType())
- val_prob = scope.declare_local_variable('val_prob', FloatTensorType())
- val_val = scope.declare_local_variable('val_val', Int64TensorType())
+ val_label = scope.declare_local_variable("val_label", Int64TensorType())
+ val_prob = scope.declare_local_variable("val_prob", FloatTensorType())
+ val_val = scope.declare_local_variable("val_val", Int64TensorType())
this_operator.outputs.append(val_label)
this_operator.outputs.append(val_prob)
this_operator.outputs.append(val_val)
@@ -210,20 +210,23 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
# ends
return this_operator.outputs
+
###############################
# Registration.
-update_registered_converter(ValidatorClassifier, 'CustomValidatorClassifier',
- validator_classifier_shape_calculator,
- validator_classifier_converter,
- parser=validator_classifier_parser)
+update_registered_converter(
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
+ validator_classifier_shape_calculator,
+ validator_classifier_converter,
+ parser=validator_classifier_parser,
+)
#############################
# And conversion again.
-model_onnx = to_onnx(model, X_test[:1].astype(np.float32),
- target_opset=12)
+model_onnx = to_onnx(model, X_test[:1].astype(np.float32), target_opset=12)
#######################################
# Final test
@@ -234,7 +237,7 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
X32 = X_test[:5].astype(np.float32)
sess = rt.InferenceSession(model_onnx.SerializeToString())
-results = sess.run(None, {'X': X32})
+results = sess.run(None, {"X": X32})
print("--labels--")
print("sklearn", model.predict(X32))
@@ -253,17 +256,21 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("validator_classifier.dot")
-os.system('dot -O -Gdpi=300 -Tpng validator_classifier.dot')
+os.system("dot -O -Gdpi=300 -Tpng validator_classifier.dot")
image = plt.imread("validator_classifier.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_errors_onnxruntime.py b/docs/examples/plot_errors_onnxruntime.py
index c4ddd3e50..5e92fdd30 100644
--- a/docs/examples/plot_errors_onnxruntime.py
+++ b/docs/examples/plot_errors_onnxruntime.py
@@ -24,6 +24,7 @@
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
except ImportError:
@@ -35,8 +36,9 @@
with open("logreg_iris.onnx", "wb") as f:
f.write(
skl2onnx.to_onnx(
- clr, data.data[:, :2].astype(np.float32),
- target_opset=12).SerializeToString())
+ clr, data.data[:, :2].astype(np.float32), target_opset=12
+ ).SerializeToString()
+ )
example2 = "logreg_iris.onnx"
sess = rt.InferenceSession(example2)
@@ -50,8 +52,7 @@
# and cannot handle any other kind of floats.
try:
- x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]],
- dtype=np.float64)
+ x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], dtype=np.float64)
sess.run([output_name], {input_name: x})
except Exception as e:
print("Unexpected type")
@@ -92,11 +93,12 @@
# dimension is a multiple of the expected input dimension.
for x in [
- np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
- np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32),
- np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
- np.array([1.0, 2.0, 3.0], dtype=np.float32),
- np.array([[1.0, 2.0, 3.0]], dtype=np.float32)]:
+ np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
+ np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32),
+ np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
+ np.array([1.0, 2.0, 3.0], dtype=np.float32),
+ np.array([[1.0, 2.0, 3.0]], dtype=np.float32),
+]:
try:
r = sess.run([output_name], {input_name: x})
print("Shape={0} and predicted labels={1}".format(x.shape, r))
@@ -104,15 +106,15 @@
print("Shape={0} and error={1}".format(x.shape, e))
for x in [
- np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
- np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32),
- np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
- np.array([1.0, 2.0, 3.0], dtype=np.float32),
- np.array([[1.0, 2.0, 3.0]], dtype=np.float32)]:
+ np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
+ np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32),
+ np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
+ np.array([1.0, 2.0, 3.0], dtype=np.float32),
+ np.array([[1.0, 2.0, 3.0]], dtype=np.float32),
+]:
try:
r = sess.run(None, {input_name: x})
- print("Shape={0} and predicted probabilities={1}".format(
- x.shape, r[1]))
+ print("Shape={0} and predicted probabilities={1}".format(x.shape, r[1]))
except (RuntimeError, InvalidArgument) as e:
print("Shape={0} and error={1}".format(x.shape, e))
@@ -121,9 +123,10 @@
# is higher than expected but produces a warning.
for x in [
- np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float32),
- np.array([[[1.0, 2.0, 3.0]]], dtype=np.float32),
- np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=np.float32)]:
+ np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype=np.float32),
+ np.array([[[1.0, 2.0, 3.0]]], dtype=np.float32),
+ np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=np.float32),
+]:
try:
r = sess.run([output_name], {input_name: x})
print("Shape={0} and predicted labels={1}".format(x.shape, r))
diff --git a/docs/examples/plot_gpr.py b/docs/examples/plot_gpr.py
index 402fbe1b6..b38412ecd 100644
--- a/docs/examples/plot_gpr.py
+++ b/docs/examples/plot_gpr.py
@@ -37,7 +37,7 @@
dataset = load_diabetes()
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
-gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.)
+gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.0)
gpr.fit(X_train, y_train)
print(gpr)
@@ -48,14 +48,12 @@
# The documentation suggests the following way to
# convert a model into ONNX.
-initial_type = [('X', FloatTensorType([None, X_train.shape[1]]))]
-onx = convert_sklearn(gpr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("X", FloatTensorType([None, X_train.shape[1]]))]
+onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
sess = rt.InferenceSession(onx.SerializeToString())
try:
- pred_onx = sess.run(
- None, {'X': X_test.astype(numpy.float32)})[0]
+ pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
except RuntimeError as e:
print(str(e))
@@ -73,13 +71,11 @@
# the fixed dimensions by an empty value.
# (see next line).
-initial_type = [('X', FloatTensorType([None, None]))]
-onx = convert_sklearn(gpr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("X", FloatTensorType([None, None]))]
+onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
sess = rt.InferenceSession(onx.SerializeToString())
-pred_onx = sess.run(
- None, {'X': X_test.astype(numpy.float32)})[0]
+pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
pred_skl = gpr.predict(X_test)
print(pred_skl[:10])
@@ -90,10 +86,9 @@
# Let's confirm that by looking at the biggest
# differences.
-diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) -
- numpy.squeeze(pred_onx)))[-5:]
+diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - numpy.squeeze(pred_onx)))[-5:]
print(diff)
-print('min(Y)-max(Y):', min(y_test), max(y_test))
+print("min(Y)-max(Y):", min(y_test), max(y_test))
###########################
# Third attempt: use of double
@@ -113,22 +108,20 @@
# constant matrix such as the trained coefficients
# will be dumped as doubles and not as floats anymore.
-initial_type = [('X', DoubleTensorType([None, None]))]
-onx64 = convert_sklearn(gpr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("X", DoubleTensorType([None, None]))]
+onx64 = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
sess64 = rt.InferenceSession(onx64.SerializeToString())
-pred_onx64 = sess64.run(None, {'X': X_test})[0]
+pred_onx64 = sess64.run(None, {"X": X_test})[0]
print(pred_onx64[0, :10])
################################
# The new differences look much better.
-diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) -
- numpy.squeeze(pred_onx64)))[-5:]
+diff = numpy.sort(numpy.abs(numpy.squeeze(pred_skl) - numpy.squeeze(pred_onx64)))[-5:]
print(diff)
-print('min(Y)-max(Y):', min(y_test), max(y_test))
+print("min(Y)-max(Y):", min(y_test), max(y_test))
####################################
# Size increase
@@ -156,11 +149,12 @@
# That's done through the option mechanism
# (see :ref:`l-conv-options`).
-initial_type = [('X', DoubleTensorType([None, None]))]
-options = {GaussianProcessRegressor: {'return_std': True}}
+initial_type = [("X", DoubleTensorType([None, None]))]
+options = {GaussianProcessRegressor: {"return_std": True}}
try:
- onx64_std = convert_sklearn(gpr, initial_types=initial_type,
- options=options, target_opset=12)
+ onx64_std = convert_sklearn(
+ gpr, initial_types=initial_type, options=options, target_opset=12
+ )
except RuntimeError as e:
print(e)
@@ -171,11 +165,12 @@
# predict at least once and then converting again.
gpr.predict(X_test[:1], return_std=True)
-onx64_std = convert_sklearn(gpr, initial_types=initial_type,
- options=options, target_opset=12)
+onx64_std = convert_sklearn(
+ gpr, initial_types=initial_type, options=options, target_opset=12
+)
sess64_std = rt.InferenceSession(onx64_std.SerializeToString())
-pred_onx64_std = sess64_std.run(None, {'X': X_test[:5]})
+pred_onx64_std = sess64_std.run(None, {"X": X_test[:5]})
pprint.pprint(pred_onx64_std)
@@ -188,12 +183,13 @@
# It looks good. Let's do a better check.
-pred_onx64_std = sess64_std.run(None, {'X': X_test})
+pred_onx64_std = sess64_std.run(None, {"X": X_test})
pred_std = gpr.predict(X_test, return_std=True)
-diff = numpy.sort(numpy.abs(numpy.squeeze(pred_onx64_std[1]) -
- numpy.squeeze(pred_std[1])))[-5:]
+diff = numpy.sort(
+ numpy.abs(numpy.squeeze(pred_onx64_std[1]) - numpy.squeeze(pred_std[1]))
+)[-5:]
print(diff)
#################################
diff --git a/docs/examples/plot_intermediate_outputs.py b/docs/examples/plot_intermediate_outputs.py
index deb5483ed..98937ce83 100644
--- a/docs/examples/plot_intermediate_outputs.py
+++ b/docs/examples/plot_intermediate_outputs.py
@@ -40,7 +40,10 @@
from skl2onnx import convert_sklearn
import pprint
from skl2onnx.common.data_types import (
- FloatTensorType, StringTensorType, Int64TensorType)
+ FloatTensorType,
+ StringTensorType,
+ Int64TensorType,
+)
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
@@ -50,39 +53,49 @@
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
-titanic_url = ('https://raw.githubusercontent.com/amueller/'
- 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')
+titanic_url = (
+ "https://raw.githubusercontent.com/amueller/"
+ "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
+)
data = pd.read_csv(titanic_url)
-X = data.drop('survived', axis=1)
-y = data['survived']
+X = data.drop("survived", axis=1)
+y = data["survived"]
# SimpleImputer on string is not available
# for string in ONNX-ML specifications.
# So we do it beforehand.
-for cat in ['embarked', 'sex', 'pclass']:
- X[cat].fillna('missing', inplace=True)
+for cat in ["embarked", "sex", "pclass"]:
+ X[cat].fillna("missing", inplace=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-numeric_features = ['age', 'fare']
-numeric_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='median')),
- ('scaler', StandardScaler())])
+numeric_features = ["age", "fare"]
+numeric_transformer = Pipeline(
+ steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
+)
-categorical_features = ['embarked', 'sex', 'pclass']
-categorical_transformer = Pipeline(steps=[
- # --- SimpleImputer is not available for strings in ONNX-ML specifications.
- # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
- ('onehot', OneHotEncoder(handle_unknown='ignore'))])
+categorical_features = ["embarked", "sex", "pclass"]
+categorical_transformer = Pipeline(
+ steps=[
+ # --- SimpleImputer is not available for strings in ONNX-ML specifications.
+ # ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+ ("onehot", OneHotEncoder(handle_unknown="ignore"))
+ ]
+)
preprocessor = ColumnTransformer(
transformers=[
- ('num', numeric_transformer, numeric_features),
- ('cat', categorical_transformer, categorical_features),
- ])
-
-clf = Pipeline(steps=[('preprocessor', preprocessor),
- ('classifier', LogisticRegression(solver='lbfgs'))])
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+)
+
+clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", LogisticRegression(solver="lbfgs")),
+ ]
+)
clf.fit(X_train, y_train)
@@ -104,9 +117,9 @@ def convert_dataframe_schema(df, drop=None):
for k, v in zip(df.columns, df.dtypes):
if drop is not None and k in drop:
continue
- if v == 'int64':
+ if v == "int64":
t = Int64TensorType([None, 1])
- elif v == 'float64':
+ elif v == "float64":
t = FloatTensorType([None, 1])
else:
t = StringTensorType([None, 1])
@@ -128,8 +141,7 @@ def convert_dataframe_schema(df, drop=None):
# ++++++++++++++++++++++++++++++
try:
- model_onnx = convert_sklearn(clf, 'pipeline_titanic', inputs,
- target_opset=12)
+ model_onnx = convert_sklearn(clf, "pipeline_titanic", inputs, target_opset=12)
except Exception as e:
print(e)
@@ -138,14 +150,13 @@ def convert_dataframe_schema(df, drop=None):
# *sklearn-onnx* does not. The ONNX version of *OneHotEncoder*
# must be applied on columns of the same type.
-X_train['pclass'] = X_train['pclass'].astype(str)
-X_test['pclass'] = X_test['pclass'].astype(str)
+X_train["pclass"] = X_train["pclass"].astype(str)
+X_test["pclass"] = X_test["pclass"].astype(str)
white_list = numeric_features + categorical_features
to_drop = [c for c in X_train.columns if c not in white_list]
inputs = convert_dataframe_schema(X_train, to_drop)
-model_onnx = convert_sklearn(clf, 'pipeline_titanic', inputs,
- target_opset=12)
+model_onnx = convert_sklearn(clf, "pipeline_titanic", inputs, target_opset=12)
# And save.
@@ -211,7 +222,7 @@ def convert_dataframe_schema(df, drop=None):
# and textual pipeline: *variable1*, *variable2*.
# Let's look into the numerical pipeline first.
-num_onnx = select_model_inputs_outputs(model_onnx, 'variable1')
+num_onnx = select_model_inputs_outputs(model_onnx, "variable1")
save_onnx_model(num_onnx, "pipeline_titanic_numerical.onnx")
################################
@@ -225,7 +236,7 @@ def convert_dataframe_schema(df, drop=None):
# We do the same for the textual features.
print(model_onnx)
-text_onnx = select_model_inputs_outputs(model_onnx, 'variable2')
+text_onnx = select_model_inputs_outputs(model_onnx, "variable2")
save_onnx_model(text_onnx, "pipeline_titanic_textual.onnx")
sess = rt.InferenceSession("pipeline_titanic_textual.onnx")
numT = sess.run(None, inputs)
@@ -238,33 +249,41 @@ def convert_dataframe_schema(df, drop=None):
# Finally, let's see both subgraphs. First, numerical pipeline.
pydot_graph = GetPydotGraph(
- num_onnx.graph, name=num_onnx.graph.name, rankdir="TB",
+ num_onnx.graph,
+ name=num_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_titanic_num.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic_num.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic_num.dot")
image = plt.imread("pipeline_titanic_num.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
######################################
# Then textual pipeline.
pydot_graph = GetPydotGraph(
- text_onnx.graph, name=text_onnx.graph.name, rankdir="TB",
+ text_onnx.graph,
+ name=text_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_titanic_text.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_titanic_text.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_titanic_text.dot")
image = plt.imread("pipeline_titanic_text.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_investigate_pipeline.py b/docs/examples/plot_investigate_pipeline.py
index 51fc56afc..b131d4fb8 100644
--- a/docs/examples/plot_investigate_pipeline.py
+++ b/docs/examples/plot_investigate_pipeline.py
@@ -39,8 +39,7 @@
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
-pipe = Pipeline(steps=[('pca', PCA()),
- ('logistic', LogisticRegression())])
+pipe = Pipeline(steps=[("pca", PCA()), ("logistic", LogisticRegression())])
digits = datasets.load_digits()
X_digits = digits.data[:1000]
@@ -53,14 +52,13 @@
# ++++++++++++++++++
-initial_types = [('input', FloatTensorType((None, X_digits.shape[1])))]
-model_onnx = convert_sklearn(pipe, initial_types=initial_types,
- target_opset=12)
+initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))]
+model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12)
sess = rt.InferenceSession(model_onnx.SerializeToString())
print("skl predict_proba")
print(pipe.predict_proba(X_digits[:2]))
-onx_pred = sess.run(None, {'input': X_digits[:2].astype(np.float32)})[1]
+onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1]
df = pd.DataFrame(onx_pred)
print("onnx predict_proba")
print(df.values)
@@ -76,19 +74,18 @@
# a smaller ONNX graph for every operator.
-steps = collect_intermediate_steps(pipe, "pipeline",
- initial_types)
+steps = collect_intermediate_steps(pipe, "pipeline", initial_types)
assert len(steps) == 2
pipe.predict_proba(X_digits[:2])
for i, step in enumerate(steps):
- onnx_step = step['onnx_step']
+ onnx_step = step["onnx_step"]
sess = rt.InferenceSession(onnx_step.SerializeToString())
- onnx_outputs = sess.run(None, {'input': X_digits[:2].astype(np.float32)})
- skl_outputs = step['model']._debug.outputs
- print("step 1", type(step['model']))
+ onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)})
+ skl_outputs = step["model"]._debug.outputs
+ print("step 1", type(step["model"]))
print("skl outputs")
print(skl_outputs)
print("onnx outputs")
@@ -104,21 +101,21 @@
# needed to *replay* the prediction of the model.
to_save = {
- 'model': steps[1]['model'],
- 'data_input': steps[1]['model']._debug.inputs,
- 'data_output': steps[1]['model']._debug.outputs,
- 'inputs': steps[1]['inputs'],
- 'outputs': steps[1]['outputs'],
+ "model": steps[1]["model"],
+ "data_input": steps[1]["model"]._debug.inputs,
+ "data_output": steps[1]["model"]._debug.outputs,
+ "inputs": steps[1]["inputs"],
+ "outputs": steps[1]["outputs"],
}
-del steps[1]['model']._debug
+del steps[1]["model"]._debug
-with open('classifier.pkl', 'wb') as f:
+with open("classifier.pkl", "wb") as f:
pickle.dump(to_save, f)
-with open('classifier.pkl', 'rb') as f:
+with open("classifier.pkl", "rb") as f:
restored = pickle.load(f)
-print(restored['model'].predict_proba(restored['data_input']['predict_proba']))
+print(restored["model"].predict_proba(restored["data_input"]["predict_proba"]))
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_logging.py b/docs/examples/plot_logging.py
index fcaad1000..724d35657 100644
--- a/docs/examples/plot_logging.py
+++ b/docs/examples/plot_logging.py
@@ -42,16 +42,14 @@
# Convert a model into ONNX
# +++++++++++++++++++++++++
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=12)
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
-pred_onx = sess.run([label_name],
- {input_name: X_test.astype(numpy.float32)})[0]
+pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
########################################
@@ -74,7 +72,7 @@
# This information may be useful when a custom converter is being
# implemented.
-logger = logging.getLogger('skl2onnx')
+logger = logging.getLogger("skl2onnx")
logger.setLevel(logging.DEBUG)
logging.basicConfig(level=logging.DEBUG)
diff --git a/docs/examples/plot_nmf.py b/docs/examples/plot_nmf.py
index a6aec7d9a..4b9be8605 100644
--- a/docs/examples/plot_nmf.py
+++ b/docs/examples/plot_nmf.py
@@ -32,15 +32,16 @@
import matplotlib.pyplot as plt
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnx
-from skl2onnx.algebra.onnx_ops import (
- OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum)
+from skl2onnx.algebra.onnx_ops import OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum
from skl2onnx.common.data_types import FloatTensorType
from onnxruntime import InferenceSession
-mat = np.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
- [1, 0, 0, 0], [1, 0, 0, 0]], dtype=np.float64)
-mat[:mat.shape[1], :] += np.identity(mat.shape[1])
+mat = np.array(
+ [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
+ dtype=np.float64,
+)
+mat[: mat.shape[1], :] += np.identity(mat.shape[1])
mod = NMF(n_components=2)
W = mod.fit_transform(mat)
@@ -93,20 +94,20 @@ def nmf_to_onnx(W, H, op_version=12):
and returns the predictions for it. It assumes
    these indices apply to the training data.
"""
- col = OnnxArrayFeatureExtractor(H, 'col')
- row = OnnxArrayFeatureExtractor(W.T, 'row')
+ col = OnnxArrayFeatureExtractor(H, "col")
+ row = OnnxArrayFeatureExtractor(W.T, "row")
dot = OnnxMul(col, row, op_version=op_version)
res = OnnxReduceSum(dot, output_names="rec", op_version=op_version)
indices_type = np.array([0], dtype=np.int64)
- onx = res.to_onnx(inputs={'col': indices_type,
- 'row': indices_type},
- outputs=[('rec', FloatTensorType((None, 1)))],
- target_opset=op_version)
+ onx = res.to_onnx(
+ inputs={"col": indices_type, "row": indices_type},
+ outputs=[("rec", FloatTensorType((None, 1)))],
+ target_opset=op_version,
+ )
return onx
-model_onnx = nmf_to_onnx(W.astype(np.float32),
- H.astype(np.float32))
+model_onnx = nmf_to_onnx(W.astype(np.float32), H.astype(np.float32))
print(model_onnx)
########################################
@@ -116,9 +117,7 @@ def nmf_to_onnx(W, H, op_version=12):
def predict_onnx(sess, row_indices, col_indices):
- res = sess.run(None,
- {'col': col_indices,
- 'row': row_indices})
+ res = sess.run(None, {"col": col_indices, "row": row_indices})
return res
@@ -136,13 +135,16 @@ def predict_onnx(sess, row_indices, col_indices):
###################################
# The ONNX graph looks like the following.
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name,
- rankdir="TB", node_producer=GetOpNodeProducer("docstring"))
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer("docstring"),
+)
pydot_graph.write_dot("graph_nmf.dot")
-os.system('dot -O -Tpng graph_nmf.dot')
+os.system("dot -O -Tpng graph_nmf.dot")
image = plt.imread("graph_nmf.dot.png")
plt.imshow(image)
-plt.axis('off')
+plt.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_onnx_operators.py b/docs/examples/plot_onnx_operators.py
index 234384fbf..9a72fd884 100644
--- a/docs/examples/plot_onnx_operators.py
+++ b/docs/examples/plot_onnx_operators.py
@@ -46,17 +46,17 @@
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
# Create one input (ValueInfoProto)
-X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [None, 2])
+X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, 2])
# Create one output (ValueInfoProto)
-Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [None, 4])
+Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, 4])
# Create a node (NodeProto)
node_def = helper.make_node(
- 'Pad', # node name
- ['X'], # inputs
- ['Y'], # outputs
- mode='constant', # attributes
+ "Pad", # node name
+ ["X"], # inputs
+ ["Y"], # outputs
+ mode="constant", # attributes
value=1.5,
pads=[0, 1, 0, 1],
)
@@ -64,18 +64,18 @@
# Create the graph (GraphProto)
graph_def = helper.make_graph(
[node_def],
- 'test-model',
+ "test-model",
[X],
[Y],
)
# Create the model (ModelProto)
-model_def = helper.make_model(graph_def, producer_name='onnx-example')
+model_def = helper.make_model(graph_def, producer_name="onnx-example")
model_def.opset_import[0].version = 10
-print('The model is:\n{}'.format(model_def))
+print("The model is:\n{}".format(model_def))
onnx.checker.check_model(model_def)
-print('The model is checked!')
+print("The model is checked!")
#####################################
# Same example with sklearn-onnx
@@ -87,19 +87,24 @@
from skl2onnx.algebra.onnx_ops import OnnxPad # noqa
-pad = OnnxPad('X', output_names=['Y'], mode='constant', value=1.5,
- pads=[0, 1, 0, 1], op_version=10)
-model_def = pad.to_onnx({'X': X}, target_opset=10)
+pad = OnnxPad(
+ "X",
+ output_names=["Y"],
+ mode="constant",
+ value=1.5,
+ pads=[0, 1, 0, 1],
+ op_version=10,
+)
+model_def = pad.to_onnx({"X": X}, target_opset=10)
-print('The model is:\n{}'.format(model_def))
+print("The model is:\n{}".format(model_def))
onnx.checker.check_model(model_def)
-print('The model is checked!')
+print("The model is checked!")
####################################
# Inputs and outputs can also be skipped.
-pad = OnnxPad(mode='constant', value=1.5,
- pads=[0, 1, 0, 1], op_version=10)
+pad = OnnxPad(mode="constant", value=1.5, pads=[0, 1, 0, 1], op_version=10)
model_def = pad.to_onnx({pad.inputs[0].name: X}, target_opset=10)
onnx.checker.check_model(model_def)
@@ -112,17 +117,17 @@
# Preprocessing: create a model with two nodes, Y's shape is unknown
-node1 = helper.make_node('Transpose', ['X'], ['Y'], perm=[1, 0, 2])
-node2 = helper.make_node('Transpose', ['Y'], ['Z'], perm=[1, 0, 2])
+node1 = helper.make_node("Transpose", ["X"], ["Y"], perm=[1, 0, 2])
+node2 = helper.make_node("Transpose", ["Y"], ["Z"], perm=[1, 0, 2])
graph = helper.make_graph(
[node1, node2],
- 'two-transposes',
- [helper.make_tensor_value_info('X', TensorProto.FLOAT, (2, 3, 4))],
- [helper.make_tensor_value_info('Z', TensorProto.FLOAT, (2, 3, 4))],
+ "two-transposes",
+ [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3, 4))],
+ [helper.make_tensor_value_info("Z", TensorProto.FLOAT, (2, 3, 4))],
)
-original_model = helper.make_model(graph, producer_name='onnx-examples')
+original_model = helper.make_model(graph, producer_name="onnx-examples")
# Check the model and print Y's shape information
onnx.checker.check_model(original_model)
@@ -133,12 +138,12 @@
from skl2onnx.algebra.onnx_ops import OnnxTranspose # noqa
node = OnnxTranspose(
- OnnxTranspose('X', perm=[1, 0, 2], op_version=12),
- perm=[1, 0, 2], op_version=12)
+ OnnxTranspose("X", perm=[1, 0, 2], op_version=12), perm=[1, 0, 2], op_version=12
+)
X = np.arange(2 * 3 * 4).reshape((2, 3, 4)).astype(np.float32)
# numpy arrays are good enough to define the input shape
-model_def = node.to_onnx({'X': X}, target_opset=12)
+model_def = node.to_onnx({"X": X}, target_opset=12)
onnx.checker.check_model(model_def)
######################################
@@ -147,6 +152,7 @@
def predict_with_onnxruntime(model_def, *inputs):
import onnxruntime as ort
+
sess = ort.InferenceSession(model_def.SerializeToString())
names = [i.name for i in sess.get_inputs()]
dinputs = {name: input for name, input in zip(names, inputs)}
@@ -163,25 +169,31 @@ def predict_with_onnxruntime(model_def, *inputs):
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_def.graph, name=model_def.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ model_def.graph,
+ name=model_def.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_transpose2x.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_transpose2x.dot")
image = plt.imread("pipeline_transpose2x.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
import sklearn # noqa
+
print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
import skl2onnx # noqa
+
print("onnx: ", onnx.__version__)
print("onnxruntime: ", onnxruntime.__version__)
print("skl2onnx: ", skl2onnx.__version__)
diff --git a/docs/examples/plot_pipeline.py b/docs/examples/plot_pipeline.py
index 7c5475005..6eae7ed14 100644
--- a/docs/examples/plot_pipeline.py
+++ b/docs/examples/plot_pipeline.py
@@ -29,13 +29,14 @@
from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxMul
onnx_fct = OnnxAdd(
- OnnxMul('X', numpy.array([2], dtype=numpy.float32),
- op_version=12),
+ OnnxMul("X", numpy.array([2], dtype=numpy.float32), op_version=12),
numpy.array([[1, 0], [0, 1]], dtype=numpy.float32),
- output_names=['Y'], op_version=12)
+ output_names=["Y"],
+ op_version=12,
+)
X = numpy.array([[4, 5], [-2, 3]], dtype=numpy.float32)
-model = onnx_fct.to_onnx({'X': X}, target_opset=12)
+model = onnx_fct.to_onnx({"X": X}, target_opset=12)
print(model)
filename = "example1.onnx"
@@ -54,25 +55,29 @@
model = ModelProto()
-with open(filename, 'rb') as fid:
+with open(filename, "rb") as fid:
content = fid.read()
model.ParseFromString(content)
###################################
# We convert it into a graph.
-pydot_graph = GetPydotGraph(model.graph, name=model.graph.name, rankdir="TB",
- node_producer=GetOpNodeProducer("docstring"))
+pydot_graph = GetPydotGraph(
+ model.graph,
+ name=model.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer("docstring"),
+)
pydot_graph.write_dot("graph.dot")
#######################################
# Then into an image
-os.system('dot -O -Tpng graph.dot')
+os.system("dot -O -Tpng graph.dot")
################################
# Which we display...
image = plt.imread("graph.dot.png")
plt.imshow(image)
-plt.axis('off')
+plt.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_pipeline_lightgbm.py b/docs/examples/plot_pipeline_lightgbm.py
index a57bca298..fb584e5a1 100644
--- a/docs/examples/plot_pipeline_lightgbm.py
+++ b/docs/examples/plot_pipeline_lightgbm.py
@@ -30,8 +30,12 @@
import onnxruntime as rt
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail
from skl2onnx import convert_sklearn, update_registered_converter
-from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_classifier_output_shapes,
+) # noqa
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+ convert_lightgbm,
+) # noqa
import onnxmltools.convert.common.data_types
from skl2onnx.common.data_types import FloatTensorType
import numpy
@@ -49,8 +53,9 @@
X = X[ind, :].copy()
y = y[ind].copy()
-pipe = Pipeline([('scaler', StandardScaler()),
- ('lgbm', LGBMClassifier(n_estimators=3))])
+pipe = Pipeline(
+ [("scaler", StandardScaler()), ("lgbm", LGBMClassifier(n_estimators=3))]
+)
pipe.fit(X, y)
######################################
@@ -72,18 +77,23 @@
###########################
# Let's register the new converter.
update_registered_converter(
- LGBMClassifier, 'LightGbmLGBMClassifier',
- calculate_linear_classifier_output_shapes, convert_lightgbm,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ LGBMClassifier,
+ "LightGbmLGBMClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_lightgbm,
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
##################################
# Convert again
# +++++++++++++
model_onnx = convert_sklearn(
- pipe, 'pipeline_lightgbm',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ pipe,
+ "pipeline_lightgbm",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+)
# And save.
with open("pipeline_lightgbm.onnx", "wb") as f:
@@ -118,18 +128,21 @@
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline.dot")
image = plt.imread("pipeline.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_pipeline_xgboost.py b/docs/examples/plot_pipeline_xgboost.py
index 5de606c31..3feaa6d87 100644
--- a/docs/examples/plot_pipeline_xgboost.py
+++ b/docs/examples/plot_pipeline_xgboost.py
@@ -34,9 +34,13 @@
import skl2onnx
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
-from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_classifier_output_shapes,
+) # noqa
import onnxmltools
-from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost # noqa
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
+ convert_xgboost,
+) # noqa
import onnxmltools.convert.common.data_types
data = load_iris()
@@ -48,16 +52,18 @@
X = X[ind, :].copy()
y = y[ind].copy()
-pipe = Pipeline([('scaler', StandardScaler()),
- ('lgbm', XGBClassifier(n_estimators=3))])
+pipe = Pipeline([("scaler", StandardScaler()), ("lgbm", XGBClassifier(n_estimators=3))])
pipe.fit(X, y)
# The conversion fails but it is expected.
try:
- convert_sklearn(pipe, 'pipeline_xgboost',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ convert_sklearn(
+ pipe,
+ "pipeline_xgboost",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+ )
except Exception as e:
print(e)
@@ -88,18 +94,23 @@
###########################
# Let's register the new converter.
update_registered_converter(
- XGBClassifier, 'XGBoostXGBClassifier',
- calculate_linear_classifier_output_shapes, convert_xgboost,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ XGBClassifier,
+ "XGBoostXGBClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_xgboost,
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
##################################
# Convert again
# +++++++++++++
model_onnx = convert_sklearn(
- pipe, 'pipeline_xgboost',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ pipe,
+ "pipeline_xgboost",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+)
# And save.
with open("pipeline_xgboost.onnx", "wb") as f:
@@ -127,18 +138,21 @@
# ++++++++++++++++++++++
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name, rankdir="TB",
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow",
- fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline.dot")
image = plt.imread("pipeline.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_tfidfvectorizer.py b/docs/examples/plot_tfidfvectorizer.py
index b33b6765b..96321bfbf 100644
--- a/docs/examples/plot_tfidfvectorizer.py
+++ b/docs/examples/plot_tfidfvectorizer.py
@@ -32,13 +32,18 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import fetch_20newsgroups
+
try:
from sklearn.datasets._twenty_newsgroups import (
- strip_newsgroup_footer, strip_newsgroup_quoting)
+ strip_newsgroup_footer,
+ strip_newsgroup_quoting,
+ )
except ImportError:
# scikit-learn < 0.24
from sklearn.datasets.twenty_newsgroups import (
- strip_newsgroup_footer, strip_newsgroup_quoting)
+ strip_newsgroup_footer,
+ strip_newsgroup_quoting,
+ )
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
@@ -48,15 +53,17 @@
# limit the list of categories to make running this example faster.
-categories = ['alt.atheism', 'talk.religion.misc']
-train = fetch_20newsgroups(random_state=1,
- subset='train',
- categories=categories,
- )
-test = fetch_20newsgroups(random_state=1,
- subset='test',
- categories=categories,
- )
+categories = ["alt.atheism", "talk.religion.misc"]
+train = fetch_20newsgroups(
+ random_state=1,
+ subset="train",
+ categories=categories,
+)
+test = fetch_20newsgroups(
+ random_state=1,
+ subset="test",
+ categories=categories,
+)
##############################
# The first transform extracts two fields from the data.
@@ -78,16 +85,16 @@ def transform(self, posts):
# first column = 'subject' and second column = 'body'
features = np.empty(shape=(len(posts), 2), dtype=object)
for i, text in enumerate(posts):
- headers, _, bod = text.partition('\n\n')
+ headers, _, bod = text.partition("\n\n")
bod = strip_newsgroup_footer(bod)
bod = strip_newsgroup_quoting(bod)
features[i, 1] = bod
- prefix = 'Subject:'
- sub = ''
- for line in headers.split('\n'):
+ prefix = "Subject:"
+ sub = ""
+ for line in headers.split("\n"):
if line.startswith(prefix):
- sub = line[len(prefix):]
+ sub = line[len(prefix) :]
break
features[i, 0] = sub
@@ -101,35 +108,42 @@ def transform(self, posts):
# The pipeline is almost the same except
# we remove the custom features.
-pipeline = Pipeline([
- ('union', ColumnTransformer(
- [
- ('subject', TfidfVectorizer(min_df=50, max_features=500), 0),
-
- ('body_bow', Pipeline([
- ('tfidf', TfidfVectorizer()),
- ('best', TruncatedSVD(n_components=50)),
- ]), 1),
-
- # Removed from the original example as
- # it requires a custom converter.
- # ('body_stats', Pipeline([
- # ('stats', TextStats()), # returns a list of dicts
- # ('vect', DictVectorizer()), # list of dicts -> feature matrix
- # ]), 1),
- ],
-
- transformer_weights={
- 'subject': 0.8,
- 'body_bow': 0.5,
- # 'body_stats': 1.0,
- }
- )),
-
- # Use a LogisticRegression classifier on the combined features.
- # Instead of LinearSVC (not fully ready in onnxruntime).
- ('logreg', LogisticRegression()),
-])
+pipeline = Pipeline(
+ [
+ (
+ "union",
+ ColumnTransformer(
+ [
+ ("subject", TfidfVectorizer(min_df=50, max_features=500), 0),
+ (
+ "body_bow",
+ Pipeline(
+ [
+ ("tfidf", TfidfVectorizer()),
+ ("best", TruncatedSVD(n_components=50)),
+ ]
+ ),
+ 1,
+ ),
+ # Removed from the original example as
+ # it requires a custom converter.
+ # ('body_stats', Pipeline([
+ # ('stats', TextStats()), # returns a list of dicts
+ # ('vect', DictVectorizer()), # list of dicts -> feature matrix
+ # ]), 1),
+ ],
+ transformer_weights={
+ "subject": 0.8,
+ "body_bow": 0.5,
+ # 'body_stats': 1.0,
+ },
+ ),
+ ),
+ # Use a LogisticRegression classifier on the combined features.
+ # Instead of LinearSVC (not fully ready in onnxruntime).
+ ("logreg", LogisticRegression()),
+ ]
+)
pipeline.fit(train_data, train.target)
print(classification_report(pipeline.predict(test_data), test.target))
@@ -149,16 +163,32 @@ def transform(self, posts):
seps = {
TfidfVectorizer: {
"separators": [
- ' ', '.', '\\?', ',', ';', ':', '!',
- '\\(', '\\)', '\n', '"', "'",
- "-", "\\[", "\\]", "@"
+ " ",
+ ".",
+ "\\?",
+ ",",
+ ";",
+ ":",
+ "!",
+ "\\(",
+ "\\)",
+ "\n",
+ '"',
+ "'",
+ "-",
+ "\\[",
+ "\\]",
+ "@",
]
}
}
model_onnx = convert_sklearn(
- pipeline, "tfidf",
+ pipeline,
+ "tfidf",
initial_types=[("input", StringTensorType([None, 2]))],
- options=seps, target_opset=12)
+ options=seps,
+ target_opset=12,
+)
#################################
# And save.
@@ -169,8 +199,8 @@ def transform(self, posts):
# Predictions with onnxruntime.
sess = rt.InferenceSession("pipeline_tfidf.onnx")
-print('---', train_data[0])
-inputs = {'input': train_data[:1]}
+print("---", train_data[0])
+inputs = {"input": train_data[:1]}
pred_onx = sess.run(None, inputs)
print("predict", pred_onx[0])
print("predict_proba", pred_onx[1])
@@ -192,16 +222,18 @@ def transform(self, posts):
# Finally, let's see the graph converted with *sklearn-onnx*.
pydot_graph = GetPydotGraph(
- model_onnx.graph, name=model_onnx.graph.name,
- rankdir="TB", node_producer=GetOpNodeProducer("docstring",
- color="yellow",
- fillcolor="yellow",
- style="filled"))
+ model_onnx.graph,
+ name=model_onnx.graph.name,
+ rankdir="TB",
+ node_producer=GetOpNodeProducer(
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("pipeline_tfidf.dot")
-os.system('dot -O -Gdpi=300 -Tpng pipeline_tfidf.dot')
+os.system("dot -O -Gdpi=300 -Tpng pipeline_tfidf.dot")
image = plt.imread("pipeline_tfidf.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
diff --git a/docs/exts/github_link.py b/docs/exts/github_link.py
index 5bd939f1f..9d87a68de 100644
--- a/docs/exts/github_link.py
+++ b/docs/exts/github_link.py
@@ -9,16 +9,16 @@
import sys
from functools import partial
-REVISION_CMD = 'git rev-parse --short HEAD'
+REVISION_CMD = "git rev-parse --short HEAD"
def _get_git_revision():
try:
revision = subprocess.check_output(REVISION_CMD.split()).strip()
except (subprocess.CalledProcessError, OSError):
- print('Failed to execute git to get revision')
+ print("Failed to execute git to get revision")
return None
- return revision.decode('utf-8')
+ return revision.decode("utf-8")
def _linkcode_resolve(domain, info, package, url_fmt, revision):
@@ -36,14 +36,14 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision):
if revision is None:
return
- if domain not in ('py', 'pyx'):
+ if domain not in ("py", "pyx"):
return
- if not info.get('module') or not info.get('fullname'):
+ if not info.get("module") or not info.get("fullname"):
return
- class_name = info['fullname'].split('.')[0]
- module = __import__(info['module'], fromlist=[class_name])
- obj = attrgetter(info['fullname'])(module)
+ class_name = info["fullname"].split(".")[0]
+ module = __import__(info["module"], fromlist=[class_name])
+ obj = attrgetter(info["fullname"])(module)
# Unwrap the object to get the correct source
# file in case that is wrapped by a decorator
@@ -61,14 +61,12 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision):
if not fn:
return
- fn = os.path.relpath(fn,
- start=os.path.dirname(__import__(package).__file__))
+ fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__))
try:
lineno = inspect.getsourcelines(obj)[1]
except Exception:
- lineno = ''
- return url_fmt.format(revision=revision, package=package,
- path=fn, lineno=lineno)
+ lineno = ""
+ return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno)
def make_linkcode_resolve(package, url_fmt):
@@ -80,5 +78,6 @@ def make_linkcode_resolve(package, url_fmt):
'{path}#L{lineno}')
"""
revision = _get_git_revision()
- return partial(_linkcode_resolve, revision=revision, package=package,
- url_fmt=url_fmt)
+ return partial(
+ _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt
+ )
diff --git a/docs/exts/sphinx_skl2onnx_extension.py b/docs/exts/sphinx_skl2onnx_extension.py
index e92aac08c..f7cf24cae 100644
--- a/docs/exts/sphinx_skl2onnx_extension.py
+++ b/docs/exts/sphinx_skl2onnx_extension.py
@@ -17,8 +17,9 @@
import onnxruntime
-def skl2onnx_version_role(role, rawtext, text, lineno, inliner,
- options=None, content=None):
+def skl2onnx_version_role(
+ role, rawtext, text, lineno, inliner, options=None, content=None
+):
"""
Defines custom role *skl2onnx-version* which returns
*skl2onnx* version.
@@ -27,14 +28,14 @@ def skl2onnx_version_role(role, rawtext, text, lineno, inliner,
options = {}
if content is None:
content = []
- if text == 'v':
- version = 'v' + skl2onnx.__version__
- elif text == 'rt':
- version = 'v' + onnxruntime.__version__
+ if text == "v":
+ version = "v" + skl2onnx.__version__
+ elif text == "rt":
+ version = "v" + onnxruntime.__version__
else:
raise RuntimeError(
- "skl2onnx_version_role cannot interpret content '{0}'."
- "".format(text))
+ "skl2onnx_version_role cannot interpret content '{0}'." "".format(text)
+ )
node = nodes.literal(version)
return [node], []
@@ -44,6 +45,7 @@ class SupportedSkl2OnnxDirective(Directive):
Automatically displays the list of models
*skl2onnx* can currently convert.
"""
+
required_arguments = False
optional_arguments = 0
final_argument_whitespace = True
@@ -57,7 +59,7 @@ def run(self):
for mod in models:
par = nodes.paragraph()
par += nodes.Text(mod)
- bullets += nodes.list_item('', par)
+ bullets += nodes.list_item("", par)
return ns
@@ -66,6 +68,7 @@ class SupportedOnnxOpsDirective(Directive):
    Automatically displays the list of supported ONNX operators
*skl2onnx* can use to build converters.
"""
+
required_arguments = False
optional_arguments = 0
final_argument_whitespace = True
@@ -92,10 +95,10 @@ def make_ref(name):
if i + cut * 2 < len(sorted_keys):
row.append(make_ref(sorted_keys[i + cut * 2]))
else:
- row.append('')
+ row.append("")
else:
- row.append('')
- row.append('')
+ row.append("")
+ row.append("")
table.append(row)
rst = tabulate(table, tablefmt="rst")
@@ -106,17 +109,16 @@ def make_ref(name):
nested_parse_with_titles(self.state, st, node)
main += node
- rows.append('')
+ rows.append("")
for name in sorted_keys:
rows = []
cl = cls[name]
- rows.append('.. _l-onnx-{}:'.format(cl.__name__))
- rows.append('')
+ rows.append(".. _l-onnx-{}:".format(cl.__name__))
+ rows.append("")
rows.append(cl.__name__)
- rows.append('=' * len(cl.__name__))
- rows.append('')
- rows.append(
- ".. autoclass:: skl2onnx.algebra.onnx_ops.{}".format(name))
+ rows.append("=" * len(cl.__name__))
+ rows.append("")
+ rows.append(".. autoclass:: skl2onnx.algebra.onnx_ops.{}".format(name))
st = StringList(rows)
node = nodes.container()
nested_parse_with_titles(self.state, st, node)
@@ -129,6 +131,7 @@ class SupportedSklearnOpsDirective(Directive):
"""
Automatically displays the list of available converters.
"""
+
required_arguments = False
optional_arguments = 0
final_argument_whitespace = True
@@ -155,10 +158,10 @@ def make_ref(name):
if i + cut * 2 < len(sorted_keys):
row.append(make_ref(sorted_keys[i + cut * 2]))
else:
- row.append('')
+ row.append("")
else:
- row.append('')
- row.append('')
+ row.append("")
+ row.append("")
table.append(row)
rst = tabulate(table, tablefmt="rst")
@@ -169,17 +172,16 @@ def make_ref(name):
nested_parse_with_titles(self.state, st, node)
main += node
- rows.append('')
+ rows.append("")
for name in sorted_keys:
rows = []
cl = cls[name]
- rows.append('.. _l-sklops-{}:'.format(cl.__name__))
- rows.append('')
+ rows.append(".. _l-sklops-{}:".format(cl.__name__))
+ rows.append("")
rows.append(cl.__name__)
- rows.append('=' * len(cl.__name__))
- rows.append('')
- rows.append(
- ".. autoclass:: skl2onnx.algebra.sklearn_ops.{}".format(name))
+ rows.append("=" * len(cl.__name__))
+ rows.append("")
+ rows.append(".. autoclass:: skl2onnx.algebra.sklearn_ops.{}".format(name))
st = StringList(rows)
node = nodes.container()
nested_parse_with_titles(self.state, st, node)
@@ -194,6 +196,7 @@ def missing_ops():
"""
from sklearn import __all__
from sklearn.base import BaseEstimator
+
found = []
for sub in __all__:
try:
@@ -209,11 +212,17 @@ def missing_ops():
issub = issubclass(cl, BaseEstimator)
except TypeError:
continue
- if cl.__name__ in {'Pipeline', 'ColumnTransformer',
- 'FeatureUnion', 'BaseEstimator'}:
+ if cl.__name__ in {
+ "Pipeline",
+ "ColumnTransformer",
+ "FeatureUnion",
+ "BaseEstimator",
+ }:
continue
- if (sub in {'calibration', 'dummy', 'manifold'} and
- 'Calibrated' not in cl.__name__):
+ if (
+ sub in {"calibration", "dummy", "manifold"}
+ and "Calibrated" not in cl.__name__
+ ):
continue
if issub:
found.append((cl.__name__, sub, cl))
@@ -226,6 +235,7 @@ class AllSklearnOpsDirective(Directive):
Displays the list of models implemented in scikit-learn
and whether or not there is an associated converter.
"""
+
required_arguments = False
optional_arguments = 0
final_argument_whitespace = True
@@ -234,12 +244,19 @@ class AllSklearnOpsDirective(Directive):
def run(self):
from sklearn import __version__ as skver
+
found = missing_ops()
nbconverters = 0
supported = set(build_sklearn_operator_name_map())
- rows = [".. list-table::", " :header-rows: 1",
- " :widths: 10 7 4",
- "", " * - Name", " - Package", " - Supported"]
+ rows = [
+ ".. list-table::",
+ " :header-rows: 1",
+ " :widths: 10 7 4",
+ "",
+ " * - Name",
+ " - Package",
+ " - Supported",
+ ]
for name, sub, cl in found:
rows.append(" * - " + name)
rows.append(" - " + sub)
@@ -251,8 +268,7 @@ def run(self):
rows.append("")
rows.append("scikit-learn's version is **{0}**.".format(skver))
- rows.append(
- "{0}/{1} models are covered.".format(nbconverters, len(found)))
+ rows.append("{0}/{1} models are covered.".format(nbconverters, len(found)))
node = nodes.container()
st = StringList(rows)
@@ -265,9 +281,9 @@ def run(self):
def setup(app):
# Placeholder to initialize the folder before
# generating the documentation.
- app.add_role('skl2onnxversion', skl2onnx_version_role)
- app.add_directive('supported-skl2onnx', SupportedSkl2OnnxDirective)
- app.add_directive('supported-onnx-ops', SupportedOnnxOpsDirective)
- app.add_directive('supported-sklearn-ops', SupportedSklearnOpsDirective)
- app.add_directive('covered-sklearn-ops', AllSklearnOpsDirective)
- return {'version': sphinx.__display_version__, 'parallel_read_safe': True}
+ app.add_role("skl2onnxversion", skl2onnx_version_role)
+ app.add_directive("supported-skl2onnx", SupportedSkl2OnnxDirective)
+ app.add_directive("supported-onnx-ops", SupportedOnnxOpsDirective)
+ app.add_directive("supported-sklearn-ops", SupportedSklearnOpsDirective)
+ app.add_directive("covered-sklearn-ops", AllSklearnOpsDirective)
+ return {"version": sphinx.__display_version__, "parallel_read_safe": True}
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 401b34eff..cf3e6427e 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,7 +5,7 @@ coverage
flake8
furo
joblib
-lightgbm
+lightgbm<4.0
loky
matplotlib
mlinsights>=0.3.631
diff --git a/docs/tests/test_documentation_examples.py b/docs/tests/test_documentation_examples.py
index 2b23b97fb..7c930431f 100644
--- a/docs/tests/test_documentation_examples.py
+++ b/docs/tests/test_documentation_examples.py
@@ -15,29 +15,26 @@
def import_source(module_file_path, module_name):
if not os.path.exists(module_file_path):
raise FileNotFoundError(module_file_path)
- module_spec = importlib.util.spec_from_file_location(
- module_name, module_file_path)
+ module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
if module_spec is None:
raise FileNotFoundError(
- "Unable to find '{}' in '{}'.".format(
- module_name, module_file_path))
+ "Unable to find '{}' in '{}'.".format(module_name, module_file_path)
+ )
module = importlib.util.module_from_spec(module_spec)
return module_spec.loader.exec_module(module)
class TestDocumentationExample(unittest.TestCase):
-
def test_documentation_examples(self):
-
this = os.path.abspath(os.path.dirname(__file__))
- fold = os.path.normpath(os.path.join(this, '..', 'examples'))
+ fold = os.path.normpath(os.path.join(this, "..", "examples"))
found = os.listdir(fold)
tested = 0
for name in found:
if name.startswith("plot_") and name.endswith(".py"):
- if (name == "plot_pipeline_lightgbm.py" and
- pv.Version(onnxruntime.__version__) <
- pv.Version('1.0.0')):
+ if name == "plot_pipeline_lightgbm.py" and pv.Version(
+ onnxruntime.__version__
+ ) < pv.Version("1.0.0"):
continue
print("run %r" % name)
try:
@@ -45,14 +42,14 @@ def test_documentation_examples(self):
assert mod is not None
except FileNotFoundError:
# try another way
- cmds = [sys.executable, "-u",
- os.path.join(fold, name)]
+ cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(
- cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+ )
res = p.communicate()
out, err = res
- st = err.decode('ascii', errors='ignore')
- if len(st) > 0 and 'Traceback' in st:
+ st = err.decode("ascii", errors="ignore")
+ if len(st) > 0 and "Traceback" in st:
if "No such file or directory: 'dot'" in st:
# dot not installed, this part
# is tested in onnx framework
@@ -61,13 +58,14 @@ def test_documentation_examples(self):
# dot not installed, this part
# is tested in onnx framework
pass
- elif ('Please fix either the inputs or '
- 'the model.') in st:
+ elif ("Please fix either the inputs or " "the model.") in st:
# onnxruntime datasets changed in master branch,
# still the same in released version on pypi
pass
- elif ('Current official support for domain ai.onnx '
- 'is till opset 12.') in st:
+ elif (
+ "Current official support for domain ai.onnx "
+ "is till opset 12."
+ ) in st:
# one example is using opset 13 but onnxruntime
                            # only supports up to opset 12.
pass
@@ -78,7 +76,8 @@ def test_documentation_examples(self):
raise RuntimeError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
- "".format(name, cmds, sys.exec_prefix, st))
+ "".format(name, cmds, sys.exec_prefix, st)
+ )
tested += 1
if tested == 0:
raise RuntimeError("No example was tested.")
diff --git a/docs/tests/test_documentation_tutorial.py b/docs/tests/test_documentation_tutorial.py
index d607bf9a8..6e6f41c61 100644
--- a/docs/tests/test_documentation_tutorial.py
+++ b/docs/tests/test_documentation_tutorial.py
@@ -13,22 +13,19 @@
def import_source(module_file_path, module_name):
if not os.path.exists(module_file_path):
raise FileNotFoundError(module_file_path)
- module_spec = importlib.util.spec_from_file_location(
- module_name, module_file_path)
+ module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
if module_spec is None:
raise FileNotFoundError(
- "Unable to find '{}' in '{}'.".format(
- module_name, module_file_path))
+ "Unable to find '{}' in '{}'.".format(module_name, module_file_path)
+ )
module = importlib.util.module_from_spec(module_spec)
return module_spec.loader.exec_module(module)
class TestDocumentationTutorial(unittest.TestCase):
-
def test_documentation_tutorial(self):
-
this = os.path.abspath(os.path.dirname(__file__))
- fold = os.path.normpath(os.path.join(this, '..', 'tutorial'))
+ fold = os.path.normpath(os.path.join(this, "..", "tutorial"))
found = os.listdir(fold)
tested = 0
for name in found:
@@ -39,14 +36,14 @@ def test_documentation_tutorial(self):
assert mod is not None
except FileNotFoundError:
# try another way
- cmds = [sys.executable, "-u",
- os.path.join(fold, name)]
+ cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(
- cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+ )
res = p.communicate()
out, err = res
- st = err.decode('ascii', errors='ignore')
- if len(st) > 0 and 'Traceback' in st:
+ st = err.decode("ascii", errors="ignore")
+ if len(st) > 0 and "Traceback" in st:
if "No such file or directory: 'dot'" in st:
# dot not installed, this part
# is tested in onnx framework
@@ -55,26 +52,30 @@ def test_documentation_tutorial(self):
# dot not installed, this part
# is tested in onnx framework
pass
- elif ("cannot import name 'LightGbmModelContainer' "
- "from 'onnxmltools.convert.common."
- "_container'") in st:
+ elif (
+ "cannot import name 'LightGbmModelContainer' "
+ "from 'onnxmltools.convert.common."
+ "_container'"
+ ) in st:
# onnxmltools not recent enough
pass
- elif ('Please fix either the inputs or '
- 'the model.') in st:
+ elif ("Please fix either the inputs or " "the model.") in st:
# onnxruntime datasets changed in master branch,
# still the same in released version on pypi
pass
- elif ('Current official support for domain ai.onnx '
- 'is till opset 12.') in st:
+ elif (
+ "Current official support for domain ai.onnx "
+ "is till opset 12."
+ ) in st:
# one example is using opset 13 but onnxruntime
                            # only supports up to opset 12.
pass
elif "'str' object has no attribute 'decode'" in st:
# unstable bug in scikit-learn<0.24
pass
- elif ("This method should be overwritten for "
- "operator") in st:
+ elif (
+ "This method should be overwritten for " "operator"
+ ) in st:
# raised by old version of packages
# used in the documentation
pass
@@ -82,7 +83,8 @@ def test_documentation_tutorial(self):
raise RuntimeError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
- "".format(name, cmds, sys.exec_prefix, st))
+ "".format(name, cmds, sys.exec_prefix, st)
+ )
tested += 1
if tested == 0:
raise RuntimeError("No example was tested.")
diff --git a/docs/tests/test_utils_benchmark.py b/docs/tests/test_utils_benchmark.py
index dbfa979ef..516081a3d 100644
--- a/docs/tests/test_utils_benchmark.py
+++ b/docs/tests/test_utils_benchmark.py
@@ -10,19 +10,16 @@
class TestMeasureTime(unittest.TestCase):
-
def test_vector_count(self):
def fct():
X = numpy.ones((1000, 5))
return X
- res = measure_time(
- "fct", context={"fct": fct}, div_by_number=False, number=100)
+
+ res = measure_time("fct", context={"fct": fct}, div_by_number=False, number=100)
self.assertIn("average", res)
- res = measure_time(
- "fct", context={"fct": fct}, div_by_number=True, number=100)
+ res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=100)
self.assertIn("average", res)
- res = measure_time(
- "fct", context={"fct": fct}, div_by_number=True, number=1000)
+ res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=1000)
self.assertIn("average", res)
diff --git a/docs/tests/test_utils_classes.py b/docs/tests/test_utils_classes.py
index a1dbd634f..5a16aaaa9 100644
--- a/docs/tests/test_utils_classes.py
+++ b/docs/tests/test_utils_classes.py
@@ -9,7 +9,6 @@
class TestUtilsClasses(unittest.TestCase):
-
def test_classes(self):
cl = class_names
self.assertIsInstance(cl, dict)
diff --git a/docs/tutorial/plot_abegin_convert_pipeline.py b/docs/tutorial/plot_abegin_convert_pipeline.py
index f41c5f5c1..9596fe305 100644
--- a/docs/tutorial/plot_abegin_convert_pipeline.py
+++ b/docs/tutorial/plot_abegin_convert_pipeline.py
@@ -17,13 +17,14 @@
Training a pipeline
+++++++++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
from onnxruntime import InferenceSession
from sklearn.datasets import load_diabetes
from sklearn.ensemble import (
- GradientBoostingRegressor, RandomForestRegressor,
- VotingRegressor)
+ GradientBoostingRegressor,
+ RandomForestRegressor,
+ VotingRegressor,
+)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
@@ -39,9 +40,11 @@
reg2 = RandomForestRegressor(random_state=1, n_estimators=5)
reg3 = LinearRegression()
-ereg = Pipeline(steps=[
- ('voting', VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])),
-])
+ereg = Pipeline(
+ steps=[
+ ("voting", VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3)])),
+ ]
+)
ereg.fit(X_train, y_train)
#################################
@@ -54,8 +57,7 @@
# into single float and ONNX runtimes may not fully
# support doubles.
-onx = to_onnx(ereg, X_train[:1].astype(numpy.float32),
- target_opset=12)
+onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), target_opset=12)
###################################
# Prediction with ONNX
@@ -64,7 +66,7 @@
# The first example uses :epkg:`onnxruntime`.
sess = InferenceSession(onx.SerializeToString())
-pred_ort = sess.run(None, {'X': X_test.astype(numpy.float32)})[0]
+pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
pred_skl = ereg.predict(X_test.astype(numpy.float32))
@@ -113,5 +115,5 @@ def diff(p1, p2):
##########################################
# It works almost the same way.
-pred_pyrt = oinf.run(None, {'X': X_test.astype(numpy.float32)})[0]
+pred_pyrt = oinf.run(None, {"X": X_test.astype(numpy.float32)})[0]
print(diff(pred_skl, pred_pyrt))
diff --git a/docs/tutorial/plot_bbegin_measure_time.py b/docs/tutorial/plot_bbegin_measure_time.py
index 3acabe3e4..823211506 100644
--- a/docs/tutorial/plot_bbegin_measure_time.py
+++ b/docs/tutorial/plot_bbegin_measure_time.py
@@ -21,8 +21,10 @@
from sklearn import config_context
from sklearn.datasets import make_regression
from sklearn.ensemble import (
- GradientBoostingRegressor, RandomForestRegressor,
- VotingRegressor)
+ GradientBoostingRegressor,
+ RandomForestRegressor,
+ VotingRegressor,
+)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from onnxruntime import InferenceSession
@@ -32,15 +34,14 @@
N = 11000
X, y = make_regression(N, n_features=10)
-X_train, X_test, y_train, y_test = train_test_split(
- X, y, train_size=0.01)
+X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.01)
print("Train shape", X_train.shape)
print("Test shape", X_test.shape)
reg1 = GradientBoostingRegressor(random_state=1)
reg2 = RandomForestRegressor(random_state=1)
reg3 = LinearRegression()
-ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
+ereg = VotingRegressor([("gb", reg1), ("rf", reg2), ("lr", reg3)])
ereg.fit(X_train, y_train)
#################################
@@ -59,12 +60,12 @@
with config_context(assume_finite=True):
obs = []
for batch_size, repeat in tqdm(sizes):
- context = {"ereg": ereg, 'X': X_test[:batch_size]}
+ context = {"ereg": ereg, "X": X_test[:batch_size]}
mt = measure_time(
- "ereg.predict(X)", context, div_by_number=True,
- number=10, repeat=repeat)
- mt['size'] = context['X'].shape[0]
- mt['mean_obs'] = mt['average'] / mt['size']
+ "ereg.predict(X)", context, div_by_number=True, number=10, repeat=repeat
+ )
+ mt["size"] = context["X"].shape[0]
+ mt["mean_obs"] = mt["average"] / mt["size"]
obs.append(mt)
df_skl = DataFrame(obs)
@@ -73,8 +74,7 @@
#####################################
# Graph.
-df_skl.set_index('size')[['mean_obs']].plot(
- title="scikit-learn", logx=True, logy=True)
+df_skl.set_index("size")[["mean_obs"]].plot(title="scikit-learn", logx=True, logy=True)
###############################
# ONNX runtime
@@ -83,36 +83,41 @@
# The same is done with the two ONNX runtime
# available.
-onx = to_onnx(ereg, X_train[:1].astype(numpy.float32),
- target_opset=14)
-sess = InferenceSession(onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+onx = to_onnx(ereg, X_train[:1].astype(numpy.float32), target_opset=14)
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
oinf = ReferenceEvaluator(onx)
obs = []
for batch_size, repeat in tqdm(sizes):
-
# scikit-learn
- context = {"ereg": ereg, 'X': X_test[:batch_size].astype(numpy.float32)}
+ context = {"ereg": ereg, "X": X_test[:batch_size].astype(numpy.float32)}
mt = measure_time(
- "ereg.predict(X)", context, div_by_number=True,
- number=10, repeat=repeat)
- mt['size'] = context['X'].shape[0]
- mt['skl'] = mt['average'] / mt['size']
+ "ereg.predict(X)", context, div_by_number=True, number=10, repeat=repeat
+ )
+ mt["size"] = context["X"].shape[0]
+ mt["skl"] = mt["average"] / mt["size"]
# onnxruntime
- context = {"sess": sess, 'X': X_test[:batch_size].astype(numpy.float32)}
+ context = {"sess": sess, "X": X_test[:batch_size].astype(numpy.float32)}
mt2 = measure_time(
- "sess.run(None, {'X': X})[0]", context, div_by_number=True,
- number=10, repeat=repeat)
- mt['ort'] = mt2['average'] / mt['size']
+ "sess.run(None, {'X': X})[0]",
+ context,
+ div_by_number=True,
+ number=10,
+ repeat=repeat,
+ )
+ mt["ort"] = mt2["average"] / mt["size"]
# ReferenceEvaluator
- context = {"oinf": oinf, 'X': X_test[:batch_size].astype(numpy.float32)}
+ context = {"oinf": oinf, "X": X_test[:batch_size].astype(numpy.float32)}
mt2 = measure_time(
- "oinf.run(None, {'X': X})[0]", context, div_by_number=True,
- number=10, repeat=repeat)
- mt['pyrt'] = mt2['average'] / mt['size']
+ "oinf.run(None, {'X': X})[0]",
+ context,
+ div_by_number=True,
+ number=10,
+ repeat=repeat,
+ )
+ mt["pyrt"] = mt2["average"] / mt["size"]
# end
obs.append(mt)
@@ -124,9 +129,9 @@
#####################################
# Graph.
-df.set_index('size')[['skl', 'ort', 'pyrt']].plot(
- title="Average prediction time per runtime",
- logx=True, logy=True)
+df.set_index("size")[["skl", "ort", "pyrt"]].plot(
+ title="Average prediction time per runtime", logx=True, logy=True
+)
#####################################
# :epkg:`ONNX` runtimes are much faster than :epkg:`scikit-learn`
diff --git a/docs/tutorial/plot_catwoe_transformer.py b/docs/tutorial/plot_catwoe_transformer.py
index 338cf5939..bf9bed2fe 100644
--- a/docs/tutorial/plot_catwoe_transformer.py
+++ b/docs/tutorial/plot_catwoe_transformer.py
@@ -68,10 +68,10 @@ def ordenc_to_sklearn(op_mapping):
"Converts OrdinalEncoder mapping to scikit-learn OrdinalEncoder."
cats = []
for column_map in op_mapping:
- col = column_map['col']
+ col = column_map["col"]
while len(cats) <= col:
cats.append(None)
- mapping = column_map['mapping']
+ mapping = column_map["mapping"]
res = []
for i in range(mapping.shape[0]):
if np.isnan(mapping.index[i]):
@@ -88,8 +88,7 @@ def ordenc_to_sklearn(op_mapping):
def ordinal_encoder_shape_calculator(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
input_type = operator.inputs[0].type.__class__
input_dim = operator.inputs[0].get_first_dimension()
shape = operator.inputs[0].type.shape
@@ -104,15 +103,18 @@ def ordinal_encoder_converter(scope, operator, container):
X = operator.inputs[0]
skl_ord = ordenc_to_sklearn(op.mapping)
- cat = OnnxSubEstimator(skl_ord, X, op_version=opv,
- output_names=operator.outputs[:1])
+ cat = OnnxSubEstimator(
+ skl_ord, X, op_version=opv, output_names=operator.outputs[:1]
+ )
cat.add_to(scope, container)
update_registered_converter(
- OrdinalEncoder, "CategoricalEncoderOrdinalEncoder",
+ OrdinalEncoder,
+ "CategoricalEncoderOrdinalEncoder",
ordinal_encoder_shape_calculator,
- ordinal_encoder_converter)
+ ordinal_encoder_converter,
+)
###################################
@@ -130,7 +132,7 @@ def ordinal_encoder_converter(scope, operator, container):
ord_onx = to_onnx(enc, X[:1], target_opset=14)
sess = InferenceSession(ord_onx.SerializeToString())
-print(sess.run(None, {'X': X[:5]})[0])
+print(sess.run(None, {"X": X[:5]})[0])
######################################
# That works.
@@ -149,7 +151,7 @@ def woeenc_to_sklearn(op_mapping):
for column_map in op_mapping.items():
col = column_map[0]
while len(cats) <= col:
- cats.append('passthrough')
+ cats.append("passthrough")
ws.append(None)
mapping = column_map[1]
intervals = []
@@ -168,25 +170,22 @@ def woeenc_to_sklearn(op_mapping):
return skl
-def woe_encoder_parser(
- scope, model, inputs, custom_parsers=None):
+def woe_encoder_parser(scope, model, inputs, custom_parsers=None):
if len(inputs) != 1:
- raise RuntimeError(
- "Unexpected number of inputs: %d != 1." % len(inputs))
+ raise RuntimeError("Unexpected number of inputs: %d != 1." % len(inputs))
if inputs[0].type is None:
- raise RuntimeError(
- "Unexpected type: %r." % (inputs[0], ))
+ raise RuntimeError("Unexpected type: %r." % (inputs[0],))
alias = get_model_alias(type(model))
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs.append(inputs[0])
this_operator.outputs.append(
- scope.declare_local_variable('catwoe', FloatTensorType()))
+ scope.declare_local_variable("catwoe", FloatTensorType())
+ )
return this_operator.outputs
def woe_encoder_shape_calculator(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
input_dim = operator.inputs[0].get_first_dimension()
shape = operator.inputs[0].type.shape
second_dim = None if len(shape) != 2 else shape[1]
@@ -199,21 +198,26 @@ def woe_encoder_converter(scope, operator, container):
opv = container.target_opset
X = operator.inputs[0]
- sub = OnnxSubEstimator(op.ordinal_encoder, X,
- op_version=opv)
+ sub = OnnxSubEstimator(op.ordinal_encoder, X, op_version=opv)
cast = OnnxCast(sub, op_version=opv, to=np.float32)
skl_ord = woeenc_to_sklearn(op.mapping)
- cat = OnnxSubEstimator(skl_ord, cast, op_version=opv,
- output_names=operator.outputs[:1],
- input_types=[FloatTensorType()])
+ cat = OnnxSubEstimator(
+ skl_ord,
+ cast,
+ op_version=opv,
+ output_names=operator.outputs[:1],
+ input_types=[FloatTensorType()],
+ )
cat.add_to(scope, container)
update_registered_converter(
- WOEEncoder, "CategoricalEncoderWOEEncoder",
+ WOEEncoder,
+ "CategoricalEncoderWOEEncoder",
woe_encoder_shape_calculator,
woe_encoder_converter,
- parser=woe_encoder_parser)
+ parser=woe_encoder_parser,
+)
###################################
@@ -229,4 +233,4 @@ def woe_encoder_converter(scope, operator, container):
woe_onx = to_onnx(woe, X[:1], target_opset=14)
sess = InferenceSession(woe_onx.SerializeToString())
-print(sess.run(None, {'X': X[:5]})[0])
+print(sess.run(None, {"X": X[:5]})[0])
diff --git a/docs/tutorial/plot_cbegin_opset.py b/docs/tutorial/plot_cbegin_opset.py
index 89f3a6458..b0c0bfb38 100644
--- a/docs/tutorial/plot_cbegin_opset.py
+++ b/docs/tutorial/plot_cbegin_opset.py
@@ -48,7 +48,7 @@
fig, ax = plt.subplots(1, 1)
for k in (-1, 1):
- ax.plot(X[labels == k, 0], X[labels == k, 1], 'o', label="cl%d" % k)
+ ax.plot(X[labels == k, 0], X[labels == k, 1], "o", label="cl%d" % k)
ax.set_title("Sample")
#######################################
@@ -56,8 +56,9 @@
# ++++
-onx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset={'': 15, 'ai.onnx.ml': 2})
+onx = to_onnx(
+ model, X[:1].astype(numpy.float32), target_opset={"": 15, "ai.onnx.ml": 2}
+)
print(onx)
##########################
@@ -82,20 +83,22 @@
def get_domain_opset(onx):
domains = onx.opset_import
- res = [{'domain': dom.domain, 'version': dom.version}
- for dom in domains]
- return {d['domain']: d['version'] for d in res}
+ res = [{"domain": dom.domain, "version": dom.version} for dom in domains]
+ return {d["domain"]: d["version"] for d in res}
for opset in range(6, onnx_opset_version() + 1):
try:
- onx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset={'': opset, 'ai.onnx.ml': 2})
+ onx = to_onnx(
+ model,
+ X[:1].astype(numpy.float32),
+ target_opset={"": opset, "ai.onnx.ml": 2},
+ )
except RuntimeError as e:
- print('target: %r error: %r' % (opset, e))
+ print("target: %r error: %r" % (opset, e))
continue
nodes = len(onx.graph.node)
- print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes))
+ print("target: %r --> %s %d" % (opset, get_domain_opset(onx), nodes))
########################################
# It shows that the model cannot be converted for opset
@@ -112,13 +115,12 @@ def get_domain_opset(onx):
for opset in range(9, onnx_opset_version() + 1):
for opset_ml in range(1, 4):
- tops = {'': opset, 'ai.onnx.ml': opset_ml}
+ tops = {"": opset, "ai.onnx.ml": opset_ml}
try:
print("try target_opset:", tops)
- onx = to_onnx(
- model, X[:1].astype(numpy.float32), target_opset=tops)
+ onx = to_onnx(model, X[:1].astype(numpy.float32), target_opset=tops)
except RuntimeError as e:
- print('target: %r error: %r' % (opset, e))
+ print("target: %r error: %r" % (opset, e))
continue
nodes = len(onx.graph.node)
- print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes))
+ print("target: %r --> %s %d" % (opset, get_domain_opset(onx), nodes))
diff --git a/docs/tutorial/plot_dbegin_options.py b/docs/tutorial/plot_dbegin_options.py
index 1d0468aa6..47b813f3a 100644
--- a/docs/tutorial/plot_dbegin_options.py
+++ b/docs/tutorial/plot_dbegin_options.py
@@ -69,7 +69,6 @@
from pprint import pformat
import numpy
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
from onnx.reference import ReferenceEvaluator
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
@@ -87,8 +86,9 @@
clr = LogisticRegression()
clr.fit(X_train, y_train)
-model_def = to_onnx(clr, X_train.astype(numpy.float32),
- options={id(clr): {'zipmap': False}})
+model_def = to_onnx(
+ clr, X_train.astype(numpy.float32), options={id(clr): {"zipmap": False}}
+)
oinf = ReferenceEvaluator(model_def)
print(oinf)
@@ -97,8 +97,7 @@
# Using function *id* has one flaw: it is not picklable.
# It is just better to use strings.
-model_def = to_onnx(clr, X_train.astype(numpy.float32),
- options={'zipmap': False})
+model_def = to_onnx(clr, X_train.astype(numpy.float32), options={"zipmap": False})
oinf = ReferenceEvaluator(model_def)
print(oinf)
@@ -111,14 +110,10 @@
# name convention.
-pipe = Pipeline([
- ('norm', MinMaxScaler()),
- ('clr', LogisticRegression())
-])
+pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())])
pipe.fit(X_train, y_train)
-model_def = to_onnx(pipe, X_train.astype(numpy.float32),
- options={'clr__zipmap': False})
+model_def = to_onnx(pipe, X_train.astype(numpy.float32), options={"clr__zipmap": False})
oinf = ReferenceEvaluator(model_def)
print(oinf)
@@ -132,29 +127,28 @@
# First, with probabilities:
-pipe = Pipeline([
- ('norm', MinMaxScaler()),
- ('clr', LogisticRegression())
-])
+pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())])
pipe.fit(X_train, y_train)
model_def = to_onnx(
- pipe, X_train.astype(numpy.float32),
- options={id(pipe): {'zipmap': False}})
+ pipe, X_train.astype(numpy.float32), options={id(pipe): {"zipmap": False}}
+)
oinf = ReferenceEvaluator(model_def)
-print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]}))
+print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]}))
#######################################
# Then with raw scores:
model_def = to_onnx(
- pipe, X_train.astype(numpy.float32),
- options={id(pipe): {'raw_scores': True, 'zipmap': False}})
+ pipe,
+ X_train.astype(numpy.float32),
+ options={id(pipe): {"raw_scores": True, "zipmap": False}},
+)
oinf = ReferenceEvaluator(model_def)
-print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]}))
+print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]}))
#########################################
# It did not seem to work... We need to tell
@@ -162,22 +156,26 @@
# and not the whole pipeline.
model_def = to_onnx(
- pipe, X_train.astype(numpy.float32),
- options={id(pipe.steps[1][1]): {'raw_scores': True, 'zipmap': False}})
+ pipe,
+ X_train.astype(numpy.float32),
+ options={id(pipe.steps[1][1]): {"raw_scores": True, "zipmap": False}},
+)
oinf = ReferenceEvaluator(model_def)
-print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]}))
+print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]}))
###########################################
# There are negative values. That works.
# Strings are still easier to use.
model_def = to_onnx(
- pipe, X_train.astype(numpy.float32),
- options={'clr__raw_scores': True, 'clr__zipmap': False})
+ pipe,
+ X_train.astype(numpy.float32),
+ options={"clr__raw_scores": True, "clr__zipmap": False},
+)
oinf = ReferenceEvaluator(model_def)
-print(oinf.run(None, {'X': X.astype(numpy.float32)[:5]}))
+print(oinf.run(None, {"X": X.astype(numpy.float32)[:5]}))
#########################################
@@ -196,9 +194,11 @@
paths, n_nodes_ptr = clrrf.decision_path(X_test[:2])
print(paths.todense())
-model_def = to_onnx(clrrf, X_train.astype(numpy.float32),
- options={id(clrrf): {'decision_path': True,
- 'zipmap': False}})
+model_def = to_onnx(
+ clrrf,
+ X_train.astype(numpy.float32),
+ options={id(clrrf): {"decision_path": True, "zipmap": False}},
+)
sess = InferenceSession(model_def.SerializeToString())
##########################################
@@ -209,7 +209,7 @@
##########################################
# Let's display the last one.
-res = sess.run(None, {'X': X_test[:2].astype(numpy.float32)})
+res = sess.run(None, {"X": X_test[:2].astype(numpy.float32)})
print(res[-1])
############################################################
@@ -225,9 +225,9 @@
opts = v.get_allowed_options()
if not isinstance(opts, dict):
continue
- name = k.replace('Sklearn', '')
- print('%s%s %r' % (name, " " * (30 - len(name)), opts))
+ name = k.replace("Sklearn", "")
+ print("%s%s %r" % (name, " " * (30 - len(name)), opts))
for o in opts:
all_opts.add(o)
-print('all options:', pformat(list(sorted(all_opts))))
+print("all options:", pformat(list(sorted(all_opts))))
diff --git a/docs/tutorial/plot_dbegin_options_list.py b/docs/tutorial/plot_dbegin_options_list.py
index b6c02fc7e..16dedc475 100644
--- a/docs/tutorial/plot_dbegin_options_list.py
+++ b/docs/tutorial/plot_dbegin_options_list.py
@@ -19,8 +19,6 @@
The first converter to change its behaviour depending on a black list
of operators is for model *GaussianMixture*.
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
-from onnx.reference import ReferenceEvaluator
from timeit import timeit
import numpy
from onnxruntime import InferenceSession
@@ -39,15 +37,18 @@
# ++++++++++++++++++
model_onnx = to_onnx(
- model, X_train[:1].astype(numpy.float32),
- options={id(model): {'score_samples': True}},
- target_opset=12)
-sess = InferenceSession(model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model,
+ X_train[:1].astype(numpy.float32),
+ options={id(model): {"score_samples": True}},
+ target_opset=12,
+)
+sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
xt = X_test[:5].astype(numpy.float32)
print(model.score_samples(xt))
-print(sess.run(None, {'X': xt})[2])
+print(sess.run(None, {"X": xt})[2])
###################################
@@ -59,26 +60,37 @@
# produces in that case.
model_onnx2 = to_onnx(
- model, X_train[:1].astype(numpy.float32),
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp'},
- target_opset=12)
-sess2 = InferenceSession(model_onnx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model,
+ X_train[:1].astype(numpy.float32),
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp"},
+ target_opset=12,
+)
+sess2 = InferenceSession(
+ model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"]
+)
xt = X_test[:5].astype(numpy.float32)
print(model.score_samples(xt))
-print(sess2.run(None, {'X': xt})[2])
+print(sess2.run(None, {"X": xt})[2])
#######################################
# Processing time
# +++++++++++++++
-print(timeit(stmt="sess.run(None, {'X': xt})",
- number=10000, globals={'sess': sess, 'xt': xt}))
+print(
+ timeit(
+ stmt="sess.run(None, {'X': xt})", number=10000, globals={"sess": sess, "xt": xt}
+ )
+)
-print(timeit(stmt="sess2.run(None, {'X': xt})",
- number=10000, globals={'sess2': sess2, 'xt': xt}))
+print(
+ timeit(
+ stmt="sess2.run(None, {'X': xt})",
+ number=10000,
+ globals={"sess2": sess2, "xt": xt},
+ )
+)
#################################
# The model using ReduceLogSumExp is much faster.
@@ -94,9 +106,11 @@
try:
to_onnx(
- model, X_train[:1].astype(numpy.float32),
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp', 'Add'},
- target_opset=12)
+ model,
+ X_train[:1].astype(numpy.float32),
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp", "Add"},
+ target_opset=12,
+ )
except RuntimeError as e:
- print('Error:', e)
+ print("Error:", e)
diff --git a/docs/tutorial/plot_dbegin_options_zipmap.py b/docs/tutorial/plot_dbegin_options_zipmap.py
index 9194233ee..d7eae7be8 100644
--- a/docs/tutorial/plot_dbegin_options_zipmap.py
+++ b/docs/tutorial/plot_dbegin_options_zipmap.py
@@ -49,7 +49,7 @@
# dictionaries.
sess = rt.InferenceSession(onx.SerializeToString())
-res = sess.run(None, {'X': X_test})
+res = sess.run(None, {"X": X_test})
print(res[1][:2])
print("probabilities type:", type(res[1]))
print("type for the first observations:", type(res[1][0]))
@@ -60,12 +60,12 @@
#
# Probabilities are now a matrix.
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-options = {id(clr): {'zipmap': False}}
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+options = {id(clr): {"zipmap": False}}
onx2 = to_onnx(clr, X_train, options=options, target_opset=12)
sess2 = rt.InferenceSession(onx2.SerializeToString())
-res2 = sess2.run(None, {'X': X_test})
+res2 = sess2.run(None, {"X": X_test})
print(res2[1][:2])
print("probabilities type:", type(res2[1]))
print("type for the first observations:", type(res2[1][0]))
@@ -78,14 +78,17 @@
# the probabilities into columns. The final model produces
# one output for the label, and one output per class.
-options = {id(clr): {'zipmap': 'columns'}}
+options = {id(clr): {"zipmap": "columns"}}
onx3 = to_onnx(clr, X_train, options=options, target_opset=12)
sess3 = rt.InferenceSession(onx3.SerializeToString())
-res3 = sess3.run(None, {'X': X_test})
+res3 = sess3.run(None, {"X": X_test})
for i, out in enumerate(sess3.get_outputs()):
- print("output: '{}' shape={} values={}...".format(
- out.name, res3[i].shape, res3[i][:2]))
+ print(
+ "output: '{}' shape={} values={}...".format(
+ out.name, res3[i].shape, res3[i][:2]
+ )
+ )
###################################
@@ -93,16 +96,13 @@
# +++++++++++++++++++++++++++++
print("Average time with ZipMap:")
-print(sum(repeat(lambda: sess.run(None, {'X': X_test}),
- number=100, repeat=10)) / 10)
+print(sum(repeat(lambda: sess.run(None, {"X": X_test}), number=100, repeat=10)) / 10)
print("Average time without ZipMap:")
-print(sum(repeat(lambda: sess2.run(None, {'X': X_test}),
- number=100, repeat=10)) / 10)
+print(sum(repeat(lambda: sess2.run(None, {"X": X_test}), number=100, repeat=10)) / 10)
print("Average time without ZipMap but with columns:")
-print(sum(repeat(lambda: sess3.run(None, {'X': X_test}),
- number=100, repeat=10)) / 10)
+print(sum(repeat(lambda: sess3.run(None, {"X": X_test}), number=100, repeat=10)) / 10)
# The prediction is much faster without ZipMap
# on this example.
@@ -120,12 +120,12 @@
# `output_class_labels` can be used to expose the labels
# as a third output.
-initial_type = [('float_input', FloatTensorType([None, 4]))]
-options = {id(clr): {'zipmap': False, 'output_class_labels': True}}
+initial_type = [("float_input", FloatTensorType([None, 4]))]
+options = {id(clr): {"zipmap": False, "output_class_labels": True}}
onx4 = to_onnx(clr, X_train, options=options, target_opset=12)
sess4 = rt.InferenceSession(onx4.SerializeToString())
-res4 = sess4.run(None, {'X': X_test})
+res4 = sess4.run(None, {"X": X_test})
print(res4[1][:2])
print("probabilities type:", type(res4[1]))
print("class labels:", res4[2])
@@ -134,8 +134,7 @@
# Processing time.
print("Average time without ZipMap but with output_class_labels:")
-print(sum(repeat(lambda: sess4.run(None, {'X': X_test}),
- number=100, repeat=10)) / 10)
+print(sum(repeat(lambda: sess4.run(None, {"X": X_test}), number=100, repeat=10)) / 10)
###########################################
# MultiOutputClassifier
@@ -161,18 +160,22 @@
onx5 = to_onnx(clr, X_train, target_opset=12)
sess5 = rt.InferenceSession(onx5.SerializeToString())
-res5 = sess5.run(None, {'X': X_test[:3]})
+res5 = sess5.run(None, {"X": X_test[:3]})
print(res5)
########################################
# Option zipmap is ignored. Labels are missing but they can be
# added back as a third output.
-onx6 = to_onnx(clr, X_train, target_opset=12,
- options={'zipmap': False, 'output_class_labels': True})
+onx6 = to_onnx(
+ clr,
+ X_train,
+ target_opset=12,
+ options={"zipmap": False, "output_class_labels": True},
+)
sess6 = rt.InferenceSession(onx6.SerializeToString())
-res6 = sess6.run(None, {'X': X_test[:3]})
+res6 = sess6.run(None, {"X": X_test[:3]})
print("predicted labels", res6[0])
print("predicted probabilies", res6[1])
print("class labels", res6[2])
diff --git a/docs/tutorial/plot_ebegin_float_double.py b/docs/tutorial/plot_ebegin_float_double.py
index e95e979b6..f61db244c 100644
--- a/docs/tutorial/plot_ebegin_float_double.py
+++ b/docs/tutorial/plot_ebegin_float_double.py
@@ -63,15 +63,18 @@
def area_mismatch_rule(N, delta, factor, rule=None):
if rule is None:
- def rule(t): return numpy.float32(t)
+
+ def rule(t):
+ return numpy.float32(t)
+
xst = []
yst = []
xsf = []
ysf = []
for x in range(-N, N):
for y in range(-N, N):
- dx = (1. + x * delta) * factor
- dy = (1. + y * delta) * factor
+ dx = (1.0 + x * delta) * factor
+ dy = (1.0 + y * delta) * factor
c1 = 1 if numpy.float64(dx) <= numpy.float64(dy) else 0
c2 = 1 if numpy.float32(dx) <= rule(dy) else 0
key = abs(c1 - c2)
@@ -90,12 +93,12 @@ def rule(t): return numpy.float32(t)
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
-ax.plot(xst, yst, '.', label="agree")
-ax.plot(xsf, ysf, '.', label="disagree")
+ax.plot(xst, yst, ".", label="agree")
+ax.plot(xsf, ysf, ".", label="disagree")
ax.set_title("Region where x <= y and (float)x <= (float)y agree")
ax.set_xlabel("x")
ax.set_ylabel("y")
-ax.plot([min(xst), max(xst)], [min(yst), max(yst)], 'k--')
+ax.plot([min(xst), max(xst)], [min(yst), max(yst)], "k--")
ax.legend()
@@ -115,15 +118,14 @@ def rule(t): return numpy.float32(t)
Xi_train, yi_train = X_train.copy(), y_train.copy()
Xi_test, yi_test = X_test.copy(), y_test.copy()
for i in range(X.shape[1]):
- Xi_train[:, i] = (Xi_train[:, i] * 2 ** i).astype(numpy.int64)
- Xi_test[:, i] = (Xi_test[:, i] * 2 ** i).astype(numpy.int64)
+ Xi_train[:, i] = (Xi_train[:, i] * 2**i).astype(numpy.int64)
+ Xi_test[:, i] = (Xi_test[:, i] * 2**i).astype(numpy.int64)
max_depth = 10
-model = Pipeline([
- ('scaler', StandardScaler()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
-])
+model = Pipeline(
+ [("scaler", StandardScaler()), ("dt", DecisionTreeRegressor(max_depth=max_depth))]
+)
model.fit(Xi_train, yi_train)
@@ -143,15 +145,14 @@ def diff(p1, p2):
return d.max(), (d / numpy.abs(p1)).max()
-onx = to_onnx(model, Xi_train[:1].astype(numpy.float32),
- target_opset=15)
+onx = to_onnx(model, Xi_train[:1].astype(numpy.float32), target_opset=15)
sess = InferenceSession(onx.SerializeToString())
X32 = Xi_test.astype(numpy.float32)
skl = model.predict(X32)
-ort = sess.run(None, {'X': X32})[0]
+ort = sess.run(None, {"X": X32})[0]
print(diff(skl, ort))
@@ -191,24 +192,25 @@ def diff(p1, p2):
#
-model2 = Pipeline([
- ('scaler', StandardScaler()),
- ('cast', CastTransformer()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
-])
+model2 = Pipeline(
+ [
+ ("scaler", StandardScaler()),
+ ("cast", CastTransformer()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+)
model2.fit(Xi_train, yi_train)
##########################################
# The discrepancies.
-onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32),
- target_opset=15)
+onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32), target_opset=15)
sess2 = InferenceSession(onx2.SerializeToString())
skl2 = model2.predict(X32)
-ort2 = sess2.run(None, {'X': X32})[0]
+ort2 = sess2.run(None, {"X": X32})[0]
print(diff(skl2, ort2))
@@ -219,22 +221,27 @@ def diff(p1, p2):
# the *dx* is still here. To remove it, we need to use
# double in ONNX normalizer.
-model3 = Pipeline([
- ('cast64', CastTransformer(dtype=numpy.float64)),
- ('scaler', StandardScaler()),
- ('cast', CastTransformer()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
-])
+model3 = Pipeline(
+ [
+ ("cast64", CastTransformer(dtype=numpy.float64)),
+ ("scaler", StandardScaler()),
+ ("cast", CastTransformer()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+)
model3.fit(Xi_train, yi_train)
-onx3 = to_onnx(model3, Xi_train[:1].astype(numpy.float32),
- options={StandardScaler: {'div': 'div_cast'}},
- target_opset=15)
+onx3 = to_onnx(
+ model3,
+ Xi_train[:1].astype(numpy.float32),
+ options={StandardScaler: {"div": "div_cast"}},
+ target_opset=15,
+)
sess3 = InferenceSession(onx3.SerializeToString())
skl3 = model3.predict(X32)
-ort3 = sess3.run(None, {'X': X32})[0]
+ort3 = sess3.run(None, {"X": X32})[0]
print(diff(skl3, ort3))
diff --git a/docs/tutorial/plot_fbegin_investigate.py b/docs/tutorial/plot_fbegin_investigate.py
index 422be0798..daecc4ca8 100644
--- a/docs/tutorial/plot_fbegin_investigate.py
+++ b/docs/tutorial/plot_fbegin_investigate.py
@@ -27,7 +27,6 @@
has *n* steps, it converts the pipeline with step 1,
then the pipeline with steps 1, 2, then 1, 2, 3...
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
from onnx.reference import ReferenceEvaluator
from onnxruntime import InferenceSession
@@ -45,10 +44,7 @@
data = load_iris()
X = data.data
-pipe = Pipeline(steps=[
- ('std', StandardScaler()),
- ('km', KMeans(3, n_init=3))
-])
+pipe = Pipeline(steps=[("std", StandardScaler()), ("km", KMeans(3, n_init=3))])
pipe.fit(X)
#################################
@@ -56,9 +52,8 @@
# overloads the methods *transform* and
# returns an ONNX graph for every step.
steps = collect_intermediate_steps(
- pipe, "pipeline",
- [("X", FloatTensorType([None, X.shape[1]]))],
- target_opset=17)
+ pipe, "pipeline", [("X", FloatTensorType([None, X.shape[1]]))], target_opset=17
+)
#####################################
# We call method transform to populate the
@@ -70,14 +65,15 @@
# ONNX and scikit-learn outputs.
for step in steps:
- print('----------------------------')
- print(step['model'])
- onnx_step = step['onnx_step']
- sess = InferenceSession(onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'X': X.astype(numpy.float32)})
+ print("----------------------------")
+ print(step["model"])
+ onnx_step = step["onnx_step"]
+ sess = InferenceSession(
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"X": X.astype(numpy.float32)})
onnx_output = onnx_outputs[-1]
- skl_outputs = step['model']._debug.outputs['transform']
+ skl_outputs = step["model"]._debug.outputs["transform"]
# comparison
diff = numpy.abs(skl_outputs.ravel() - onnx_output.ravel()).max()
@@ -99,17 +95,16 @@
# fails due to nan values or a dimension mismatch.
-onx = to_onnx(pipe, X[:1].astype(numpy.float32),
- target_opset=17)
+onx = to_onnx(pipe, X[:1].astype(numpy.float32), target_opset=17)
oinf = ReferenceEvaluator(onx, verbose=1)
-oinf.run(None, {'X': X[:2].astype(numpy.float32)})
+oinf.run(None, {"X": X[:2].astype(numpy.float32)})
###################################
# And to get a sense of the intermediate results.
oinf = ReferenceEvaluator(onx, verbose=3)
-oinf.run(None, {'X': X[:2].astype(numpy.float32)})
+oinf.run(None, {"X": X[:2].astype(numpy.float32)})
# This way is usually better if you need to investigate
# issues within the code of the runtime for an operator.
diff --git a/docs/tutorial/plot_gbegin_cst.py b/docs/tutorial/plot_gbegin_cst.py
index b2a9e33ba..0b4e21c18 100644
--- a/docs/tutorial/plot_gbegin_cst.py
+++ b/docs/tutorial/plot_gbegin_cst.py
@@ -26,18 +26,18 @@
from sklearn.model_selection import train_test_split
from skl2onnx import to_onnx
from skl2onnx.helpers.onnx_helper import (
- add_output_initializer, select_model_inputs_outputs)
+ add_output_initializer,
+ select_model_inputs_outputs,
+)
data = load_iris()
X, y = data.data.astype(numpy.float32), data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
-model = LogisticRegression(penalty='elasticnet', C=2.,
- solver='saga', l1_ratio=0.5)
+model = LogisticRegression(penalty="elasticnet", C=2.0, solver="saga", l1_ratio=0.5)
model.fit(X_train, y_train)
-onx = to_onnx(model, X_train[:1], target_opset=12,
- options={'zipmap': False})
+onx = to_onnx(model, X_train[:1], target_opset=12, options={"zipmap": False})
########################################
# Add training parameter
@@ -45,9 +45,8 @@
#
new_onx = add_output_initializer(
- onx,
- ['C', 'l1_ratio'],
- [numpy.array([model.C]), numpy.array([model.l1_ratio])])
+ onx, ["C", "l1_ratio"], [numpy.array([model.C]), numpy.array([model.l1_ratio])]
+)
########################################
# Inference
@@ -55,7 +54,7 @@
sess = InferenceSession(new_onx.SerializeToString())
print("output names:", [o.name for o in sess.get_outputs()])
-res = sess.run(None, {'X': X_test[:2]})
+res = sess.run(None, {"X": X_test[:2]})
print("outputs")
pprint.pprint(res)
@@ -72,11 +71,11 @@
# Next function removes unneeded outputs from a model,
# not only the constants. Next model only keeps the probabilities.
-simple_onx = select_model_inputs_outputs(new_onx, ['probabilities'])
+simple_onx = select_model_inputs_outputs(new_onx, ["probabilities"])
sess = InferenceSession(simple_onx.SerializeToString())
print("output names:", [o.name for o in sess.get_outputs()])
-res = sess.run(None, {'X': X_test[:2]})
+res = sess.run(None, {"X": X_test[:2]})
print("outputs")
pprint.pprint(res)
@@ -102,6 +101,6 @@
sess = InferenceSession(model.SerializeToString())
print("output names:", [o.name for o in sess.get_outputs()])
-res = sess.run(None, {'X': X_test[:2]})
+res = sess.run(None, {"X": X_test[:2]})
print("outputs")
pprint.pprint(res)
diff --git a/docs/tutorial/plot_gbegin_dataframe.py b/docs/tutorial/plot_gbegin_dataframe.py
index 739a056ba..d30dab6e7 100644
--- a/docs/tutorial/plot_gbegin_dataframe.py
+++ b/docs/tutorial/plot_gbegin_dataframe.py
@@ -20,37 +20,39 @@
import pprint
from onnx.reference import ReferenceEvaluator
from onnxruntime import InferenceSession
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
-from skl2onnx import to_onnx
from pandas import DataFrame
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
+from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
+from skl2onnx import to_onnx
+from skl2onnx.algebra.type_helper import guess_initial_types
-data = DataFrame([
- dict(CAT1='a', CAT2='c', num1=0.5, num2=0.6, y=0),
- dict(CAT1='b', CAT2='d', num1=0.4, num2=0.8, y=1),
- dict(CAT1='a', CAT2='d', num1=0.5, num2=0.56, y=0),
- dict(CAT1='a', CAT2='d', num1=0.55, num2=0.56, y=1),
- dict(CAT1='a', CAT2='c', num1=0.35, num2=0.86, y=0),
- dict(CAT1='a', CAT2='c', num1=0.5, num2=0.68, y=1),
-])
+data = DataFrame(
+ [
+ dict(CAT1="a", CAT2="c", num1=0.5, num2=0.6, y=0),
+ dict(CAT1="b", CAT2="d", num1=0.4, num2=0.8, y=1),
+ dict(CAT1="a", CAT2="d", num1=0.5, num2=0.56, y=0),
+ dict(CAT1="a", CAT2="d", num1=0.55, num2=0.56, y=1),
+ dict(CAT1="a", CAT2="c", num1=0.35, num2=0.86, y=0),
+ dict(CAT1="a", CAT2="c", num1=0.5, num2=0.68, y=1),
+ ]
+)
-cat_cols = ['CAT1', 'CAT2']
-train_data = data.drop('y', axis=1)
+cat_cols = ["CAT1", "CAT2"]
+train_data = data.drop("y", axis=1)
-categorical_transformer = Pipeline([
- ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))])
+categorical_transformer = Pipeline(
+ [("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
+)
preprocessor = ColumnTransformer(
- transformers=[
- ('cat', categorical_transformer, cat_cols)],
- remainder='passthrough')
-pipe = Pipeline([('preprocess', preprocessor),
- ('rf', RandomForestClassifier())])
-pipe.fit(train_data, data['y'])
+ transformers=[("cat", categorical_transformer, cat_cols)], remainder="passthrough"
+)
+pipe = Pipeline([("preprocess", preprocessor), ("rf", RandomForestClassifier())])
+pipe.fit(train_data, data["y"])
#####################################
# Display.
@@ -67,19 +69,7 @@
# Function *to_onnx* does not handle dataframes.
-try:
- onx = to_onnx(pipe, train_data[:1])
-except NotImplementedError as e:
- print(e)
-
-###################################
-# But it possible to use an extended one.
-
-
-onx = to_onnx_ext(
- pipe, train_data[:1],
- options={RandomForestClassifier: {'zipmap': False}})
-
+onx = to_onnx(pipe, train_data[:1], options={RandomForestClassifier: {"zipmap": False}})
#################################
# Prediction with ONNX
@@ -100,13 +90,13 @@
oinf = ReferenceEvaluator(onx)
got = oinf.run(None, train_data)
print(pipe.predict(train_data))
-print(got['label'])
+print(got["label"])
#################################
# And probabilities.
print(pipe.predict_proba(train_data))
-print(got['probabilities'])
+print(got["probabilities"])
######################################
# It looks ok. Let's dig into the details to
@@ -121,6 +111,21 @@
# the input type is the column type.
+def guess_schema_from_data(X):
+ init = guess_initial_types(X)
+ unique = set()
+ for _, col in init:
+ if len(col.shape) != 2:
+ return init
+ if col.shape[0] is not None:
+ return init
+ if len(unique) > 0 and col.__class__ not in unique:
+ return init
+ unique.add(col.__class__)
+ unique = list(unique)
+ return [("X", unique[0]([None, sum(_[1].shape[1] for _ in init)]))]
+
+
init = guess_schema_from_data(train_data)
pprint.pprint(init)
@@ -133,6 +138,7 @@
if c not in cat_cols:
train_data[c] = train_data[c].astype(numpy.float32)
+
init = guess_schema_from_data(train_data)
pprint.pprint(init)
@@ -140,8 +146,8 @@
# Let's convert with *skl2onnx* only.
onx2 = to_onnx(
- pipe, initial_types=init,
- options={RandomForestClassifier: {'zipmap': False}})
+ pipe, initial_types=init, options={RandomForestClassifier: {"zipmap": False}}
+)
#####################################
# Let's run it with onnxruntime.
@@ -149,8 +155,7 @@
# where column names become keys, and column values become
# values.
-inputs = {c: train_data[c].values.reshape((-1, 1))
- for c in train_data.columns}
+inputs = {c: train_data[c].values.reshape((-1, 1)) for c in train_data.columns}
pprint.pprint(inputs)
#############################
diff --git a/docs/tutorial/plot_gconverting.py b/docs/tutorial/plot_gconverting.py
index 880444bc6..3d2f41ab1 100644
--- a/docs/tutorial/plot_gconverting.py
+++ b/docs/tutorial/plot_gconverting.py
@@ -28,8 +28,7 @@
clr.fit(X_train, y_train)
-onx = to_onnx(clr, X, options={'zipmap': False},
- target_opset=15)
+onx = to_onnx(clr, X, options={"zipmap": False}, target_opset=15)
sess = InferenceSession(onx.SerializeToString())
input_names = [i.name for i in sess.get_inputs()]
@@ -46,9 +45,13 @@
# parameter *initial_types*. However, the user must specify the input
# types as well.
-onx = to_onnx(clr, X, options={'zipmap': False},
- initial_types=[('X56', FloatTensorType([None, X.shape[1]]))],
- target_opset=15)
+onx = to_onnx(
+ clr,
+ X,
+ options={"zipmap": False},
+ initial_types=[("X56", FloatTensorType([None, X.shape[1]]))],
+ target_opset=15,
+)
sess = InferenceSession(onx.SerializeToString())
input_names = [i.name for i in sess.get_inputs()]
@@ -64,10 +67,13 @@
# It is possible to change the input name by using the
# parameter *final_types*.
-onx = to_onnx(clr, X, options={'zipmap': False},
- final_types=[('L', Int64TensorType([None])),
- ('P', FloatTensorType([None, 3]))],
- target_opset=15)
+onx = to_onnx(
+ clr,
+ X,
+ options={"zipmap": False},
+ final_types=[("L", Int64TensorType([None])), ("P", FloatTensorType([None, 3]))],
+ target_opset=15,
+)
sess = InferenceSession(onx.SerializeToString())
input_names = [i.name for i in sess.get_inputs()]
@@ -92,8 +98,7 @@ def rename_results(proposed_name, existing_names):
return result
-onx = to_onnx(clr, X, options={'zipmap': False},
- naming=rename_results, target_opset=15)
+onx = to_onnx(clr, X, options={"zipmap": False}, naming=rename_results, target_opset=15)
sess = InferenceSession(onx.SerializeToString())
input_names = [i.name for i in sess.get_inputs()]
diff --git a/docs/tutorial/plot_gexternal_catboost.py b/docs/tutorial/plot_gexternal_catboost.py
index 5cf1d72a6..d4ee99478 100644
--- a/docs/tutorial/plot_gexternal_catboost.py
+++ b/docs/tutorial/plot_gexternal_catboost.py
@@ -18,7 +18,6 @@
Train a CatBoostClassifier
++++++++++++++++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
from onnx.helper import get_attribute_value
from sklearn.datasets import load_iris
@@ -26,8 +25,14 @@
from sklearn.preprocessing import StandardScaler
import onnxruntime as rt
from skl2onnx import convert_sklearn, update_registered_converter
-from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa
-from skl2onnx.common.data_types import FloatTensorType, Int64TensorType, guess_tensor_type
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_classifier_output_shapes,
+) # noqa
+from skl2onnx.common.data_types import (
+ FloatTensorType,
+ Int64TensorType,
+ guess_tensor_type,
+)
from skl2onnx._parse import _apply_zipmap, _get_sklearn_operator_name
from catboost import CatBoostClassifier
from catboost.utils import convert_to_onnx_object
@@ -41,8 +46,9 @@
X = X[ind, :].copy()
y = y[ind].copy()
-pipe = Pipeline([('scaler', StandardScaler()),
- ('lgbm', CatBoostClassifier(n_estimators=3))])
+pipe = Pipeline(
+ [("scaler", StandardScaler()), ("lgbm", CatBoostClassifier(n_estimators=3))]
+)
pipe.fit(X, y)
######################################
@@ -55,18 +61,19 @@
# needs to be wrapped.
-def skl2onnx_parser_castboost_classifier(scope, model, inputs,
- custom_parsers=None):
+def skl2onnx_parser_castboost_classifier(scope, model, inputs, custom_parsers=None):
options = scope.get_options(model, dict(zipmap=True))
- no_zipmap = isinstance(options['zipmap'], bool) and not options['zipmap']
+ no_zipmap = isinstance(options["zipmap"], bool) and not options["zipmap"]
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs = inputs
- label_variable = scope.declare_local_variable('label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
prob_dtype = guess_tensor_type(inputs[0].type)
- probability_tensor_variable = scope.declare_local_variable('probabilities', prob_dtype)
+ probability_tensor_variable = scope.declare_local_variable(
+ "probabilities", prob_dtype
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(probability_tensor_variable)
probability_tensor = this_operator.outputs
@@ -74,8 +81,9 @@ def skl2onnx_parser_castboost_classifier(scope, model, inputs,
if no_zipmap:
return probability_tensor
- return _apply_zipmap(options['zipmap'], scope, model,
- inputs[0].type, probability_tensor)
+ return _apply_zipmap(
+ options["zipmap"], scope, model, inputs[0].type, probability_tensor
+ )
def skl2onnx_convert_catboost(scope, operator, container):
@@ -85,45 +93,55 @@ def skl2onnx_convert_catboost(scope, operator, container):
"""
onx = convert_to_onnx_object(operator.raw_operator)
opsets = {d.domain: d.version for d in onx.opset_import}
- if '' in opsets and opsets[''] >= container.target_opset:
- raise RuntimeError(
- "CatBoost uses an opset more recent than the target one.")
+ if "" in opsets and opsets[""] >= container.target_opset:
+ raise RuntimeError("CatBoost uses an opset more recent than the target one.")
if len(onx.graph.initializer) > 0 or len(onx.graph.sparse_initializer) > 0:
raise NotImplementedError(
- "CatBoost returns a model initializers. This option is not implemented yet.")
- if (len(onx.graph.node) not in (1, 2) or not onx.graph.node[0].op_type.startswith("TreeEnsemble") or
- (len(onx.graph.node) == 2 and onx.graph.node[1].op_type != "ZipMap")):
+            "CatBoost returns a model with initializers. This option is not implemented yet."
+ )
+ if (
+ len(onx.graph.node) not in (1, 2)
+ or not onx.graph.node[0].op_type.startswith("TreeEnsemble")
+ or (len(onx.graph.node) == 2 and onx.graph.node[1].op_type != "ZipMap")
+ ):
types = ", ".join(map(lambda n: n.op_type, onx.graph.node))
raise NotImplementedError(
f"CatBoost returns {len(onx.graph.node)} != 1 (types={types}). "
- f"This option is not implemented yet.")
+ f"This option is not implemented yet."
+ )
node = onx.graph.node[0]
atts = {}
for att in node.attribute:
atts[att.name] = get_attribute_value(att)
container.add_node(
- node.op_type, [operator.inputs[0].full_name],
+ node.op_type,
+ [operator.inputs[0].full_name],
[operator.outputs[0].full_name, operator.outputs[1].full_name],
- op_domain=node.domain, op_version=opsets.get(node.domain, None),
- **atts)
+ op_domain=node.domain,
+ op_version=opsets.get(node.domain, None),
+ **atts,
+ )
update_registered_converter(
CatBoostClassifier,
- 'CatBoostCatBoostClassifier',
+ "CatBoostCatBoostClassifier",
calculate_linear_classifier_output_shapes,
skl2onnx_convert_catboost,
parser=skl2onnx_parser_castboost_classifier,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
##################################
# Convert
# +++++++
model_onnx = convert_sklearn(
- pipe, 'pipeline_catboost',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ pipe,
+ "pipeline_catboost",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+)
# And save.
with open("pipeline_catboost.onnx", "wb") as f:
diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py
index e11f38afe..1149e93d5 100644
--- a/docs/tutorial/plot_gexternal_lightgbm.py
+++ b/docs/tutorial/plot_gexternal_lightgbm.py
@@ -19,11 +19,14 @@
Train a LightGBM classifier
+++++++++++++++++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import onnxruntime as rt
from skl2onnx import convert_sklearn, update_registered_converter
-from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_classifier_output_shapes,
+) # noqa
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+ convert_lightgbm,
+) # noqa
from skl2onnx.common.data_types import FloatTensorType
import numpy
from sklearn.datasets import load_iris
@@ -40,8 +43,9 @@
X = X[ind, :].copy()
y = y[ind].copy()
-pipe = Pipeline([('scaler', StandardScaler()),
- ('lgbm', LGBMClassifier(n_estimators=3))])
+pipe = Pipeline(
+ [("scaler", StandardScaler()), ("lgbm", LGBMClassifier(n_estimators=3))]
+)
pipe.fit(X, y)
######################################
@@ -58,18 +62,23 @@
# lightgbm/shape_calculators/Classifier.py>`_.
update_registered_converter(
- LGBMClassifier, 'LightGbmLGBMClassifier',
- calculate_linear_classifier_output_shapes, convert_lightgbm,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ LGBMClassifier,
+ "LightGbmLGBMClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_lightgbm,
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
##################################
# Convert again
# +++++++++++++
model_onnx = convert_sklearn(
- pipe, 'pipeline_lightgbm',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ pipe,
+ "pipeline_lightgbm",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+)
# And save.
with open("pipeline_lightgbm.onnx", "wb") as f:
diff --git a/docs/tutorial/plot_gexternal_lightgbm_reg.py b/docs/tutorial/plot_gexternal_lightgbm_reg.py
index 9ccbfa594..2f243d002 100644
--- a/docs/tutorial/plot_gexternal_lightgbm_reg.py
+++ b/docs/tutorial/plot_gexternal_lightgbm_reg.py
@@ -45,15 +45,20 @@
from lightgbm import LGBMRegressor
from onnxruntime import InferenceSession
from skl2onnx import to_onnx, update_registered_converter
-from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes # noqa
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_regressor_output_shapes,
+) # noqa
from onnxmltools import __version__ as oml_version
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+ convert_lightgbm,
+) # noqa
N = 1000
X = numpy.random.randn(N, 20)
-y = (numpy.random.randn(N) +
- numpy.random.randn(N) * 100 * numpy.random.randint(0, 1, 1000))
+y = numpy.random.randn(N) + numpy.random.randn(N) * 100 * numpy.random.randint(
+ 0, 1, 1000
+)
reg = LGBMRegressor(n_estimators=1000)
reg.fit(X, y)
@@ -74,22 +79,25 @@
def skl2onnx_convert_lightgbm(scope, operator, container):
options = scope.get_options(operator.raw_operator)
- if 'split' in options:
- if pv.Version(oml_version) < pv.Version('1.9.2'):
+ if "split" in options:
+ if pv.Version(oml_version) < pv.Version("1.9.2"):
warnings.warn(
"Option split was released in version 1.9.2 but %s is "
- "installed. It will be ignored." % oml_version)
- operator.split = options['split']
+ "installed. It will be ignored." % oml_version
+ )
+ operator.split = options["split"]
else:
operator.split = None
convert_lightgbm(scope, operator, container)
update_registered_converter(
- LGBMRegressor, 'LightGbmLGBMRegressor',
+ LGBMRegressor,
+ "LightGbmLGBMRegressor",
calculate_linear_regressor_output_shapes,
skl2onnx_convert_lightgbm,
- options={'split': None})
+ options={"split": None},
+)
##################################
# Convert
@@ -99,11 +107,15 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
# TreeEnsembleRegressor node, or more. The *split* parameter is the number of
# trees per TreeEnsembleRegressor node.
-model_onnx = to_onnx(reg, X[:1].astype(numpy.float32),
- target_opset={'': 14, 'ai.onnx.ml': 2})
-model_onnx_split = to_onnx(reg, X[:1].astype(numpy.float32),
- target_opset={'': 14, 'ai.onnx.ml': 2},
- options={'split': 100})
+model_onnx = to_onnx(
+ reg, X[:1].astype(numpy.float32), target_opset={"": 14, "ai.onnx.ml": 2}
+)
+model_onnx_split = to_onnx(
+ reg,
+ X[:1].astype(numpy.float32),
+ target_opset={"": 14, "ai.onnx.ml": 2},
+ options={"split": 100},
+)
##########################
# Discrepancies
@@ -114,15 +126,14 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
X32 = X.astype(numpy.float32)
expected = reg.predict(X32)
-got = sess.run(None, {'X': X32})[0].ravel()
-got_split = sess_split.run(None, {'X': X32})[0].ravel()
+got = sess.run(None, {"X": X32})[0].ravel()
+got_split = sess_split.run(None, {"X": X32})[0].ravel()
disp = numpy.abs(got - expected).sum()
disp_split = numpy.abs(got_split - expected).sum()
print("sum of discrepancies 1 node", disp)
-print("sum of discrepancies split node",
- disp_split, "ratio:", disp / disp_split)
+print("sum of discrepancies split node", disp_split, "ratio:", disp / disp_split)
######################################
# The sum of the discrepancies was reduced by a factor of 4 to 5.
@@ -140,12 +151,14 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
#
# The processing time is slower but not much.
-print("processing time no split",
- timeit.timeit(
- lambda: sess.run(None, {'X': X32})[0], number=150))
-print("processing time split",
- timeit.timeit(
- lambda: sess_split.run(None, {'X': X32})[0], number=150))
+print(
+ "processing time no split",
+ timeit.timeit(lambda: sess.run(None, {"X": X32})[0], number=150),
+)
+print(
+ "processing time split",
+ timeit.timeit(lambda: sess_split.run(None, {"X": X32})[0], number=150),
+)
#############################################
# Split influence
@@ -156,23 +169,27 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
res = []
for i in tqdm(list(range(20, 170, 20)) + [200, 300, 400, 500]):
- model_onnx_split = to_onnx(reg, X[:1].astype(numpy.float32),
- target_opset={'': 14, 'ai.onnx.ml': 2},
- options={'split': i})
+ model_onnx_split = to_onnx(
+ reg,
+ X[:1].astype(numpy.float32),
+ target_opset={"": 14, "ai.onnx.ml": 2},
+ options={"split": i},
+ )
sess_split = InferenceSession(model_onnx_split.SerializeToString())
- got_split = sess_split.run(None, {'X': X32})[0].ravel()
+ got_split = sess_split.run(None, {"X": X32})[0].ravel()
disc_split = numpy.abs(got_split - expected).max()
res.append(dict(split=i, disc=disc_split))
-df = DataFrame(res).set_index('split')
+df = DataFrame(res).set_index("split")
df["baseline"] = disc
print(df)
##########################################
# Graph.
_, ax = plt.subplots(1, 1)
-df.plot(title="Sum of discrepancies against split\n"
- "split = number of tree per node",
- ax=ax)
+df.plot(
+ title="Sum of discrepancies against split\n" "split = number of tree per node",
+ ax=ax,
+)
# plt.show()
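
The reduction presumably comes from accumulating the 1000 float32 tree outputs in
smaller partial sums rather than one long running sum. A rough, self-contained
illustration of that rounding effect (illustrative only, not taken from the tutorial):

import numpy

rng = numpy.random.RandomState(0)
leaf_values = rng.randn(1000).astype(numpy.float32)  # stand-in for one output per tree
reference = leaf_values.astype(numpy.float64).sum()  # high-precision reference

single = numpy.float32(0.0)
for v in leaf_values:  # one long running sum, similar to a single TreeEnsembleRegressor
    single += v

chunked = numpy.float32(0.0)
for i in range(0, 1000, 100):  # partial sums, similar to split=100
    chunked += leaf_values[i:i + 100].sum()

print("single sum error :", abs(float(single) - reference))
print("chunked sum error:", abs(float(chunked) - reference))

The chunked total usually stays closer to the float64 reference, which is consistent
with the smaller discrepancies measured above.
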
diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py
index 3cf467ff5..15ed13263 100644
--- a/docs/tutorial/plot_gexternal_xgboost.py
+++ b/docs/tutorial/plot_gexternal_xgboost.py
@@ -20,7 +20,6 @@
Train an XGBoost classifier
++++++++++++++++++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
import onnxruntime as rt
from sklearn.datasets import load_iris, load_diabetes, make_classification
@@ -32,9 +31,9 @@
from skl2onnx import convert_sklearn, to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import (
calculate_linear_classifier_output_shapes,
- calculate_linear_regressor_output_shapes)
-from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
- convert_xgboost)
+ calculate_linear_regressor_output_shapes,
+)
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert import convert_xgboost as convert_xgboost_booster
@@ -47,16 +46,18 @@
X = X[ind, :].copy()
y = y[ind].copy()
-pipe = Pipeline([('scaler', StandardScaler()),
- ('xgb', XGBClassifier(n_estimators=3))])
+pipe = Pipeline([("scaler", StandardScaler()), ("xgb", XGBClassifier(n_estimators=3))])
pipe.fit(X, y)
# The conversion fails but it is expected.
try:
- convert_sklearn(pipe, 'pipeline_xgboost',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ convert_sklearn(
+ pipe,
+ "pipeline_xgboost",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+ )
except Exception as e:
print(e)
@@ -82,18 +83,23 @@
# xgboost/shape_calculators/Classifier.py>`_.
update_registered_converter(
- XGBClassifier, 'XGBoostXGBClassifier',
- calculate_linear_classifier_output_shapes, convert_xgboost,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ XGBClassifier,
+ "XGBoostXGBClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_xgboost,
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
##################################
# Convert again
# +++++++++++++
model_onnx = convert_sklearn(
- pipe, 'pipeline_xgboost',
- [('input', FloatTensorType([None, 2]))],
- target_opset={'': 12, 'ai.onnx.ml': 2})
+ pipe,
+ "pipeline_xgboost",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+)
# And save.
with open("pipeline_xgboost.onnx", "wb") as f:
@@ -121,8 +127,11 @@
# ++++++++++++++++++++++++++++++
update_registered_converter(
- XGBRegressor, 'XGBoostXGBRegressor',
- calculate_linear_regressor_output_shapes, convert_xgboost)
+ XGBRegressor,
+ "XGBoostXGBRegressor",
+ calculate_linear_regressor_output_shapes,
+ convert_xgboost,
+)
data = load_diabetes()
@@ -130,8 +139,7 @@
y = data.target
X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5)
-pipe = Pipeline([('scaler', StandardScaler()),
- ('xgb', XGBRegressor(n_estimators=3))])
+pipe = Pipeline([("scaler", StandardScaler()), ("xgb", XGBRegressor(n_estimators=3))])
pipe.fit(X_train, y_train)
print("predict", pipe.predict(X_test[:5]))
@@ -139,8 +147,9 @@
#############################
# ONNX
-onx = to_onnx(pipe, X_train.astype(numpy.float32),
- target_opset={'': 12, 'ai.onnx.ml': 2})
+onx = to_onnx(
+ pipe, X_train.astype(numpy.float32), target_opset={"": 12, "ai.onnx.ml": 2}
+)
sess = rt.InferenceSession(onx.SerializeToString())
pred_onx = sess.run(None, {"X": X_test[:5].astype(numpy.float32)})
@@ -158,18 +167,17 @@
# a different conversion function because it does not
# follow the :epkg:`scikit-learn` API.
-x, y = make_classification(n_classes=2, n_features=5,
- n_samples=100,
- random_state=42, n_informative=3)
-X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5,
- random_state=42)
+x, y = make_classification(
+ n_classes=2, n_features=5, n_samples=100, random_state=42, n_informative=3
+)
+X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5, random_state=42)
dtrain = DMatrix(X_train, label=y_train)
-param = {'objective': 'multi:softmax', 'num_class': 3}
+param = {"objective": "multi:softmax", "num_class": 3}
bst = train_xgb(param, dtrain, 10)
-initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
+initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))]
try:
onx = convert_xgboost_booster(bst, "name", initial_types=initial_type)
@@ -182,6 +190,5 @@
sess = rt.InferenceSession(onx.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
- pred_onx = sess.run(
- [label_name], {input_name: X_test.astype(numpy.float32)})[0]
+ pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
diff --git a/docs/tutorial/plot_icustom_converter.py b/docs/tutorial/plot_icustom_converter.py
index 9db907adf..7a82d94b8 100644
--- a/docs/tutorial/plot_icustom_converter.py
+++ b/docs/tutorial/plot_icustom_converter.py
@@ -31,7 +31,6 @@
If *X* is a matrix of features, :math:`V=\\frac{1}{n}X'X`
is the covariance matrix. We compute :math:`X V^{1/2}`.
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import pickle
from io import BytesIO
import numpy
@@ -58,15 +57,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator):
* `self.coef_`: square root of the covariance matrix
"""
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
def fit(self, X, y=None, sample_weights=None):
if sample_weights is not None:
- raise NotImplementedError(
- "sample_weights != None is not implemented.")
+ raise NotImplementedError("sample_weights != None is not implemented.")
self.mean_ = numpy.mean(X, axis=0, keepdims=True)
X = X - self.mean_
V = X.T @ X / X.shape[0]
@@ -186,7 +184,9 @@ def decorrelate_transformer_converter(scope, operator, container):
Y = OnnxMatMul(
OnnxSub(X, op.mean_.astype(dtype), op_version=opv),
op.coef_.astype(dtype),
- op_version=opv, output_names=out[:1])
+ op_version=opv,
+ output_names=out[:1],
+ )
Y.add_to(scope, container)
@@ -195,18 +195,19 @@ def decorrelate_transformer_converter(scope, operator, container):
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_converter)
+ decorrelate_transformer_converter,
+)
onx = to_onnx(dec, X.astype(numpy.float32))
-sess = InferenceSession(onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
-got = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
def diff(p1, p2):
@@ -223,11 +224,10 @@ def diff(p1, p2):
onx = to_onnx(dec, X.astype(numpy.float64))
-sess = InferenceSession(onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float64))
-got = sess.run(None, {'X': X.astype(numpy.float64)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float64)})[0]
print(diff(exp, got))
#############################################
diff --git a/docs/tutorial/plot_jcustom_syntax.py b/docs/tutorial/plot_jcustom_syntax.py
index f509507c5..acdd6795e 100644
--- a/docs/tutorial/plot_jcustom_syntax.py
+++ b/docs/tutorial/plot_jcustom_syntax.py
@@ -47,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator):
* `self.coef_`: square root of the covariance matrix
"""
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
def fit(self, X, y=None, sample_weights=None):
if sample_weights is not None:
- raise NotImplementedError(
- "sample_weights != None is not implemented.")
+ raise NotImplementedError("sample_weights != None is not implemented.")
self.mean_ = numpy.mean(X, axis=0, keepdims=True)
X = X - self.mean_
V = X.T @ X / X.shape[0]
@@ -87,6 +86,7 @@ def transform(self, X):
#
# The shape calculator does not change.
+
def decorrelate_transformer_shape_calculator(operator):
op = operator.raw_operator
input_type = operator.inputs[0].type.__class__
@@ -115,37 +115,40 @@ def decorrelate_transformer_converter(scope, operator, container):
# type as the input.
proto_dtype = guess_proto_type(X.type)
- mean_name = scope.get_unique_variable_name('mean')
- container.add_initializer(mean_name, proto_dtype,
- op.mean_.shape, list(op.mean_.ravel()))
+ mean_name = scope.get_unique_variable_name("mean")
+ container.add_initializer(
+ mean_name, proto_dtype, op.mean_.shape, list(op.mean_.ravel())
+ )
- coef_name = scope.get_unique_variable_name('coef')
- container.add_initializer(coef_name, proto_dtype,
- op.coef_.shape, list(op.coef_.ravel()))
+ coef_name = scope.get_unique_variable_name("coef")
+ container.add_initializer(
+ coef_name, proto_dtype, op.coef_.shape, list(op.coef_.ravel())
+ )
- op_name = scope.get_unique_operator_name('sub')
- sub_name = scope.get_unique_variable_name('sub')
+ op_name = scope.get_unique_operator_name("sub")
+ sub_name = scope.get_unique_variable_name("sub")
# This function is defined in package onnxconverter_common.
# Most common operators can be added to the graph with
# these functions. It handles the case when specifications
# changed across opsets (a parameter becomes an input
# for example).
- apply_sub(scope, [X.full_name, mean_name], sub_name, container,
- operator_name=op_name)
+ apply_sub(
+ scope, [X.full_name, mean_name], sub_name, container, operator_name=op_name
+ )
- op_name = scope.get_unique_operator_name('matmul')
- container.add_node(
- 'MatMul', [sub_name, coef_name],
- out[0].full_name, name=op_name)
+ op_name = scope.get_unique_operator_name("matmul")
+ container.add_node("MatMul", [sub_name, coef_name], out[0].full_name, name=op_name)
##########################################
# We need to let *skl2onnx* know about the new converter.
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_converter)
+ decorrelate_transformer_converter,
+)
onx = to_onnx(dec, X.astype(numpy.float32))
@@ -153,7 +156,7 @@ def decorrelate_transformer_converter(scope, operator, container):
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float32))
-got = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
def diff(p1, p2):
@@ -173,7 +176,7 @@ def diff(p1, p2):
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float64))
-got = sess.run(None, {'X': X.astype(numpy.float64)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float64)})[0]
print(diff(exp, got))
#############################################
diff --git a/docs/tutorial/plot_kcustom_converter_wrapper.py b/docs/tutorial/plot_kcustom_converter_wrapper.py
index 6a4fd37d8..dc9388bed 100644
--- a/docs/tutorial/plot_kcustom_converter_wrapper.py
+++ b/docs/tutorial/plot_kcustom_converter_wrapper.py
@@ -25,7 +25,6 @@
If *X* is a matrix of features, :math:`V=\\frac{1}{n}X'X`
is the covariance matrix. We compute :math:`X V^{1/2}`.
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import pickle
from io import BytesIO
import numpy
@@ -52,7 +51,7 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator):
* `self.coef_`: square root of the covariance matrix
"""
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
@@ -75,7 +74,7 @@ def test_decorrelate_transformer():
pred = dec.transform(X)
cov = pred.T @ pred
for i in range(cov.shape[0]):
- cov[i, i] = 1.
+ cov[i, i] = 1.0
assert_almost_equal(numpy.identity(4), cov)
st = BytesIO()
@@ -152,9 +151,11 @@ def decorrelate_transformer_converter(scope, operator, container):
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_converter)
+ decorrelate_transformer_converter,
+)
onx = to_onnx(dec, X.astype(numpy.float32))
@@ -162,7 +163,7 @@ def decorrelate_transformer_converter(scope, operator, container):
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float32))
-got = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
def diff(p1, p2):
@@ -182,7 +183,7 @@ def diff(p1, p2):
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float64))
-got = sess.run(None, {'X': X.astype(numpy.float64)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float64)})[0]
print(diff(exp, got))
#############################################
diff --git a/docs/tutorial/plot_lcustom_options.py b/docs/tutorial/plot_lcustom_options.py
index e9e0b894d..637f19793 100644
--- a/docs/tutorial/plot_lcustom_options.py
+++ b/docs/tutorial/plot_lcustom_options.py
@@ -22,7 +22,6 @@
++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
from pandas import DataFrame
from skl2onnx.tutorial import measure_time
import numpy
@@ -31,8 +30,7 @@
from sklearn.datasets import load_iris
from skl2onnx import update_registered_converter
from skl2onnx.common.data_types import guess_numpy_type
-from skl2onnx.algebra.onnx_ops import (
- OnnxSub, OnnxMatMul, OnnxGemm)
+from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxMatMul, OnnxGemm
from skl2onnx import to_onnx
@@ -49,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator):
* `self.coef_`: square root of the covariance matrix
"""
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
def fit(self, X, y=None, sample_weights=None):
if sample_weights is not None:
- raise NotImplementedError(
- "sample_weights != None is not implemented.")
+ raise NotImplementedError("sample_weights != None is not implemented.")
self.mean_ = numpy.mean(X, axis=0, keepdims=True)
X = X - self.mean_
V = X.T @ X / X.shape[0]
@@ -107,19 +104,26 @@ def decorrelate_transformer_converter(scope, operator, container):
dtype = guess_numpy_type(X.type)
options = container.get_options(op, dict(use_gemm=False))
- use_gemm = options['use_gemm']
- print('conversion: use_gemm=', use_gemm)
+ use_gemm = options["use_gemm"]
+ print("conversion: use_gemm=", use_gemm)
if use_gemm:
- Y = OnnxGemm(X, op.coef_.astype(dtype),
- (- op.mean_ @ op.coef_).astype(dtype),
- op_version=opv, alpha=1., beta=1.,
- output_names=out[:1])
+ Y = OnnxGemm(
+ X,
+ op.coef_.astype(dtype),
+ (-op.mean_ @ op.coef_).astype(dtype),
+ op_version=opv,
+ alpha=1.0,
+ beta=1.0,
+ output_names=out[:1],
+ )
else:
Y = OnnxMatMul(
OnnxSub(X, op.mean_.astype(dtype), op_version=opv),
op.coef_.astype(dtype),
- op_version=opv, output_names=out[:1])
+ op_version=opv,
+ output_names=out[:1],
+ )
Y.add_to(scope, container)
@@ -129,10 +133,12 @@ def decorrelate_transformer_converter(scope, operator, container):
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
decorrelate_transformer_converter,
- options={'use_gemm': [True, False]})
+ options={"use_gemm": [True, False]},
+)
onx = to_onnx(dec, X.astype(numpy.float32))
@@ -140,7 +146,7 @@ def decorrelate_transformer_converter(scope, operator, container):
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float32))
-got = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
def diff(p1, p2):
@@ -155,13 +161,12 @@ def diff(p1, p2):
############################################
# We try the non default option, `use_gemm: True`.
-onx2 = to_onnx(dec, X.astype(numpy.float32),
- options={'use_gemm': True})
+onx2 = to_onnx(dec, X.astype(numpy.float32), options={"use_gemm": True})
sess2 = InferenceSession(onx2.SerializeToString())
exp = dec.transform(X.astype(numpy.float32))
-got2 = sess2.run(None, {'X': X.astype(numpy.float32)})[0]
+got2 = sess2.run(None, {"X": X.astype(numpy.float32)})[0]
print(diff(exp, got2))
@@ -176,18 +181,18 @@ def diff(p1, p2):
X32 = X.astype(numpy.float32)
obs = []
-context = {'sess': sess, 'X32': X32}
+context = {"sess": sess, "X32": X32}
mt = measure_time(
- "sess.run(None, {'X': X32})", context, div_by_number=True,
- number=100, repeat=1000)
-mt['use_gemm'] = False
+ "sess.run(None, {'X': X32})", context, div_by_number=True, number=100, repeat=1000
+)
+mt["use_gemm"] = False
obs.append(mt)
-context = {'sess2': sess2, 'X32': X32}
+context = {"sess2": sess2, "X32": X32}
mt2 = measure_time(
- "sess2.run(None, {'X': X32})", context, div_by_number=True,
- number=10, repeat=100)
-mt2['use_gemm'] = True
+ "sess2.run(None, {'X': X32})", context, div_by_number=True, number=10, repeat=100
+)
+mt2["use_gemm"] = True
obs.append(mt2)
DataFrame(obs).T
diff --git a/docs/tutorial/plot_mcustom_parser.py b/docs/tutorial/plot_mcustom_parser.py
index 49bcd97dc..84dcd29f2 100644
--- a/docs/tutorial/plot_mcustom_parser.py
+++ b/docs/tutorial/plot_mcustom_parser.py
@@ -24,15 +24,13 @@
A new transformer
+++++++++++++++++
"""
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
from onnxruntime import InferenceSession
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.datasets import load_iris
from skl2onnx import update_registered_converter
from skl2onnx.common.data_types import guess_numpy_type
-from skl2onnx.algebra.onnx_ops import (
- OnnxSub, OnnxMatMul, OnnxGemm)
+from skl2onnx.algebra.onnx_ops import OnnxSub, OnnxMatMul, OnnxGemm
from skl2onnx import to_onnx, get_model_alias
@@ -49,15 +47,14 @@ class DecorrelateTransformer(TransformerMixin, BaseEstimator):
* `self.coef_`: square root of the covariance matrix
"""
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
def fit(self, X, y=None, sample_weights=None):
if sample_weights is not None:
- raise NotImplementedError(
- "sample_weights != None is not implemented.")
+ raise NotImplementedError("sample_weights != None is not implemented.")
self.mean_ = numpy.mean(X, axis=0, keepdims=True)
X = X - self.mean_
V = X.T @ X / X.shape[0]
@@ -110,19 +107,25 @@ def decorrelate_transformer_converter(scope, operator, container):
Y1 = OnnxMatMul(
OnnxSub(X, op.mean_.astype(dtype), op_version=opv),
op.coef_.astype(dtype),
- op_version=opv, output_names=out[:1])
+ op_version=opv,
+ output_names=out[:1],
+ )
- Y2 = OnnxGemm(X, op.coef_.astype(dtype),
- (- op.mean_ @ op.coef_).astype(dtype),
- op_version=opv, alpha=1., beta=1.,
- output_names=out[1:2])
+ Y2 = OnnxGemm(
+ X,
+ op.coef_.astype(dtype),
+ (-op.mean_ @ op.coef_).astype(dtype),
+ op_version=opv,
+ alpha=1.0,
+ beta=1.0,
+ output_names=out[1:2],
+ )
Y1.add_to(scope, container)
Y2.add_to(scope, container)
-def decorrelate_transformer_parser(
- scope, model, inputs, custom_parsers=None):
+def decorrelate_transformer_parser(scope, model, inputs, custom_parsers=None):
alias = get_model_alias(type(model))
this_operator = scope.declare_local_operator(alias, model)
@@ -131,35 +134,37 @@ def decorrelate_transformer_parser(
# outputs
cls_type = inputs[0].type.__class__
- val_y1 = scope.declare_local_variable('nogemm', cls_type())
- val_y2 = scope.declare_local_variable('gemm', cls_type())
+ val_y1 = scope.declare_local_variable("nogemm", cls_type())
+ val_y2 = scope.declare_local_variable("gemm", cls_type())
this_operator.outputs.append(val_y1)
this_operator.outputs.append(val_y2)
# ends
return this_operator.outputs
+
###################################
# The registration needs to declare the parser as well.
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
decorrelate_transformer_converter,
- parser=decorrelate_transformer_parser)
+ parser=decorrelate_transformer_parser,
+)
#############################################
# And conversion.
-onx = to_onnx(dec, X.astype(numpy.float32),
- target_opset=14)
+onx = to_onnx(dec, X.astype(numpy.float32), target_opset=14)
sess = InferenceSession(onx.SerializeToString())
exp = dec.transform(X.astype(numpy.float32))
-results = sess.run(None, {'X': X.astype(numpy.float32)})
+results = sess.run(None, {"X": X.astype(numpy.float32)})
y1 = results[0]
y2 = results[1]
diff --git a/docs/tutorial/plot_ngrams.py b/docs/tutorial/plot_ngrams.py
index b64fb9f4a..65d8540c9 100644
--- a/docs/tutorial/plot_ngrams.py
+++ b/docs/tutorial/plot_ngrams.py
@@ -24,16 +24,17 @@
from skl2onnx.sklapi import TraceableTfidfVectorizer
import skl2onnx.sklapi.register # noqa
-corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "Is this the first document?",
- "",
-]).reshape((4, ))
+corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "Is this the first document?",
+ "",
+ ]
+).reshape((4,))
pattern = r"\b[a-z ]{1,10}\b"
-mod1 = TfidfVectorizer(ngram_range=(1, 2),
- token_pattern=pattern)
+mod1 = TfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern)
mod1.fit(corpus)
@@ -63,8 +64,7 @@
# instead of concatenating every piece into a string.
-mod2 = TraceableTfidfVectorizer(
- ngram_range=(1, 2), token_pattern=pattern)
+mod2 = TraceableTfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern)
mod2.fit(corpus)
pprint.pprint(mod2.vocabulary_)
@@ -72,8 +72,7 @@
#######################################
# Let's check it produces the same results.
-assert_almost_equal(mod1.transform(corpus).todense(),
- mod2.transform(corpus).todense())
+assert_almost_equal(mod1.transform(corpus).todense(), mod2.transform(corpus).todense())
####################################
# Conversion. Line `import skl2onnx.sklapi.register`
@@ -83,7 +82,7 @@
onx = to_onnx(mod2, corpus)
sess = InferenceSession(onx.SerializeToString())
-got = sess.run(None, {'X': corpus})
+got = sess.run(None, {"X": corpus})
###################################
# Let's check if there are discrepancies...
diff --git a/docs/tutorial/plot_transformer_discrepancy.py b/docs/tutorial/plot_transformer_discrepancy.py
index 5adb28646..f6c093957 100644
--- a/docs/tutorial/plot_transformer_discrepancy.py
+++ b/docs/tutorial/plot_transformer_discrepancy.py
@@ -34,8 +34,8 @@ def print_sparse_matrix(m):
if mi == ma:
ma += 1
mat = numpy.empty(m.shape, dtype=numpy.str_)
- mat[:, :] = '.'
- if hasattr(m, 'todense'):
+ mat[:, :] = "."
+ if hasattr(m, "todense"):
dense = m.todense()
else:
dense = m
@@ -43,18 +43,19 @@ def print_sparse_matrix(m):
for j in range(m.shape[1]):
if dense[i, j] > 0:
c = int((dense[i, j] - mi) / (ma - mi) * 25)
- mat[i, j] = chr(ord('A') + c)
- return '\n'.join(''.join(line) for line in mat)
+ mat[i, j] = chr(ord("A") + c)
+ return "\n".join("".join(line) for line in mat)
def diff(a, b):
if a.shape != b.shape:
raise ValueError(
- f"Cannot compare matrices with different shapes "
- f"{a.shape} != {b.shape}.")
+ f"Cannot compare matrices with different shapes " f"{a.shape} != {b.shape}."
+ )
d = numpy.abs(a - b).sum() / a.size
return d
+
##########################################
# Artificial datasets
# +++++++++++++++++++
@@ -62,16 +63,18 @@ def diff(a, b):
# Iris + a text column.
-strings = numpy.array([
- "This a sentence.",
- "This a sentence with more characters $^*&'(-...",
- """var = ClassName(var2, user=mail@anywhere.com, pwd"""
- """=")_~-('&]@^\\`|[{#")""",
- "c79857654",
- "https://complex-url.com/;76543u3456?g=hhh&h=23",
- "01-03-05T11:12:13",
- "https://complex-url.com/;dd76543u3456?g=ddhhh&h=23",
-]).reshape((-1, 1))
+strings = numpy.array(
+ [
+ "This a sentence.",
+ "This a sentence with more characters $^*&'(-...",
+ """var = ClassName(var2, user=mail@anywhere.com, pwd"""
+ """=")_~-('&]@^\\`|[{#")""",
+ "c79857654",
+ "https://complex-url.com/;76543u3456?g=hhh&h=23",
+ "01-03-05T11:12:13",
+ "https://complex-url.com/;dd76543u3456?g=ddhhh&h=23",
+ ]
+).reshape((-1, 1))
pprint.pprint(strings)
@@ -79,11 +82,7 @@ def diff(a, b):
# Fit a TfIdfVectorizer
# +++++++++++++++++++++
-tfidf = Pipeline([
- ('pre', ColumnTransformer([
- ('tfidf', TfidfVectorizer(), 0)
- ]))
-])
+tfidf = Pipeline([("pre", ColumnTransformer([("tfidf", TfidfVectorizer(), 0)]))])
#############################
# We leave a couple of strings out of the training set.
@@ -107,6 +106,6 @@ def diff(a, b):
# +++++++++++++++++++
sess = InferenceSession(onx.SerializeToString())
-got = sess.run(None, {'X': strings})[0]
+got = sess.run(None, {"X": strings})[0]
print(f"differences={diff(tr, got):g}")
print(print_sparse_matrix(got))
diff --git a/docs/tutorial/plot_usparse_xgboost.py b/docs/tutorial/plot_usparse_xgboost.py
index 92521f95d..b8ae97aa7 100644
--- a/docs/tutorial/plot_usparse_xgboost.py
+++ b/docs/tutorial/plot_usparse_xgboost.py
@@ -34,6 +34,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.ensemble import RandomForestClassifier
+
try:
from sklearn.ensemble import HistGradientBoostingClassifier
except ImportError:
@@ -43,22 +44,25 @@
from skl2onnx.common.data_types import FloatTensorType, StringTensorType
from skl2onnx import to_onnx, update_registered_converter
from skl2onnx.sklapi import CastTransformer, ReplaceTransformer
-from skl2onnx.common.shape_calculator import (
- calculate_linear_classifier_output_shapes)
-from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
- convert_xgboost)
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
- convert_lightgbm)
+from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm
update_registered_converter(
- XGBClassifier, 'XGBoostXGBClassifier',
- calculate_linear_classifier_output_shapes, convert_xgboost,
- options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+ XGBClassifier,
+ "XGBoostXGBClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_xgboost,
+ options={"nocl": [True, False], "zipmap": [True, False, "columns"]},
+)
update_registered_converter(
- LGBMClassifier, 'LightGbmLGBMClassifier',
- calculate_linear_classifier_output_shapes, convert_lightgbm,
- options={'nocl': [True, False], 'zipmap': [True, False]})
+ LGBMClassifier,
+ "LightGbmLGBMClassifier",
+ calculate_linear_classifier_output_shapes,
+ convert_lightgbm,
+ options={"nocl": [True, False], "zipmap": [True, False]},
+)
##########################################
@@ -67,7 +71,7 @@
#
# Iris + a text column.
-cst = ['class zero', 'class one', 'class two']
+cst = ["class zero", "class one", "class two"]
data = load_iris()
X = data.data[:, :2]
@@ -93,19 +97,25 @@
# sparse matrices to be converted into dense matrices.
-def make_pipelines(df_train, y_train, models=None,
- sparse_threshold=1., replace_nan=False,
- insert_replace=False):
-
+def make_pipelines(
+ df_train,
+ y_train,
+ models=None,
+ sparse_threshold=1.0,
+ replace_nan=False,
+ insert_replace=False,
+):
if models is None:
models = [
- RandomForestClassifier, HistGradientBoostingClassifier,
- XGBClassifier, LGBMClassifier]
+ RandomForestClassifier,
+ HistGradientBoostingClassifier,
+ XGBClassifier,
+ LGBMClassifier,
+ ]
models = [_ for _ in models if _ is not None]
pipes = []
for model in tqdm(models):
-
if model == HistGradientBoostingClassifier:
kwargs = dict(max_iter=5)
elif model == XGBClassifier:
@@ -114,79 +124,107 @@ def make_pipelines(df_train, y_train, models=None,
kwargs = dict(n_estimators=5)
if insert_replace:
- pipe = Pipeline([
- ('union', ColumnTransformer([
- ('scale1', StandardScaler(), [0, 1]),
- ('subject',
- Pipeline([
- ('count', CountVectorizer()),
- ('tfidf', TfidfTransformer()),
- ('repl', ReplaceTransformer()),
- ]), "text"),
- ], sparse_threshold=sparse_threshold)),
- ('cast', CastTransformer()),
- ('cls', model(max_depth=3, **kwargs)),
- ])
+ pipe = Pipeline(
+ [
+ (
+ "union",
+ ColumnTransformer(
+ [
+ ("scale1", StandardScaler(), [0, 1]),
+ (
+ "subject",
+ Pipeline(
+ [
+ ("count", CountVectorizer()),
+ ("tfidf", TfidfTransformer()),
+ ("repl", ReplaceTransformer()),
+ ]
+ ),
+ "text",
+ ),
+ ],
+ sparse_threshold=sparse_threshold,
+ ),
+ ),
+ ("cast", CastTransformer()),
+ ("cls", model(max_depth=3, **kwargs)),
+ ]
+ )
else:
- pipe = Pipeline([
- ('union', ColumnTransformer([
- ('scale1', StandardScaler(), [0, 1]),
- ('subject',
- Pipeline([
- ('count', CountVectorizer()),
- ('tfidf', TfidfTransformer())
- ]), "text"),
- ], sparse_threshold=sparse_threshold)),
- ('cast', CastTransformer()),
- ('cls', model(max_depth=3, **kwargs)),
- ])
+ pipe = Pipeline(
+ [
+ (
+ "union",
+ ColumnTransformer(
+ [
+ ("scale1", StandardScaler(), [0, 1]),
+ (
+ "subject",
+ Pipeline(
+ [
+ ("count", CountVectorizer()),
+ ("tfidf", TfidfTransformer()),
+ ]
+ ),
+ "text",
+ ),
+ ],
+ sparse_threshold=sparse_threshold,
+ ),
+ ),
+ ("cast", CastTransformer()),
+ ("cls", model(max_depth=3, **kwargs)),
+ ]
+ )
try:
pipe.fit(df_train, y_train)
except TypeError as e:
- obs = dict(model=model.__name__, pipe=pipe, error=e,
- model_onnx=None)
+ obs = dict(model=model.__name__, pipe=pipe, error=e, model_onnx=None)
pipes.append(obs)
continue
- options = {model: {'zipmap': False}}
+ options = {model: {"zipmap": False}}
if replace_nan:
- options[TfidfTransformer] = {'nan': True}
+ options[TfidfTransformer] = {"nan": True}
# convert
with warnings.catch_warnings(record=False):
warnings.simplefilter("ignore", (FutureWarning, UserWarning))
model_onnx = to_onnx(
pipe,
- initial_types=[('input', FloatTensorType([None, 2])),
- ('text', StringTensorType([None, 1]))],
- target_opset={'': 12, 'ai.onnx.ml': 2},
- options=options)
-
- with open('model.onnx', 'wb') as f:
+ initial_types=[
+ ("input", FloatTensorType([None, 2])),
+ ("text", StringTensorType([None, 1])),
+ ],
+ target_opset={"": 12, "ai.onnx.ml": 2},
+ options=options,
+ )
+
+ with open("model.onnx", "wb") as f:
f.write(model_onnx.SerializeToString())
sess = rt.InferenceSession(model_onnx.SerializeToString())
- inputs = {"input": df[["c0", "c1"]].values.astype(numpy.float32),
- "text": df[["text"]].values}
+ inputs = {
+ "input": df[["c0", "c1"]].values.astype(numpy.float32),
+ "text": df[["text"]].values,
+ }
pred_onx = sess.run(None, inputs)
- diff = numpy.abs(
- pred_onx[1].ravel() -
- pipe.predict_proba(df).ravel()).sum()
+ diff = numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum()
- obs = dict(model=model.__name__,
- discrepencies=diff,
- model_onnx=model_onnx, pipe=pipe)
+ obs = dict(
+ model=model.__name__, discrepencies=diff, model_onnx=model_onnx, pipe=pipe
+ )
pipes.append(obs)
return pipes
data_sparse = make_pipelines(df, y)
-stat = pandas.DataFrame(data_sparse).drop(['model_onnx', 'pipe'], axis=1)
-if 'error' in stat.columns:
- print(stat.drop('error', axis=1))
+stat = pandas.DataFrame(data_sparse).drop(["model_onnx", "pipe"], axis=1)
+if "error" in stat.columns:
+ print(stat.drop("error", axis=1))
stat
############################
@@ -198,10 +236,10 @@ def make_pipelines(df_train, y_train, models=None,
# Let's replace sparse data with dense by using `sparse_threshold=0.`
-data_dense = make_pipelines(df, y, sparse_threshold=0.)
-stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
-if 'error' in stat.columns:
- print(stat.drop('error', axis=1))
+data_dense = make_pipelines(df, y, sparse_threshold=0.0)
+stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1)
+if "error" in stat.columns:
+ print(stat.drop("error", axis=1))
stat
####################################
@@ -209,10 +247,10 @@ def make_pipelines(df_train, y_train, models=None,
# applies on the data.
print("sparse")
-print(data_sparse[-1]['pipe'].steps[0][-1].transform(df)[:2])
+print(data_sparse[-1]["pipe"].steps[0][-1].transform(df)[:2])
print()
print("dense")
-print(data_dense[-1]['pipe'].steps[0][-1].transform(df)[:2])
+print(data_dense[-1]["pipe"].steps[0][-1].transform(df)[:2])
####################################
# This shows `RandomForestClassifier
@@ -235,10 +273,10 @@ def make_pipelines(df_train, y_train, models=None,
# Let's keep sparse data in the scikit-learn pipeline but
# replace null values by nan in the onnx graph.
-data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=True)
-stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
-if 'error' in stat.columns:
- print(stat.drop('error', axis=1))
+data_dense = make_pipelines(df, y, sparse_threshold=1.0, replace_nan=True)
+stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1)
+if "error" in stat.columns:
+ print(stat.drop("error", axis=1))
stat
@@ -253,11 +291,12 @@ def make_pipelines(df_train, y_train, models=None,
# It is equivalent to the previous options except it is
# more explicit.
-data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=False,
- insert_replace=True)
-stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
-if 'error' in stat.columns:
- print(stat.drop('error', axis=1))
+data_dense = make_pipelines(
+ df, y, sparse_threshold=1.0, replace_nan=False, insert_replace=True
+)
+stat = pandas.DataFrame(data_dense).drop(["model_onnx", "pipe"], axis=1)
+if "error" in stat.columns:
+ print(stat.drop("error", axis=1))
stat
######################################
diff --git a/docs/tutorial/plot_wext_pyod_forest.py b/docs/tutorial/plot_wext_pyod_forest.py
index 23c40d713..01d977069 100644
--- a/docs/tutorial/plot_wext_pyod_forest.py
+++ b/docs/tutorial/plot_wext_pyod_forest.py
@@ -27,12 +27,22 @@
from sklearn.preprocessing import MinMaxScaler
from skl2onnx.proto import onnx_proto
from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType, guess_numpy_type)
+ FloatTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+)
from skl2onnx import to_onnx, update_registered_converter, get_model_alias
from skl2onnx.algebra.onnx_ops import (
- OnnxIdentity, OnnxMul, OnnxLess, OnnxConcat, OnnxCast, OnnxAdd,
- OnnxClip)
+ OnnxIdentity,
+ OnnxMul,
+ OnnxLess,
+ OnnxConcat,
+ OnnxCast,
+ OnnxAdd,
+ OnnxClip,
+)
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
+
try:
from pyod.models.iforest import IForest
except (ValueError, ImportError) as e:
@@ -40,21 +50,28 @@
IForest = None
if IForest is not None:
- data1 = {'First': [500, 500, 400, 100, 200, 300, 100],
- 'Second': ['a', 'b', 'a', 'b', 'a', 'b', 'c']}
+ data1 = {
+ "First": [500, 500, 400, 100, 200, 300, 100],
+ "Second": ["a", "b", "a", "b", "a", "b", "c"],
+ }
- df1 = pd.DataFrame(data1, columns=['First', 'Second'])
+ df1 = pd.DataFrame(data1, columns=["First", "Second"])
dumdf1 = pd.get_dummies(df1)
scaler = MinMaxScaler()
scaler.partial_fit(dumdf1)
sc_data = scaler.transform(dumdf1)
- model1 = IForest(n_estimators=10, bootstrap=True, behaviour='new',
- contamination=0.1, random_state=np.random.RandomState(42),
- verbose=1, n_jobs=-1).fit(sc_data)
+ model1 = IForest(
+ n_estimators=10,
+ bootstrap=True,
+ behaviour="new",
+ contamination=0.1,
+ random_state=np.random.RandomState(42),
+ verbose=1,
+ n_jobs=-1,
+ ).fit(sc_data)
feature_names2 = dumdf1.columns
- initial_type = [('float_input',
- FloatTensorType([None, len(feature_names2)]))]
+ initial_type = [("float_input", FloatTensorType([None, len(feature_names2)]))]
#############################################
@@ -75,6 +92,7 @@
# The parser defines the number of outputs and their type.
# The shape calculator defines their dimensions.
+
def pyod_iforest_parser(scope, model, inputs, custom_parsers=None):
alias = get_model_alias(type(model))
this_operator = scope.declare_local_operator(alias, model)
@@ -84,8 +102,8 @@ def pyod_iforest_parser(scope, model, inputs, custom_parsers=None):
# outputs
cls_type = inputs[0].type.__class__
- val_y1 = scope.declare_local_variable('label', Int64TensorType())
- val_y2 = scope.declare_local_variable('probability', cls_type())
+ val_y1 = scope.declare_local_variable("label", Int64TensorType())
+ val_y2 = scope.declare_local_variable("probability", cls_type())
this_operator.outputs.append(val_y1)
this_operator.outputs.append(val_y2)
@@ -98,6 +116,7 @@ def pyod_iforest_shape_calculator(operator):
operator.outputs[0].type.shape = [N, 1]
operator.outputs[1].type.shape = [N, 2]
+
############################################
# Then the converter.
@@ -122,54 +141,60 @@ def pyod_iforest_converter(scope, operator, container):
# labels
threshold = op.threshold_
- above = OnnxLess(scores, np.array([threshold], dtype=dtype),
- op_version=opv)
- labels = OnnxCast(above, op_version=opv, to=onnx_proto.TensorProto.INT64,
- output_names=out[:1])
+ above = OnnxLess(scores, np.array([threshold], dtype=dtype), op_version=opv)
+ labels = OnnxCast(
+ above, op_version=opv, to=onnx_proto.TensorProto.INT64, output_names=out[:1]
+ )
# probabilities
train_scores = op.decision_scores_
scaler = MinMaxScaler().fit(train_scores.reshape(-1, 1))
- scores_ = OnnxMul(scores, np.array([-1], dtype=dtype),
- op_version=opv)
+ scores_ = OnnxMul(scores, np.array([-1], dtype=dtype), op_version=opv)
print(scaler.min_)
print(scaler.scale_)
scaled = OnnxMul(scores_, scaler.scale_.astype(dtype), op_version=opv)
- scaled_centered = OnnxAdd(scaled, scaler.min_.astype(dtype),
- op_version=opv)
- clipped = OnnxClip(scaled_centered, np.array([0], dtype=dtype),
- np.array([1], dtype=dtype),
- op_version=opv)
+ scaled_centered = OnnxAdd(scaled, scaler.min_.astype(dtype), op_version=opv)
+ clipped = OnnxClip(
+ scaled_centered,
+ np.array([0], dtype=dtype),
+ np.array([1], dtype=dtype),
+ op_version=opv,
+ )
clipped_ = OnnxAdd(
- OnnxMul(clipped, np.array([-1], dtype=dtype),
- op_version=opv),
+ OnnxMul(clipped, np.array([-1], dtype=dtype), op_version=opv),
np.array([1], dtype=dtype),
- op_version=opv)
+ op_version=opv,
+ )
- scores_2d = OnnxConcat(clipped_, clipped, axis=1, op_version=opv,
- output_names=out[1:])
+ scores_2d = OnnxConcat(
+ clipped_, clipped, axis=1, op_version=opv, output_names=out[1:]
+ )
labels.add_to(scope, container)
scores_2d.add_to(scope, container)
+
########################################
# Finally the registration.
if IForest is not None:
update_registered_converter(
- IForest, "PyodIForest",
+ IForest,
+ "PyodIForest",
pyod_iforest_shape_calculator,
pyod_iforest_converter,
- parser=pyod_iforest_parser)
+ parser=pyod_iforest_parser,
+ )
#############################################
# And the conversion.
if IForest is not None:
- onx = to_onnx(model1, initial_types=initial_type,
- target_opset={'': 14, 'ai.onnx.ml': 2})
+ onx = to_onnx(
+ model1, initial_types=initial_type, target_opset={"": 14, "ai.onnx.ml": 2}
+ )
###############################################
# Checking discrepancies
@@ -182,7 +207,7 @@ def pyod_iforest_converter(scope, operator, container):
expected_proba = model1.predict_proba(data)
sess = InferenceSession(onx.SerializeToString())
- res = sess.run(None, {'float_input': data})
+ res = sess.run(None, {"float_input": data})
onx_labels = res[0]
onx_proba = res[1]
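
For reference, the graph assembled by `pyod_iforest_converter` amounts to a small
amount of post-processing on the raw scores; a numpy sketch of the equivalent
computation, assuming `scores` is the (n, 1) column of decision scores and
`threshold`/`scaler` are the fitted attributes used in the converter above:

import numpy as np

def iforest_postprocess(scores, threshold, scaler):
    # OnnxLess + OnnxCast: 1 when the raw score is below the fitted threshold
    labels = (scores < threshold).astype(np.int64)
    # negate the score, rescale with the MinMaxScaler statistics, clip to [0, 1]
    clipped = np.clip(-scores * scaler.scale_ + scaler.min_, 0.0, 1.0)
    # OnnxConcat: [1 - clipped, clipped] become the two probability columns
    proba = np.concatenate([1.0 - clipped, clipped], axis=1)
    return labels, proba

Running this on the same input and comparing with `onx_labels` and `onx_proba`
above is a quick sanity check of the converter.
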
diff --git a/docs/tutorial/plot_woe_transformer.py b/docs/tutorial/plot_woe_transformer.py
index bf466eef1..ba45ef414 100644
--- a/docs/tutorial/plot_woe_transformer.py
+++ b/docs/tutorial/plot_woe_transformer.py
@@ -30,20 +30,19 @@
import matplotlib.pyplot as plt
from skl2onnx import to_onnx
from skl2onnx.sklapi import WOETransformer
+
# automatically registers the converter for WOETransformer
import skl2onnx.sklapi.register # noqa
X = np.arange(10).astype(np.float32).reshape((-1, 1))
-intervals = [
- [(1., 3., False, False),
- (5., 7., True, True)]]
+intervals = [[(1.0, 3.0, False, False), (5.0, 7.0, True, True)]]
weights = [[55, 107]]
woe1 = WOETransformer(intervals, onehot=False, weights=weights)
woe1.fit(X)
prd = woe1.transform(X)
-df = pd.DataFrame({'X': X.ravel(), 'woe': prd.ravel()})
+df = pd.DataFrame({"X": X.ravel(), "woe": prd.ravel()})
df
######################################
@@ -57,8 +56,8 @@
woe2.fit(X)
prd = woe2.transform(X)
df = pd.DataFrame(prd)
-df.columns = ['I1', 'I2']
-df['X'] = X
+df.columns = ["I1", "I2"]
+df["X"] = X
df
##########################################
@@ -69,8 +68,8 @@
woe.fit(X)
prd = woe.transform(X)
df = pd.DataFrame(prd)
-df.columns = ['I1', 'I2']
-df['X'] = X
+df.columns = ["I1", "I2"]
+df["X"] = X
df
###########################################
@@ -82,14 +81,14 @@
# onehot=False
onx1 = to_onnx(woe1, X)
sess = InferenceSession(onx1.SerializeToString())
-print(sess.run(None, {'X': X})[0])
+print(sess.run(None, {"X": X})[0])
##################################
# onehot=True
onx2 = to_onnx(woe2, X)
sess = InferenceSession(onx2.SerializeToString())
-print(sess.run(None, {'X': X})[0])
+print(sess.run(None, {"X": X})[0])
################################################
# ONNX Graphs
@@ -98,33 +97,41 @@
# onehot=False
pydot_graph = GetPydotGraph(
- onx1.graph, name=onx1.graph.name, rankdir="TB",
+ onx1.graph,
+ name=onx1.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("woe1.dot")
-os.system('dot -O -Gdpi=300 -Tpng woe1.dot')
+os.system("dot -O -Gdpi=300 -Tpng woe1.dot")
image = plt.imread("woe1.dot.png")
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
#######################################
# onehot=True
pydot_graph = GetPydotGraph(
- onx2.graph, name=onx2.graph.name, rankdir="TB",
+ onx2.graph,
+ name=onx2.graph.name,
+ rankdir="TB",
node_producer=GetOpNodeProducer(
- "docstring", color="yellow", fillcolor="yellow", style="filled"))
+ "docstring", color="yellow", fillcolor="yellow", style="filled"
+ ),
+)
pydot_graph.write_dot("woe2.dot")
-os.system('dot -O -Gdpi=300 -Tpng woe2.dot')
+os.system("dot -O -Gdpi=300 -Tpng woe2.dot")
image = plt.imread("woe2.dot.png")
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
-ax.axis('off')
+ax.axis("off")
########################################
# Half-line
@@ -133,15 +140,13 @@
# An interval may have only one extremity defined and the other
# can be infinite.
-intervals = [
- [(-np.inf, 3., True, True),
- (5., np.inf, True, True)]]
+intervals = [[(-np.inf, 3.0, True, True), (5.0, np.inf, True, True)]]
weights = [[55, 107]]
woe1 = WOETransformer(intervals, onehot=False, weights=weights)
woe1.fit(X)
prd = woe1.transform(X)
-df = pd.DataFrame({'X': X.ravel(), 'woe': prd.ravel()})
+df = pd.DataFrame({"X": X.ravel(), "woe": prd.ravel()})
df
#################################
@@ -149,4 +154,4 @@
onxinf = to_onnx(woe1, X)
sess = InferenceSession(onxinf.SerializeToString())
-print(sess.run(None, {'X': X})[0])
+print(sess.run(None, {"X": X})[0])
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..c04c85d1c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,17 @@
+[tool.ruff]
+
+exclude = [
+ ".eggs",
+ ".git",
+ "build",
+ "dist",
+]
+
+# Same as Black.
+line-length = 88
+
+[tool.ruff.mccabe]
+max-complexity = 10
+
+[tool.ruff.per-file-ignores]
+"skl2onnx/algebra/onnx_ops.py" = ["F821"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4698ca051..ea181f954 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,18 +1,19 @@
# tests
-flatbuffers
+black
pandas
py-cpuinfo
pybind11
pytest
pytest-cov
+ruff
wheel
# docs
+furo
sphinx
-sphinx_modern_theme_modified
sphinxcontrib-blockdiag
tqdm
# docs/examples
-lightgbm
+lightgbm<4.0
matplotlib
diff --git a/setup.py b/setup.py
index a1af4338e..d0a04d373 100644
--- a/setup.py
+++ b/setup.py
@@ -5,53 +5,59 @@
from distutils.core import setup
from setuptools import find_packages
import os
+
this = os.path.dirname(__file__)
with open(os.path.join(this, "requirements.txt"), "r") as f:
- requirements = [_ for _ in [_.strip("\r\n ")
- for _ in f.readlines()] if _ is not None]
+ requirements = [
+ _ for _ in [_.strip("\r\n ") for _ in f.readlines()] if _ is not None
+ ]
packages = find_packages()
assert packages
# read version from the package file.
-version_str = '1.0.0'
-with (open(os.path.join(this, 'skl2onnx/__init__.py'), "r")) as f:
- line = [_ for _ in [_.strip("\r\n ")
- for _ in f.readlines()] if _.startswith("__version__")]
+version_str = "1.0.0"
+with open(os.path.join(this, "skl2onnx/__init__.py"), "r") as f:
+ line = [
+ _
+ for _ in [_.strip("\r\n ") for _ in f.readlines()]
+ if _.startswith("__version__")
+ ]
if len(line) > 0:
- version_str = line[0].split('=')[1].strip('" ')
+ version_str = line[0].split("=")[1].strip('" ')
README = os.path.join(os.getcwd(), "README.md")
with open(README) as f:
long_description = f.read()
- start_pos = long_description.find('## Introduction')
+ start_pos = long_description.find("## Introduction")
if start_pos >= 0:
long_description = long_description[start_pos:]
setup(
- name='skl2onnx',
+ name="skl2onnx",
version=version_str,
description="Convert scikit-learn models to ONNX",
long_description=long_description,
- long_description_content_type='text/markdown',
- license='Apache License v2.0',
- author='ONNX',
- author_email='onnx-technical-discuss@lists.lfaidata.foundation',
- url='https://github.com/onnx/sklearn-onnx',
+ long_description_content_type="text/markdown",
+ license="Apache License v2.0",
+ author="ONNX",
+ author_email="onnx-technical-discuss@lists.lfaidata.foundation",
+ url="https://github.com/onnx/sklearn-onnx",
packages=packages,
include_package_data=True,
install_requires=requirements,
classifiers=[
- 'Development Status :: 4 - Beta',
- 'Environment :: Console',
- 'Intended Audience :: Developers',
- 'Operating System :: MacOS :: MacOS X',
- 'Operating System :: Microsoft :: Windows',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'License :: OSI Approved :: Apache Software License'],
+ "Development Status :: 4 - Beta",
+ "Environment :: Console",
+ "Intended Audience :: Developers",
+ "Operating System :: MacOS :: MacOS X",
+ "Operating System :: Microsoft :: Windows",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "License :: OSI Approved :: Apache Software License",
+ ],
)
diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py
index 4250c8a4f..4dbc458cc 100644
--- a/skl2onnx/__init__.py
+++ b/skl2onnx/__init__.py
@@ -13,9 +13,7 @@
from .convert import convert_sklearn, to_onnx, wrap_as_onnx_mixin # noqa
-from ._supported_operators import ( # noqa
- update_registered_converter, get_model_alias
-)
+from ._supported_operators import update_registered_converter, get_model_alias # noqa
from ._parse import update_registered_parser # noqa
from .proto import get_latest_tested_opset_version # noqa
@@ -35,11 +33,12 @@ def supported_converters(from_sklearn=False):
:return: list of supported models as string
"""
from .common._registration import _converter_pool # noqa
+
# The two following lines populates the list of supported converters.
from . import shape_calculators # noqa
from . import operator_converters # noqa
names = sorted(_converter_pool.keys())
if from_sklearn:
- return [_[7:] for _ in names if _.startswith('Sklearn')]
+ return [_[7:] for _ in names if _.startswith("Sklearn")]
return list(names)
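
A short usage sketch of the helper above; with `from_sklearn=True` the leading
"Sklearn" prefix is stripped from every registered converter name:

from skl2onnx import supported_converters

all_names = supported_converters()                       # full names start with "Sklearn"
short_names = supported_converters(from_sklearn=True)    # same names without the prefix
print(len(all_names), "registered converters")
print(short_names[:5])
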
diff --git a/skl2onnx/__main__.py b/skl2onnx/__main__.py
index 30a640af8..77333e9df 100644
--- a/skl2onnx/__main__.py
+++ b/skl2onnx/__main__.py
@@ -4,27 +4,32 @@
def _help():
- print(dedent("""
+ print(
+ dedent(
+ """
python -m skl2onnx [command]
command is:
setup generate rst documentation for every ONNX operator
- before building the package"""))
+ before building the package"""
+ )
+ )
def _setup():
from skl2onnx.algebra.onnx_ops import dynamic_class_creation
+
dynamic_class_creation(True)
def main(argv):
- if len(argv) <= 1 or '--help' in argv:
+ if len(argv) <= 1 or "--help" in argv:
_help()
return
if "setup" in argv:
- print('generate rst documentation for every ONNX operator')
+ print("generate rst documentation for every ONNX operator")
_setup()
return
diff --git a/skl2onnx/_parse.py b/skl2onnx/_parse.py
index 7910beb49..0b9c750c1 100644
--- a/skl2onnx/_parse.py
+++ b/skl2onnx/_parse.py
@@ -4,8 +4,8 @@
import numpy as np
from sklearn import pipeline
-from sklearn.base import (
- ClassifierMixin, ClusterMixin, is_classifier)
+from sklearn.base import ClassifierMixin, ClusterMixin, is_classifier
+
try:
from sklearn.base import OutlierMixin
except ImportError:
@@ -13,8 +13,12 @@
class OutlierMixin:
pass
+
from sklearn.ensemble import (
- IsolationForest, RandomTreesEmbedding, RandomForestClassifier)
+ IsolationForest,
+ RandomTreesEmbedding,
+ RandomForestClassifier,
+)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV
@@ -24,6 +28,7 @@ class OutlierMixin:
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC, NuSVC, SVC
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -39,17 +44,20 @@ class OutlierMixin:
# changed in 0.20
SimpleImputer = None
-from ._supported_operators import (
- _get_sklearn_operator_name, cluster_list, outlier_list)
-from ._supported_operators import (
- sklearn_classifier_list, sklearn_operator_name_map)
+from ._supported_operators import _get_sklearn_operator_name, cluster_list, outlier_list
+from ._supported_operators import sklearn_classifier_list, sklearn_operator_name_map
from .common._container import SklearnModelContainerNode
from .common._registration import _converter_pool, _shape_calculator_pool
from .common._topology import Topology, Variable
from .common.data_types import (
- DictionaryType, Int64TensorType, SequenceType,
- StringTensorType, TensorType, FloatTensorType,
- guess_tensor_type)
+ DictionaryType,
+ Int64TensorType,
+ SequenceType,
+ StringTensorType,
+ TensorType,
+ FloatTensorType,
+ guess_tensor_type,
+)
from .common.utils import get_column_indices
from .common.utils_checking import check_signature
from .common.utils_classifier import get_label_classes
@@ -57,36 +65,41 @@ class OutlierMixin:
do_not_merge_columns = tuple(
- filter(lambda op: op is not None,
- [OneHotEncoder, ColumnTransformer]))
+ filter(lambda op: op is not None, [OneHotEncoder, ColumnTransformer])
+)
def _fetch_input_slice(scope, inputs, column_indices):
if not isinstance(inputs, list):
raise TypeError("Parameter inputs must be a list.")
if len(inputs) == 0:
- raise RuntimeError("Operator ArrayFeatureExtractor requires at "
- "least one inputs.")
+ raise RuntimeError(
+ "Operator ArrayFeatureExtractor requires at " "least one inputs."
+ )
if len(inputs) != 1:
- raise RuntimeError("Operator ArrayFeatureExtractor does not support "
- "multiple input tensors.")
- if (isinstance(inputs[0].type, TensorType) and
- len(inputs[0].type.shape) == 2 and
- inputs[0].type.shape[1] == len(column_indices)):
+ raise RuntimeError(
+ "Operator ArrayFeatureExtractor does not support " "multiple input tensors."
+ )
+ if (
+ isinstance(inputs[0].type, TensorType)
+ and len(inputs[0].type.shape) == 2
+ and inputs[0].type.shape[1] == len(column_indices)
+ ):
# No need to extract.
return inputs
array_feature_extractor_operator = scope.declare_local_operator(
- 'SklearnArrayFeatureExtractor')
+ "SklearnArrayFeatureExtractor"
+ )
array_feature_extractor_operator.inputs = inputs
array_feature_extractor_operator.column_indices = column_indices
output_variable_name = scope.declare_local_variable(
- 'extracted_feature_columns', inputs[0].type)
+ "extracted_feature_columns", inputs[0].type
+ )
array_feature_extractor_operator.outputs.append(output_variable_name)
return array_feature_extractor_operator.outputs
-def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
- alias=None):
+def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None, alias=None):
"""
This function handles all non-pipeline models.
@@ -101,18 +114,19 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
"""
# alias can be None
if isinstance(model, str):
- raise RuntimeError("Parameter model must be an object not a "
- "string '{0}'.".format(model))
+ raise RuntimeError(
+ "Parameter model must be an object not a " "string '{0}'.".format(model)
+ )
if any(not isinstance(i, Variable) for i in inputs):
raise TypeError(
- "One input is not a Variable for model %r - %r."
- "" % (model, inputs))
+ "One input is not a Variable for model %r - %r." "" % (model, inputs)
+ )
if alias is None:
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs = inputs
- if hasattr(model, 'onnx_parser'):
+ if hasattr(model, "onnx_parser"):
parser_names = model.onnx_parser()
if parser_names is not None:
try:
@@ -122,9 +136,9 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
"Calling parser %r for model type %r failed due to %r. "
"This warnings will become an exception in version 1.11. "
"The parser signature should parser(scope=None, "
- "inputs=None)." % (
- parser_names, e, type(model)),
- DeprecationWarning)
+ "inputs=None)." % (parser_names, e, type(model)),
+ DeprecationWarning,
+ )
names = parser_names()
if names is not None:
for name in names:
@@ -132,35 +146,38 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
this_operator.outputs.append(name)
elif isinstance(name, str):
var = scope.declare_local_variable(
- name, guess_tensor_type(inputs[0].type))
+ name, guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(var)
elif isinstance(name, tuple) and len(name) == 2:
var = scope.declare_local_variable(
- name[0], guess_tensor_type(name[1]))
+ name[0], guess_tensor_type(name[1])
+ )
this_operator.outputs.append(var)
else:
raise RuntimeError(
"Unexpected output type %r (value=%r) for "
- "operator %r." % (
- type(name), name, type(model)))
+ "operator %r." % (type(name), name, type(model))
+ )
return this_operator.outputs
- if (type(model) in sklearn_classifier_list
- or isinstance(model, ClassifierMixin)
- or (isinstance(model, GridSearchCV)
- and is_classifier(model))):
+ if (
+ type(model) in sklearn_classifier_list
+ or isinstance(model, ClassifierMixin)
+ or (isinstance(model, GridSearchCV) and is_classifier(model))
+ ):
# For classifiers, we may have two outputs, one for label and
# the other one for probabilities of all classes. Notice that
# their types here are not necessarily correct and they will
# be fixed in shape inference phase.
- label_variable = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
if type(model) in [RandomForestClassifier]:
prob_dtype = FloatTensorType()
else:
prob_dtype = guess_tensor_type(inputs[0].type)
probability_tensor_variable = scope.declare_local_variable(
- 'probabilities', prob_dtype)
+ "probabilities", prob_dtype
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(probability_tensor_variable)
@@ -169,67 +186,69 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
# the other one for scores of all classes. Notice that their
# types here are not necessarily correct and they will be fixed
# in shape inference phase
- label_variable = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
score_tensor_variable = scope.declare_local_variable(
- 'scores', guess_tensor_type(inputs[0].type))
+ "scores", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(score_tensor_variable)
elif type(model) in {IsolationForest, LocalOutlierFactor}:
- label_variable = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
score_tensor_variable = scope.declare_local_variable(
- 'scores', guess_tensor_type(inputs[0].type))
+ "scores", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(score_tensor_variable)
options = scope.get_options(model, dict(score_samples=False))
- if options['score_samples']:
+ if options["score_samples"]:
scores_var = scope.declare_local_variable(
- 'score_samples', guess_tensor_type(inputs[0].type))
+ "score_samples", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(scores_var)
elif type(model) in outlier_list or isinstance(model, OutlierMixin):
# For outliers, we may have two outputs, one for label and
# the other one for scores.
- label_variable = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
score_tensor_variable = scope.declare_local_variable(
- 'scores', guess_tensor_type(inputs[0].type))
+ "scores", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(score_tensor_variable)
elif isinstance(model, NearestNeighbors):
# For Nearest Neighbours, we have two outputs, one for nearest
# neighbours' indices and the other one for distances
- index_variable = scope.declare_local_variable(
- 'index', Int64TensorType())
+ index_variable = scope.declare_local_variable("index", Int64TensorType())
distance_variable = scope.declare_local_variable(
- 'distance', guess_tensor_type(inputs[0].type))
+ "distance", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(index_variable)
this_operator.outputs.append(distance_variable)
elif type(model) in {GaussianMixture, BayesianGaussianMixture}:
- label_variable = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_variable = scope.declare_local_variable("label", Int64TensorType())
prob_variable = scope.declare_local_variable(
- 'probabilities', guess_tensor_type(inputs[0].type))
+ "probabilities", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(label_variable)
this_operator.outputs.append(prob_variable)
options = scope.get_options(model, dict(score_samples=False))
- if options['score_samples']:
+ if options["score_samples"]:
scores_var = scope.declare_local_variable(
- 'score_samples', guess_tensor_type(inputs[0].type))
+ "score_samples", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(scores_var)
elif type(model) in {SimpleImputer, Imputer}:
if isinstance(inputs[0].type, (Int64TensorType, StringTensorType)):
otype = inputs[0].type.__class__()
else:
otype = guess_tensor_type(inputs[0].type)
- variable = scope.declare_local_variable('variable', otype)
+ variable = scope.declare_local_variable("variable", otype)
this_operator.outputs.append(variable)
else:
- if hasattr(model, 'get_feature_names_out'):
+ if hasattr(model, "get_feature_names_out"):
try:
out_names = model.get_feature_names_out()
except (AttributeError, ValueError):
@@ -237,20 +256,17 @@ def _parse_sklearn_simple_model(scope, model, inputs, custom_parsers=None,
out_names = None
this_operator.feature_names_out_ = out_names
input_type = guess_tensor_type(inputs[0].type)
- variable = scope.declare_local_variable(
- 'variable', input_type)
+ variable = scope.declare_local_variable("variable", input_type)
this_operator.outputs.append(variable)
options = scope.get_options(model, dict(decision_path=False), fail=False)
- if options is not None and options['decision_path']:
- dec_path = scope.declare_local_variable(
- 'decision_path', StringTensorType())
+ if options is not None and options["decision_path"]:
+ dec_path = scope.declare_local_variable("decision_path", StringTensorType())
this_operator.outputs.append(dec_path)
options = scope.get_options(model, dict(decision_leaf=False), fail=False)
- if options is not None and options['decision_leaf']:
- dec_path = scope.declare_local_variable(
- 'decision_leaf', Int64TensorType())
+ if options is not None and options["decision_leaf"]:
+ dec_path = scope.declare_local_variable("decision_leaf", Int64TensorType())
this_operator.outputs.append(dec_path)
return this_operator.outputs
@@ -270,8 +286,7 @@ def _parse_sklearn_pipeline(scope, model, inputs, custom_parsers=None):
:return: A list of output variables produced by the input pipeline
"""
for step in model.steps:
- inputs = _parse_sklearn(scope, step[1], inputs,
- custom_parsers=custom_parsers)
+ inputs = _parse_sklearn(scope, step[1], inputs, custom_parsers=custom_parsers)
return inputs
@@ -287,35 +302,34 @@ def _parse_sklearn_feature_union(scope, model, inputs, custom_parsers=None):
# Encode each transform as our IR object
for name, transform in model.transformer_list:
transformed_result_names.append(
- _parse_sklearn(
- scope, transform, inputs,
- custom_parsers=custom_parsers)[0])
- if (model.transformer_weights is not None and name in
- model.transformer_weights):
+ _parse_sklearn(scope, transform, inputs, custom_parsers=custom_parsers)[0]
+ )
+ if model.transformer_weights is not None and name in model.transformer_weights:
transform_result = [transformed_result_names.pop()]
# Create a Multiply ONNX node
- multiply_operator = scope.declare_local_operator('SklearnMultiply')
+ multiply_operator = scope.declare_local_operator("SklearnMultiply")
multiply_operator.inputs = transform_result
multiply_operator.operand = model.transformer_weights[name]
multiply_output = scope.declare_local_variable(
- 'multiply_output', guess_tensor_type(inputs[0].type))
+ "multiply_output", guess_tensor_type(inputs[0].type)
+ )
multiply_operator.outputs.append(multiply_output)
transformed_result_names.append(multiply_operator.outputs[0])
# Create a Concat ONNX node
- concat_operator = scope.declare_local_operator('SklearnConcat')
+ concat_operator = scope.declare_local_operator("SklearnConcat")
concat_operator.inputs = transformed_result_names
# Declare output name of scikit-learn FeatureUnion
union_name = scope.declare_local_variable(
- 'union', guess_tensor_type(inputs[0].type))
+ "union", guess_tensor_type(inputs[0].type)
+ )
concat_operator.outputs.append(union_name)
return concat_operator.outputs
-def _parse_sklearn_column_transformer(scope, model, inputs,
- custom_parsers=None):
+def _parse_sklearn_column_transformer(scope, model, inputs, custom_parsers=None):
"""
:param scope: Scope object
:param model: A *scikit-learn* *ColumnTransformer* object
@@ -326,14 +340,16 @@ def _parse_sklearn_column_transformer(scope, model, inputs,
transformed_result_names = []
# Encode each transform as our IR object
for name, op, column_indices in model.transformers_:
- if op == 'drop':
+ if op == "drop":
continue
if isinstance(column_indices, slice):
- column_indices = list(range(
- column_indices.start
- if column_indices.start is not None else 0,
- column_indices.stop, column_indices.step
- if column_indices.step is not None else 1))
+ column_indices = list(
+ range(
+ column_indices.start if column_indices.start is not None else 0,
+ column_indices.stop,
+ column_indices.step if column_indices.step is not None else 1,
+ )
+ )
elif isinstance(column_indices, (int, str)):
column_indices = [column_indices]
names = get_column_indices(column_indices, inputs, multiple=True)
@@ -355,9 +371,9 @@ def _parse_sklearn_column_transformer(scope, model, inputs,
# the default behaviour is to merge columns.
ty = transform_inputs[0].type.__class__([None, None])
- conc_op = scope.declare_local_operator('SklearnConcat')
+ conc_op = scope.declare_local_operator("SklearnConcat")
conc_op.inputs = transform_inputs
- conc_names = scope.declare_local_variable('merged_columns', ty)
+ conc_names = scope.declare_local_variable("merged_columns", ty)
conc_op.outputs.append(conc_names)
transform_inputs = [conc_names]
@@ -368,23 +384,27 @@ def _parse_sklearn_column_transformer(scope, model, inputs,
elif model_obj == "drop":
var_out = None
else:
- raise RuntimeError("Unknown operator alias "
- "'{0}'. These are specified in "
- "_supported_operators.py."
- "".format(model_obj))
+ raise RuntimeError(
+ "Unknown operator alias "
+ "'{0}'. These are specified in "
+ "_supported_operators.py."
+ "".format(model_obj)
+ )
else:
var_out = _parse_sklearn(
- scope, model_obj,
- transform_inputs, custom_parsers=custom_parsers)[0]
- if (model.transformer_weights is not None and name in
- model.transformer_weights):
+ scope, model_obj, transform_inputs, custom_parsers=custom_parsers
+ )[0]
+ if (
+ model.transformer_weights is not None
+ and name in model.transformer_weights
+ ):
# Create a Multiply ONNX node
- multiply_operator = scope.declare_local_operator(
- 'SklearnMultiply')
+ multiply_operator = scope.declare_local_operator("SklearnMultiply")
multiply_operator.inputs.append(var_out)
multiply_operator.operand = model.transformer_weights[name]
var_out = scope.declare_local_variable(
- 'multiply_output', guess_tensor_type(inputs[0].type))
+ "multiply_output", guess_tensor_type(inputs[0].type)
+ )
multiply_operator.outputs.append(var_out)
if var_out:
transformed_result_names.append(var_out)
@@ -392,12 +412,11 @@ def _parse_sklearn_column_transformer(scope, model, inputs,
# Create a Concat ONNX node
if len(transformed_result_names) > 1:
ty = transformed_result_names[0].type.__class__([None, None])
- concat_operator = scope.declare_local_operator('SklearnConcat')
+ concat_operator = scope.declare_local_operator("SklearnConcat")
concat_operator.inputs = transformed_result_names
# Declare output name of scikit-learn ColumnTransformer
- transformed_column_name = scope.declare_local_variable(
- 'transformed_column', ty)
+ transformed_column_name = scope.declare_local_variable("transformed_column", ty)
concat_operator.outputs.append(transformed_column_name)
return concat_operator.outputs
return transformed_result_names
@@ -407,76 +426,77 @@ def _parse_sklearn_grid_search_cv(scope, model, inputs, custom_parsers=None):
options = scope.get_options(model)
if options:
scope.add_options(id(model.best_estimator_), options)
- res = parse_sklearn(scope, model.best_estimator_, inputs,
- custom_parsers=custom_parsers)
- scope.replace_raw_operator(
- model.best_estimator_, model, "SklearnGridSearchCV")
+ res = parse_sklearn(
+ scope, model.best_estimator_, inputs, custom_parsers=custom_parsers
+ )
+ scope.replace_raw_operator(model.best_estimator_, model, "SklearnGridSearchCV")
return res
-def _parse_sklearn_random_trees_embedding(scope, model, inputs,
- custom_parsers=None):
- res = parse_sklearn(scope, model.base_estimator_, inputs,
- custom_parsers=custom_parsers)
+def _parse_sklearn_random_trees_embedding(scope, model, inputs, custom_parsers=None):
+ res = parse_sklearn(
+ scope, model.base_estimator_, inputs, custom_parsers=custom_parsers
+ )
if len(res) != 1:
- raise RuntimeError(
- "A regressor only produces one output not %r." % res)
+ raise RuntimeError("A regressor only produces one output not %r." % res)
scope.replace_raw_operator(
- model.base_estimator_, model, "SklearnRandomTreesEmbedding")
+ model.base_estimator_, model, "SklearnRandomTreesEmbedding"
+ )
return res
-def _apply_zipmap(zipmap_options, scope, model, input_type,
- probability_tensor):
- if zipmap_options == 'columns':
- zipmap_operator = scope.declare_local_operator('SklearnZipMapColumns')
+def _apply_zipmap(zipmap_options, scope, model, input_type, probability_tensor):
+ if zipmap_options == "columns":
+ zipmap_operator = scope.declare_local_operator("SklearnZipMapColumns")
classes = get_label_classes(scope, model)
classes_names = get_label_classes(scope, model, node_names=True)
else:
- zipmap_operator = scope.declare_local_operator('SklearnZipMap')
+ zipmap_operator = scope.declare_local_operator("SklearnZipMap")
classes = get_label_classes(scope, model)
zipmap_operator.inputs = probability_tensor
label_type = Int64TensorType([None])
- if (hasattr(model, "classes_") and
- isinstance(model.classes_, list) and
- isinstance(model.classes_[0], np.ndarray)):
+ if (
+ hasattr(model, "classes_")
+ and isinstance(model.classes_, list)
+ and isinstance(model.classes_[0], np.ndarray)
+ ):
# multi-label problem
pass
elif np.issubdtype(classes.dtype, np.floating):
classes = np.array(list(map(lambda x: int(x), classes)))
if set(map(lambda x: float(x), classes)) != set(model.classes_):
- raise RuntimeError("skl2onnx implicitly converts float class "
- "labels into integers but at least one label "
- "is not an integer. Class labels should "
- "be integers or strings.")
+ raise RuntimeError(
+ "skl2onnx implicitly converts float class "
+ "labels into integers but at least one label "
+ "is not an integer. Class labels should "
+ "be integers or strings."
+ )
zipmap_operator.classlabels_int64s = classes
elif np.issubdtype(classes.dtype, np.signedinteger):
zipmap_operator.classlabels_int64s = [int(i) for i in classes]
- elif (np.issubdtype(classes.dtype, np.unsignedinteger) or
- classes.dtype == np.bool_):
+ elif np.issubdtype(classes.dtype, np.unsignedinteger) or classes.dtype == np.bool_:
zipmap_operator.classlabels_int64s = [int(i) for i in classes]
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
zipmap_operator.classlabels_strings = classes
label_type = StringTensorType([None])
- zip_label = scope.declare_local_variable('output_label', label_type)
+ zip_label = scope.declare_local_variable("output_label", label_type)
if len(probability_tensor) == 2:
zipmap_operator.outputs.append(zip_label)
- if zipmap_options == 'columns':
+ if zipmap_options == "columns":
prob_type = probability_tensor[-1].type
for cl in classes_names:
output_cl = scope.declare_local_variable(cl, prob_type.__class__())
zipmap_operator.outputs.append(output_cl)
else:
zip_probability = scope.declare_local_variable(
- 'output_probability',
- SequenceType(
- DictionaryType(
- label_type, guess_tensor_type(input_type))))
+ "output_probability",
+ SequenceType(DictionaryType(label_type, guess_tensor_type(input_type))),
+ )
zipmap_operator.outputs.append(zip_probability)
zipmap_operator.init_status(is_evaluated=True)
@@ -485,65 +505,68 @@ def _apply_zipmap(zipmap_options, scope, model, input_type,
def _parse_sklearn_classifier(scope, model, inputs, custom_parsers=None):
options = scope.get_options(model, dict(zipmap=True))
- no_zipmap = (
- (isinstance(options['zipmap'], bool) and not options['zipmap']) or
- (model.__class__ in [NuSVC, SVC] and not model.probability))
+ no_zipmap = (isinstance(options["zipmap"], bool) and not options["zipmap"]) or (
+ model.__class__ in [NuSVC, SVC] and not model.probability
+ )
probability_tensor = _parse_sklearn_simple_model(
- scope, model, inputs, custom_parsers=custom_parsers)
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
if no_zipmap:
- if options.get('output_class_labels', False):
+ if options.get("output_class_labels", False):
if not hasattr(model, "classes_"):
raise RuntimeError(
"Model type %r has no attribute 'classes_'. "
"Option 'output_class_labels' is invalid or a new parser "
- "must be used." % model.__class__.__name__)
+ "must be used." % model.__class__.__name__
+ )
- clout = scope.declare_local_operator('SklearnClassLabels')
+ clout = scope.declare_local_operator("SklearnClassLabels")
clout.classes = get_label_classes(scope, model)
if model.classes_.dtype in (np.int32, np.int64, np.bool_):
ctype = Int64TensorType
else:
ctype = StringTensorType
label_type = ctype(clout.classes.shape)
- class_labels = scope.declare_local_variable(
- 'class_labels', label_type)
+ class_labels = scope.declare_local_variable("class_labels", label_type)
clout.outputs.append(class_labels)
outputs = list(probability_tensor)
outputs.append(class_labels)
return outputs
return probability_tensor
- if options.get('output_class_labels', False):
+ if options.get("output_class_labels", False):
raise RuntimeError(
- "Option 'output_class_labels' is not compatible with option "
- "'zipmap'.")
+ "Option 'output_class_labels' is not compatible with option " "'zipmap'."
+ )
return _apply_zipmap(
- options['zipmap'], scope, model, inputs[0].type, probability_tensor)
+ options["zipmap"], scope, model, inputs[0].type, probability_tensor
+ )
-def _parse_sklearn_multi_output_classifier(scope, model, inputs,
- custom_parsers=None):
+def _parse_sklearn_multi_output_classifier(scope, model, inputs, custom_parsers=None):
options = scope.get_options(model, dict(zipmap=True))
- if options['zipmap']:
+ if options["zipmap"]:
warnings.warn(
"Option zipmap is ignored for model %r. "
"Set option zipmap to False to "
"remove this message." % type(model),
- UserWarning)
+ UserWarning,
+ )
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs = inputs
- if hasattr(model, 'classes_'):
+ if hasattr(model, "classes_"):
classes = model.classes_
else:
classes = [get_label_classes(scope, m) for m in model.estimators_]
if len(set(cl.dtype for cl in classes)) != 1:
raise RuntimeError(
"Class labels may have only one type %r."
- "" % set(cl.dtype for cl in classes))
+ "" % set(cl.dtype for cl in classes)
+ )
if classes[0].dtype in (np.int32, np.int64, np.bool_):
ctype = Int64TensorType
else:
@@ -551,18 +574,19 @@ def _parse_sklearn_multi_output_classifier(scope, model, inputs,
label = scope.declare_local_variable("label", ctype())
proba = scope.declare_local_variable(
- "probabilities", SequenceType(guess_tensor_type(inputs[0].type)))
+ "probabilities", SequenceType(guess_tensor_type(inputs[0].type))
+ )
this_operator.outputs.append(label)
this_operator.outputs.append(proba)
options = scope.get_options(model)
- if options.get('output_class_labels', False):
- clout = scope.declare_local_operator('SklearnClassLabels')
+ if options.get("output_class_labels", False):
+ clout = scope.declare_local_operator("SklearnClassLabels")
clout.is_multi_output = True
clout.classes = classes
class_labels = scope.declare_local_variable(
- "class_labels",
- SequenceType(ctype()))
+ "class_labels", SequenceType(ctype())
+ )
clout.outputs.append(class_labels)
return list(this_operator.outputs) + [class_labels]
@@ -570,24 +594,26 @@ def _parse_sklearn_multi_output_classifier(scope, model, inputs,
def _parse_sklearn_gaussian_process(scope, model, inputs, custom_parsers=None):
- options = scope.get_options(
- model, dict(return_cov=False, return_std=False))
- if options['return_std'] and options['return_cov']:
+ options = scope.get_options(model, dict(return_cov=False, return_std=False))
+ if options["return_std"] and options["return_cov"]:
raise RuntimeError(
"Not returning standard deviation of predictions when "
- "returning full covariance.")
+ "returning full covariance."
+ )
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
mean_tensor = scope.declare_local_variable(
- "GPmean", guess_tensor_type(inputs[0].type))
+ "GPmean", guess_tensor_type(inputs[0].type)
+ )
this_operator.inputs = inputs
this_operator.outputs.append(mean_tensor)
- if options['return_std'] or options['return_cov']:
+ if options["return_std"] or options["return_cov"]:
# covariance or standard deviation
covstd_tensor = scope.declare_local_variable(
- 'GPcovstd', guess_tensor_type(inputs[0].type))
+ "GPcovstd", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(covstd_tensor)
return this_operator.outputs
@@ -597,14 +623,16 @@ def _parse_sklearn_bayesian_ridge(scope, model, inputs, custom_parsers=None):
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
mean_tensor = scope.declare_local_variable(
- "variable", guess_tensor_type(inputs[0].type))
+ "variable", guess_tensor_type(inputs[0].type)
+ )
this_operator.inputs = inputs
this_operator.outputs.append(mean_tensor)
- if options['return_std']:
+ if options["return_std"]:
# covariance or standard deviation
covstd_tensor = scope.declare_local_variable(
- 'std', guess_tensor_type(inputs[0].type))
+ "std", guess_tensor_type(inputs[0].type)
+ )
this_operator.outputs.append(covstd_tensor)
return this_operator.outputs
@@ -629,28 +657,31 @@ def _parse_sklearn(scope, model, inputs, custom_parsers=None, alias=None):
for i, inp in enumerate(inputs):
if not isinstance(inp, Variable):
raise TypeError(
- "Unexpected input type %r for input %r: %r." % (
- type(inp), i, inp))
+ "Unexpected input type %r for input %r: %r." % (type(inp), i, inp)
+ )
if alias is not None:
- outputs = _parse_sklearn_simple_model(scope, model, inputs,
- custom_parsers=custom_parsers,
- alias=alias)
+ outputs = _parse_sklearn_simple_model(
+ scope, model, inputs, custom_parsers=custom_parsers, alias=alias
+ )
return outputs
tmodel = type(model)
if custom_parsers is not None and tmodel in custom_parsers:
- outputs = custom_parsers[tmodel](scope, model, inputs,
- custom_parsers=custom_parsers)
+ outputs = custom_parsers[tmodel](
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
elif tmodel in sklearn_parsers_map:
- outputs = sklearn_parsers_map[tmodel](scope, model, inputs,
- custom_parsers=custom_parsers)
+ outputs = sklearn_parsers_map[tmodel](
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
elif isinstance(model, pipeline.Pipeline):
parser = sklearn_parsers_map[pipeline.Pipeline]
outputs = parser(scope, model, inputs, custom_parsers=custom_parsers)
else:
- outputs = _parse_sklearn_simple_model(scope, model, inputs,
- custom_parsers=custom_parsers)
+ outputs = _parse_sklearn_simple_model(
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
return outputs
@@ -681,23 +712,27 @@ def parse_sklearn(scope, model, inputs, custom_parsers=None, final_types=None):
raise RuntimeError(
"Unable to add duplicated output '{}', '{}'. "
"Output and input must have different names."
- "".format(var.onnx_name, name))
+ "".format(var.onnx_name, name)
+ )
outputs.append(var)
hidden_outputs = _parse_sklearn(
- scope, model, inputs, custom_parsers=custom_parsers)
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
if len(hidden_outputs) != len(outputs):
raise RuntimeError(
"Number of declared outputs is unexpected, declared '{}' "
"found '{}'.".format(
", ".join(_.onnx_name for _ in outputs),
- ", ".join(_.onnx_name for _ in hidden_outputs)))
+ ", ".join(_.onnx_name for _ in hidden_outputs),
+ )
+ )
for h, o in zip(hidden_outputs, outputs):
if o.type is None:
- iop = scope.declare_local_operator('SklearnIdentity')
+ iop = scope.declare_local_operator("SklearnIdentity")
else:
- iop = scope.declare_local_operator('SklearnCast')
+ iop = scope.declare_local_operator("SklearnCast")
iop.inputs = [h]
iop.outputs = [o]
h.init_status(is_leaf=False)
@@ -706,20 +741,25 @@ def parse_sklearn(scope, model, inputs, custom_parsers=None, final_types=None):
o.type = h.type
return outputs
- res = _parse_sklearn(
- scope, model, inputs, custom_parsers=custom_parsers)
+ res = _parse_sklearn(scope, model, inputs, custom_parsers=custom_parsers)
for r in res:
r.init_status(is_leaf=True)
return res
-def parse_sklearn_model(model, initial_types=None, target_opset=None,
- custom_conversion_functions=None,
- custom_shape_calculators=None,
- custom_parsers=None,
- options=None, white_op=None,
- black_op=None, final_types=None,
- naming=None):
+def parse_sklearn_model(
+ model,
+ initial_types=None,
+ target_opset=None,
+ custom_conversion_functions=None,
+ custom_shape_calculators=None,
+ custom_parsers=None,
+ options=None,
+ white_op=None,
+ black_op=None,
+ final_types=None,
+ naming=None,
+):
"""
Puts *scikit-learn* object into an abstract container so that
our framework can work seamlessly on models created
@@ -761,21 +801,26 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None,
options = _process_options(model, options)
raw_model_container = SklearnModelContainerNode(
- model, white_op=white_op, black_op=black_op)
+ model, white_op=white_op, black_op=black_op
+ )
# Declare a computational graph. It will become a representation of
# the input scikit-learn model after parsing.
topology = Topology(
- raw_model_container, initial_types=initial_types,
+ raw_model_container,
+ initial_types=initial_types,
target_opset=target_opset,
custom_conversion_functions=custom_conversion_functions,
custom_shape_calculators=custom_shape_calculators,
registered_models=dict(
- conv=_converter_pool, shape=_shape_calculator_pool,
- aliases=sklearn_operator_name_map))
+ conv=_converter_pool,
+ shape=_shape_calculator_pool,
+ aliases=sklearn_operator_name_map,
+ ),
+ )
# Declare an object to provide variables' and operators' naming mechanism.
- scope = topology.declare_scope('__root__', options=options, naming=naming)
+ scope = topology.declare_scope("__root__", options=options, naming=naming)
inputs = scope.input_variables
# The object raw_model_container is a part of the topology
@@ -786,9 +831,9 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None,
raw_model_container.add_input(variable)
# Parse the input scikit-learn model as a Topology object.
- outputs = parse_sklearn(scope, model, inputs,
- custom_parsers=custom_parsers,
- final_types=final_types)
+ outputs = parse_sklearn(
+ scope, model, inputs, custom_parsers=custom_parsers, final_types=final_types
+ )
# The object raw_model_container is a part of the topology we're
# going to return. We use it to store the outputs of the
@@ -796,7 +841,8 @@ def parse_sklearn_model(model, initial_types=None, target_opset=None,
if final_types is not None and len(final_types) != len(outputs):
raise RuntimeError(
"Unexpected number of outputs, expected %d, got %d "
- "after parsing." % (len(final_types), len(outputs)))
+ "after parsing." % (len(final_types), len(outputs))
+ )
return topology
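For context, the zipmap option handled by _parse_sklearn_classifier above is
driven from the public API roughly as follows (a sketch; the model and data
below are placeholders):

    # Sketch: with zipmap=False the classifier keeps its probabilities as a
    # plain tensor instead of a sequence of dictionaries.
    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from skl2onnx import to_onnx

    X = np.random.rand(20, 3).astype(np.float32)
    y = np.array([0, 1] * 10)
    model = LogisticRegression().fit(X, y)

    onx = to_onnx(model, X, options={id(model): {"zipmap": False}})
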
diff --git a/skl2onnx/_supported_operators.py b/skl2onnx/_supported_operators.py
index 1653d076b..b4a5a7278 100644
--- a/skl2onnx/_supported_operators.py
+++ b/skl2onnx/_supported_operators.py
@@ -9,11 +9,13 @@
# Linear classifiers
from sklearn.linear_model import (
- LogisticRegression, LogisticRegressionCV,
+ LogisticRegression,
+ LogisticRegressionCV,
PassiveAggressiveClassifier,
Perceptron,
- RidgeClassifier, RidgeClassifierCV,
- SGDClassifier
+ RidgeClassifier,
+ RidgeClassifierCV,
+ SGDClassifier,
)
from sklearn.svm import LinearSVC, OneClassSVM
@@ -21,22 +23,31 @@
from sklearn.linear_model import (
ARDRegression,
BayesianRidge,
- ElasticNet, ElasticNetCV,
+ ElasticNet,
+ ElasticNetCV,
HuberRegressor,
- Lars, LarsCV,
- Lasso, LassoCV,
- LassoLars, LassoLarsCV,
+ Lars,
+ LarsCV,
+ Lasso,
+ LassoCV,
+ LassoLars,
+ LassoLarsCV,
LassoLarsIC,
LinearRegression,
- MultiTaskElasticNet, MultiTaskElasticNetCV,
- MultiTaskLasso, MultiTaskLassoCV,
- OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV,
+ MultiTaskElasticNet,
+ MultiTaskElasticNetCV,
+ MultiTaskLasso,
+ MultiTaskLassoCV,
+ OrthogonalMatchingPursuit,
+ OrthogonalMatchingPursuitCV,
PassiveAggressiveRegressor,
RANSACRegressor,
- Ridge, RidgeCV,
+ Ridge,
+ RidgeCV,
SGDRegressor,
- TheilSenRegressor
+ TheilSenRegressor,
)
+
try:
from sklearn.linear_model import GammaRegressor
except ImportError:
@@ -66,31 +77,36 @@
from sklearn.svm import LinearSVR
from sklearn.discriminant_analysis import (
LinearDiscriminantAnalysis,
- QuadraticDiscriminantAnalysis
+ QuadraticDiscriminantAnalysis,
)
# Mixture
-from sklearn.mixture import (
- GaussianMixture, BayesianGaussianMixture
-)
+from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
# Multi-class
from sklearn.multiclass import (
_ConstantPredictor,
OneVsRestClassifier,
- OneVsOneClassifier
+ OneVsOneClassifier,
)
# Tree-based models
from sklearn.ensemble import (
- AdaBoostClassifier, AdaBoostRegressor,
- BaggingClassifier, BaggingRegressor,
- ExtraTreesClassifier, ExtraTreesRegressor,
- GradientBoostingClassifier, GradientBoostingRegressor,
+ AdaBoostClassifier,
+ AdaBoostRegressor,
+ BaggingClassifier,
+ BaggingRegressor,
+ ExtraTreesClassifier,
+ ExtraTreesRegressor,
+ GradientBoostingClassifier,
+ GradientBoostingRegressor,
IsolationForest,
- RandomForestClassifier, RandomForestRegressor, RandomTreesEmbedding,
- VotingClassifier
+ RandomForestClassifier,
+ RandomForestRegressor,
+ RandomTreesEmbedding,
+ VotingClassifier,
)
+
try:
from sklearn.ensemble import VotingRegressor
except ImportError:
@@ -103,14 +119,14 @@
StackingClassifier = None
StackingRegressor = None
from sklearn.tree import (
- DecisionTreeClassifier, DecisionTreeRegressor,
- ExtraTreeClassifier, ExtraTreeRegressor
+ DecisionTreeClassifier,
+ DecisionTreeRegressor,
+ ExtraTreeClassifier,
+ ExtraTreeRegressor,
)
# Gaussian processes
-from sklearn.gaussian_process import (
- GaussianProcessClassifier, GaussianProcessRegressor
-)
+from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
# GridSearchCV
from sklearn.model_selection import GridSearchCV
@@ -130,6 +146,7 @@
RadiusNeighborsClassifier,
RadiusNeighborsRegressor,
)
+
try:
from sklearn.neighbors import (
KNeighborsTransformer,
@@ -146,6 +163,7 @@
GaussianNB,
MultinomialNB,
)
+
try:
from sklearn.naive_bayes import CategoricalNB
except ImportError:
@@ -176,14 +194,23 @@
FeatureHasher,
)
from sklearn.feature_extraction.text import (
- CountVectorizer, TfidfTransformer, TfidfVectorizer
+ CountVectorizer,
+ TfidfTransformer,
+ TfidfVectorizer,
)
from sklearn.feature_selection import (
- GenericUnivariateSelect, RFE, RFECV,
- SelectFdr, SelectFpr, SelectFromModel,
- SelectFwe, SelectKBest, SelectPercentile,
- VarianceThreshold
+ GenericUnivariateSelect,
+ RFE,
+ RFECV,
+ SelectFdr,
+ SelectFpr,
+ SelectFromModel,
+ SelectFwe,
+ SelectKBest,
+ SelectPercentile,
+ VarianceThreshold,
)
+
try:
# 0.20
from sklearn.impute import SimpleImputer
@@ -191,6 +218,7 @@
# 0.19
from sklearn.preprocessing import Imputer as SimpleImputer
from sklearn.preprocessing import Binarizer
+
try:
from sklearn.preprocessing import Imputer
except ImportError:
@@ -207,9 +235,12 @@
# not available in 0.19
KBinsDiscretizer = None
from sklearn.preprocessing import (
- LabelBinarizer, LabelEncoder,
- Normalizer, OneHotEncoder
+ LabelBinarizer,
+ LabelEncoder,
+ Normalizer,
+ OneHotEncoder,
)
+
try:
from sklearn.preprocessing import OrdinalEncoder
except ImportError:
@@ -222,7 +253,7 @@
MinMaxScaler,
PolynomialFeatures,
RobustScaler,
- StandardScaler
+ StandardScaler,
)
try:
@@ -234,7 +265,7 @@
try:
from sklearn.ensemble import (
HistGradientBoostingClassifier,
- HistGradientBoostingRegressor
+ HistGradientBoostingRegressor,
)
except ImportError:
# Second verification as these models still require
@@ -242,7 +273,7 @@
try:
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import ( # noqa
HistGradientBoostingClassifier,
- HistGradientBoostingRegressor
+ HistGradientBoostingRegressor,
)
except ImportError:
HistGradientBoostingRegressor = None
@@ -263,7 +294,7 @@
from .common._registration import register_converter, register_shape_calculator
-logger = logging.getLogger('skl2onnx')
+logger = logging.getLogger("skl2onnx")
# In most cases, scikit-learn operator produces only one output.
# However, each classifier has basically two outputs; one is the
@@ -272,40 +303,45 @@
# classifiers. In the parsing stage, we produce two outputs for objects
# included in the following list and one output for everything not in
# the list.
-sklearn_classifier_list = list(filter(lambda m: m is not None, [
- _ConstantPredictor,
- AdaBoostClassifier,
- BaggingClassifier,
- BernoulliNB,
- CategoricalNB,
- CalibratedClassifierCV,
- ComplementNB,
- DecisionTreeClassifier,
- ExtraTreeClassifier,
- ExtraTreesClassifier,
- GaussianNB,
- GaussianProcessClassifier,
- GradientBoostingClassifier,
- HistGradientBoostingClassifier,
- KNeighborsClassifier,
- LinearDiscriminantAnalysis,
- LinearSVC,
- LogisticRegression,
- LogisticRegressionCV,
- MLPClassifier,
- MultinomialNB,
- NuSVC,
- OneVsOneClassifier,
- OneVsRestClassifier,
- PassiveAggressiveClassifier,
- Perceptron,
- QuadraticDiscriminantAnalysis,
- RandomForestClassifier,
- SGDClassifier,
- StackingClassifier,
- SVC,
- VotingClassifier,
-]))
+sklearn_classifier_list = list(
+ filter(
+ lambda m: m is not None,
+ [
+ _ConstantPredictor,
+ AdaBoostClassifier,
+ BaggingClassifier,
+ BernoulliNB,
+ CategoricalNB,
+ CalibratedClassifierCV,
+ ComplementNB,
+ DecisionTreeClassifier,
+ ExtraTreeClassifier,
+ ExtraTreesClassifier,
+ GaussianNB,
+ GaussianProcessClassifier,
+ GradientBoostingClassifier,
+ HistGradientBoostingClassifier,
+ KNeighborsClassifier,
+ LinearDiscriminantAnalysis,
+ LinearSVC,
+ LogisticRegression,
+ LogisticRegressionCV,
+ MLPClassifier,
+ MultinomialNB,
+ NuSVC,
+ OneVsOneClassifier,
+ OneVsRestClassifier,
+ PassiveAggressiveClassifier,
+ Perceptron,
+ QuadraticDiscriminantAnalysis,
+ RandomForestClassifier,
+ SGDClassifier,
+ StackingClassifier,
+ SVC,
+ VotingClassifier,
+ ],
+ )
+)
# Clustering algorithms: produces two outputs, label and score for
# each cluster in most cases.
@@ -320,159 +356,166 @@
# scikit-learn models share a single name, it means their are
# equivalent in terms of conversion.
def build_sklearn_operator_name_map():
- res = {k: "Sklearn" + k.__name__ for k in [
- _ConstantPredictor,
- AdaBoostClassifier,
- AdaBoostRegressor,
- BaggingClassifier,
- BaggingRegressor,
- BayesianGaussianMixture,
- BayesianRidge,
- BernoulliNB,
- Binarizer,
- CalibratedClassifierCV,
- CategoricalNB,
- CastRegressor,
- CastTransformer,
- ColumnTransformer,
- ComplementNB,
- CountVectorizer,
- DictVectorizer,
- DecisionTreeClassifier,
- DecisionTreeRegressor,
- ExtraTreeClassifier,
- ExtraTreeRegressor,
- ExtraTreesClassifier,
- ExtraTreesRegressor,
- FeatureHasher,
- FeatureUnion,
- FunctionTransformer,
- GammaRegressor,
- GaussianNB,
- GaussianMixture,
- GaussianProcessClassifier,
- GaussianProcessRegressor,
- GaussianRandomProjection,
- GenericUnivariateSelect,
- GradientBoostingClassifier,
- GradientBoostingRegressor,
- HistGradientBoostingClassifier,
- HistGradientBoostingRegressor,
- Imputer,
- IncrementalPCA,
- IsolationForest,
- KMeans,
- LabelBinarizer,
- LabelEncoder,
- LinearRegression,
- LinearSVC,
- LinearSVR,
- LocalOutlierFactor,
- MaxAbsScaler,
- MiniBatchKMeans,
- MinMaxScaler,
- MLPClassifier,
- MLPRegressor,
- MultinomialNB,
- MultiOutputClassifier,
- MultiOutputRegressor,
- KBinsDiscretizer,
- KernelCenterer,
- KernelPCA,
- KNeighborsClassifier,
- KNeighborsRegressor,
- KNeighborsTransformer,
- KNNImputer,
- NearestNeighbors,
- NeighborhoodComponentsAnalysis,
- Normalizer,
- OneClassSVM,
- OneHotEncoder,
- OneVsOneClassifier,
- OneVsRestClassifier,
- OrdinalEncoder,
- PCA,
- PLSRegression,
- Pipeline,
- PoissonRegressor,
- PolynomialFeatures,
- PowerTransformer,
- QuadraticDiscriminantAnalysis,
- RadiusNeighborsClassifier,
- RadiusNeighborsRegressor,
- RandomForestClassifier,
- RandomForestRegressor,
- RandomTreesEmbedding,
- RANSACRegressor,
- ReplaceTransformer,
- RFE,
- RFECV,
- RobustScaler,
- SelectFdr,
- SelectFpr,
- SelectFromModel,
- SelectFwe,
- SelectKBest,
- SelectPercentile,
- SGDClassifier,
- SGDOneClassSVM,
- SimpleImputer,
- StackingClassifier,
- StackingRegressor,
- SVC,
- SVR,
- TfidfVectorizer,
- TfidfTransformer,
- TruncatedSVD,
- TweedieRegressor,
- VarianceThreshold,
- VotingClassifier,
- VotingRegressor,
- ] if k is not None}
- res.update({
- ARDRegression: 'SklearnLinearRegressor',
- ElasticNet: 'SklearnLinearRegressor',
- ElasticNetCV: 'SklearnLinearRegressor',
- GridSearchCV: 'SklearnGridSearchCV',
- HuberRegressor: 'SklearnLinearRegressor',
- LinearRegression: 'SklearnLinearRegressor',
- Lars: 'SklearnLinearRegressor',
- LarsCV: 'SklearnLinearRegressor',
- Lasso: 'SklearnLinearRegressor',
- LassoCV: 'SklearnLinearRegressor',
- LassoLars: 'SklearnLinearRegressor',
- LassoLarsCV: 'SklearnLinearRegressor',
- LassoLarsIC: 'SklearnLinearRegressor',
- LinearDiscriminantAnalysis: 'SklearnLinearClassifier',
- LogisticRegression: 'SklearnLinearClassifier',
- LogisticRegressionCV: 'SklearnLinearClassifier',
- MultiTaskElasticNet: 'SklearnLinearRegressor',
- MultiTaskElasticNetCV: 'SklearnLinearRegressor',
- MultiTaskLasso: 'SklearnLinearRegressor',
- MultiTaskLassoCV: 'SklearnLinearRegressor',
- NuSVC: 'SklearnSVC',
- NuSVR: 'SklearnSVR',
- OrthogonalMatchingPursuit: 'SklearnLinearRegressor',
- OrthogonalMatchingPursuitCV: 'SklearnLinearRegressor',
- PassiveAggressiveClassifier: 'SklearnSGDClassifier',
- PassiveAggressiveRegressor: 'SklearnLinearRegressor',
- Perceptron: 'SklearnSGDClassifier',
- QuantileRegressor: 'SklearnLinearRegressor',
- Ridge: 'SklearnLinearRegressor',
- RidgeCV: 'SklearnLinearRegressor',
- RidgeClassifier: 'SklearnLinearClassifier',
- RidgeClassifierCV: 'SklearnLinearClassifier',
- SGDRegressor: 'SklearnLinearRegressor',
- StandardScaler: 'SklearnScaler',
- TheilSenRegressor: 'SklearnLinearRegressor',
- })
+ res = {
+ k: "Sklearn" + k.__name__
+ for k in [
+ _ConstantPredictor,
+ AdaBoostClassifier,
+ AdaBoostRegressor,
+ BaggingClassifier,
+ BaggingRegressor,
+ BayesianGaussianMixture,
+ BayesianRidge,
+ BernoulliNB,
+ Binarizer,
+ CalibratedClassifierCV,
+ CategoricalNB,
+ CastRegressor,
+ CastTransformer,
+ ColumnTransformer,
+ ComplementNB,
+ CountVectorizer,
+ DictVectorizer,
+ DecisionTreeClassifier,
+ DecisionTreeRegressor,
+ ExtraTreeClassifier,
+ ExtraTreeRegressor,
+ ExtraTreesClassifier,
+ ExtraTreesRegressor,
+ FeatureHasher,
+ FeatureUnion,
+ FunctionTransformer,
+ GammaRegressor,
+ GaussianNB,
+ GaussianMixture,
+ GaussianProcessClassifier,
+ GaussianProcessRegressor,
+ GaussianRandomProjection,
+ GenericUnivariateSelect,
+ GradientBoostingClassifier,
+ GradientBoostingRegressor,
+ HistGradientBoostingClassifier,
+ HistGradientBoostingRegressor,
+ Imputer,
+ IncrementalPCA,
+ IsolationForest,
+ KMeans,
+ LabelBinarizer,
+ LabelEncoder,
+ LinearRegression,
+ LinearSVC,
+ LinearSVR,
+ LocalOutlierFactor,
+ MaxAbsScaler,
+ MiniBatchKMeans,
+ MinMaxScaler,
+ MLPClassifier,
+ MLPRegressor,
+ MultinomialNB,
+ MultiOutputClassifier,
+ MultiOutputRegressor,
+ KBinsDiscretizer,
+ KernelCenterer,
+ KernelPCA,
+ KNeighborsClassifier,
+ KNeighborsRegressor,
+ KNeighborsTransformer,
+ KNNImputer,
+ NearestNeighbors,
+ NeighborhoodComponentsAnalysis,
+ Normalizer,
+ OneClassSVM,
+ OneHotEncoder,
+ OneVsOneClassifier,
+ OneVsRestClassifier,
+ OrdinalEncoder,
+ PCA,
+ PLSRegression,
+ Pipeline,
+ PoissonRegressor,
+ PolynomialFeatures,
+ PowerTransformer,
+ QuadraticDiscriminantAnalysis,
+ RadiusNeighborsClassifier,
+ RadiusNeighborsRegressor,
+ RandomForestClassifier,
+ RandomForestRegressor,
+ RandomTreesEmbedding,
+ RANSACRegressor,
+ ReplaceTransformer,
+ RFE,
+ RFECV,
+ RobustScaler,
+ SelectFdr,
+ SelectFpr,
+ SelectFromModel,
+ SelectFwe,
+ SelectKBest,
+ SelectPercentile,
+ SGDClassifier,
+ SGDOneClassSVM,
+ SimpleImputer,
+ StackingClassifier,
+ StackingRegressor,
+ SVC,
+ SVR,
+ TfidfVectorizer,
+ TfidfTransformer,
+ TruncatedSVD,
+ TweedieRegressor,
+ VarianceThreshold,
+ VotingClassifier,
+ VotingRegressor,
+ ]
+ if k is not None
+ }
+ res.update(
+ {
+ ARDRegression: "SklearnLinearRegressor",
+ ElasticNet: "SklearnLinearRegressor",
+ ElasticNetCV: "SklearnLinearRegressor",
+ GridSearchCV: "SklearnGridSearchCV",
+ HuberRegressor: "SklearnLinearRegressor",
+ LinearRegression: "SklearnLinearRegressor",
+ Lars: "SklearnLinearRegressor",
+ LarsCV: "SklearnLinearRegressor",
+ Lasso: "SklearnLinearRegressor",
+ LassoCV: "SklearnLinearRegressor",
+ LassoLars: "SklearnLinearRegressor",
+ LassoLarsCV: "SklearnLinearRegressor",
+ LassoLarsIC: "SklearnLinearRegressor",
+ LinearDiscriminantAnalysis: "SklearnLinearClassifier",
+ LogisticRegression: "SklearnLinearClassifier",
+ LogisticRegressionCV: "SklearnLinearClassifier",
+ MultiTaskElasticNet: "SklearnLinearRegressor",
+ MultiTaskElasticNetCV: "SklearnLinearRegressor",
+ MultiTaskLasso: "SklearnLinearRegressor",
+ MultiTaskLassoCV: "SklearnLinearRegressor",
+ NuSVC: "SklearnSVC",
+ NuSVR: "SklearnSVR",
+ OrthogonalMatchingPursuit: "SklearnLinearRegressor",
+ OrthogonalMatchingPursuitCV: "SklearnLinearRegressor",
+ PassiveAggressiveClassifier: "SklearnSGDClassifier",
+ PassiveAggressiveRegressor: "SklearnLinearRegressor",
+ Perceptron: "SklearnSGDClassifier",
+ QuantileRegressor: "SklearnLinearRegressor",
+ Ridge: "SklearnLinearRegressor",
+ RidgeCV: "SklearnLinearRegressor",
+ RidgeClassifier: "SklearnLinearClassifier",
+ RidgeClassifierCV: "SklearnLinearClassifier",
+ SGDRegressor: "SklearnLinearRegressor",
+ StandardScaler: "SklearnScaler",
+ TheilSenRegressor: "SklearnLinearRegressor",
+ }
+ )
if None in res:
del res[None]
return res
-def update_registered_converter(model, alias, shape_fct, convert_fct,
- overwrite=True, parser=None, options=None):
+def update_registered_converter(
+ model, alias, shape_fct, convert_fct, overwrite=True, parser=None, options=None
+):
"""
Registers or updates a converter for a new model so that
it can be converted when inserted in a *scikit-learn* pipeline.
@@ -510,21 +553,27 @@ def update_registered_converter(model, alias, shape_fct, convert_fct,
must declare this option to let the default parser
automatically handle that option.
""" # noqa
- if (not overwrite and model in sklearn_operator_name_map
- and alias != sklearn_operator_name_map[model]):
- warnings.warn("Model '{0}' was already registered under alias "
- "'{1}'.".format(model, sklearn_operator_name_map[model]))
+ if (
+ not overwrite
+ and model in sklearn_operator_name_map
+ and alias != sklearn_operator_name_map[model]
+ ):
+ warnings.warn(
+ "Model '{0}' was already registered under alias "
+ "'{1}'.".format(model, sklearn_operator_name_map[model])
+ )
sklearn_operator_name_map[model] = alias
- register_converter(alias, convert_fct, overwrite=overwrite,
- options=options)
+ register_converter(alias, convert_fct, overwrite=overwrite, options=options)
register_shape_calculator(alias, shape_fct, overwrite=overwrite)
if parser is not None:
from ._parse import update_registered_parser
+
update_registered_parser(model, parser)
- elif (options is not None and
- ('zipmap' in options or 'output_class_labels' in options)):
- from ._parse import (
- _parse_sklearn_classifier, update_registered_parser)
+ elif options is not None and (
+ "zipmap" in options or "output_class_labels" in options
+ ):
+ from ._parse import _parse_sklearn_classifier, update_registered_parser
+
update_registered_parser(model, _parse_sklearn_classifier)
@@ -542,7 +591,7 @@ def _get_sklearn_operator_name(model_type):
alias = None
else:
alias = sklearn_operator_name_map[model_type]
- logger.debug('[parsing] found alias=%r for type=%r.', alias, model_type)
+ logger.debug("[parsing] found alias=%r for type=%r.", alias, model_type)
return alias
@@ -557,9 +606,11 @@ def get_model_alias(model_type):
"""
res = _get_sklearn_operator_name(model_type)
if res is None:
- raise RuntimeError("Unable to find alias for model '{}'. "
- "The converter is likely missing."
- "".format(model_type))
+ raise RuntimeError(
+ "Unable to find alias for model '{}'. "
+ "The converter is likely missing."
+ "".format(model_type)
+ )
return res
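For reference, update_registered_converter, whose signature is reformatted
above, is typically used as below (the custom transformer and both callbacks
are hypothetical placeholders, not part of this change):

    # Sketch: register a converter for a hypothetical custom estimator.
    from sklearn.base import BaseEstimator, TransformerMixin
    from skl2onnx import update_registered_converter


    class MyCustomTransformer(TransformerMixin, BaseEstimator):
        """Pass-through transformer used only for this sketch."""

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X


    def my_shape_calculator(operator):
        # Copy the input tensor type to the single output, with free dimensions.
        operator.outputs[0].type = operator.inputs[0].type.__class__([None, None])


    def my_converter(scope, operator, container):
        # Stand-in conversion: forward the input through an Identity node.
        container.add_node(
            "Identity",
            operator.inputs[0].full_name,
            operator.outputs[0].full_name,
            name=scope.get_unique_operator_name("Identity"),
        )


    update_registered_converter(
        MyCustomTransformer,
        "CustomMyCustomTransformer",  # alias stored in sklearn_operator_name_map
        my_shape_calculator,
        my_converter,
    )
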
diff --git a/skl2onnx/algebra/automation.py b/skl2onnx/algebra/automation.py
index 5bcd08053..40220d97e 100644
--- a/skl2onnx/algebra/automation.py
+++ b/skl2onnx/algebra/automation.py
@@ -10,23 +10,25 @@ def _get_doc_template():
try:
from jinja2 import Template
except ImportError:
+
class Template:
def __init__(self, *args):
pass
def render(self, **context):
- schemas = context['schemas']
+ schemas = context["schemas"]
rows = []
for sch in schemas:
- doc = sch.doc or ''
+ doc = sch.doc or ""
name = sch.name
if name is None:
raise RuntimeError("An operator must have a name.")
- rows.extend([name, "=" * len(name),
- "", doc, ""])
+ rows.extend([name, "=" * len(name), "", doc, ""])
return "\n".join(rows)
- return Template(textwrap.dedent("""
+ return Template(
+ textwrap.dedent(
+ """
{% for sch in schemas %}
{{format_name_with_domain(sch)}}
@@ -96,7 +98,9 @@ def render(self, **context):
{% endif %}
{% endfor %}
- """))
+ """
+ )
+ )
_template_operator = _get_doc_template()
@@ -106,8 +110,9 @@ def get_domain_list():
"""
Returns the list of available domains.
"""
- return list(sorted(set(map(lambda s: s.domain,
- onnx.defs.get_all_schemas_with_history()))))
+ return list(
+ sorted(set(map(lambda s: s.domain, onnx.defs.get_all_schemas_with_history())))
+ )
def get_rst_doc(op_name=None):
@@ -124,16 +129,19 @@ def get_rst_doc(op_name=None):
if op_name is None:
schemas = onnx.defs.get_all_schemas_with_history()
elif isinstance(op_name, str):
- schemas = [schema for schema in onnx.defs.get_all_schemas_with_history(
- ) if schema.name == op_name]
+ schemas = [
+ schema
+ for schema in onnx.defs.get_all_schemas_with_history()
+ if schema.name == op_name
+ ]
if len(schemas) > 1:
raise RuntimeError(
- "Multiple operators have the same name '{}'.".format(op_name))
+ "Multiple operators have the same name '{}'.".format(op_name)
+ )
elif not isinstance(op_name, list):
schemas = [op_name]
if len(schemas) == 0:
- raise ValueError(
- "Unable to find any operator with name '{}'.".format(op_name))
+ raise ValueError("Unable to find any operator with name '{}'.".format(op_name))
# from onnx.backend.sample.ops import collect_sample_implementations
# from onnx.backend.test.case import collect_snippets
@@ -141,7 +149,7 @@ def get_rst_doc(op_name=None):
# SAMPLE_IMPLEMENTATIONS = collect_sample_implementations()
def format_name_with_domain(sch):
if sch.domain:
- return '{} ({})'.format(sch.name, sch.domain)
+ return "{} ({})".format(sch.name, sch.domain)
return sch.name
def get_type_str(obj):
@@ -161,11 +169,11 @@ def get_is_homogeneous(obj):
def format_option(obj):
opts = []
if OpSchema.FormalParameterOption.Optional == obj.option:
- opts.append('optional')
+ opts.append("optional")
elif OpSchema.FormalParameterOption.Variadic == obj.option:
- opts.append('variadic')
+ opts.append("variadic")
if get_is_homogeneous(obj):
- opts.append('heterogeneous')
+ opts.append("heterogeneous")
if opts:
return " (%s)" % ", ".join(opts)
return ""
@@ -187,28 +195,29 @@ def getname(obj, i):
def process_documentation(doc):
if doc is None:
- doc = ''
+ doc = ""
doc = textwrap.dedent(doc)
main_docs_url = "https://github.com/onnx/onnx/blob/master/"
rep = {
- '[the doc](IR.md)': '`ONNX <{0}docs/IR.md>`_',
- '[the doc](Broadcasting.md)':
- '`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_',
-        '<dl>': '',
-        '</dl>': '',
-        '<dt>': '* ',
-        '<dd>': '  ',
-        '</dt>': '',
-        '</dd>': '',
-        '<tt>': '``',
-        '</tt>': '``',
-        '<br>': '\n',
+ "[the doc](IR.md)": "`ONNX <{0}docs/IR.md>`_",
+ "[the doc](Broadcasting.md)": (
+ "`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_"
+ ),
+ "": "",
+ "
": "",
+ "": "* ",
+ "": " ",
+ "": "",
+ "": "",
+ "": "``",
+ "": "``",
+ "
": "\n",
}
for k, v in rep.items():
doc = doc.replace(k, v.format(main_docs_url))
move = 0
lines = []
- for line in doc.split('\n'):
+ for line in doc.split("\n"):
if line.startswith("```"):
if move > 0:
move -= 4
@@ -228,21 +237,28 @@ def build_doc_url(sch):
doc_url += "-ml"
doc_url += ".md"
doc_url += "#"
- if sch.domain not in (None, '', 'ai.onnx'):
+ if sch.domain not in (None, "", "ai.onnx"):
doc_url += sch.domain + "."
return doc_url
fnwd = format_name_with_domain
tmpl = _template_operator
- docs = tmpl.render(schemas=schemas, OpSchema=OpSchema,
- len=len, getattr=getattr, sorted=sorted,
- format_option=format_option,
- getconstraint=getconstraint,
- getname=getname, enumerate=enumerate,
- format_name_with_domain=fnwd,
- process_documentation=process_documentation,
- build_doc_url=build_doc_url,
- str=str, get_type_str=get_type_str)
+ docs = tmpl.render(
+ schemas=schemas,
+ OpSchema=OpSchema,
+ len=len,
+ getattr=getattr,
+ sorted=sorted,
+ format_option=format_option,
+ getconstraint=getconstraint,
+ getname=getname,
+ enumerate=enumerate,
+ format_name_with_domain=fnwd,
+ process_documentation=process_documentation,
+ build_doc_url=build_doc_url,
+ str=str,
+ get_type_str=get_type_str,
+ )
return docs
@@ -250,23 +266,25 @@ def _get_doc_template_sklearn():
try:
from jinja2 import Template
except ImportError:
+
class Template:
def __init__(self, *args):
pass
def render(self, **context):
- schemas = context['schemas']
+ schemas = context["schemas"]
rows = []
for sch in schemas:
- doc = sch.doc or ''
+ doc = sch.doc or ""
name = sch.name
if name is None:
raise RuntimeError("An operator must have a name.")
- rows.extend([name, "=" * len(name),
- "", doc, ""])
+ rows.extend([name, "=" * len(name), "", doc, ""])
return "\n".join(rows)
- return Template(textwrap.dedent("""
+ return Template(
+ textwrap.dedent(
+ """
{% for cl in classes %}
.. _l-sklops-{{cl.__name__}}:
@@ -285,7 +303,9 @@ def render(self, **context):
{{format_doc(cl)}}
{% endfor %}
- """))
+ """
+ )
+ )
_template_operator_sklearn = _get_doc_template_sklearn()
@@ -302,10 +322,12 @@ def get_rst_doc_sklearn():
The function relies on module *jinja2* or replaces it
with a simple rendering if not present.
"""
+
def format_doc(cl):
return "\n".join(cl.__doc__.split("\n")[1:])
from .sklearn_ops import dynamic_class_creation_sklearn
+
classes = dynamic_class_creation_sklearn()
tmpl = _template_operator_sklearn
values = [(k, v) for k, v in sorted(classes.items())]
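The documentation helpers reformatted above are normally exercised as follows
(a sketch; the output is plain RST text and jinja2 is needed for the full
template path):

    # Sketch: render the RST documentation of a single ONNX operator.
    from skl2onnx.algebra.automation import get_rst_doc

    print(get_rst_doc("Add")[:300])
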
diff --git a/skl2onnx/algebra/complex_functions.py b/skl2onnx/algebra/complex_functions.py
index 6aab3c45c..e704ac4ab 100644
--- a/skl2onnx/algebra/complex_functions.py
+++ b/skl2onnx/algebra/complex_functions.py
@@ -6,66 +6,86 @@
from ..common.data_types import FloatTensorType, DoubleTensorType
from ..common.utils import get_unique_subgraph
from .onnx_ops import (
- OnnxIdentity, OnnxScan, OnnxTranspose,
- OnnxSub, OnnxReduceSumSquareApi18,
- OnnxSqrt, OnnxPow, OnnxAbs, OnnxReduceSumApi11)
+ OnnxIdentity,
+ OnnxScan,
+ OnnxTranspose,
+ OnnxSub,
+ OnnxReduceSumSquareApi18,
+ OnnxSqrt,
+ OnnxPow,
+ OnnxAbs,
+ OnnxReduceSumApi11,
+)
-logger = getLogger('skl2onnx')
+logger = getLogger("skl2onnx")
-def onnx_squareform_pdist(X, metric='sqeuclidean', dtype=None,
- op_version=None, **kwargs):
+def onnx_squareform_pdist(
+ X, metric="sqeuclidean", dtype=None, op_version=None, **kwargs
+):
"""
Returns the ONNX graph which computes
``squareform(pdist(X, metric=metric))``.
"""
- if metric == 'sqeuclidean':
+ if metric == "sqeuclidean":
return _onnx_squareform_pdist_sqeuclidean(
- X, dtype=dtype, op_version=op_version, **kwargs)
- if metric == 'euclidean':
- res = _onnx_squareform_pdist_sqeuclidean(
- X, dtype=dtype, op_version=op_version)
+ X, dtype=dtype, op_version=op_version, **kwargs
+ )
+ if metric == "euclidean":
+ res = _onnx_squareform_pdist_sqeuclidean(X, dtype=dtype, op_version=op_version)
return OnnxSqrt(res, op_version=op_version, **kwargs)
- raise NotImplementedError(
- "metric='{}' is not implemented.".format(metric))
+ raise NotImplementedError("metric='{}' is not implemented.".format(metric))
-def _onnx_squareform_pdist_sqeuclidean(X, dtype=None, op_version=None,
- **kwargs):
+def _onnx_squareform_pdist_sqeuclidean(X, dtype=None, op_version=None, **kwargs):
"""
Returns the ONNX graph which computes
``squareform(pdist(X, metric='sqeuclidean'))``.
"""
unique = get_unique_subgraph()
- diff = OnnxSub('next_in', 'next',
- op_version=op_version)
- id_next = OnnxIdentity('next_in', output_names=['next_out'],
- op_version=op_version)
- flat = OnnxReduceSumSquareApi18(diff, axes=[1], op_version=op_version,
- output_names=['scan_out'], keepdims=0)
- flat.set_onnx_name_prefix('cflat_%d' % unique)
- id_next.set_onnx_name_prefix('pdistsqe_%d' % unique)
+ diff = OnnxSub("next_in", "next", op_version=op_version)
+ id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=op_version)
+ flat = OnnxReduceSumSquareApi18(
+ diff, axes=[1], op_version=op_version, output_names=["scan_out"], keepdims=0
+ )
+ flat.set_onnx_name_prefix("cflat_%d" % unique)
+ id_next.set_onnx_name_prefix("pdistsqe_%d" % unique)
tensor_type = FloatTensorType if dtype == np.float32 else DoubleTensorType
scan_body = id_next.to_onnx(
- OrderedDict([('next_in', tensor_type([None, None])),
- ('next', tensor_type([None]))]),
- outputs=[('next_out', tensor_type([None, None])),
- ('scan_out', tensor_type([None]))],
+ OrderedDict(
+ [("next_in", tensor_type([None, None])), ("next", tensor_type([None]))]
+ ),
+ outputs=[
+ ("next_out", tensor_type([None, None])),
+ ("scan_out", tensor_type([None])),
+ ],
other_outputs=[flat],
- target_opset=op_version)
-
- node = OnnxScan(X, X, output_names=['u(scan0)', 'u(scan1)'],
- num_scan_inputs=1,
- body=(scan_body.graph, [id_next, flat]),
- op_version=op_version, **kwargs)
- logger.debug('[_onnx_squareform_pdist_sqeuclidean] +Scan dtype=%r',
- dtype)
+ target_opset=op_version,
+ )
+
+ node = OnnxScan(
+ X,
+ X,
+ output_names=["u(scan0)", "u(scan1)"],
+ num_scan_inputs=1,
+ body=(scan_body.graph, [id_next, flat]),
+ op_version=op_version,
+ **kwargs
+ )
+ logger.debug("[_onnx_squareform_pdist_sqeuclidean] +Scan dtype=%r", dtype)
return node[1]
-def onnx_cdist(XA, XB, metric='sqeuclidean', dtype=None,
- op_version=None, dim_in=None, dim_out=None,
- **kwargs):
+def onnx_cdist(
+ XA,
+ XB,
+ metric="sqeuclidean",
+ dtype=None,
+ op_version=None,
+ dim_in=None,
+ dim_out=None,
+ **kwargs
+):
"""
Returns the ONNX graph which computes
``cdist(XA, XB, metric=metric)``.
@@ -82,107 +102,161 @@ def onnx_cdist(XA, XB, metric='sqeuclidean', dtype=None,
:param kwargs: addition parameter
:return: OnnxOperatorMixin
"""
- if metric == 'sqeuclidean':
+ if metric == "sqeuclidean":
return _onnx_cdist_sqeuclidean(
- XA, XB, dtype=dtype, op_version=op_version,
- dim_in=dim_in, dim_out=dim_out, **kwargs)
- elif metric == 'euclidean':
+ XA,
+ XB,
+ dtype=dtype,
+ op_version=op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
+ elif metric == "euclidean":
res = _onnx_cdist_sqeuclidean(
- XA, XB, dtype=dtype, op_version=op_version,
- dim_in=dim_in, dim_out=dim_out)
+ XA, XB, dtype=dtype, op_version=op_version, dim_in=dim_in, dim_out=dim_out
+ )
return OnnxSqrt(res, op_version=op_version, **kwargs)
- elif metric == 'minkowski':
- p = kwargs.pop('p')
+ elif metric == "minkowski":
+ p = kwargs.pop("p")
res = _onnx_cdist_minkowski(
- XA, XB, dtype=dtype, op_version=op_version, p=p,
- dim_in=dim_in, dim_out=dim_out)
- return OnnxPow(res, np.array([1. / p], dtype=dtype),
- op_version=op_version, **kwargs)
- elif metric in ('manhattan', 'cityblock'):
+ XA,
+ XB,
+ dtype=dtype,
+ op_version=op_version,
+ p=p,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ )
+ return OnnxPow(
+ res, np.array([1.0 / p], dtype=dtype), op_version=op_version, **kwargs
+ )
+ elif metric in ("manhattan", "cityblock"):
return _onnx_cdist_manhattan(
- XA, XB, dtype=dtype, op_version=op_version,
- dim_in=dim_in, dim_out=dim_out, **kwargs)
+ XA,
+ XB,
+ dtype=dtype,
+ op_version=op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
else:
- raise NotImplementedError("metric='{}' is not implemented.".format(
- metric))
+ raise NotImplementedError("metric='{}' is not implemented.".format(metric))
def _onnx_cdist_begin(op_version):
- diff = OnnxSub('next_in', 'next',
- op_version=op_version)
- id_next = OnnxIdentity('next_in', output_names=['next_out'],
- op_version=op_version)
+ diff = OnnxSub("next_in", "next", op_version=op_version)
+ id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=op_version)
return diff, id_next
-def _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version,
- dim_in=None, dim_out=None, **kwargs):
+def _onnx_cdist_end(
+ XA, XB, id_next, flat, dtype, op_version, dim_in=None, dim_out=None, **kwargs
+):
unique = get_unique_subgraph()
tensor_type = FloatTensorType if dtype == np.float32 else DoubleTensorType
- id_next.set_onnx_name_prefix('cdistd_%d' % unique)
- flat.set_onnx_name_prefix('cdistdf_%d' % unique)
- shape_in = (tensor_type([None, None]) if dim_in is None
- else tensor_type([None, dim_in]))
+ id_next.set_onnx_name_prefix("cdistd_%d" % unique)
+ flat.set_onnx_name_prefix("cdistdf_%d" % unique)
+ shape_in = (
+ tensor_type([None, None]) if dim_in is None else tensor_type([None, dim_in])
+ )
scan_body = id_next.to_onnx(
- OrderedDict([('next_in', shape_in),
- ('next', tensor_type([None]))]),
- outputs=[('next_out', tensor_type([None, None])),
- ('scan_out', tensor_type([None]))],
+ OrderedDict([("next_in", shape_in), ("next", tensor_type([None]))]),
+ outputs=[
+ ("next_out", tensor_type([None, None])),
+ ("scan_out", tensor_type([None])),
+ ],
other_outputs=[flat],
- target_opset=op_version)
- logger.debug('[_onnx_cdist_end] + Scan dim_in=%r dim_out=%r dtype=%r',
- dim_in, dim_out, dtype)
+ target_opset=op_version,
+ )
+ logger.debug(
+ "[_onnx_cdist_end] + Scan dim_in=%r dim_out=%r dtype=%r", dim_in, dim_out, dtype
+ )
- node = OnnxScan(XA, XB, output_names=['u(scan0)', 'u(scan1)'],
- num_scan_inputs=1,
- body=(scan_body.graph, [id_next, flat]),
- op_version=op_version)
- return OnnxTranspose(node[1], perm=[1, 0], op_version=op_version,
- **kwargs)
+ node = OnnxScan(
+ XA,
+ XB,
+ output_names=["u(scan0)", "u(scan1)"],
+ num_scan_inputs=1,
+ body=(scan_body.graph, [id_next, flat]),
+ op_version=op_version,
+ )
+ return OnnxTranspose(node[1], perm=[1, 0], op_version=op_version, **kwargs)
-def _onnx_cdist_sqeuclidean(XA, XB, dtype=None, op_version=None,
- dim_in=None, dim_out=None, **kwargs):
+def _onnx_cdist_sqeuclidean(
+ XA, XB, dtype=None, op_version=None, dim_in=None, dim_out=None, **kwargs
+):
"""
Returns the ONNX graph which computes
``cdist(X, metric='sqeuclidean')``.
"""
diff, id_next = _onnx_cdist_begin(op_version)
- norm = OnnxReduceSumSquareApi18(
- diff, axes=[1], keepdims=0, op_version=op_version)
- flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version)
- return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version,
- dim_in=dim_in, dim_out=dim_out, **kwargs)
+ norm = OnnxReduceSumSquareApi18(diff, axes=[1], keepdims=0, op_version=op_version)
+ flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version)
+ return _onnx_cdist_end(
+ XA,
+ XB,
+ id_next,
+ flat,
+ dtype,
+ op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
-def _onnx_cdist_minkowski(XA, XB, dtype=None, op_version=None, p=2,
- dim_in=None, dim_out=None, **kwargs):
+def _onnx_cdist_minkowski(
+ XA, XB, dtype=None, op_version=None, p=2, dim_in=None, dim_out=None, **kwargs
+):
"""
Returns the ONNX graph which computes the Minkowski distance
or ``minkowski(XA, XB, p)``.
"""
diff, id_next = _onnx_cdist_begin(op_version)
- diff_pow = OnnxPow(OnnxAbs(diff, op_version=op_version),
- np.array([p], dtype=dtype), op_version=op_version)
- norm = OnnxReduceSumApi11(
- diff_pow, axes=[1], keepdims=0, op_version=op_version)
+ diff_pow = OnnxPow(
+ OnnxAbs(diff, op_version=op_version),
+ np.array([p], dtype=dtype),
+ op_version=op_version,
+ )
+ norm = OnnxReduceSumApi11(diff_pow, axes=[1], keepdims=0, op_version=op_version)
norm.set_onnx_name_prefix("norm_%d" % id(norm))
- flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version)
- return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version,
- dim_in=dim_in, dim_out=dim_out, **kwargs)
+ flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version)
+ return _onnx_cdist_end(
+ XA,
+ XB,
+ id_next,
+ flat,
+ dtype,
+ op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
-def _onnx_cdist_manhattan(XA, XB, dtype=None, op_version=None,
- dim_in=None, dim_out=None, **kwargs):
+def _onnx_cdist_manhattan(
+ XA, XB, dtype=None, op_version=None, dim_in=None, dim_out=None, **kwargs
+):
"""
Returns the ONNX graph which computes the Manhattan distance
or ``Manhattan(X, Y)``.
"""
diff, id_next = _onnx_cdist_begin(op_version)
diff_pow = OnnxAbs(diff, op_version=op_version)
- norm = OnnxReduceSumApi11(
- diff_pow, axes=[1], keepdims=0, op_version=op_version)
+ norm = OnnxReduceSumApi11(diff_pow, axes=[1], keepdims=0, op_version=op_version)
norm.set_onnx_name_prefix("norm_%d" % id(norm))
- flat = OnnxIdentity(norm, output_names=['scan_out'], op_version=op_version)
- return _onnx_cdist_end(XA, XB, id_next, flat, dtype, op_version,
- dim_in=dim_in, dim_out=dim_out, **kwargs)
+ flat = OnnxIdentity(norm, output_names=["scan_out"], op_version=op_version)
+ return _onnx_cdist_end(
+ XA,
+ XB,
+ id_next,
+ flat,
+ dtype,
+ op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
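For context on the reformatted helpers above: `onnx_cdist` and `onnx_squareform_pdist` assemble the pairwise-distance computation from standard ONNX operators (a Scan sub-graph plus element-wise nodes), so the exported model runs on any compliant runtime. A rough usage sketch, not taken from this diff; the import paths follow the package layout above, while the opset, shapes, and tensor names are illustrative assumptions:

```python
# Hedged sketch: wrap onnx_cdist in a tiny model and serialize it with to_onnx.
import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxIdentity
from skl2onnx.algebra.complex_functions import onnx_cdist
from skl2onnx.common.data_types import FloatTensorType

opv = 18  # assumed target opset
Y = np.random.randn(5, 3).astype(np.float32)

dist = OnnxIdentity(
    onnx_cdist("X", Y, metric="euclidean", dtype=np.float32, op_version=opv),
    output_names=["dist"],
    op_version=opv,
)
model = dist.to_onnx(
    inputs=[("X", FloatTensorType([None, 3]))],
    outputs=[("dist", FloatTensorType())],
    target_opset=opv,
)
```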
diff --git a/skl2onnx/algebra/custom_ops.py b/skl2onnx/algebra/custom_ops.py
index f870fd9ca..9ca39a9f7 100644
--- a/skl2onnx/algebra/custom_ops.py
+++ b/skl2onnx/algebra/custom_ops.py
@@ -10,17 +10,16 @@ class OnnxCDist(OnnxOperator):
"""
since_version = 1
- expected_inputs = [('X', 'T'), ('Y', 'T')]
- expected_outputs = [('dist', 'T')]
+ expected_inputs = [("X", "T"), ("Y", "T")]
+ expected_outputs = [("dist", "T")]
input_range = [2, 2]
output_range = [1, 1]
is_deprecated = False
- domain = 'com.microsoft'
- operator_name = 'CDist'
+ domain = "com.microsoft"
+ operator_name = "CDist"
past_version = {}
- def __init__(self, X, Y, metric='sqeuclidean', op_version=None,
- **kwargs):
+ def __init__(self, X, Y, metric="sqeuclidean", op_version=None, **kwargs):
"""
:param X: array or OnnxOperatorMixin
:param Y: array or OnnxOperatorMixin
@@ -29,8 +28,9 @@ def __init__(self, X, Y, metric='sqeuclidean', op_version=None,
:param op_version: opset version
:param kwargs: addition parameter
"""
- OnnxOperator.__init__(self, X, Y, metric=metric,
- op_version=op_version, **kwargs)
+ OnnxOperator.__init__(
+ self, X, Y, metric=metric, op_version=op_version, **kwargs
+ )
class OnnxSolve(OnnxOperator):
@@ -40,17 +40,16 @@ class OnnxSolve(OnnxOperator):
"""
since_version = 1
- expected_inputs = [('A', 'T'), ('Y', 'T')]
- expected_outputs = [('X', 'T')]
+ expected_inputs = [("A", "T"), ("Y", "T")]
+ expected_outputs = [("X", "T")]
input_range = [2, 2]
output_range = [1, 1]
is_deprecated = False
- domain = 'com.microsoft'
- operator_name = 'Solve'
+ domain = "com.microsoft"
+ operator_name = "Solve"
past_version = {}
- def __init__(self, A, Y, lower=False, transposed=False,
- op_version=None, **kwargs):
+ def __init__(self, A, Y, lower=False, transposed=False, op_version=None, **kwargs):
"""
:param A: array or OnnxOperatorMixin
:param Y: array or OnnxOperatorMixin
@@ -60,6 +59,12 @@ def __init__(self, A, Y, lower=False, transposed=False,
:param op_version: opset version
:param kwargs: additional parameters
"""
- OnnxOperator.__init__(self, A, Y,
- lower=lower, transposed=transposed,
- op_version=op_version, **kwargs)
+ OnnxOperator.__init__(
+ self,
+ A,
+ Y,
+ lower=lower,
+ transposed=transposed,
+ op_version=op_version,
+ **kwargs
+ )
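By contrast with the Scan-based helpers in complex_functions.py, `OnnxCDist` and `OnnxSolve` map to single contrib operators in the `com.microsoft` domain, so the resulting model needs a runtime that ships those kernels (onnxruntime provides CDist). A hedged sketch of building such a model; shapes and names are illustrative only:

```python
# Hedged sketch: the same pairwise distance as one com.microsoft CDist node.
import numpy as np
from skl2onnx.algebra.custom_ops import OnnxCDist
from skl2onnx.common.data_types import FloatTensorType

Y = np.random.randn(5, 3).astype(np.float32)
node = OnnxCDist("X", Y, metric="euclidean", output_names=["dist"])
model = node.to_onnx(
    inputs=[("X", FloatTensorType([None, 3]))],
    outputs=[("dist", FloatTensorType())],
)
```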
diff --git a/skl2onnx/algebra/graph_state.py b/skl2onnx/algebra/graph_state.py
index 8bcec801b..b96cd4f8d 100644
--- a/skl2onnx/algebra/graph_state.py
+++ b/skl2onnx/algebra/graph_state.py
@@ -6,13 +6,20 @@
from onnx import GraphProto
from ..proto import onnx_proto, TensorProto
from ..common.data_types import (
- guess_proto_type, _guess_numpy_type, _guess_type_proto_str,
- _guess_type_proto, FloatType, DoubleType, Int64Type, copy_type)
+ guess_proto_type,
+ _guess_numpy_type,
+ _guess_type_proto_str,
+ _guess_type_proto,
+ FloatType,
+ DoubleType,
+ Int64Type,
+ copy_type,
+)
from ..common._topology import Variable
from ..common._registration import get_shape_calculator, get_converter
-logger = getLogger('skl2onnx')
+logger = getLogger("skl2onnx")
class GraphStateVar:
@@ -20,18 +27,31 @@ class GraphStateVar:
class GraphState:
-
- def __init__(self, inputs, output_names, operator_name, scope,
- container, converter, onnx_prefix_name=None,
- options=None, expected_inputs=None,
- expected_outputs=None, input_range=None,
- output_range=None, operator=None,
- run_converters=False, input_types=None, **attrs):
-
+ def __init__(
+ self,
+ inputs,
+ output_names,
+ operator_name,
+ scope,
+ container,
+ converter,
+ onnx_prefix_name=None,
+ options=None,
+ expected_inputs=None,
+ expected_outputs=None,
+ input_range=None,
+ output_range=None,
+ operator=None,
+ run_converters=False,
+ input_types=None,
+ **attrs
+ ):
logger.debug(
"[State] +%s n_inputs=%r n_outputs=%r",
- operator_name, -1 if inputs is None else len(inputs),
- -1 if output_names is None else len(output_names))
+ operator_name,
+ -1 if inputs is None else len(inputs),
+ -1 if output_names is None else len(output_names),
+ )
self.inputs = inputs
self._output_names = output_names
self._input_range = input_range.copy() if input_range else [1, 1e9]
@@ -39,8 +59,9 @@ def __init__(self, inputs, output_names, operator_name, scope,
self.scope = scope
self.run_converters = run_converters
self.operator = operator
- if hasattr(operator_name, 'fit'):
+ if hasattr(operator_name, "fit"):
from .. import get_model_alias
+
self.operator_instance = operator_name
self.is_model = True
self.operator_name = get_model_alias(type(operator_name))
@@ -54,9 +75,11 @@ def __init__(self, inputs, output_names, operator_name, scope,
self.container = container
self.converter = converter
self._expected_inputs = (
- None if expected_inputs is None else expected_inputs.copy())
+ None if expected_inputs is None else expected_inputs.copy()
+ )
self._expected_outputs = (
- None if expected_outputs is None else expected_outputs.copy())
+ None if expected_outputs is None else expected_outputs.copy()
+ )
self.computed_inputs_ = None
self.computed_outputs_ = None
self.sub_op_ = None
@@ -65,27 +88,34 @@ def __init__(self, inputs, output_names, operator_name, scope,
self.options = options
self.input_types = input_types
- for att in ['inputs', '_expected_inputs',
- '_expected_outputs', 'computed_inputs_',
- 'computed_outputs_', '_outputs']:
+ for att in [
+ "inputs",
+ "_expected_inputs",
+ "_expected_outputs",
+ "computed_inputs_",
+ "computed_outputs_",
+ "_outputs",
+ ]:
v = getattr(self, att, None)
if v is None:
continue
if not isinstance(v, list):
raise TypeError(
- "Attribute %r must be a list not %r."
- "" % (att, type(v)))
+ "Attribute %r must be a list not %r." "" % (att, type(v))
+ )
for i, vi in enumerate(v):
- if hasattr(vi, 'state') or hasattr(vi, 'onx_op'):
+ if hasattr(vi, "state") or hasattr(vi, "onx_op"):
continue
if not isinstance(vi, (tuple, str, Variable, GraphStateVar)):
raise TypeError(
"Unexpected type %r for element %d of attribute %r "
- "in %r." % (type(vi), i, att, v))
+ "in %r." % (type(vi), i, att, v)
+ )
if isinstance(vi, tuple) and len(vi) != 2:
raise ValueError(
"Unexpected value %r for element %d of attribute %r."
- "" % (vi, i, att))
+ "" % (vi, i, att)
+ )
change = []
for vi in v:
change.append((vi, None) if isinstance(vi, str) else vi)
@@ -96,8 +126,8 @@ def __init__(self, inputs, output_names, operator_name, scope,
for i in range(0, len(self._expected_outputs)):
if i < len(self._output_names):
res.append(
- (self._output_names[i],
- self._expected_outputs[i][1]))
+ (self._output_names[i], self._expected_outputs[i][1])
+ )
else:
res.append(self._expected_outputs[i])
for i in range(len(res), len(self._output_names)):
@@ -107,7 +137,7 @@ def __init__(self, inputs, output_names, operator_name, scope,
if self._expected_outputs is not None:
res = []
for p in self._expected_outputs:
- if isinstance(p[1], str) and p[1].startswith('tensor('):
+ if isinstance(p[1], str) and p[1].startswith("tensor("):
res.append((p[0], _guess_type_proto_str(p[1], None)))
else:
res.append(p)
@@ -116,7 +146,7 @@ def __init__(self, inputs, output_names, operator_name, scope,
if self._expected_inputs is not None:
res = []
for p in self._expected_inputs:
- if isinstance(p[1], str) and p[1].startswith('tensor('):
+ if isinstance(p[1], str) and p[1].startswith("tensor("):
res.append((p[0], _guess_type_proto_str(p[1], None)))
else:
res.append(p)
@@ -135,9 +165,13 @@ def outputs(self):
def _get_var_name(self, var, in_out, operator=None, index=None):
"input: True for output, False for input"
- if hasattr(var, 'add_to'):
- var.add_to(self.scope, self.container, operator=operator,
- run_converters=self.run_converters)
+ if hasattr(var, "add_to"):
+ var.add_to(
+ self.scope,
+ self.container,
+ operator=operator,
+ run_converters=self.run_converters,
+ )
outputs = var.outputs
if isinstance(outputs, list):
vars = []
@@ -149,20 +183,29 @@ def _get_var_name(self, var, in_out, operator=None, index=None):
if len(vars) == 0:
raise RuntimeError(
"Empty inputs outputs=%s var=%s in_out=%s "
- "operator=%r." % (outputs, var, in_out, operator))
+ "operator=%r." % (outputs, var, in_out, operator)
+ )
return vars
raise RuntimeError("Unexpected output type {}".format(outputs))
def __fct__(var, operator):
if isinstance(var, Variable):
return [var]
- if isinstance(var, (np.ndarray, np.bool_, np.int64,
- np.float32, np.float64,
- np.int8, np.uint8)):
+ if isinstance(
+ var,
+ (
+ np.ndarray,
+ np.bool_,
+ np.int64,
+ np.float32,
+ np.float64,
+ np.int8,
+ np.uint8,
+ ),
+ ):
return [self._add_constant(var)]
- if hasattr(var, 'ConstantValue'):
- return [
- self._add_constant(var.ConstantValue, scope=self.scope)]
+ if hasattr(var, "ConstantValue"):
+ return [self._add_constant(var.ConstantValue, scope=self.scope)]
if isinstance(var, str):
return [(var, None)]
if isinstance(var, tuple) and len(var) == 2:
@@ -172,23 +215,25 @@ def __fct__(var, operator):
return [(a, b)]
except ValueError:
pass
- raise RuntimeError("Unexpected type for parameter 'var': {0}."
- "".format(type(var)))
+ raise RuntimeError(
+ "Unexpected type for parameter 'var': {0}." "".format(type(var))
+ )
try:
v = __fct__(var, operator)
except TypeError as e:
raise RuntimeError(
"Unable to process one variable %s and operator=%s "
- "(name=%r)." % (var, operator, self.operator_name)) from e
+ "(name=%r)." % (var, operator, self.operator_name)
+ ) from e
if v is None or not isinstance(v, list) or len(v) == 0:
- raise TypeError(
- "Unexpected type or empty value %r - %s." % (type(v), v))
+ raise TypeError("Unexpected type or empty value %r - %s." % (type(v), v))
if in_out and self._output_names is not None and index is not None:
if len(v) != 1:
raise RuntimeError(
- "Mismatch number of outputs between %s and %s." % (
- v, self._output_names[index]))
+ "Mismatch number of outputs between %s and %s."
+ % (v, self._output_names[index])
+ )
v2 = self.scope.get(var[0], None)
if v2 is not None:
v = [v2]
@@ -197,91 +242,81 @@ def __fct__(var, operator):
except IndexError as e:
raise ValueError(
"Unexpected output %s in operator name %r."
- "" % (vn, self.operator_name)) from e
- if (index >= len(self._output_names) and
- index >= self._output_range[0]):
+ "" % (vn, self.operator_name)
+ ) from e
+ if index >= len(self._output_names) and index >= self._output_range[0]:
return None
try:
vin = self._output_names[index]
except IndexError as e:
raise ValueError(
"Unexpected index %s in operator name %r with ."
- "output names %s." % (
- index, self.operator_name,
- self._output_names)) from e
+ "output names %s." % (index, self.operator_name, self._output_names)
+ ) from e
if vn != vin:
raise RuntimeError(
- "Mismatched output name %r between %s and %s." % (
- vn, v, vin))
+ "Mismatched output name %r between %s and %s." % (vn, v, vin)
+ )
return v
def _add_constant(self, cst, scope):
-
def _ty_astype(cst):
astype = cst.dtype
try:
ty = guess_proto_type(_guess_numpy_type(cst.dtype, cst.shape))
except NotImplementedError as e:
st = str(astype).lower()
- if st.startswith('u') or st.startswith("<u"):
- if (j >= len(new_inputs) and
- j >= input_range[0]):
+ if j >= len(new_inputs) and j >= input_range[0]:
continue
if new_inputs[j].type is not None:
- new_inputs[i].set_type(
- new_inputs[j].type.__class__())
+ new_inputs[i].set_type(new_inputs[j].type.__class__())
break
# Overwrite types if input_types is specified.
@@ -390,8 +421,9 @@ def _update_inputs(inputs, names, scope, expected_inputs,
if i >= len(input_types):
raise RuntimeError(
"Mismatch between computed inputs[%d]=%r and "
- "overwritten input_types[%d]=%r." % (
- i, new_inputs, i, input_types))
+ "overwritten input_types[%d]=%r."
+ % (i, new_inputs, i, input_types)
+ )
if input_types[i] is not None:
new_inputs[i].type = input_types[i]
return new_inputs
@@ -403,15 +435,13 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None):
if va is None or ex is None:
continue
for v, ct in zip(va, ex):
- if (isinstance(v, str) or (
- hasattr(v, 'type') and v.type is None)):
+ if isinstance(v, str) or (hasattr(v, "type") and v.type is None):
continue
- vt = (copy_type(v.type)
- if hasattr(v, 'type') else copy_type(v[1]))
+ vt = copy_type(v.type) if hasattr(v, "type") else copy_type(v[1])
if isinstance(vt, str):
continue
key = ct[1]
- if isinstance(key, str) and key[0] in ('T', 'I', 'V'):
+ if isinstance(key, str) and key[0] in ("T", "I", "V"):
if not isinstance(vt, str) and key not in memo:
memo[key] = []
memo[key].append(vt)
@@ -420,12 +450,13 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None):
if len(set(_.__class__ for _ in v)) != 1:
raise RuntimeError(
"Conflicted constraint %r, got types %r operator=%s"
- "." % (k, v, debug))
+ "." % (k, v, debug)
+ )
for i in range(0, len(vars1)):
inp = vars1[i]
if isinstance(inp, str):
continue
- if hasattr(inp, 'type') and inp.type is None:
+ if hasattr(inp, "type") and inp.type is None:
ct = expected1[i][1]
if ct in memo:
vars1[i].set_type(copy_type(memo[ct][0]))
@@ -436,7 +467,6 @@ def _update_contraints(vars1, expected1, vars2, expected2, debug=None):
def run(self):
if self.computed_outputs_ is None:
-
# We need to register all names in subgraphs and raise
# an exception if the names are already taken.
for k, v in self.attrs.items():
@@ -448,8 +478,8 @@ def run(self):
"A name exists both in the subgraph and "
"in the main graph. Use set_onnx_name_prefix to "
"to rename one of them, attribute=%r, "
- "op_type=%r." % (
- k, self.operator_name)) from e
+ "op_type=%r." % (k, self.operator_name)
+ ) from e
if self.operator is not None:
expected_outputs = self.operator.outputs
@@ -466,9 +496,12 @@ def run(self):
expected_outputs = None
logger.debug(
- "[State.run] id=%d op_name=%r is_model=%r "
- "expected_outputs=%r",
- id(self), self.operator_name, self.is_model, expected_outputs)
+ "[State.run] id=%d op_name=%r is_model=%r " "expected_outputs=%r",
+ id(self),
+ self.operator_name,
+ self.is_model,
+ expected_outputs,
+ )
inputs = []
for i in self.inputs:
@@ -476,20 +509,25 @@ def run(self):
inputs.extend(v)
self.computed_inputs_ = GraphState._update_inputs(
- self.inputs, inputs, scope=self.scope,
+ self.inputs,
+ inputs,
+ scope=self.scope,
expected_inputs=self._expected_inputs,
input_range=self._input_range,
- input_types=self.input_types)
+ input_types=self.input_types,
+ )
logger.debug(
"[State.run] id=%d op_name=%r computed_inputs_=%r",
- id(self), self.operator_name, self.computed_inputs_)
+ id(self),
+ self.operator_name,
+ self.computed_inputs_,
+ )
name = self.scope.get_unique_operator_name(self.onnx_prefix)
if self.is_model:
if self.sub_op_ is not None:
- raise NotImplementedError(
- "Attribute 'sub_op_' is not empty.")
+ raise NotImplementedError("Attribute 'sub_op_' is not empty.")
# a model is converted into a subgraph
sub_op_inputs = self.computed_inputs_
@@ -497,56 +535,70 @@ def run(self):
if not isinstance(v, Variable):
raise TypeError(
"Every input variable must be a Variable not %r,"
- " v=%r." % (type(v), v))
+ " v=%r." % (type(v), v)
+ )
scope = v.scope
- if hasattr(scope, 'variables'):
+ if hasattr(scope, "variables"):
if v.onnx_name not in scope.variables:
raise RuntimeError(
"Variable %r missing from scope "
- "(operator=%r, model=%r), list=%r." % (
- v, self.operator,
+ "(operator=%r, model=%r), list=%r."
+ % (
+ v,
+ self.operator,
type(self.operator_instance),
- list(sorted(self.scope.variables))))
+ list(sorted(self.scope.variables)),
+ )
+ )
# output are not defined, we need to call a parser.
from .._parse import _parse_sklearn
- self.scope.add_options(
- id(self.operator_instance), self.options)
+
+ self.scope.add_options(id(self.operator_instance), self.options)
try:
sub_outputs = _parse_sklearn(
- self.scope, self.operator_instance, sub_op_inputs,
- alias=self.operator_name)
+ self.scope,
+ self.operator_instance,
+ sub_op_inputs,
+ alias=self.operator_name,
+ )
except RuntimeError as e:
raise RuntimeError(
"Unable to run parser for model type %r, inputs=%r "
- "(input_types=%r)." % (
- type(self.operator_instance), sub_op_inputs,
- self.input_types)) from e
+ "(input_types=%r)."
+ % (
+ type(self.operator_instance),
+ sub_op_inputs,
+ self.input_types,
+ )
+ ) from e
set_input_names = set(v.onnx_name for v in sub_op_inputs)
sub_op = None
for op in self.scope.operators.values():
for inp in op.inputs:
if inp.onnx_name in set_input_names:
sub_op = op
- if (sub_outputs is None or
- None in sub_outputs):
+ if sub_outputs is None or None in sub_outputs:
raise RuntimeError(
"Wrong result when parsing model {}.".format(
- type(self.operator_instance)))
+ type(self.operator_instance)
+ )
+ )
# Checks operator outputs
for out in sub_outputs:
if not isinstance(out, Variable):
- raise TypeError(
- "Output %s must be of type Variable." % out)
+ raise TypeError("Output %s must be of type Variable." % out)
self.sub_op_ = sub_op
sub_op.outputs = sub_outputs
shape_calc = get_shape_calculator(self.operator_name)
logger.debug(
- "[StateShape] call %r fed %r - %r", sub_op,
+ "[StateShape] call %r fed %r - %r",
+ sub_op,
"".join(str(i.is_fed) for i in sub_op.inputs),
- "".join(str(i.is_fed) for i in sub_op.outputs))
+ "".join(str(i.is_fed) for i in sub_op.outputs),
+ )
shape_calc(sub_op)
logger.debug("[StateShape] end - %r", sub_op)
@@ -554,91 +606,107 @@ def run(self):
# in Topology.
if sub_op.outputs is not None and len(sub_op.outputs) > 0:
outputs = [
- self.scope.declare_local_variable(
- o.onnx_name, type=o.type)
- for o in sub_op.outputs]
- elif (expected_outputs is not None and
- len(expected_outputs) > 0):
+ self.scope.declare_local_variable(o.onnx_name, type=o.type)
+ for o in sub_op.outputs
+ ]
+ elif expected_outputs is not None and len(expected_outputs) > 0:
outputs = [
- self._get_output_name(
- self._output_names, o, self.scope)
- for o in expected_outputs]
+ self._get_output_name(self._output_names, o, self.scope)
+ for o in expected_outputs
+ ]
else:
raise RuntimeError(
"sub_op.outputs is None as well as expected_outputs "
- "for operator %r." % sub_op)
+ "for operator %r." % sub_op
+ )
if len(outputs) != len(sub_op.outputs):
raise RuntimeError(
- "Mismatched number of outputs %s and %s." % (
- outputs, sub_op.outputs))
+ "Mismatched number of outputs %s and %s."
+ % (outputs, sub_op.outputs)
+ )
for i, out in enumerate(sub_op.outputs):
var = outputs[i]
self.container.add_node(
- 'Identity', [out.onnx_name], [var[0]],
- name=self.scope.get_unique_operator_name("SubOpId"))
+ "Identity",
+ [out.onnx_name],
+ [var[0]],
+ name=self.scope.get_unique_operator_name("SubOpId"),
+ )
self.computed_outputs_ = outputs
self.computed_inputs2_ = sub_op.inputs
- self.computed_outputs2_ = [
- (v[0], v[1]) for v in self.computed_outputs_]
+ self.computed_outputs2_ = [(v[0], v[1]) for v in self.computed_outputs_]
if self.run_converters:
# The parser was run on sub-operators but not the
# converter.
conv = get_converter(self.operator_name)
logger.debug(
- "[StateConv] %r fed %r - %r", sub_op,
+ "[StateConv] %r fed %r - %r",
+ sub_op,
"".join(str(i.is_fed) for i in sub_op.inputs),
- "".join(str(i.is_fed) for i in sub_op.outputs))
+ "".join(str(i.is_fed) for i in sub_op.outputs),
+ )
conv(self.scope, sub_op, self.container)
logger.debug("[StateConv] %r - end.", sub_op)
else:
- if (expected_outputs is not None and
- len(sub_op.outputs) == len(expected_outputs)):
+ if expected_outputs is not None and len(sub_op.outputs) == len(
+ expected_outputs
+ ):
for v1, v2 in zip(sub_op.outputs, expected_outputs):
if isinstance(v2, tuple):
v2 = v2[0]
- if (hasattr(v1, 'onnx_name') and
- hasattr(v2, 'onnx_name')):
+ if hasattr(v1, "onnx_name") and hasattr(v2, "onnx_name"):
if v1.onnx_name != v2.onnx_name:
# One identity is missing
- n = self.scope.get_unique_operator_name(
- 'idgstate')
+ n = self.scope.get_unique_operator_name("idgstate")
self.container.add_node(
- 'Identity', [v1.onnx_name],
- [v2.onnx_name], name=n)
+ "Identity",
+ [v1.onnx_name],
+ [v2.onnx_name],
+ name=n,
+ )
else:
def _name_(obj):
if isinstance(obj, tuple) and len(obj) == 2:
return obj[0]
- if hasattr(obj, 'onnx_name'):
+ if hasattr(obj, "onnx_name"):
return obj.onnx_name
- raise TypeError(
- "Unable to extract variable name from %r." % obj)
+ raise TypeError("Unable to extract variable name from %r." % obj)
# only one node is added
if self.options is not None:
raise RuntimeError(
- "Options must be empty for node %r but is it %r." % (
- self.operator_name, self.options))
+ "Options must be empty for node %r but is it %r."
+ % (self.operator_name, self.options)
+ )
outputs = [
self._get_output_name(self._output_names, o, self.scope)
- for o in expected_outputs]
+ for o in expected_outputs
+ ]
input_names = [_name_(i) for i in inputs]
output_names = [_name_(i) for i in outputs]
self.container.add_node(
- self.operator_name, input_names, output_names,
- name=name, **self.attrs)
+ self.operator_name,
+ input_names,
+ output_names,
+ name=name,
+ **self.attrs
+ )
computed_outputs = [
- (name, ct[1]) for name, ct in zip(
- output_names, self._expected_outputs)]
+ (name, ct[1])
+ for name, ct in zip(output_names, self._expected_outputs)
+ ]
self._update_contraints(
- computed_outputs, self._expected_outputs,
- self.computed_inputs_, self._expected_inputs,
- debug=self.operator_name)
+ computed_outputs,
+ self._expected_outputs,
+ self.computed_inputs_,
+ self._expected_inputs,
+ debug=self.operator_name,
+ )
# Registers the variables into scope.
self.computed_outputs_ = []
@@ -647,11 +715,12 @@ def _name_(obj):
self.computed_outputs_.append((name, kind))
else:
var = self.scope.declare_local_variable(
- name, kind, missing_type=True)
+ name, kind, missing_type=True
+ )
# name already comes from
# scope.get_unique_variable_name
var.set_onnx_name(name)
var.init_status(is_fed=True)
self.computed_outputs_.append(var)
- logger.debug('[State.run] end id=%d', id(self))
+ logger.debug("[State.run] end id=%d", id(self))
diff --git a/skl2onnx/algebra/onnx_operator.py b/skl2onnx/algebra/onnx_operator.py
index 2e27099f5..2dca4485c 100644
--- a/skl2onnx/algebra/onnx_operator.py
+++ b/skl2onnx/algebra/onnx_operator.py
@@ -8,11 +8,16 @@
from onnx.numpy_helper import from_array
from scipy.sparse import coo_matrix
from ..proto import TensorProto
-from ..common.data_types import (
- _guess_type_proto_str, _guess_type_proto_str_inv)
+from ..common.data_types import _guess_type_proto_str, _guess_type_proto_str_inv
from ..common._topology import (
- Variable, VariableStr, Scope, _update_domain_version, Operator,
- _get_main_opset_version, OPSET_TO_IR_VERSION)
+ Variable,
+ VariableStr,
+ Scope,
+ _update_domain_version,
+ Operator,
+ _get_main_opset_version,
+ OPSET_TO_IR_VERSION,
+)
from ..common._container import ModelComponentContainer
from ..common import utils
from ..common.data_types import guess_proto_type, _guess_numpy_type
@@ -24,7 +29,7 @@
from .type_helper import _guess_type
-logger = getLogger('skl2onnx')
+logger = getLogger("skl2onnx")
class OnnxOperatorItem:
@@ -65,8 +70,9 @@ def add_to(self, scope, container, operator=None, run_converters=False):
:param operator: overwrite inputs
:param run_converters: must be True if called from method `to_onnx`
"""
- self.onx_op.add_to(scope, container, operator=operator,
- run_converters=run_converters)
+ self.onx_op.add_to(
+ scope, container, operator=operator, run_converters=run_converters
+ )
def get_output_name(self, i=0):
"""
@@ -91,20 +97,22 @@ def outputs(self):
"""
if self.onx_op is None:
raise RuntimeError(
- "self.onx_op cannot be None, type(self)={}".format(
- type(self)))
+ "self.onx_op cannot be None, type(self)={}".format(type(self))
+ )
if self.index is None:
raise RuntimeError(
- "self.index cannot be None, type(self)={}".format(
- type(self)))
+ "self.index cannot be None, type(self)={}".format(type(self))
+ )
outputs = self.onx_op.outputs
if outputs is None:
raise RuntimeError(
"self.onx_op.outputs cannot be None, "
"type(self)={}, type(self.onx_op)={}, "
"type(self.onx_op.state)={}".format(
- type(self), type(self.onx_op), type(self.onx_op.state)))
- return outputs[self.index:self.index + 1]
+ type(self), type(self.onx_op), type(self.onx_op.state)
+ )
+ )
+ return outputs[self.index : self.index + 1]
def get_output_type_inference(self, input_shapes=None):
"""
@@ -112,20 +120,22 @@ def get_output_type_inference(self, input_shapes=None):
"""
if self.onx_op is None:
raise RuntimeError(
- "self.onx_op cannot be None, type(self)={}".format(
- type(self)))
+ "self.onx_op cannot be None, type(self)={}".format(type(self))
+ )
if self.index is None:
raise RuntimeError(
- "self.index cannot be None, type(self)={}".format(
- type(self)))
+ "self.index cannot be None, type(self)={}".format(type(self))
+ )
outputs = self.onx_op.get_output_type_inference(input_shapes)
if outputs is None:
raise RuntimeError(
"self.onx_op.outputs cannot be None, "
"type(self)={}, type(self.onx_op)={}, "
"type(self.onx_op.state)={}".format(
- type(self), type(self.onx_op), type(self.onx_op.state)))
- return outputs[self.index:self.index + 1]
+ type(self), type(self.onx_op), type(self.onx_op.state)
+ )
+ )
+ return outputs[self.index : self.index + 1]
class OnnxOperator:
@@ -157,16 +167,15 @@ class OnnxOperator:
Parameter *global_context*, *clear_subgraph_inputs*
were added.
"""
- class OnnxOperatorVariable(GraphStateVar):
+ class OnnxOperatorVariable(GraphStateVar):
def __init__(self, index, name=None):
self.index = index
self.name = name
def as_variable(self, scope):
name = "ov%s" % self.name
- if (hasattr(self, "variable_") and
- self.variable_.onnx_name == name):
+ if hasattr(self, "variable_") and self.variable_.onnx_name == name:
return self.variable_
var = Variable(name, name, scope=scope, type=None)
if scope is not None:
@@ -187,8 +196,7 @@ def __init__(self, name):
def as_variable(self, scope):
name = self.name
- if (hasattr(self, "variable_") and
- self.variable_.onnx_name == name):
+ if hasattr(self, "variable_") and self.variable_.onnx_name == name:
return self.variable_
if scope is not None:
if name in scope.variables:
@@ -208,8 +216,7 @@ def __eq__(self, name):
elif isinstance(name, OnnxOperator.UnscopedVariable):
return self.name == name.name
else:
- raise TypeError('Unsupported type for comparison {}'.format(
- type(name)))
+ raise TypeError("Unsupported type for comparison {}".format(type(name)))
def __repr__(self):
return "UnscopedVariable('%s')" % self.name
@@ -225,15 +232,12 @@ def __init__(self, value):
def as_variable(self, scope):
ha = utils.hash_array(self.value)
name = "CST%s" % ha
- if (hasattr(self, "variable_") and
- self.variable_.onnx_name == name):
+ if hasattr(self, "variable_") and self.variable_.onnx_name == name:
return self.variable_
if scope is not None:
- var = scope.declare_local_variable(
- name, type=_guess_type(self.value))
+ var = scope.declare_local_variable(name, type=_guess_type(self.value))
else:
- var = Variable(name, name, scope=scope,
- type=_guess_type(self.value))
+ var = Variable(name, name, scope=scope, type=_guess_type(self.value))
self.variable_ = var
return var
@@ -256,9 +260,11 @@ def find_schema(self, op_version):
:param op_version: requested version
:return: schema
"""
- if not hasattr(self.__class__, 'past_version'):
- raise RuntimeError("Missing attribute 'past_version', there is "
- "no other available schema.")
+ if not hasattr(self.__class__, "past_version"):
+ raise RuntimeError(
+ "Missing attribute 'past_version', there is "
+ "no other available schema."
+ )
found = None
for v in self.past_version.values():
if v.since_version > op_version:
@@ -269,20 +275,27 @@ def find_schema(self, op_version):
raise RuntimeError(
"Operator '{}': requested version {} < "
"{} schema version.".format(
- self.__class__.__name__,
- op_version, self.since_version))
+ self.__class__.__name__, op_version, self.since_version
+ )
+ )
return found
- def __init__(self, *inputs, op_version=None, output_names=None,
- domain=None, global_context=None,
- clear_subgraph_inputs=False, **kwargs):
-
- if (output_names is None and
- self.__class__.__name__.startswith("OnnxScan")):
+ def __init__(
+ self,
+ *inputs,
+ op_version=None,
+ output_names=None,
+ domain=None,
+ global_context=None,
+ clear_subgraph_inputs=False,
+ **kwargs
+ ):
+ if output_names is None and self.__class__.__name__.startswith("OnnxScan"):
raise NotImplementedError(
"The class cannot infer the number of variables "
"for node '{}' yet. output_names must be specified"
- ".".format(self.__class__.__name__))
+ ".".format(self.__class__.__name__)
+ )
if isinstance(output_names, (str, Variable)):
output_names = [output_names]
if isinstance(output_names[0], str):
@@ -290,20 +303,22 @@ def __init__(self, *inputs, op_version=None, output_names=None,
elif isinstance(output_names, Operator):
if len(output_names.outputs) == 0:
raise ValueError(
- "output_names cannot be empty (operator %r)."
- "" % output_names)
+ "output_names cannot be empty (operator %r)." "" % output_names
+ )
output_names = output_names.outputs.copy()
elif isinstance(output_names, Operator.OperatorList):
if len(output_names) == 0:
raise ValueError(
"output_names cannot be empty (operator %r)."
- "" % self.__class__.__name__)
+ "" % self.__class__.__name__
+ )
output_names = output_names.copy()
elif isinstance(output_names, list):
if len(output_names) == 0:
raise ValueError(
"output_names cannot be empty (operator %r)."
- "" % self.__class__.__name__)
+ "" % self.__class__.__name__
+ )
output_names = output_names.copy()
for i in range(len(output_names)):
if isinstance(output_names[i], str):
@@ -311,10 +326,11 @@ def __init__(self, *inputs, op_version=None, output_names=None,
elif output_names is not None:
raise TypeError(
"output_names must be a string or a list not %r."
- "" % type(output_names))
+ "" % type(output_names)
+ )
if op_version is None:
- if domain == '':
+ if domain == "":
self.op_version = get_latest_tested_opset_version()
else:
self.op_version = None
@@ -322,8 +338,7 @@ def __init__(self, *inputs, op_version=None, output_names=None,
self.op_version = op_version
self.since_version = self.__class__.since_version
- if (self.op_version is not None and
- self.op_version < self.since_version):
+ if self.op_version is not None and self.op_version < self.since_version:
schema = self.find_schema(self.op_version)
self.since_version = schema.since_version
self.expected_inputs = schema.expected_inputs.copy()
@@ -332,15 +347,18 @@ def __init__(self, *inputs, op_version=None, output_names=None,
self.output_range = schema.output_range
else:
self.expected_inputs = (
- None if self.__class__.expected_inputs is None
- else self.__class__.expected_inputs.copy())
+ None
+ if self.__class__.expected_inputs is None
+ else self.__class__.expected_inputs.copy()
+ )
self.expected_outputs = (
- None if self.__class__.expected_outputs is None
- else self.__class__.expected_outputs.copy())
+ None
+ if self.__class__.expected_outputs is None
+ else self.__class__.expected_outputs.copy()
+ )
self.input_range = self.__class__.input_range
self.output_range = self.__class__.output_range
- if self.__class__.__name__ not in {
- 'OnnxScan', 'OnnxLoop', 'OnnxIf'}:
+ if self.__class__.__name__ not in {"OnnxScan", "OnnxLoop", "OnnxIf"}:
# TODO: the minimum opset depends on embedded graph
# by default, it takes the given op_version but the
# optimal value could be lower.
@@ -348,13 +366,13 @@ def __init__(self, *inputs, op_version=None, output_names=None,
if self.op_version is None:
self.op_version = self.since_version
- if (self.op_version is not None and
- self.op_version < self.since_version):
+ if self.op_version is not None and self.op_version < self.since_version:
raise RuntimeError(
"Operator '{}': requested version {} < "
"{} schema version.".format(
- self.__class__.__name__,
- self.op_version, self.since_version))
+ self.__class__.__name__, self.op_version, self.since_version
+ )
+ )
self.state = None
self.domain = domain
@@ -364,8 +382,9 @@ def __init__(self, *inputs, op_version=None, output_names=None,
# check inputs
if len(inputs) == 0:
if self.input_range[0] == self.input_range[1]:
- self.inputs = [OnnxOperator.UnscopedVariable(_[0])
- for _ in self.expected_inputs]
+ self.inputs = [
+ OnnxOperator.UnscopedVariable(_[0]) for _ in self.expected_inputs
+ ]
else:
# The number of inputs may vary.
self.inputs = None
@@ -374,42 +393,53 @@ def __init__(self, *inputs, op_version=None, output_names=None,
for inp in inputs:
if isinstance(inp, str):
self.inputs.append(OnnxOperator.UnscopedVariable(inp))
- elif isinstance(inp, (OnnxOperator, Variable,
- OnnxOperatorItem, OnnxSubEstimator)):
+ elif isinstance(
+ inp, (OnnxOperator, Variable, OnnxOperatorItem, OnnxSubEstimator)
+ ):
self.inputs.append(inp)
elif isinstance(inp, tuple) and len(inp) == 2:
self.inputs.append(inp)
elif isinstance(inp, (np.ndarray, coo_matrix)):
- self.inputs.append(
- OnnxOperator.ConstantVariable(inp))
+ self.inputs.append(OnnxOperator.ConstantVariable(inp))
elif isinstance(inp, TensorProto):
self.inputs.append(OnnxOperator.ConstantVariable(inp))
- elif isinstance(inp, (OnnxOperator.OnnxOperatorVariable,
- OnnxOperator.ConstantVariable)):
+ elif isinstance(
+ inp,
+ (OnnxOperator.OnnxOperatorVariable, OnnxOperator.ConstantVariable),
+ ):
self.inputs.append(inp)
- elif isinstance(inp, (np.int64, np.float32,
- np.float64, np.bool_,
- np.int8, np.uint8)):
+ elif isinstance(
+ inp, (np.int64, np.float32, np.float64, np.bool_, np.int8, np.uint8)
+ ):
self.inputs.append(OnnxOperator.ConstantVariable(inp))
- elif isinstance(inp, (float, )):
+ elif isinstance(inp, (float,)):
self.inputs.append(np.float64(inp))
- elif isinstance(inp, (int, )):
+ elif isinstance(inp, (int,)):
self.inputs.append(np.int64(inp))
else:
raise TypeError(
"Unable to interpret the input name for type {} in "
"operator '{}' (value={}).".format(
- type(inp), self.__class__.__name__, inp))
+ type(inp), self.__class__.__name__, inp
+ )
+ )
if self.inputs is not None:
- if (len(self.inputs) < self.input_range[0] or
- len(self.inputs) > self.input_range[1]):
+ if (
+ len(self.inputs) < self.input_range[0]
+ or len(self.inputs) > self.input_range[1]
+ ):
raise RuntimeError(
"Operator '{}' expects a number of inputs "
"in [{}, {}] not {} (expected opset={}, "
"class opset={})".format(
- self.operator_name, *self.input_range,
- len(self.inputs), op_version, self.op_version))
+ self.operator_name,
+ *self.input_range,
+ len(self.inputs),
+ op_version,
+ self.op_version
+ )
+ )
# global context
if global_context is None:
self.global_context = None
@@ -417,12 +447,14 @@ def __init__(self, *inputs, op_version=None, output_names=None,
if not isinstance(global_context, dict):
raise TypeError(
"global_context must be a dictionary not %r."
- "" % type(global_context))
+ "" % type(global_context)
+ )
for k, v in global_context.items():
if not isinstance(v, (OnnxOperator, OnnxOperatorItem)):
raise TypeError(
"Value %r in must be an OnnxOperator or an "
- "OnnxOperatorItem not %r." % (k, type(v)))
+ "OnnxOperatorItem not %r." % (k, type(v))
+ )
self.global_context = global_context
# check output
@@ -433,7 +465,8 @@ def __init__(self, *inputs, op_version=None, output_names=None,
if len(self.output_names) == 0:
raise ValueError(
"output_names can be None but cannot be empty for "
- "operator %r." % self)
+ "operator %r." % self
+ )
if self.output_variables is None:
self.output_variables = [None for o in self.output_names]
for i in range(len(self.output_names)):
@@ -441,33 +474,32 @@ def __init__(self, *inputs, op_version=None, output_names=None,
if isinstance(name, Variable):
self.output_variables[i] = name
else:
- raise TypeError("output_names must be a list of strings "
- "and element %r is %r (%r)" % (
- i, type(name), name))
+ raise TypeError(
+ "output_names must be a list of strings "
+ "and element %r is %r (%r)" % (i, type(name), name)
+ )
if all(map(lambda x: x is None, self.output_variables)):
self.output_variables = None
- if (self.output_names is not None and (
- self.expected_outputs is None or
- len(self.output_names) > len(self.expected_outputs))):
+ if self.output_names is not None and (
+ self.expected_outputs is None
+ or len(self.output_names) > len(self.expected_outputs)
+ ):
if self.expected_outputs is None:
self.expected_outputs = []
- for i in range(len(self.expected_outputs),
- len(self.output_names)):
+ for i in range(len(self.expected_outputs), len(self.output_names)):
self.expected_outputs.append((self.output_names[i], None))
- if (self.expected_inputs is None or
- len(self.inputs) > len(self.expected_inputs)):
+ if self.expected_inputs is None or len(self.inputs) > len(self.expected_inputs):
if self.expected_inputs is None:
self.expected_inputs = []
- for i in range(len(self.expected_inputs),
- len(self.inputs)):
+ for i in range(len(self.expected_inputs), len(self.inputs)):
inp = self.inputs[i]
if isinstance(inp, GraphStateVar):
inp = tuple(inp)
elif isinstance(inp, str):
inp = (inp, None)
- elif hasattr(inp, 'add_to'):
+ elif hasattr(inp, "add_to"):
# OnnxOperator
existing = set(_[0] for _ in self.expected_inputs)
i = 10
@@ -479,11 +511,14 @@ def __init__(self, *inputs, op_version=None, output_names=None,
self.expected_inputs.append(inp)
self.output_names_ = None
- self._post_process_attributes(
- clear_subgraph_inputs=clear_subgraph_inputs)
+ self._post_process_attributes(clear_subgraph_inputs=clear_subgraph_inputs)
logger.debug(
- '[Ops] +%s-%d (%s) id=%d',
- self.__class__.__name__, self.op_version, self.domain, id(self))
+ "[Ops] +%s-%d (%s) id=%d",
+ self.__class__.__name__,
+ self.op_version,
+ self.domain,
+ id(self),
+ )
def _post_process_attributes(self, clear_subgraph_inputs=False):
"""
@@ -512,33 +547,36 @@ def _post_process_attributes(self, clear_subgraph_inputs=False):
if self.__class__.__name__ == "OnnxConstantOfShape":
if "value" in self.kwargs:
- value = self.kwargs['value']
+ value = self.kwargs["value"]
if isinstance(value, TensorProto):
return
if isinstance(value, np.ndarray):
- if value.shape == (1, ):
+ if value.shape == (1,):
val = value[0]
elif len(value.shape) == 0:
val = value
else:
raise RuntimeError(
"Unexpected shape %r for value, it must be "
- "an array of one element." % value.shape)
- self.kwargs['value'] = from_array(
- np.array([val], dtype=value.dtype))
+ "an array of one element." % value.shape
+ )
+ self.kwargs["value"] = from_array(
+ np.array([val], dtype=value.dtype)
+ )
return
raise TypeError(
"Unexpected type %r for value. It should be an array "
- "of one element." % type(value))
+ "of one element." % type(value)
+ )
return
if self.__class__.__name__ == "OnnxCast":
if "to" in self.kwargs:
- value = self.kwargs['to']
+ value = self.kwargs["to"]
if isinstance(value, int):
return
to = guess_proto_type(_guess_numpy_type(value, None))
- self.kwargs['to'] = to
+ self.kwargs["to"] = to
return
def __str__(self):
@@ -549,7 +587,9 @@ def __str__(self):
self.__class__.__name__,
len(self.inputs) if self.inputs is not None else 0,
[str(o) for o in self.output_names]
- if self.output_names is not None else "?")
+ if self.output_names is not None
+ else "?",
+ )
def set_onnx_name_prefix(self, onnx_prefix_name):
"""
@@ -562,7 +602,7 @@ def set_onnx_name_prefix(self, onnx_prefix_name):
if self.onnx_prefix_name is None:
self.onnx_prefix_name = onnx_prefix_name
for inp in self.inputs:
- if hasattr(inp, 'onnx_prefix_name'):
+ if hasattr(inp, "onnx_prefix_name"):
inp.set_onnx_name_prefix(onnx_prefix_name)
return self
@@ -588,7 +628,7 @@ def get_output_name(self, i, scope=None):
return self.state.computed_outputs_[i][0]
if self.output_names_ is not None:
return self.output_names_[i]
- self._set_output_names_(getattr(self, 'scope', None) or scope, None)
+ self._set_output_names_(getattr(self, "scope", None) or scope, None)
return self.output_names_[i]
def get_output(self, i, scope=None):
@@ -599,8 +639,8 @@ def get_output(self, i, scope=None):
res = self.output_names_[i]
if not isinstance(res, (tuple, Variable)):
raise RuntimeError(
- "Unable to retrieve output %r from %r."
- "" % (i, self))
+ "Unable to retrieve output %r from %r." "" % (i, self)
+ )
return res
def _set_output_names_(self, scope, operator):
@@ -608,8 +648,7 @@ def _set_output_names_(self, scope, operator):
if operator is not None:
self.operator_ = operator
if self.output_names_ is not None:
- raise RuntimeError(
- "output_names_ is already set.")
+ raise RuntimeError("output_names_ is already set.")
elif self.output_variables is not None:
outputs = [o.onnx_name for o in self.output_variables]
self.output_names_ = outputs
@@ -621,15 +660,16 @@ def _set_output_names_(self, scope, operator):
if operator is not None and len(louts) != len(operator.outputs):
raise RuntimeError(
"Output mismatch for '{}'\n{}\n{}".format(
- type(operator.raw_operator),
- louts, operator.outputs))
+ type(operator.raw_operator), louts, operator.outputs
+ )
+ )
outputs = []
for iname, name in enumerate(louts):
if name is None:
raise AssertionError(
- "Issue for operator '{}'.".format(
- type(operator.raw_operator)))
- if name.startswith('u(') and name[-1] == ')':
+ "Issue for operator '{}'.".format(type(operator.raw_operator))
+ )
+ if name.startswith("u(") and name[-1] == ")":
name = scope.get_unique_variable_name(name[2:-1])
elif operator is not None:
oout = operator.outputs[iname]
@@ -639,15 +679,15 @@ def _set_output_names_(self, scope, operator):
elif self.expected_outputs is None:
raise AttributeError(
"expected_outputs is None for operator=%r, output_names=%r, "
- "output_variables=%r, operator=%r" % (
- self, self.output_names, self.output_variables, operator))
+ "output_variables=%r, operator=%r"
+ % (self, self.output_names, self.output_variables, operator)
+ )
else:
if scope is None:
raise RuntimeError("scope must not be None.")
outputs = []
for name in self.expected_outputs:
- name = scope.get_unique_variable_name(
- self.onnx_prefix + "_" + name[0])
+ name = scope.get_unique_variable_name(self.onnx_prefix + "_" + name[0])
outputs.append(name)
self.output_names_ = outputs
return outputs
@@ -657,21 +697,24 @@ def _add_to_inputs(self, operator):
for input in self.inputs:
if isinstance(input, OnnxOperator.OnnxOperatorVariable):
if operator is None:
- raise RuntimeError("A placeholder cannot be replaced "
- "as an operator is not specified.")
+ raise RuntimeError(
+ "A placeholder cannot be replaced "
+ "as an operator is not specified."
+ )
if len(operator.inputs) == 0:
- raise RuntimeError("No input variable in {}.".format(
- operator))
+ raise RuntimeError("No input variable in {}.".format(operator))
# The inputs must be looked into the graph.
for i in operator.inputs:
if i.onnx_name == input.name:
inputs.append(i)
break
else:
- vars = ', '.join(map(lambda o: "'%s'" % o.onnx_name,
- operator.inputs))
- raise RuntimeError("Unable to find variable "
- "{} in {}.".format(input, vars))
+ vars = ", ".join(
+ map(lambda o: "'%s'" % o.onnx_name, operator.inputs)
+ )
+ raise RuntimeError(
+ "Unable to find variable " "{} in {}.".format(input, vars)
+ )
else:
inputs.append(input)
return inputs
@@ -693,17 +736,18 @@ def add_to(self, scope, container, operator=None, run_converters=False):
if self.is_deprecated:
raise RuntimeError(
"Node '{}' is deprecated. This API cannot deprecated "
- "nodes.".format(self.__class__.__name__))
- if (self.op_version is not None and
- self.op_version < self.since_version):
+ "nodes.".format(self.__class__.__name__)
+ )
+ if self.op_version is not None and self.op_version < self.since_version:
raise RuntimeError(
"Incompatible versions for node '{}' op_version {} "
"< since_version {}.".format(
- self.__class__.__name__, self.op_version,
- self.since_version))
- if self.kwargs.get('op_version', '') is None:
+ self.__class__.__name__, self.op_version, self.since_version
+ )
+ )
+ if self.kwargs.get("op_version", "") is None:
kwargs = self.kwargs.copy()
- del kwargs['op_version']
+ del kwargs["op_version"]
else:
kwargs = self.kwargs
@@ -715,14 +759,23 @@ def add_to(self, scope, container, operator=None, run_converters=False):
logger.debug("[Ops.add_to] state id=%d", id(self))
self.state = GraphState(
- inputs, self.output_names_, self.operator_name,
- scope, container, None, op_version=self.op_version,
- op_domain=domain, onnx_prefix_name=self.onnx_prefix,
+ inputs,
+ self.output_names_,
+ self.operator_name,
+ scope,
+ container,
+ None,
+ op_version=self.op_version,
+ op_domain=domain,
+ onnx_prefix_name=self.onnx_prefix,
expected_inputs=self.expected_inputs,
expected_outputs=self.expected_outputs,
input_range=self.input_range,
output_range=self.output_range,
- operator=operator, run_converters=run_converters, **kwargs)
+ operator=operator,
+ run_converters=run_converters,
+ **kwargs
+ )
self.state.run()
self._verify_add_to_()
@@ -730,9 +783,10 @@ def _verify_add_to_(self):
if self.state is None:
raise RuntimeError(
"Graph was not produced for operator '{}': {}."
- "".format(self.__class__.__name__, self))
+ "".format(self.__class__.__name__, self)
+ )
for i in self.inputs:
- if hasattr(i, '_verify_add_to_'):
+ if hasattr(i, "_verify_add_to_"):
i._verify_add_to_()
@property
@@ -748,25 +802,30 @@ def get_output_type_inference(self, input_shapes=None):
"""
Returns the expected output types in a list.
"""
- if (self.state is not None and
- self.state.computed_outputs_ is not None):
+ if self.state is not None and self.state.computed_outputs_ is not None:
return self.state.computed_outputs_
expected_inputs = (
self.state.computed_inputs_
- if self.expected_inputs is None else self.expected_inputs)
+ if self.expected_inputs is None
+ else self.expected_inputs
+ )
if expected_inputs is None:
raise RuntimeError(
"Attribute 'expected_inputs' is empty for %r, "
- "input_shapes=%r." % (self, input_shapes))
+ "input_shapes=%r." % (self, input_shapes)
+ )
expected_outputs = (
self.state.computed_outputs_
- if self.expected_outputs is None else self.expected_outputs)
+ if self.expected_outputs is None
+ else self.expected_outputs
+ )
if expected_outputs is None:
raise RuntimeError(
"Attribute 'expected_outputs' is empty for %r, "
- "input_shapes=%r." % (self, input_shapes))
+ "input_shapes=%r." % (self, input_shapes)
+ )
# Shape inference only work on a full graph.
if input_shapes is None:
@@ -790,15 +849,14 @@ def get_output_type_inference(self, input_shapes=None):
res = []
for name, ct in expected_outputs:
- if isinstance(ct, str) and ct[0] in ('T', 'V', 'I'):
- if (ct[0] not in rev or
- all(map(lambda k: k not in given, rev[ct]))):
+ if isinstance(ct, str) and ct[0] in ("T", "V", "I"):
+ if ct[0] not in rev or all(map(lambda k: k not in given, rev[ct])):
raise NotImplementedError(
"Unable to guess output type for (%r, %r) - "
"given=%r - rev=%r input_shapes=%r expected_inputs"
- "=%r." % (
- name, ct, given, rev, input_shapes,
- self.expected_inputs))
+ "=%r."
+ % (name, ct, given, rev, input_shapes, self.expected_inputs)
+ )
found = False
for ind in rev[ct]:
if ind in given:
@@ -809,9 +867,9 @@ def get_output_type_inference(self, input_shapes=None):
raise NotImplementedError(
"Unable to guess output type for (%r, %r) - "
"given=%r - rev=%r input_shapes=%r expected_inputs"
- "=%r." % (
- name, ct, given, rev, input_shapes,
- self.expected_inputs))
+ "=%r."
+ % (name, ct, given, rev, input_shapes, self.expected_inputs)
+ )
continue
if isinstance(ct, str):
try:
@@ -819,16 +877,17 @@ def get_output_type_inference(self, input_shapes=None):
except NotImplementedError as e:
raise NotImplementedError(
"Unable to guess output type for (%r, %r) - "
- "given=%r - rev=%r." % (name, ct, given, rev)) from e
+ "given=%r - rev=%r." % (name, ct, given, rev)
+ ) from e
res.append((name, dt))
continue
try:
- dt = _guess_type_proto_str(
- _guess_type_proto_str_inv(ct), None)
+ dt = _guess_type_proto_str(_guess_type_proto_str_inv(ct), None)
except NotImplementedError as e:
raise NotImplementedError(
"Unable to guess output type for (%r, %r) - given=%r - "
- "rev=%r." % (name, ct, given, rev)) from e
+ "rev=%r." % (name, ct, given, rev)
+ ) from e
res.append((name, dt))
return res
@@ -838,7 +897,7 @@ def _clean_attributes(self, *args, recursive=True):
Removes attributes in this node and its parents.
"""
for arg in args:
- if arg in ('state', 'output_names_'):
+ if arg in ("state", "output_names_"):
setattr(self, arg, None)
elif hasattr(self, arg):
delattr(self, arg)
@@ -847,8 +906,15 @@ def _clean_attributes(self, *args, recursive=True):
if isinstance(obj, OnnxOperator):
obj._clean_attributes(*args, recursive=True)
- def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
- target_opset=None, domain=None, verbose=0):
+ def to_onnx(
+ self,
+ inputs=None,
+ outputs=None,
+ other_outputs=None,
+ target_opset=None,
+ domain=None,
+ verbose=0,
+ ):
"""
Converts this operator into an ONNX graph.
@@ -864,26 +930,32 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
:param verbose: prints information
"""
if isinstance(target_opset, dict):
- dom = self.domain or ''
+ dom = self.domain or ""
target_opset = target_opset.get(dom, None)
elif isinstance(target_opset, int):
- if self.domain not in ('', None):
+ if self.domain not in ("", None):
# The target_opset is for the domain ''
# We ignore it.
target_opset = None
elif target_opset is not None:
raise TypeError(
"target_opset must be a dictionary {domain: "
- "target_opset} not %r for operator %r." % (
- target_opset, self.__class__.__name__))
- if self.domain in ('', None) and target_opset == 1:
+ "target_opset} not %r for operator %r."
+ % (target_opset, self.__class__.__name__)
+ )
+ if self.domain in ("", None) and target_opset == 1:
raise RuntimeError("target_opset cannot be 1.")
- if (self.op_version is not None and target_opset is not None and
- self.op_version > target_opset):
+ if (
+ self.op_version is not None
+ and target_opset is not None
+ and self.op_version > target_opset
+ ):
raise RuntimeError(
"target_opset={} is lower than the version={} requested "
"for this node '{}'.".format(
- target_opset, self.op_version, self.__class__.__name__))
+ target_opset, self.op_version, self.__class__.__name__
+ )
+ )
if self.state is not None:
# The conversion already happened and needs to be cleaned.
self._clean_attributes("output_names_", "state")
@@ -899,31 +971,36 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
ty = _guess_type(obj[1])
new_inputs.append((obj[0], ty))
else:
- raise TypeError("Inputs must be Variable or "
- "tuple(name, type) not {}."
- "".format(type(obj)))
+ raise TypeError(
+ "Inputs must be Variable or "
+ "tuple(name, type) not {}."
+ "".format(type(obj))
+ )
inputs = new_inputs
for name, typ in inputs:
if typ is None:
- raise RuntimeError("Type input '{}' for operator '{}' "
- "is unknown. You should specify "
- "input types.".format(
- name, self.__class__.__name__))
+ raise RuntimeError(
+ "Type input '{}' for operator '{}' "
+ "is unknown. You should specify "
+ "input types.".format(name, self.__class__.__name__)
+ )
registered_models = dict(
- conv=_converter_pool, shape=_shape_calculator_pool,
- aliases=sklearn_operator_name_map)
+ conv=_converter_pool,
+ shape=_shape_calculator_pool,
+ aliases=sklearn_operator_name_map,
+ )
target_opset = self.get_latest_tested_opset_version(target_opset)
container = ModelComponentContainer(
- target_opset, registered_models=registered_models)
+ target_opset, registered_models=registered_models
+ )
model_name = self.__class__.__name__
- logger.debug(
- "[Ops.to_onnx] %s id=%d",
- self.__class__.__name__, id(self))
- scope = Scope(model_name, target_opset=target_opset,
- registered_models=registered_models)
+ logger.debug("[Ops.to_onnx] %s id=%d", self.__class__.__name__, id(self))
+ scope = Scope(
+ model_name, target_opset=target_opset, registered_models=registered_models
+ )
for inp in inputs:
var = Variable(inp[0], inp[0], scope=scope, type=inp[1])
container.add_input(var)
@@ -938,20 +1015,23 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
if var.output_names is None:
# The variable name is likely to be different.
from .onnx_ops import OnnxIdentity
+
var2 = OnnxIdentity(
- var, op_version=var.op_version,
- output_names=[name])
+ var, op_version=var.op_version, output_names=[name]
+ )
else:
var2 = var
extra_outputs.append(var2)
for out in extra_outputs:
- if not hasattr(out, 'add_to'):
- raise RuntimeError(
- "Extra outputs must have method 'add_to'.")
+ if not hasattr(out, "add_to"):
+ raise RuntimeError("Extra outputs must have method 'add_to'.")
out.add_to(scope, container, run_converters=True)
logger.debug(
"[Ops.to_onnx] %s id=%d extra_outputs=%r",
- self.__class__.__name__, id(self), extra_outputs)
+ self.__class__.__name__,
+ id(self),
+ extra_outputs,
+ )
# infer shapes
if outputs:
@@ -968,26 +1048,35 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
type_shape = o[1]
shapes.append(Variable(o[0], o[0], None, type_shape))
else:
- raise TypeError("Outputs must be Variable or "
- "tuple(name, type).")
+ raise TypeError("Outputs must be Variable or " "tuple(name, type).")
logger.debug(
"[Ops.to_onnx] %s id=%d outputs=%r",
- self.__class__.__name__, id(self), outputs)
+ self.__class__.__name__,
+ id(self),
+ outputs,
+ )
else:
if verbose > 0:
print("[op.to_onnx] infer outputs")
- shapes = infer_outputs(container, container.inputs,
- initializer=container.initializers,
- target_opset=target_opset)
+ shapes = infer_outputs(
+ container,
+ container.inputs,
+ initializer=container.initializers,
+ target_opset=target_opset,
+ )
if self.output_names:
- set_names = set(v.onnx_name if hasattr(v, 'onnx_name') else v
- for v in self.output_names)
- shapes = [shape for shape in shapes
- if shape.onnx_name in set_names]
+ set_names = set(
+ v.onnx_name if hasattr(v, "onnx_name") else v
+ for v in self.output_names
+ )
+ shapes = [shape for shape in shapes if shape.onnx_name in set_names]
logger.debug(
"[Ops.to_onnx] %s id=%d shapes=%r",
- self.__class__.__name__, id(self), shapes)
+ self.__class__.__name__,
+ id(self),
+ shapes,
+ )
if verbose > 0:
print("[op.to_onnx] shapes=%r" % shapes)
@@ -999,21 +1088,26 @@ def to_onnx(self, inputs=None, outputs=None, other_outputs=None,
if verbose >= 2:
print("---NODES---")
for node in container.nodes:
- print(" %s - %s: %r -> %r" % (
- node.op_type, node.name, node.input, node.output))
+ print(
+ " %s - %s: %r -> %r"
+ % (node.op_type, node.name, node.input, node.output)
+ )
# convert the graph
graph = make_graph(
- container.nodes, model_name, container.inputs,
- container.outputs, container.initializers)
+ container.nodes,
+ model_name,
+ container.inputs,
+ container.outputs,
+ container.initializers,
+ )
onnx_model = make_model(graph)
# domains
_update_domain_version(container, onnx_model)
# metadata
- opv = min(target_opset,
- _get_main_opset_version(onnx_model) or target_opset)
+ opv = min(target_opset, _get_main_opset_version(onnx_model) or target_opset)
irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION)
onnx_model.ir_version = irv
onnx_model.producer_name = utils.get_producer()
@@ -1041,8 +1135,7 @@ def enumerate_variables(self):
for node in self.enumerate_nodes():
if self.inputs:
for i, input in enumerate(self.inputs):
- if isinstance(input, (OnnxOperator.UnscopedVariable,
- Variable)):
+ if isinstance(input, (OnnxOperator.UnscopedVariable, Variable)):
yield (node, i)
def enumerate_initial_types(self):
@@ -1086,24 +1179,41 @@ class OnnxSubEstimator(OnnxOperator):
input_range = [1, 1e9]
output_range = [1, 1e9]
- def __init__(self, skl_op, *inputs, op_version=None,
- output_names=None, domain=None, options=None,
- input_types=None, **kwargs):
+ def __init__(
+ self,
+ skl_op,
+ *inputs,
+ op_version=None,
+ output_names=None,
+ domain=None,
+ options=None,
+ input_types=None,
+ **kwargs
+ ):
OnnxOperator.__init__(
- self, *inputs, op_version=op_version,
- output_names=output_names, domain=domain, **kwargs)
+ self,
+ *inputs,
+ op_version=op_version,
+ output_names=output_names,
+ domain=domain,
+ **kwargs
+ )
self.operator_instance = skl_op
self.options = options
if skl_op is None and input_types is not None:
raise RuntimeError(
- "input_types is only used when a sub-operator is defined.")
+ "input_types is only used when a sub-operator is defined."
+ )
self.input_types = input_types
def __repr__(self):
return "%s(%r, %s, op_version=%r, output_names=%r)" % (
- self.__class__.__name__, self.operator_instance,
+ self.__class__.__name__,
+ self.operator_instance,
", ".join("%r" % i for i in self.inputs),
- self.op_version, self.output_names)
+ self.op_version,
+ self.output_names,
+ )
def add_to(self, scope, container, operator=None, run_converters=False):
"""
@@ -1116,9 +1226,9 @@ def add_to(self, scope, container, operator=None, run_converters=False):
:param run_converters: must be True if called from method `to_onnx`
"""
if self.state is None:
- if self.kwargs.get('op_version', '') is None:
+ if self.kwargs.get("op_version", "") is None:
kwargs = self.kwargs.copy()
- del kwargs['op_version']
+ del kwargs["op_version"]
else:
kwargs = self.kwargs
@@ -1133,18 +1243,24 @@ def add_to(self, scope, container, operator=None, run_converters=False):
louts = self.output_names
outputs = []
for name in louts:
- if (isinstance(name, str) and name.startswith('u(') and
- name[-1] == ')'):
+ if (
+ isinstance(name, str)
+ and name.startswith("u(")
+ and name[-1] == ")"
+ ):
name = VariableStr(
- scope.get_unique_variable_name(name[2:-1]),
- scope=scope)
- if (isinstance(name, Variable) and
- name.raw_name.startswith('u(') and
- name.raw_name[-1] == ')'):
+ scope.get_unique_variable_name(name[2:-1]), scope=scope
+ )
+ if (
+ isinstance(name, Variable)
+ and name.raw_name.startswith("u(")
+ and name.raw_name[-1] == ")"
+ ):
name = VariableStr(
- scope.get_unique_variable_name(
- name.raw_name[2:-1]),
- scope=scope, type=name.type)
+ scope.get_unique_variable_name(name.raw_name[2:-1]),
+ scope=scope,
+ type=name.type,
+ )
outputs.append(name)
self.output_names_ = outputs
else:
@@ -1154,27 +1270,29 @@ def add_to(self, scope, container, operator=None, run_converters=False):
for input in self.inputs:
if isinstance(input, OnnxOperator.OnnxOperatorVariable):
if operator is None:
- raise RuntimeError("A placeholder cannot be replaced "
- "as an operator is not specified.")
+ raise RuntimeError(
+ "A placeholder cannot be replaced "
+ "as an operator is not specified."
+ )
if len(operator.inputs) == 0:
- raise RuntimeError("No input variable in {}.".format(
- operator))
+ raise RuntimeError("No input variable in {}.".format(operator))
# The inputs must be looked into the graph.
for i in operator.inputs:
if i.onnx_name == input.name:
inputs.append(i)
break
else:
- vars = ', '.join(map(lambda o: "'%s'" % o.onnx_name,
- operator.inputs))
- raise RuntimeError("Unable to find variable "
- "{} in {}.".format(input, vars))
+ vars = ", ".join(
+ map(lambda o: "'%s'" % o.onnx_name, operator.inputs)
+ )
+ raise RuntimeError(
+ "Unable to find variable " "{} in {}.".format(input, vars)
+ )
elif isinstance(input, tuple) and len(input) == 2:
if scope is not None and input[0] in scope.variables:
var = scope.variables[input[0]]
else:
- var = Variable(input[0], input[0], scope=scope,
- type=input[1])
+ var = Variable(input[0], input[0], scope=scope, type=input[1])
if scope is not None:
scope.register_variable(var)
inputs.append(var)
@@ -1183,16 +1301,24 @@ def add_to(self, scope, container, operator=None, run_converters=False):
logger.debug("[SubOps.add_to] state id=%d", id(self))
self.state = GraphState(
- inputs, self.output_names_, self.operator_instance,
- scope, container, None, op_version=self.op_version,
- op_domain=None, onnx_prefix_name=self.onnx_prefix,
- options=self.options, run_converters=run_converters,
- input_types=self.input_types, **kwargs)
+ inputs,
+ self.output_names_,
+ self.operator_instance,
+ scope,
+ container,
+ None,
+ op_version=self.op_version,
+ op_domain=None,
+ onnx_prefix_name=self.onnx_prefix,
+ options=self.options,
+ run_converters=run_converters,
+ input_types=self.input_types,
+ **kwargs
+ )
self.state.run()
class WrappedModelAlias:
-
def __init__(self, model, alias):
self.model = model
self.alias = alias
@@ -1207,6 +1333,10 @@ class OnnxSubOperator(OnnxSubEstimator):
def __init__(self, *args, **kwargs):
OnnxSubEstimator.__init__(self, *args, **kwargs)
- warnings.warn(("Class OnnxSubOperator will be removed in 1.10. "
- "It should be replaced by OnnxSubEstimator."),
- DeprecationWarning)
+ warnings.warn(
+ (
+ "Class OnnxSubOperator will be removed in 1.10. "
+ "It should be replaced by OnnxSubEstimator."
+ ),
+ DeprecationWarning,
+ )
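For context while reviewing the reformatted OnnxOperator.to_onnx above, here is a minimal usage sketch (illustrative only, not part of this diff; OnnxAdd and FloatTensorType are existing skl2onnx names, while the input name "X", the shape and the opset are arbitrary):

import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxAdd
from skl2onnx.common.data_types import FloatTensorType

# Build a tiny graph computing Y = X + 1 with the algebra API, then convert it.
op = OnnxAdd(
    "X",
    np.array([1.0], dtype=np.float32),
    op_version=15,
    output_names=["Y"],
)
onx = op.to_onnx(inputs=[("X", FloatTensorType([None, 2]))])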
diff --git a/skl2onnx/algebra/onnx_operator_mixin.py b/skl2onnx/algebra/onnx_operator_mixin.py
index 59235b397..8fab57d66 100644
--- a/skl2onnx/algebra/onnx_operator_mixin.py
+++ b/skl2onnx/algebra/onnx_operator_mixin.py
@@ -18,9 +18,17 @@ class OnnxOperatorMixin:
sharing an API to convert object to *ONNX*.
"""
- def to_onnx(self, X=None, name=None,
- options=None, white_op=None, black_op=None,
- final_types=None, target_opset=None, verbose=0):
+ def to_onnx(
+ self,
+ X=None,
+ name=None,
+ options=None,
+ white_op=None,
+ black_op=None,
+ final_types=None,
+ target_opset=None,
+ verbose=0,
+ ):
"""
        Converts the model into *ONNX* format.
It calls method *_to_onnx* which must be
@@ -43,42 +51,52 @@ def to_onnx(self, X=None, name=None,
:param verbose: displays information while converting
"""
from .. import convert_sklearn
+
if X is None:
initial_types = self.infer_initial_types()
else:
initial_types = guess_initial_types(X, None)
- if not hasattr(self, 'op_version'):
+ if not hasattr(self, "op_version"):
if name is None:
name = self.__class__.__name__
raise AttributeError(
"Attribute 'op_version' is missing for '{}' "
- "(model: '{}').".format(
- self.__class__.__name__, name))
+ "(model: '{}').".format(self.__class__.__name__, name)
+ )
return convert_sklearn(
- self, initial_types=initial_types,
- target_opset=target_opset or self.op_version, options=options,
- white_op=white_op, black_op=black_op, final_types=final_types,
- verbose=verbose)
+ self,
+ initial_types=initial_types,
+ target_opset=target_opset or self.op_version,
+ options=options,
+ white_op=white_op,
+ black_op=black_op,
+ final_types=final_types,
+ verbose=verbose,
+ )
def infer_initial_types(self):
"""
Infers initial types.
"""
- if hasattr(self, 'enumerate_initial_types'):
+ if hasattr(self, "enumerate_initial_types"):
return list(self.enumerate_initial_types())
- raise RuntimeError("Method enumerate_initial_types is missing "
- "and initial_types are not defined.")
+ raise RuntimeError(
+ "Method enumerate_initial_types is missing "
+ "and initial_types are not defined."
+ )
def _find_sklearn_parent(self):
for cl in self.__class__.__bases__:
if issubclass(cl, BaseEstimator):
return cl
- raise RuntimeError("Unable to find any parent inherited from "
- "BaseEstimator: {}.".format(
- ", ".join(map(str, self.__class__.__bases__))))
-
- def to_onnx_operator(self, inputs=None, outputs=None,
- target_opset=None, options=None):
+ raise RuntimeError(
+ "Unable to find any parent inherited from "
+ "BaseEstimator: {}.".format(", ".join(map(str, self.__class__.__bases__)))
+ )
+
+ def to_onnx_operator(
+ self, inputs=None, outputs=None, target_opset=None, options=None
+ ):
"""
This function must be overloaded.
"""
@@ -92,6 +110,7 @@ def onnx_parser(self):
mapped to the first *scikit-learn* parent
it can find.
"""
+
def parser(scope=None, inputs=None):
try:
op = self.to_onnx_operator(inputs=inputs, outputs=None)
@@ -111,6 +130,7 @@ def parser(scope=None, inputs=None):
except IndexError:
break
return names
+
return parser
def get_inputs(self, inputs, i):
@@ -130,32 +150,36 @@ def onnx_shape_calculator(self):
mapped to the first *scikit-learn* parent
it can find.
"""
- if not hasattr(self, 'op_version'):
+ if not hasattr(self, "op_version"):
raise AttributeError(
"Class '{}' should have an attribute 'op_version'.".format(
- self.__class__.__name__))
+ self.__class__.__name__
+ )
+ )
try:
op = self.to_onnx_operator()
except NotImplementedError:
parent = self._find_sklearn_parent()
- name = sklearn_operator_name_map.get(
- parent, "Sklearn" + parent.__name__)
+ name = sklearn_operator_name_map.get(parent, "Sklearn" + parent.__name__)
return get_shape_calculator(name)
def shape_calculator(operator):
- onx = op.to_onnx(operator.inputs, operator.outputs,
- target_opset=self.op_version)
+ onx = op.to_onnx(
+ operator.inputs, operator.outputs, target_opset=self.op_version
+ )
inferred_model = shape_inference.infer_shapes(onx)
shapes = Variable.from_pb(inferred_model.graph.value_info)
shapes = {shape.onnx_name: shape for shape in shapes}
for o in operator.outputs:
name = o.onnx_name
if name not in shapes:
- raise RuntimeError("Shape of output '{}' cannot be "
- "infered. onnx_shape_calculator "
- "must be overriden and return "
- "a shape calculator.".format(name))
+ raise RuntimeError(
+ "Shape of output '{}' cannot be "
+                        "inferred. onnx_shape_calculator "
+                        "must be overridden and return "
+ "a shape calculator.".format(name)
+ )
o.set_type(shapes[name].type)
return shape_calculator
@@ -167,8 +191,10 @@ def onnx_converter(self):
mapped to the first *scikit-learn* parent
it can find.
"""
- def converter(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+
+ def converter(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+ ):
inputs = operator.inputs # getattr(self, "parsed_inputs_", None)
outputs = operator.outputs # kwargs.get('outputs', None)
op_version = container.target_opset
@@ -176,23 +202,26 @@ def converter(scope: Scope, operator: Operator,
try:
if inputs:
op = self.to_onnx_operator(
- inputs=inputs, outputs=outputs,
- target_opset=op_version, options=options)
+ inputs=inputs,
+ outputs=outputs,
+ target_opset=op_version,
+ options=options,
+ )
else:
op = self.to_onnx_operator(
- target_opset=op_version,
- outputs=outputs, options=options)
+ target_opset=op_version, outputs=outputs, options=options
+ )
except TypeError:
warnings.warn(
"Signature should be to_onnx_operator(self, inputs=None, "
"outputs=None, target_opset=None, **kwargs). "
"This will be the case in version 1.11, class=%r."
"" % type(self),
- DeprecationWarning)
+ DeprecationWarning,
+ )
try:
if inputs:
- op = self.to_onnx_operator(
- inputs=inputs, outputs=outputs)
+ op = self.to_onnx_operator(inputs=inputs, outputs=outputs)
else:
op = self.to_onnx_operator()
except NotImplementedError:
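To illustrate the OnnxOperatorMixin extension points touched above, a hedged sketch of a custom transformer follows; the class DoubleIt and its constant are invented for illustration, while to_onnx_operator, get_inputs and the op_version attribute are the hooks shown in this diff:

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx.algebra.onnx_ops import OnnxMul


class DoubleIt(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
    # The mixin checks for this attribute before converting.
    op_version = 15

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X * 2

    def to_onnx_operator(
        self, inputs=None, outputs=None, target_opset=None, options=None
    ):
        # Map the first graph input to Mul(input, 2).
        i0 = self.get_inputs(inputs, 0)
        return OnnxMul(
            i0,
            np.array([2.0], dtype=np.float32),
            op_version=target_opset or self.op_version,
            output_names=outputs,
        )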
diff --git a/skl2onnx/algebra/onnx_ops.py b/skl2onnx/algebra/onnx_ops.py
index 26104c5c9..0eb8db69d 100644
--- a/skl2onnx/algebra/onnx_ops.py
+++ b/skl2onnx/algebra/onnx_ops.py
@@ -6,6 +6,7 @@
import sys
import os
import numpy as np
+
try:
from scipy.sparse import coo_matrix
except ImportError:
@@ -17,16 +18,24 @@
from ._cache import cache_folder
-def ClassFactory(class_name, op_name, inputs, outputs,
- input_range, output_range,
- domain, attr_names, doc,
- deprecated, since_version,
- past_version):
+def ClassFactory(
+ class_name,
+ op_name,
+ inputs,
+ outputs,
+ input_range,
+ output_range,
+ domain,
+ attr_names,
+ doc,
+ deprecated,
+ since_version,
+ past_version,
+):
from .onnx_operator import OnnxOperator, OnnxOperatorItem
def __init__(self, *args, **kwargs):
-
- op_version = kwargs.pop('op_version', None)
+ op_version = kwargs.pop("op_version", None)
if isinstance(op_version, dict):
op_version = op_version.get(domain, None)
@@ -34,14 +43,15 @@ def __init__(self, *args, **kwargs):
if len(args) == 0 and input_range[0] == input_range[1]:
args = [_[0] for _ in self.__class__.expected_inputs]
if not (input_range[0] <= len(args) <= input_range[1]):
- raise RuntimeError("Unexpected number of inputs, "
- "got {}, expecting {} for operator "
- "'{}'.".format(
- len(args), len(inputs), op_name))
+ raise RuntimeError(
+ "Unexpected number of inputs, "
+ "got {}, expecting {} for operator "
+ "'{}'.".format(len(args), len(inputs), op_name)
+ )
attr_names = self.attr_names
- if '_' in self.__class__.__name__:
- op_version_class = int(self.__class__.__name__.split('_')[-1])
+ if "_" in self.__class__.__name__:
+ op_version_class = int(self.__class__.__name__.split("_")[-1])
if op_version is None:
op_version = op_version_class
try:
@@ -52,8 +62,9 @@ def __init__(self, *args, **kwargs):
"class '{}' since_version {}. Parameter 'op_version' "
"is probably missing when the class "
"is instantiated.".format(
- op_version, op_version_class, class_name,
- since_version))
+ op_version, op_version_class, class_name, since_version
+ )
+ )
else:
op_version_class = None
@@ -67,26 +78,38 @@ def __init__(self, *args, **kwargs):
# attr_names refers to the most recent version of
# this operator. We may need an older one.
for op in range(op_version, 0, -1):
- name = '{}_{}'.format(self.__class__.__name__, op)
+ name = "{}_{}".format(self.__class__.__name__, op)
if name in self.past_version:
found = (name, op)
attr_names = self.past_version[name].attr_names
break
- if (op_version_class is not None and found is not None and
- found[-1] != op_version_class):
+ if (
+ op_version_class is not None
+ and found is not None
+ and found[-1] != op_version_class
+ ):
raise RuntimeError(
"op_version={} does not refer to the same opset as the class "
- "name ('{}').".format(op_version, self.__class__.__name__))
+ "name ('{}').".format(op_version, self.__class__.__name__)
+ )
for key in kwargs:
- if key in {'output_names', 'op_version', 'domain', 'ir_version',
- 'global_context', 'clear_subgraph_inputs'}:
+ if key in {
+ "output_names",
+ "op_version",
+ "domain",
+ "ir_version",
+ "global_context",
+ "clear_subgraph_inputs",
+ }:
continue
if key not in attr_names:
- raise TypeError("Argument '%s' not valid for '%s' opset=%s."
- % (key, op_name, op_version))
+ raise TypeError(
+ "Argument '%s' not valid for '%s' opset=%s."
+ % (key, op_name, op_version)
+ )
if op_version is not None:
- kwargs['op_version'] = op_version
+ kwargs["op_version"] = op_version
# This class can only be created by a user. Let's check
# types are either a variable, an operator or an array.
for i, a in enumerate(args):
@@ -94,37 +117,45 @@ def __init__(self, *args, **kwargs):
if len(a) != 2:
raise TypeError(
"Input %r is a tuple or class %r, it must have two "
- "elements (name, type) not %r." % (i, class_name, a))
- if (not isinstance(a[0], str) or
- not isinstance(a[1], DataType)):
+ "elements (name, type) not %r." % (i, class_name, a)
+ )
+ if not isinstance(a[0], str) or not isinstance(a[1], DataType):
raise TypeError(
"Input %r is a tuple or class %r, it must be a tuple "
- "(name, type) not %r." % (i, class_name, a))
+ "(name, type) not %r." % (i, class_name, a)
+ )
continue
- if not isinstance(a, (
- Variable, OnnxOperator, np.ndarray, str,
- OnnxOperatorItem, coo_matrix)):
+ if not isinstance(
+ a,
+ (Variable, OnnxOperator, np.ndarray, str, OnnxOperatorItem, coo_matrix),
+ ):
raise TypeError(
"Unexpected type %r for input %r of operator %r. "
"It must be an instance of Variable (or a string), "
"OnnxOperator, OnnxOperatorItem, numpy.ndarray, "
- "coo_matrix)." % (
- type(a), i, class_name))
+ "coo_matrix)." % (type(a), i, class_name)
+ )
OnnxOperator.__init__(self, *args, **kwargs)
- newclass = type(class_name, (OnnxOperator,),
- {"__init__": __init__, '__doc__': doc,
- 'expected_inputs': inputs,
- 'expected_outputs': outputs,
- 'operator_name': op_name,
- 'input_range': input_range,
- 'output_range': output_range,
- 'domain': domain,
- 'is_deprecated': deprecated,
- 'since_version': since_version,
- 'past_version': past_version,
- 'attr_names': attr_names,
- '__module__': __name__})
+ newclass = type(
+ class_name,
+ (OnnxOperator,),
+ {
+ "__init__": __init__,
+ "__doc__": doc,
+ "expected_inputs": inputs,
+ "expected_outputs": outputs,
+ "operator_name": op_name,
+ "input_range": input_range,
+ "output_range": output_range,
+ "domain": domain,
+ "is_deprecated": deprecated,
+ "since_version": since_version,
+ "past_version": past_version,
+ "attr_names": attr_names,
+ "__module__": __name__,
+ },
+ )
return newclass
@@ -151,54 +182,61 @@ def dynamic_class_creation(cache=False):
res[schema.name] = schema
else:
res[schema.name] = schema
- res[schema.name + '_' + str(schema.since_version)] = schema
+ res[schema.name + "_" + str(schema.since_version)] = schema
cls = {}
def _c(obj, label, i):
- name = '%s%d' % (obj.name or label, i)
+ name = "%s%d" % (obj.name or label, i)
try:
- tys = obj.type_str or ''
+ tys = obj.type_str or ""
except AttributeError:
- tys = obj.typeStr or ''
+ tys = obj.typeStr or ""
return (name, tys)
for name in sorted(res):
schema = res[name]
- inputs = [_c(o, 'I', i) for i, o in enumerate(schema.inputs)]
- outputs = [_c(o, 'O', i) for i, o in enumerate(schema.outputs)]
+ inputs = [_c(o, "I", i) for i, o in enumerate(schema.inputs)]
+ outputs = [_c(o, "O", i) for i, o in enumerate(schema.outputs)]
args = [p for p in schema.attributes]
- if '_' in name:
+ if "_" in name:
class_name = "Onnx" + name
else:
class_name = "Onnx" + schema.name
filename = os.path.join(
- cache_dir,
- schema.name + '_' + str(schema.since_version) + ".rst")
+ cache_dir, schema.name + "_" + str(schema.since_version) + ".rst"
+ )
if not cache and os.path.exists(filename):
with open(filename, "r", encoding="utf-8") as f:
doc = f.read()
else:
doc = get_rst_doc(schema)
if cache:
- with open(filename, 'w', encoding='utf-8') as f:
+ with open(filename, "w", encoding="utf-8") as f:
f.write(doc)
- cl = ClassFactory(class_name, schema.name, inputs, outputs,
- [schema.min_input, schema.max_input],
- [schema.min_output, schema.max_output],
- schema.domain, args,
- "**Version**" + doc.split('**Version**')[-1],
- getattr(schema, 'deprecated', False),
- schema.since_version, {})
+ cl = ClassFactory(
+ class_name,
+ schema.name,
+ inputs,
+ outputs,
+ [schema.min_input, schema.max_input],
+ [schema.min_output, schema.max_output],
+ schema.domain,
+ args,
+ "**Version**" + doc.split("**Version**")[-1],
+ getattr(schema, "deprecated", False),
+ schema.since_version,
+ {},
+ )
cls[class_name] = cl
# Retrieves past classes.
for name in cls:
- if '_' not in name:
+ if "_" not in name:
continue
- main, version = name.split('_')
+ main, version = name.split("_")
last = cls[main]
last.past_version[name] = cls[name]
@@ -219,8 +257,7 @@ def _update_module():
_update_module()
-def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None, output_names=None):
"""
Adds operator ReduceSum with opset>=13 following API from opset 12.
"""
@@ -229,30 +266,46 @@ def OnnxReduceSumApi11(*x, axes=None, keepdims=1, op_version=None,
if op_version is None or op_version >= 13:
if axes is None:
return OnnxReduceSum( # noqa
- *x, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
+ *x, keepdims=keepdims, op_version=op_version, output_names=output_names
+ )
return OnnxReduceSum( # noqa
- *x, np.array(axes, dtype=np.int64),
- keepdims=keepdims, op_version=op_version,
- output_names=output_names)
+ *x,
+ np.array(axes, dtype=np.int64),
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 11:
if axes is None:
return OnnxReduceSum_11( # noqa
- *x, keepdims=keepdims,
- op_version=op_version, output_names=output_names)
+ *x, keepdims=keepdims, op_version=op_version, output_names=output_names
+ )
return OnnxReduceSum_11( # noqa
- *x, axes=axes, keepdims=keepdims,
- op_version=op_version, output_names=output_names)
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
if axes is None:
- return OnnxReduceSum_1(*x, keepdims=keepdims, # noqa
- op_version=op_version,
- output_names=output_names)
- return OnnxReduceSum_1(*x, axes=axes, keepdims=keepdims, # noqa
- op_version=op_version, output_names=output_names)
-
-
-def OnnxReduceAnyApi18(cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1,
- op_version=None, output_names=None):
+ return OnnxReduceSum_1(
+ *x,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+ return OnnxReduceSum_1(
+ *x,
+ axes=axes,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceAnyApi18(
+ cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1, op_version=None, output_names=None
+):
"""
Adds operator Reduce* with opset>=18 following API from opset 17.
"""
@@ -261,98 +314,155 @@ def OnnxReduceAnyApi18(cl18, cl13, cl11, cl1, *x, axes=None, keepdims=1,
if op_version is None or op_version >= 18:
if axes is None:
return cl18( # noqa
- *x, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
+ *x, keepdims=keepdims, op_version=op_version, output_names=output_names
+ )
return cl18( # noqa
- *x, np.array(axes, dtype=np.int64),
- keepdims=keepdims, op_version=op_version,
- output_names=output_names)
+ *x,
+ np.array(axes, dtype=np.int64),
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 13:
if axes is None:
- return cl13(*x, keepdims=keepdims, # noqa
- op_version=op_version,
- output_names=output_names)
- return cl13(*x, axes=axes, keepdims=keepdims, # noqa
- op_version=op_version, output_names=output_names)
+ return cl13(
+ *x,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+ return cl13(
+ *x,
+ axes=axes,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 11:
if axes is None:
- return cl11(*x, keepdims=keepdims, # noqa
- op_version=op_version,
- output_names=output_names)
- return cl11(*x, axes=axes, keepdims=keepdims, # noqa
- op_version=op_version, output_names=output_names)
+ return cl11(
+ *x,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+ return cl11(
+ *x,
+ axes=axes,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
if axes is None:
- return cl1(*x, keepdims=keepdims, # noqa
- op_version=op_version,
- output_names=output_names)
- return cl1(*x, axes=axes, keepdims=keepdims, # noqa
- op_version=op_version, output_names=output_names)
-
-
-def OnnxReduceSumSquareApi18(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+ return cl1(
+ *x,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+ return cl1(
+ *x,
+ axes=axes,
+ keepdims=keepdims, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceSumSquareApi18(
+ *x, axes=None, keepdims=1, op_version=None, output_names=None
+):
"""
Adds operator ReduceSumSquare with opset>=18 following API from opset 17.
"""
if axes is None or not isinstance(axes, (list, np.ndarray)):
raise TypeError(f"axes must be a list or an array not {type(axes)}.")
return OnnxReduceAnyApi18(
- OnnxReduceSumSquare, OnnxReduceSumSquare_13, # noqa
- OnnxReduceSumSquare_11, OnnxReduceSumSquare_1, # noqa
- *x, axes=axes, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
-
-
-def OnnxReduceMeanApi18(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+ OnnxReduceSumSquare,
+ OnnxReduceSumSquare_13, # noqa
+ OnnxReduceSumSquare_11,
+ OnnxReduceSumSquare_1, # noqa
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceMeanApi18(*x, axes=None, keepdims=1, op_version=None, output_names=None):
"""
Adds operator ReduceMean with opset>=18 following API from opset 17.
"""
return OnnxReduceAnyApi18(
- OnnxReduceMean, OnnxReduceMean_13, # noqa
- OnnxReduceMean_11, OnnxReduceMean_1, # noqa
- *x, axes=axes, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
-
-
-def OnnxReduceMaxApi18(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+ OnnxReduceMean,
+ OnnxReduceMean_13, # noqa
+ OnnxReduceMean_11,
+ OnnxReduceMean_1, # noqa
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceMaxApi18(*x, axes=None, keepdims=1, op_version=None, output_names=None):
"""
    Adds operator ReduceMax with opset>=18 following API from opset 17.
"""
return OnnxReduceAnyApi18(
- OnnxReduceMax, OnnxReduceMax_13, # noqa
- OnnxReduceMax_11, OnnxReduceMax_1, # noqa
- *x, axes=axes, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
-
-
-def OnnxReduceLogSumExpApi18(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+ OnnxReduceMax,
+ OnnxReduceMax_13, # noqa
+ OnnxReduceMax_11,
+ OnnxReduceMax_1, # noqa
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceLogSumExpApi18(
+ *x, axes=None, keepdims=1, op_version=None, output_names=None
+):
"""
    Adds operator ReduceLogSumExp with opset>=18 following API from opset 17.
"""
return OnnxReduceAnyApi18(
- OnnxReduceLogSumExp, OnnxReduceLogSumExp_13, # noqa
- OnnxReduceLogSumExp_11, OnnxReduceLogSumExp_1, # noqa
- *x, axes=axes, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
-
-
-def OnnxReduceL2Api18(*x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+ OnnxReduceLogSumExp,
+ OnnxReduceLogSumExp_13, # noqa
+ OnnxReduceLogSumExp_11,
+ OnnxReduceLogSumExp_1, # noqa
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxReduceL2Api18(*x, axes=None, keepdims=1, op_version=None, output_names=None):
"""
    Adds operator ReduceL2 with opset>=18 following API from opset 17.
"""
return OnnxReduceAnyApi18(
- OnnxReduceL2, OnnxReduceL2_13, # noqa
- OnnxReduceL2_11, OnnxReduceL2_1, # noqa
- *x, axes=axes, keepdims=keepdims, op_version=op_version,
- output_names=output_names)
-
-
-def OnnxSplitApi18(*x, axis=0, split=None, num_outputs=None,
- op_version=None, output_names=None):
+ OnnxReduceL2,
+ OnnxReduceL2_13, # noqa
+ OnnxReduceL2_11,
+ OnnxReduceL2_1, # noqa
+ *x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxSplitApi18(
+ *x, axis=0, split=None, num_outputs=None, op_version=None, output_names=None
+):
"""
Adds operator Split with opset>=13 following API from opset 11.
"""
@@ -364,47 +474,68 @@ def OnnxSplitApi18(*x, axis=0, split=None, num_outputs=None,
if output_names is None:
raise RuntimeError(
"split or num_outputs or output_names "
- "must be specified since opset 18.")
+ "must be specified since opset 18."
+ )
num_outputs = len(output_names)
if num_outputs is None:
- raise AttributeError(
- "num_outputs cannot be None for Split-18.")
+ raise AttributeError("num_outputs cannot be None for Split-18.")
return OnnxSplit_18( # noqa
- *x, axis=axis, op_version=op_version,
- num_outputs=num_outputs, output_names=output_names)
+ *x,
+ axis=axis,
+ op_version=op_version,
+ num_outputs=num_outputs,
+ output_names=output_names,
+ )
if num_outputs is None:
return OnnxSplit_18( # noqa
- *x, np.array(split, dtype=np.int64), axis=axis,
- op_version=op_version, output_names=output_names)
+ *x,
+ np.array(split, dtype=np.int64),
+ axis=axis,
+ op_version=op_version,
+ output_names=output_names,
+ )
return OnnxSplit_18( # noqa
- *x, np.array(split, dtype=np.int64), axis=axis,
- num_outputs=num_outputs, op_version=op_version,
- output_names=output_names)
+ *x,
+ np.array(split, dtype=np.int64),
+ axis=axis,
+ num_outputs=num_outputs,
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 13:
if split is None:
return OnnxSplit_13( # noqa
- *x, axis=axis, op_version=op_version,
- output_names=output_names)
+ *x, axis=axis, op_version=op_version, output_names=output_names
+ )
return OnnxSplit_13( # noqa
- *x, np.array(split, dtype=np.int64), axis=axis,
- op_version=op_version, output_names=output_names)
+ *x,
+ np.array(split, dtype=np.int64),
+ axis=axis,
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 11:
if split is None:
return OnnxSplit_11( # noqa
- *x, axis=axis, op_version=op_version,
- output_names=output_names)
+ *x, axis=axis, op_version=op_version, output_names=output_names
+ )
return OnnxSplit_11( # noqa
- *x, split=split, axis=axis, op_version=op_version,
- output_names=output_names)
+ *x, split=split, axis=axis, op_version=op_version, output_names=output_names
+ )
if split is None:
return OnnxSplit_2( # noqa
- *x, axis=axis, op_version=op_version, output_names=output_names)
- return OnnxSplit_2(*x, split=split, axis=axis, # noqa
- op_version=op_version, output_names=output_names)
-
-
-def OnnxSqueezeApi11(*x, axes=None, op_version=None,
- output_names=None):
+ *x, axis=axis, op_version=op_version, output_names=output_names
+ )
+ return OnnxSplit_2(
+ *x,
+ split=split,
+ axis=axis, # noqa
+ op_version=op_version,
+ output_names=output_names,
+ )
+
+
+def OnnxSqueezeApi11(*x, axes=None, op_version=None, output_names=None):
"""
Adds operator Squeeze with opset>=13 following API from opset 11.
"""
@@ -412,18 +543,21 @@ def OnnxSqueezeApi11(*x, axes=None, op_version=None,
raise RuntimeError("op_version must be specified.")
if op_version is None or op_version >= 13:
return OnnxSqueeze( # noqa
- *x, np.array(axes, dtype=np.int64),
- op_version=op_version, output_names=output_names)
+ *x,
+ np.array(axes, dtype=np.int64),
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 11:
return OnnxSqueeze_11( # noqa
- *x, axes=axes, op_version=op_version,
- output_names=output_names)
- return OnnxSqueeze_1(*x, axes=axes, # noqa
- op_version=op_version, output_names=output_names)
+ *x, axes=axes, op_version=op_version, output_names=output_names
+ )
+ return OnnxSqueeze_1(
+ *x, axes=axes, op_version=op_version, output_names=output_names # noqa
+ )
-def OnnxUnsqueezeApi11(*x, axes=None, op_version=None,
- output_names=None):
+def OnnxUnsqueezeApi11(*x, axes=None, op_version=None, output_names=None):
"""
Adds operator Unsqueeze with opset>=13 following API from opset 11.
"""
@@ -431,34 +565,40 @@ def OnnxUnsqueezeApi11(*x, axes=None, op_version=None,
raise RuntimeError("op_version must be specified.")
if op_version is None or op_version >= 13:
return OnnxUnsqueeze( # noqa
- *x, np.array(axes, dtype=np.int64),
- op_version=op_version, output_names=output_names)
+ *x,
+ np.array(axes, dtype=np.int64),
+ op_version=op_version,
+ output_names=output_names,
+ )
if op_version >= 11:
return OnnxUnsqueeze_11( # noqa
- *x, axes=axes, op_version=op_version,
- output_names=output_names)
- return OnnxUnsqueeze_1(*x, axes=axes, # noqa
- op_version=op_version, output_names=output_names)
+ *x, axes=axes, op_version=op_version, output_names=output_names
+ )
+ return OnnxUnsqueeze_1(
+ *x, axes=axes, op_version=op_version, output_names=output_names # noqa
+ )
-def OnnxReduceL2_typed(dtype, x, axes=None, keepdims=1, op_version=None,
- output_names=None):
+def OnnxReduceL2_typed(
+ dtype, x, axes=None, keepdims=1, op_version=None, output_names=None
+):
"""
Adds operator ReduceL2 for float or double.
"""
if dtype == np.float32:
return OnnxReduceL2Api18( # noqa
- x, axes=axes, keepdims=keepdims,
- op_version=op_version, output_names=output_names)
+ x,
+ axes=axes,
+ keepdims=keepdims,
+ op_version=op_version,
+ output_names=output_names,
+ )
x2 = OnnxMul(x, x, op_version=op_version) # noqa
- red = OnnxReduceSumApi11(
- x2, axes=[1], keepdims=1, op_version=op_version)
- return OnnxSqrt( # noqa
- red, op_version=op_version, output_names=output_names)
+ red = OnnxReduceSumApi11(x2, axes=[1], keepdims=1, op_version=op_version)
+ return OnnxSqrt(red, op_version=op_version, output_names=output_names) # noqa
-def OnnxReshapeApi13(*x, allowzero=0, op_version=None,
- output_names=None):
+def OnnxReshapeApi13(*x, allowzero=0, op_version=None, output_names=None):
"""
Adds operator Reshape with opset>=14 following API from opset 13.
"""
@@ -466,12 +606,10 @@ def OnnxReshapeApi13(*x, allowzero=0, op_version=None,
raise RuntimeError("op_version must be specified.")
if op_version is None or op_version >= 14:
return OnnxReshape( # noqa
- *x, allowzero=allowzero,
- op_version=op_version, output_names=output_names)
+ *x, allowzero=allowzero, op_version=op_version, output_names=output_names
+ )
if op_version >= 13:
return OnnxReshape_13( # noqa
- *x, op_version=op_version,
- output_names=output_names)
- return OnnxReshape_5( # noqa
- *x, op_version=op_version,
- output_names=output_names)
+ *x, op_version=op_version, output_names=output_names
+ )
+ return OnnxReshape_5(*x, op_version=op_version, output_names=output_names) # noqa
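For the opset-dispatching helpers above, a short sketch of the intended call pattern (illustrative; the input name "X" and the shape are placeholders): with op_version >= 13 the axes become a tensor input (ReduceSum-13), while older opsets keep them as an attribute.

import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxReduceSumApi11
from skl2onnx.common.data_types import FloatTensorType

# Same Python call, dispatched to the ReduceSum version matching op_version.
node = OnnxReduceSumApi11(
    "X", axes=[1], keepdims=0, op_version=13, output_names=["Y"]
)
onx = node.to_onnx(inputs=[("X", FloatTensorType([None, 3]))], target_opset=13)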
diff --git a/skl2onnx/algebra/onnx_subgraph_operator_mixin.py b/skl2onnx/algebra/onnx_subgraph_operator_mixin.py
index 0f7de9c8d..e265b7198 100644
--- a/skl2onnx/algebra/onnx_subgraph_operator_mixin.py
+++ b/skl2onnx/algebra/onnx_subgraph_operator_mixin.py
@@ -7,4 +7,5 @@ class OnnxSubGraphOperatorMixin(OnnxOperatorMixin):
"""
:class:`OnnxOperatorMixin` for converters.
"""
+
pass
diff --git a/skl2onnx/algebra/sklearn_ops.py b/skl2onnx/algebra/sklearn_ops.py
index d0dec729b..320053c18 100644
--- a/skl2onnx/algebra/sklearn_ops.py
+++ b/skl2onnx/algebra/sklearn_ops.py
@@ -6,6 +6,7 @@
import sys
import textwrap
from sklearn.pipeline import Pipeline, FeatureUnion
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -17,16 +18,21 @@
def ClassFactorySklearn(skl_obj, class_name, doc, conv, shape_calc, alias):
from .onnx_subgraph_operator_mixin import OnnxSubGraphOperatorMixin
- newclass = type(class_name, (OnnxSubGraphOperatorMixin, skl_obj),
- {'__doc__': doc,
- 'operator_name': skl_obj.__name__,
- '_fct_converter': conv,
- '_fct_shape_calc': shape_calc,
- 'input_range': [1, 1e9],
- 'output_range': [1, 1e9],
- 'op_version': None,
- 'alias': alias,
- '__module__': __name__})
+ newclass = type(
+ class_name,
+ (OnnxSubGraphOperatorMixin, skl_obj),
+ {
+ "__doc__": doc,
+ "operator_name": skl_obj.__name__,
+ "_fct_converter": conv,
+ "_fct_shape_calc": shape_calc,
+ "input_range": [1, 1e9],
+ "output_range": [1, 1e9],
+ "op_version": None,
+ "alias": alias,
+ "__module__": __name__,
+ },
+ )
return newclass
@@ -52,9 +58,7 @@ def dynamic_class_creation_sklearn():
prefix = "Sklearn" if "sklearn" in str(skl_obj) else ""
class_name = "Onnx" + prefix + skl_name
try:
- cl = ClassFactorySklearn(skl_obj, class_name,
- doc, conv, shape_calc,
- name)
+ cl = ClassFactorySklearn(skl_obj, class_name, doc, conv, shape_calc, name)
except TypeError:
continue
cls[class_name] = cl
@@ -85,11 +89,15 @@ class to *skl_cl*.
available = sorted(filter(lambda n: prefix in n, sys.modules))
raise RuntimeError(
"Unable to find a class for '{}' in\n{}".format(
- skl_cl.__name__, "\n".join(available)))
+ skl_cl.__name__, "\n".join(available)
+ )
+ )
cl = getattr(this, full_name)
if "automation" in str(cl):
- raise RuntimeError("Dynamic operation issue with class "
- "name '{}' from '{}'.".format(cl, __name__))
+ raise RuntimeError(
+ "Dynamic operation issue with class "
+ "name '{}' from '{}'.".format(cl, __name__)
+ )
return cl
@@ -109,8 +117,7 @@ def __init__(self, steps, memory=None, verbose=False, op_version=None):
if ColumnTransformer is not None:
- class OnnxSklearnColumnTransformer(ColumnTransformer,
- OnnxSubGraphOperatorMixin):
+ class OnnxSklearnColumnTransformer(ColumnTransformer, OnnxSubGraphOperatorMixin):
"""
Combines `ColumnTransformer
`.
"""
- def __init__(self, sklearn_model, white_op=None, black_op=None,
- verbose=0):
+ def __init__(self, sklearn_model, white_op=None, black_op=None, verbose=0):
super(SklearnModelContainerNode, self).__init__(
- sklearn_model, white_op=white_op, black_op=black_op,
- verbose=verbose)
+ sklearn_model, white_op=white_op, black_op=black_op, verbose=verbose
+ )
# Scikit-learn models have no input and output specified,
# so we create them and store them in this container.
self._inputs = []
@@ -217,8 +218,15 @@ class ModelComponentContainer(_WhiteBlackContainer):
*ONNX* *ModelProto*.
"""
- def __init__(self, target_opset, options=None, registered_models=None,
- white_op=None, black_op=None, verbose=0):
+ def __init__(
+ self,
+ target_opset,
+ options=None,
+ registered_models=None,
+ white_op=None,
+ black_op=None,
+ verbose=0,
+ ):
"""
:param target_opset: number, for example, 7 for *ONNX 1.2*, and
8 for *ONNX 1.3*.
@@ -231,7 +239,8 @@ def __init__(self, target_opset, options=None, registered_models=None,
:param verbose: display information while converting
"""
_WhiteBlackContainer.__init__(
- self, white_op=white_op, black_op=black_op, verbose=verbose)
+ self, white_op=white_op, black_op=black_op, verbose=verbose
+ )
# Inputs of ONNX graph. They are ValueInfoProto in ONNX.
self.inputs = []
# Outputs of ONNX graph. They are ValueInfoProto in ONNX.
@@ -253,10 +262,10 @@ def __init__(self, target_opset, options=None, registered_models=None,
# matches the ONNX version.
if isinstance(target_opset, dict):
self.target_opset_all = target_opset
- self.target_opset = target_opset.get('', None)
+ self.target_opset = target_opset.get("", None)
else:
self.target_opset = target_opset
- self.target_opset_all = {'': target_opset}
+ self.target_opset_all = {"": target_opset}
# Additional options given to converters.
self.options = options
# All registered models.
@@ -270,40 +279,45 @@ def swap_names(self, old_name, new_name):
:param new_name: new name
:return: list of impacted objects
"""
- exc_list = {'Scan', 'Loop', 'If'}
+ exc_list = {"Scan", "Loop", "If"}
for node in self.nodes:
if node.op_type not in exc_list:
continue
- if (old_name in node.input or old_name in node.output or
- new_name in node.input or new_name in node.output):
+ if (
+ old_name in node.input
+ or old_name in node.output
+ or new_name in node.input
+ or new_name in node.output
+ ):
raise NotImplementedError(
"Unable to handle subgraphs for node type %r."
- "(%r, %r)" % (node.op_type, old_name, new_name))
+ "(%r, %r)" % (node.op_type, old_name, new_name)
+ )
res = []
for inp in self.inputs:
if inp.name == old_name:
inp.name = new_name
- res.append(('Io', inp))
+ res.append(("Io", inp))
elif inp.name == new_name:
inp.name = old_name
- res.append(('In', inp))
+ res.append(("In", inp))
for inp in self.outputs:
if inp.name == old_name:
inp.name = new_name
- res.append(('Oo', inp))
+ res.append(("Oo", inp))
elif inp.name == new_name:
inp.name = old_name
- res.append(('On', inp))
+ res.append(("On", inp))
for inp in self.initializers:
if inp.name == old_name:
inp.name = new_name
- res.append(('-o', inp))
+ res.append(("-o", inp))
elif inp.name == new_name:
inp.name = old_name
- res.append(('-n', inp))
+ res.append(("-n", inp))
for node in self.nodes:
modified = False
@@ -328,8 +342,8 @@ def swap_names(self, old_name, new_name):
if modified:
if node.op_type in exc_list:
raise NotImplementedError(
- "Unable to handle subgraphs for node type %r."
- "" % node.op_type)
+ "Unable to handle subgraphs for node type %r." "" % node.op_type
+ )
node.input[:] = new_input[:]
node.output[:] = new_output[:]
res.append(("n-", node))
@@ -343,28 +357,23 @@ def __str__(self):
if self.inputs:
rows.append("INPUTS")
for inp in self.inputs:
- rows.append(
- " " + str(inp).replace(" ", "").replace("\n", " "))
+ rows.append(" " + str(inp).replace(" ", "").replace("\n", " "))
if self.outputs:
rows.append("OUTPUTS")
for out in self.outputs:
- rows.append(
- " " + str(out).replace(" ", "").replace("\n", " "))
+ rows.append(" " + str(out).replace(" ", "").replace("\n", " "))
if self.initializers:
rows.append("INITIALIZERS")
for ini in self.initializers:
- rows.append(
- " " + str(ini).replace(" ", "").replace("\n", " "))
+ rows.append(" " + str(ini).replace(" ", "").replace("\n", " "))
if self.value_info:
rows.append("NODES")
for val in self.value_info:
- rows.append(
- " " + str(val).replace(" ", "").replace("\n", " "))
+ rows.append(" " + str(val).replace(" ", "").replace("\n", " "))
if self.nodes:
rows.append("PROTO")
for nod in self.nodes:
- rows.append(
- " " + str(nod).replace(" ", "").replace("\n", " "))
+ rows.append(" " + str(nod).replace(" ", "").replace("\n", " "))
return "\n".join(rows)
def _make_value_info(self, variable):
@@ -438,54 +447,77 @@ def add_initializer(self, name, onnx_type, shape, content):
tensor.raw_data = content.raw_data
tensor.dims.extend(content.dims)
elif shape is None and isinstance(
- content, (np.float32, np.float64, np.int32,
- np.int64, float, np.int8, np.uint8,
- np.bool_, np.str_, str)):
+ content,
+ (
+ np.float32,
+ np.float64,
+ np.int32,
+ np.int64,
+ float,
+ np.int8,
+ np.uint8,
+ np.bool_,
+ np.str_,
+ str,
+ ),
+ ):
tensor = make_tensor(name, onnx_type, [], [content])
- elif (SparseTensorProto is not None and
- isinstance(content, SparseTensorProto)):
+ elif SparseTensorProto is not None and isinstance(content, SparseTensorProto):
raise NotImplementedError("Not implemented yet.")
elif shape is None:
tensor = make_attribute(name, content)
elif isinstance(content, coo_matrix):
if SparseTensorProto is None:
raise RuntimeError(
- "Sparse matrices require SparseTensorProto. Update onnx.")
+ "Sparse matrices require SparseTensorProto. Update onnx."
+ )
values_tensor = make_tensor(
- name + "_v", data_type=onnx_type,
- dims=(len(content.data), ), vals=content.data)
- indices = [i * content.shape[1] + j
- for i, j in zip(content.row, content.col)]
+ name + "_v",
+ data_type=onnx_type,
+ dims=(len(content.data),),
+ vals=content.data,
+ )
+ indices = [
+ i * content.shape[1] + j for i, j in zip(content.row, content.col)
+ ]
indices_tensor = make_tensor(
- name=name + "_i", data_type=TensorProto.INT64,
- dims=(len(indices), ), vals=indices)
+ name=name + "_i",
+ data_type=TensorProto.INT64,
+ dims=(len(indices),),
+ vals=indices,
+ )
dense_shape = list(content.shape)
sparse_tensor = make_sparse_tensor(
- values_tensor, indices_tensor, dense_shape)
+ values_tensor, indices_tensor, dense_shape
+ )
# cached value: same without names
values_tensor = make_tensor(
- "_v", data_type=onnx_type,
- dims=(len(content.data), ), vals=content.data)
+ "_v", data_type=onnx_type, dims=(len(content.data),), vals=content.data
+ )
indices_tensor = make_tensor(
- name="_i", data_type=TensorProto.INT64,
- dims=(len(indices), ), vals=indices)
+ name="_i",
+ data_type=TensorProto.INT64,
+ dims=(len(indices),),
+ vals=indices,
+ )
cached_value = make_sparse_tensor(
- values_tensor, indices_tensor, dense_shape)
+ values_tensor, indices_tensor, dense_shape
+ )
else:
if any(d is None for d in shape):
- raise ValueError('Shape of initializer cannot contain None.')
- if (hasattr(content, 'dtype') and
- content.dtype in (bool, np.bool_)):
+ raise ValueError("Shape of initializer cannot contain None.")
+ if hasattr(content, "dtype") and content.dtype in (bool, np.bool_):
content = content.astype(np.int32)
try:
tensor = make_tensor(name, onnx_type, shape, content)
except TypeError as e:
raise TypeError(
"Unable to make a tensor name=%r "
- "onnx_type=%r shape=%r content-type=%r." % (
- name, onnx_type, shape, type(content))) from e
+ "onnx_type=%r shape=%r content-type=%r."
+ % (name, onnx_type, shape, type(content))
+ ) from e
if tensor is not None:
if cached_value is None:
@@ -502,8 +534,12 @@ def add_initializer(self, name, onnx_type, shape, content):
return tensor
self.add_node(
- 'Identity', cached_name, name, op_version=self.target_opset,
- name=name + '_op')
+ "Identity",
+ cached_name,
+ name,
+ op_version=self.target_opset,
+ name=name + "_op",
+ )
return name
if sparse_tensor is not None:
@@ -512,17 +548,25 @@ def add_initializer(self, name, onnx_type, shape, content):
if cached_name is None:
self.initializers_strings[content] = name
self.add_node(
- 'Constant', [], [name], sparse_value=sparse_tensor,
- op_version=self.target_opset, name=name + '_op')
+ "Constant",
+ [],
+ [name],
+ sparse_value=sparse_tensor,
+ op_version=self.target_opset,
+ name=name + "_op",
+ )
return sparse_tensor
self.add_node(
- 'Identity', cached_name, name, op_version=self.target_opset,
- name=name + '_op')
+ "Identity",
+ cached_name,
+ name,
+ op_version=self.target_opset,
+ name=name + "_op",
+ )
return name
- raise RuntimeError(
- "Either tensor or sparse_tensor should be defined.")
+ raise RuntimeError("Either tensor or sparse_tensor should be defined.")
def add_value_info(self, variable):
self.value_info.append(self._make_value_info(variable))
@@ -534,8 +578,7 @@ def _check_operator(self, op_type):
from a function defined in this submodule by looking
into the callstack. The test is enabled for *python >= 3.6*.
"""
- if (op_type in _apply_operation_specific and
- sys.version_info[:2] >= (3, 6)):
+ if op_type in _apply_operation_specific and sys.version_info[:2] >= (3, 6):
tb = traceback.extract_stack()
operation = []
fct = _apply_operation_specific[op_type]
@@ -548,12 +591,20 @@ def _check_operator(self, op_type):
if skl2 and len(operation) == 0:
raise RuntimeError(
"Operator '{0}' should be added with function "
- "'{1}' in submodule _apply_operation.".format(
- op_type, fct.__name__))
+ "'{1}' in submodule _apply_operation.".format(op_type, fct.__name__)
+ )
self.check_white_black_list(op_type)
- def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None,
- name=None, **attrs):
+ def add_node(
+ self,
+ op_type,
+ inputs,
+ outputs,
+ op_domain="",
+ op_version=None,
+ name=None,
+ **attrs,
+ ):
"""
Adds a *NodeProto* into the node list of the final ONNX model.
If the input operator's domain-version information cannot be
@@ -574,14 +625,13 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None,
attributes' names and attributes' values,
respectively.
"""
- if ("axes" in attrs and
- (attrs["axes"] is None or
- not isinstance(attrs["axes"], (list, np.ndarray)))):
+ if "axes" in attrs and (
+ attrs["axes"] is None or not isinstance(attrs["axes"], (list, np.ndarray))
+ ):
raise TypeError(
- f"axes must be a list or an array not "
- f"{type(attrs['axes'])}.")
- if name is None or not isinstance(
- name, str) or name == '':
+ f"axes must be a list or an array not " f"{type(attrs['axes'])}."
+ )
+ if name is None or not isinstance(name, str) or name == "":
name = f"N{len(self.nodes)}"
existing_names = set(n.name for n in self.nodes)
if name in existing_names:
@@ -599,44 +649,50 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None,
outputs = [outputs]
logger.debug(
"[Node] %r - %r -> %r (name=%r)",
- op_type, ",".join(inputs), ",".join(outputs), name)
+ op_type,
+ ",".join(inputs),
+ ",".join(outputs),
+ name,
+ )
try:
common = set(inputs) & set(outputs)
except TypeError as e:
raise TypeError(
"inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r."
- "" % (inputs, outputs, op_type)) from e
+ "" % (inputs, outputs, op_type)
+ ) from e
if common:
raise RuntimeError(
"inputs and outputs cannot have "
"variables in common {} in node '{}' "
- "with name '{}'.".format(common, op_type, name))
- if not isinstance(inputs, list) or not all(
- isinstance(s, str) for s in inputs):
- type_list = ','.join(list(str(type(s)) for s in inputs))
- raise ValueError('Inputs must be a list of string but get [%s]'
- % type_list)
- if (not isinstance(outputs, list) or
- not all(isinstance(s, str) for s in outputs)):
- type_list = ','.join(list(str(type(s)) for s in outputs))
- raise ValueError('Outputs must be a list of string but get [%s]'
- % type_list)
+ "with name '{}'.".format(common, op_type, name)
+ )
+ if not isinstance(inputs, list) or not all(isinstance(s, str) for s in inputs):
+ type_list = ",".join(list(str(type(s)) for s in inputs))
+            raise ValueError("Inputs must be a list of strings but got [%s]" % type_list)
+ if not isinstance(outputs, list) or not all(
+ isinstance(s, str) for s in outputs
+ ):
+ type_list = ",".join(list(str(type(s)) for s in outputs))
+ raise ValueError(
+                "Outputs must be a list of strings but got [%s]" % type_list
+ )
upd = {}
dtypes = set()
for k, v in attrs.items():
if v is None:
raise ValueError(
- 'Failed to create ONNX node. Undefined '
- 'attribute pair (%s, %s) found for type %r and '
- 'version %r' % (
- k, v, op_type, op_version))
+ "Failed to create ONNX node. Undefined "
+ "attribute pair (%s, %s) found for type %r and "
+ "version %r" % (k, v, op_type, op_version)
+ )
if isinstance(v, np.ndarray):
upd[k] = v
dtypes.add(v.dtype)
if upd:
attrs.update(upd)
- if 'dtype' in attrs and op_type != 'EyeLike':
+ if "dtype" in attrs and op_type != "EyeLike":
raise RuntimeError("dtype should not be a parameter.")
new_attrs = {}
@@ -652,21 +708,30 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None,
except (ValueError, TypeError) as e:
raise ValueError(
f"Unable to create node {op_type!r} with name={name!r} and "
- f"attributes={pprint.pformat(new_attrs)}.") from e
+ f"attributes={pprint.pformat(new_attrs)}."
+ ) from e
node.domain = op_domain
self.node_domain_version_pair_sets.add((op_domain, op_version))
self.nodes.append(node)
- if (self.target_opset is not None and
- op_version is not None and
- op_version > self.target_opset_any_domain(op_domain)):
+ if (
+ self.target_opset is not None
+ and op_version is not None
+ and op_version > self.target_opset_any_domain(op_domain)
+ ):
raise RuntimeError(
"Opset number {} is higher than targeted opsets {} for "
"node type '{}' name='{}' input={} "
"output={} (domain='{}').".format(
- op_version, self.target_opset_all,
- node.op_type, node.name,
- node.input, node.output, op_domain))
+ op_version,
+ self.target_opset_all,
+ node.op_type,
+ node.name,
+ node.input,
+ node.output,
+ op_domain,
+ )
+ )
def target_opset_any_domain(self, domain):
target_opset = self.target_opset_all
@@ -675,7 +740,7 @@ def target_opset_any_domain(self, domain):
to = target_opset[domain]
else:
to = None
- if to is None and domain == '':
+ if to is None and domain == "":
to = onnx_opset_version()
if to is None:
smap = C.schema_version_map()
@@ -690,14 +755,14 @@ def target_opset_any_domain(self, domain):
@property
def target_opset_onnx(self):
- return self.target_opset_any_domain('')
+ return self.target_opset_any_domain("")
def _get_op_version(self, domain, op_type):
"""
Determines the highest version of operator
*op_type* below or equal to *target_opset*.
"""
- if not hasattr(self, '_op_versions'):
+ if not hasattr(self, "_op_versions"):
self._build_op_version()
key = domain, op_type
vers = self._op_versions.get(key, None)
@@ -708,8 +773,8 @@ def _get_op_version(self, domain, op_type):
else:
warnings.warn(
"Unable to find operator '{}' in domain '{}' in ONNX, "
- "op_version is forced to 1.".format(
- op_type, domain))
+ "op_version is forced to 1.".format(op_type, domain)
+ )
vers = [1]
highest = self.target_opset_any_domain(domain)
pos = len(vers) - 1
@@ -719,8 +784,8 @@ def _get_op_version(self, domain, op_type):
pos -= 1
raise RuntimeError(
"Unable to find a suitable version for operator '{}' "
- "in domain '{}'. Available versions: {}.".format(
- op_type, domain, vers))
+ "in domain '{}'. Available versions: {}.".format(op_type, domain, vers)
+ )
def _build_op_version(self):
res = {}
@@ -738,25 +803,25 @@ def _build_op_version(self):
def _get_allowed_options(self, model):
if self.registered_models is not None:
if inspect.isfunction(model):
- if model not in self.registered_models['aliases']:
+ if model not in self.registered_models["aliases"]:
return None
- alias = self.registered_models['aliases'][model]
- elif hasattr(model, 'alias'):
+ alias = self.registered_models["aliases"][model]
+ elif hasattr(model, "alias"):
alias = model.alias
else:
- if type(model) not in self.registered_models['aliases']:
+ if type(model) not in self.registered_models["aliases"]:
return {}
- alias = self.registered_models['aliases'][type(model)]
- conv = self.registered_models['conv'][alias]
+ alias = self.registered_models["aliases"][type(model)]
+ conv = self.registered_models["conv"][alias]
allowed = conv.get_allowed_options()
if allowed is None:
return {}
return allowed
- clname = (str(model) if inspect.isfunction(model)
- else model.__class__.__name__)
+ clname = str(model) if inspect.isfunction(model) else model.__class__.__name__
raise NotImplementedError(
"No registered models, no known allowed options "
- "for model '{}'.".format(clname))
+ "for model '{}'.".format(clname)
+ )
def validate_options(self, operator):
"""
@@ -778,8 +843,12 @@ def get_options(self, model, default_values=None, fail=True):
:return: dictionary
"""
return _build_options(
- model, self.options, default_values,
- self._get_allowed_options(model), fail=fail)
+ model,
+ self.options,
+ default_values,
+ self._get_allowed_options(model),
+ fail=fail,
+ )
def has_options(self, model, option_name):
"""
@@ -838,40 +907,55 @@ def ensure_topological_order(self):
raise RuntimeError(
"Unable to sort a node (cycle). An output was "
"already ordered with name %r (iteration=%r)."
- "" % (name, n_iter))
+ "" % (name, n_iter)
+ )
order[name] = maxi
if len(missing_names) == 0:
continue
if len(missing_ops) > 0:
+
def nstr(name):
if name in order:
return "%s#%d" % (name, order[name])
return name
- rows = ["%s(%s) -> [%s]" % (
- n.name or n.op_type,
- ', '.join(map(nstr, n.input)),
- ', '.join(n.output))
- for n in missing_ops]
+
+ rows = [
+ "%s(%s) -> [%s]"
+ % (
+ n.name or n.op_type,
+ ", ".join(map(nstr, n.input)),
+ ", ".join(n.output),
+ )
+ for n in missing_ops
+ ]
rows.insert(0, "")
rows.append("--")
rows.append("--all-nodes--")
rows.append("--")
- rows.extend("%s|%s(%s) -> [%s]" % (
- n.op_type, n.name or n.op_type,
- ', '.join(map(nstr, n.input)),
- ', '.join(n.output))
- for n in self.nodes)
+ rows.extend(
+ "%s|%s(%s) -> [%s]"
+ % (
+ n.op_type,
+ n.name or n.op_type,
+ ", ".join(map(nstr, n.input)),
+ ", ".join(n.output),
+ )
+ for n in self.nodes
+ )
raise RuntimeError(
"After %d iterations for %d nodes, still unable "
"to sort names %r. The graph may be disconnected. "
- "List of operators: %s" % (
- n_iter, len(self.nodes), missing_names,
- "\n".join(rows)))
+ "List of operators: %s"
+ % (n_iter, len(self.nodes), missing_names, "\n".join(rows))
+ )
# Update order
- topo = sorted([(order[id(node)], node.op_type,
- node.name, str(id(node)))
- for node in self.nodes])
+ topo = sorted(
+ [
+ (order[id(node)], node.op_type, node.name, str(id(node)))
+ for node in self.nodes
+ ]
+ )
map_nodes = {str(id(node)): node for node in self.nodes}
self.nodes = [map_nodes[_[-1]] for _ in topo]
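For context, a minimal hypothetical sketch of how a converter typically drives the `add_node` API reformatted above; the converter name, the `Scaler` attributes and the estimator fields (`mean_`, `scale_`) are assumptions made for the example, not part of this change:

    def convert_my_scaler(scope, operator, container):
        # operator.raw_operator is assumed to look like sklearn's StandardScaler
        op = operator.raw_operator
        # add_node builds the NodeProto, validates attributes and opset limits,
        # and appends it to container.nodes
        container.add_node(
            "Scaler",
            [operator.inputs[0].full_name],
            [operator.outputs[0].full_name],
            op_domain="ai.onnx.ml",
            name=scope.get_unique_operator_name("Scaler"),
            offset=[float(m) for m in op.mean_],
            scale=[1.0 / float(s) for s in op.scale_],
        )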
diff --git a/skl2onnx/common/_onnx_optimisation_common.py b/skl2onnx/common/_onnx_optimisation_common.py
index 00486c2aa..e45753f9c 100644
--- a/skl2onnx/common/_onnx_optimisation_common.py
+++ b/skl2onnx/common/_onnx_optimisation_common.py
@@ -9,8 +9,9 @@
from onnx.helper import make_attribute
-def _apply_optimisation_on_graph(fct, onnx_model, recursive=True,
- debug_info=None, **kwargs):
+def _apply_optimisation_on_graph(
+ fct, onnx_model, recursive=True, debug_info=None, **kwargs
+):
"""
Applies an optimisation function *fct* on a graph
and not on the model.
@@ -22,10 +23,8 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True,
:param kwargs: additional parameters
return: new onnx model
"""
- if hasattr(onnx_model, 'graph'):
- graph = fct(
- onnx_model.graph, debug_info=debug_info + ['GRAPH'],
- **kwargs)
+ if hasattr(onnx_model, "graph"):
+ graph = fct(onnx_model.graph, debug_info=debug_info + ["GRAPH"], **kwargs)
new_model = make_model(graph)
new_model.ir_version = onnx_model.ir_version
new_model.producer_name = onnx_model.producer_name
@@ -33,7 +32,7 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True,
new_model.domain = onnx_model.domain
new_model.model_version = onnx_model.model_version
new_model.doc_string = onnx_model.doc_string
- if hasattr(onnx_model, 'value_info'):
+ if hasattr(onnx_model, "value_info"):
graph.value_info.extend(onnx_model.value_info)
while len(new_model.opset_import) > 0:
new_model.opset_import.pop()
@@ -42,8 +41,10 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True,
op_set.domain = oimp.domain
op_set.version = oimp.version
return new_model
- raise TypeError("This function only works on 'ModelProto' anod not not on"
- " {}.".format(type(onnx_model)))
+ raise TypeError(
+ "This function only works on 'ModelProto' anod not not on"
+ " {}.".format(type(onnx_model))
+ )
def _apply_remove_node_fct_node(fct, node, recursive, debug_info):
@@ -54,29 +55,30 @@ def _apply_remove_node_fct_node(fct, node, recursive, debug_info):
:param recursive: does it in subgraphs as well
:return: new node
"""
- if not hasattr(node, 'attribute'):
+ if not hasattr(node, "attribute"):
return node
modified = 0
new_atts = []
for att in node.attribute:
- if att.name == 'body':
+ if att.name == "body":
new_body = fct(
- att.g, recursive=recursive,
- debug_info=debug_info + [att.name])
+ att.g, recursive=recursive, debug_info=debug_info + [att.name]
+ )
new_atts.append(_make_att_graph(att.name, new_body))
modified += 1
else:
new_atts.append(att)
if modified > 0:
- new_node = _make_node(node.op_type, node.input,
- node.output, name=node.name,
- attributes=new_atts)
+ new_node = _make_node(
+ node.op_type, node.input, node.output, name=node.name, attributes=new_atts
+ )
return new_node
return node
-def _make_node(op_type, inputs, outputs, name=None, doc_string=None,
- domain=None, attributes=None):
+def _make_node(
+ op_type, inputs, outputs, name=None, doc_string=None, domain=None, attributes=None
+):
"""
Constructs a NodeProto.
@@ -105,8 +107,8 @@ def _make_node(op_type, inputs, outputs, name=None, doc_string=None,
if isinstance(attributes, dict):
if len(attributes) > 0:
node.attribute.extend(
- make_attribute(key, value)
- for key, value in sorted(attributes.items()))
+ make_attribute(key, value) for key, value in sorted(attributes.items())
+ )
elif attributes:
for att in attributes:
node.attribute.extend([att])
@@ -132,10 +134,10 @@ def _rename_node_input(onnx_node, old_name, new_name=None):
"""
inputs = [_replace(name, old_name, new_name) for name in onnx_node.input]
outputs = list(onnx_node.output)
- if hasattr(onnx_node, 'attribute'):
+ if hasattr(onnx_node, "attribute"):
new_atts = []
for att in onnx_node.attribute:
- if att.name == 'body':
+ if att.name == "body":
new_body = _rename_graph_input(att.g, old_name, new_name)
attr = AttributeProto()
attr.name = att.name
@@ -148,8 +150,13 @@ def _rename_node_input(onnx_node, old_name, new_name=None):
else:
atts = onnx_node.attribute
node = _make_node(
- onnx_node.op_type, inputs, outputs, name=onnx_node.name,
- domain=onnx_node.domain, attributes=atts)
+ onnx_node.op_type,
+ inputs,
+ outputs,
+ name=onnx_node.name,
+ domain=onnx_node.domain,
+ attributes=atts,
+ )
return node
@@ -173,9 +180,8 @@ def _rename_graph_output(graph, old_name, new_name):
value_info.doc_string = o.type.doc_string
outputs.append(value_info)
nodes = list(graph.node)
- nodes.append(_make_node('Identity', [old_name], [new_name]))
- new_graph = make_graph(nodes, graph.name, graph.input, outputs,
- graph.initializer)
+ nodes.append(_make_node("Identity", [old_name], [new_name]))
+ new_graph = make_graph(nodes, graph.name, graph.input, outputs, graph.initializer)
new_graph.value_info.extend(graph.value_info)
return new_graph
@@ -200,9 +206,8 @@ def _rename_graph_input(graph, old_name, new_name):
value_info.doc_string = i.type.doc_string
inputs.append(value_info)
nodes = list(graph.node)
- nodes.append(_make_node('Identity', [new_name], [old_name]))
- new_graph = make_graph(nodes, graph.name, inputs, graph.output,
- graph.initializer)
+ nodes.append(_make_node("Identity", [new_name], [old_name]))
+ new_graph = make_graph(nodes, graph.name, inputs, graph.output, graph.initializer)
new_graph.value_info.extend(graph.value_info)
return new_graph
@@ -226,10 +231,10 @@ def _rename_node_output(onnx_node, old_name, new_name):
"""
inputs = list(onnx_node.input)
outputs = [_replace(name, old_name, new_name) for name in onnx_node.output]
- if hasattr(onnx_node, 'attribute'):
+ if hasattr(onnx_node, "attribute"):
new_atts = []
for att in onnx_node.attribute:
- if att.name == 'body':
+ if att.name == "body":
new_body = _rename_graph_output(att.g, old_name, new_name)
new_atts.append(_make_att_graph(att.name, new_body))
else:
@@ -238,6 +243,11 @@ def _rename_node_output(onnx_node, old_name, new_name):
else:
atts = onnx_node.attribute
node = _make_node(
- onnx_node.op_type, inputs, outputs, name=onnx_node.name,
- domain=onnx_node.domain, attributes=atts)
+ onnx_node.op_type,
+ inputs,
+ outputs,
+ name=onnx_node.name,
+ domain=onnx_node.domain,
+ attributes=atts,
+ )
return node
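A rough illustration of what `_rename_graph_output` above produces for a tiny made-up graph: the original nodes are preserved, the output value info takes the new name, and an Identity node bridges the old result name to it (sketch only, details such as doc strings are omitted):

    from onnx import TensorProto
    from onnx.helper import make_graph, make_node, make_tensor_value_info

    g = make_graph(
        [make_node("Relu", ["X"], ["Y"])],
        "g",
        [make_tensor_value_info("X", TensorProto.FLOAT, [None, 3])],
        [make_tensor_value_info("Y", TensorProto.FLOAT, [None, 3])],
    )
    # _rename_graph_output(g, "Y", "label") is expected to be equivalent to:
    renamed = make_graph(
        [make_node("Relu", ["X"], ["Y"]), make_node("Identity", ["Y"], ["label"])],
        "g",
        [make_tensor_value_info("X", TensorProto.FLOAT, [None, 3])],
        [make_tensor_value_info("label", TensorProto.FLOAT, [None, 3])],
    )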
diff --git a/skl2onnx/common/_registration.py b/skl2onnx/common/_registration.py
index 0e2aa1bef..08070450e 100644
--- a/skl2onnx/common/_registration.py
+++ b/skl2onnx/common/_registration.py
@@ -11,15 +11,16 @@
class RegisteredConverter:
-
def __init__(self, fct, options):
self._fct = fct
self._options = options
def __call__(self, *args):
- if (len(args) == 3 and
- hasattr(args[2], '_get_allowed_options') and
- hasattr(args[1], 'raw_operator')):
+ if (
+ len(args) == 3
+ and hasattr(args[2], "_get_allowed_options")
+ and hasattr(args[1], "raw_operator")
+ ):
# Checks that the user did not specify a wrong option.
if args[1].raw_operator is not None:
args[2]._get_allowed_options(args[1].raw_operator)
@@ -37,8 +38,9 @@ def get_allowed_options(self):
_shape_calculator_pool = {}
-def register_converter(operator_name, conversion_function, overwrite=False,
- options=None):
+def register_converter(
+ operator_name, conversion_function, overwrite=False, options=None
+):
"""
:param operator_name: A unique operator ID. It is usually a string
but you can use a type as well
@@ -52,29 +54,28 @@ def register_converter(operator_name, conversion_function, overwrite=False,
(dictionary {name: supported values or None})
"""
if conversion_function is None:
- raise ValueError(
- "A converter cannot be None for %r." % operator_name)
+ raise ValueError("A converter cannot be None for %r." % operator_name)
if not overwrite and operator_name in _converter_pool:
- raise ValueError('We do not overwrite registered converter '
- 'by default')
+ raise ValueError("We do not overwrite registered converter " "by default")
if len(_converter_pool) > 0:
key = next(iter(_converter_pool))
- check_signature(conversion_function, _converter_pool[key]._fct,
- skip=('operator', ))
- _converter_pool[operator_name] = RegisteredConverter(
- conversion_function, options)
+ check_signature(
+ conversion_function, _converter_pool[key]._fct, skip=("operator",)
+ )
+ _converter_pool[operator_name] = RegisteredConverter(conversion_function, options)
def get_converter(operator_name):
if operator_name not in _converter_pool:
- msg = 'Unsupported conversion for operator %s (%d registered)' % (
- operator_name, len(_converter_pool))
+ msg = "Unsupported conversion for operator %s (%d registered)" % (
+ operator_name,
+ len(_converter_pool),
+ )
raise ValueError(msg)
return _converter_pool[operator_name]
-def register_shape_calculator(operator_name, calculator_function,
- overwrite=False):
+def register_shape_calculator(operator_name, calculator_function, overwrite=False):
"""
:param operator_name: A unique operator ID. It is usually a string
but you can use a type as well
@@ -86,21 +87,21 @@ def register_shape_calculator(operator_name, calculator_function,
to enable overwriting.
"""
if calculator_function is None:
- raise ValueError(
- "A shape calculator cannot be None for %r." % operator_name)
+ raise ValueError("A shape calculator cannot be None for %r." % operator_name)
if not overwrite and operator_name in _shape_calculator_pool:
- raise ValueError('We do not overwrite registrated shape calculator '
- 'by default')
+ raise ValueError(
+ "We do not overwrite registrated shape calculator " "by default"
+ )
if calculator_function is not None and len(_shape_calculator_pool) > 0:
key = next(iter(_shape_calculator_pool))
- check_signature(calculator_function, _shape_calculator_pool[key],
- skip=('operator', ))
+ check_signature(
+ calculator_function, _shape_calculator_pool[key], skip=("operator",)
+ )
_shape_calculator_pool[operator_name] = calculator_function
def get_shape_calculator(operator_name):
if operator_name not in _shape_calculator_pool:
- msg = ("Unsupported shape calculator for operator "
- "'%s'." % operator_name)
+ msg = "Unsupported shape calculator for operator " "'%s'." % operator_name
raise ValueError(msg)
return _shape_calculator_pool[operator_name]
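For illustration, a hedged sketch of how these registration helpers are used; the alias `MyCustomAlias` and the two functions are hypothetical, and user code usually goes through `skl2onnx.update_registered_converter` rather than calling this private module directly:

    from skl2onnx.common._registration import (
        register_converter,
        register_shape_calculator,
    )
    from skl2onnx.common.data_types import FloatTensorType

    def my_shape_calculator(operator):
        # propagate the batch dimension, produce one output column
        n = operator.inputs[0].get_first_dimension()
        operator.outputs[0].type = FloatTensorType([n, 1])

    def my_converter(scope, operator, container):
        container.add_node(
            "Identity",
            [operator.inputs[0].full_name],
            [operator.outputs[0].full_name],
            name=scope.get_unique_operator_name("Identity"),
        )

    register_shape_calculator("MyCustomAlias", my_shape_calculator)
    register_converter("MyCustomAlias", my_converter)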
diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py
index b87da0f3a..a5f5139a9 100644
--- a/skl2onnx/common/_topology.py
+++ b/skl2onnx/common/_topology.py
@@ -10,23 +10,28 @@
from onnx import onnx_pb as onnx_proto
from onnx.helper import make_graph, make_model, make_tensor_value_info
from onnxconverter_common.data_types import ( # noqa
- DataType, TensorType,
- FloatType, Int64Type, StringType,
- DictionaryType, FloatTensorType, # noqa
- Int64TensorType, SequenceType, # noqa
- StringTensorType, DoubleTensorType,
- Int32TensorType, BooleanTensorType,
- DoubleTensorType)
+ DataType,
+ TensorType,
+ FloatType,
+ Int64Type,
+ StringType,
+ DictionaryType,
+ FloatTensorType, # noqa
+ Int64TensorType,
+ SequenceType, # noqa
+ StringTensorType,
+ DoubleTensorType,
+ Int32TensorType,
+ BooleanTensorType,
+ DoubleTensorType,
+)
+
try:
- from onnxconverter_common.data_types import (
- Int8TensorType, UInt8TensorType)
+ from onnxconverter_common.data_types import Int8TensorType, UInt8TensorType
except ImportError:
Int8TensorType = None
UInt8TensorType = None
-from ..proto import (
- get_opset_number_from_onnx,
- get_latest_tested_opset_version
-)
+from ..proto import get_opset_number_from_onnx, get_latest_tested_opset_version
from . import _registration
from . import utils
from .exceptions import MissingShapeCalculator, MissingConverter
@@ -38,22 +43,38 @@
def _default_OPSET_TO_IR_VERSION():
return {
- 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3,
- 7: 3, 8: 4, 9: 4, 10: 5, 11: 6, 12: 7,
- 13: 7, 14: 7, 15: 8, 16: 8, 17: 8, 18: 8,
- 19: 9
+ 1: 3,
+ 2: 3,
+ 3: 3,
+ 4: 3,
+ 5: 3,
+ 6: 3,
+ 7: 3,
+ 8: 4,
+ 9: 4,
+ 10: 5,
+ 11: 6,
+ 12: 7,
+ 13: 7,
+ 14: 7,
+ 15: 8,
+ 16: 8,
+ 17: 8,
+ 18: 8,
+ 19: 9,
}
try:
from onnxconverter_common.topology import OPSET_TO_IR_VERSION
+
assert OPSET_TO_IR_VERSION[18] is not None
except (ImportError, KeyError):
OPSET_TO_IR_VERSION = _default_OPSET_TO_IR_VERSION()
OPSET_ML_TO_OPSET = {1: 11, 2: 15, 3: 18}
-logger = getLogger('skl2onnx')
+logger = getLogger("skl2onnx")
def get_default_opset_for_domain(domain):
@@ -61,15 +82,16 @@ def get_default_opset_for_domain(domain):
     Returns the associated opset for a domain given the main opset.
"""
from .. import __max_supported_opset__ as main_opset
- if domain == '':
+
+ if domain == "":
return main_opset
- if domain == 'ai.onnx.ml':
+ if domain == "ai.onnx.ml":
if main_opset >= 16:
return 3
if main_opset < 6:
return 1
return 2
- if domain == 'ai.onnx.training':
+ if domain == "ai.onnx.training":
return 1
return None
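To make the branches above concrete, a hypothetical reading of this helper (the value 18 for `__max_supported_opset__` is only an assumption for the example):

    # assuming skl2onnx.__max_supported_opset__ == 18:
    get_default_opset_for_domain("")                  # -> 18 (main opset)
    get_default_opset_for_domain("ai.onnx.ml")        # -> 3  (main opset >= 16)
    get_default_opset_for_domain("ai.onnx.training")  # -> 1
    get_default_opset_for_domain("com.example")       # -> None (unknown domain)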
@@ -79,6 +101,7 @@ class Variable:
Defines a variable which holds any data defined
from *ONNX* types.
"""
+
_UNIQUE_NUMBER_ = 0
def __init__(self, raw_name, onnx_name, scope, type=None):
@@ -95,14 +118,13 @@ def __init__(self, raw_name, onnx_name, scope, type=None):
e.g., FloatTensorType
"""
if not isinstance(raw_name, str):
+ raise TypeError("raw_name must be a string not '%s'." % raw_name.__class__)
+ if type is not None and not hasattr(type, "shape"):
raise TypeError(
- "raw_name must be a string not '%s'." % raw_name.__class__)
- if type is not None and not hasattr(type, 'shape'):
- raise TypeError(
- "Unexpected type for variable raw_name=%r, type=%r." % (
- raw_name, type))
- if not isinstance(onnx_name, str) or '(' in onnx_name:
- if onnx_name.startswith('u(') and onnx_name[-1] == ')':
+ "Unexpected type for variable raw_name=%r, type=%r." % (raw_name, type)
+ )
+ if not isinstance(onnx_name, str) or "(" in onnx_name:
+ if onnx_name.startswith("u(") and onnx_name[-1] == ")":
onnx_name0 = onnx_name
if scope is None:
onnx_name = "UU%03dUU" % Variable._UNIQUE_NUMBER_
@@ -110,11 +132,13 @@ def __init__(self, raw_name, onnx_name, scope, type=None):
else:
onnx_name = scope.get_unique_variable_name("U")
logger.debug(
- '[Var] rename raw_name=%r, onnx_name=%r into %r',
- raw_name, onnx_name0, onnx_name)
+ "[Var] rename raw_name=%r, onnx_name=%r into %r",
+ raw_name,
+ onnx_name0,
+ onnx_name,
+ )
else:
- raise TypeError(
- "onnx_name must be a string not %r." % onnx_name)
+ raise TypeError("onnx_name must be a string not %r." % onnx_name)
if type is not None:
shape = type.shape
@@ -123,8 +147,9 @@ def __init__(self, raw_name, onnx_name, scope, type=None):
if len(not_none) and min(not_none) == 0:
raise RuntimeError(
"A variable cannot be empty, raw_name=%r, "
- "onnx_name=%r, shape=%r, type=%r." % (
- raw_name, onnx_name, shape, type))
+ "onnx_name=%r, shape=%r, type=%r."
+ % (raw_name, onnx_name, shape, type)
+ )
self._raw_name = raw_name
self._onnx_name = onnx_name
@@ -138,24 +163,26 @@ def __init__(self, raw_name, onnx_name, scope, type=None):
self._is_root = None
self._is_leaf = None
if self.type is not None and not isinstance(self.type, DataType):
- raise TypeError(
- "shape must be a DataType not {}.".format(self.type))
+ raise TypeError("shape must be a DataType not {}.".format(self.type))
if isinstance(self.type, TensorType):
shape = self.type.shape
if not isinstance(shape, (list, tuple)):
try:
shape = list(shape)
except TypeError:
- raise TypeError("shape must be a tuple or a list not "
- "{}.".format(type_fct(shape)))
+ raise TypeError(
+ "shape must be a tuple or a list not "
+ "{}.".format(type_fct(shape))
+ )
for dim in shape:
if dim is None:
continue
if not isinstance(dim, (int, np.int32, np.int64, np.intc)):
raise TypeError(
"shape must contains integers not %r (type=%r)."
- "" % (dim, dim.__class__))
- logger.debug('[Var] +%s', self)
+ "" % (dim, dim.__class__)
+ )
+ logger.debug("[Var] +%s", self)
# links to operators using those variables
self.operators_outputs_ = []
@@ -169,8 +196,8 @@ def _check(self):
continue
if not isinstance(k, (int, np.integer)):
raise ValueError(
- "Unexpected type %r for shape %r."
- "" % (type(k), self))
+ "Unexpected type %r for shape %r." "" % (type(k), self)
+ )
@property
def raw_name(self):
@@ -203,14 +230,14 @@ def is_leaf(self):
def init_status(self, is_fed=None, is_root=None, is_leaf=None):
if is_fed is not None and is_fed != self.is_fed:
logger.debug(
- '[Var] update is_fed=%r for %r, parent=%r',
- is_fed, self, self._parent)
+ "[Var] update is_fed=%r for %r, parent=%r", is_fed, self, self._parent
+ )
self._is_fed = is_fed
if is_root is not None and is_root != self.is_root:
- logger.debug('[Var] update is_root=%r for %r', is_root, self)
+ logger.debug("[Var] update is_root=%r for %r", is_root, self)
self._is_root = is_root
if is_leaf is not None and is_leaf != self.is_leaf:
- logger.debug('[Var] update is_leaf=%r for %r', is_leaf, self)
+ logger.debug("[Var] update is_leaf=%r for %r", is_leaf, self)
self._is_leaf = is_leaf
def __setattr__(self, name, value):
@@ -220,25 +247,31 @@ def __setattr__(self, name, value):
raise AttributeError("You must use method set_onnx_name.")
elif name in {"is_fed", "is_root", "is_leaf"}:
raise AttributeError("You must use method init_status.")
- elif name in {'scope', 'raw_name'}:
+ elif name in {"scope", "raw_name"}:
raise AttributeError("scope or raw_name cannot be changed.")
self.__dict__[name] = value
def set_type(self, new_type):
- if (new_type is None or isinstance(new_type, (str, Variable)) or
- not hasattr(new_type, 'shape')):
+ if (
+ new_type is None
+ or isinstance(new_type, (str, Variable))
+ or not hasattr(new_type, "shape")
+ ):
raise TypeError(
- "Unexpected new type for variable %r, new_type=%r." % (
- self, new_type))
- logger.debug('[Var] update type for %r', self)
+ "Unexpected new type for variable %r, new_type=%r." % (self, new_type)
+ )
+ logger.debug("[Var] update type for %r", self)
self._type = new_type
self._check()
def set_onnx_name(self, onnx_name):
if onnx_name != self._onnx_name:
logger.debug(
- '[Var] update onnx_name, from %r to %r in %r',
- self.onnx_name, onnx_name, self)
+ "[Var] update onnx_name, from %r to %r in %r",
+ self.onnx_name,
+ onnx_name,
+ self,
+ )
if self.scope is not None and not isinstance(self.scope, str):
self.scope.rename_onnx_name(self._onnx_name, onnx_name)
self._onnx_name = onnx_name
@@ -247,9 +280,9 @@ def set_parent(self, operator):
if self._parent is not None:
raise RuntimeError(
"This variable is already the output of operator %r. "
- "It cannot be the output of %r." % (self._parent, operator))
- logger.debug(
- '[Var] set parent for %r, parent=%r', self, operator)
+ "It cannot be the output of %r." % (self._parent, operator)
+ )
+ logger.debug("[Var] set parent for %r, parent=%r", self, operator)
self._parent = operator
def get_first_dimension(self):
@@ -257,14 +290,12 @@ def get_first_dimension(self):
Returns the first dimension (batch dimension) or
None if not specified (shape is empty).
"""
- if (self.type is None or self.type.shape is None or
- len(self.type.shape) == 0):
+ if self.type is None or self.type.shape is None or len(self.type.shape) == 0:
return None
return self.type.shape[0]
def get_second_dimension(self):
- if (self.type is None or self.type.shape is None or
- len(self.type.shape) < 2):
+ if self.type is None or self.type.shape is None or len(self.type.shape) < 2:
return None
return self.type.shape[1]
@@ -276,14 +307,16 @@ def full_name(self):
return self.onnx_name
def __repr__(self):
- return ("Variable('{0}', '{1}', type={2})".format(
- self.raw_name, self.onnx_name, self.type))
+ return "Variable('{0}', '{1}', type={2})".format(
+ self.raw_name, self.onnx_name, self.type
+ )
@staticmethod
def from_pb(obj):
"""
Creates a data type from a protobuf object.
"""
+
def get_dim(d):
r = d.dim_value
if "dim_param" in str(d):
@@ -294,10 +327,9 @@ def get_dim(d):
return r
def get_shape(tt):
- return [get_dim(tt.shape.dim[i])
- for i in range(len(tt.shape.dim))]
+ return [get_dim(tt.shape.dim[i]) for i in range(len(tt.shape.dim))]
- if hasattr(obj, 'extend'):
+ if hasattr(obj, "extend"):
return [Variable.from_pb(o) for o in obj]
name = obj.name
@@ -317,22 +349,22 @@ def get_shape(tt):
ty = Int64TensorType(shape)
elif elem == onnx_proto.TensorProto.INT32:
ty = Int32TensorType(shape)
- elif (UInt8TensorType is not None and
- elem == onnx_proto.TensorProto.UINT8):
+ elif UInt8TensorType is not None and elem == onnx_proto.TensorProto.UINT8:
ty = UInt8TensorType(shape)
- elif (Int8TensorType is not None and
- elem == onnx_proto.TensorProto.INT8):
+ elif Int8TensorType is not None and elem == onnx_proto.TensorProto.INT8:
ty = Int8TensorType(shape)
elif elem == 0:
ty = FloatTensorType(shape)
else:
raise NotImplementedError(
"Unsupported type '{}' (elem_type={}).".format(
- type(obj.type.tensor_type), elem))
+ type(obj.type.tensor_type), elem
+ )
+ )
else:
- raise NotImplementedError("Unsupported type '{}' as "
- "a string ({}).".format(
- type(obj), obj))
+ raise NotImplementedError(
+ "Unsupported type '{}' as " "a string ({}).".format(type(obj), obj)
+ )
return Variable(name, name, None, ty)
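A small, hedged usage sketch of `Variable.from_pb` above (the import path mirrors this private module and may change):

    from onnx import TensorProto
    from onnx.helper import make_tensor_value_info
    from skl2onnx.common._topology import Variable

    value_info = make_tensor_value_info("X", TensorProto.FLOAT, [3, 4])
    var = Variable.from_pb(value_info)
    # expected: var.onnx_name == "X" and var.type behaves like FloatTensorType([3, 4])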
@@ -356,7 +388,6 @@ def add_operator(self, op, in_or_out):
self.operators_inputs_.append(op)
def check_compatible_type(self, other_type):
-
def empty_shape(shape):
return shape is None or len(shape) == 0
@@ -370,8 +401,8 @@ def empty_shape(shape):
if empty_shape(other_type.shape):
return
raise TypeError(
- "Incompatible type for variable %r and type %r." % (
- self, other_type))
+ "Incompatible type for variable %r and type %r." % (self, other_type)
+ )
class VariableStr(Variable):
@@ -390,8 +421,8 @@ def raw_name(self):
def onnx_name(self):
if self._onnx_name.startswith("u("):
raise RuntimeError(
- "Variable should be renamed as onnx_name=%r."
- "" % self._onnx_name)
+ "Variable should be renamed as onnx_name=%r." "" % self._onnx_name
+ )
return self._onnx_name
@@ -399,6 +430,7 @@ class Operator:
"""
Defines an operator available in *ONNX*.
"""
+
class OperatorList(list):
def __init__(self, parent, kind):
super(Operator.OperatorList, self).__init__()
@@ -406,25 +438,23 @@ def __init__(self, parent, kind):
self.kind = kind
def __eq__(self, second):
- raise NotImplementedError(
- "Operator equal not implemented and not needed.")
+ raise NotImplementedError("Operator equal not implemented and not needed.")
def append(self, v):
if not isinstance(v, Variable):
raise TypeError(
- "Input and output must be of type Variable not %r."
- "" % type(v))
- if self.kind == 'Out':
+ "Input and output must be of type Variable not %r." "" % type(v)
+ )
+ if self.kind == "Out":
v.set_parent(self.parent)
super(Operator.OperatorList, self).append(v)
logger.debug("[Op] add %s %r to %r", self.kind, v, self.parent)
- if self.kind == 'In':
+ if self.kind == "In":
v.add_operator(self.parent, False)
elif self.kind == "Out":
v.add_operator(self.parent, True)
else:
- raise RuntimeError(
- "Unexpected value for kind=%r." % self.kind)
+ raise RuntimeError("Unexpected value for kind=%r." % self.kind)
def extend(self, vs):
for v in vs:
@@ -433,35 +463,36 @@ def extend(self, vs):
def __getitem__(self, i):
v = list.__getitem__(self, i)
if isinstance(i, int) and not isinstance(v, Variable):
- raise TypeError("Element %d must be a Variable not %r." % (
- i, type(v)))
+ raise TypeError("Element %d must be a Variable not %r." % (i, type(v)))
return v
def __setitem__(self, i, v):
- raise LookupError(
- "Setter should not be used to modify an element.")
+ raise LookupError("Setter should not be used to modify an element.")
def set_element(self, i, v):
"Updates element i."
if not isinstance(v, Variable):
- raise TypeError(
- "Value v must be a Variable not %r." % type(v))
+ raise TypeError("Value v must be a Variable not %r." % type(v))
logger.debug(
"[Op] %s-change element %d from %r to %r in %r",
- self.kind, i, self[i], v, self.parent)
+ self.kind,
+ i,
+ self[i],
+ v,
+ self.parent,
+ )
list.__setitem__(self, i, v)
def to_string(self):
names = []
for o in self:
- if hasattr(o, 'onnx_name'):
+ if hasattr(o, "onnx_name"):
names.append(o.onnx_name)
else:
names.append('"%s"' % str(o))
return ",".join(names)
- def __init__(self, onnx_name, scope, type, raw_operator,
- target_opset, scope_inst):
+ def __init__(self, onnx_name, scope, type, raw_operator, target_opset, scope_inst):
"""
:param onnx_name: A unique ID, which is a string
:param scope: The name of the scope where this operator is
@@ -477,8 +508,10 @@ def __init__(self, onnx_name, scope, type, raw_operator,
:param scope_inst: :class:`Scope` instance the operator belongs to
"""
if isinstance(raw_operator, str):
- raise RuntimeError("Parameter raw_operator must be an object not "
- "a string '{0}'.".format(raw_operator))
+ raise RuntimeError(
+ "Parameter raw_operator must be an object not "
+ "a string '{0}'.".format(raw_operator)
+ )
# operator name in the converted model, if raw_operator
# is not None, output_shapes can be guessed
# from the raw model. Otherwise, it can be guessed
@@ -487,12 +520,12 @@ def __init__(self, onnx_name, scope, type, raw_operator,
self.scope = scope
self.type = type
self.raw_operator = raw_operator
- self.inputs = Operator.OperatorList(self, 'In')
- self.outputs = Operator.OperatorList(self, 'Out')
+ self.inputs = Operator.OperatorList(self, "In")
+ self.outputs = Operator.OperatorList(self, "Out")
self._is_evaluated = None
self.target_opset = target_opset
self.scope_inst = scope_inst
- logger.debug('[Op] +%r', self)
+ logger.debug("[Op] +%r", self)
def new_raw_operator(self, raw_operator, alias):
"""
@@ -500,8 +533,14 @@ def new_raw_operator(self, raw_operator, alias):
changes the raw_operator but keeps the same inputs
and outputs.
"""
- op = Operator(self.onnx_name, self.scope, alias, raw_operator,
- self.target_opset, self.scope_inst)
+ op = Operator(
+ self.onnx_name,
+ self.scope,
+ alias,
+ raw_operator,
+ self.target_opset,
+ self.scope_inst,
+ )
op.inputs = self.inputs
op.outputs = self.outputs
return op
@@ -515,30 +554,34 @@ def __repr__(self):
# The line above fails for python 3.7
textop = type(self.raw_operator)
if isinstance(textop, str) and "\n" in textop:
- textop = textop.replace('\n', '').replace(' ', '')
- return ("Operator(type='{0}', onnx_name='{1}', inputs='{2}', "
- "outputs='{3}', raw_operator={4})".format(
- self.type, self.onnx_name,
- self.inputs.to_string(),
- self.outputs.to_string(),
- textop))
+ textop = textop.replace("\n", "").replace(" ", "")
+ return (
+ "Operator(type='{0}', onnx_name='{1}', inputs='{2}', "
+ "outputs='{3}', raw_operator={4})".format(
+ self.type,
+ self.onnx_name,
+ self.inputs.to_string(),
+ self.outputs.to_string(),
+ textop,
+ )
+ )
def __setattr__(self, name, value):
- if name in ('inputs', 'outputs'):
- if (isinstance(value, list) and
- not isinstance(value, Operator.OperatorList)):
- if name == 'inputs':
- self.inputs = Operator.OperatorList(self, 'In')
+ if name in ("inputs", "outputs"):
+ if isinstance(value, list) and not isinstance(value, Operator.OperatorList):
+ if name == "inputs":
+ self.inputs = Operator.OperatorList(self, "In")
self.inputs.extend(value)
return
- if name == 'outputs':
- self.outputs = Operator.OperatorList(self, 'Out')
+ if name == "outputs":
+ self.outputs = Operator.OperatorList(self, "Out")
self.outputs.extend(value)
return
if not isinstance(value, Operator.OperatorList):
raise TypeError(
- "inputs or outputs must be of type Operator.OperatorList.")
- ioo = name == 'outputs'
+ "inputs or outputs must be of type Operator.OperatorList."
+ )
+ ioo = name == "outputs"
for v in value:
v.add_operator(self, ioo)
self.__dict__[name] = value
@@ -549,9 +592,7 @@ def is_evaluated(self):
def init_status(self, is_evaluated=None):
if is_evaluated is not None and is_evaluated != self.is_evaluated:
- logger.debug(
- '[Op] update is_evaluated=%r for %r',
- is_evaluated, self)
+ logger.debug("[Op] update is_evaluated=%r for %r", is_evaluated, self)
self._is_evaluated = is_evaluated
@property
@@ -587,25 +628,31 @@ def infer_types(self):
if self.type is None:
raise MissingShapeCalculator(
"Unable to find a shape calculator for type '{}'.".format(
- type(self.raw_operator)))
+ type(self.raw_operator)
+ )
+ )
try:
shape_calc = _registration.get_shape_calculator(self.type)
except ValueError:
raise MissingShapeCalculator(
"Unable to find a shape calculator for alias '{}' "
- "and type '{}'.".format(self.type, type(self.raw_operator)))
+ "and type '{}'.".format(self.type, type(self.raw_operator))
+ )
if shape_calc is None:
raise MissingShapeCalculator(
"Unexpected shape calculator for alias '{}' "
- "and type '{}'.".format(self.type, type(self.raw_operator)))
+ "and type '{}'.".format(self.type, type(self.raw_operator))
+ )
logger.debug(
- "[Shape-a] %r fed %r - %r", self,
+ "[Shape-a] %r fed %r - %r",
+ self,
"".join(str(i.is_fed) for i in self.inputs),
- "".join(str(i.is_fed) for i in self.outputs))
+ "".join(str(i.is_fed) for i in self.outputs),
+ )
shape_calc(self)
logger.debug(
- "[Shape-b] %r inputs=%r - outputs=%r",
- self, self.inputs, self.outputs)
+ "[Shape-b] %r inputs=%r - outputs=%r", self, self.inputs, self.outputs
+ )
class Scope:
@@ -615,9 +662,15 @@ class Scope:
provides functions to create a unique unused name.
"""
- def __init__(self, name, target_opset=None,
- custom_shape_calculators=None, options=None,
- registered_models=None, naming=None):
+ def __init__(
+ self,
+ name,
+ target_opset=None,
+ custom_shape_calculators=None,
+ options=None,
+ registered_models=None,
+ naming=None,
+ ):
"""
:param name: A string, the unique ID of this scope in a
Topology object
@@ -670,16 +723,15 @@ def __init__(self, name, target_opset=None,
if naming is None:
self._naming = Topology._generate_unique_name
elif isinstance(naming, str):
- self._naming = (
- lambda seed, names: Topology._generate_unique_name(
- self.naming + seed, names))
+ self._naming = lambda seed, names: Topology._generate_unique_name(
+ self.naming + seed, names
+ )
elif callable(self.naming):
- self._naming = (
- lambda seed, names: Topology._generate_unique_name(
- self.naming(seed, names), names))
+ self._naming = lambda seed, names: Topology._generate_unique_name(
+ self.naming(seed, names), names
+ )
else:
- raise TypeError(
- "Unexpected type for parameter naming: %r." % type(naming))
+ raise TypeError("Unexpected type for parameter naming: %r." % type(naming))
def get(self, var_name, default_value):
"Returns variable with 'name' or default value is not found."
@@ -705,13 +757,13 @@ def get_unique_variable_name(self, seed, rename=True):
Creates a unique variable ID based on the given seed.
"""
if not isinstance(seed, str):
- raise TypeError("Parameter seed must be a string not {}."
- "".format(type(seed)))
+ raise TypeError(
+ "Parameter seed must be a string not {}." "".format(type(seed))
+ )
if rename:
name = self._naming(seed, self.onnx_variable_names)
else:
- name = Topology._generate_unique_name(
- seed, self.onnx_variable_names)
+ name = Topology._generate_unique_name(seed, self.onnx_variable_names)
return name
def get_unique_operator_name(self, seed):
@@ -720,16 +772,16 @@ def get_unique_operator_name(self, seed):
"""
return self._naming(seed, self.onnx_operator_names)
- def declare_local_variable(self, raw_name, type=None, prepend=False,
- missing_type=False, rename=True):
+ def declare_local_variable(
+ self, raw_name, type=None, prepend=False, missing_type=False, rename=True
+ ):
"""
This function may create a new variable in this scope. If
*raw_name* has been used to create other variables, the new
variable will hide all other variables created using *raw_name*.
"""
if type is None and not missing_type:
- raise RuntimeError(
- "Unknown type for %r (type=%r)." % (raw_name, type))
+ raise RuntimeError("Unknown type for %r (type=%r)." % (raw_name, type))
# Get unique ID for the new variable
onnx_name = self.get_unique_variable_name(raw_name, rename=rename)
@@ -742,16 +794,16 @@ def register_variable(self, var, prepend=False):
"Adds a variable to the scope."
if var.onnx_name in self.variables:
raise RuntimeError(
- "Variable %r already registered (other=%r)." % (
- var, self.variables[var.onnx_name]))
+ "Variable %r already registered (other=%r)."
+ % (var, self.variables[var.onnx_name])
+ )
if var.raw_name in self.variable_name_mapping:
# Hide existing variables with the same raw_name
if not prepend:
self.variable_name_mapping[var.raw_name].append(var.onnx_name)
else:
- self.variable_name_mapping[var.raw_name].insert(
- 0, var.onnx_name)
+ self.variable_name_mapping[var.raw_name].insert(0, var.onnx_name)
else:
self.variable_name_mapping[var.raw_name] = [var.onnx_name]
@@ -769,50 +821,49 @@ def declare_existing_subgraph_name(self, graph_proto):
if self.has_variable_name(name):
raise NameError(
"Result name %r is already taken (outputs=%r) "
- "(node=%r)." % (
- name, output_name, node))
+ "(node=%r)." % (name, output_name, node)
+ )
self.onnx_variable_names.add(name)
if node.name in self.onnx_operator_names:
raise NameError(
"Operator name %r is already taken "
- "(node=%r)." % (
- node.name, node))
+ "(node=%r)." % (node.name, node)
+ )
self.onnx_operator_names.add(node.name)
def rename_onnx_name(self, old_name, new_name):
if new_name in self.variables:
raise RuntimeError(
- "Name %r already in variables (%r)." % (
- new_name, self.variables[new_name]))
+ "Name %r already in variables (%r)."
+ % (new_name, self.variables[new_name])
+ )
if old_name not in self.variables:
- raise RuntimeError(
- "Unable to find name %r in variables." % old_name)
- logger.debug(
- '[Scope] update onnx_name, from %r to %r',
- old_name, new_name)
+ raise RuntimeError("Unable to find name %r in variables." % old_name)
+ logger.debug("[Scope] update onnx_name, from %r to %r", old_name, new_name)
self.variables[new_name] = self.variables[old_name]
del self.variables[old_name]
- def declare_local_input(self, raw_name, type=None, prepend=False,
- rename=True):
+ def declare_local_input(self, raw_name, type=None, prepend=False, rename=True):
"""
Calls `declare_local_variable`. Registers this variable
as an input.
"""
var = self.declare_local_variable(
- raw_name, type=type, prepend=prepend, rename=rename)
+ raw_name, type=type, prepend=prepend, rename=rename
+ )
self.input_variables.append(var)
return var
- def declare_local_output(self, raw_name, type=None, prepend=False,
- missing_type=False):
+ def declare_local_output(
+ self, raw_name, type=None, prepend=False, missing_type=False
+ ):
"""
Calls `declare_local_variable`. Registers this variable
as an output.
"""
var = self.declare_local_variable(
- raw_name, type=type, prepend=prepend,
- missing_type=missing_type)
+ raw_name, type=type, prepend=prepend, missing_type=missing_type
+ )
self.output_variables.append(var)
return var
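As a hedged illustration of how these scope helpers are combined, a hypothetical custom parser (the alias and the result names are invented for the example):

    from skl2onnx.common.data_types import FloatTensorType

    def my_custom_parser(scope, model, inputs, custom_parsers=None):
        # declare the operator carrying the raw model, wire its inputs,
        # and declare a fresh output variable in the same scope
        this_operator = scope.declare_local_operator("MyCustomAlias", model)
        this_operator.inputs.extend(inputs)
        out = scope.declare_local_variable("variable", FloatTensorType([None, 1]))
        this_operator.outputs.append(out)
        return [out]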
@@ -821,26 +872,29 @@ def declare_local_operator(self, type, raw_model=None):
This function is used to declare new local operator.
"""
onnx_name = self.get_unique_operator_name(str(type))
- operator = Operator(onnx_name, self.name, type, raw_model,
- self.target_opset, scope_inst=self)
+ operator = Operator(
+ onnx_name, self.name, type, raw_model, self.target_opset, scope_inst=self
+ )
self.operators[onnx_name] = operator
return operator
def _get_allowed_options(self, model, fail=True):
if self.registered_models is not None:
- if type(model) not in self.registered_models['aliases']:
+ if type(model) not in self.registered_models["aliases"]:
if fail:
raise NotImplementedError(
"No registered models, no known allowed options "
- "for model '{}'.".format(model.__class__.__name__))
+ "for model '{}'.".format(model.__class__.__name__)
+ )
return {}
- alias = self.registered_models['aliases'][type(model)]
- conv = self.registered_models['conv'][alias]
+ alias = self.registered_models["aliases"][type(model)]
+ conv = self.registered_models["conv"][alias]
allowed = conv.get_allowed_options()
return allowed
raise NotImplementedError(
"No registered models, no known allowed options "
- "for model '{}'.".format(model.__class__.__name__))
+ "for model '{}'.".format(model.__class__.__name__)
+ )
def add_options(self, model_id, options):
"""
@@ -873,9 +927,12 @@ def get_options(self, model, default_values=None, fail=True):
:return: dictionary
"""
return _build_options(
- model, self.options, default_values,
+ model,
+ self.options,
+ default_values,
self._get_allowed_options(model, fail=fail),
- fail=fail)
+ fail=fail,
+ )
def replace_raw_operator(self, op1, op2, alias):
"""
@@ -885,8 +942,8 @@ def replace_raw_operator(self, op1, op2, alias):
for v in self.operators.values():
if id(v.raw_operator) == id(op1):
logger.debug(
- '[Scope] replace %d by %d in %r.',
- id(v.raw_operator), id(op1), v)
+ "[Scope] replace %d by %d in %r.", id(v.raw_operator), id(op1), v
+ )
v.raw_operator = op2
v.type = alias
@@ -899,9 +956,16 @@ class Topology:
These are filled by the converters while a pipeline is being converted.
"""
- def __init__(self, model, default_batch_size=1, initial_types=None,
- target_opset=None, custom_conversion_functions=None,
- custom_shape_calculators=None, registered_models=None):
+ def __init__(
+ self,
+ model,
+ default_batch_size=1,
+ initial_types=None,
+ target_opset=None,
+ custom_conversion_functions=None,
+ custom_shape_calculators=None,
+ registered_models=None,
+ ):
"""
Initializes a *Topology* object, which is an intermediate
representation of a computational graph.
@@ -928,23 +992,28 @@ def __init__(self, model, default_batch_size=1, initial_types=None,
self.default_batch_size = default_batch_size
self.target_opset = target_opset
self.custom_conversion_functions = (
- custom_conversion_functions if custom_conversion_functions else {})
+ custom_conversion_functions if custom_conversion_functions else {}
+ )
self.custom_shape_calculators = (
- custom_shape_calculators if custom_shape_calculators else {})
+ custom_shape_calculators if custom_shape_calculators else {}
+ )
for k in self.custom_conversion_functions:
if not callable(k):
- raise TypeError("Keys in custom_conversion_functions must be "
- "types not strings.")
+ raise TypeError(
+ "Keys in custom_conversion_functions must be " "types not strings."
+ )
for k in self.custom_shape_calculators:
if not callable(k):
- raise TypeError("Keys in custom_shape_calculators must be "
- "types not strings.")
+ raise TypeError(
+ "Keys in custom_shape_calculators must be " "types not strings."
+ )
# A map of local overwritten model aliases.
self.model_aliases = {}
- all_model_types = (set(self.custom_conversion_functions)
- | set(self.custom_shape_calculators))
+ all_model_types = set(self.custom_conversion_functions) | set(
+ self.custom_shape_calculators
+ )
for mtype in all_model_types:
alias = "{}_{}".format(mtype.__name__, id(self))
self.model_aliases[mtype] = alias
@@ -957,8 +1026,7 @@ def __init__(self, model, default_batch_size=1, initial_types=None,
@property
def scope(self):
if len(self.scopes) != 1:
- raise RuntimeError(
- "Only one scope is allowed not %d." % len(self.scopes))
+ raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes))
return self.scopes[0]
@staticmethod
@@ -970,15 +1038,15 @@ def _generate_unique_name(seed, existing_names):
produced
:return: a string similar to the seed
"""
- if seed == '':
- raise ValueError('Name seed must be a non-empty string.')
+ if seed == "":
+ raise ValueError("Name seed must be a non-empty string.")
# Make the seed meet C-style naming convention
# Only alphabets and numbers are allowed
- seed = re.sub('[^\\w+]', '_', seed)
+ seed = re.sub("[^\\w+]", "_", seed)
# The first symbol cannot be a number
- if re.match('^[0-9]', seed):
- seed = '_' + seed
+ if re.match("^[0-9]", seed):
+ seed = "_" + seed
# If seed has never been seen, we return it as it is. Otherwise,
# we will append an number to make it unique.
@@ -996,20 +1064,21 @@ def _generate_unique_name(seed, existing_names):
def get_unique_scope_name(self, seed):
return Topology._generate_unique_name(seed, self.scope_names)
- def declare_scope(self, seed, parent_scopes=None, options=None,
- naming=None):
+ def declare_scope(self, seed, parent_scopes=None, options=None, naming=None):
"""
Creates a new :class:`Scope `
and appends it to the list of existing scopes.
"""
if len(self.scopes) != 0:
- raise RuntimeError(
- "Only one scope can be created.")
+ raise RuntimeError("Only one scope can be created.")
scope = Scope(
- self.get_unique_scope_name(seed), target_opset=self.target_opset,
+ self.get_unique_scope_name(seed),
+ target_opset=self.target_opset,
custom_shape_calculators=self.custom_shape_calculators,
- options=options, registered_models=self.registered_models,
- naming=naming)
+ options=options,
+ registered_models=self.registered_models,
+ naming=naming,
+ )
# Declare input variables.
# They should be the inputs of the scikit-learn
@@ -1048,16 +1117,18 @@ def call_converter(self, operator, container, verbose=0):
"Unable to find converter for alias '{}' type "
"'{}'. You may raise an issue at "
"https://github.com/onnx/sklearn-onnx/issues."
- "".format(operator.type,
- type(getattr(operator, 'raw_model', None))))
+ "".format(operator.type, type(getattr(operator, "raw_model", None)))
+ )
container.validate_options(operator)
if verbose > 0:
print("[call_converter] call converter for %r." % operator.type)
logger.debug(
- "[Conv] call %r fed %r - %r", operator,
+ "[Conv] call %r fed %r - %r",
+ operator,
"".join(str(i.is_fed) for i in operator.inputs),
- "".join(str(i.is_fed) for i in operator.outputs))
+ "".join(str(i.is_fed) for i in operator.outputs),
+ )
conv(self.scopes[0], operator, container)
logger.debug("[Conv] end - %r", operator)
@@ -1066,13 +1137,13 @@ def call_shape_calculator(self, operator):
mtype = type(operator.raw_operator)
if mtype in self.custom_shape_calculators:
# overwritten operator.
- source = 'custom'
+ source = "custom"
shape_calc = self.custom_shape_calculators[mtype]
elif operator.type in self.custom_shape_calculators:
- source = 'custom'
+ source = "custom"
shape_calc = self.custom_shape_calculators[operator.type]
elif hasattr(operator.raw_operator, "onnx_shape_calculator"):
- source = 'onnx_shape_calculator'
+ source = "onnx_shape_calculator"
shape_calc = operator.raw_operator.onnx_shape_calculator()
else:
source = ""
@@ -1080,13 +1151,15 @@ def call_shape_calculator(self, operator):
if shape_calc is not None:
logger.debug(
- "[Shape1] %r fed %r - %r (source=%r)", operator,
+ "[Shape1] %r fed %r - %r (source=%r)",
+ operator,
",".join(str(i.is_fed) for i in operator.inputs),
",".join(str(i.is_fed) for i in operator.outputs),
- source)
+ source,
+ )
shape_calc(operator)
else:
- logger.debug('[Shape2] call infer_types for %r', operator)
+ logger.debug("[Shape2] call infer_types for %r", operator)
operator.infer_types()
def _initialize_graph_status_for_traversing(self):
@@ -1095,8 +1168,7 @@ def _initialize_graph_status_for_traversing(self):
traversing the graph. Only used by convert_operators.
"""
if len(self.scopes) != 1:
- raise RuntimeError(
- "Only one scope is allowed not %d." % len(self.scopes))
+ raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes))
input_names = set(v.onnx_name for v in self.scopes[0].input_variables)
if len(input_names) == 0:
raise RuntimeError("No detected inputs.")
@@ -1107,8 +1179,7 @@ def _initialize_graph_status_for_traversing(self):
for operator in self.unordered_operator_iterator():
operator.init_status(is_evaluated=False)
- def _propagate_status(self, operator, container, fed_variables,
- verbose=0):
+ def _propagate_status(self, operator, container, fed_variables, verbose=0):
"""
Propagates status *is_fed* based on output variable
and node added in the container.
@@ -1123,8 +1194,10 @@ def _propagate_status(self, operator, container, fed_variables,
vars[i].append(node)
if verbose > 1:
- print("[_propagate_status] newly fed=%r" % list(
- v.onnx_name for v in operator.outputs if v.is_fed))
+ print(
+ "[_propagate_status] newly fed=%r"
+ % list(v.onnx_name for v in operator.outputs if v.is_fed)
+ )
stack = list(fed_variables)
scope = self.scopes[0]
while len(stack) > 0:
@@ -1155,23 +1228,28 @@ def convert_operators(self, container=None, verbose=0):
operators. It also processes new operators created by
converters.
"""
+
def _check_operator_(operator):
if not isinstance(operator.inputs, Operator.OperatorList):
raise TypeError(
"operator.inputs must be a Operator.OperatorList "
- "not %r." % type(operator.inputs))
+ "not %r." % type(operator.inputs)
+ )
if not isinstance(operator.outputs, Operator.OperatorList):
raise TypeError(
"operator.outputs must be a Operator.OperatorList "
- "not %r." % type(operator.outputs))
+ "not %r." % type(operator.outputs)
+ )
if any(not isinstance(i, Variable) for i in operator.inputs):
raise TypeError(
"One input is not a Variable for operator %r - %r."
- "" % (type(operator.raw_operator), operator))
+ "" % (type(operator.raw_operator), operator)
+ )
if any(not isinstance(i, Variable) for i in operator.outputs):
raise TypeError(
"One output is not a Variable for operator %r - %r."
- "" % (type(operator.raw_operator), operator))
+ "" % (type(operator.raw_operator), operator)
+ )
def _check_variable_in_(variable, operator):
idop = id(operator)
@@ -1179,33 +1257,43 @@ def _check_variable_in_(variable, operator):
if idop not in ids:
raise RuntimeError(
"Operator %r not registered in the list of operators "
- "of %r taking it as an input [\n%s]." % (
- operator, variable,
- "\n".join(map(str, variable.operators_inputs_))))
+ "of %r taking it as an input [\n%s]."
+ % (
+ operator,
+ variable,
+ "\n".join(map(str, variable.operators_inputs_)),
+ )
+ )
def _check_variable_out_(variable, operator):
if variable.is_fed:
add = ["", "--DEBUG-INFO--"]
for scope in self.scopes:
- add.append('---')
- add.append(pprint.pformat(
- scope.variable_name_mapping))
- add.append('---')
+ add.append("---")
+ add.append(pprint.pformat(scope.variable_name_mapping))
+ add.append("---")
for var in scope.variables.values():
- add.append(" is_fed=%s %s - n_in=%d n_out=%d" % (
- getattr(var, 'is_fed', '?'), var,
- len(var.operators_inputs_),
- len(var.operators_outputs_)))
- add.append('---')
+ add.append(
+ " is_fed=%s %s - n_in=%d n_out=%d"
+ % (
+ getattr(var, "is_fed", "?"),
+ var,
+ len(var.operators_inputs_),
+ len(var.operators_outputs_),
+ )
+ )
+ add.append("---")
for op in scope.operators.values():
- add.append(" is_evaluated=%s %s" % (
- getattr(op, 'is_evaluated', '?'), op))
- add.append('---')
+ add.append(
+ " is_evaluated=%s %s"
+ % (getattr(op, "is_evaluated", "?"), op)
+ )
+ add.append("---")
for v in operator.inputs:
add.append(" inputs={}".format(v))
for v in operator.outputs:
add.append(" outputs={}".format(v))
- add.append('--- operator producing this variable--')
+ add.append("--- operator producing this variable--")
for op in variable.operators_outputs_:
add.append(str(op))
raise RuntimeError(
@@ -1218,11 +1306,15 @@ def _check_variable_out_(variable, operator):
"of them is producing this output. "
"In that case, an identity node must be "
"added.{}".format(
- variable, operator.type,
- operator.onnx_name, operator.is_evaluated,
+ variable,
+ operator.type,
+ operator.onnx_name,
+ operator.is_evaluated,
[v.is_fed for v in operator.inputs],
[v.is_fed for v in operator.outputs],
- "\n".join(add)))
+ "\n".join(add),
+ )
+ )
if verbose > 0:
print("[convert_operators] begin")
@@ -1235,17 +1327,19 @@ def _check_variable_out_(variable, operator):
changes = 0
ops = list(self.unordered_operator_iterator())
if verbose > 0:
- print("[convert_operators] iteration %d - n_vars=%d "
- "n_ops=%d" % (
- n_iter, len(fed_variables), len(ops)))
+ print(
+ "[convert_operators] iteration %d - n_vars=%d "
+ "n_ops=%d" % (n_iter, len(fed_variables), len(ops))
+ )
for operator in ops:
_check_operator_(operator)
for var in operator.inputs:
if var.is_fed:
fed_variables[var.onnx_name] = var
- if (all(variable.is_fed for variable in operator.inputs) and
- not operator.is_evaluated):
-
+ if (
+ all(variable.is_fed for variable in operator.inputs)
+ and not operator.is_evaluated
+ ):
for variable in operator.inputs:
_check_variable_in_(variable, operator)
for variable in operator.outputs:
@@ -1258,25 +1352,28 @@ def _check_variable_out_(variable, operator):
# output variables are not necessarily known at this stage.
operator.init_status(is_evaluated=True)
for variable in operator.outputs:
- if all(op.is_evaluated
- for op in variable.operators_outputs_):
+ if all(op.is_evaluated for op in variable.operators_outputs_):
variable.init_status(is_fed=True)
fed_variables[variable.onnx_name] = variable
fed_variables.update(
- {i.name: i for i in container.initializers
- if i.name not in fed_variables})
- self._propagate_status(operator, container, fed_variables,
- verbose=verbose)
+ {
+ i.name: i
+ for i in container.initializers
+ if i.name not in fed_variables
+ }
+ )
+ self._propagate_status(
+ operator, container, fed_variables, verbose=verbose
+ )
# unfed some variables (it happens when a node
# shares an output with another node)
rem = []
for n, var in fed_variables.items():
- if not hasattr(var, 'operators_outputs_'):
+ if not hasattr(var, "operators_outputs_"):
# initializer
continue
- if any(not o.is_evaluated
- for o in var.operators_outputs_):
+ if any(not o.is_evaluated for o in var.operators_outputs_):
rem.append(n)
for r in rem:
v = fed_variables[r]
@@ -1285,8 +1382,10 @@ def _check_variable_out_(variable, operator):
changes += 1
if verbose > 0:
- print("[convert_operators] end iter: %d - n_vars=%d" % (
- n_iter, len(fed_variables)))
+ print(
+ "[convert_operators] end iter: %d - n_vars=%d"
+ % (n_iter, len(fed_variables))
+ )
if verbose > 0:
print("[convert_operators] end.")
@@ -1300,31 +1399,36 @@ def _check_variable_out_(variable, operator):
for var in self.unordered_variable_iterator():
rows.append(
"is_fed=%r is_leaf=%r is_root=%r - %r - n_in=%d n_out=%d"
- "" % (var.is_fed, var.is_leaf, var.is_root, var,
- len(var.operators_inputs_),
- len(var.operators_outputs_)))
+ ""
+ % (
+ var.is_fed,
+ var.is_leaf,
+ var.is_root,
+ var,
+ len(var.operators_inputs_),
+ len(var.operators_outputs_),
+ )
+ )
rows.append("---OPERATORS---")
for op in self.unordered_operator_iterator():
rows.append("is_eval=%r - %r" % (op.is_evaluated, op))
rows.append("---NODES---")
for node in container.nodes:
- rows.append("%s: %r -> %r" % (
- node.op_type, node.input, node.output))
+ rows.append("%s: %r -> %r" % (node.op_type, node.input, node.output))
raise RuntimeError(
"Not all operators have been evaluated. A variable name "
"is probably misspelled.\n%s"
- "" % "\n".join(rows))
+ "" % "\n".join(rows)
+ )
# Input and output
if len(self.scopes[0].input_variables) > 0:
inputs = self.scopes[0].input_variables
else:
- inputs = [v for v in self.unordered_variable_iterator()
- if v.is_root]
+ inputs = [v for v in self.unordered_variable_iterator() if v.is_root]
for i in inputs:
container.add_input(i)
- outputs = [v for v in self.unordered_variable_iterator()
- if v.is_leaf]
+ outputs = [v for v in self.unordered_variable_iterator() if v.is_leaf]
    # The function checks that, for every output variable,
    # raw_name equals onnx_name. It swaps the names if that is not the case.
@@ -1339,8 +1443,9 @@ def _check_variable_out_(variable, operator):
continue
swaped.add(var.raw_name)
if verbose > 1:
- print("[convert_operators] %r <-> %r." % (
- var.raw_name, var.onnx_name))
+ print(
+ "[convert_operators] %r <-> %r." % (var.raw_name, var.onnx_name)
+ )
old_name = var.onnx_name
new_name = var.raw_name
@@ -1348,8 +1453,8 @@ def _check_variable_out_(variable, operator):
container.swap_names(old_name, new_name)
except NotImplementedError as e:
logger.debug(
- '[Topo] unable to swap %r and %r (%r).',
- old_name, new_name, e)
+ "[Topo] unable to swap %r and %r (%r).", old_name, new_name, e
+ )
continue
for v in self.unordered_variable_iterator():
@@ -1362,10 +1467,16 @@ def _check_variable_out_(variable, operator):
container.add_output(o)
-def convert_topology(topology, model_name, doc_string, target_opset,
- channel_first_inputs=None,
- options=None, remove_identity=True,
- verbose=0):
+def convert_topology(
+ topology,
+ model_name,
+ doc_string,
+ target_opset,
+ channel_first_inputs=None,
+ options=None,
+ remove_identity=True,
+ verbose=0,
+):
"""
This function is used to convert our Topology object defined in
    _parser.py into an ONNX model (type: ModelProto).
@@ -1388,8 +1499,7 @@ def convert_topology(topology, model_name, doc_string, target_opset,
if target_opset is None:
target_opset = get_latest_tested_opset_version()
if isinstance(target_opset, dict):
- onnx_target_opset = target_opset.get(
- '', get_latest_tested_opset_version())
+ onnx_target_opset = target_opset.get("", get_latest_tested_opset_version())
else:
onnx_target_opset = target_opset
if onnx_target_opset > get_opset_number_from_onnx():
@@ -1399,21 +1509,23 @@ def convert_topology(topology, model_name, doc_string, target_opset,
"version of the installed onnx package. See "
"https://github.com/onnx/onnx/blob/master/docs/"
"Versioning.md#released-versions"
- ".".format(onnx_target_opset, found))
+ ".".format(onnx_target_opset, found)
+ )
if onnx_target_opset > get_latest_tested_opset_version():
warnings.warn(
"Parameter target_opset {} > {} is higher than the "
"the latest tested version"
- ".".format(
- onnx_target_opset,
- get_latest_tested_opset_version()))
+ ".".format(onnx_target_opset, get_latest_tested_opset_version())
+ )
container = ModelComponentContainer(
- target_opset, options=options,
+ target_opset,
+ options=options,
registered_models=topology.registered_models,
white_op=topology.raw_model._white_op,
black_op=topology.raw_model._black_op,
- verbose=verbose)
+ verbose=verbose,
+ )
# Traverse the graph from roots to leaves
# This loop could eventually be parallelized.
@@ -1427,8 +1539,10 @@ def convert_topology(topology, model_name, doc_string, target_opset,
if verbose >= 2:
print("---NODES---")
for node in container.nodes:
- print(" %s - %s: %r -> %r" % (
- node.op_type, node.name, node.input, node.output))
+ print(
+ " %s - %s: %r -> %r"
+ % (node.op_type, node.name, node.input, node.output)
+ )
# Create a graph from its main components
if container.target_opset_onnx < 9:
@@ -1445,28 +1559,36 @@ def convert_topology(topology, model_name, doc_string, target_opset,
        # one of the original model's inputs, so it has been added into
# the container's input list. If this is the case, we need to
# skip one iteration to avoid duplicated inputs.
- if tensor.name in [value_info.name for value_info in
- container.inputs]:
+ if tensor.name in [value_info.name for value_info in container.inputs]:
continue
# Initializers are always tensors so we can just call
# make_tensor_value_info(...).
value_info = make_tensor_value_info(
- tensor.name, tensor.data_type, tensor.dims)
+ tensor.name, tensor.data_type, tensor.dims
+ )
extra_inputs.append(value_info)
        # Before ONNX opset 9, initializers needed to be passed in
        # with the inputs.
- graph = make_graph(container.nodes, model_name,
- container.inputs + extra_inputs,
- container.outputs, container.initializers)
+ graph = make_graph(
+ container.nodes,
+ model_name,
+ container.inputs + extra_inputs,
+ container.outputs,
+ container.initializers,
+ )
else:
# In ONNX opset 9 and above, initializers are included as
# operator inputs and therefore do not need to be passed as
# extra_inputs.
graph = make_graph(
- container.nodes, model_name, container.inputs,
- container.outputs, container.initializers)
+ container.nodes,
+ model_name,
+ container.inputs,
+ container.outputs,
+ container.initializers,
+ )
# Add extra information related to the graph
graph.value_info.extend(container.value_info)
@@ -1475,16 +1597,16 @@ def convert_topology(topology, model_name, doc_string, target_opset,
onnx_model = make_model(graph)
# Update domain version
- opv = min(onnx_target_opset,
- _get_main_opset_version(onnx_model) or onnx_target_opset)
+ opv = min(
+ onnx_target_opset, _get_main_opset_version(onnx_model) or onnx_target_opset
+ )
if not _update_domain_version(container, onnx_model, verbose=verbose):
# Main opset was not added. Doing it here.
op_set = onnx_model.opset_import.add()
- op_set.domain = ''
+ op_set.domain = ""
op_set.version = opv
if verbose > 0:
- print('[convert_topology] +opset: name=%r, version=%s' % (
- '', opv))
+ print("[convert_topology] +opset: name=%r, version=%s" % ("", opv))
# Add extra information
irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION)
@@ -1515,7 +1637,8 @@ def _update_domain_version(container, onnx_model, verbose=0):
purified_operator_set[op_domain] = op_version
else:
purified_operator_set[op_domain] = max(
- purified_operator_set[op_domain], op_version)
+ purified_operator_set[op_domain], op_version
+ )
# Fill operator sets
i = 0
@@ -1530,28 +1653,30 @@ def _update_domain_version(container, onnx_model, verbose=0):
# Just create one ONNX element in opset_import
op_set = onnx_model.opset_import.add()
if verbose > 0:
- print('[_update_domain_version] +opset %d: name=%r, version=%s' % (
- i, op_domain, op_version))
+ print(
+ "[_update_domain_version] +opset %d: name=%r, version=%s"
+ % (i, op_domain, op_version)
+ )
op_set.domain = op_domain
- if op_set != '':
+ if op_set != "":
max_supported = get_default_opset_for_domain(op_domain)
if max_supported is not None and max_supported < op_version:
raise RuntimeError(
"The model is using version %d of domain %r not supported "
"yet by this library. You need to specify "
- "target_opset={%r: %r}." % (
- op_version, op_domain, op_domain, max_supported))
+ "target_opset={%r: %r}."
+ % (op_version, op_domain, op_domain, max_supported)
+ )
op_set.version = op_version
i += 1
if container.target_opset_any_domain(op_domain) < op_version:
raise RuntimeError(
- 'The specified opset %d is too low to convert '
- 'this model, which requires at least opset '
- '%d.' % (
- container.target_opset_any_domain(op_domain),
- op_version))
- return '' in purified_operator_set
+ "The specified opset %d is too low to convert "
+ "this model, which requires at least opset "
+ "%d." % (container.target_opset_any_domain(op_domain), op_version)
+ )
+ return "" in purified_operator_set
def _get_main_opset_version(model):
@@ -1560,7 +1685,7 @@ def _get_main_opset_version(model):
"""
mld = None
for op in model.opset_import:
- if op.domain == '':
+ if op.domain == "":
return op.version
if op.domain == "ai.onnx.ml":
mld = op.version
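As context for the hunks above: target_opset may be passed either as an
integer or as a dictionary keyed by domain, and _update_domain_version fills
the opset_import entries of the final model. A minimal sketch of how that
surfaces to a caller (illustrative model and data; assumes scikit-learn and
skl2onnx are installed):

    # Sketch only: inspect the opset_import entries populated during conversion.
    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from skl2onnx import to_onnx

    rng = np.random.RandomState(0)
    X = rng.rand(20, 3).astype(np.float32)
    y = (X[:, 0] > 0.5).astype(np.int64)
    clf = LogisticRegression().fit(X, y)

    # target_opset may also be a dict keyed by domain, e.g. {"": 15}.
    onx = to_onnx(clf, X, target_opset=15)
    for op_set in onx.opset_import:
        print(op_set.domain or "<main>", op_set.version)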
diff --git a/skl2onnx/common/data_types.py b/skl2onnx/common/data_types.py
index 6a0d64cff..a09971a6f 100644
--- a/skl2onnx/common/data_types.py
+++ b/skl2onnx/common/data_types.py
@@ -2,18 +2,29 @@
import numpy as np
from onnxconverter_common.data_types import ( # noqa
- DataType, Int64Type, FloatType, # noqa
- StringType, TensorType, # noqa
- Int64TensorType, Int32TensorType, BooleanTensorType, # noqa
- FloatTensorType, StringTensorType, DoubleTensorType, # noqa
- DictionaryType, SequenceType) # noqa
+ DataType,
+ Int64Type,
+ FloatType, # noqa
+ StringType,
+ TensorType, # noqa
+ Int64TensorType,
+ Int32TensorType,
+ BooleanTensorType, # noqa
+ FloatTensorType,
+ StringTensorType,
+ DoubleTensorType, # noqa
+ DictionaryType,
+ SequenceType,
+) # noqa
+
try:
from onnxconverter_common.data_types import ( # noqa
- Complex64TensorType, Complex128TensorType)
+ Complex64TensorType,
+ Complex128TensorType,
+ )
except ImportError:
Complex64TensorType = None
Complex128TensorType = None
-from onnxconverter_common.data_types import find_type_conversion, onnx_built_with_ml # noqa
from ..proto import TensorProto, onnx_proto
@@ -22,7 +33,7 @@
except ImportError:
class DoubleType(DataType):
- def __init__(self, doc_string=''):
+ def __init__(self, doc_string=""):
super(DoubleType, self).__init__([1, 1], doc_string)
def to_onnx_type(self):
@@ -41,7 +52,7 @@ def __repr__(self):
except ImportError:
class Float16TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(Float16TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -53,7 +64,7 @@ def _get_element_onnx_type(self):
except ImportError:
class Int8TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(Int8TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -65,7 +76,7 @@ def _get_element_onnx_type(self):
except ImportError:
class Int16TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(Int16TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -77,7 +88,7 @@ def _get_element_onnx_type(self):
except ImportError:
class UInt16TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(UInt16TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -89,7 +100,7 @@ def _get_element_onnx_type(self):
except ImportError:
class UInt32TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(UInt32TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -101,7 +112,7 @@ def _get_element_onnx_type(self):
except ImportError:
class UInt64TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(UInt64TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -113,7 +124,7 @@ def _get_element_onnx_type(self):
except ImportError:
class UInt8TensorType(TensorType):
- def __init__(self, shape=None, doc_string=''):
+ def __init__(self, shape=None, doc_string=""):
super(UInt8TensorType, self).__init__(shape, doc_string)
def _get_element_onnx_type(self):
@@ -125,7 +136,7 @@ def _get_element_onnx_type(self):
except ImportError:
class UInt8Type(DataType):
- def __init__(self, doc_string=''):
+ def __init__(self, doc_string=""):
super(UInt8Type, self).__init__([1, 1], doc_string)
def to_onnx_type(self):
@@ -144,7 +155,7 @@ def __repr__(self):
except ImportError:
class Int8Type(DataType):
- def __init__(self, doc_string=''):
+ def __init__(self, doc_string=""):
super(Int8Type, self).__init__([1, 1], doc_string)
def to_onnx_type(self):
@@ -162,8 +173,7 @@ def copy_type(vtype, empty=True):
if isinstance(vtype, SequenceType):
return vtype.__class__(copy_type(vtype.element_type))
if isinstance(vtype, DictionaryType):
- return vtype.__class__(copy_type(vtype.key_type),
- copy_type(vtype.value_type))
+ return vtype.__class__(copy_type(vtype.key_type), copy_type(vtype.value_type))
return vtype.__class__()
@@ -171,9 +181,7 @@ def _guess_type_proto(data_type, dims):
# This could be moved to onnxconverter_common.
for d in dims:
if d == 0:
- raise RuntimeError(
- "Dimension should not be null: {}.".format(
- list(dims)))
+ raise RuntimeError("Dimension should not be null: {}.".format(list(dims)))
if data_type == onnx_proto.TensorProto.FLOAT:
return FloatTensorType(dims)
if data_type == onnx_proto.TensorProto.DOUBLE:
@@ -198,7 +206,8 @@ def _guess_type_proto(data_type, dims):
raise NotImplementedError(
"Unsupported data_type '{}'. You may raise an issue "
"at https://github.com/onnx/sklearn-onnx/issues."
- "".format(data_type))
+ "".format(data_type)
+ )
def _guess_type_proto_str(data_type, dims):
@@ -227,7 +236,8 @@ def _guess_type_proto_str(data_type, dims):
raise NotImplementedError(
"Unsupported data_type '{}'. You may raise an issue "
"at https://github.com/onnx/sklearn-onnx/issues."
- "".format(data_type))
+ "".format(data_type)
+ )
def _guess_type_proto_str_inv(data_type):
@@ -247,7 +257,8 @@ def _guess_type_proto_str_inv(data_type):
raise NotImplementedError(
"Unsupported data_type '{}'. You may raise an issue "
"at https://github.com/onnx/sklearn-onnx/issues."
- "".format(data_type))
+ "".format(data_type)
+ )
def _guess_numpy_type(data_type, dims):
@@ -256,13 +267,15 @@ def _guess_numpy_type(data_type, dims):
return FloatTensorType(dims)
if data_type == np.float64:
return DoubleTensorType(dims)
- if data_type in (np.str_, str, object) or str(data_type) in ('")]
+ debug_info = [str(type(onnx_model)).split(".")[-1].strip("'>")]
else:
- debug_info = debug_info + \
- [str(type(onnx_model)).split('.')[-1].strip("'>")]
+ debug_info = debug_info + [str(type(onnx_model)).split(".")[-1].strip("'>")]
- if hasattr(onnx_model, 'graph'):
+ if hasattr(onnx_model, "graph"):
return _apply_optimisation_on_graph(
- onnx_remove_node_identity, onnx_model,
- recursive=recursive, debug_info=debug_info)
+ onnx_remove_node_identity,
+ onnx_model,
+ recursive=recursive,
+ debug_info=debug_info,
+ )
graph = onnx_model
@@ -50,7 +55,7 @@ def retrieve_idnodes(graph, existing_nodes):
for i, exnode in enumerate(existing_nodes):
if exnode is None:
continue
- if exnode.op_type == 'Identity':
+ if exnode.op_type == "Identity":
input = exnode.input[0]
output = exnode.output[0]
idnodes.append((i, exnode, input, output))
@@ -106,38 +111,40 @@ def retrieve_local_variables_nodes(nodes):
if out in nodes[j].input:
nodes[j] = _rename_node_input(nodes[j], out, inp)
logger.debug(
- '[VarId-a] rename node input %r into %r' % (
- out, inp))
+ "[VarId-a] rename node input %r into %r" % (out, inp)
+ )
rem += 1
- if nodes[j].op_type == 'Identity':
+ if nodes[j].op_type == "Identity":
restart = True
- logger.debug('[NodeId-a] remove %r' % nodes[i])
+ logger.debug("[NodeId-a] remove %r" % nodes[i])
nodes[i] = None
rem += 1
continue
- if (not restart and inp not in inputs and inp not in outputs and
- out not in outputs):
+ if (
+ not restart
+ and inp not in inputs
+ and inp not in outputs
+ and out not in outputs
+ ):
# We cannot change an input name or an output name.
for j in range(len(nodes)):
if nodes[j] is None:
continue
if inp in nodes[j].output:
nodes[j] = _rename_node_output(nodes[j], inp, out)
- logger.debug(
- '[Var] rename node output %r into %r' % (
- out, inp))
+ logger.debug("[Var] rename node output %r into %r" % (out, inp))
rem += 1
- if nodes[j].op_type == 'Identity':
+ if nodes[j].op_type == "Identity":
restart = True
if inp in nodes[j].input:
nodes[j] = _rename_node_input(nodes[j], inp, out)
logger.debug(
- '[VarId-b] rename node input %r into %r' % (
- out, inp))
+ "[VarId-b] rename node input %r into %r" % (out, inp)
+ )
rem += 1
- if nodes[j].op_type == 'Identity':
+ if nodes[j].op_type == "Identity":
restart = True
- logger.debug('[NodeId-b] remove %r' % nodes[i])
+ logger.debug("[NodeId-b] remove %r" % nodes[i])
nodes[i] = None
rem += 1
@@ -149,13 +156,20 @@ def retrieve_local_variables_nodes(nodes):
continue
nodes[i] = _apply_remove_node_fct_node(
onnx_remove_node_identity,
- node, recursive=True, debug_info=debug_info + [node.name])
+ node,
+ recursive=True,
+ debug_info=debug_info + [node.name],
+ )
# Finally create the new graph.
nodes = list(filter(lambda n: n is not None, nodes))
- graph = make_graph(nodes, onnx_model.name,
- onnx_model.input, onnx_model.output,
- onnx_model.initializer)
+ graph = make_graph(
+ nodes,
+ onnx_model.name,
+ onnx_model.input,
+ onnx_model.output,
+ onnx_model.initializer,
+ )
graph.value_info.extend(onnx_model.value_info)
return graph
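FloatTensorType, Int64TensorType and the other classes imported above are
what callers pass as initial_types when the input signature cannot be
inferred from data. A minimal sketch (illustrative data; assumes scikit-learn
and skl2onnx are installed):

    import numpy as np
    from sklearn.preprocessing import StandardScaler
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType

    X = np.random.rand(10, 4).astype(np.float32)
    scaler = StandardScaler().fit(X)

    # None as the first dimension keeps the batch size dynamic.
    initial_types = [("input", FloatTensorType([None, 4]))]
    onx = convert_sklearn(scaler, initial_types=initial_types)
    print(onx.graph.input)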
diff --git a/skl2onnx/common/shape_calculator.py b/skl2onnx/common/shape_calculator.py
index f62167cd2..50d3b600e 100644
--- a/skl2onnx/common/shape_calculator.py
+++ b/skl2onnx/common/shape_calculator.py
@@ -32,20 +32,27 @@ def calculate_linear_classifier_output_shapes(operator):
def _calculate_linear_classifier_output_shapes(
- operator, decision_path=False, decision_leaf=False,
- enable_type_checking=True):
+ operator, decision_path=False, decision_leaf=False, enable_type_checking=True
+):
n_out = 0
if decision_path:
n_out += 1
if decision_leaf:
n_out += 1
out_range = [2, 2 + n_out]
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=out_range)
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=out_range
+ )
if enable_type_checking:
- check_input_and_output_types(operator, good_input_types=[
- BooleanTensorType, DoubleTensorType,
- FloatTensorType, Int64TensorType])
+ check_input_and_output_types(
+ operator,
+ good_input_types=[
+ BooleanTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ ],
+ )
N = operator.inputs[0].get_first_dimension()
op = operator.raw_operator
@@ -55,40 +62,57 @@ def _calculate_linear_classifier_output_shapes(
if all(isinstance(i, np.ndarray) for i in class_labels):
class_labels = np.concatenate(class_labels)
if all(isinstance(i, str) for i in class_labels):
- shape = ([N, len(op.classes_)]
- if (getattr(op, 'multilabel_', False) or (
- isinstance(op.classes_, list) and
- isinstance(op.classes_[0], np.ndarray))) else [N])
+ shape = (
+ [N, len(op.classes_)]
+ if (
+ getattr(op, "multilabel_", False)
+ or (
+ isinstance(op.classes_, list)
+ and isinstance(op.classes_[0], np.ndarray)
+ )
+ )
+ else [N]
+ )
operator.outputs[0].set_type(StringTensorType(shape=shape))
- if number_of_classes > 2 or operator.type != 'SklearnLinearSVC':
- shape = ([len(op.classes_), N, max([len(x) for x in op.classes_])]
- if isinstance(op.classes_, list)
- and isinstance(op.classes_[0], np.ndarray)
- else [N, number_of_classes])
+ if number_of_classes > 2 or operator.type != "SklearnLinearSVC":
+ shape = (
+ [len(op.classes_), N, max([len(x) for x in op.classes_])]
+ if isinstance(op.classes_, list)
+ and isinstance(op.classes_[0], np.ndarray)
+ else [N, number_of_classes]
+ )
operator.outputs[1].type.shape = shape
else:
# For binary LinearSVC, we produce probability of
# the positive class
operator.outputs[1].type.shape = [N, 1]
- elif all(isinstance(i, (numbers.Real, bool, np.bool_))
- for i in class_labels):
- shape = ([N, len(op.classes_)]
- if (getattr(op, 'multilabel_', False) or (
- isinstance(op.classes_, list) and
- isinstance(op.classes_[0], np.ndarray))) else [N])
+ elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in class_labels):
+ shape = (
+ [N, len(op.classes_)]
+ if (
+ getattr(op, "multilabel_", False)
+ or (
+ isinstance(op.classes_, list)
+ and isinstance(op.classes_[0], np.ndarray)
+ )
+ )
+ else [N]
+ )
operator.outputs[0].set_type(Int64TensorType(shape=shape))
- if number_of_classes > 2 or operator.type != 'SklearnLinearSVC':
- shape = ([len(op.classes_), N, max([len(x) for x in op.classes_])]
- if isinstance(op.classes_, list)
- and isinstance(op.classes_[0], np.ndarray)
- else [N, number_of_classes])
+ if number_of_classes > 2 or operator.type != "SklearnLinearSVC":
+ shape = (
+ [len(op.classes_), N, max([len(x) for x in op.classes_])]
+ if isinstance(op.classes_, list)
+ and isinstance(op.classes_[0], np.ndarray)
+ else [N, number_of_classes]
+ )
operator.outputs[1].type.shape = shape
else:
# For binary LinearSVC, we produce probability of
# the positive class
operator.outputs[1].type.shape = [N, 1]
else:
- raise ValueError('Label types must be all integers or all strings.')
+ raise ValueError("Label types must be all integers or all strings.")
# decision_path, decision_leaf
for n in range(2, len(operator.outputs)):
@@ -107,14 +131,18 @@ def calculate_linear_regressor_output_shapes(operator):
_calculate_linear_regressor_output_shapes(operator)
-def _calculate_linear_regressor_output_shapes(
- operator, enable_type_checking=True):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+def _calculate_linear_regressor_output_shapes(operator, enable_type_checking=True):
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
if enable_type_checking:
- check_input_and_output_types(operator, good_input_types=[
- BooleanTensorType, DoubleTensorType,
- FloatTensorType, Int64TensorType])
+ check_input_and_output_types(
+ operator,
+ good_input_types=[
+ BooleanTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ ],
+ )
inp0 = operator.inputs[0].type
if isinstance(inp0, (FloatTensorType, DoubleTensorType)):
@@ -123,10 +151,13 @@ def _calculate_linear_regressor_output_shapes(
cls_type = FloatTensorType
N = operator.inputs[0].get_first_dimension()
- if (hasattr(operator.raw_operator, 'coef_') and
- len(operator.raw_operator.coef_.shape) > 1):
- operator.outputs[0].set_type(cls_type([
- N, operator.raw_operator.coef_.shape[0]]))
+ if (
+ hasattr(operator.raw_operator, "coef_")
+ and len(operator.raw_operator.coef_.shape) > 1
+ ):
+ operator.outputs[0].set_type(
+ cls_type([N, operator.raw_operator.coef_.shape[0]])
+ )
else:
operator.outputs[0].set_type(cls_type([N, 1]))
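The shape calculators above decide the declared output shapes: [N] (or
[N, n_classes] for multi-label) for the predicted labels and [N, n_classes]
for the scores, with [N, 1] as the special case of a binary LinearSVC. A
minimal sketch showing the resulting graph outputs (illustrative data;
zipmap=False keeps the probabilities as a plain tensor):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from skl2onnx import to_onnx

    rng = np.random.RandomState(0)
    X = rng.rand(30, 4).astype(np.float32)
    y = rng.randint(0, 3, 30)
    clf = LogisticRegression(max_iter=500).fit(X, y)

    onx = to_onnx(clf, X, options={"zipmap": False})
    for out in onx.graph.output:
        dims = [d.dim_param or d.dim_value for d in out.type.tensor_type.shape.dim]
        print(out.name, dims)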
diff --git a/skl2onnx/common/tree_ensemble.py b/skl2onnx/common/tree_ensemble.py
index 2f44fc5d0..5e8b3893a 100644
--- a/skl2onnx/common/tree_ensemble.py
+++ b/skl2onnx/common/tree_ensemble.py
@@ -8,40 +8,40 @@
def get_default_tree_classifier_attribute_pairs():
attrs = {}
- attrs['post_transform'] = 'NONE'
- attrs['nodes_treeids'] = []
- attrs['nodes_nodeids'] = []
- attrs['nodes_featureids'] = []
- attrs['nodes_modes'] = []
- attrs['nodes_values'] = []
- attrs['nodes_truenodeids'] = []
- attrs['nodes_falsenodeids'] = []
- attrs['nodes_missing_value_tracks_true'] = []
- attrs['nodes_hitrates'] = []
- attrs['class_treeids'] = []
- attrs['class_nodeids'] = []
- attrs['class_ids'] = []
- attrs['class_weights'] = []
+ attrs["post_transform"] = "NONE"
+ attrs["nodes_treeids"] = []
+ attrs["nodes_nodeids"] = []
+ attrs["nodes_featureids"] = []
+ attrs["nodes_modes"] = []
+ attrs["nodes_values"] = []
+ attrs["nodes_truenodeids"] = []
+ attrs["nodes_falsenodeids"] = []
+ attrs["nodes_missing_value_tracks_true"] = []
+ attrs["nodes_hitrates"] = []
+ attrs["class_treeids"] = []
+ attrs["class_nodeids"] = []
+ attrs["class_ids"] = []
+ attrs["class_weights"] = []
return attrs
def get_default_tree_regressor_attribute_pairs():
attrs = {}
- attrs['post_transform'] = 'NONE'
- attrs['n_targets'] = 0
- attrs['nodes_treeids'] = []
- attrs['nodes_nodeids'] = []
- attrs['nodes_featureids'] = []
- attrs['nodes_modes'] = []
- attrs['nodes_values'] = []
- attrs['nodes_truenodeids'] = []
- attrs['nodes_falsenodeids'] = []
- attrs['nodes_missing_value_tracks_true'] = []
- attrs['nodes_hitrates'] = []
- attrs['target_treeids'] = []
- attrs['target_nodeids'] = []
- attrs['target_ids'] = []
- attrs['target_weights'] = []
+ attrs["post_transform"] = "NONE"
+ attrs["n_targets"] = 0
+ attrs["nodes_treeids"] = []
+ attrs["nodes_nodeids"] = []
+ attrs["nodes_featureids"] = []
+ attrs["nodes_modes"] = []
+ attrs["nodes_values"] = []
+ attrs["nodes_truenodeids"] = []
+ attrs["nodes_falsenodeids"] = []
+ attrs["nodes_missing_value_tracks_true"] = []
+ attrs["nodes_hitrates"] = []
+ attrs["target_treeids"] = []
+ attrs["target_nodeids"] = []
+ attrs["target_ids"] = []
+ attrs["target_weights"] = []
return attrs
@@ -103,39 +103,54 @@ def sklearn_threshold(dy, dtype, mode):
return bfy2
return np.float64(fy)
raise TypeError("Unexpected dtype {}.".format(dtype))
- raise RuntimeError("Threshold is not changed for other mode and "
- "'BRANCH_LEQ' (actually '{}').".format(mode))
-
-
-def add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id,
- feature_id, mode, value, true_child_id, false_child_id,
- weights, weight_id_bias, leaf_weights_are_counts,
- adjust_threshold_for_sklearn, dtype,
- nodes_missing_value_tracks_true=False):
- attr_pairs['nodes_treeids'].append(tree_id)
- attr_pairs['nodes_nodeids'].append(node_id)
- attr_pairs['nodes_featureids'].append(feature_id)
- attr_pairs['nodes_modes'].append(mode)
- if adjust_threshold_for_sklearn and mode != 'LEAF':
- attr_pairs['nodes_values'].append(
- sklearn_threshold(value, dtype, mode))
+ raise RuntimeError(
+ "Threshold is not changed for other mode and "
+ "'BRANCH_LEQ' (actually '{}').".format(mode)
+ )
+
+
+def add_node(
+ attr_pairs,
+ is_classifier,
+ tree_id,
+ tree_weight,
+ node_id,
+ feature_id,
+ mode,
+ value,
+ true_child_id,
+ false_child_id,
+ weights,
+ weight_id_bias,
+ leaf_weights_are_counts,
+ adjust_threshold_for_sklearn,
+ dtype,
+ nodes_missing_value_tracks_true=False,
+):
+ attr_pairs["nodes_treeids"].append(tree_id)
+ attr_pairs["nodes_nodeids"].append(node_id)
+ attr_pairs["nodes_featureids"].append(feature_id)
+ attr_pairs["nodes_modes"].append(mode)
+ if adjust_threshold_for_sklearn and mode != "LEAF":
+ attr_pairs["nodes_values"].append(sklearn_threshold(value, dtype, mode))
else:
- attr_pairs['nodes_values'].append(value)
- attr_pairs['nodes_truenodeids'].append(true_child_id)
- attr_pairs['nodes_falsenodeids'].append(false_child_id)
- attr_pairs['nodes_missing_value_tracks_true'].append(
- nodes_missing_value_tracks_true)
- attr_pairs['nodes_hitrates'].append(1.)
+ attr_pairs["nodes_values"].append(value)
+ attr_pairs["nodes_truenodeids"].append(true_child_id)
+ attr_pairs["nodes_falsenodeids"].append(false_child_id)
+ attr_pairs["nodes_missing_value_tracks_true"].append(
+ nodes_missing_value_tracks_true
+ )
+ attr_pairs["nodes_hitrates"].append(1.0)
# Add leaf information for making prediction
- if mode == 'LEAF':
+ if mode == "LEAF":
flattened_weights = weights.flatten()
factor = tree_weight
        # If the values stored at leaves are counts of possible classes, we
        # need to convert them to probabilities by doing a normalization.
if leaf_weights_are_counts:
s = sum(flattened_weights)
- factor /= float(s) if s != 0. else 1.
+ factor /= float(s) if s != 0.0 else 1.0
flattened_weights = [w * factor for w in flattened_weights]
if len(flattened_weights) == 2 and is_classifier:
flattened_weights = [flattened_weights[1]]
@@ -144,77 +159,113 @@ def add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id,
# classifiers and regressors
if is_classifier:
for i, w in enumerate(flattened_weights):
- attr_pairs['class_treeids'].append(tree_id)
- attr_pairs['class_nodeids'].append(node_id)
- attr_pairs['class_ids'].append(i + weight_id_bias)
- attr_pairs['class_weights'].append(w)
+ attr_pairs["class_treeids"].append(tree_id)
+ attr_pairs["class_nodeids"].append(node_id)
+ attr_pairs["class_ids"].append(i + weight_id_bias)
+ attr_pairs["class_weights"].append(w)
else:
for i, w in enumerate(flattened_weights):
- attr_pairs['target_treeids'].append(tree_id)
- attr_pairs['target_nodeids'].append(node_id)
- attr_pairs['target_ids'].append(i + weight_id_bias)
- attr_pairs['target_weights'].append(w)
+ attr_pairs["target_treeids"].append(tree_id)
+ attr_pairs["target_nodeids"].append(node_id)
+ attr_pairs["target_ids"].append(i + weight_id_bias)
+ attr_pairs["target_weights"].append(w)
-def add_tree_to_attribute_pairs(attr_pairs, is_classifier, tree, tree_id,
- tree_weight, weight_id_bias,
- leaf_weights_are_counts,
- adjust_threshold_for_sklearn=False,
- dtype=None):
+def add_tree_to_attribute_pairs(
+ attr_pairs,
+ is_classifier,
+ tree,
+ tree_id,
+ tree_weight,
+ weight_id_bias,
+ leaf_weights_are_counts,
+ adjust_threshold_for_sklearn=False,
+ dtype=None,
+):
for i in range(tree.node_count):
node_id = i
weight = tree.value[i]
if tree.children_left[i] > i or tree.children_right[i] > i:
- mode = 'BRANCH_LEQ'
+ mode = "BRANCH_LEQ"
feat_id = tree.feature[i]
threshold = tree.threshold[i]
left_child_id = int(tree.children_left[i])
right_child_id = int(tree.children_right[i])
else:
- mode = 'LEAF'
+ mode = "LEAF"
feat_id = 0
- threshold = 0.
+ threshold = 0.0
left_child_id = 0
right_child_id = 0
- add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id,
- feat_id, mode, threshold, left_child_id, right_child_id,
- weight, weight_id_bias, leaf_weights_are_counts,
- adjust_threshold_for_sklearn=adjust_threshold_for_sklearn,
- dtype=dtype)
+ add_node(
+ attr_pairs,
+ is_classifier,
+ tree_id,
+ tree_weight,
+ node_id,
+ feat_id,
+ mode,
+ threshold,
+ left_child_id,
+ right_child_id,
+ weight,
+ weight_id_bias,
+ leaf_weights_are_counts,
+ adjust_threshold_for_sklearn=adjust_threshold_for_sklearn,
+ dtype=dtype,
+ )
def add_tree_to_attribute_pairs_hist_gradient_boosting(
- attr_pairs, is_classifier, tree, tree_id,
- tree_weight, weight_id_bias,
- leaf_weights_are_counts,
- adjust_threshold_for_sklearn=False,
- dtype=None):
+ attr_pairs,
+ is_classifier,
+ tree,
+ tree_id,
+ tree_weight,
+ weight_id_bias,
+ leaf_weights_are_counts,
+ adjust_threshold_for_sklearn=False,
+ dtype=None,
+):
for i, node in enumerate(tree.nodes):
node_id = i
- weight = node['value']
+ weight = node["value"]
- if node['is_leaf']:
- mode = 'LEAF'
+ if node["is_leaf"]:
+ mode = "LEAF"
feat_id = 0
- threshold = 0.
+ threshold = 0.0
left_child_id = 0
right_child_id = 0
missing = False
else:
- mode = 'BRANCH_LEQ'
- feat_id = node['feature_idx']
+ mode = "BRANCH_LEQ"
+ feat_id = node["feature_idx"]
try:
- threshold = node['threshold']
+ threshold = node["threshold"]
except ValueError:
- threshold = node['num_threshold']
- left_child_id = node['left']
- right_child_id = node['right']
- missing = node['missing_go_to_left']
-
- add_node(attr_pairs, is_classifier, tree_id, tree_weight, node_id,
- feat_id, mode, threshold, left_child_id, right_child_id,
- weight, weight_id_bias, leaf_weights_are_counts,
- adjust_threshold_for_sklearn=adjust_threshold_for_sklearn,
- dtype=dtype, nodes_missing_value_tracks_true=missing)
+ threshold = node["num_threshold"]
+ left_child_id = node["left"]
+ right_child_id = node["right"]
+ missing = node["missing_go_to_left"]
+
+ add_node(
+ attr_pairs,
+ is_classifier,
+ tree_id,
+ tree_weight,
+ node_id,
+ feat_id,
+ mode,
+ threshold,
+ left_child_id,
+ right_child_id,
+ weight,
+ weight_id_bias,
+ leaf_weights_are_counts,
+ adjust_threshold_for_sklearn=adjust_threshold_for_sklearn,
+ dtype=dtype,
+ nodes_missing_value_tracks_true=missing,
+ )
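add_node and add_tree_to_attribute_pairs flatten a fitted scikit-learn tree
into the parallel attribute lists (nodes_treeids, nodes_modes, nodes_values,
and so on) expected by the ai.onnx.ml TreeEnsemble operators. A minimal
sketch of where those attributes end up (illustrative model and data):

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier
    from skl2onnx import to_onnx

    rng = np.random.RandomState(0)
    X = rng.rand(50, 3).astype(np.float32)
    y = (X[:, 0] > 0.5).astype(np.int64)
    tree = DecisionTreeClassifier(max_depth=2).fit(X, y)

    onx = to_onnx(tree, X)
    node = next(n for n in onx.graph.node if n.op_type == "TreeEnsembleClassifier")
    for att in node.attribute:
        if att.name.startswith("nodes_"):
            print(att.name, max(len(att.ints), len(att.floats), len(att.strings)))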
diff --git a/skl2onnx/common/utils.py b/skl2onnx/common/utils.py
index a391fc356..b2a98c11c 100644
--- a/skl2onnx/common/utils.py
+++ b/skl2onnx/common/utils.py
@@ -14,14 +14,14 @@
from onnxconverter_common.utils import check_input_and_output_types # noqa
from .data_types import TensorType
-_unique_index = {'subgraph': 0}
+_unique_index = {"subgraph": 0}
def get_unique_subgraph():
"Returns a unique identifier integer for subgraph."
global _unique_index
- _unique_index['subgraph'] += 1
- return _unique_index['subgraph']
+ _unique_index["subgraph"] += 1
+ return _unique_index["subgraph"]
def get_producer():
@@ -29,6 +29,7 @@ def get_producer():
Internal helper function to return the producer
"""
from .. import __producer__
+
return __producer__
@@ -37,6 +38,7 @@ def get_producer_version():
Internal helper function to return the producer version
"""
from .. import __producer_version__
+
return __producer_version__
@@ -45,6 +47,7 @@ def get_domain():
Internal helper function to return the model domain
"""
from .. import __domain__
+
return __domain__
@@ -53,6 +56,7 @@ def get_model_version():
Internal helper function to return the model version
"""
from .. import __model_version__
+
return __model_version__
@@ -82,12 +86,13 @@ def get_column_index(i, inputs):
return 0, 0
vi = 0
pos = 0
- end = (inputs[0].type.shape[1]
- if isinstance(inputs[0].type, TensorType) else 1)
+ end = inputs[0].type.shape[1] if isinstance(inputs[0].type, TensorType) else 1
if end is None:
- raise RuntimeError("Cannot extract a specific column {0} when "
- "one input ('{1}') has unknown "
- "dimension.".format(i, inputs[0]))
+ raise RuntimeError(
+ "Cannot extract a specific column {0} when "
+ "one input ('{1}') has unknown "
+ "dimension.".format(i, inputs[0])
+ )
while True:
if pos <= i < end:
return (vi, i - pos)
@@ -96,13 +101,20 @@ def get_column_index(i, inputs):
if vi >= len(inputs):
raise RuntimeError(
"Input {} (i={}, end={}) is not available in\n{}".format(
- vi, i, end, pprint.pformat(inputs)))
- rel_end = (inputs[vi].type.shape[1]
- if isinstance(inputs[vi].type, TensorType) else 1)
+ vi, i, end, pprint.pformat(inputs)
+ )
+ )
+ rel_end = (
+ inputs[vi].type.shape[1]
+ if isinstance(inputs[vi].type, TensorType)
+ else 1
+ )
if rel_end is None:
- raise RuntimeError("Cannot extract a specific column {0} when "
- "one input ('{1}') has unknown "
- "dimension.".format(i, inputs[vi]))
+ raise RuntimeError(
+ "Cannot extract a specific column {0} when "
+ "one input ('{1}') has unknown "
+ "dimension.".format(i, inputs[vi])
+ )
end += rel_end
else:
for ind, inp in enumerate(inputs):
@@ -114,8 +126,8 @@ def get_column_index(i, inputs):
"initial_types fits the column names specified in the "
"pipeline to convert. This may happen because a "
"ColumnTransformer follows a transformer without "
- "any mapped converter in a pipeline." % (
- i, [n.raw_name for n in inputs]))
+ "any mapped converter in a pipeline." % (i, [n.raw_name for n in inputs])
+ )
def get_column_indices(indices, inputs, multiple):
@@ -152,7 +164,8 @@ def get_column_indices(indices, inputs, multiple):
raise NotImplementedError(
"sklearn-onnx is not able to merge multiple columns from "
"multiple variables ({0}). You should think about merging "
- "initial types.".format(cols))
+ "initial types.".format(cols)
+ )
return onnx_var, onnx_is
@@ -162,7 +175,7 @@ def hash_array(value, length=15):
onx = from_array(value)
except AttributeError as e:
# sparse matrix for example
- if hasattr(value, 'tocoo'):
+ if hasattr(value, "tocoo"):
coo = value.tocoo()
arrs = [coo.data, coo.row, coo.col, np.array(coo.shape)]
m = hashlib.sha256()
@@ -171,18 +184,18 @@ def hash_array(value, length=15):
return m.hexdigest()[:length]
raise ValueError(
- "Unable to compute hash for type %r (value=%r)." % (
- type(value), value)) from e
+ "Unable to compute hash for type %r (value=%r)." % (type(value), value)
+ ) from e
except RuntimeError as ee:
# cannot be serialized
if isinstance(value, (np.ndarray, list)):
- b = str(value).encode('utf-8')
+ b = str(value).encode("utf-8")
m = hashlib.sha256()
m.update(b)
return m.hexdigest()[:length]
raise RuntimeError(
- "Unable to convert value type %r, (value=%r)." % (
- type(value), value)) from ee
+ "Unable to convert value type %r, (value=%r)." % (type(value), value)
+ ) from ee
m = hashlib.sha256()
m.update(onx.SerializeToString())
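get_column_index walks the declared inputs to translate a global column index
into a pair (input index, column inside that input). A simplified
re-implementation of the same arithmetic (not the library helper itself)
makes the mapping explicit:

    def column_index(i, widths):
        # widths: number of columns declared for each input, in order.
        pos = 0
        for vi, width in enumerate(widths):
            if pos <= i < pos + width:
                return vi, i - pos
            pos += width
        raise IndexError("column %d out of range" % i)

    # Two inputs of widths 3 and 2: global column 4 is column 1 of input 1.
    assert column_index(4, [3, 2]) == (1, 1)
    assert column_index(0, [3, 2]) == (0, 0)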
diff --git a/skl2onnx/common/utils_checking.py b/skl2onnx/common/utils_checking.py
index a4356a433..34086f438 100644
--- a/skl2onnx/common/utils_checking.py
+++ b/skl2onnx/common/utils_checking.py
@@ -11,10 +11,11 @@ def check_signature(fct, reference, skip=None):
(same parameter names).
Raises an exception otherwise.
"""
+
def select_parameters(pars):
new_pars = OrderedDict()
for i, (name, p) in enumerate(pars.items()):
- if i >= 3 and name in ('op_type', 'op_domain', 'op_version'):
+ if i >= 3 and name in ("op_type", "op_domain", "op_version"):
if p.default is not None:
# Parameters op_type and op_domain are skipped.
continue
@@ -28,11 +29,12 @@ def select_parameters(pars):
if len(fct_pars) != len(ref_pars):
raise TypeError(
"Function '{}' must have {} parameters but has {}."
- "".format(fct.__name__, len(ref_pars),
- len(fct_pars)))
+ "".format(fct.__name__, len(ref_pars), len(fct_pars))
+ )
for i, (a, b) in enumerate(zip(fct_pars, ref_pars)):
if a != b and skip is not None and b not in skip and a not in skip:
raise NameError(
"Parameter name mismatch at position {}."
"Function '{}' has '{}' but '{}' is expected."
- "".format(i + 1, fct.__name__, a, b))
+ "".format(i + 1, fct.__name__, a, b)
+ )
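check_signature enforces that a custom converter or shape calculator exposes
the same parameter names as the reference signature it replaces. A simplified
sketch of the same idea using the standard inspect module (not the library
helper itself):

    import inspect

    def reference(scope, operator, container):
        ...

    def my_converter(scope, operator, container):
        ...

    ref = list(inspect.signature(reference).parameters)
    got = list(inspect.signature(my_converter).parameters)
    assert got == ref, "parameter names differ: %r != %r" % (got, ref)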
diff --git a/skl2onnx/common/utils_classifier.py b/skl2onnx/common/utils_classifier.py
index dceb59753..29d10934b 100644
--- a/skl2onnx/common/utils_classifier.py
+++ b/skl2onnx/common/utils_classifier.py
@@ -12,30 +12,30 @@ def get_label_classes(scope, op, node_names=False):
handles option ``nocl`` and ``zipmap=='columns'``
"""
options = scope.get_options(op, dict(nocl=False))
- if options['nocl']:
+ if options["nocl"]:
if len(op.classes_.shape) > 1 and op.classes_.shape[1] > 1:
raise RuntimeError(
"Options 'nocl=True' is not implemented for multi-label "
- "classification (class: {}).".format(op.__class__.__name__))
+ "classification (class: {}).".format(op.__class__.__name__)
+ )
classes = np.arange(0, len(op.classes_))
elif node_names:
try:
options = scope.get_options(op, dict(zipmap=False))
- zipcol = options['zipmap'] == 'columns'
+ zipcol = options["zipmap"] == "columns"
except NameError:
zipcol = False
if zipcol:
clnames = op.classes_.ravel()
- if (np.issubdtype(clnames.dtype, np.integer) or
- clnames.dtype == np.bool_):
- classes = np.array(['i%d' % c for c in clnames])
+ if np.issubdtype(clnames.dtype, np.integer) or clnames.dtype == np.bool_:
+ classes = np.array(["i%d" % c for c in clnames])
else:
- classes = np.array(['s%s' % c for c in clnames])
+ classes = np.array(["s%s" % c for c in clnames])
else:
classes = op.classes_
- elif hasattr(op, 'classes_'):
+ elif hasattr(op, "classes_"):
classes = op.classes_
- elif hasattr(op, 'intercept_'):
+ elif hasattr(op, "intercept_"):
classes = len(op.intercept_)
elif hasattr(op, "y_"):
# _ConstantPredictor
@@ -43,47 +43,70 @@ def get_label_classes(scope, op, node_names=False):
else:
raise RuntimeError(
"No known ways to retrieve the number of classes for class %r."
- "" % type(op))
+ "" % type(op)
+ )
return classes
-def _finalize_converter_classes(scope, argmax_output_name, output_full_name,
- container, classes, proto_dtype):
+def _finalize_converter_classes(
+ scope, argmax_output_name, output_full_name, container, classes, proto_dtype
+):
"""
See :func:`convert_voting_classifier`.
"""
- if (np.issubdtype(classes.dtype, np.floating) or
- classes.dtype == np.bool_):
+ if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_:
class_type = onnx_proto.TensorProto.INT32
classes = np.array(list(map(lambda x: int(x), classes)))
elif np.issubdtype(classes.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
class_type = onnx_proto.TensorProto.STRING
- classes_name = scope.get_unique_variable_name('classes')
+ classes_name = scope.get_unique_variable_name("classes")
container.add_initializer(classes_name, class_type, classes.shape, classes)
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
output_shape = (-1,)
if class_type == onnx_proto.TensorProto.INT32:
- cast2_result_name = scope.get_unique_variable_name('cast2_result')
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
- apply_cast(scope, array_feature_extractor_result_name,
- cast2_result_name, container,
- to=proto_dtype)
- apply_reshape(scope, cast2_result_name, reshaped_result_name,
- container, desired_shape=output_shape)
- apply_cast(scope, reshaped_result_name, output_full_name, container,
- to=onnx_proto.TensorProto.INT64)
+ cast2_result_name = scope.get_unique_variable_name("cast2_result")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ apply_cast(
+ scope,
+ array_feature_extractor_result_name,
+ cast2_result_name,
+ container,
+ to=proto_dtype,
+ )
+ apply_reshape(
+ scope,
+ cast2_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=output_shape,
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ output_full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else: # string labels
- apply_reshape(scope, array_feature_extractor_result_name,
- output_full_name, container, desired_shape=output_shape)
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ output_full_name,
+ container,
+ desired_shape=output_shape,
+ )
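get_label_classes collects the class labels (honouring the nocl and
zipmap='columns' options) and the helpers above map the argmax index back to
those labels in the final output. A minimal sketch with string labels
(illustrative data; zipmap=False keeps tensor outputs):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from skl2onnx import to_onnx

    rng = np.random.RandomState(0)
    X = rng.rand(40, 2).astype(np.float32)
    y = np.array(["cat", "dog"])[(X[:, 0] > 0.5).astype(int)]
    clf = LogisticRegression().fit(X, y)

    onx = to_onnx(clf, X, options={"zipmap": False})
    # The first graph output is the predicted label, typed as a string tensor.
    print(onx.graph.output[0])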
diff --git a/skl2onnx/common/utils_sklearn.py b/skl2onnx/common/utils_sklearn.py
index ab9022d30..4c86573b9 100644
--- a/skl2onnx/common/utils_sklearn.py
+++ b/skl2onnx/common/utils_sklearn.py
@@ -13,56 +13,63 @@ def enumerate_model_names(model, prefix="", short=True):
to the model itself.
"""
if isinstance(model, (list, tuple)):
- if all(map(lambda x: isinstance(x, tuple) and len(x) in (2, 3),
- model)):
+ if all(map(lambda x: isinstance(x, tuple) and len(x) in (2, 3), model)):
for i, named_mod in enumerate(model):
name, mod = named_mod[:2]
- p = (name if short and prefix == ""
- else "{}__{}".format(prefix, name))
+ p = name if short and prefix == "" else "{}__{}".format(prefix, name)
for t in enumerate_model_names(mod, p, short=short):
yield t
else:
for i, mod in enumerate(model):
- p = (i if short and prefix == ""
- else "{}__{}".format(prefix, i))
+ p = i if short and prefix == "" else "{}__{}".format(prefix, i)
for t in enumerate_model_names(mod, p, short=short):
yield t
elif isinstance(model, (dict, OrderedDict)):
for name, mod in model.items():
- p = (name if short and prefix == ""
- else "{}__{}".format(prefix, name))
+ p = name if short and prefix == "" else "{}__{}".format(prefix, name)
for t in enumerate_model_names(mod, p, short=short):
yield t
else:
yield (prefix, model)
- reserved_atts = {'transformers', 'steps', 'transformer_list',
- 'named_estimators_', 'named_transformers_',
- 'transformer_', 'estimator_'}
+ reserved_atts = {
+ "transformers",
+ "steps",
+ "transformer_list",
+ "named_estimators_",
+ "named_transformers_",
+ "transformer_",
+ "estimator_",
+ }
for key in dir(model):
- if (key in ('estimators_', 'estimator') and
- hasattr(model, 'named_estimators_')):
+ if key in ("estimators_", "estimator") and hasattr(
+ model, "named_estimators_"
+ ):
continue
- if (key in ('transformers_', 'transformers') and
- hasattr(model, 'named_transformers_')):
+ if key in ("transformers_", "transformers") and hasattr(
+ model, "named_transformers_"
+ ):
continue
- if (key in reserved_atts or
- (key.endswith("_") and not key.endswith("__") and
- not key.startswith('_'))):
+ if key in reserved_atts or (
+ key.endswith("_") and not key.endswith("__") and not key.startswith("_")
+ ):
try:
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
obj = getattr(model, key)
except AttributeError:
continue
- if (hasattr(obj, 'get_params') and
- isinstance(obj, BaseEstimator)):
- prefix = (key if short and prefix == ""
- else "{}__{}".format(prefix, key))
+ if hasattr(obj, "get_params") and isinstance(obj, BaseEstimator):
+ prefix = (
+ key if short and prefix == "" else "{}__{}".format(prefix, key)
+ )
yield (prefix, obj)
elif isinstance(obj, (list, tuple, dict, OrderedDict)):
if not short or key not in reserved_atts:
- prefix = (key if short and prefix == ""
- else "{}__{}".format(prefix, key))
+ prefix = (
+ key
+ if short and prefix == ""
+ else "{}__{}".format(prefix, key)
+ )
for t in enumerate_model_names(obj, prefix, short=short):
yield t
@@ -71,8 +78,7 @@ def has_pipeline(model):
"""
Tells if a model contains a pipeline.
"""
- return any(map(lambda x: isinstance(x[1], Pipeline),
- enumerate_model_names(model)))
+ return any(map(lambda x: isinstance(x[1], Pipeline), enumerate_model_names(model)))
def _process_options(model, options):
@@ -94,8 +100,8 @@ def _process_options(model, options):
new_options[id(names[k])] = v
continue
try:
- ri = k.rindex('__')
- m2, k2 = k[:ri], k[ri + 2:]
+ ri = k.rindex("__")
+ m2, k2 = k[:ri], k[ri + 2 :]
except ValueError:
key = id(model)
if key not in new_options:
@@ -110,7 +116,9 @@ def _process_options(model, options):
continue
raise RuntimeError(
"Unable to find model name '{}' or '{}' in \n{}".format(
- k, m2, list(sorted(names))))
+ k, m2, list(sorted(names))
+ )
+ )
return _process_pipeline_options(model, new_options)
@@ -129,7 +137,7 @@ def _process_pipeline_options(model, options):
last = v.steps[-1][1]
key = id(last)
for opt, val in opts.items():
- if opt not in {'zipmap', 'nocl', 'output_class_labels'}:
+ if opt not in {"zipmap", "nocl", "output_class_labels"}:
continue
if new_options is None:
new_options = copy.deepcopy(options)
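enumerate_model_names walks a pipeline and yields double-underscore names for
every nested estimator; _process_options then resolves option keys of the
form "<step>__<option>" against those names. A minimal sketch (illustrative
pipeline; the exact names yielded may vary slightly between versions):

    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    from skl2onnx.common.utils_sklearn import enumerate_model_names

    pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())])
    for name, model in enumerate_model_names(pipe):
        print(repr(name), type(model).__name__)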
diff --git a/skl2onnx/convert.py b/skl2onnx/convert.py
index 62fae01ad..db8948589 100644
--- a/skl2onnx/convert.py
+++ b/skl2onnx/convert.py
@@ -12,14 +12,25 @@
from . import operator_converters # noqa
-def convert_sklearn(model, name=None, initial_types=None, doc_string='',
- target_opset=None, custom_conversion_functions=None,
- custom_shape_calculators=None,
- custom_parsers=None, options=None,
- intermediate=False,
- white_op=None, black_op=None, final_types=None,
- dtype=None, naming=None, model_optim=True,
- verbose=0):
+def convert_sklearn(
+ model,
+ name=None,
+ initial_types=None,
+ doc_string="",
+ target_opset=None,
+ custom_conversion_functions=None,
+ custom_shape_calculators=None,
+ custom_parsers=None,
+ options=None,
+ intermediate=False,
+ white_op=None,
+ black_op=None,
+ final_types=None,
+ dtype=None,
+ naming=None,
+ model_optim=True,
+ verbose=0,
+):
"""
This function produces an equivalent
ONNX model of the given scikit-learn model.
@@ -155,39 +166,54 @@ def convert_sklearn(model, name=None, initial_types=None, doc_string='',
Parameter *naming* was added.
"""
if initial_types is None:
- if hasattr(model, 'infer_initial_types'):
+ if hasattr(model, "infer_initial_types"):
initial_types = model.infer_initial_types()
else:
- raise ValueError('Initial types are required. See usage of '
- 'convert(...) in skl2onnx.convert for details')
+ raise ValueError(
+ "Initial types are required. See usage of "
+ "convert(...) in skl2onnx.convert for details"
+ )
if name is None:
name = str(uuid4().hex)
if dtype is not None:
warnings.warn(
- "Parameter dtype is no longer supported. "
- "It will be removed in 1.9.0.",
- DeprecationWarning)
+ "Parameter dtype is no longer supported. " "It will be removed in 1.9.0.",
+ DeprecationWarning,
+ )
- target_opset = (target_opset
- if target_opset else get_latest_tested_opset_version())
+ target_opset = target_opset if target_opset else get_latest_tested_opset_version()
# Parse scikit-learn model as our internal data structure
# (i.e., Topology)
if verbose >= 1:
print("[convert_sklearn] parse_sklearn_model")
topology = parse_sklearn_model(
- model, initial_types, target_opset, custom_conversion_functions,
- custom_shape_calculators, custom_parsers, options=options,
- white_op=white_op, black_op=black_op,
- final_types=final_types, naming=naming)
+ model,
+ initial_types,
+ target_opset,
+ custom_conversion_functions,
+ custom_shape_calculators,
+ custom_parsers,
+ options=options,
+ white_op=white_op,
+ black_op=black_op,
+ final_types=final_types,
+ naming=naming,
+ )
# Convert our Topology object into ONNX. The outcome is an ONNX model.
options = _process_options(model, options)
if verbose >= 1:
print("[convert_sklearn] convert_topology")
onnx_model = convert_topology(
- topology, name, doc_string, target_opset, options=options,
- remove_identity=model_optim and not intermediate, verbose=verbose)
+ topology,
+ name,
+ doc_string,
+ target_opset,
+ options=options,
+ remove_identity=model_optim and not intermediate,
+ verbose=verbose,
+ )
if verbose >= 1:
print("[convert_sklearn] end")
if verbose >= 2:
@@ -200,20 +226,29 @@ def convert_sklearn(model, name=None, initial_types=None, doc_string='',
print(" %r" % inp)
print("---VARIABLES---")
for k, v in sorted(scope.variables.items()):
- print(" %r: is.fed=%r is_leaf=%r - %r" % (
- k, v.is_fed, v.is_leaf, v))
+ print(" %r: is.fed=%r is_leaf=%r - %r" % (k, v.is_fed, v.is_leaf, v))
print("---OPERATORS---")
for k, v in sorted(scope.operators.items()):
- print(" %r: is.evaluated=%r - %r" % (
- k, v.is_evaluated, v))
+ print(" %r: is.evaluated=%r - %r" % (k, v.is_evaluated, v))
return (onnx_model, topology) if intermediate else onnx_model
-def to_onnx(model, X=None, name=None, initial_types=None,
- target_opset=None, options=None,
- white_op=None, black_op=None, final_types=None,
- dtype=None, naming=None, model_optim=True, verbose=0):
+def to_onnx(
+ model,
+ X=None,
+ name=None,
+ initial_types=None,
+ target_opset=None,
+ options=None,
+ white_op=None,
+ black_op=None,
+ final_types=None,
+ dtype=None,
+ naming=None,
+ model_optim=True,
+ verbose=0,
+):
"""
Calls :func:`convert_sklearn` with simplified parameters.
@@ -260,20 +295,28 @@ def to_onnx(model, X=None, name=None, initial_types=None,
if isinstance(model, OnnxOperatorMixin):
if options is not None:
raise NotImplementedError(
- "options not yet implemented for OnnxOperatorMixin.")
+ "options not yet implemented for OnnxOperatorMixin."
+ )
return model.to_onnx(X=X, name=name, target_opset=target_opset)
if name is None:
name = "ONNX(%s)" % model.__class__.__name__
initial_types = guess_initial_types(X, initial_types)
if verbose >= 1:
print("[to_onnx] initial_types=%r" % initial_types)
- return convert_sklearn(model, initial_types=initial_types,
- target_opset=target_opset,
- name=name, options=options,
- white_op=white_op, black_op=black_op,
- final_types=final_types, dtype=dtype,
- verbose=verbose, naming=naming,
- model_optim=model_optim)
+ return convert_sklearn(
+ model,
+ initial_types=initial_types,
+ target_opset=target_opset,
+ name=name,
+ options=options,
+ white_op=white_op,
+ black_op=black_op,
+ final_types=final_types,
+ dtype=dtype,
+ verbose=verbose,
+ naming=naming,
+ model_optim=model_optim,
+ )
def wrap_as_onnx_mixin(model, target_opset=None):
@@ -283,6 +326,7 @@ def wrap_as_onnx_mixin(model, target_opset=None):
and *OnnxOperatorMixin* API.
"""
from .algebra.sklearn_ops import find_class
+
cl = find_class(model.__class__)
if "automation" in str(cl):
raise RuntimeError("Wrong class name '{}'.".format(cl))
diff --git a/skl2onnx/helpers/investigate.py b/skl2onnx/helpers/investigate.py
index a146eb5d8..c3ecbe4db 100644
--- a/skl2onnx/helpers/investigate.py
+++ b/skl2onnx/helpers/investigate.py
@@ -5,6 +5,7 @@
from types import MethodType
import numpy
from numpy.testing import assert_almost_equal
+
try:
from scipy.sparse import csr_matrix
except ImportError:
@@ -12,6 +13,7 @@
from sklearn.base import TransformerMixin, ClassifierMixin
from sklearn.base import RegressorMixin, BaseEstimator
from sklearn.pipeline import Pipeline, FeatureUnion
+
try:
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
except ImportError:
@@ -27,15 +29,16 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None):
if coor is None:
coor = (0,)
yield coor, pipe, vs
- if hasattr(pipe, 'transformer_and_mapper_list') and len(
- pipe.transformer_and_mapper_list):
+ if hasattr(pipe, "transformer_and_mapper_list") and len(
+ pipe.transformer_and_mapper_list
+ ):
# azureml DataTransformer
raise NotImplementedError("Unable to handle this specific case.")
- elif hasattr(pipe, 'mapper') and pipe.mapper:
+ elif hasattr(pipe, "mapper") and pipe.mapper:
# azureml DataTransformer
for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
yield couple
- elif hasattr(pipe, 'built_features'):
+ elif hasattr(pipe, "built_features"):
# sklearn_pandas.dataframe_mapper.DataFrameMapper
for i, (columns, transformers, _) in enumerate(pipe.built_features):
if isinstance(columns, str):
@@ -43,9 +46,9 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None):
if transformers is None:
yield (coor + (i,)), None, columns
else:
- for couple in enumerate_pipeline_models(transformers,
- coor + (i,),
- columns):
+ for couple in enumerate_pipeline_models(
+ transformers, coor + (i,), columns
+ ):
yield couple
elif isinstance(pipe, Pipeline):
for i, (_, model) in enumerate(pipe.steps):
@@ -54,16 +57,17 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None):
elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
for couple in enumerate_pipeline_models(
- fitted_transformer, coor + (i,), column):
+ fitted_transformer, coor + (i,), column
+ ):
yield couple
elif isinstance(pipe, FeatureUnion):
for i, (_, model) in enumerate(pipe.transformer_list):
for couple in enumerate_pipeline_models(model, coor + (i,)):
yield couple
elif TransformedTargetRegressor is not None and isinstance(
- pipe, TransformedTargetRegressor):
- raise NotImplementedError(
- "Not yet implemented for TransformedTargetRegressor.")
+ pipe, TransformedTargetRegressor
+ ):
+ raise NotImplementedError("Not yet implemented for TransformedTargetRegressor.")
elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
pass
elif isinstance(pipe, BaseEstimator):
@@ -71,7 +75,9 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None):
else:
raise TypeError(
"Parameter pipe is not a scikit-learn object: {}\n{}".format(
- type(pipe), pipe))
+ type(pipe), pipe
+ )
+ )
class BaseEstimatorDebugInformation:
@@ -88,20 +94,22 @@ def __init__(self, model):
self.methods = {}
if hasattr(model, "transform") and callable(model.transform):
model._debug_transform = model.transform
- self.methods["transform"] = \
- lambda model, X: model._debug_transform(X)
+ self.methods["transform"] = lambda model, X: model._debug_transform(X)
if hasattr(model, "predict") and callable(model.predict):
model._debug_predict = model.predict
self.methods["predict"] = lambda model, X: model._debug_predict(X)
if hasattr(model, "predict_proba") and callable(model.predict_proba):
model._debug_predict_proba = model.predict_proba
- self.methods["predict_proba"] = \
- lambda model, X: model._debug_predict_proba(X)
+ self.methods["predict_proba"] = lambda model, X: model._debug_predict_proba(
+ X
+ )
if hasattr(model, "decision_function") and callable(
- model.decision_function): # noqa
+ model.decision_function
+ ): # noqa
model._debug_decision_function = model.decision_function # noqa
- self.methods["decision_function"] = \
- lambda model, X: model._debug_decision_function(X)
+ self.methods[
+ "decision_function"
+ ] = lambda model, X: model._debug_decision_function(X)
def __repr__(self):
"""
@@ -113,21 +121,21 @@ def to_str(self, nrows=5):
"""
Tries to produce a readable message.
"""
- rows = ['BaseEstimatorDebugInformation({})'.format(
- self.model.__class__.__name__)]
+ rows = [
+ "BaseEstimatorDebugInformation({})".format(self.model.__class__.__name__)
+ ]
for k in sorted(self.inputs):
if k in self.outputs:
- rows.append(' ' + k + '(')
+ rows.append(" " + k + "(")
self.display(self.inputs[k], nrows)
- rows.append(textwrap.indent(
- self.display(self.inputs[k], nrows), ' '))
- rows.append(' ) -> (')
- rows.append(textwrap.indent(
- self.display(self.outputs[k], nrows), ' '))
- rows.append(' )')
+ rows.append(textwrap.indent(self.display(self.inputs[k], nrows), " "))
+ rows.append(" ) -> (")
+ rows.append(
+ textwrap.indent(self.display(self.outputs[k], nrows), " ")
+ )
+ rows.append(" )")
else:
- raise KeyError(
- "Unable to find output for method '{}'.".format(k))
+ raise KeyError("Unable to find output for method '{}'.".format(k))
return "\n".join(rows)
def display(self, data, nrows):
@@ -135,11 +143,11 @@ def display(self, data, nrows):
Displays the first
"""
text = str(data)
- rows = text.split('\n')
+ rows = text.split("\n")
if len(rows) > nrows:
rows = rows[:nrows]
- rows.append('...')
- if hasattr(data, 'shape'):
+ rows.append("...")
+ if hasattr(data, "shape"):
rows.insert(0, "shape={}".format(data.shape))
return "\n".join(rows)
@@ -156,40 +164,42 @@ def _alter_model_for_debugging(skl_model, recursive=False):
"""
def transform(self, X, *args, **kwargs):
- self._debug.inputs['transform'] = X
- y = self._debug.methods['transform'](self, X, *args, **kwargs)
- self._debug.outputs['transform'] = y
+ self._debug.inputs["transform"] = X
+ y = self._debug.methods["transform"](self, X, *args, **kwargs)
+ self._debug.outputs["transform"] = y
return y
def predict(self, X, *args, **kwargs):
- self._debug.inputs['predict'] = X
- y = self._debug.methods['predict'](self, X, *args, **kwargs)
- self._debug.outputs['predict'] = y
+ self._debug.inputs["predict"] = X
+ y = self._debug.methods["predict"](self, X, *args, **kwargs)
+ self._debug.outputs["predict"] = y
return y
def predict_proba(self, X, *args, **kwargs):
- self._debug.inputs['predict_proba'] = X
- y = self._debug.methods['predict_proba'](self, X, *args, **kwargs)
- self._debug.outputs['predict_proba'] = y
+ self._debug.inputs["predict_proba"] = X
+ y = self._debug.methods["predict_proba"](self, X, *args, **kwargs)
+ self._debug.outputs["predict_proba"] = y
return y
def decision_function(self, X, *args, **kwargs):
- self._debug.inputs['decision_function'] = X
- y = self._debug.methods['decision_function'](self, X, *args, **kwargs)
- self._debug.outputs['decision_function'] = y
+ self._debug.inputs["decision_function"] = X
+ y = self._debug.methods["decision_function"](self, X, *args, **kwargs)
+ self._debug.outputs["decision_function"] = y
return y
new_methods = {
- 'decision_function': decision_function,
- 'transform': transform,
- 'predict': predict,
- 'predict_proba': predict_proba,
+ "decision_function": decision_function,
+ "transform": transform,
+ "predict": predict,
+ "predict_proba": predict_proba,
}
- if hasattr(skl_model, '_debug'):
- raise RuntimeError("The same operator cannot be used twice in "
- "the same pipeline or this method was called "
- "a second time.")
+ if hasattr(skl_model, "_debug"):
+ raise RuntimeError(
+ "The same operator cannot be used twice in "
+ "the same pipeline or this method was called "
+ "a second time."
+ )
if recursive:
for model_ in enumerate_pipeline_models(skl_model):
@@ -199,16 +209,20 @@ def decision_function(self, X, *args, **kwargs):
try:
setattr(model, k, MethodType(new_methods[k], model))
except AttributeError:
- warnings.warn("Unable to overwrite method '{}' for class "
- "{}.".format(k, type(model)))
+ warnings.warn(
+ "Unable to overwrite method '{}' for class "
+ "{}.".format(k, type(model))
+ )
else:
skl_model._debug = BaseEstimatorDebugInformation(skl_model)
for k in skl_model._debug.methods:
try:
setattr(skl_model, k, MethodType(new_methods[k], skl_model))
except AttributeError:
- warnings.warn("Unable to overwrite method '{}' for class "
- "{}.".format(k, type(skl_model)))
+ warnings.warn(
+ "Unable to overwrite method '{}' for class "
+ "{}.".format(k, type(skl_model))
+ )
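
The wrappers above record every call's input and output on the estimator's _debug attribute. A minimal usage sketch, assuming these helpers are importable from skl2onnx.helpers.investigate as in this patch (the data and model below are illustrative, not part of the change):

# Sketch: hook a fitted estimator so that predict() inputs and outputs
# are recorded on model._debug, as done by the helper above.
import numpy as np
from sklearn.linear_model import LogisticRegression
from skl2onnx.helpers.investigate import _alter_model_for_debugging

X = np.random.rand(10, 3).astype(np.float32)
y = np.array([0, 1] * 5)
model = LogisticRegression().fit(X, y)

_alter_model_for_debugging(model)   # replaces predict/predict_proba/...
model.predict(X)
print(model._debug.to_str(nrows=3))
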
def collect_intermediate_steps(model, *args, **kwargs):
@@ -225,17 +239,19 @@ def collect_intermediate_steps(model, *args, **kwargs):
This function is used to check every intermediate model in
a pipeline.
"""
- if 'intermediate' in kwargs:
- if not kwargs['intermediate']:
+ if "intermediate" in kwargs:
+ if not kwargs["intermediate"]:
raise ValueError("Parameter intermediate must be true.")
- del kwargs['intermediate']
+ del kwargs["intermediate"]
from .. import convert_sklearn
from ..helpers.onnx_helper import select_model_inputs_outputs
from ..common import MissingShapeCalculator, MissingConverter
+
try:
model_onnx, topology = convert_sklearn(
- model, *args, intermediate=True, **kwargs)
+ model, *args, intermediate=True, **kwargs
+ )
except (MissingShapeCalculator, MissingConverter):
# The model cannot be converted.
raise
@@ -247,14 +263,15 @@ def collect_intermediate_steps(model, *args, **kwargs):
_alter_model_for_debugging(operator.raw_operator)
inputs = [i.full_name for i in operator.inputs]
outputs = [o.full_name for o in operator.outputs]
- steps.append({
- 'model': operator.raw_operator,
- 'model_onnx': model_onnx,
- 'inputs': inputs,
- 'outputs': outputs,
- 'onnx_step': select_model_inputs_outputs(
- model_onnx, outputs=outputs)
- })
+ steps.append(
+ {
+ "model": operator.raw_operator,
+ "model_onnx": model_onnx,
+ "inputs": inputs,
+ "outputs": outputs,
+ "onnx_step": select_model_inputs_outputs(model_onnx, outputs=outputs),
+ }
+ )
return steps
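
collect_intermediate_steps converts the model once with intermediate=True and, for every operator, keeps the raw scikit-learn step together with an ONNX sub-model truncated at that operator's outputs. A hedged usage sketch follows; the pipeline, input name and onnxruntime calls are illustrative and not part of the patch:

# Sketch: inspect every intermediate step of a converted pipeline.
# Assumes the helper is importable from skl2onnx.helpers.investigate
# and that onnxruntime is installed.
import numpy as np
import onnxruntime as rt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.helpers.investigate import collect_intermediate_steps

X = np.random.rand(20, 3).astype(np.float32)
y = np.array([0, 1] * 10)
pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", LogisticRegression())]).fit(X, y)

steps = collect_intermediate_steps(
    pipe, "pipeline", [("input", FloatTensorType([None, 3]))])

for step in steps:
    sess = rt.InferenceSession(step["onnx_step"].SerializeToString(),
                               providers=["CPUExecutionProvider"])
    result = sess.run(None, {"input": X})
    print(type(step["model"]).__name__, step["outputs"], type(result[0]))
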
@@ -314,16 +331,16 @@ def to_string(c):
if isinstance(c1, list) and isinstance(c2, list):
try:
res = c1 == c2
- reason = 'list-equal'
+ reason = "list-equal"
except ValueError: # lgtm [py/unreachable-statement]
res = False
- reason = 'list'
+ reason = "list"
elif isinstance(c1, numpy.ndarray) and isinstance(c2, numpy.ndarray):
try:
assert_almost_equal(c1, c2, decimal=decimal)
res = True
except (AssertionError, TypeError):
- reason = 'array'
+ reason = "array"
cc1 = c1.ravel()
cc2 = c2.ravel()
try:
@@ -331,7 +348,7 @@ def to_string(c):
res = True
except (AssertionError, TypeError) as e:
res = False
- reason = 'array-ravel' + str(e)
+ reason = "array-ravel" + str(e)
else:
raise TypeError("Types {} and {}".format(type(c1), type(c2)))
if not res:
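
The comparison helper above tries exact list equality first, then assert_almost_equal on the raw arrays, then a last attempt on the flattened arrays, keeping a short reason string for the failure report. A stand-alone restatement of that fallback order (illustrative only, not part of the patch):

# Restatement of the comparison cascade used above.
import numpy
from numpy.testing import assert_almost_equal

def loose_compare(c1, c2, decimal=5):
    if isinstance(c1, list) and isinstance(c2, list):
        return c1 == c2, "list-equal"
    try:
        assert_almost_equal(c1, c2, decimal=decimal)
        return True, "array"
    except (AssertionError, TypeError):
        pass
    try:
        # Last chance: compare the flattened arrays.
        assert_almost_equal(numpy.ravel(c1), numpy.ravel(c2), decimal=decimal)
        return True, "array-ravel"
    except (AssertionError, TypeError) as e:
        return False, "array-ravel " + str(e)

# A (1, 1) and a (1,) array disagree element-wise but match once flattened.
print(loose_compare(numpy.array([[1.0]]), numpy.array([1.0])))
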
diff --git a/skl2onnx/helpers/onnx_helper.py b/skl2onnx/helpers/onnx_helper.py
index aae1572e0..93f13461f 100644
--- a/skl2onnx/helpers/onnx_helper.py
+++ b/skl2onnx/helpers/onnx_helper.py
@@ -7,8 +7,12 @@
from onnx import shape_inference, TensorProto, ValueInfoProto
from onnx.numpy_helper import from_array, to_array
from onnx.helper import (
- make_tensor, make_node, make_tensor_value_info, make_graph,
- make_model)
+ make_tensor,
+ make_node,
+ make_tensor_value_info,
+ make_graph,
+ make_model,
+)
from ..proto import get_latest_tested_opset_version
from onnx import onnx_pb as onnx_proto
from ..common._topology import Variable
@@ -24,7 +28,7 @@ def load_onnx_model(onnx_file_or_bytes):
if isinstance(onnx_file_or_bytes, str):
with open(onnx_file_or_bytes, "rb") as f:
return onnx.load(f)
- elif hasattr(onnx_file_or_bytes, 'read'):
+ elif hasattr(onnx_file_or_bytes, "read"):
return onnx.load(onnx_file_or_bytes)
else:
b = BytesIO(onnx_file_or_bytes)
@@ -41,7 +45,7 @@ def save_onnx_model(model, filename=None):
"""
content = model.SerializeToString()
if filename is not None:
- if hasattr(filename, 'write'):
+ if hasattr(filename, "write"):
filename.write(content)
else:
with open(filename, "wb") as f:
@@ -60,8 +64,9 @@ def enumerate_model_node_outputs(model, add_node=False):
:return: enumerator
"""
if not hasattr(model, "graph"):
- raise TypeError("Parameter model is not an ONNX model but "
- "{}".format(type(model)))
+ raise TypeError(
+ "Parameter model is not an ONNX model but " "{}".format(type(model))
+ )
for node in model.graph.node:
for out in node.output:
yield (out, node) if add_node else out
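
enumerate_model_node_outputs walks model.graph.node and yields every output name, which is how callers discover candidate intermediate outputs. A short usage sketch (the scaler below stands in for any converted model; illustrative only):

# Sketch: list every intermediate output name of a converted model.
import numpy as np
from sklearn.preprocessing import StandardScaler
from skl2onnx import to_onnx
from skl2onnx.helpers.onnx_helper import enumerate_model_node_outputs

X = np.random.rand(5, 2).astype(np.float32)
model_onnx = to_onnx(StandardScaler().fit(X), X)

for name in enumerate_model_node_outputs(model_onnx):
    print(name)
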
@@ -145,8 +150,13 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None):
value_info = ValueInfoProto()
value_info.name = out
var_out.append(value_info)
- graph = make_graph(keep_nodes, model.graph.name, model.graph.input,
- var_out, model.graph.initializer)
+ graph = make_graph(
+ keep_nodes,
+ model.graph.name,
+ model.graph.input,
+ var_out,
+ model.graph.initializer,
+ )
onnx_model = make_model(graph)
onnx_model.ir_version = model.ir_version
onnx_model.producer_name = model.producer_name
@@ -159,8 +169,9 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None):
onnx.helper.set_model_props(onnx_model, values)
if len(onnx_model.graph.input) != len(model.graph.input):
- raise RuntimeError("Input mismatch {} != {}".format(
- len(onnx_model.input), len(model.input)))
+ raise RuntimeError(
+ "Input mismatch {} != {}".format(len(onnx_model.input), len(model.input))
+ )
# fix opset import
del onnx_model.opset_import[:]
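
select_model_inputs_outputs keeps only the nodes needed to produce the requested outputs and rebuilds the model around them. A sketch of truncating a converted pipeline at its first intermediate output and running the truncated graph, assuming onnxruntime is available (all names are illustrative):

# Sketch: cut a converted pipeline at an intermediate output and run it.
import numpy as np
import onnxruntime as rt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from skl2onnx import to_onnx
from skl2onnx.helpers.onnx_helper import (
    enumerate_model_node_outputs, select_model_inputs_outputs)

X = np.random.rand(20, 3).astype(np.float32)
y = np.array([0, 1] * 10)
pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", LogisticRegression())]).fit(X, y)
model_onnx = to_onnx(pipe, X)

# Truncate at the first node output, typically the scaler's result.
first_output = next(enumerate_model_node_outputs(model_onnx))
sub_model = select_model_inputs_outputs(model_onnx, outputs=[first_output])

sess = rt.InferenceSession(sub_model.SerializeToString(),
                           providers=["CPUExecutionProvider"])
print(first_output, sess.run(None, {"X": X})[0][:2])
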
@@ -171,33 +182,39 @@ def select_model_inputs_outputs(model, outputs=None, inputs=None):
return onnx_model
-def infer_outputs(op_type, inputs, outputs=None, initializer=None,
- target_opset=None, **atts):
+def infer_outputs(
+ op_type, inputs, outputs=None, initializer=None, target_opset=None, **atts
+):
"""
Infers outputs type and shapes given an ONNX operator.
"""
- logger = getLogger('skl2onnx')
+ logger = getLogger("skl2onnx")
logger.debug(
- '[infer_outputs] op_type=%r inputs=%r outputs=%r',
- op_type, [x.name for x in inputs], outputs)
+ "[infer_outputs] op_type=%r inputs=%r outputs=%r",
+ op_type,
+ [x.name for x in inputs],
+ outputs,
+ )
if isinstance(op_type, str):
required_outputs = []
if outputs:
for o in outputs:
- if hasattr(o, 'onnx_name'):
+ if hasattr(o, "onnx_name"):
required_outputs.append(o.onnx_name)
elif isinstance(o, str):
required_outputs.append(o)
else:
raise TypeError("Unable to require output {}.".format(o))
- node = make_node(op_type, [i.onnx_name for i in inputs],
- required_outputs, **atts)
+ node = make_node(
+ op_type, [i.onnx_name for i in inputs], required_outputs, **atts
+ )
node = [node]
- elif hasattr(op_type, 'nodes'):
+ elif hasattr(op_type, "nodes"):
node = op_type.nodes
else:
- raise RuntimeError("Unable to build ONNX nodes from type {}.".format(
- type(op_type)))
+ raise RuntimeError(
+ "Unable to build ONNX nodes from type {}.".format(type(op_type))
+ )
input_init = inputs.copy()
if initializer:
@@ -207,16 +224,18 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None,
if isinstance(input, Variable):
onnx_type = input.type.to_onnx_type()
tensor_type = onnx_type.tensor_type
- shape = [tensor_type.shape.dim[i].dim_value
- for i in range(len(tensor_type.shape.dim))]
- inp = make_tensor_value_info(input.onnx_name,
- tensor_type.elem_type,
- tuple(shape))
+ shape = [
+ tensor_type.shape.dim[i].dim_value
+ for i in range(len(tensor_type.shape.dim))
+ ]
+ inp = make_tensor_value_info(
+ input.onnx_name, tensor_type.elem_type, tuple(shape)
+ )
onnx_inputs.append(inp)
elif isinstance(input, onnx.TensorProto):
v = make_tensor_value_info(
- input.name, input.data_type.real,
- list(d for d in input.dims))
+ input.name, input.data_type.real, list(d for d in input.dims)
+ )
onnx_inputs.append(v)
elif isinstance(input, onnx.AttributeProto):
value_info = ValueInfoProto()
@@ -228,13 +247,11 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None,
else:
onnx_inputs.append(input)
- graph = make_graph(node, 'infer_shapes',
- onnx_inputs, [])
- original_model = make_model(graph, producer_name='skl2onnx')
+ graph = make_graph(node, "infer_shapes", onnx_inputs, [])
+ original_model = make_model(graph, producer_name="skl2onnx")
domains = {}
for n in node:
- domains[n.domain] = max(domains.get(n.domain, 1),
- getattr(n, 'op_version', 1))
+ domains[n.domain] = max(domains.get(n.domain, 1), getattr(n, "op_version", 1))
for i, (k, v) in enumerate(domains.items()):
if i == 0 and len(original_model.opset_import) == 1:
op_set = original_model.opset_import[0]
@@ -243,8 +260,7 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None,
op_set.domain = k
if target_opset:
if isinstance(target_opset, dict):
- op_set.version = target_opset.get(
- k, get_latest_tested_opset_version())
+ op_set.version = target_opset.get(k, get_latest_tested_opset_version())
else:
op_set.version = target_opset
else:
@@ -254,8 +270,8 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None,
inferred_model = shape_inference.infer_shapes(original_model)
except RuntimeError as e:
raise RuntimeError(
- "Unable to infer shape of node '{}'\n{}".format(
- op_type, original_model)) from e
+ "Unable to infer shape of node '{}'\n{}".format(op_type, original_model)
+ ) from e
all_shapes = Variable.from_pb(inferred_model.graph.value_info)
used = set()
for node in graph.node:
@@ -266,8 +282,9 @@ def infer_outputs(op_type, inputs, outputs=None, initializer=None,
raise RuntimeError(
f"Shape inference fails.\n*Inputs*\n{onnx_inputs}\n"
f"*all_shapes*\n{all_shapes}'\n"
- f"*Model*\n{original_model}'")
- logger.debug('[infer_outputs] shapes=%r', shapes)
+ f"*Model*\n{original_model}'"
+ )
+ logger.debug("[infer_outputs] shapes=%r", shapes)
return shapes
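
infer_outputs wraps a single node in a throw-away graph and lets ONNX shape inference fill in the output types. A stripped-down restatement of that idea with plain onnx calls, not the skl2onnx helper itself (illustrative only):

# Restatement of the trick above: wrap one node in a temporary graph and
# let onnx.shape_inference infer the missing output shape and type.
from onnx import TensorProto, helper, shape_inference

node = helper.make_node("Relu", ["X"], ["Y"])
inp = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, 3])
graph = helper.make_graph([node], "infer_shapes", [inp], [])
model = helper.make_model(graph, producer_name="sketch")

inferred = shape_inference.infer_shapes(model)
for value_info in inferred.graph.value_info:
    dims = [d.dim_value or d.dim_param
            for d in value_info.type.tensor_type.shape.dim]
    print(value_info.name, value_info.type.tensor_type.elem_type, dims)
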
@@ -289,8 +306,13 @@ def change_onnx_domain(model, ops):
node.op_type = rep[0]
node.domain = rep[1]
- graph = make_graph(nodes, model.graph.name, model.graph.input,
- model.graph.output, model.graph.initializer)
+ graph = make_graph(
+ nodes,
+ model.graph.name,
+ model.graph.input,
+ model.graph.output,
+ model.graph.initializer,
+ )
onnx_model = make_model(graph)
onnx_model.ir_version = model.ir_version
onnx_model.producer_name = model.producer_name
@@ -303,8 +325,9 @@ def change_onnx_domain(model, ops):
onnx.helper.set_model_props(onnx_model, values)
if len(onnx_model.graph.input) != len(model.graph.input):
- raise RuntimeError("Input mismatch {} != {}".format(
- len(onnx_model.input), len(model.input)))
+ raise RuntimeError(
+ "Input mismatch {} != {}".format(len(onnx_model.input), len(model.input))
+ )
# fix opset import
domain_set = set()
@@ -325,7 +348,7 @@ def change_onnx_domain(model, ops):
return onnx_model
-def add_output_initializer(model_onnx, name, value, suffix='_init'):
+def add_output_initializer(model_onnx, name, value, suffix="_init"):
"""
Add a constant and link it to one output.
It allows the user to store arrays into the graph
@@ -352,7 +375,8 @@ def add_output_initializer(model_onnx, name, value, suffix='_init'):
if len(name_list) != len(value_list):
raise ValueError(
"Mismatched names and values. There are %d names and %d values."
- "" % (len(name_list), len(value_list)))
+ "" % (len(name_list), len(value_list))
+ )
nodes = list(model_onnx.graph.node)
inits = list(model_onnx.graph.initializer)
@@ -364,41 +388,46 @@ def add_output_initializer(model_onnx, name, value, suffix='_init'):
names = set(i.name for i in model_onnx.graph.initializer)
if name_output in names or name_init in names:
raise ValueError(
- "Names %r or %r is already taken by an initializer: %r." % (
- name_output, name_init, ", ".join(sorted(names))))
+ "Names %r or %r is already taken by an initializer: %r."
+ % (name_output, name_init, ", ".join(sorted(names)))
+ )
names = set(i.name for i in model_onnx.graph.output)
if name_output in names or name_init in names:
raise ValueError(
- "Names %r or %r is already taken by an output: %r." % (
- name_output, name_init, ", ".join(sorted(names))))
+ "Names %r or %r is already taken by an output: %r."
+ % (name_output, name_init, ", ".join(sorted(names)))
+ )
names = set(i.name for i in model_onnx.graph.input)
if name_output in names or name_init in names:
raise ValueError(
- "Names %r or %r is already taken by an output: %r." % (
- name_output, name_init, ", ".join(sorted(names))))
+ "Names %r or %r is already taken by an output: %r."
+ % (name_output, name_init, ", ".join(sorted(names)))
+ )
try:
cst = from_array(value, name=name_init)
except RuntimeError as e:
st = str(value.dtype).lower()
- if st.startswith('u') or st.startswith("`_.
"""
- if metric == 'cosine':
+ if metric == "cosine":
if isinstance(Y, np.ndarray):
- ny = np.sqrt(np.sum(Y ** 2, axis=1, keepdims=True))
+ ny = np.sqrt(np.sum(Y**2, axis=1, keepdims=True))
norm_y = Y / ny
norm_try = norm_y.T.astype(dtype)
else:
ny = OnnxReduceL2_typed(dtype, Y, axes=[1], op_version=op_version)
norm_y = OnnxDiv(Y, ny, op_version=op_version)
- norm_try = OnnxTranspose(norm_y, perm=[1, 0],
- op_version=op_version)
+ norm_try = OnnxTranspose(norm_y, perm=[1, 0], op_version=op_version)
nx = OnnxReduceL2_typed(dtype, X, axes=[1], op_version=op_version)
norm_x = OnnxDiv(X, nx, op_version=op_version)
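
For metric="cosine" the graph built here normalises each row of X and Y to unit L2 norm before taking their product, which is exactly the cosine similarity. The same computation in numpy (illustrative only):

# Numpy restatement of the cosine PairwiseKernel path above.
import numpy as np

def cosine_kernel(X, Y):
    nx = np.sqrt(np.sum(X**2, axis=1, keepdims=True))
    ny = np.sqrt(np.sum(Y**2, axis=1, keepdims=True))
    return (X / nx) @ (Y / ny).T

X = np.array([[1.0, 0.0], [1.0, 1.0]])
Y = np.array([[0.0, 2.0]])
print(cosine_kernel(X, Y))   # [[0.], [0.70710678]]
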
@@ -200,9 +260,9 @@ def _convert_pairwise_kernel(X, Y, metric=None,
raise NotImplementedError("Metric %r is not implemented." % metric)
-def convert_kernel(kernel, X, output_names=None,
- x_train=None, dtype=None, optim=None,
- op_version=None):
+def convert_kernel(
+ kernel, X, output_names=None, x_train=None, dtype=None, optim=None, op_version=None
+):
if op_version is None:
raise RuntimeError("op_version must not be None.")
if isinstance(kernel, Sum):
@@ -213,72 +273,105 @@ def convert_kernel(kernel, X, output_names=None,
clop = None
if clop is not None:
return clop(
- convert_kernel(kernel.k1, X, x_train=x_train, dtype=dtype,
- optim=optim, op_version=op_version),
- convert_kernel(kernel.k2, X, x_train=x_train, dtype=dtype,
- optim=optim, op_version=op_version),
- output_names=output_names, op_version=op_version)
+ convert_kernel(
+ kernel.k1,
+ X,
+ x_train=x_train,
+ dtype=dtype,
+ optim=optim,
+ op_version=op_version,
+ ),
+ convert_kernel(
+ kernel.k2,
+ X,
+ x_train=x_train,
+ dtype=dtype,
+ optim=optim,
+ op_version=op_version,
+ ),
+ output_names=output_names,
+ op_version=op_version,
+ )
if isinstance(kernel, ConstantKernel):
# X and x_train should have the same number of features.
onnx_zeros_x = _zero_vector_of_size(
- X, keepdims=1, dtype=dtype, op_version=op_version)
+ X, keepdims=1, dtype=dtype, op_version=op_version
+ )
if x_train is None:
onnx_zeros_y = onnx_zeros_x
else:
onnx_zeros_y = _zero_vector_of_size(
- x_train, keepdims=1, dtype=dtype, op_version=op_version)
+ x_train, keepdims=1, dtype=dtype, op_version=op_version
+ )
tr = OnnxTranspose(onnx_zeros_y, perm=[1, 0], op_version=op_version)
mat = OnnxMatMul(onnx_zeros_x, tr, op_version=op_version)
- return OnnxAdd(mat,
- np.array([kernel.constant_value],
- dtype=dtype),
- output_names=output_names,
- op_version=op_version)
+ return OnnxAdd(
+ mat,
+ np.array([kernel.constant_value], dtype=dtype),
+ output_names=output_names,
+ op_version=op_version,
+ )
if isinstance(kernel, RBF):
# length_scale = np.squeeze(length_scale).astype(float)
- zeroh = _zero_vector_of_size(X, axis=1, keepdims=0, dtype=dtype,
- op_version=op_version)
- zerov = _zero_vector_of_size(X, axis=0, keepdims=1, dtype=dtype,
- op_version=op_version)
-
- if (isinstance(kernel.length_scale, np.ndarray) and
- len(kernel.length_scale) > 0):
+ zeroh = _zero_vector_of_size(
+ X, axis=1, keepdims=0, dtype=dtype, op_version=op_version
+ )
+ zerov = _zero_vector_of_size(
+ X, axis=0, keepdims=1, dtype=dtype, op_version=op_version
+ )
+
+ if isinstance(kernel.length_scale, np.ndarray) and len(kernel.length_scale) > 0:
const = kernel.length_scale.astype(dtype)
else:
tensor_value = py_make_float_array(
- kernel.length_scale, dtype=dtype, as_tensor=True)
+ kernel.length_scale, dtype=dtype, as_tensor=True
+ )
const = OnnxConstantOfShape(
OnnxShape(zeroh, op_version=op_version),
- value=tensor_value, op_version=op_version)
+ value=tensor_value,
+ op_version=op_version,
+ )
X_scaled = OnnxDiv(X, const, op_version=op_version)
if x_train is None:
dist = onnx_squareform_pdist(
- X_scaled, metric='sqeuclidean', dtype=dtype,
- op_version=op_version)
+ X_scaled, metric="sqeuclidean", dtype=dtype, op_version=op_version
+ )
else:
x_train_scaled = OnnxDiv(x_train, const, op_version=op_version)
if optim is None:
- dist = onnx_cdist(X_scaled, x_train_scaled,
- metric='sqeuclidean',
- dtype=dtype, op_version=op_version)
- elif optim == 'cdist':
- dist = OnnxCDist(X_scaled, x_train_scaled,
- metric='sqeuclidean',
- op_version=op_version)
+ dist = onnx_cdist(
+ X_scaled,
+ x_train_scaled,
+ metric="sqeuclidean",
+ dtype=dtype,
+ op_version=op_version,
+ )
+ elif optim == "cdist":
+ dist = OnnxCDist(
+ X_scaled,
+ x_train_scaled,
+ metric="sqeuclidean",
+ op_version=op_version,
+ )
else:
raise ValueError("Unknown optimization '{}'.".format(optim))
tensor_value = py_make_float_array(-0.5, dtype=dtype, as_tensor=True)
cst5 = OnnxConstantOfShape(
OnnxShape(zerov, op_version=op_version),
- value=tensor_value, op_version=op_version)
+ value=tensor_value,
+ op_version=op_version,
+ )
# K = np.exp(-.5 * dists)
- exp = OnnxExp(OnnxMul(dist, cst5, op_version=op_version),
- output_names=output_names, op_version=op_version)
+ exp = OnnxExp(
+ OnnxMul(dist, cst5, op_version=op_version),
+ output_names=output_names,
+ op_version=op_version,
+ )
# This should not be needed.
# K = squareform(K)
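
The RBF branch divides the inputs by length_scale, computes squared euclidean distances (pdist when x_train is None, cdist otherwise) and applies exp(-0.5 * d). A numpy check of that formula against scikit-learn's own kernel, assuming scipy and scikit-learn are installed (illustrative only):

# Numpy check of the RBF conversion above: scale by length_scale, take
# squared euclidean distances, apply exp(-0.5 * d).
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.gaussian_process.kernels import RBF

X = np.random.rand(4, 3)
Y = np.random.rand(5, 3)
length_scale = 1.5

d = cdist(X / length_scale, Y / length_scale, metric="sqeuclidean")
K = np.exp(-0.5 * d)

print(np.allclose(K, RBF(length_scale=length_scale)(X, Y)))   # True
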
@@ -288,92 +381,129 @@ def convert_kernel(kernel, X, output_names=None,
if isinstance(kernel, ExpSineSquared):
if not isinstance(kernel.length_scale, (float, int)):
raise NotImplementedError(
- "length_scale should be float not {}.".format(
- type(kernel.length_scale)))
+ "length_scale should be float not {}.".format(type(kernel.length_scale))
+ )
return _convert_exp_sine_squared(
- X, Y=X if x_train is None else x_train,
+ X,
+ Y=X if x_train is None else x_train,
length_scale=kernel.length_scale,
- periodicity=kernel.periodicity, dtype=dtype,
- output_names=output_names, optim=optim,
- op_version=op_version)
+ periodicity=kernel.periodicity,
+ dtype=dtype,
+ output_names=output_names,
+ optim=optim,
+ op_version=op_version,
+ )
if isinstance(kernel, DotProduct):
if not isinstance(kernel.sigma_0, (float, int)):
raise NotImplementedError(
- "sigma_0 should be float not {}.".format(
- type(kernel.sigma_0)))
+ "sigma_0 should be float not {}.".format(type(kernel.sigma_0))
+ )
if x_train is None:
- return _convert_dot_product(X, X, sigma_0=kernel.sigma_0,
- dtype=dtype,
- output_names=output_names,
- op_version=op_version)
+ return _convert_dot_product(
+ X,
+ X,
+ sigma_0=kernel.sigma_0,
+ dtype=dtype,
+ output_names=output_names,
+ op_version=op_version,
+ )
else:
if len(x_train.shape) != 2:
raise NotImplementedError(
- "Only DotProduct for two dimension train set is "
- "implemented.")
+ "Only DotProduct for two dimension train set is " "implemented."
+ )
return _convert_dot_product(
- X, x_train, sigma_0=kernel.sigma_0,
- dtype=dtype, output_names=output_names,
- op_version=op_version)
+ X,
+ x_train,
+ sigma_0=kernel.sigma_0,
+ dtype=dtype,
+ output_names=output_names,
+ op_version=op_version,
+ )
if isinstance(kernel, RationalQuadratic):
if x_train is None:
return _convert_rational_quadratic(
- X, X, length_scale=kernel.length_scale,
- dtype=dtype, alpha=kernel.alpha,
+ X,
+ X,
+ length_scale=kernel.length_scale,
+ dtype=dtype,
+ alpha=kernel.alpha,
output_names=output_names,
- optim=optim, op_version=op_version)
+ optim=optim,
+ op_version=op_version,
+ )
else:
return _convert_rational_quadratic(
- X, x_train, length_scale=kernel.length_scale,
- dtype=dtype, alpha=kernel.alpha,
+ X,
+ x_train,
+ length_scale=kernel.length_scale,
+ dtype=dtype,
+ alpha=kernel.alpha,
output_names=output_names,
- optim=optim, op_version=op_version)
+ optim=optim,
+ op_version=op_version,
+ )
if isinstance(kernel, PairwiseKernel):
if x_train is None:
return _convert_pairwise_kernel(
- X, X, metric=kernel.metric,
- dtype=dtype, output_names=output_names,
- optim=optim, op_version=op_version)
+ X,
+ X,
+ metric=kernel.metric,
+ dtype=dtype,
+ output_names=output_names,
+ optim=optim,
+ op_version=op_version,
+ )
else:
return _convert_pairwise_kernel(
- X, x_train, metric=kernel.metric,
- dtype=dtype, output_names=output_names,
- optim=optim, op_version=op_version)
+ X,
+ x_train,
+ metric=kernel.metric,
+ dtype=dtype,
+ output_names=output_names,
+ optim=optim,
+ op_version=op_version,
+ )
if isinstance(kernel, WhiteKernel):
# X and x_train should have the same number of features.
onnx_zeros_x = _zero_vector_of_size(
- X, keepdims=1, dtype=dtype, op_version=op_version)
+ X, keepdims=1, dtype=dtype, op_version=op_version
+ )
if x_train is None:
onnx_zeros_y = onnx_zeros_x
else:
onnx_zeros_y = _zero_vector_of_size(
- x_train, keepdims=1, dtype=dtype, op_version=op_version)
+ x_train, keepdims=1, dtype=dtype, op_version=op_version
+ )
tr = OnnxTranspose(onnx_zeros_y, perm=[1, 0], op_version=op_version)
mat = OnnxMatMul(onnx_zeros_x, tr, op_version=op_version)
if x_train is not None:
- return OnnxIdentity(mat, op_version=op_version,
- output_names=output_names)
+ return OnnxIdentity(mat, op_version=op_version, output_names=output_names)
return OnnxMul(
OnnxEyeLike(mat, op_version=op_version),
- OnnxIdentity(np.array([kernel.noise_level], dtype=dtype),
- op_version=op_version),
+ OnnxIdentity(
+ np.array([kernel.noise_level], dtype=dtype), op_version=op_version
+ ),
op_version=op_version,
- output_names=output_names)
+ output_names=output_names,
+ )
- raise RuntimeError("Unable to convert __call__ method for "
- "class {}.".format(type(kernel)))
+ raise RuntimeError(
+ "Unable to convert __call__ method for " "class {}.".format(type(kernel))
+ )
-def _zero_vector_of_size(X, output_names=None, axis=0,
- keepdims=None, dtype=None, op_version=None):
+def _zero_vector_of_size(
+ X, output_names=None, axis=0, keepdims=None, dtype=None, op_version=None
+):
if op_version is None:
raise RuntimeError("op_version must not be None.")
if keepdims is None:
@@ -381,18 +511,27 @@ def _zero_vector_of_size(X, output_names=None, axis=0,
if dtype == np.float32:
res = OnnxReduceSumApi11(
OnnxConstantOfShape(
- OnnxShape(X, op_version=op_version),
- op_version=op_version),
- axes=[1 - axis], keepdims=keepdims,
- output_names=output_names, op_version=op_version)
+ OnnxShape(X, op_version=op_version), op_version=op_version
+ ),
+ axes=[1 - axis],
+ keepdims=keepdims,
+ output_names=output_names,
+ op_version=op_version,
+ )
elif dtype in (np.float64, np.int32, np.int64):
res = OnnxReduceSumApi11(
OnnxConstantOfShape(
- OnnxShape(X, op_version=op_version), value=py_make_float_array(
- 0, dtype=dtype, as_tensor=True), op_version=op_version),
- axes=[1 - axis], keepdims=keepdims,
- output_names=output_names, op_version=op_version)
+ OnnxShape(X, op_version=op_version),
+ value=py_make_float_array(0, dtype=dtype, as_tensor=True),
+ op_version=op_version,
+ ),
+ axes=[1 - axis],
+ keepdims=keepdims,
+ output_names=output_names,
+ op_version=op_version,
+ )
else:
raise NotImplementedError(
- "Unable to create zero vector of type {}".format(dtype))
+ "Unable to create zero vector of type {}".format(dtype)
+ )
return res
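
_zero_vector_of_size builds a zero tensor whose length follows the runtime batch size: ConstantOfShape over Shape(X) gives zeros shaped like X, and ReduceSum over the opposite axis collapses them into a single column or row. A numpy restatement (illustrative only):

# Numpy restatement of _zero_vector_of_size above.
import numpy as np

def zero_vector_of_size(X, axis=0, keepdims=1):
    zeros = np.zeros_like(X)                   # ConstantOfShape(Shape(X))
    return zeros.sum(axis=1 - axis, keepdims=bool(keepdims))   # ReduceSum

X = np.random.rand(7, 3)
print(zero_vector_of_size(X, axis=0, keepdims=1).shape)   # (7, 1)
print(zero_vector_of_size(X, axis=1, keepdims=0).shape)   # (3,)
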
diff --git a/skl2onnx/operator_converters/ada_boost.py b/skl2onnx/operator_converters/ada_boost.py
index 87ea3d963..3e883043f 100644
--- a/skl2onnx/operator_converters/ada_boost.py
+++ b/skl2onnx/operator_converters/ada_boost.py
@@ -8,236 +8,350 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common._apply_operation import (
- apply_add, apply_cast, apply_clip, apply_concat, apply_div, apply_exp,
- apply_mul, apply_reshape, apply_sub, apply_topk, apply_transpose
+ apply_add,
+ apply_cast,
+ apply_clip,
+ apply_concat,
+ apply_div,
+ apply_exp,
+ apply_mul,
+ apply_reshape,
+ apply_sub,
+ apply_topk,
+ apply_transpose,
)
from ..common.data_types import (
- FloatTensorType, DoubleTensorType, guess_proto_type, guess_numpy_type,
- Int64TensorType)
+ FloatTensorType,
+ DoubleTensorType,
+ guess_proto_type,
+ guess_numpy_type,
+ Int64TensorType,
+)
from ..common._registration import register_converter
from .._supported_operators import sklearn_operator_name_map
def _scikit_learn_before_022():
- if '.dev' in __version__:
- return pv.Version(
- __version__.split(".dev")[0]) < pv.Version("0.22")
- if '.post' in __version__:
- return pv.Version(
- __version__.split(".post")[0]) < pv.Version("0.22")
+ if ".dev" in __version__:
+ return pv.Version(__version__.split(".dev")[0]) < pv.Version("0.22")
+ if ".post" in __version__:
+ return pv.Version(__version__.split(".post")[0]) < pv.Version("0.22")
return pv.Version(__version__) < pv.Version("0.22")
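
The ".dev"/".post" split above matters because packaging treats a suffix such as 0.22.dev0 as a pre-release of 0.22, so without the split a development build of 0.22 would be classified as older than 0.22. A quick check (illustrative only):

# Why the suffix is stripped before comparing versions.
import packaging.version as pv

print(pv.Version("0.22.dev0") < pv.Version("0.22"))                     # True
print(pv.Version("0.22.dev0".split(".dev")[0]) < pv.Version("0.22"))    # False
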
-def _samme_proba(scope, container, proba_name, weight,
- zero_name, classes_ind_name, one_name):
- weight_name = scope.get_unique_variable_name('weight')
- container.add_initializer(
- weight_name, onnx_proto.TensorProto.FLOAT, [], [weight])
-
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
- container.add_node('ArgMax', proba_name,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'),
- axis=1)
- equal_name = scope.get_unique_variable_name('equal')
- container.add_node('Equal', [argmax_output_name, classes_ind_name],
- equal_name,
- name=scope.get_unique_operator_name('Equal'))
-
- max_proba_name = scope.get_unique_variable_name('probsmax')
- container.add_node('Where', [equal_name, one_name, zero_name],
- max_proba_name,
- name=scope.get_unique_operator_name('Where'))
-
- samme_proba_name = scope.get_unique_variable_name('samme_proba')
- apply_mul(scope, [max_proba_name, weight_name],
- samme_proba_name, container, broadcast=1)
+def _samme_proba(
+ scope, container, proba_name, weight, zero_name, classes_ind_name, one_name
+):
+ weight_name = scope.get_unique_variable_name("weight")
+ container.add_initializer(weight_name, onnx_proto.TensorProto.FLOAT, [], [weight])
+
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ container.add_node(
+ "ArgMax",
+ proba_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ equal_name = scope.get_unique_variable_name("equal")
+ container.add_node(
+ "Equal",
+ [argmax_output_name, classes_ind_name],
+ equal_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+
+ max_proba_name = scope.get_unique_variable_name("probsmax")
+ container.add_node(
+ "Where",
+ [equal_name, one_name, zero_name],
+ max_proba_name,
+ name=scope.get_unique_operator_name("Where"),
+ )
+
+ samme_proba_name = scope.get_unique_variable_name("samme_proba")
+ apply_mul(
+ scope, [max_proba_name, weight_name], samme_proba_name, container, broadcast=1
+ )
return samme_proba_name
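
_samme_proba turns each estimator's probabilities into a weighted one-hot vote: ArgMax picks the class, Equal and Where build the one-hot mask, and the mask is scaled by the estimator weight. The same computation in numpy (illustrative only):

# Numpy restatement of _samme_proba above: weight * one_hot(argmax(proba)).
import numpy as np

def samme_vote(proba, weight):
    classes_ind = np.arange(proba.shape[1]).reshape(1, -1)
    picked = np.argmax(proba, axis=1).reshape(-1, 1)
    one_hot = np.where(picked == classes_ind, 1.0, 0.0)
    return one_hot * weight

proba = np.array([[0.2, 0.8], [0.9, 0.1]])
print(samme_vote(proba, weight=0.5))
# [[0.  0.5]
#  [0.5 0. ]]
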
def _samme_r_proba(scope, container, proba_name, n_classes, dtype, pdtype):
- clipped_proba_name = scope.get_unique_variable_name('clipped_proba')
- log_proba_name = scope.get_unique_variable_name('log_proba')
- reduced_proba_name = scope.get_unique_variable_name('reduced_proba')
- reshaped_result_name = scope.get_unique_variable_name('reshaped_result')
- inverted_n_classes_name = scope.get_unique_variable_name(
- 'inverted_n_classes')
- n_classes_minus_one_name = scope.get_unique_variable_name(
- 'n_classes_minus_one')
- prod_result_name = scope.get_unique_variable_name('prod_result')
- sub_result_name = scope.get_unique_variable_name('sub_result')
- samme_proba_name = scope.get_unique_variable_name('samme_proba')
-
- container.add_initializer(
- inverted_n_classes_name, pdtype, [], [1. / n_classes])
- container.add_initializer(
- n_classes_minus_one_name, pdtype, [], [n_classes - 1])
+ clipped_proba_name = scope.get_unique_variable_name("clipped_proba")
+ log_proba_name = scope.get_unique_variable_name("log_proba")
+ reduced_proba_name = scope.get_unique_variable_name("reduced_proba")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ inverted_n_classes_name = scope.get_unique_variable_name("inverted_n_classes")
+ n_classes_minus_one_name = scope.get_unique_variable_name("n_classes_minus_one")
+ prod_result_name = scope.get_unique_variable_name("prod_result")
+ sub_result_name = scope.get_unique_variable_name("sub_result")
+ samme_proba_name = scope.get_unique_variable_name("samme_proba")
+
+ container.add_initializer(inverted_n_classes_name, pdtype, [], [1.0 / n_classes])
+ container.add_initializer(n_classes_minus_one_name, pdtype, [], [n_classes - 1])
try:
cst_min = np.finfo(np.float64).eps.astype(dtype)
except TypeError:
- raise TypeError("Unable to convert {} (type {}) into {}.".format(
- np.finfo(float).eps, type(np.finfo(float).eps), dtype))
+ raise TypeError(
+ "Unable to convert {} (type {}) into {}.".format(
+ np.finfo(float).eps, type(np.finfo(float).eps), dtype
+ )
+ )
apply_clip(
- scope, proba_name, clipped_proba_name, container,
- operator_name=scope.get_unique_operator_name('ClipAda'),
- min=dtype(cst_min))
+ scope,
+ proba_name,
+ clipped_proba_name,
+ container,
+ operator_name=scope.get_unique_operator_name("ClipAda"),
+ min=dtype(cst_min),
+ )
container.add_node(
- 'Log', clipped_proba_name, log_proba_name,
- name=scope.get_unique_operator_name('Log'))
+ "Log",
+ clipped_proba_name,
+ log_proba_name,
+ name=scope.get_unique_operator_name("Log"),
+ )
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', log_proba_name, reduced_proba_name, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ log_proba_name,
+ reduced_proba_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [log_proba_name, axis_name], reduced_proba_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_reshape(scope, reduced_proba_name,
- reshaped_result_name, container,
- desired_shape=(-1, 1))
- apply_mul(scope, [reshaped_result_name, inverted_n_classes_name],
- prod_result_name, container, broadcast=1)
- apply_sub(scope, [log_proba_name, prod_result_name],
- sub_result_name, container, broadcast=1)
- apply_mul(scope, [sub_result_name, n_classes_minus_one_name],
- samme_proba_name, container, broadcast=1)
+ "ReduceSum",
+ [log_proba_name, axis_name],
+ reduced_proba_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_reshape(
+ scope,
+ reduced_proba_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(-1, 1),
+ )
+ apply_mul(
+ scope,
+ [reshaped_result_name, inverted_n_classes_name],
+ prod_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_sub(
+ scope,
+ [log_proba_name, prod_result_name],
+ sub_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_mul(
+ scope,
+ [sub_result_name, n_classes_minus_one_name],
+ samme_proba_name,
+ container,
+ broadcast=1,
+ )
return samme_proba_name
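
_samme_r_proba computes the SAMME.R contribution (K - 1) * (log p - mean(log p)) on probabilities clipped away from zero. A numpy restatement (illustrative only):

# Numpy restatement of _samme_r_proba above.
import numpy as np

def samme_r_contrib(proba, n_classes):
    p = np.clip(proba, np.finfo(proba.dtype).eps, None)
    log_p = np.log(p)
    return (n_classes - 1) * (
        log_p - log_p.sum(axis=1, keepdims=True) / n_classes)

proba = np.array([[0.7, 0.2, 0.1]])
print(samme_r_contrib(proba, n_classes=3))
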
-def _normalise_probability(scope, container, operator, proba_names_list,
- model):
- est_weights_sum_name = scope.get_unique_variable_name('est_weights_sum')
- summation_prob_name = scope.get_unique_variable_name('summation_prob')
- div_result_name = scope.get_unique_variable_name('div_result')
- exp_operand_name = scope.get_unique_variable_name('exp_operand')
- exp_result_name = scope.get_unique_variable_name('exp_result')
- reduced_exp_result_name = scope.get_unique_variable_name(
- 'reduced_exp_result')
- normaliser_name = scope.get_unique_variable_name('normaliser')
- zero_scalar_name = scope.get_unique_variable_name('zero_scalar')
- comparison_result_name = scope.get_unique_variable_name(
- 'comparison_result')
- cast_output_name = scope.get_unique_variable_name('cast_output')
+def _normalise_probability(scope, container, operator, proba_names_list, model):
+ est_weights_sum_name = scope.get_unique_variable_name("est_weights_sum")
+ summation_prob_name = scope.get_unique_variable_name("summation_prob")
+ div_result_name = scope.get_unique_variable_name("div_result")
+ exp_operand_name = scope.get_unique_variable_name("exp_operand")
+ exp_result_name = scope.get_unique_variable_name("exp_result")
+ reduced_exp_result_name = scope.get_unique_variable_name("reduced_exp_result")
+ normaliser_name = scope.get_unique_variable_name("normaliser")
+ zero_scalar_name = scope.get_unique_variable_name("zero_scalar")
+ comparison_result_name = scope.get_unique_variable_name("comparison_result")
+ cast_output_name = scope.get_unique_variable_name("cast_output")
zero_filtered_normaliser_name = scope.get_unique_variable_name(
- 'zero_filtered_normaliser')
- mul_operand_name = scope.get_unique_variable_name('mul_operand')
- cast_normaliser_name = scope.get_unique_variable_name('cast_normaliser')
+ "zero_filtered_normaliser"
+ )
+ mul_operand_name = scope.get_unique_variable_name("mul_operand")
+ cast_normaliser_name = scope.get_unique_variable_name("cast_normaliser")
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
container.add_initializer(
- est_weights_sum_name, proto_dtype,
- [], [model.estimator_weights_.sum()])
+ est_weights_sum_name, proto_dtype, [], [model.estimator_weights_.sum()]
+ )
container.add_initializer(
- mul_operand_name, proto_dtype,
- [], [1. / (model.n_classes_ - 1)])
- container.add_initializer(zero_scalar_name,
- onnx_proto.TensorProto.INT32, [], [0])
-
- container.add_node('Sum', proba_names_list,
- summation_prob_name,
- name=scope.get_unique_operator_name('Sum'))
- apply_div(scope, [summation_prob_name, est_weights_sum_name],
- div_result_name, container, broadcast=1)
- apply_mul(scope, [div_result_name, mul_operand_name],
- exp_operand_name, container, broadcast=1)
+ mul_operand_name, proto_dtype, [], [1.0 / (model.n_classes_ - 1)]
+ )
+ container.add_initializer(zero_scalar_name, onnx_proto.TensorProto.INT32, [], [0])
+
+ container.add_node(
+ "Sum",
+ proba_names_list,
+ summation_prob_name,
+ name=scope.get_unique_operator_name("Sum"),
+ )
+ apply_div(
+ scope,
+ [summation_prob_name, est_weights_sum_name],
+ div_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_mul(
+ scope,
+ [div_result_name, mul_operand_name],
+ exp_operand_name,
+ container,
+ broadcast=1,
+ )
apply_exp(scope, exp_operand_name, exp_result_name, container)
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', exp_result_name, reduced_exp_result_name, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ exp_result_name,
+ reduced_exp_result_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [exp_result_name, axis_name], reduced_exp_result_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_reshape(scope, reduced_exp_result_name,
- normaliser_name, container,
- desired_shape=(-1, 1))
- apply_cast(scope, normaliser_name, cast_normaliser_name,
- container, to=onnx_proto.TensorProto.INT32)
- container.add_node('Equal', [cast_normaliser_name, zero_scalar_name],
- comparison_result_name,
- name=scope.get_unique_operator_name('Equal'))
- apply_cast(scope, comparison_result_name, cast_output_name,
- container, to=proto_dtype)
- apply_add(scope, [normaliser_name, cast_output_name],
- zero_filtered_normaliser_name,
- container, broadcast=0)
- apply_div(scope, [exp_result_name, zero_filtered_normaliser_name],
- operator.outputs[1].full_name, container, broadcast=1)
+ "ReduceSum",
+ [exp_result_name, axis_name],
+ reduced_exp_result_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_reshape(
+ scope,
+ reduced_exp_result_name,
+ normaliser_name,
+ container,
+ desired_shape=(-1, 1),
+ )
+ apply_cast(
+ scope,
+ normaliser_name,
+ cast_normaliser_name,
+ container,
+ to=onnx_proto.TensorProto.INT32,
+ )
+ container.add_node(
+ "Equal",
+ [cast_normaliser_name, zero_scalar_name],
+ comparison_result_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+ apply_cast(
+ scope, comparison_result_name, cast_output_name, container, to=proto_dtype
+ )
+ apply_add(
+ scope,
+ [normaliser_name, cast_output_name],
+ zero_filtered_normaliser_name,
+ container,
+ broadcast=0,
+ )
+ apply_div(
+ scope,
+ [exp_result_name, zero_filtered_normaliser_name],
+ operator.outputs[1].full_name,
+ container,
+ broadcast=1,
+ )
return operator.outputs[1].full_name
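
_normalise_probability divides the summed votes by the total estimator weight, applies a softmax-like exp(. / (K - 1)) and normalises by the row sum, with a guard for rows whose normaliser is zero. A slightly simplified numpy restatement (illustrative only):

# Simplified numpy restatement of _normalise_probability above.
import numpy as np

def normalise(summed_votes, est_weights_sum, n_classes):
    decision = summed_votes / est_weights_sum
    proba = np.exp(decision / (n_classes - 1))
    norm = proba.sum(axis=1, keepdims=True)
    norm[norm == 0.0] = 1.0          # guard against a zero normaliser
    return proba / norm

summed = np.array([[1.2, -0.3], [-0.5, 0.9]])
print(normalise(summed, est_weights_sum=2.0, n_classes=2))
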
def _generate_raw_scores(scope, container, operator, proba_names_list, model):
- summation_prob_name = scope.get_unique_variable_name('summation_proba')
- est_weights_sum_name = scope.get_unique_variable_name('est_weights')
+ summation_prob_name = scope.get_unique_variable_name("summation_proba")
+ est_weights_sum_name = scope.get_unique_variable_name("est_weights")
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
container.add_initializer(
- est_weights_sum_name, proto_dtype,
- [], [model.estimator_weights_.sum()])
+ est_weights_sum_name, proto_dtype, [], [model.estimator_weights_.sum()]
+ )
container.add_node(
- 'Sum', proba_names_list, summation_prob_name,
- name=scope.get_unique_operator_name('Sum'))
+ "Sum",
+ proba_names_list,
+ summation_prob_name,
+ name=scope.get_unique_operator_name("Sum"),
+ )
if len(model.classes_) == 2:
- div_res_name = scope.get_unique_variable_name('div_res')
- operand_name = scope.get_unique_variable_name('operand')
- neg_name = scope.get_unique_variable_name('neg')
- mul_res_name = scope.get_unique_variable_name('mul_res')
- pos_class_scores_name = scope.get_unique_variable_name(
- 'pos_class_scores')
- neg_class_scores_name = scope.get_unique_variable_name(
- 'neg_class_scores')
- container.add_initializer(
- operand_name, proto_dtype,
- [2], [-1, 1])
- container.add_initializer(
- neg_name, proto_dtype,
- [], [-1])
-
- apply_div(scope, [summation_prob_name, est_weights_sum_name],
- div_res_name, container, broadcast=1)
- apply_mul(scope, [div_res_name, operand_name],
- mul_res_name, container, broadcast=1)
+ div_res_name = scope.get_unique_variable_name("div_res")
+ operand_name = scope.get_unique_variable_name("operand")
+ neg_name = scope.get_unique_variable_name("neg")
+ mul_res_name = scope.get_unique_variable_name("mul_res")
+ pos_class_scores_name = scope.get_unique_variable_name("pos_class_scores")
+ neg_class_scores_name = scope.get_unique_variable_name("neg_class_scores")
+ container.add_initializer(operand_name, proto_dtype, [2], [-1, 1])
+ container.add_initializer(neg_name, proto_dtype, [], [-1])
+
+ apply_div(
+ scope,
+ [summation_prob_name, est_weights_sum_name],
+ div_res_name,
+ container,
+ broadcast=1,
+ )
+ apply_mul(
+ scope, [div_res_name, operand_name], mul_res_name, container, broadcast=1
+ )
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', mul_res_name, pos_class_scores_name, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ mul_res_name,
+ pos_class_scores_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [mul_res_name, axis_name], pos_class_scores_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_mul(scope, [pos_class_scores_name, neg_name],
- neg_class_scores_name, container, broadcast=1)
+ "ReduceSum",
+ [mul_res_name, axis_name],
+ pos_class_scores_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_mul(
+ scope,
+ [pos_class_scores_name, neg_name],
+ neg_class_scores_name,
+ container,
+ broadcast=1,
+ )
apply_concat(
- scope, [neg_class_scores_name, pos_class_scores_name],
- operator.outputs[1].full_name, container, axis=1)
+ scope,
+ [neg_class_scores_name, pos_class_scores_name],
+ operator.outputs[1].full_name,
+ container,
+ axis=1,
+ )
else:
- apply_div(scope, [summation_prob_name, est_weights_sum_name],
- operator.outputs[1].full_name, container, broadcast=1)
+ apply_div(
+ scope,
+ [summation_prob_name, est_weights_sum_name],
+ operator.outputs[1].full_name,
+ container,
+ broadcast=1,
+ )
return operator.outputs[1].full_name
-def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_ada_boost_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for AdaBoost classifier.
This function goes through the list of estimators and uses
@@ -248,28 +362,31 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator,
the probability score for the final result. Label is
calculated by simply doing an argmax of the probability scores.
"""
- if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
+ if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]:
raise RuntimeError(
"Option 'nocl' is not implemented for operator '{}'.".format(
- operator.raw_operator.__class__.__name__))
+ operator.raw_operator.__class__.__name__
+ )
+ )
op = operator.raw_operator
options = container.get_options(op, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
+ use_raw_scores = options["raw_scores"]
classes = op.classes_
class_type = onnx_proto.TensorProto.STRING
if np.issubdtype(classes.dtype, np.floating):
class_type = onnx_proto.TensorProto.INT32
- classes = classes.astype('int')
+ classes = classes.astype("int")
elif np.issubdtype(classes.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
- classes_name = scope.get_unique_variable_name('classes')
+ classes_name = scope.get_unique_variable_name("classes")
container.add_initializer(classes_name, class_type, classes.shape, classes)
proba_names_list = []
@@ -289,9 +406,9 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator,
for i_est, estimator in enumerate(op.estimators_):
label_name = scope.declare_local_variable(
- 'elab_name_%d' % i_est, Int64TensorType())
- proba_name = scope.declare_local_variable(
- 'eprob_name_%d' % i_est, proba_type())
+ "elab_name_%d" % i_est, Int64TensorType()
+ )
+ proba_name = scope.declare_local_variable("eprob_name_%d" % i_est, proba_type())
op_type = sklearn_operator_name_map[type(estimator)]
@@ -300,90 +417,130 @@ def convert_sklearn_ada_boost_classifier(scope: Scope, operator: Operator,
this_operator.outputs.extend([label_name, proba_name])
if add_cast:
- this_operator = scope.declare_local_operator('SklearnCast')
+ this_operator = scope.declare_local_operator("SklearnCast")
this_operator.inputs.append(proba_name)
- var_name = scope.declare_local_variable('cast', FloatTensorType())
+ var_name = scope.declare_local_variable("cast", FloatTensorType())
this_operator.outputs.append(var_name)
proba_name = var_name
- if op.algorithm == 'SAMME.R':
+ if op.algorithm == "SAMME.R":
cur_proba_name = _samme_r_proba(
- scope, container, proba_name.onnx_name, len(classes),
- dtype, proto_dtype)
+ scope, container, proba_name.onnx_name, len(classes), dtype, proto_dtype
+ )
else:
# SAMME
if _scikit_learn_before_022() and not use_raw_scores:
- weight_name = scope.get_unique_variable_name('weight')
- samme_proba_name = scope.get_unique_variable_name(
- 'samme_proba')
+ weight_name = scope.get_unique_variable_name("weight")
+ samme_proba_name = scope.get_unique_variable_name("samme_proba")
container.add_initializer(
- weight_name, onnx_proto.TensorProto.FLOAT,
- [], [op.estimator_weights_[i_est]])
- apply_mul(scope, [proba_name.onnx_name, weight_name],
- samme_proba_name, container, broadcast=1)
+ weight_name,
+ onnx_proto.TensorProto.FLOAT,
+ [],
+ [op.estimator_weights_[i_est]],
+ )
+ apply_mul(
+ scope,
+ [proba_name.onnx_name, weight_name],
+ samme_proba_name,
+ container,
+ broadcast=1,
+ )
cur_proba_name = samme_proba_name
else:
if classes_ind_name is None:
- classes_ind_name = scope.get_unique_variable_name(
- 'classes_ind3')
+ classes_ind_name = scope.get_unique_variable_name("classes_ind3")
container.add_initializer(
- classes_ind_name, onnx_proto.TensorProto.INT64,
- (1, len(classes)), list(range(len(classes))))
+ classes_ind_name,
+ onnx_proto.TensorProto.INT64,
+ (1, len(classes)),
+ list(range(len(classes))),
+ )
if zero_name is None:
- shape_name = scope.get_unique_variable_name('shape')
+ shape_name = scope.get_unique_variable_name("shape")
container.add_node(
- 'Shape', proba_name.onnx_name, shape_name,
- name=scope.get_unique_operator_name('Shape'))
+ "Shape",
+ proba_name.onnx_name,
+ shape_name,
+ name=scope.get_unique_operator_name("Shape"),
+ )
- zero_name = scope.get_unique_variable_name('zero')
+ zero_name = scope.get_unique_variable_name("zero")
container.add_node(
- 'ConstantOfShape', shape_name, zero_name,
- name=scope.get_unique_operator_name('CoSA'),
+ "ConstantOfShape",
+ shape_name,
+ zero_name,
+ name=scope.get_unique_operator_name("CoSA"),
value=make_tensor(
- "value", onnx_proto.TensorProto.FLOAT,
- (1, ), [0]))
+ "value", onnx_proto.TensorProto.FLOAT, (1,), [0]
+ ),
+ )
- one_name = scope.get_unique_variable_name('one')
+ one_name = scope.get_unique_variable_name("one")
container.add_node(
- 'ConstantOfShape', shape_name, one_name,
- name=scope.get_unique_operator_name('CoSB'),
+ "ConstantOfShape",
+ shape_name,
+ one_name,
+ name=scope.get_unique_operator_name("CoSB"),
value=make_tensor(
- "value", onnx_proto.TensorProto.FLOAT,
- (1, ), [1.]))
+ "value", onnx_proto.TensorProto.FLOAT, (1,), [1.0]
+ ),
+ )
cur_proba_name = _samme_proba(
- scope, container, proba_name.onnx_name,
- op.estimator_weights_[i_est], zero_name,
- classes_ind_name, one_name)
+ scope,
+ container,
+ proba_name.onnx_name,
+ op.estimator_weights_[i_est],
+ zero_name,
+ classes_ind_name,
+ one_name,
+ )
proba_names_list.append(cur_proba_name)
- function = (_generate_raw_scores if use_raw_scores
- else _normalise_probability)
- class_prob_name = function(scope, container, operator,
- proba_names_list, op)
- container.add_node('ArgMax', class_prob_name,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
+ function = _generate_raw_scores if use_raw_scores else _normalise_probability
+ class_prob_name = function(scope, container, operator, proba_names_list, op)
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArgMax",
+ class_prob_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == onnx_proto.TensorProto.INT32:
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
-
- apply_reshape(scope, array_feature_extractor_result_name,
- reshaped_result_name, container,
- desired_shape=(-1,))
- apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(-1,),
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=(-1,))
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=(-1,),
+ )
def _get_estimators_label(scope, operator, container, model):
@@ -395,14 +552,14 @@ def _get_estimators_label(scope, operator, container, model):
var_type = DoubleTensorType
else:
var_type = FloatTensorType
- concatenated_labels_name = scope.get_unique_variable_name(
- 'concatenated_labels')
+ concatenated_labels_name = scope.get_unique_variable_name("concatenated_labels")
input_name = operator.inputs
estimators_results_list = []
for i, estimator in enumerate(model.estimators_):
estimator_label_name = scope.declare_local_variable(
- 'est_label_%d' % i, var_type([None, 1]))
+ "est_label_%d" % i, var_type([None, 1])
+ )
op_type = sklearn_operator_name_map[type(estimator)]
@@ -412,104 +569,139 @@ def _get_estimators_label(scope, operator, container, model):
estimators_results_list.append(estimator_label_name.onnx_name)
- apply_concat(scope, estimators_results_list, concatenated_labels_name,
- container, axis=1)
+ apply_concat(
+ scope, estimators_results_list, concatenated_labels_name, container, axis=1
+ )
return concatenated_labels_name
def cum_sum(scope, container, rnn_input_name, sequence_length, proto_dtype):
opv = container.target_opset
- weights_cdf_name = scope.get_unique_variable_name('weights_cdf')
+ weights_cdf_name = scope.get_unique_variable_name("weights_cdf")
if opv < 11:
- transposed_input_name = scope.get_unique_variable_name(
- 'transposed_input')
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
- weights_name = scope.get_unique_variable_name('weights')
- rec_weights_name = scope.get_unique_variable_name('rec_weights')
- rnn_output_name = scope.get_unique_variable_name('rnn_output')
- permuted_rnn_y_name = scope.get_unique_variable_name('permuted_rnn_y')
-
- container.add_initializer(weights_name,
- proto_dtype, [1, 1, 1], [1])
- container.add_initializer(rec_weights_name,
- proto_dtype, [1, 1, 1], [1])
-
- apply_transpose(scope, rnn_input_name, transposed_input_name,
- container, perm=(1, 0))
- apply_reshape(scope, transposed_input_name, reshaped_result_name,
- container, desired_shape=(sequence_length, -1, 1))
+ transposed_input_name = scope.get_unique_variable_name("transposed_input")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ weights_name = scope.get_unique_variable_name("weights")
+ rec_weights_name = scope.get_unique_variable_name("rec_weights")
+ rnn_output_name = scope.get_unique_variable_name("rnn_output")
+ permuted_rnn_y_name = scope.get_unique_variable_name("permuted_rnn_y")
+
+ container.add_initializer(weights_name, proto_dtype, [1, 1, 1], [1])
+ container.add_initializer(rec_weights_name, proto_dtype, [1, 1, 1], [1])
+
+ apply_transpose(
+ scope, rnn_input_name, transposed_input_name, container, perm=(1, 0)
+ )
+ apply_reshape(
+ scope,
+ transposed_input_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(sequence_length, -1, 1),
+ )
container.add_node(
- 'RNN', inputs=[reshaped_result_name,
- weights_name, rec_weights_name],
- outputs=[rnn_output_name], activations=['Affine'],
- name=scope.get_unique_operator_name('RNN'),
- activation_alpha=[1.0], activation_beta=[0.0], hidden_size=1)
- apply_transpose(scope, rnn_output_name, permuted_rnn_y_name, container,
- perm=(2, 0, 1, 3))
+ "RNN",
+ inputs=[reshaped_result_name, weights_name, rec_weights_name],
+ outputs=[rnn_output_name],
+ activations=["Affine"],
+ name=scope.get_unique_operator_name("RNN"),
+ activation_alpha=[1.0],
+ activation_beta=[0.0],
+ hidden_size=1,
+ )
+ apply_transpose(
+ scope, rnn_output_name, permuted_rnn_y_name, container, perm=(2, 0, 1, 3)
+ )
apply_reshape(
- scope, permuted_rnn_y_name, weights_cdf_name, container,
- desired_shape=(-1, sequence_length))
+ scope,
+ permuted_rnn_y_name,
+ weights_cdf_name,
+ container,
+ desired_shape=(-1, sequence_length),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis_name')
- container.add_initializer(axis_name, onnx_proto.TensorProto.INT32,
- [], [1])
+ axis_name = scope.get_unique_variable_name("axis_name")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT32, [], [1])
container.add_node(
- 'CumSum', [rnn_input_name, axis_name], [weights_cdf_name],
- name=scope.get_unique_operator_name('CumSum'),
- op_version=11)
+ "CumSum",
+ [rnn_input_name, axis_name],
+ [weights_cdf_name],
+ name=scope.get_unique_operator_name("CumSum"),
+ op_version=11,
+ )
return weights_cdf_name
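
Before opset 11 there is no CumSum operator, so cum_sum emulates it with a one-unit RNN whose input and recurrent weights are 1 and whose activation is the identity ("Affine" with alpha=1, beta=0); the hidden state then accumulates h_t = x_t + h_{t-1}. A numpy restatement of that recurrence (illustrative only):

# Numpy restatement of the pre-opset-11 CumSum trick above.
import numpy as np

def rnn_cumsum(x):
    h = 0.0
    out = []
    for x_t in x:                 # one RNN step per sequence element
        h = 1.0 * x_t + 1.0 * h   # W = 1, R = 1, Affine(alpha=1, beta=0)
        out.append(h)
    return np.array(out)

x = np.array([0.2, 0.5, 0.3])
print(rnn_cumsum(x))      # [0.2 0.7 1. ]
print(np.cumsum(x))       # identical
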
-def _apply_gather_elements(scope, container, inputs, output, axis,
- dim, zero_type, suffix):
+def _apply_gather_elements(
+ scope, container, inputs, output, axis, dim, zero_type, suffix
+):
if container.target_opset >= 11:
container.add_node(
- 'GatherElements', inputs, output, op_version=11, axis=axis,
- name=scope.get_unique_operator_name('GatEls' + suffix))
+ "GatherElements",
+ inputs,
+ output,
+ op_version=11,
+ axis=axis,
+ name=scope.get_unique_operator_name("GatEls" + suffix),
+ )
else:
- classes_ind_name = scope.get_unique_variable_name('classes_ind2')
+ classes_ind_name = scope.get_unique_variable_name("classes_ind2")
container.add_initializer(
- classes_ind_name, onnx_proto.TensorProto.INT64,
- (1, dim), list(range(dim)))
+ classes_ind_name, onnx_proto.TensorProto.INT64, (1, dim), list(range(dim))
+ )
- shape_name = scope.get_unique_variable_name('shape')
+ shape_name = scope.get_unique_variable_name("shape")
+ container.add_node(
+ "Shape", inputs[0], shape_name, name=scope.get_unique_operator_name("Shape")
+ )
+ zero_name = scope.get_unique_variable_name("zero")
+ zero_val = 0 if zero_type == onnx_proto.TensorProto.INT64 else 0.0
container.add_node(
- 'Shape', inputs[0], shape_name,
- name=scope.get_unique_operator_name('Shape'))
- zero_name = scope.get_unique_variable_name('zero')
- zero_val = (0 if zero_type == onnx_proto.TensorProto.INT64
- else 0.)
+ "ConstantOfShape",
+ shape_name,
+ zero_name,
+ name=scope.get_unique_operator_name("CoSA"),
+ value=make_tensor("value", zero_type, (1,), [zero_val]),
+ op_version=9,
+ )
+
+ equal_name = scope.get_unique_variable_name("equal")
+ container.add_node(
+ "Equal",
+ [inputs[1], classes_ind_name],
+ equal_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+
+ selected = scope.get_unique_variable_name("selected")
container.add_node(
- 'ConstantOfShape', shape_name, zero_name,
- name=scope.get_unique_operator_name('CoSA'),
- value=make_tensor("value", zero_type,
- (1, ), [zero_val]), op_version=9)
-
- equal_name = scope.get_unique_variable_name('equal')
- container.add_node('Equal', [inputs[1], classes_ind_name],
- equal_name,
- name=scope.get_unique_operator_name('Equal'))
-
- selected = scope.get_unique_variable_name('selected')
- container.add_node('Where', [equal_name, inputs[0], zero_name],
- selected,
- name=scope.get_unique_operator_name('Where'))
+ "Where",
+ [equal_name, inputs[0], zero_name],
+ selected,
+ name=scope.get_unique_operator_name("Where"),
+ )
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', selected, output, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ selected,
+ output,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [selected, axis_name], output,
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ [selected, axis_name],
+ output,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
-def convert_sklearn_ada_boost_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_ada_boost_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for AdaBoost regressor.
This function first calls _get_estimators_label() which returns a
@@ -527,91 +719,137 @@ def convert_sklearn_ada_boost_regressor(scope: Scope, operator: Operator,
op = operator.raw_operator
- negate_name = scope.get_unique_variable_name('negate')
- estimators_weights_name = scope.get_unique_variable_name(
- 'estimators_weights')
- half_scalar_name = scope.get_unique_variable_name('half_scalar')
- last_index_name = scope.get_unique_variable_name('last_index')
- negated_labels_name = scope.get_unique_variable_name('negated_labels')
- sorted_values_name = scope.get_unique_variable_name('sorted_values')
- sorted_indices_name = scope.get_unique_variable_name('sorted_indices')
+ negate_name = scope.get_unique_variable_name("negate")
+ estimators_weights_name = scope.get_unique_variable_name("estimators_weights")
+ half_scalar_name = scope.get_unique_variable_name("half_scalar")
+ last_index_name = scope.get_unique_variable_name("last_index")
+ negated_labels_name = scope.get_unique_variable_name("negated_labels")
+ sorted_values_name = scope.get_unique_variable_name("sorted_values")
+ sorted_indices_name = scope.get_unique_variable_name("sorted_indices")
array_feat_extractor_output_name = scope.get_unique_variable_name(
- 'array_feat_extractor_output')
- median_value_name = scope.get_unique_variable_name('median_value')
- comp_value_name = scope.get_unique_variable_name('comp_value')
- median_or_above_name = scope.get_unique_variable_name('median_or_above')
- median_idx_name = scope.get_unique_variable_name('median_idx')
- cast_result_name = scope.get_unique_variable_name('cast_result')
- reshaped_weights_name = scope.get_unique_variable_name('reshaped_weights')
- median_estimators_name = scope.get_unique_variable_name(
- 'median_estimators')
-
- container.add_initializer(negate_name, proto_dtype,
- [], [-1])
- container.add_initializer(estimators_weights_name,
- proto_dtype,
- [len(op.estimator_weights_)],
- op.estimator_weights_)
- container.add_initializer(half_scalar_name, proto_dtype,
- [], [0.5])
- container.add_initializer(last_index_name, onnx_proto.TensorProto.INT64,
- [], [len(op.estimators_) - 1])
-
- concatenated_labels = _get_estimators_label(scope, operator,
- container, op)
- apply_mul(scope, [concatenated_labels, negate_name],
- negated_labels_name, container, broadcast=1)
+ "array_feat_extractor_output"
+ )
+ median_value_name = scope.get_unique_variable_name("median_value")
+ comp_value_name = scope.get_unique_variable_name("comp_value")
+ median_or_above_name = scope.get_unique_variable_name("median_or_above")
+ median_idx_name = scope.get_unique_variable_name("median_idx")
+ cast_result_name = scope.get_unique_variable_name("cast_result")
+ reshaped_weights_name = scope.get_unique_variable_name("reshaped_weights")
+ median_estimators_name = scope.get_unique_variable_name("median_estimators")
+
+ container.add_initializer(negate_name, proto_dtype, [], [-1])
+ container.add_initializer(
+ estimators_weights_name,
+ proto_dtype,
+ [len(op.estimator_weights_)],
+ op.estimator_weights_,
+ )
+ container.add_initializer(half_scalar_name, proto_dtype, [], [0.5])
+ container.add_initializer(
+ last_index_name, onnx_proto.TensorProto.INT64, [], [len(op.estimators_) - 1]
+ )
+
+ concatenated_labels = _get_estimators_label(scope, operator, container, op)
+ apply_mul(
+ scope,
+ [concatenated_labels, negate_name],
+ negated_labels_name,
+ container,
+ broadcast=1,
+ )
try:
- apply_topk(scope, negated_labels_name,
- [sorted_values_name, sorted_indices_name],
- container, k=len(op.estimators_))
+ apply_topk(
+ scope,
+ negated_labels_name,
+ [sorted_values_name, sorted_indices_name],
+ container,
+ k=len(op.estimators_),
+ )
except TypeError:
# onnxconverter-common < 1.7.0
- apply_topk(scope, [negated_labels_name],
- [sorted_values_name, sorted_indices_name],
- container, k=len(op.estimators_))
+ apply_topk(
+ scope,
+ [negated_labels_name],
+ [sorted_values_name, sorted_indices_name],
+ container,
+ k=len(op.estimators_),
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[estimators_weights_name, sorted_indices_name],
- array_feat_extractor_output_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ array_feat_extractor_output_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
apply_reshape(
- scope, array_feat_extractor_output_name, reshaped_weights_name,
- container, desired_shape=(-1, len(op.estimators_)))
+ scope,
+ array_feat_extractor_output_name,
+ reshaped_weights_name,
+ container,
+ desired_shape=(-1, len(op.estimators_)),
+ )
weights_cdf_name = cum_sum(
- scope, container, reshaped_weights_name,
- len(op.estimators_), proto_dtype)
+ scope, container, reshaped_weights_name, len(op.estimators_), proto_dtype
+ )
container.add_node(
- 'ArrayFeatureExtractor', [weights_cdf_name, last_index_name],
- median_value_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_mul(scope, [median_value_name, half_scalar_name],
- comp_value_name, container, broadcast=1)
+ "ArrayFeatureExtractor",
+ [weights_cdf_name, last_index_name],
+ median_value_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_mul(
+ scope,
+ [median_value_name, half_scalar_name],
+ comp_value_name,
+ container,
+ broadcast=1,
+ )
container.add_node(
- 'Less', [weights_cdf_name, comp_value_name],
+ "Less",
+ [weights_cdf_name, comp_value_name],
median_or_above_name,
- name=scope.get_unique_operator_name('Less'))
- apply_cast(scope, median_or_above_name, cast_result_name,
- container, to=proto_dtype)
- container.add_node('ArgMin', cast_result_name,
- median_idx_name,
- name=scope.get_unique_operator_name('ArgMin'), axis=1)
+ name=scope.get_unique_operator_name("Less"),
+ )
+ apply_cast(scope, median_or_above_name, cast_result_name, container, to=proto_dtype)
+ container.add_node(
+ "ArgMin",
+ cast_result_name,
+ median_idx_name,
+ name=scope.get_unique_operator_name("ArgMin"),
+ axis=1,
+ )
_apply_gather_elements(
- scope, container, [sorted_indices_name, median_idx_name],
- median_estimators_name, axis=1, dim=len(op.estimators_),
- zero_type=onnx_proto.TensorProto.INT64, suffix="A")
+ scope,
+ container,
+ [sorted_indices_name, median_idx_name],
+ median_estimators_name,
+ axis=1,
+ dim=len(op.estimators_),
+ zero_type=onnx_proto.TensorProto.INT64,
+ suffix="A",
+ )
output_name = operator.output_full_names[0]
_apply_gather_elements(
- scope, container, [concatenated_labels, median_estimators_name],
- output_name, axis=1, dim=len(op.estimators_),
- zero_type=proto_dtype, suffix="B")
-
-
-register_converter('SklearnAdaBoostClassifier',
- convert_sklearn_ada_boost_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
-register_converter('SklearnAdaBoostRegressor',
- convert_sklearn_ada_boost_regressor)
+ scope,
+ container,
+ [concatenated_labels, median_estimators_name],
+ output_name,
+ axis=1,
+ dim=len(op.estimators_),
+ zero_type=proto_dtype,
+ suffix="B",
+ )
+
+
+register_converter(
+ "SklearnAdaBoostClassifier",
+ convert_sklearn_ada_boost_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
+register_converter("SklearnAdaBoostRegressor", convert_sklearn_ada_boost_regressor)
diff --git a/skl2onnx/operator_converters/array_feature_extractor.py b/skl2onnx/operator_converters/array_feature_extractor.py
index e26fc86bf..99b7639e6 100644
--- a/skl2onnx/operator_converters/array_feature_extractor.py
+++ b/skl2onnx/operator_converters/array_feature_extractor.py
@@ -8,32 +8,41 @@
def convert_sklearn_array_feature_extractor(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Extracts a subset of columns. This is used by *ColumnTransformer*.
"""
- column_indices_name = scope.get_unique_variable_name('column_indices')
+ column_indices_name = scope.get_unique_variable_name("column_indices")
for i, ind in enumerate(operator.column_indices):
if not isinstance(ind, int):
- raise RuntimeError(("Column {0}:'{1}' indices must be specified "
- "as integers. This error may happen when "
- "column names are used to define a "
- "ColumnTransformer. Column name in input data "
- "do not necessarily match input variables "
- "defined for the ONNX model.").format(i, ind))
- container.add_initializer(column_indices_name,
- onnx_proto.TensorProto.INT64,
- [len(operator.column_indices)],
- operator.column_indices)
+ raise RuntimeError(
+ (
+ "Column {0}:'{1}' indices must be specified "
+ "as integers. This error may happen when "
+ "column names are used to define a "
+                    "ColumnTransformer. Column names in input data "
+                    "do not necessarily match the input variables "
+                    "defined for the ONNX model."
+ ).format(i, ind)
+ )
+ container.add_initializer(
+ column_indices_name,
+ onnx_proto.TensorProto.INT64,
+ [len(operator.column_indices)],
+ operator.column_indices,
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[operator.inputs[0].full_name, column_indices_name],
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
- op_domain='ai.onnx.ml')
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ op_domain="ai.onnx.ml",
+ )
-register_converter('SklearnArrayFeatureExtractor',
- convert_sklearn_array_feature_extractor)
+register_converter(
+ "SklearnArrayFeatureExtractor", convert_sklearn_array_feature_extractor
+)
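The converter above selects a fixed set of columns with an ai.onnx.ml ArrayFeatureExtractor node and therefore requires integer column indices. A minimal sketch of the equivalent selection, with assumed toy data:

```python
import numpy as np

# Assumed toy input: 3 samples, 4 features; keep columns 0 and 2,
# which is what the ArrayFeatureExtractor node does with the
# column_indices initializer.
X = np.arange(12, dtype=np.float32).reshape(3, 4)
column_indices = [0, 2]          # must be integers, not column names
subset = X[:, column_indices]
print(subset.shape)              # (3, 2)
```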
diff --git a/skl2onnx/operator_converters/bagging.py b/skl2onnx/operator_converters/bagging.py
index afdd5f973..2146ae972 100644
--- a/skl2onnx/operator_converters/bagging.py
+++ b/skl2onnx/operator_converters/bagging.py
@@ -4,9 +4,7 @@
import numpy as np
from .._supported_operators import sklearn_operator_name_map
from ..common.data_types import Int64TensorType
-from ..common._apply_operation import (
- apply_cast, apply_concat,
- apply_div, apply_reshape)
+from ..common._apply_operation import apply_cast, apply_concat, apply_div, apply_reshape
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -21,169 +19,238 @@ def _calculate_proba(scope, operator, container, model):
final_proba_name = operator.outputs[1].full_name
proba_list = []
options = container.get_options(model, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
- has_proba = (hasattr(model.estimators_[0], 'predict_proba')
- or (use_raw_scores and hasattr(
- model.estimators_[0], 'decision_function')))
+ use_raw_scores = options["raw_scores"]
+ has_proba = hasattr(model.estimators_[0], "predict_proba") or (
+ use_raw_scores and hasattr(model.estimators_[0], "decision_function")
+ )
for index, estimator in enumerate(model.estimators_):
op_type = sklearn_operator_name_map[type(estimator)]
this_operator = scope.declare_local_operator(op_type, estimator)
- if container.has_options(estimator, 'raw_scores'):
- container.add_options(
- id(estimator), {'raw_scores': use_raw_scores})
- scope.add_options(id(estimator), {'raw_scores': use_raw_scores})
+ if container.has_options(estimator, "raw_scores"):
+ container.add_options(id(estimator), {"raw_scores": use_raw_scores})
+ scope.add_options(id(estimator), {"raw_scores": use_raw_scores})
- label_name = scope.declare_local_variable(
- 'label_%d' % index, Int64TensorType())
+ label_name = scope.declare_local_variable("label_%d" % index, Int64TensorType())
proba_name = scope.declare_local_variable(
- 'proba_%d' % index, operator.inputs[0].type.__class__())
+ "proba_%d" % index, operator.inputs[0].type.__class__()
+ )
features = model.estimators_features_[index]
- n_features = (model.n_features_in_ if hasattr(model, 'n_features_in_')
- else model.n_features_)
- if (len(features) == n_features and
- list(features) == list(range(n_features))):
+ n_features = (
+ model.n_features_in_
+ if hasattr(model, "n_features_in_")
+ else model.n_features_
+ )
+ if len(features) == n_features and list(features) == list(range(n_features)):
this_operator.inputs = operator.inputs
else:
# subset of features
feat_name = scope.declare_local_variable(
- 'fsel_%d' % index, operator.inputs[0].type.__class__())
- index_name = scope.get_unique_variable_name(
- 'index_name_%d' % index)
+ "fsel_%d" % index, operator.inputs[0].type.__class__()
+ )
+ index_name = scope.get_unique_variable_name("index_name_%d" % index)
container.add_initializer(
- index_name, onnx_proto.TensorProto.INT64,
- (len(features), ), list(features))
+ index_name,
+ onnx_proto.TensorProto.INT64,
+ (len(features),),
+ list(features),
+ )
container.add_node(
- 'Gather', [operator.inputs[0].full_name, index_name],
+ "Gather",
+ [operator.inputs[0].full_name, index_name],
[feat_name.full_name],
- name=scope.get_unique_operator_name('GatherBG'), axis=1)
+ name=scope.get_unique_operator_name("GatherBG"),
+ axis=1,
+ )
this_operator.inputs.append(feat_name)
this_operator.outputs.append(label_name)
this_operator.outputs.append(proba_name)
- proba_output_name = (proba_name.onnx_name if has_proba
- else label_name.onnx_name)
+ proba_output_name = proba_name.onnx_name if has_proba else label_name.onnx_name
reshape_dim_val = len(model.classes_) if has_proba else 1
- reshaped_proba_name = scope.get_unique_variable_name('reshaped_proba')
- apply_reshape(scope, proba_output_name, reshaped_proba_name,
- container, desired_shape=(1, -1, reshape_dim_val))
+ reshaped_proba_name = scope.get_unique_variable_name("reshaped_proba")
+ apply_reshape(
+ scope,
+ proba_output_name,
+ reshaped_proba_name,
+ container,
+ desired_shape=(1, -1, reshape_dim_val),
+ )
proba_list.append(reshaped_proba_name)
- merged_proba_name = scope.get_unique_variable_name('merged_proba')
- apply_concat(scope, proba_list,
- merged_proba_name, container, axis=0)
+ merged_proba_name = scope.get_unique_variable_name("merged_proba")
+ apply_concat(scope, proba_list, merged_proba_name, container, axis=0)
if has_proba:
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [0])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0])
container.add_node(
- 'ReduceMean', [merged_proba_name, axis_name],
+ "ReduceMean",
+ [merged_proba_name, axis_name],
final_proba_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- keepdims=0)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ keepdims=0,
+ )
else:
container.add_node(
- 'ReduceMean', merged_proba_name,
+ "ReduceMean",
+ merged_proba_name,
final_proba_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- axes=[0], keepdims=0)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ axes=[0],
+ keepdims=0,
+ )
else:
- n_estimators_name = scope.get_unique_variable_name('n_estimators')
- class_labels_name = scope.get_unique_variable_name('class_labels')
- equal_result_name = scope.get_unique_variable_name('equal_result')
- cast_output_name = scope.get_unique_variable_name('cast_output')
- reduced_proba_name = scope.get_unique_variable_name('reduced_proba')
+ n_estimators_name = scope.get_unique_variable_name("n_estimators")
+ class_labels_name = scope.get_unique_variable_name("class_labels")
+ equal_result_name = scope.get_unique_variable_name("equal_result")
+ cast_output_name = scope.get_unique_variable_name("cast_output")
+ reduced_proba_name = scope.get_unique_variable_name("reduced_proba")
container.add_initializer(
- n_estimators_name, onnx_proto.TensorProto.FLOAT, [],
- [len(model.estimators_)])
+ n_estimators_name,
+ onnx_proto.TensorProto.FLOAT,
+ [],
+ [len(model.estimators_)],
+ )
container.add_initializer(
- class_labels_name, onnx_proto.TensorProto.INT64,
+ class_labels_name,
+ onnx_proto.TensorProto.INT64,
[1, 1, len(model.estimators_[0].classes_)],
- model.estimators_[0].classes_)
+ model.estimators_[0].classes_,
+ )
- container.add_node('Equal', [class_labels_name, merged_proba_name],
- equal_result_name,
- name=scope.get_unique_operator_name('Equal'))
- apply_cast(scope, equal_result_name, cast_output_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ container.add_node(
+ "Equal",
+ [class_labels_name, merged_proba_name],
+ equal_result_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+ apply_cast(
+ scope,
+ equal_result_name,
+ cast_output_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', cast_output_name, reduced_proba_name,
- name=scope.get_unique_operator_name('ReduceSum'),
- axes=[0], keepdims=0)
+ "ReduceSum",
+ cast_output_name,
+ reduced_proba_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ axes=[0],
+ keepdims=0,
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [cast_output_name, axis_name],
- reduced_proba_name, keepdims=0,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_div(scope, [reduced_proba_name, n_estimators_name],
- final_proba_name, container, broadcast=1)
+ "ReduceSum",
+ [cast_output_name, axis_name],
+ reduced_proba_name,
+ keepdims=0,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_div(
+ scope,
+ [reduced_proba_name, n_estimators_name],
+ final_proba_name,
+ container,
+ broadcast=1,
+ )
return final_proba_name
-def convert_sklearn_bagging_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_bagging_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for BaggingClassifier.
"""
- if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
+ if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]:
raise RuntimeError(
"Option 'nocl' is not implemented for operator '{}'.".format(
- operator.raw_operator.__class__.__name__))
+ operator.raw_operator.__class__.__name__
+ )
+ )
bagging_op = operator.raw_operator
classes = bagging_op.classes_
output_shape = (-1,)
- classes_name = scope.get_unique_variable_name('classes')
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
+ classes_name = scope.get_unique_variable_name("classes")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
class_type = onnx_proto.TensorProto.STRING
- if (np.issubdtype(bagging_op.classes_.dtype, np.floating) or
- bagging_op.classes_.dtype == np.bool_):
+ if (
+ np.issubdtype(bagging_op.classes_.dtype, np.floating)
+ or bagging_op.classes_.dtype == np.bool_
+ ):
class_type = onnx_proto.TensorProto.INT32
classes = classes.astype(np.int32)
elif np.issubdtype(bagging_op.classes_.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
container.add_initializer(classes_name, class_type, classes.shape, classes)
proba_name = _calculate_proba(scope, operator, container, bagging_op)
container.add_node(
- 'ArgMax', proba_name, argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
+ "ArgMax",
+ proba_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == onnx_proto.TensorProto.INT32:
- cast_result_name = scope.get_unique_variable_name('cast_result')
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
- apply_cast(scope, array_feature_extractor_result_name,
- cast_result_name, container,
- to=onnx_proto.TensorProto.INT64)
- apply_reshape(scope, cast_result_name, reshaped_result_name,
- container, desired_shape=output_shape)
- apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ cast_result_name = scope.get_unique_variable_name("cast_result")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ apply_cast(
+ scope,
+ array_feature_extractor_result_name,
+ cast_result_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
+ apply_reshape(
+ scope,
+ cast_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=output_shape,
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else: # string labels
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=output_shape)
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=output_shape,
+ )
-def convert_sklearn_bagging_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_bagging_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for BaggingRegressor.
"""
@@ -194,59 +261,79 @@ def convert_sklearn_bagging_regressor(scope: Scope, operator: Operator,
this_operator = scope.declare_local_operator(op_type, estimator)
features = bagging_op.estimators_features_[index]
- n_features = (bagging_op.n_features_in_
- if hasattr(bagging_op, 'n_features_in_')
- else bagging_op.n_features_)
- if (len(features) == n_features and
- list(features) == list(range(n_features))):
+ n_features = (
+ bagging_op.n_features_in_
+ if hasattr(bagging_op, "n_features_in_")
+ else bagging_op.n_features_
+ )
+ if len(features) == n_features and list(features) == list(range(n_features)):
this_operator.inputs = operator.inputs
else:
# subset of features
feat_name = scope.declare_local_variable(
- 'fsel_%d' % index, operator.inputs[0].type.__class__())
- index_name = scope.get_unique_variable_name('index_name')
+ "fsel_%d" % index, operator.inputs[0].type.__class__()
+ )
+ index_name = scope.get_unique_variable_name("index_name")
container.add_initializer(
- index_name, onnx_proto.TensorProto.INT64,
- (len(features), ), list(features))
+ index_name,
+ onnx_proto.TensorProto.INT64,
+ (len(features),),
+ list(features),
+ )
container.add_node(
- 'Gather', [operator.inputs[0].full_name, index_name],
+ "Gather",
+ [operator.inputs[0].full_name, index_name],
[feat_name.full_name],
- name=scope.get_unique_operator_name('GatherBG'), axis=1)
+ name=scope.get_unique_operator_name("GatherBG"),
+ axis=1,
+ )
this_operator.inputs.append(feat_name)
label_name = scope.declare_local_variable(
- 'variable_%d' % index, this_operator.inputs[0].type.__class__())
+ "variable_%d" % index, this_operator.inputs[0].type.__class__()
+ )
this_operator.outputs.append(label_name)
- reshaped_proba_name = scope.get_unique_variable_name('reshaped_proba')
- apply_reshape(scope, label_name.onnx_name, reshaped_proba_name,
- container, desired_shape=(1, -1, 1))
+ reshaped_proba_name = scope.get_unique_variable_name("reshaped_proba")
+ apply_reshape(
+ scope,
+ label_name.onnx_name,
+ reshaped_proba_name,
+ container,
+ desired_shape=(1, -1, 1),
+ )
proba_list.append(reshaped_proba_name)
- merged_proba_name = scope.get_unique_variable_name('merged_proba')
- apply_concat(scope, proba_list,
- merged_proba_name, container, axis=0)
+ merged_proba_name = scope.get_unique_variable_name("merged_proba")
+ apply_concat(scope, proba_list, merged_proba_name, container, axis=0)
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [0])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0])
container.add_node(
- 'ReduceMean', [merged_proba_name, axis_name],
+ "ReduceMean",
+ [merged_proba_name, axis_name],
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- keepdims=0)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ keepdims=0,
+ )
else:
container.add_node(
- 'ReduceMean', merged_proba_name,
+ "ReduceMean",
+ merged_proba_name,
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- axes=[0], keepdims=0)
-
-
-register_converter('SklearnBaggingClassifier',
- convert_sklearn_bagging_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
-register_converter('SklearnBaggingRegressor',
- convert_sklearn_bagging_regressor)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ axes=[0],
+ keepdims=0,
+ )
+
+
+register_converter(
+ "SklearnBaggingClassifier",
+ convert_sklearn_bagging_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
+register_converter("SklearnBaggingRegressor", convert_sklearn_bagging_regressor)
diff --git a/skl2onnx/operator_converters/binariser.py b/skl2onnx/operator_converters/binariser.py
index d22687321..478d4c601 100644
--- a/skl2onnx/operator_converters/binariser.py
+++ b/skl2onnx/operator_converters/binariser.py
@@ -9,37 +9,47 @@
from .common import concatenate_variables
-def convert_sklearn_binarizer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_binarizer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
feature_name = concatenate_variables(scope, operator.inputs, container)
if isinstance(operator.inputs[0].type, DoubleTensorType):
- name0 = scope.get_unique_variable_name('cst0')
- name1 = scope.get_unique_variable_name('cst1')
- thres = scope.get_unique_variable_name('th')
+ name0 = scope.get_unique_variable_name("cst0")
+ name1 = scope.get_unique_variable_name("cst1")
+ thres = scope.get_unique_variable_name("th")
+ container.add_initializer(name0, onnx_proto.TensorProto.DOUBLE, [], [0.0])
+ container.add_initializer(name1, onnx_proto.TensorProto.DOUBLE, [], [1.0])
container.add_initializer(
- name0, onnx_proto.TensorProto.DOUBLE, [], [0.])
- container.add_initializer(
- name1, onnx_proto.TensorProto.DOUBLE, [], [1.])
- container.add_initializer(
- thres, onnx_proto.TensorProto.DOUBLE, [],
- [float(operator.raw_operator.threshold)])
- binbool = scope.get_unique_variable_name('binbool')
+ thres,
+ onnx_proto.TensorProto.DOUBLE,
+ [],
+ [float(operator.raw_operator.threshold)],
+ )
+ binbool = scope.get_unique_variable_name("binbool")
container.add_node(
- 'Less', [feature_name, thres], binbool,
- name=scope.get_unique_operator_name('Less'))
+ "Less",
+ [feature_name, thres],
+ binbool,
+ name=scope.get_unique_operator_name("Less"),
+ )
container.add_node(
- 'Where', [binbool, name0, name1], operator.output_full_names,
- name='Where')
+ "Where", [binbool, name0, name1], operator.output_full_names, name="Where"
+ )
return
- op_type = 'Binarizer'
+ op_type = "Binarizer"
attrs = {
- 'name': scope.get_unique_operator_name(op_type),
- 'threshold': float(operator.raw_operator.threshold)
+ "name": scope.get_unique_operator_name(op_type),
+ "threshold": float(operator.raw_operator.threshold),
}
- container.add_node(op_type, feature_name, operator.output_full_names,
- op_domain='ai.onnx.ml', **attrs)
+ container.add_node(
+ op_type,
+ feature_name,
+ operator.output_full_names,
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
-register_converter('SklearnBinarizer', convert_sklearn_binarizer)
+register_converter("SklearnBinarizer", convert_sklearn_binarizer)
diff --git a/skl2onnx/operator_converters/calibrated_classifier_cv.py b/skl2onnx/operator_converters/calibrated_classifier_cv.py
index ccab3eb83..808958b95 100644
--- a/skl2onnx/operator_converters/calibrated_classifier_cv.py
+++ b/skl2onnx/operator_converters/calibrated_classifier_cv.py
@@ -4,55 +4,77 @@
import numpy as np
from onnx import TensorProto
from ..common._apply_operation import (
- apply_abs, apply_add, apply_cast, apply_concat, apply_clip,
- apply_div, apply_exp, apply_mul, apply_reshape, apply_sub)
+ apply_abs,
+ apply_add,
+ apply_cast,
+ apply_concat,
+ apply_clip,
+ apply_div,
+ apply_exp,
+ apply_mul,
+ apply_reshape,
+ apply_sub,
+)
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-from ..common.data_types import (
- guess_numpy_type, Int64TensorType, guess_proto_type)
+from ..common.data_types import guess_numpy_type, Int64TensorType, guess_proto_type
from ..common._registration import register_converter
from .._supported_operators import sklearn_operator_name_map
from sklearn.ensemble import RandomForestClassifier
-def _handle_zeros(scope, container, concatenated_prob_name,
- reduced_prob_name, n_classes, proto_type):
+def _handle_zeros(
+ scope, container, concatenated_prob_name, reduced_prob_name, n_classes, proto_type
+):
"""
This function replaces 0s in concatenated_prob_name with 1s and
0s in reduced_prob_name with n_classes.
"""
- cast_prob_name = scope.get_unique_variable_name('cast_prob')
- bool_not_cast_prob_name = scope.get_unique_variable_name(
- 'bool_not_cast_prob')
- mask_name = scope.get_unique_variable_name('mask')
+ cast_prob_name = scope.get_unique_variable_name("cast_prob")
+ bool_not_cast_prob_name = scope.get_unique_variable_name("bool_not_cast_prob")
+ mask_name = scope.get_unique_variable_name("mask")
masked_concatenated_prob_name = scope.get_unique_variable_name(
- 'masked_concatenated_prob')
- n_classes_name = scope.get_unique_variable_name('n_classes')
- reduced_prob_mask_name = scope.get_unique_variable_name(
- 'reduced_prob_mask')
- masked_reduced_prob_name = scope.get_unique_variable_name(
- 'masked_reduced_prob')
+ "masked_concatenated_prob"
+ )
+ n_classes_name = scope.get_unique_variable_name("n_classes")
+ reduced_prob_mask_name = scope.get_unique_variable_name("reduced_prob_mask")
+ masked_reduced_prob_name = scope.get_unique_variable_name("masked_reduced_prob")
proto_type2 = proto_type
if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE):
proto_type2 = TensorProto.FLOAT
- container.add_initializer(n_classes_name, proto_type2,
- [], [n_classes])
-
- apply_cast(scope, reduced_prob_name, cast_prob_name, container,
- to=TensorProto.BOOL)
- container.add_node('Not', cast_prob_name,
- bool_not_cast_prob_name,
- name=scope.get_unique_operator_name('Not'))
- apply_cast(scope, bool_not_cast_prob_name, mask_name, container,
- to=proto_type2)
- apply_add(scope, [concatenated_prob_name, mask_name],
- masked_concatenated_prob_name, container, broadcast=1)
- apply_mul(scope, [mask_name, n_classes_name], reduced_prob_mask_name,
- container, broadcast=1)
- apply_add(scope, [reduced_prob_name, reduced_prob_mask_name],
- masked_reduced_prob_name, container, broadcast=0)
+ container.add_initializer(n_classes_name, proto_type2, [], [n_classes])
+
+ apply_cast(scope, reduced_prob_name, cast_prob_name, container, to=TensorProto.BOOL)
+ container.add_node(
+ "Not",
+ cast_prob_name,
+ bool_not_cast_prob_name,
+ name=scope.get_unique_operator_name("Not"),
+ )
+ apply_cast(scope, bool_not_cast_prob_name, mask_name, container, to=proto_type2)
+ apply_add(
+ scope,
+ [concatenated_prob_name, mask_name],
+ masked_concatenated_prob_name,
+ container,
+ broadcast=1,
+ )
+ apply_mul(
+ scope,
+ [mask_name, n_classes_name],
+ reduced_prob_mask_name,
+ container,
+ broadcast=1,
+ )
+ apply_add(
+ scope,
+ [reduced_prob_name, reduced_prob_mask_name],
+ masked_reduced_prob_name,
+ container,
+ broadcast=0,
+ )
return masked_concatenated_prob_name, masked_reduced_prob_name
@@ -60,48 +82,53 @@ def _transform_sigmoid(scope, container, model, df_col_name, k, proto_type):
"""
Sigmoid Calibration method
"""
- a_name = scope.get_unique_variable_name('a')
- b_name = scope.get_unique_variable_name('b')
- a_df_prod_name = scope.get_unique_variable_name('a_df_prod')
- exp_parameter_name = scope.get_unique_variable_name(
- 'exp_parameter')
- exp_result_name = scope.get_unique_variable_name('exp_result')
- unity_name = scope.get_unique_variable_name('unity')
- denominator_name = scope.get_unique_variable_name('denominator')
+ a_name = scope.get_unique_variable_name("a")
+ b_name = scope.get_unique_variable_name("b")
+ a_df_prod_name = scope.get_unique_variable_name("a_df_prod")
+ exp_parameter_name = scope.get_unique_variable_name("exp_parameter")
+ exp_result_name = scope.get_unique_variable_name("exp_result")
+ unity_name = scope.get_unique_variable_name("unity")
+ denominator_name = scope.get_unique_variable_name("denominator")
sigmoid_predict_result_name = scope.get_unique_variable_name(
- 'sigmoid_predict_result')
+ "sigmoid_predict_result"
+ )
proto_type2 = proto_type
if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE):
proto_type2 = TensorProto.FLOAT
- if hasattr(model, 'calibrators_'):
+ if hasattr(model, "calibrators_"):
# scikit-learn<1.1
calibrators = model.calibrators_
- elif hasattr(model, 'calibrators'):
+ elif hasattr(model, "calibrators"):
# scikit-learn>=1.1
calibrators = model.calibrators
else:
raise AttributeError(
"Unable to find attribute calibrators_ or "
"calibrators, check the model was trained, "
- "type=%r." % type(model))
+ "type=%r." % type(model)
+ )
- container.add_initializer(a_name, proto_type2,
- [], [calibrators[k].a_])
- container.add_initializer(b_name, proto_type2,
- [], [calibrators[k].b_])
+ container.add_initializer(a_name, proto_type2, [], [calibrators[k].a_])
+ container.add_initializer(b_name, proto_type2, [], [calibrators[k].b_])
container.add_initializer(unity_name, proto_type2, [], [1])
- apply_mul(scope, [a_name, df_col_name], a_df_prod_name, container,
- broadcast=0)
- apply_add(scope, [a_df_prod_name, b_name], exp_parameter_name,
- container, broadcast=0)
+ apply_mul(scope, [a_name, df_col_name], a_df_prod_name, container, broadcast=0)
+ apply_add(
+ scope, [a_df_prod_name, b_name], exp_parameter_name, container, broadcast=0
+ )
apply_exp(scope, exp_parameter_name, exp_result_name, container)
- apply_add(scope, [unity_name, exp_result_name], denominator_name,
- container, broadcast=0)
- apply_div(scope, [unity_name, denominator_name],
- sigmoid_predict_result_name, container, broadcast=0)
+ apply_add(
+ scope, [unity_name, exp_result_name], denominator_name, container, broadcast=0
+ )
+ apply_div(
+ scope,
+ [unity_name, denominator_name],
+ sigmoid_predict_result_name,
+ container,
+ broadcast=0,
+ )
return sigmoid_predict_result_name
@@ -112,86 +139,104 @@ def _transform_isotonic(scope, container, model, T, k, dtype, proto_type):
ArrayFeatureExtractor can only extract based on the last axis,
so we can't fetch different columns for different rows.
"""
- if hasattr(model, 'calibrators_'):
+ if hasattr(model, "calibrators_"):
# scikit-learn<1.1
calibrators = model.calibrators_
- elif hasattr(model, 'calibrators'):
+ elif hasattr(model, "calibrators"):
# scikit-learn>=1.1
calibrators = model.calibrators
else:
raise AttributeError(
"Unable to find attribute calibrators_ or "
"calibrators, check the model was trained, "
- "type=%r." % type(model))
-
- if calibrators[k].out_of_bounds == 'clip':
- clipped_df_name = scope.get_unique_variable_name('clipped_df')
- apply_clip(scope, T, clipped_df_name, container,
- operator_name=scope.get_unique_operator_name('Clip'),
- max=np.array(calibrators[k].X_max_, dtype=dtype),
- min=np.array(calibrators[k].X_min_, dtype=dtype))
+ "type=%r." % type(model)
+ )
+
+ if calibrators[k].out_of_bounds == "clip":
+ clipped_df_name = scope.get_unique_variable_name("clipped_df")
+ apply_clip(
+ scope,
+ T,
+ clipped_df_name,
+ container,
+ operator_name=scope.get_unique_operator_name("Clip"),
+ max=np.array(calibrators[k].X_max_, dtype=dtype),
+ min=np.array(calibrators[k].X_min_, dtype=dtype),
+ )
T = clipped_df_name
- reshaped_df_name = scope.get_unique_variable_name('reshaped_df')
- calibrator_x_name = scope.get_unique_variable_name('calibrator_x')
- calibrator_y_name = scope.get_unique_variable_name('calibrator_y')
- distance_name = scope.get_unique_variable_name('distance')
- absolute_distance_name = scope.get_unique_variable_name(
- 'absolute_distance')
- nearest_x_index_name = scope.get_unique_variable_name(
- 'nearest_x_index')
- nearest_y_name = scope.get_unique_variable_name('nearest_y')
-
- if hasattr(calibrators[k], '_X_'):
- atX, atY = '_X_', '_y_'
- elif hasattr(calibrators[k], '_necessary_X_'):
- atX, atY = '_necessary_X_', '_necessary_y_'
- elif hasattr(calibrators[k], 'X_thresholds_'):
- atX, atY = 'X_thresholds_', 'y_thresholds_'
+ reshaped_df_name = scope.get_unique_variable_name("reshaped_df")
+ calibrator_x_name = scope.get_unique_variable_name("calibrator_x")
+ calibrator_y_name = scope.get_unique_variable_name("calibrator_y")
+ distance_name = scope.get_unique_variable_name("distance")
+ absolute_distance_name = scope.get_unique_variable_name("absolute_distance")
+ nearest_x_index_name = scope.get_unique_variable_name("nearest_x_index")
+ nearest_y_name = scope.get_unique_variable_name("nearest_y")
+
+ if hasattr(calibrators[k], "_X_"):
+ atX, atY = "_X_", "_y_"
+ elif hasattr(calibrators[k], "_necessary_X_"):
+ atX, atY = "_necessary_X_", "_necessary_y_"
+ elif hasattr(calibrators[k], "X_thresholds_"):
+ atX, atY = "X_thresholds_", "y_thresholds_"
else:
raise AttributeError(
"Unable to find attribute '_X_' or '_necessary_X_' "
"for type {}\n{}."
- "".format(type(calibrators[k]),
- pprint.pformat(dir(calibrators[k]))))
+ "".format(type(calibrators[k]), pprint.pformat(dir(calibrators[k])))
+ )
proto_type2 = proto_type
if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE):
proto_type2 = TensorProto.FLOAT
container.add_initializer(
- calibrator_x_name, proto_type2,
+ calibrator_x_name,
+ proto_type2,
[len(getattr(calibrators[k], atX))],
- getattr(calibrators[k], atX))
+ getattr(calibrators[k], atX),
+ )
container.add_initializer(
- calibrator_y_name, proto_type2,
+ calibrator_y_name,
+ proto_type2,
[len(getattr(calibrators[k], atY))],
- getattr(calibrators[k], atY))
-
- apply_reshape(scope, T, reshaped_df_name, container,
- desired_shape=(-1, 1))
- apply_sub(scope, [reshaped_df_name, calibrator_x_name],
- distance_name, container, broadcast=1)
+ getattr(calibrators[k], atY),
+ )
+
+ apply_reshape(scope, T, reshaped_df_name, container, desired_shape=(-1, 1))
+ apply_sub(
+ scope,
+ [reshaped_df_name, calibrator_x_name],
+ distance_name,
+ container,
+ broadcast=1,
+ )
apply_abs(scope, distance_name, absolute_distance_name, container)
- container.add_node('ArgMin', absolute_distance_name,
- nearest_x_index_name, axis=1,
- name=scope.get_unique_operator_name('ArgMin'))
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArgMin",
+ absolute_distance_name,
+ nearest_x_index_name,
+ axis=1,
+ name=scope.get_unique_operator_name("ArgMin"),
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
[calibrator_y_name, nearest_x_index_name],
- nearest_y_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
-
- nearest_y_name_reshaped = scope.get_unique_variable_name(
- 'nearest_y_name_reshaped')
- apply_reshape(scope, nearest_y_name,
- nearest_y_name_reshaped, container,
- desired_shape=(-1, 1))
+ nearest_y_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+
+ nearest_y_name_reshaped = scope.get_unique_variable_name("nearest_y_name_reshaped")
+ apply_reshape(
+ scope, nearest_y_name, nearest_y_name_reshaped, container, desired_shape=(-1, 1)
+ )
return nearest_y_name_reshaped
-def convert_calibrated_classifier_base_estimator(scope, operator, container,
- model, model_index):
+def convert_calibrated_classifier_base_estimator(
+ scope, operator, container, model, model_index
+):
# Computational graph:
#
# In the following graph, variable names are in lower case characters only
@@ -281,10 +326,12 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container,
# class_prob_tensor [M, C] <--'
model_proba = {RandomForestClassifier}
- if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
+ if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]:
raise RuntimeError(
"Option 'nocl' is not implemented for operator '{}'.".format(
- operator.raw_operator.__class__.__name__))
+ operator.raw_operator.__class__.__name__
+ )
+ )
proto_type = guess_proto_type(operator.inputs[0].type)
proto_type2 = proto_type
if proto_type2 not in (TensorProto.FLOAT, TensorProto.DOUBLE):
@@ -293,22 +340,27 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container,
if dtype != np.float64:
dtype = np.float32
- base_model = (model.estimator if hasattr(model, 'estimator')
- else model.base_estimator)
+ base_model = (
+ model.estimator if hasattr(model, "estimator") else model.base_estimator
+ )
op_type = sklearn_operator_name_map[type(base_model)]
- n_classes = (len(model.classes_) if hasattr(model, 'classes_') else
- len(base_model.classes_))
+ n_classes = (
+ len(model.classes_) if hasattr(model, "classes_") else len(base_model.classes_)
+ )
prob_name = [None] * n_classes
this_operator = scope.declare_local_operator(op_type, base_model)
- if (container.has_options(base_model, 'raw_scores') and
- not type(base_model) in model_proba):
- container.add_options(id(base_model), {'raw_scores': True})
- scope.add_options(id(base_model), {'raw_scores': True})
+ if (
+ container.has_options(base_model, "raw_scores")
+ and type(base_model) not in model_proba
+ ):
+ container.add_options(id(base_model), {"raw_scores": True})
+ scope.add_options(id(base_model), {"raw_scores": True})
this_operator.inputs = operator.inputs
- label_name = scope.declare_local_variable('label', Int64TensorType())
+ label_name = scope.declare_local_variable("label", Int64TensorType())
df_name = scope.declare_local_variable(
- 'uncal_probability', operator.inputs[0].type.__class__())
+ "uncal_probability", operator.inputs[0].type.__class__()
+ )
this_operator.outputs.append(label_name)
this_operator.outputs.append(df_name)
df_inp = df_name.full_name
@@ -317,80 +369,105 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container,
cur_k = k
if n_classes == 2:
cur_k += 1
- k_name = scope.get_unique_variable_name('k')
+ k_name = scope.get_unique_variable_name("k")
df_col_name = scope.get_unique_variable_name(
- 'tdf_col_%d_c%d' % (model_index, k))
+ "tdf_col_%d_c%d" % (model_index, k)
+ )
prob_name[k] = scope.get_unique_variable_name(
- 'prob_{}_c{}'.format(model_index, k))
+ "prob_{}_c{}".format(model_index, k)
+ )
container.add_initializer(k_name, TensorProto.INT64, [], [cur_k])
container.add_node(
- 'ArrayFeatureExtractor', [df_inp, k_name], df_col_name,
- name=scope.get_unique_operator_name(
- 'CaliAFE_%d_c%d' % (model_index, k)),
- op_domain='ai.onnx.ml')
- if model.method == 'sigmoid':
- T = _transform_sigmoid(scope, container, model, df_col_name, k,
- proto_type)
+ "ArrayFeatureExtractor",
+ [df_inp, k_name],
+ df_col_name,
+ name=scope.get_unique_operator_name("CaliAFE_%d_c%d" % (model_index, k)),
+ op_domain="ai.onnx.ml",
+ )
+ if model.method == "sigmoid":
+ T = _transform_sigmoid(scope, container, model, df_col_name, k, proto_type)
else:
- T = _transform_isotonic(scope, container, model, df_col_name,
- k, dtype, proto_type)
+ T = _transform_isotonic(
+ scope, container, model, df_col_name, k, dtype, proto_type
+ )
prob_name[k] = T
if n_classes == 2:
break
if n_classes == 2:
- zeroth_col_name = scope.get_unique_variable_name(
- 'zeroth_col%d' % model_index)
- merged_prob_name = scope.get_unique_variable_name(
- 'merged_prob%d' % model_index)
+ zeroth_col_name = scope.get_unique_variable_name("zeroth_col%d" % model_index)
+ merged_prob_name = scope.get_unique_variable_name("merged_prob%d" % model_index)
unit_float_tensor_name = scope.get_unique_variable_name(
- 'unit_float_tensor%d' % model_index)
-
- container.add_initializer(unit_float_tensor_name,
- proto_type2, [], [1.0])
-
- apply_sub(scope, [unit_float_tensor_name, prob_name[0]],
- zeroth_col_name, container, broadcast=1)
- apply_concat(scope, [zeroth_col_name, prob_name[0]],
- merged_prob_name, container, axis=1,
- operator_name=scope.get_unique_variable_name(
- 'CaliConc%d' % model_index))
+ "unit_float_tensor%d" % model_index
+ )
+
+ container.add_initializer(unit_float_tensor_name, proto_type2, [], [1.0])
+
+ apply_sub(
+ scope,
+ [unit_float_tensor_name, prob_name[0]],
+ zeroth_col_name,
+ container,
+ broadcast=1,
+ )
+ apply_concat(
+ scope,
+ [zeroth_col_name, prob_name[0]],
+ merged_prob_name,
+ container,
+ axis=1,
+ operator_name=scope.get_unique_variable_name("CaliConc%d" % model_index),
+ )
class_prob_tensor_name = merged_prob_name
else:
- concatenated_prob_name = scope.get_unique_variable_name(
- 'concatenated_prob')
- reduced_prob_name = scope.get_unique_variable_name('reduced_prob')
- calc_prob_name = scope.get_unique_variable_name('calc_prob')
+ concatenated_prob_name = scope.get_unique_variable_name("concatenated_prob")
+ reduced_prob_name = scope.get_unique_variable_name("reduced_prob")
+ calc_prob_name = scope.get_unique_variable_name("calc_prob")
- apply_concat(scope, prob_name, concatenated_prob_name,
- container, axis=1)
+ apply_concat(scope, prob_name, concatenated_prob_name, container, axis=1)
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', concatenated_prob_name,
- reduced_prob_name, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ concatenated_prob_name,
+ reduced_prob_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
+ axis_name = scope.get_unique_variable_name("axis")
container.add_initializer(axis_name, TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [concatenated_prob_name, axis_name],
+ "ReduceSum",
+ [concatenated_prob_name, axis_name],
reduced_prob_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- num, deno = _handle_zeros(scope, container, concatenated_prob_name,
- reduced_prob_name, n_classes, proto_type)
- apply_div(scope, [num, deno],
- calc_prob_name, container, broadcast=1,
- operator_name=scope.get_unique_variable_name(
- 'CaliDiv%d' % model_index))
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ num, deno = _handle_zeros(
+ scope,
+ container,
+ concatenated_prob_name,
+ reduced_prob_name,
+ n_classes,
+ proto_type,
+ )
+ apply_div(
+ scope,
+ [num, deno],
+ calc_prob_name,
+ container,
+ broadcast=1,
+ operator_name=scope.get_unique_variable_name("CaliDiv%d" % model_index),
+ )
class_prob_tensor_name = calc_prob_name
return class_prob_tensor_name
def convert_sklearn_calibrated_classifier_cv(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# Computational graph:
#
# In the following graph, variable names are in lower case characters only
@@ -447,59 +524,97 @@ def convert_sklearn_calibrated_classifier_cv(
if np.issubdtype(op.classes_.dtype, np.floating):
class_type = TensorProto.INT32
classes = classes.astype(np.int32)
- elif (np.issubdtype(op.classes_.dtype, np.signedinteger) or
- op.classes_.dtype == np.bool_):
+ elif (
+ np.issubdtype(op.classes_.dtype, np.signedinteger)
+ or op.classes_.dtype == np.bool_
+ ):
class_type = TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
clf_length = len(op.calibrated_classifiers_)
prob_scores_name = []
- clf_length_name = scope.get_unique_variable_name('clf_length')
- classes_name = scope.get_unique_variable_name('classes')
- reshaped_result_name = scope.get_unique_variable_name('reshaped_result')
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
+ clf_length_name = scope.get_unique_variable_name("clf_length")
+ classes_name = scope.get_unique_variable_name("classes")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
- add_result_name = scope.get_unique_variable_name('add_result')
+ "array_feature_extractor_result"
+ )
+ add_result_name = scope.get_unique_variable_name("add_result")
container.add_initializer(classes_name, class_type, classes.shape, classes)
- container.add_initializer(clf_length_name, proto_type2,
- [], [clf_length])
+ container.add_initializer(clf_length_name, proto_type2, [], [clf_length])
for clf_index, clf in enumerate(op.calibrated_classifiers_):
- prob_scores_name.append(convert_calibrated_classifier_base_estimator(
- scope, operator, container, clf, clf_index))
-
- container.add_node('Sum', [s for s in prob_scores_name],
- add_result_name, op_version=7,
- name=scope.get_unique_operator_name('Sum'))
- apply_div(scope, [add_result_name, clf_length_name],
- operator.outputs[1].full_name, container, broadcast=1)
+ prob_scores_name.append(
+ convert_calibrated_classifier_base_estimator(
+ scope, operator, container, clf, clf_index
+ )
+ )
+
+ container.add_node(
+ "Sum",
+ [s for s in prob_scores_name],
+ add_result_name,
+ op_version=7,
+ name=scope.get_unique_operator_name("Sum"),
+ )
+ apply_div(
+ scope,
+ [add_result_name, clf_length_name],
+ operator.outputs[1].full_name,
+ container,
+ broadcast=1,
+ )
class_prob_name = operator.outputs[1].full_name
- container.add_node('ArgMax', class_prob_name,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArgMax",
+ class_prob_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == TensorProto.INT32:
- apply_reshape(scope, array_feature_extractor_result_name,
- reshaped_result_name, container,
- desired_shape=output_shape)
- apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=TensorProto.INT64)
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=output_shape,
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=TensorProto.INT64,
+ )
else:
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=output_shape)
-
-
-register_converter('SklearnCalibratedClassifierCV',
- convert_sklearn_calibrated_classifier_cv,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=output_shape,
+ )
+
+
+register_converter(
+ "SklearnCalibratedClassifierCV",
+ convert_sklearn_calibrated_classifier_cv,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
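The calibration helpers above implement Platt (sigmoid) scaling as 1 / (1 + exp(a * df + b)) and approximate isotonic calibration by snapping each decision value to the nearest stored X threshold (Sub, Abs, ArgMin, then ArrayFeatureExtractor on the y thresholds), since ArrayFeatureExtractor only gathers along the last axis. A rough numpy sketch of both branches, with assumed calibrator parameters:

```python
import numpy as np

# --- sigmoid (Platt) branch: 1 / (1 + exp(a * df + b)) ---
a_, b_ = -1.5, 0.2                     # assumed calibrators[k].a_ and .b_
df = np.array([[-2.0], [0.5], [3.0]])  # assumed decision-function column
sigmoid_prob = 1.0 / (1.0 + np.exp(a_ * df + b_))

# --- isotonic branch: nearest stored threshold, not interpolation ---
X_thr = np.array([0.0, 0.4, 1.0])      # assumed X_thresholds_
y_thr = np.array([0.1, 0.5, 0.9])      # assumed y_thresholds_
T = np.clip(df, X_thr.min(), X_thr.max())                   # out_of_bounds == "clip"
nearest = np.abs(T.reshape(-1, 1) - X_thr).argmin(axis=1)   # Sub, Abs, ArgMin
iso_prob = y_thr[nearest].reshape(-1, 1)                    # ArrayFeatureExtractor + Reshape
print(sigmoid_prob.ravel(), iso_prob.ravel())
```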
diff --git a/skl2onnx/operator_converters/cast_op.py b/skl2onnx/operator_converters/cast_op.py
index 1a3438a14..7f3db2360 100644
--- a/skl2onnx/operator_converters/cast_op.py
+++ b/skl2onnx/operator_converters/cast_op.py
@@ -8,19 +8,19 @@
from .._supported_operators import sklearn_operator_name_map
-def convert_sklearn_cast(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_cast(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
inp = operator.inputs[0]
exptype = operator.outputs[0]
res = exptype.type.to_onnx_type()
et = res.tensor_type.elem_type
- apply_cast(scope, inp.full_name, exptype.full_name,
- container, to=et)
+ apply_cast(scope, inp.full_name, exptype.full_name, container, to=et)
-def convert_sklearn_cast_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
-
+def convert_sklearn_cast_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
estimator = op.estimator
@@ -29,17 +29,16 @@ def convert_sklearn_cast_regressor(scope: Scope, operator: Operator,
this_operator.inputs = operator.inputs
cls = operator.inputs[0].type.__class__
- var_name = scope.declare_local_variable('cast_est', cls())
+ var_name = scope.declare_local_variable("cast_est", cls())
this_operator.outputs.append(var_name)
var_name = var_name.onnx_name
exptype = operator.outputs[0]
res = exptype.type.to_onnx_type()
et = res.tensor_type.elem_type
- apply_cast(scope, var_name, exptype.full_name,
- container, to=et)
+ apply_cast(scope, var_name, exptype.full_name, container, to=et)
-register_converter('SklearnCastTransformer', convert_sklearn_cast)
-register_converter('SklearnCastRegressor', convert_sklearn_cast_regressor)
-register_converter('SklearnCast', convert_sklearn_cast)
+register_converter("SklearnCastTransformer", convert_sklearn_cast)
+register_converter("SklearnCastRegressor", convert_sklearn_cast_regressor)
+register_converter("SklearnCast", convert_sklearn_cast)
diff --git a/skl2onnx/operator_converters/class_labels.py b/skl2onnx/operator_converters/class_labels.py
index 3be5fd018..3ef9a427f 100644
--- a/skl2onnx/operator_converters/class_labels.py
+++ b/skl2onnx/operator_converters/class_labels.py
@@ -6,71 +6,93 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_class_labels(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
- if getattr(operator, 'is_multi_output', False):
+def convert_sklearn_class_labels(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
+ if getattr(operator, "is_multi_output", False):
classes = operator.classes
if not isinstance(classes, list):
raise RuntimeError(
- "classes must be a list of numpy arrays but is %r."
- "" % type(classes))
+ "classes must be a list of numpy arrays but is %r." "" % type(classes)
+ )
names = []
if classes[0].dtype in (np.int64, np.int32):
for i, cl in enumerate(classes):
cla = np.array(cl)
name = scope.get_unique_variable_name(
- operator.outputs[0].full_name + '_cst_%d' % i)
+ operator.outputs[0].full_name + "_cst_%d" % i
+ )
container.add_initializer(
- name, onnx_proto.TensorProto.INT64, list(cla.shape),
- cla.tolist())
+ name, onnx_proto.TensorProto.INT64, list(cla.shape), cla.tolist()
+ )
names.append(name)
else:
for i, cl in enumerate(classes):
name = scope.get_unique_variable_name(
- operator.outputs[0].full_name + '_cst_%d' % i)
+ operator.outputs[0].full_name + "_cst_%d" % i
+ )
clids = np.arange(len(cl), dtype=np.int64)
container.add_initializer(
- name, onnx_proto.TensorProto.INT64, list(clids.shape),
- clids.tolist())
+ name,
+ onnx_proto.TensorProto.INT64,
+ list(clids.shape),
+ clids.tolist(),
+ )
namele = scope.get_unique_variable_name(
- operator.outputs[0].full_name + '_le_%d' % i)
+ operator.outputs[0].full_name + "_le_%d" % i
+ )
container.add_node(
- 'LabelEncoder', name, namele, op_domain='ai.onnx.ml',
- op_version=2, default_string='0', keys_int64s=clids,
+ "LabelEncoder",
+ name,
+ namele,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ default_string="0",
+ keys_int64s=clids,
values_strings=cl.tolist(),
- name=scope.get_unique_operator_name(
- 'class_labels_le_%d' % i))
+ name=scope.get_unique_operator_name("class_labels_le_%d" % i),
+ )
names.append(namele)
container.add_node(
- 'SequenceConstruct', names, operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('class_labels_seq'))
+ "SequenceConstruct",
+ names,
+ operator.outputs[0].full_name,
+ name=scope.get_unique_operator_name("class_labels_seq"),
+ )
else:
classes = np.array(operator.classes)
- name = scope.get_unique_variable_name(
- operator.outputs[0].full_name + '_cst')
+ name = scope.get_unique_variable_name(operator.outputs[0].full_name + "_cst")
if classes.dtype in (np.int64, np.int32):
container.add_initializer(
- name, onnx_proto.TensorProto.INT64, list(classes.shape),
- classes.tolist())
+ name,
+ onnx_proto.TensorProto.INT64,
+ list(classes.shape),
+ classes.tolist(),
+ )
else:
clids = np.arange(len(classes), dtype=np.int64)
container.add_initializer(
- name, onnx_proto.TensorProto.INT64, list(clids.shape),
- clids.tolist())
+ name, onnx_proto.TensorProto.INT64, list(clids.shape), clids.tolist()
+ )
namele = scope.get_unique_variable_name(
- operator.outputs[0].full_name + '_le')
+ operator.outputs[0].full_name + "_le"
+ )
container.add_node(
- 'LabelEncoder', name, namele, op_domain='ai.onnx.ml',
- op_version=2, default_string='0', keys_int64s=clids,
+ "LabelEncoder",
+ name,
+ namele,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ default_string="0",
+ keys_int64s=clids,
values_strings=classes.tolist(),
- name=scope.get_unique_operator_name('class_labels_le'))
+ name=scope.get_unique_operator_name("class_labels_le"),
+ )
name = namele
- container.add_node(
- 'Identity', name, operator.outputs[0].full_name)
+ container.add_node("Identity", name, operator.outputs[0].full_name)
-register_converter(
- 'SklearnClassLabels', convert_sklearn_class_labels)
+register_converter("SklearnClassLabels", convert_sklearn_class_labels)
diff --git a/skl2onnx/operator_converters/common.py b/skl2onnx/operator_converters/common.py
index 2cf29adee..57633fad9 100644
--- a/skl2onnx/operator_converters/common.py
+++ b/skl2onnx/operator_converters/common.py
@@ -3,8 +3,12 @@
from ..common._apply_operation import apply_cast
from ..common.data_types import (
- Int64TensorType, FloatTensorType, DoubleTensorType,
- StringTensorType, guess_proto_type)
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+ StringTensorType,
+ guess_proto_type,
+)
def concatenate_variables(scope, variables, container, main_type=None):
@@ -18,11 +22,16 @@ def concatenate_variables(scope, variables, container, main_type=None):
# Check if it's possible to concatenate those inputs.
type_set = set(type(variable.type) for variable in variables)
- number_type_set = {FloatTensorType, Int64TensorType, DoubleTensorType,
- StringTensorType}
+ number_type_set = {
+ FloatTensorType,
+ Int64TensorType,
+ DoubleTensorType,
+ StringTensorType,
+ }
if any(itype not in number_type_set for itype in type_set):
- raise RuntimeError('Numerical tensor(s) and string tensor(s) '
- 'cannot be concatenated.')
+ raise RuntimeError(
+ "Numerical tensor(s) and string tensor(s) " "cannot be concatenated."
+ )
# input variables' names we want to concatenate
input_names = []
# dimensions of the variables that is going to be concatenated
@@ -32,9 +41,8 @@ def concatenate_variables(scope, variables, container, main_type=None):
for variable in variables:
if not isinstance(variable.type, main_type):
proto_type = guess_proto_type(main_type())
- new_name = scope.get_unique_variable_name('cast')
- apply_cast(scope, variable.full_name, new_name,
- container, to=proto_type)
+ new_name = scope.get_unique_variable_name("cast")
+ apply_cast(scope, variable.full_name, new_name, container, to=proto_type)
input_names.append(new_name)
else:
input_names.append(variable.full_name)
@@ -47,20 +55,23 @@ def concatenate_variables(scope, variables, container, main_type=None):
return input_names[0]
# To combine all inputs, we need a FeatureVectorizer
- op_type = 'FeatureVectorizer'
- attrs = {'name': scope.get_unique_operator_name(op_type),
- 'inputdimensions': input_dims}
+ op_type = "FeatureVectorizer"
+ attrs = {
+ "name": scope.get_unique_operator_name(op_type),
+ "inputdimensions": input_dims,
+ }
# Create a variable name to capture feature vectorizer's output
# Set up our FeatureVectorizer
- concatenated_name = scope.get_unique_variable_name('concatenated')
- container.add_node(op_type, input_names, concatenated_name,
- op_domain='ai.onnx.ml', **attrs)
+ concatenated_name = scope.get_unique_variable_name("concatenated")
+ container.add_node(
+ op_type, input_names, concatenated_name, op_domain="ai.onnx.ml", **attrs
+ )
if main_type == FloatTensorType:
return concatenated_name
# Cast output as FeatureVectorizer always produces float32.
- concatenated_name_cast = scope.get_unique_variable_name(
- 'concatenated_cast')
- container.add_node('CastLike', [concatenated_name, input_names[0]],
- concatenated_name_cast)
+ concatenated_name_cast = scope.get_unique_variable_name("concatenated_cast")
+ container.add_node(
+ "CastLike", [concatenated_name, input_names[0]], concatenated_name_cast
+ )
return concatenated_name_cast
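
In numpy terms, concatenate_variables casts every input to a common type, concatenates column-wise (the FeatureVectorizer node), and casts back at the end because FeatureVectorizer always emits float32 (the trailing CastLike). A minimal sketch of that arithmetic:

```python
# Numpy-only sketch of the graph assembled by concatenate_variables.
import numpy as np

a = np.array([[1], [2]], dtype=np.int64)         # Int64TensorType input
b = np.array([[0.5], [0.25]], dtype=np.float64)  # DoubleTensorType input

main_type = np.float64                           # chosen by the caller
concatenated = np.hstack([a.astype(main_type), b.astype(main_type)])
# FeatureVectorizer always produces float32, hence the cast back:
restored = concatenated.astype(np.float32).astype(main_type)
```
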
diff --git a/skl2onnx/operator_converters/concat_op.py b/skl2onnx/operator_converters/concat_op.py
index 214f3be9f..9f600d26f 100644
--- a/skl2onnx/operator_converters/concat_op.py
+++ b/skl2onnx/operator_converters/concat_op.py
@@ -7,8 +7,9 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_concat(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_concat(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
exptype = operator.outputs[0].type
new_inputs = []
for inp in operator.inputs:
@@ -21,8 +22,7 @@ def convert_sklearn_concat(scope: Scope, operator: Operator,
apply_cast(scope, inp.full_name, name, container, to=et)
new_inputs.append(name)
- apply_concat(scope, new_inputs, operator.outputs[0].full_name,
- container, axis=1)
+ apply_concat(scope, new_inputs, operator.outputs[0].full_name, container, axis=1)
-register_converter('SklearnConcat', convert_sklearn_concat)
+register_converter("SklearnConcat", convert_sklearn_concat)
diff --git a/skl2onnx/operator_converters/cross_decomposition.py b/skl2onnx/operator_converters/cross_decomposition.py
index 1e4947f04..c1a91519f 100644
--- a/skl2onnx/operator_converters/cross_decomposition.py
+++ b/skl2onnx/operator_converters/cross_decomposition.py
@@ -5,14 +5,13 @@
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-from ..common.data_types import (
- Int64TensorType, guess_numpy_type, guess_proto_type)
-from ..algebra.onnx_ops import (
- OnnxAdd, OnnxCast, OnnxDiv, OnnxMatMul, OnnxSub)
+from ..common.data_types import Int64TensorType, guess_numpy_type, guess_proto_type
+from ..algebra.onnx_ops import OnnxAdd, OnnxCast, OnnxDiv, OnnxMatMul, OnnxSub
-def convert_pls_regression(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_pls_regression(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
X = operator.inputs[0]
op = operator.raw_operator
opv = container.target_opset
@@ -26,22 +25,23 @@ def convert_pls_regression(scope: Scope, operator: Operator,
if isinstance(X.type, Int64TensorType):
X = OnnxCast(X, to=proto_dtype, op_version=opv)
- coefs = op.x_mean_ if hasattr(op, 'x_mean_') else op._x_mean
- std = op.x_std_ if hasattr(op, 'x_std_') else op._x_std
- ym = op.y_mean_ if hasattr(op, 'x_mean_') else op._y_mean
+ coefs = op.x_mean_ if hasattr(op, "x_mean_") else op._x_mean
+ std = op.x_std_ if hasattr(op, "x_std_") else op._x_std
+ ym = op.y_mean_ if hasattr(op, "x_mean_") else op._y_mean
norm_x = OnnxDiv(
OnnxSub(X, coefs.astype(dtype), op_version=opv),
- std.astype(dtype), op_version=opv)
+ std.astype(dtype),
+ op_version=opv,
+ )
if hasattr(op, "set_predict_request"):
# new in 1.3
coefs = op.coef_.T.astype(dtype)
else:
coefs = op.coef_.astype(dtype)
dot = OnnxMatMul(norm_x, coefs, op_version=opv)
- pred = OnnxAdd(dot, ym.astype(dtype),
- op_version=opv, output_names=operator.outputs)
+ pred = OnnxAdd(dot, ym.astype(dtype), op_version=opv, output_names=operator.outputs)
pred.add_to(scope, container)
-register_converter('SklearnPLSRegression', convert_pls_regression)
+register_converter("SklearnPLSRegression", convert_pls_regression)
diff --git a/skl2onnx/operator_converters/decision_tree.py b/skl2onnx/operator_converters/decision_tree.py
index 8a57a1e4c..e92fe285c 100644
--- a/skl2onnx/operator_converters/decision_tree.py
+++ b/skl2onnx/operator_converters/decision_tree.py
@@ -14,8 +14,11 @@
)
from ..common._registration import register_converter
from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_numpy_type,
- guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common.tree_ensemble import (
add_tree_to_attribute_pairs,
get_default_tree_classifier_attribute_pairs,
@@ -30,69 +33,68 @@ def populate_tree_attributes(model, name, dtype):
while adding a node with TreeEnsembleClassifier ONNX op.
"""
attrs = {}
- attrs['name'] = name
- attrs['post_transform'] = 'NONE'
- attrs['nodes_treeids'] = []
- attrs['nodes_nodeids'] = []
- attrs['nodes_featureids'] = []
- attrs['nodes_modes'] = []
- attrs['nodes_values'] = []
- attrs['nodes_truenodeids'] = []
- attrs['nodes_falsenodeids'] = []
- attrs['nodes_missing_value_tracks_true'] = []
- attrs['nodes_hitrates'] = []
- attrs['class_treeids'] = []
- attrs['class_nodeids'] = []
- attrs['class_ids'] = []
- attrs['class_weights'] = []
- attrs['classlabels_int64s'] = list(range(model.tree_.node_count))
+ attrs["name"] = name
+ attrs["post_transform"] = "NONE"
+ attrs["nodes_treeids"] = []
+ attrs["nodes_nodeids"] = []
+ attrs["nodes_featureids"] = []
+ attrs["nodes_modes"] = []
+ attrs["nodes_values"] = []
+ attrs["nodes_truenodeids"] = []
+ attrs["nodes_falsenodeids"] = []
+ attrs["nodes_missing_value_tracks_true"] = []
+ attrs["nodes_hitrates"] = []
+ attrs["class_treeids"] = []
+ attrs["class_nodeids"] = []
+ attrs["class_ids"] = []
+ attrs["class_weights"] = []
+ attrs["classlabels_int64s"] = list(range(model.tree_.node_count))
for i in range(model.tree_.node_count):
node_id = i
- if (model.tree_.children_left[i] > i and
- model.tree_.children_right[i] > i):
+ if model.tree_.children_left[i] > i and model.tree_.children_right[i] > i:
feat = model.tree_.feature[i]
thresh = model.tree_.threshold[i]
left = model.tree_.children_left[i]
right = model.tree_.children_right[i]
- mode = 'BRANCH_LEQ'
+ mode = "BRANCH_LEQ"
else:
- feat, thresh, left, right = 0, 0., 0, 0
- mode = 'LEAF'
- attrs['nodes_nodeids'].append(node_id)
- attrs['nodes_treeids'].append(0)
- attrs['nodes_featureids'].append(feat)
- attrs['nodes_modes'].append(mode)
- attrs['nodes_truenodeids'].append(left)
- attrs['nodes_falsenodeids'].append(right)
- attrs['nodes_missing_value_tracks_true'].append(False)
- attrs['nodes_hitrates'].append(1.)
- attrs['nodes_values'].append(thresh)
- if mode == 'LEAF':
- attrs['class_ids'].append(node_id)
- attrs['class_weights'].append(1.)
- attrs['class_treeids'].append(0)
- attrs['class_nodeids'].append(node_id)
+ feat, thresh, left, right = 0, 0.0, 0, 0
+ mode = "LEAF"
+ attrs["nodes_nodeids"].append(node_id)
+ attrs["nodes_treeids"].append(0)
+ attrs["nodes_featureids"].append(feat)
+ attrs["nodes_modes"].append(mode)
+ attrs["nodes_truenodeids"].append(left)
+ attrs["nodes_falsenodeids"].append(right)
+ attrs["nodes_missing_value_tracks_true"].append(False)
+ attrs["nodes_hitrates"].append(1.0)
+ attrs["nodes_values"].append(thresh)
+ if mode == "LEAF":
+ attrs["class_ids"].append(node_id)
+ attrs["class_weights"].append(1.0)
+ attrs["class_treeids"].append(0)
+ attrs["class_nodeids"].append(node_id)
if dtype is not None:
for k in attrs:
- if k in ('node_values', 'class_weights', 'target_weights'):
+ if k in ("node_values", "class_weights", "target_weights"):
attrs[k] = np.array(attrs[k], dtype=dtype)
return attrs
-def predict(model, scope, operator, container,
- op_type, op_domain, op_version, is_ensemble=False):
+def predict(
+ model, scope, operator, container, op_type, op_domain, op_version, is_ensemble=False
+):
"""Predict target and calculate probability scores."""
- indices_name = scope.get_unique_variable_name('indices')
- dummy_proba_name = scope.get_unique_variable_name('dummy_proba')
- values_name = scope.get_unique_variable_name('values')
- out_values_name = scope.get_unique_variable_name('out_indices')
- transposed_result_name = scope.get_unique_variable_name(
- 'transposed_result')
- proba_output_name = scope.get_unique_variable_name('proba_output')
- cast_result_name = scope.get_unique_variable_name('cast_result')
- reshaped_indices_name = scope.get_unique_variable_name('reshaped_indices')
- sum_output_name = scope.get_unique_variable_name('sum_proba')
+ indices_name = scope.get_unique_variable_name("indices")
+ dummy_proba_name = scope.get_unique_variable_name("dummy_proba")
+ values_name = scope.get_unique_variable_name("values")
+ out_values_name = scope.get_unique_variable_name("out_indices")
+ transposed_result_name = scope.get_unique_variable_name("transposed_result")
+ proba_output_name = scope.get_unique_variable_name("proba_output")
+ cast_result_name = scope.get_unique_variable_name("cast_result")
+ reshaped_indices_name = scope.get_unique_variable_name("reshaped_indices")
+ sum_output_name = scope.get_unique_variable_name("sum_proba")
value = model.tree_.value.transpose(1, 2, 0)
proto_dtype = guess_proto_type(operator.inputs[0].type)
@@ -103,102 +105,146 @@ def predict(model, scope, operator, container,
if dtype != np.float64:
dtype = np.float32
- container.add_initializer(
- values_name, proto_dtype, value.shape, value.ravel())
+ container.add_initializer(values_name, proto_dtype, value.shape, value.ravel())
input_name = operator.input_full_names
if isinstance(operator.inputs[0].type, BooleanTensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, input_name, cast_input_name,
- container, to=proto_dtype)
+ apply_cast(scope, input_name, cast_input_name, container, to=proto_dtype)
input_name = cast_input_name
if model.tree_.node_count > 1:
attrs = populate_tree_attributes(
- model, scope.get_unique_operator_name(op_type), dtype)
+ model, scope.get_unique_operator_name(op_type), dtype
+ )
container.add_node(
- op_type, input_name,
+ op_type,
+ input_name,
[indices_name, dummy_proba_name],
- op_domain=op_domain, op_version=op_version, **attrs)
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
else:
- zero_name = scope.get_unique_variable_name('zero')
- zero_matrix_name = scope.get_unique_variable_name('zero_matrix')
- reduced_zero_matrix_name = scope.get_unique_variable_name(
- 'reduced_zero_matrix')
-
- container.add_initializer(
- zero_name, proto_dtype, [], [0])
- apply_mul(scope, [input_name[0], zero_name],
- zero_matrix_name, container, broadcast=1)
+ zero_name = scope.get_unique_variable_name("zero")
+ zero_matrix_name = scope.get_unique_variable_name("zero_matrix")
+ reduced_zero_matrix_name = scope.get_unique_variable_name("reduced_zero_matrix")
+
+ container.add_initializer(zero_name, proto_dtype, [], [0])
+ apply_mul(
+ scope, [input_name[0], zero_name], zero_matrix_name, container, broadcast=1
+ )
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', zero_matrix_name, reduced_zero_matrix_name,
- axes=[1], name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ zero_matrix_name,
+ reduced_zero_matrix_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [zero_matrix_name, axis_name],
+ "ReduceSum",
+ [zero_matrix_name, axis_name],
reduced_zero_matrix_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_cast(scope, reduced_zero_matrix_name, indices_name,
- container, to=onnx_proto.TensorProto.INT64)
- apply_reshape(scope, indices_name, reshaped_indices_name,
- container, desired_shape=[1, -1])
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_cast(
+ scope,
+ reduced_zero_matrix_name,
+ indices_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
+ apply_reshape(
+ scope, indices_name, reshaped_indices_name, container, desired_shape=[1, -1]
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[values_name, reshaped_indices_name],
- out_values_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_transpose(scope, out_values_name, proba_output_name,
- container, perm=(0, 2, 1))
+ out_values_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_transpose(
+ scope, out_values_name, proba_output_name, container, perm=(0, 2, 1)
+ )
if is_ensemble:
- proba_result_name = scope.get_unique_variable_name('proba_result')
- apply_reducesum(scope, proba_output_name, sum_output_name,
- container, keepdims=1, axes=[2])
- apply_div(scope, [proba_output_name, sum_output_name],
- proba_result_name, container)
+ proba_result_name = scope.get_unique_variable_name("proba_result")
+ apply_reducesum(
+ scope, proba_output_name, sum_output_name, container, keepdims=1, axes=[2]
+ )
+ apply_div(
+ scope, [proba_output_name, sum_output_name], proba_result_name, container
+ )
return proba_result_name
else:
- apply_cast(scope, proba_output_name, cast_result_name,
- container, to=onnx_proto.TensorProto.BOOL)
- apply_cast(scope, cast_result_name, operator.outputs[1].full_name,
- container, to=proto_dtype)
- apply_transpose(scope, out_values_name, transposed_result_name,
- container, perm=(2, 1, 0))
+ apply_cast(
+ scope,
+ proba_output_name,
+ cast_result_name,
+ container,
+ to=onnx_proto.TensorProto.BOOL,
+ )
+ apply_cast(
+ scope,
+ cast_result_name,
+ operator.outputs[1].full_name,
+ container,
+ to=proto_dtype,
+ )
+ apply_transpose(
+ scope, out_values_name, transposed_result_name, container, perm=(2, 1, 0)
+ )
return transposed_result_name
def _append_decision_output(
- input_name, attrs, fct_label, n_out, scope, operator, container,
- op_type='TreeEnsembleClassifier',
- op_domain='ai.onnx.ml', op_version=1,
- cast_encode=False, regression=False, dtype=np.float32,
- overwrite_tree=None):
-
+ input_name,
+ attrs,
+ fct_label,
+ n_out,
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleClassifier",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+ cast_encode=False,
+ regression=False,
+ dtype=np.float32,
+ overwrite_tree=None,
+):
attrs = attrs.copy()
- attrs['name'] = scope.get_unique_operator_name(op_type)
- attrs['n_targets'] = 1
- attrs['post_transform'] = 'NONE'
+ attrs["name"] = scope.get_unique_operator_name(op_type)
+ attrs["n_targets"] = 1
+ attrs["post_transform"] = "NONE"
if regression:
- attrs['target_weights'] = np.array(
- [float(_) for _ in attrs['target_nodeids']], dtype=dtype)
+ attrs["target_weights"] = np.array(
+ [float(_) for _ in attrs["target_nodeids"]], dtype=dtype
+ )
else:
- attrs['target_ids'] = [0 for _ in attrs['class_ids']]
- attrs['target_weights'] = [float(_) for _ in attrs['class_nodeids']]
- attrs['target_nodeids'] = attrs['class_nodeids']
- attrs['target_treeids'] = attrs['class_treeids']
+ attrs["target_ids"] = [0 for _ in attrs["class_ids"]]
+ attrs["target_weights"] = [float(_) for _ in attrs["class_nodeids"]]
+ attrs["target_nodeids"] = attrs["class_nodeids"]
+ attrs["target_treeids"] = attrs["class_treeids"]
- rem = [k for k in attrs if k.startswith('class')]
+ rem = [k for k in attrs if k.startswith("class")]
for k in rem:
del attrs[k]
dpath = scope.get_unique_variable_name("dpath")
container.add_node(
- op_type.replace("Classifier", "Regressor"), input_name, dpath,
- op_domain=op_domain, op_version=op_version, **attrs)
+ op_type.replace("Classifier", "Regressor"),
+ input_name,
+ dpath,
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
if n_out is None:
final_name = scope.get_unique_variable_name("dpatho")
@@ -207,204 +253,302 @@ def _append_decision_output(
if cast_encode:
apply_cast(
- scope, dpath, final_name,
- container, to=onnx_proto.TensorProto.INT64,
- operator_name=scope.get_unique_operator_name('TreePathType'))
+ scope,
+ dpath,
+ final_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ operator_name=scope.get_unique_operator_name("TreePathType"),
+ )
else:
op = operator.raw_operator
- labels = fct_label(
- overwrite_tree if overwrite_tree is not None else op.tree_)
+ labels = fct_label(overwrite_tree if overwrite_tree is not None else op.tree_)
ordered = list(sorted(labels.items()))
keys = [float(_[0]) for _ in ordered]
values = [_[1] for _ in ordered]
name = scope.get_unique_variable_name("spath")
container.add_node(
- 'LabelEncoder', dpath, name,
- op_domain=op_domain, op_version=2,
- default_string='0', keys_floats=keys, values_strings=values,
- name=scope.get_unique_operator_name('TreePath'))
+ "LabelEncoder",
+ dpath,
+ name,
+ op_domain=op_domain,
+ op_version=2,
+ default_string="0",
+ keys_floats=keys,
+ values_strings=values,
+ name=scope.get_unique_operator_name("TreePath"),
+ )
apply_reshape(
- scope, name, final_name,
- container, desired_shape=(-1, 1),
- operator_name=scope.get_unique_operator_name('TreePathShape'))
+ scope,
+ name,
+ final_name,
+ container,
+ desired_shape=(-1, 1),
+ operator_name=scope.get_unique_operator_name("TreePathShape"),
+ )
return final_name
def convert_sklearn_decision_tree_classifier(
- scope, operator, container, op_type='TreeEnsembleClassifier',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleClassifier",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
try:
dtype = guess_numpy_type(operator.inputs[0].type)
except NotImplementedError as e:
- raise RuntimeError(
- "Unknown variable {}.".format(operator.inputs[0])) from e
+ raise RuntimeError("Unknown variable {}.".format(operator.inputs[0])) from e
if dtype != np.float64:
dtype = np.float32
op = operator.raw_operator
- options = scope.get_options(
- op, dict(decision_path=False, decision_leaf=False))
+ options = scope.get_options(op, dict(decision_path=False, decision_leaf=False))
if op.n_outputs_ == 1:
attrs = get_default_tree_classifier_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(op_type)
+ attrs["name"] = scope.get_unique_operator_name(op_type)
classes = get_label_classes(scope, op)
if all(isinstance(i, np.ndarray) for i in classes):
classes = np.concatenate(classes)
if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes):
class_labels = [int(i) for i in classes]
- attrs['classlabels_int64s'] = class_labels
+ attrs["classlabels_int64s"] = class_labels
elif all(isinstance(i, str) for i in classes):
class_labels = [str(i) for i in classes]
- attrs['classlabels_strings'] = class_labels
+ attrs["classlabels_strings"] = class_labels
else:
- raise ValueError('Labels must be all integers or all strings.')
+ raise ValueError("Labels must be all integers or all strings.")
- add_tree_to_attribute_pairs(attrs, True, op.tree_, 0, 1., 0, True,
- True, dtype=dtype)
+ add_tree_to_attribute_pairs(
+ attrs, True, op.tree_, 0, 1.0, 0, True, True, dtype=dtype
+ )
input_name = operator.input_full_names
if isinstance(operator.inputs[0].type, BooleanTensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
-
- apply_cast(scope, input_name, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+
+ apply_cast(
+ scope,
+ input_name,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype)
container.add_node(
- op_type, input_name,
+ op_type,
+ input_name,
[operator.outputs[0].full_name, operator.outputs[1].full_name],
- op_domain=op_domain, op_version=op_version, **attrs)
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
n_out = 2
- if options['decision_path']:
+ if options["decision_path"]:
# decision_path
_append_decision_output(
- input_name, attrs, _build_labels_path, n_out,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, dtype=dtype)
+ input_name,
+ attrs,
+ _build_labels_path,
+ n_out,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ dtype=dtype,
+ )
n_out += 1
- if options['decision_leaf']:
+ if options["decision_leaf"]:
# decision_path
_append_decision_output(
- input_name, attrs, _build_labels_leaf, n_out,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, cast_encode=True,
- dtype=dtype)
+ input_name,
+ attrs,
+ _build_labels_leaf,
+ n_out,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ cast_encode=True,
+ dtype=dtype,
+ )
n_out += 1
else:
transposed_result_name = predict(
- op, scope, operator, container, op_type, op_domain, op_version)
+ op, scope, operator, container, op_type, op_domain, op_version
+ )
predictions = []
for k in range(op.n_outputs_):
- preds_name = scope.get_unique_variable_name('preds')
- reshaped_preds_name = scope.get_unique_variable_name(
- 'reshaped_preds')
- k_name = scope.get_unique_variable_name('k_column')
- out_k_name = scope.get_unique_variable_name('out_k_column')
- argmax_output_name = scope.get_unique_variable_name(
- 'argmax_output')
- classes_name = scope.get_unique_variable_name('classes')
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
-
- container.add_initializer(
- k_name, onnx_proto.TensorProto.INT64,
- [], [k])
+ preds_name = scope.get_unique_variable_name("preds")
+ reshaped_preds_name = scope.get_unique_variable_name("reshaped_preds")
+ k_name = scope.get_unique_variable_name("k_column")
+ out_k_name = scope.get_unique_variable_name("out_k_column")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ classes_name = scope.get_unique_variable_name("classes")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+
+ container.add_initializer(k_name, onnx_proto.TensorProto.INT64, [], [k])
container.add_initializer(
- classes_name, onnx_proto.TensorProto.INT64,
- op.classes_[k].shape, [int(i) for i in op.classes_[k]])
+ classes_name,
+ onnx_proto.TensorProto.INT64,
+ op.classes_[k].shape,
+ [int(i) for i in op.classes_[k]],
+ )
container.add_node(
- 'ArrayFeatureExtractor', [transposed_result_name, k_name],
- out_k_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [transposed_result_name, k_name],
+ out_k_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
container.add_node(
- 'ArgMax', out_k_name, argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
- apply_reshape(scope, argmax_output_name, reshaped_result_name,
- container, desired_shape=(1, -1))
+ "ArgMax",
+ out_k_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ apply_reshape(
+ scope,
+ argmax_output_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(1, -1),
+ )
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, reshaped_result_name],
- preds_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_reshape(scope, preds_name, reshaped_preds_name,
- container, desired_shape=(-1, 1))
+ "ArrayFeatureExtractor",
+ [classes_name, reshaped_result_name],
+ preds_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_reshape(
+ scope, preds_name, reshaped_preds_name, container, desired_shape=(-1, 1)
+ )
predictions.append(reshaped_preds_name)
- apply_concat(scope, predictions, operator.outputs[0].full_name,
- container, axis=1)
+ apply_concat(
+ scope, predictions, operator.outputs[0].full_name, container, axis=1
+ )
- if options['decision_path']:
+ if options["decision_path"]:
raise RuntimeError(
- "Option decision_path for multi-outputs "
- "is not implemented yet.")
- if options['decision_leaf']:
+ "Option decision_path for multi-outputs " "is not implemented yet."
+ )
+ if options["decision_leaf"]:
raise RuntimeError(
- "Option decision_leaf for multi-outputs "
- "is not implemented yet.")
+ "Option decision_leaf for multi-outputs " "is not implemented yet."
+ )
def convert_sklearn_decision_tree_regressor(
- scope, operator, container, op_type='TreeEnsembleRegressor',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
op = operator.raw_operator
attrs = get_default_tree_regressor_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(op_type)
- attrs['n_targets'] = int(op.n_outputs_)
- add_tree_to_attribute_pairs(attrs, False, op.tree_, 0, 1., 0, False,
- True, dtype=dtype)
+ attrs["name"] = scope.get_unique_operator_name(op_type)
+ attrs["n_targets"] = int(op.n_outputs_)
+ add_tree_to_attribute_pairs(
+ attrs, False, op.tree_, 0, 1.0, 0, False, True, dtype=dtype
+ )
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype)
input_name = operator.input_full_names
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = [cast_input_name]
container.add_node(
- op_type, input_name, operator.outputs[0].full_name,
- op_domain=op_domain, op_version=op_version, **attrs)
+ op_type,
+ input_name,
+ operator.outputs[0].full_name,
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
- options = scope.get_options(
- op, dict(decision_path=False, decision_leaf=False))
+ options = scope.get_options(op, dict(decision_path=False, decision_leaf=False))
# decision_path
n_out = 1
- if options['decision_path']:
+ if options["decision_path"]:
# decision_path
_append_decision_output(
- input_name, attrs, _build_labels_path, n_out,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True)
+ input_name,
+ attrs,
+ _build_labels_path,
+ n_out,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ )
n_out += 1
- if options['decision_leaf']:
+ if options["decision_leaf"]:
# decision_path
_append_decision_output(
- input_name, attrs, _build_labels_leaf, n_out,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True, cast_encode=True)
+ input_name,
+ attrs,
+ _build_labels_leaf,
+ n_out,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ cast_encode=True,
+ )
n_out += 1
@@ -413,11 +557,9 @@ def _recursive_build_labels(tree, index, current):
if tree.children_left[index] == -1:
yield (index, current.copy())
else:
- for it in _recursive_build_labels(
- tree, tree.children_left[index], current):
+ for it in _recursive_build_labels(tree, tree.children_left[index], current):
yield it
- for it in _recursive_build_labels(
- tree, tree.children_right[index], current):
+ for it in _recursive_build_labels(tree, tree.children_right[index], current):
yield it
current[index] = False
@@ -431,7 +573,7 @@ def _build_labels_path(tree):
for nodeid, b in path.items():
if b:
spath[nodeid] = "1"
- paths[leave_index] = ''.join(spath)
+ paths[leave_index] = "".join(spath)
return paths
@@ -444,25 +586,35 @@ def _build_labels_leaf(tree):
return paths
-register_converter('SklearnDecisionTreeClassifier',
- convert_sklearn_decision_tree_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnDecisionTreeRegressor',
- convert_sklearn_decision_tree_regressor,
- options={'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnExtraTreeClassifier',
- convert_sklearn_decision_tree_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnExtraTreeRegressor',
- convert_sklearn_decision_tree_regressor,
- options={'decision_path': [True, False],
- 'decision_leaf': [True, False]})
+register_converter(
+ "SklearnDecisionTreeClassifier",
+ convert_sklearn_decision_tree_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "decision_path": [True, False],
+ "decision_leaf": [True, False],
+ },
+)
+register_converter(
+ "SklearnDecisionTreeRegressor",
+ convert_sklearn_decision_tree_regressor,
+ options={"decision_path": [True, False], "decision_leaf": [True, False]},
+)
+register_converter(
+ "SklearnExtraTreeClassifier",
+ convert_sklearn_decision_tree_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "decision_path": [True, False],
+ "decision_leaf": [True, False],
+ },
+)
+register_converter(
+ "SklearnExtraTreeRegressor",
+ convert_sklearn_decision_tree_regressor,
+ options={"decision_path": [True, False], "decision_leaf": [True, False]},
+)
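
The decision_path and decision_leaf options registered above add extra outputs to the converted tree: a string-encoded path through the nodes and the id of the leaf reached by each sample. A minimal sketch, with an illustrative model and option values:

```python
# Sketch: requesting the extra decision_path / decision_leaf outputs.
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from skl2onnx import to_onnx

X = np.random.rand(30, 3).astype(np.float32)
y = (X[:, 0] > 0.5).astype(np.int64)
tree = DecisionTreeClassifier(max_depth=3).fit(X, y)

onx = to_onnx(
    tree,
    X,
    options={id(tree): {"decision_path": True, "decision_leaf": True, "zipmap": False}},
)
# Besides label and probabilities, the graph now exposes the encoded path
# and the reached leaf for every sample.
print([o.name for o in onx.graph.output])
```
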
diff --git a/skl2onnx/operator_converters/decomposition.py b/skl2onnx/operator_converters/decomposition.py
index 5d53e8b22..7ddd3f622 100644
--- a/skl2onnx/operator_converters/decomposition.py
+++ b/skl2onnx/operator_converters/decomposition.py
@@ -2,17 +2,21 @@
from ..proto import onnx_proto
-from ..common._apply_operation import (
- apply_cast, apply_div, apply_sqrt, apply_sub)
+from ..common._apply_operation import apply_cast, apply_div, apply_sqrt, apply_sub
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common.data_types import (
- Int64TensorType, DoubleTensorType, FloatTensorType, guess_proto_type)
+ Int64TensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ guess_proto_type,
+)
-def convert_truncated_svd(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_truncated_svd(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# Create alias for the scikit-learn truncated SVD model we
# are going to convert
svd = operator.raw_operator
@@ -23,66 +27,92 @@ def convert_truncated_svd(scope: Scope, operator: Operator,
# Transpose [K, C] matrix to [C, K], where C/K is the
# input/transformed feature dimension
transform_matrix = svd.components_.transpose()
- transform_matrix_name = scope.get_unique_variable_name('transform_matrix')
+ transform_matrix_name = scope.get_unique_variable_name("transform_matrix")
# Put the transformation into an ONNX tensor
container.add_initializer(
- transform_matrix_name, proto_dtype,
- transform_matrix.shape, transform_matrix.flatten())
+ transform_matrix_name,
+ proto_dtype,
+ transform_matrix.shape,
+ transform_matrix.flatten(),
+ )
input_name = operator.inputs[0].full_name
if isinstance(operator.inputs[0].type, Int64TensorType):
- cast_output_name = scope.get_unique_variable_name('cast_output')
+ cast_output_name = scope.get_unique_variable_name("cast_output")
- apply_cast(scope, input_name, cast_output_name, container,
- to=onnx_proto.TensorProto.FLOAT)
+ apply_cast(
+ scope,
+ input_name,
+ cast_output_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_output_name
- if operator.type == 'SklearnTruncatedSVD':
+ if operator.type == "SklearnTruncatedSVD":
# Create the major operator, a matrix multiplication.
container.add_node(
- 'MatMul', [input_name, transform_matrix_name],
- operator.outputs[0].full_name, name=operator.full_name)
+ "MatMul",
+ [input_name, transform_matrix_name],
+ operator.outputs[0].full_name,
+ name=operator.full_name,
+ )
else: # PCA
if svd.mean_ is not None:
- mean_name = scope.get_unique_variable_name('mean')
- sub_result_name = scope.get_unique_variable_name('sub_result')
+ mean_name = scope.get_unique_variable_name("mean")
+ sub_result_name = scope.get_unique_variable_name("sub_result")
- container.add_initializer(mean_name, proto_dtype,
- svd.mean_.shape, svd.mean_)
+ container.add_initializer(
+ mean_name, proto_dtype, svd.mean_.shape, svd.mean_
+ )
# Subtract mean from input tensor
- apply_sub(scope, [input_name, mean_name],
- sub_result_name, container, broadcast=1)
+ apply_sub(
+ scope, [input_name, mean_name], sub_result_name, container, broadcast=1
+ )
else:
sub_result_name = input_name
if svd.whiten:
explained_variance_name = scope.get_unique_variable_name(
- 'explained_variance')
+ "explained_variance"
+ )
explained_variance_root_name = scope.get_unique_variable_name(
- 'explained_variance_root')
- matmul_result_name = scope.get_unique_variable_name(
- 'matmul_result')
+ "explained_variance_root"
+ )
+ matmul_result_name = scope.get_unique_variable_name("matmul_result")
container.add_initializer(
- explained_variance_name, proto_dtype,
- svd.explained_variance_.shape, svd.explained_variance_)
+ explained_variance_name,
+ proto_dtype,
+ svd.explained_variance_.shape,
+ svd.explained_variance_,
+ )
container.add_node(
- 'MatMul', [sub_result_name, transform_matrix_name],
+ "MatMul",
+ [sub_result_name, transform_matrix_name],
matmul_result_name,
- name=scope.get_unique_operator_name('MatMul'))
- apply_sqrt(scope, explained_variance_name,
- explained_variance_root_name, container)
- apply_div(scope,
- [matmul_result_name, explained_variance_root_name],
- operator.outputs[0].full_name, container, broadcast=1)
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ apply_sqrt(
+ scope, explained_variance_name, explained_variance_root_name, container
+ )
+ apply_div(
+ scope,
+ [matmul_result_name, explained_variance_root_name],
+ operator.outputs[0].full_name,
+ container,
+ broadcast=1,
+ )
else:
container.add_node(
- 'MatMul', [sub_result_name, transform_matrix_name],
+ "MatMul",
+ [sub_result_name, transform_matrix_name],
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('MatMul'))
+ name=scope.get_unique_operator_name("MatMul"),
+ )
-register_converter('SklearnIncrementalPCA', convert_truncated_svd)
-register_converter('SklearnPCA', convert_truncated_svd)
-register_converter('SklearnTruncatedSVD', convert_truncated_svd)
+register_converter("SklearnIncrementalPCA", convert_truncated_svd)
+register_converter("SklearnPCA", convert_truncated_svd)
+register_converter("SklearnTruncatedSVD", convert_truncated_svd)
diff --git a/skl2onnx/operator_converters/dict_vectoriser.py b/skl2onnx/operator_converters/dict_vectoriser.py
index 57f3fceb1..cd5ce638b 100644
--- a/skl2onnx/operator_converters/dict_vectoriser.py
+++ b/skl2onnx/operator_converters/dict_vectoriser.py
@@ -7,8 +7,9 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_dict_vectorizer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
When a *DictVectorizer* converts numbers into strings,
scikit-learn adds a separator to disambiguate strings
@@ -24,13 +25,10 @@ def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator,
This cannot be implemented in ONNX. The converter
raises an exception in that case.
"""
- op_type = 'DictVectorizer'
+ op_type = "DictVectorizer"
op = operator.raw_operator
- attrs = {
- 'name': scope.get_unique_operator_name(op_type)
- }
- if all(isinstance(feature_name, str)
- for feature_name in op.feature_names_):
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
+ if all(isinstance(feature_name, str) for feature_name in op.feature_names_):
# all strings, scikit-learn does the following:
new_cats = []
unique_cats = set()
@@ -39,26 +37,30 @@ def convert_sklearn_dict_vectorizer(scope: Scope, operator: Operator,
if op.separator in i:
nbsep += 1
if i in unique_cats:
- raise RuntimeError(
- "Duplicated category '{}'.".format(i))
+ raise RuntimeError("Duplicated category '{}'.".format(i))
unique_cats.add(i)
new_cats.append(i)
if nbsep >= len(new_cats):
raise RuntimeError(
"All categories contain a separator '{}'. "
"This case is not supported by the converter. "
- "The mapping must map to numbers not string.". format(
- op.separator))
- attrs['string_vocabulary'] = new_cats
- elif all(isinstance(feature_name, numbers.Integral)
- for feature_name in op.feature_names_):
- attrs['int64_vocabulary'] = list(int(i) for i in op.feature_names_)
+ "The mapping must map to numbers not string.".format(op.separator)
+ )
+ attrs["string_vocabulary"] = new_cats
+ elif all(
+ isinstance(feature_name, numbers.Integral) for feature_name in op.feature_names_
+ ):
+ attrs["int64_vocabulary"] = list(int(i) for i in op.feature_names_)
else:
- raise ValueError('Keys must be all integers or all strings.')
+ raise ValueError("Keys must be all integers or all strings.")
- container.add_node(op_type, operator.input_full_names,
- operator.output_full_names, op_domain='ai.onnx.ml',
- **attrs)
+ container.add_node(
+ op_type,
+ operator.input_full_names,
+ operator.output_full_names,
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
-register_converter('SklearnDictVectorizer', convert_sklearn_dict_vectorizer)
+register_converter("SklearnDictVectorizer", convert_sklearn_dict_vectorizer)
diff --git a/skl2onnx/operator_converters/feature_hasher.py b/skl2onnx/operator_converters/feature_hasher.py
index 9f37228f5..4183422f3 100644
--- a/skl2onnx/operator_converters/feature_hasher.py
+++ b/skl2onnx/operator_converters/feature_hasher.py
@@ -8,115 +8,125 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_feature_hasher(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_feature_hasher(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
X = operator.inputs[0]
out = operator.outputs
op = operator.raw_operator
if op.input_type != "string":
raise RuntimeError(
f"The converter for FeatureHasher only supports "
- f"input_type='string' not {op.input_type!r}.")
-
- hashed_ = scope.get_unique_variable_name('hashed_')
- container.add_node('MurmurHash3', X.full_name, hashed_,
- positive=0, seed=0, op_domain="com.microsoft",
- op_version=1)
- hashed = scope.get_unique_variable_name('hashed')
- container.add_node('Cast', hashed_, hashed, to=TensorProto.INT64)
+ f"input_type='string' not {op.input_type!r}."
+ )
+
+ hashed_ = scope.get_unique_variable_name("hashed_")
+ container.add_node(
+ "MurmurHash3",
+ X.full_name,
+ hashed_,
+ positive=0,
+ seed=0,
+ op_domain="com.microsoft",
+ op_version=1,
+ )
+ hashed = scope.get_unique_variable_name("hashed")
+ container.add_node("Cast", hashed_, hashed, to=TensorProto.INT64)
if op.dtype in (np.float32, np.float64, np.int64):
cst_neg = -1
else:
cst_neg = 4294967295
- infinite = scope.get_unique_variable_name('infinite')
- container.add_initializer(infinite, TensorProto.INT64, [1],
- [-2147483648])
+ infinite = scope.get_unique_variable_name("infinite")
+ container.add_initializer(infinite, TensorProto.INT64, [1], [-2147483648])
- infinite2 = scope.get_unique_variable_name('infinite2')
- container.add_initializer(infinite2, TensorProto.INT64, [1],
- [cst_neg])
+ infinite2 = scope.get_unique_variable_name("infinite2")
+ container.add_initializer(infinite2, TensorProto.INT64, [1], [cst_neg])
- infinite_n = scope.get_unique_variable_name('infinite_n')
- container.add_initializer(infinite_n, TensorProto.INT64, [1],
- [2147483647 - (op.n_features - 1)])
+ infinite_n = scope.get_unique_variable_name("infinite_n")
+ container.add_initializer(
+ infinite_n, TensorProto.INT64, [1], [2147483647 - (op.n_features - 1)]
+ )
- zero = scope.get_unique_variable_name('zero')
+ zero = scope.get_unique_variable_name("zero")
container.add_initializer(zero, TensorProto.INT64, [1], [0])
- one = scope.get_unique_variable_name('one')
+ one = scope.get_unique_variable_name("one")
container.add_initializer(one, TensorProto.INT64, [1], [1])
- mone = scope.get_unique_variable_name('mone')
+ mone = scope.get_unique_variable_name("mone")
container.add_initializer(mone, TensorProto.INT64, [1], [-1])
- mtwo = scope.get_unique_variable_name('mtwo')
+ mtwo = scope.get_unique_variable_name("mtwo")
container.add_initializer(mtwo, TensorProto.INT64, [1], [-2])
- nf = scope.get_unique_variable_name('nf')
+ nf = scope.get_unique_variable_name("nf")
container.add_initializer(nf, TensorProto.INT64, [1], [op.n_features])
- new_shape = scope.get_unique_variable_name('new_shape')
+ new_shape = scope.get_unique_variable_name("new_shape")
container.add_initializer(new_shape, TensorProto.INT64, [2], [-1, 1])
- new_shape2 = scope.get_unique_variable_name('new_shape2')
+ new_shape2 = scope.get_unique_variable_name("new_shape2")
container.add_initializer(new_shape2, TensorProto.INT64, [2], [1, -1])
# values
if op.alternate_sign:
- cmp = scope.get_unique_variable_name('cmp')
- container.add_node('GreaterOrEqual', [hashed, zero], cmp)
- values = scope.get_unique_variable_name('values')
- container.add_node('Where', [cmp, one, infinite2], values)
+ cmp = scope.get_unique_variable_name("cmp")
+ container.add_node("GreaterOrEqual", [hashed, zero], cmp)
+ values = scope.get_unique_variable_name("values")
+ container.add_node("Where", [cmp, one, infinite2], values)
else:
- mul = scope.get_unique_variable_name('mul')
- container.add_node('Mul', [hashed, zero], mul)
- values = scope.get_unique_variable_name('values')
- container.add_node('Add', [mul, one], values)
+ mul = scope.get_unique_variable_name("mul")
+ container.add_node("Mul", [hashed, zero], mul)
+ values = scope.get_unique_variable_name("values")
+ container.add_node("Add", [mul, one], values)
- values_reshaped = scope.get_unique_variable_name('values_reshaped')
- container.add_node('Reshape', [values, new_shape], values_reshaped)
+ values_reshaped = scope.get_unique_variable_name("values_reshaped")
+ container.add_node("Reshape", [values, new_shape], values_reshaped)
# indices
- cmp = scope.get_unique_variable_name('cmp_ind')
- container.add_node('Equal', [hashed, infinite], cmp)
- values_abs = scope.get_unique_variable_name('values_abs')
- container.add_node('Abs', hashed, values_abs)
- values_ind = scope.get_unique_variable_name('values_ind')
- container.add_node('Where', [cmp, infinite_n, values_abs], values_ind)
- indices = scope.get_unique_variable_name('indices')
- container.add_node('Mod', [values_ind, nf], indices)
- indices_reshaped = scope.get_unique_variable_name('indices_reshaped')
- container.add_node('Reshape', [indices, new_shape], indices_reshaped)
+ cmp = scope.get_unique_variable_name("cmp_ind")
+ container.add_node("Equal", [hashed, infinite], cmp)
+ values_abs = scope.get_unique_variable_name("values_abs")
+ container.add_node("Abs", hashed, values_abs)
+ values_ind = scope.get_unique_variable_name("values_ind")
+ container.add_node("Where", [cmp, infinite_n, values_abs], values_ind)
+ indices = scope.get_unique_variable_name("indices")
+ container.add_node("Mod", [values_ind, nf], indices)
+ indices_reshaped = scope.get_unique_variable_name("indices_reshaped")
+ container.add_node("Reshape", [indices, new_shape], indices_reshaped)
# scatter
- zerot_ = scope.get_unique_variable_name('zerot_')
- container.add_node('ConstantOfShape', [nf], zerot_,
- value=make_tensor("value",
- TensorProto.INT64, [1], [0]))
- zerot = scope.get_unique_variable_name('zerot')
- container.add_node('Mul', [indices_reshaped, zerot_], zerot)
-
- final = scope.get_unique_variable_name('final')
- container.add_node('ScatterElements',
- [zerot, indices_reshaped, values_reshaped],
- final, axis=1)
+ zerot_ = scope.get_unique_variable_name("zerot_")
+ container.add_node(
+ "ConstantOfShape",
+ [nf],
+ zerot_,
+ value=make_tensor("value", TensorProto.INT64, [1], [0]),
+ )
+ zerot = scope.get_unique_variable_name("zerot")
+ container.add_node("Mul", [indices_reshaped, zerot_], zerot)
+
+ final = scope.get_unique_variable_name("final")
+ container.add_node(
+ "ScatterElements", [zerot, indices_reshaped, values_reshaped], final, axis=1
+ )
# at this point, every string has been processed as if it was in
# in a single columns.
# in case there is more than one column, we need to reduce over
# the last dimension
- input_shape = scope.get_unique_variable_name('input_shape')
- container.add_node('Shape', X.full_name, input_shape)
- shape_not_last = scope.get_unique_variable_name('shape_not_last')
- container.add_node('Slice', [input_shape, zero, mone], shape_not_last)
- final_shape = scope.get_unique_variable_name('final_last')
- container.add_node('Concat', [shape_not_last, mone, nf],
- final_shape, axis=0)
- final_reshaped = scope.get_unique_variable_name('final_reshaped')
- container.add_node('Reshape', [final, final_shape], final_reshaped)
- container.add_node('ReduceSum', [final_reshaped, mtwo],
- out[0].full_name, keepdims=0)
-
-
-register_converter('SklearnFeatureHasher', convert_sklearn_feature_hasher)
+ input_shape = scope.get_unique_variable_name("input_shape")
+ container.add_node("Shape", X.full_name, input_shape)
+ shape_not_last = scope.get_unique_variable_name("shape_not_last")
+ container.add_node("Slice", [input_shape, zero, mone], shape_not_last)
+ final_shape = scope.get_unique_variable_name("final_last")
+ container.add_node("Concat", [shape_not_last, mone, nf], final_shape, axis=0)
+ final_reshaped = scope.get_unique_variable_name("final_reshaped")
+ container.add_node("Reshape", [final, final_shape], final_reshaped)
+ container.add_node(
+ "ReduceSum", [final_reshaped, mtwo], out[0].full_name, keepdims=0
+ )
+
+
+register_converter("SklearnFeatureHasher", convert_sklearn_feature_hasher)
diff --git a/skl2onnx/operator_converters/feature_selection.py b/skl2onnx/operator_converters/feature_selection.py
index 4511c781f..5ceccd2c7 100644
--- a/skl2onnx/operator_converters/feature_selection.py
+++ b/skl2onnx/operator_converters/feature_selection.py
@@ -7,8 +7,9 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_feature_selection(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_feature_selection(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
# Get indices of the features selected
index = op.get_support(indices=True)
@@ -16,35 +17,36 @@ def convert_sklearn_feature_selection(scope: Scope, operator: Operator,
raise RuntimeError(
"Model '{}' did not select any feature. "
"This model cannot be converted into ONNX."
- "".format(op.__class__.__name__))
+ "".format(op.__class__.__name__)
+ )
output_name = operator.outputs[0].full_name
if index.any():
- column_indices_name = scope.get_unique_variable_name('column_indices')
+ column_indices_name = scope.get_unique_variable_name("column_indices")
- container.add_initializer(column_indices_name,
- onnx_proto.TensorProto.INT64,
- [len(index)], index)
+ container.add_initializer(
+ column_indices_name, onnx_proto.TensorProto.INT64, [len(index)], index
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[operator.inputs[0].full_name, column_indices_name],
- output_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ output_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
else:
- container.add_node('ConstantOfShape', operator.inputs[0].full_name,
- output_name, op_version=9)
-
-
-register_converter('SklearnGenericUnivariateSelect',
- convert_sklearn_feature_selection)
-register_converter('SklearnRFE', convert_sklearn_feature_selection)
-register_converter('SklearnRFECV', convert_sklearn_feature_selection)
-register_converter('SklearnSelectFdr', convert_sklearn_feature_selection)
-register_converter('SklearnSelectFpr', convert_sklearn_feature_selection)
-register_converter('SklearnSelectFromModel', convert_sklearn_feature_selection)
-register_converter('SklearnSelectFwe', convert_sklearn_feature_selection)
-register_converter('SklearnSelectKBest', convert_sklearn_feature_selection)
-register_converter('SklearnSelectPercentile',
- convert_sklearn_feature_selection)
-register_converter('SklearnVarianceThreshold',
- convert_sklearn_feature_selection)
+ container.add_node(
+ "ConstantOfShape", operator.inputs[0].full_name, output_name, op_version=9
+ )
+
+
+register_converter("SklearnGenericUnivariateSelect", convert_sklearn_feature_selection)
+register_converter("SklearnRFE", convert_sklearn_feature_selection)
+register_converter("SklearnRFECV", convert_sklearn_feature_selection)
+register_converter("SklearnSelectFdr", convert_sklearn_feature_selection)
+register_converter("SklearnSelectFpr", convert_sklearn_feature_selection)
+register_converter("SklearnSelectFromModel", convert_sklearn_feature_selection)
+register_converter("SklearnSelectFwe", convert_sklearn_feature_selection)
+register_converter("SklearnSelectKBest", convert_sklearn_feature_selection)
+register_converter("SklearnSelectPercentile", convert_sklearn_feature_selection)
+register_converter("SklearnVarianceThreshold", convert_sklearn_feature_selection)
diff --git a/skl2onnx/operator_converters/flatten_op.py b/skl2onnx/operator_converters/flatten_op.py
index 861ab7d39..d0ea4494a 100644
--- a/skl2onnx/operator_converters/flatten_op.py
+++ b/skl2onnx/operator_converters/flatten_op.py
@@ -5,12 +5,17 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_flatten(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
- name = scope.get_unique_operator_name('Flatten')
- container.add_node('Flatten', operator.inputs[0].full_name,
- operator.outputs[0].full_name, name=name,
- axis=1)
+def convert_sklearn_flatten(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
+ name = scope.get_unique_operator_name("Flatten")
+ container.add_node(
+ "Flatten",
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ name=name,
+ axis=1,
+ )
-register_converter('SklearnFlatten', convert_sklearn_flatten)
+register_converter("SklearnFlatten", convert_sklearn_flatten)
diff --git a/skl2onnx/operator_converters/function_transformer.py b/skl2onnx/operator_converters/function_transformer.py
index 9fd4ca28b..7cd5b2962 100644
--- a/skl2onnx/operator_converters/function_transformer.py
+++ b/skl2onnx/operator_converters/function_transformer.py
@@ -7,21 +7,31 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_function_transformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_function_transformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
if op.func is not None:
- raise RuntimeError("FunctionTransformer is not supported unless the "
- "transform function is None (= identity). "
- "You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ raise RuntimeError(
+ "FunctionTransformer is not supported unless the "
+ "transform function is None (= identity). "
+ "You may raise an issue at "
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
if len(operator.inputs) == 1:
- apply_identity(scope, operator.inputs[0].full_name,
- operator.outputs[0].full_name, container)
+ apply_identity(
+ scope,
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ container,
+ )
else:
- apply_concat(scope, [i.full_name for i in operator.inputs],
- operator.outputs[0].full_name, container)
+ apply_concat(
+ scope,
+ [i.full_name for i in operator.inputs],
+ operator.outputs[0].full_name,
+ container,
+ )
-register_converter('SklearnFunctionTransformer',
- convert_sklearn_function_transformer)
+register_converter("SklearnFunctionTransformer", convert_sklearn_function_transformer)
diff --git a/skl2onnx/operator_converters/gamma_regressor.py b/skl2onnx/operator_converters/gamma_regressor.py
index 0e5fd90b9..030eaa47b 100644
--- a/skl2onnx/operator_converters/gamma_regressor.py
+++ b/skl2onnx/operator_converters/gamma_regressor.py
@@ -1,18 +1,24 @@
# SPDX-License-Identifier: Apache-2.0
import numpy as np
-from ..common.data_types import (Int64TensorType, guess_numpy_type)
+from ..common.data_types import Int64TensorType, guess_numpy_type
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..algebra.onnx_ops import (
- OnnxAdd, OnnxCast, OnnxExp, OnnxIdentity, OnnxMatMul,
- OnnxReshape, OnnxSigmoid)
+ OnnxAdd,
+ OnnxCast,
+ OnnxExp,
+ OnnxIdentity,
+ OnnxMatMul,
+ OnnxReshape,
+ OnnxSigmoid,
+)
-def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
-
+def convert_sklearn_gamma_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
X = operator.inputs[0]
out = operator.outputs
op = operator.raw_operator
@@ -26,16 +32,21 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator,
else:
input_var = X
- intercept = (op.intercept_.astype(dtype) if len(op.intercept_.shape) > 0
- else np.array([op.intercept_], dtype=dtype))
+ intercept = (
+ op.intercept_.astype(dtype)
+ if len(op.intercept_.shape) > 0
+ else np.array([op.intercept_], dtype=dtype)
+ )
eta = OnnxAdd(
OnnxMatMul(input_var, op.coef_.astype(dtype), op_version=opv),
- intercept, op_version=opv)
+ intercept,
+ op_version=opv,
+ )
if hasattr(op, "_link_instance"):
# scikit-learn < 1.1
- from sklearn.linear_model._glm.link import (
- IdentityLink, LogLink, LogitLink)
+ from sklearn.linear_model._glm.link import IdentityLink, LogLink, LogitLink
+
if isinstance(op._link_instance, IdentityLink):
Y = OnnxIdentity(eta, op_version=opv)
elif isinstance(op._link_instance, LogLink):
@@ -45,8 +56,8 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator,
else:
raise RuntimeError(
"Unexpected type %r for _link_instance "
- "in operator type %r." % (
- type(op._link_instance), type(op)))
+ "in operator type %r." % (type(op._link_instance), type(op))
+ )
else:
# scikit-learn >= 1.1
from sklearn._loss.loss import (
@@ -57,29 +68,32 @@ def convert_sklearn_gamma_regressor(scope: Scope, operator: Operator,
HalfSquaredError,
HalfTweedieLoss,
HalfTweedieLossIdentity,
- PinballLoss
+ PinballLoss,
)
loss = op._get_loss()
if isinstance(
loss,
- (AbsoluteError, HalfSquaredError,
- HalfTweedieLossIdentity, PinballLoss)):
+ (AbsoluteError, HalfSquaredError, HalfTweedieLossIdentity, PinballLoss),
+ ):
Y = OnnxIdentity(eta, op_version=opv)
- elif isinstance(
- loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)):
+ elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)):
Y = OnnxExp(eta, op_version=opv)
elif isinstance(loss, HalfBinomialLoss):
Y = OnnxSigmoid(eta, op_version=opv)
else:
raise RuntimeError(
- f"Unexpected type of link for {loss!r} loss "
- "in operator type {op!r}.")
+ f"Unexpected type of link for {loss!r} loss " "in operator type {op!r}."
+ )
last_dim = 1 if len(op.coef_.shape) == 1 else op.coef_.shape[-1]
- final = OnnxReshape(Y, np.array([-1, last_dim], dtype=np.int64),
- op_version=opv, output_names=out[:1])
+ final = OnnxReshape(
+ Y,
+ np.array([-1, last_dim], dtype=np.int64),
+ op_version=opv,
+ output_names=out[:1],
+ )
final.add_to(scope, container)
-register_converter('SklearnGammaRegressor', convert_sklearn_gamma_regressor)
+register_converter("SklearnGammaRegressor", convert_sklearn_gamma_regressor)
diff --git a/skl2onnx/operator_converters/gaussian_mixture.py b/skl2onnx/operator_converters/gaussian_mixture.py
index 9ace21021..81d63b76c 100644
--- a/skl2onnx/operator_converters/gaussian_mixture.py
+++ b/skl2onnx/operator_converters/gaussian_mixture.py
@@ -4,6 +4,7 @@
import numpy as np
from scipy.special import digamma
from sklearn.mixture import BayesianGaussianMixture, GaussianMixture
+
try:
from sklearn.mixture._gaussian_mixture import _compute_log_det_cholesky
except ImportError:
@@ -14,16 +15,27 @@
from ..common._container import ModelComponentContainer
from ..common.data_types import guess_numpy_type
from ..algebra.onnx_ops import (
- OnnxAdd, OnnxSub, OnnxMul, OnnxGemm, OnnxReduceSumSquareApi18,
- OnnxReduceLogSumExpApi18, OnnxExp, OnnxArgMax, OnnxConcat,
- OnnxReduceSumApi11, OnnxLog, OnnxReduceMaxApi18, OnnxEqual, OnnxCast
+ OnnxAdd,
+ OnnxSub,
+ OnnxMul,
+ OnnxGemm,
+ OnnxReduceSumSquareApi18,
+ OnnxReduceLogSumExpApi18,
+ OnnxExp,
+ OnnxArgMax,
+ OnnxConcat,
+ OnnxReduceSumApi11,
+ OnnxLog,
+ OnnxReduceMaxApi18,
+ OnnxEqual,
+ OnnxCast,
)
from ..proto import onnx_proto
-def _estimate_log_gaussian_prob(X, means, precisions_chol,
- covariance_type, dtype, op_version,
- combined_reducesum):
+def _estimate_log_gaussian_prob(
+ X, means, precisions_chol, covariance_type, dtype, op_version, combined_reducesum
+):
"""
Converts the same function into ONNX.
Returns log probabilities.
@@ -34,10 +46,10 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol,
# self._estimate_log_prob(X)
log_det = _compute_log_det_cholesky(
- precisions_chol, covariance_type, n_features).astype(
- dtype)
+ precisions_chol, covariance_type, n_features
+ ).astype(dtype)
- if covariance_type == 'full':
+ if covariance_type == "full":
# shape(op.means_) = (n_components, n_features)
# shape(op.precisions_cholesky_) =
# (n_components, n_features, n_features)
@@ -50,19 +62,25 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol,
ys = []
for c in range(n_components):
prec_chol = precisions_chol[c, :, :]
- cst = - np.dot(means[c, :], prec_chol)
- y = OnnxGemm(X, prec_chol.astype(dtype),
- cst.astype(dtype), alpha=1.,
- beta=1., op_version=opv)
+ cst = -np.dot(means[c, :], prec_chol)
+ y = OnnxGemm(
+ X,
+ prec_chol.astype(dtype),
+ cst.astype(dtype),
+ alpha=1.0,
+ beta=1.0,
+ op_version=opv,
+ )
if combined_reducesum:
- y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv),
- axes=[1], op_version=opv)
+ y2s = OnnxReduceSumApi11(
+ OnnxMul(y, y, op_version=opv), axes=[1], op_version=opv
+ )
else:
y2s = OnnxReduceSumSquareApi18(y, axes=[1], op_version=opv)
ys.append(y2s)
log_prob = OnnxConcat(*ys, axis=1, op_version=opv)
- elif covariance_type == 'tied':
+ elif covariance_type == "tied":
# shape(op.means_) = (n_components, n_features)
# shape(op.precisions_cholesky_) =
# (n_features, n_features)
@@ -74,19 +92,25 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol,
ys = []
for f in range(n_components):
- cst = - np.dot(means[f, :], precisions_chol)
- y = OnnxGemm(X, precisions_chol.astype(dtype),
- cst.astype(dtype),
- alpha=1., beta=1., op_version=opv)
+ cst = -np.dot(means[f, :], precisions_chol)
+ y = OnnxGemm(
+ X,
+ precisions_chol.astype(dtype),
+ cst.astype(dtype),
+ alpha=1.0,
+ beta=1.0,
+ op_version=opv,
+ )
if combined_reducesum:
- y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv),
- axes=[1], op_version=opv)
+ y2s = OnnxReduceSumApi11(
+ OnnxMul(y, y, op_version=opv), axes=[1], op_version=opv
+ )
else:
y2s = OnnxReduceSumSquareApi18(y, axes=[1], op_version=opv)
ys.append(y2s)
log_prob = OnnxConcat(*ys, axis=1, op_version=opv)
- elif covariance_type == 'diag':
+ elif covariance_type == "diag":
# shape(op.means_) = (n_components, n_features)
# shape(op.precisions_cholesky_) =
# (n_components, n_features)
@@ -96,20 +120,30 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol,
# 2. * np.dot(X, (means * precisions).T) +
# np.dot(X ** 2, precisions.T))
- precisions = (precisions_chol ** 2).astype(dtype)
- mp = np.sum((means ** 2 * precisions), 1).astype(dtype)
- zeros = np.zeros((n_components, ), dtype=dtype)
+ precisions = (precisions_chol**2).astype(dtype)
+ mp = np.sum((means**2 * precisions), 1).astype(dtype)
+ zeros = np.zeros((n_components,), dtype=dtype)
xmp = OnnxGemm(
- X, (means * precisions).T.astype(dtype),
- zeros, alpha=-2., beta=0., op_version=opv)
- term = OnnxGemm(OnnxMul(X, X, op_version=opv),
- precisions.T.astype(dtype),
- zeros, alpha=1., beta=0., op_version=opv)
+ X,
+ (means * precisions).T.astype(dtype),
+ zeros,
+ alpha=-2.0,
+ beta=0.0,
+ op_version=opv,
+ )
+ term = OnnxGemm(
+ OnnxMul(X, X, op_version=opv),
+ precisions.T.astype(dtype),
+ zeros,
+ alpha=1.0,
+ beta=0.0,
+ op_version=opv,
+ )
log_prob = OnnxAdd(
- OnnxAdd(mp.astype(dtype), xmp, op_version=opv),
- term, op_version=opv)
+ OnnxAdd(mp.astype(dtype), xmp, op_version=opv), term, op_version=opv
+ )
- elif covariance_type == 'spherical':
+ elif covariance_type == "spherical":
# shape(op.means_) = (n_components, n_features)
# shape(op.precisions_cholesky_) = (n_components, )
@@ -118,39 +152,51 @@ def _estimate_log_gaussian_prob(X, means, precisions_chol,
# 2 * np.dot(X, means.T * precisions) +
# np.outer(row_norms(X, squared=True), precisions))
- zeros = np.zeros((n_components, ), dtype=dtype)
- precisions = (precisions_chol ** 2).astype(dtype)
+ zeros = np.zeros((n_components,), dtype=dtype)
+ precisions = (precisions_chol**2).astype(dtype)
if combined_reducesum:
- normX = OnnxReduceSumApi11(OnnxMul(X, X, op_version=opv),
- axes=[1], op_version=opv)
+ normX = OnnxReduceSumApi11(
+ OnnxMul(X, X, op_version=opv), axes=[1], op_version=opv
+ )
else:
normX = OnnxReduceSumSquareApi18(X, axes=[1], op_version=opv)
outer = OnnxGemm(
- normX, precisions[np.newaxis, :].astype(dtype),
- zeros.astype(dtype), alpha=1., beta=1., op_version=opv)
+ normX,
+ precisions[np.newaxis, :].astype(dtype),
+ zeros.astype(dtype),
+ alpha=1.0,
+ beta=1.0,
+ op_version=opv,
+ )
xmp = OnnxGemm(
- X, (means.T * precisions).astype(dtype),
- zeros, alpha=-2., beta=0., op_version=opv)
- mp = (np.sum(means ** 2, 1) * precisions).astype(dtype)
- log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer, op_version=opv),
- op_version=opv)
+ X,
+ (means.T * precisions).astype(dtype),
+ zeros,
+ alpha=-2.0,
+ beta=0.0,
+ op_version=opv,
+ )
+ mp = (np.sum(means**2, 1) * precisions).astype(dtype)
+ log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer, op_version=opv), op_version=opv)
else:
- raise RuntimeError("Unknown op.covariance_type='{}'. Upgrade "
- "to a more recent version of skearn-onnx "
- "or raise an issue.".format(covariance_type))
+ raise RuntimeError(
+ "Unknown op.covariance_type='{}'. Upgrade "
+ "to a more recent version of skearn-onnx "
+ "or raise an issue.".format(covariance_type)
+ )
# -.5 * (cst + log_prob) + log_det
cst = np.array([n_features * np.log(2 * np.pi)]).astype(dtype)
add = OnnxAdd(cst, log_prob, op_version=opv)
- mul = OnnxMul(add, np.array([-0.5], dtype=dtype),
- op_version=opv)
+ mul = OnnxMul(add, np.array([-0.5], dtype=dtype), op_version=opv)
if isinstance(log_det, (np.float32, np.float64, float)):
log_det = np.array([log_det], dtype=dtype)
return OnnxAdd(mul, log_det.astype(dtype), op_version=opv)
-def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_gaussian_mixture(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for *GaussianMixture*,
*BayesianGaussianMixture*.
@@ -162,15 +208,17 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
elif operator.target_opset < 11:
raise RuntimeError(
"Some needed operators are not available below opset 11"
- " to convert model %r" % type(operator.raw_operator))
+ " to convert model %r" % type(operator.raw_operator)
+ )
out = operator.outputs
op = operator.raw_operator
n_components = op.means_.shape[0]
opv = container.target_opset
options = container.get_options(op, dict(score_samples=None))
- add_score = options.get('score_samples', False)
+ add_score = options.get("score_samples", False)
combined_reducesum = not container.is_allowed(
- {'ReduceLogSumExp', 'ReduceSumSquare'})
+ {"ReduceLogSumExp", "ReduceSumSquare"}
+ )
if add_score and len(out) != 3:
raise RuntimeError("3 outputs are expected.")
@@ -179,7 +227,9 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
raise RuntimeError(
"Dimension mismath between expected number of features {} "
"and ONNX graphs expectations {}.".format(
- op.means_.shape[1], X.type.shape[1]))
+ op.means_.shape[1], X.type.shape[1]
+ )
+ )
n_features = op.means_.shape[1]
# All comments come from scikit-learn code and tells
@@ -188,50 +238,58 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
log_weights = op._estimate_log_weights().astype(dtype)
log_gauss = _estimate_log_gaussian_prob(
- X, op.means_, op.precisions_cholesky_, op.covariance_type,
- dtype, opv, combined_reducesum)
+ X,
+ op.means_,
+ op.precisions_cholesky_,
+ op.covariance_type,
+ dtype,
+ opv,
+ combined_reducesum,
+ )
if isinstance(op, BayesianGaussianMixture):
# log_gauss = (_estimate_log_gaussian_prob(
# X, self.means_, self.precisions_cholesky_, self.covariance_type) -
# .5 * n_features * np.log(self.degrees_of_freedom_))
- log_lambda = n_features * np.log(2.) + np.sum(digamma(
- .5 * (op.degrees_of_freedom_ -
- np.arange(0, n_features)[:, np.newaxis])), 0)
- cst_log_lambda = .5 * (log_lambda - n_features / op.mean_precision_)
- cst = cst_log_lambda - .5 * n_features * np.log(op.degrees_of_freedom_)
+ log_lambda = n_features * np.log(2.0) + np.sum(
+ digamma(
+ 0.5 * (op.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis])
+ ),
+ 0,
+ )
+ cst_log_lambda = 0.5 * (log_lambda - n_features / op.mean_precision_)
+ cst = cst_log_lambda - 0.5 * n_features * np.log(op.degrees_of_freedom_)
if isinstance(cst, np.ndarray):
cst_array = cst.astype(dtype)
else:
cst_array = np.array([cst], dtype=dtype)
log_gauss = OnnxAdd(log_gauss, cst_array, op_version=opv)
elif not isinstance(op, GaussianMixture):
- raise RuntimeError(
- "The converter does not support type {}.".format(
- type(op)))
+ raise RuntimeError("The converter does not support type {}.".format(type(op)))
# self._estimate_log_prob(X) + self._estimate_log_weights()
weighted_log_prob = OnnxAdd(log_gauss, log_weights, op_version=opv)
# labels
- if container.is_allowed('ArgMax'):
- labels = OnnxArgMax(weighted_log_prob, axis=1,
- output_names=out[:1], op_version=opv)
+ if container.is_allowed("ArgMax"):
+ labels = OnnxArgMax(
+ weighted_log_prob, axis=1, output_names=out[:1], op_version=opv
+ )
else:
- mxlabels = OnnxReduceMaxApi18(
- weighted_log_prob, axes=[1], op_version=opv)
+ mxlabels = OnnxReduceMaxApi18(weighted_log_prob, axes=[1], op_version=opv)
zeros = OnnxEqual(
OnnxSub(weighted_log_prob, mxlabels, op_version=opv),
np.array([0], dtype=dtype),
- op_version=opv)
- toint = OnnxCast(zeros, to=onnx_proto.TensorProto.INT64,
- op_version=opv)
- mulind = OnnxMul(toint,
- np.arange(n_components).astype(np.int64),
- op_version=opv)
+ op_version=opv,
+ )
+ toint = OnnxCast(zeros, to=onnx_proto.TensorProto.INT64, op_version=opv)
+ mulind = OnnxMul(
+ toint, np.arange(n_components).astype(np.int64), op_version=opv
+ )
labels = OnnxReduceMaxApi18(
- mulind, axes=[1], output_names=out[:1], op_version=opv)
+ mulind, axes=[1], output_names=out[:1], op_version=opv
+ )
# def _estimate_log_prob_resp():
# np.exp(log_resp)
@@ -245,21 +303,25 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
outnames = None
if combined_reducesum:
- max_weight = OnnxReduceMaxApi18(
- weighted_log_prob, axes=[1], op_version=opv)
+ max_weight = OnnxReduceMaxApi18(weighted_log_prob, axes=[1], op_version=opv)
log_prob_norm_demax = OnnxLog(
OnnxReduceSumApi11(
OnnxExp(
OnnxSub(weighted_log_prob, max_weight, op_version=opv),
- op_version=opv),
- axes=[1], op_version=opv),
- op_version=opv)
- log_prob_norm = OnnxAdd(log_prob_norm_demax, max_weight,
- op_version=opv, output_names=outnames)
+ op_version=opv,
+ ),
+ axes=[1],
+ op_version=opv,
+ ),
+ op_version=opv,
+ )
+ log_prob_norm = OnnxAdd(
+ log_prob_norm_demax, max_weight, op_version=opv, output_names=outnames
+ )
else:
log_prob_norm = OnnxReduceLogSumExpApi18(
- weighted_log_prob, axes=[1], op_version=opv,
- output_names=outnames)
+ weighted_log_prob, axes=[1], op_version=opv, output_names=outnames
+ )
log_resp = OnnxSub(weighted_log_prob, log_prob_norm, op_version=opv)
# probabilities
@@ -272,8 +334,13 @@ def convert_sklearn_gaussian_mixture(scope: Scope, operator: Operator,
log_prob_norm.add_to(scope, container)
-register_converter('SklearnGaussianMixture', convert_sklearn_gaussian_mixture,
- options={'score_samples': [True, False]})
-register_converter('SklearnBayesianGaussianMixture',
- convert_sklearn_gaussian_mixture,
- options={'score_samples': [True, False]})
+register_converter(
+ "SklearnGaussianMixture",
+ convert_sklearn_gaussian_mixture,
+ options={"score_samples": [True, False]},
+)
+register_converter(
+ "SklearnBayesianGaussianMixture",
+ convert_sklearn_gaussian_mixture,
+ options={"score_samples": [True, False]},
+)
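
As a cross-check of the 'diag' branch in _estimate_log_gaussian_prob above, the same computation written directly in numpy mirrors the scikit-learn formula quoted in the comments, with mp, xmp and term matching the OnnxGemm/OnnxAdd nodes (sketch, assuming inputs shaped as in scikit-learn):

import numpy as np

def log_gaussian_prob_diag(X, means, precisions_chol, log_det):
    n_features = X.shape[1]
    precisions = precisions_chol**2
    log_prob = (
        np.sum(means**2 * precisions, 1)     # mp
        - 2.0 * X @ (means * precisions).T   # xmp (OnnxGemm, alpha=-2)
        + (X**2) @ precisions.T              # term (OnnxGemm on X*X)
    )
    return -0.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det
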
diff --git a/skl2onnx/operator_converters/gaussian_process.py b/skl2onnx/operator_converters/gaussian_process.py
index d7335db3c..886eac706 100644
--- a/skl2onnx/operator_converters/gaussian_process.py
+++ b/skl2onnx/operator_converters/gaussian_process.py
@@ -3,6 +3,7 @@
import numpy as np
from scipy.linalg import solve_triangular
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF
+
try:
from sklearn.gaussian_process._gpc import LAMBDAS, COEFS
except ImportError:
@@ -13,13 +14,27 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..algebra.onnx_ops import (
- OnnxAdd, OnnxSqrt, OnnxMatMul, OnnxSub, OnnxReduceSumApi11,
- OnnxMul, OnnxMax, OnnxReshapeApi13, OnnxDiv, OnnxNot,
- OnnxReciprocal, OnnxCast, OnnxLess,
- OnnxPow, OnnxNeg, OnnxConcat, OnnxArrayFeatureExtractor,
+ OnnxAdd,
+ OnnxSqrt,
+ OnnxMatMul,
+ OnnxSub,
+ OnnxReduceSumApi11,
+ OnnxMul,
+ OnnxMax,
+ OnnxReshapeApi13,
+ OnnxDiv,
+ OnnxNot,
+ OnnxReciprocal,
+ OnnxCast,
+ OnnxLess,
+ OnnxPow,
+ OnnxNeg,
+ OnnxConcat,
+ OnnxArrayFeatureExtractor,
OnnxTranspose,
)
from ..algebra.custom_ops import OnnxSolve
+
try:
from ..algebra.onnx_ops import OnnxConstantOfShape
except ImportError:
@@ -32,15 +47,12 @@
from ..algebra.onnx_ops import OnnxEinsum
except ImportError:
OnnxEinsum = None
-from ._gp_kernels import (
- convert_kernel_diag,
- convert_kernel,
- _zero_vector_of_size
-)
+from ._gp_kernels import convert_kernel_diag, convert_kernel, _zero_vector_of_size
-def convert_gaussian_process_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_gaussian_process_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
The method *predict* from class *GaussianProcessRegressor*
may cache some results if it is called with parameter
@@ -62,30 +74,37 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator,
dtype = np.float32
options = container.get_options(
- op, dict(return_cov=False, return_std=False, optim=None))
- if hasattr(op, 'kernel_') and op.kernel_ is not None:
+ op, dict(return_cov=False, return_std=False, optim=None)
+ )
+ if hasattr(op, "kernel_") and op.kernel_ is not None:
kernel = op.kernel_
elif op.kernel is None:
- kernel = (C(1.0, constant_value_bounds="fixed") *
- RBF(1.0, length_scale_bounds="fixed"))
+ kernel = C(1.0, constant_value_bounds="fixed") * RBF(
+ 1.0, length_scale_bounds="fixed"
+ )
else:
kernel = op.kernel
if not hasattr(op, "X_train_") or op.X_train_ is None:
- out0 = _zero_vector_of_size(X, keepdims=1, output_names=out[:1],
- dtype=dtype, op_version=opv)
+ out0 = _zero_vector_of_size(
+ X, keepdims=1, output_names=out[:1], dtype=dtype, op_version=opv
+ )
outputs = [out0]
- if options['return_cov']:
- outputs.append(convert_kernel(
- kernel, X, output_names=out[1:],
- dtype=dtype, op_version=opv))
- if options['return_std']:
+ if options["return_cov"]:
+ outputs.append(
+ convert_kernel(
+ kernel, X, output_names=out[1:], dtype=dtype, op_version=opv
+ )
+ )
+ if options["return_std"]:
outputs.append(
OnnxSqrt(
- convert_kernel_diag(
- kernel, X, dtype=dtype, op_version=opv),
- output_names=out[1:], op_version=opv))
+ convert_kernel_diag(kernel, X, dtype=dtype, op_version=opv),
+ output_names=out[1:],
+ op_version=opv,
+ )
+ )
else:
# Code scikit-learn
# K_trans = self.kernel_(X, self.X_train_)
@@ -93,17 +112,21 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator,
# y_mean = self._y_train_mean + y_mean * self._y_train_std
k_trans = convert_kernel(
- kernel, X, x_train=op.X_train_.astype(dtype),
- dtype=dtype, optim=options.get('optim', None),
- op_version=opv)
- k_trans.set_onnx_name_prefix('kgpd')
+ kernel,
+ X,
+ x_train=op.X_train_.astype(dtype),
+ dtype=dtype,
+ optim=options.get("optim", None),
+ op_version=opv,
+ )
+ k_trans.set_onnx_name_prefix("kgpd")
y_mean_b = OnnxMatMul(k_trans, op.alpha_.astype(dtype), op_version=opv)
mean_y = op._y_train_mean.astype(dtype)
if len(mean_y.shape) == 1:
mean_y = mean_y.reshape(mean_y.shape + (1,))
- if not hasattr(op, '_y_train_std') or op._y_train_std == 1:
+ if not hasattr(op, "_y_train_std") or op._y_train_std == 1:
if isinstance(y_mean_b, (np.float32, np.float64)):
y_mean_b = np.array([y_mean_b])
if isinstance(mean_y, (np.float32, np.float64)):
@@ -122,19 +145,22 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator,
if isinstance(mean_y, (np.float32, np.float64)):
mean_y = np.array([mean_y])
y_mean = OnnxAdd(
- OnnxMul(y_mean_b, var_y, op_version=opv),
- mean_y, op_version=opv)
+ OnnxMul(y_mean_b, var_y, op_version=opv), mean_y, op_version=opv
+ )
- y_mean.set_onnx_name_prefix('gpr')
+ y_mean.set_onnx_name_prefix("gpr")
y_mean_reshaped = OnnxReshapeApi13(
- y_mean, np.array([-1, 1], dtype=np.int64),
- op_version=opv, output_names=out[:1])
+ y_mean,
+ np.array([-1, 1], dtype=np.int64),
+ op_version=opv,
+ output_names=out[:1],
+ )
outputs = [y_mean_reshaped]
- if options['return_cov']:
+ if options["return_cov"]:
raise NotImplementedError()
- if options['return_std']:
- if hasattr(op, '_K_inv') and op._K_inv is not None:
+ if options["return_std"]:
+ if hasattr(op, "_K_inv") and op._K_inv is not None:
# scikit-learn < 0.24.2
_K_inv = op._K_inv
else:
@@ -143,40 +169,45 @@ def convert_gaussian_process_regressor(scope: Scope, operator: Operator,
_K_inv = L_inv.dot(L_inv.T)
# y_var = self.kernel_.diag(X)
- y_var = convert_kernel_diag(kernel, X, dtype=dtype,
- optim=options.get('optim', None),
- op_version=opv)
+ y_var = convert_kernel_diag(
+ kernel, X, dtype=dtype, optim=options.get("optim", None), op_version=opv
+ )
# y_var -= np.einsum("ij,ij->i",
# np.dot(K_trans, self._K_inv), K_trans)
k_dot = OnnxMatMul(k_trans, _K_inv.astype(dtype), op_version=opv)
ys_var = OnnxSub(
- y_var, OnnxReduceSumApi11(
+ y_var,
+ OnnxReduceSumApi11(
OnnxMul(k_dot, k_trans, op_version=opv),
- axes=[1], keepdims=0, op_version=opv),
- op_version=opv)
+ axes=[1],
+ keepdims=0,
+ op_version=opv,
+ ),
+ op_version=opv,
+ )
# y_var_negative = y_var < 0
# if np.any(y_var_negative):
# y_var[y_var_negative] = 0.0
- ys0_var = OnnxMax(ys_var, np.array([0], dtype=dtype),
- op_version=opv)
+ ys0_var = OnnxMax(ys_var, np.array([0], dtype=dtype), op_version=opv)
- if hasattr(op, '_y_train_std') and op._y_train_std != 1:
+ if hasattr(op, "_y_train_std") and op._y_train_std != 1:
# y_var = y_var * self._y_train_std**2
- ys0_var = OnnxMul(ys0_var, var_y ** 2, op_version=opv)
+ ys0_var = OnnxMul(ys0_var, var_y**2, op_version=opv)
# var = np.sqrt(ys0_var)
var = OnnxSqrt(ys0_var, output_names=out[1:], op_version=opv)
- var.set_onnx_name_prefix('gprv')
+ var.set_onnx_name_prefix("gprv")
outputs.append(var)
for o in outputs:
o.add_to(scope, container)
-def convert_gaussian_process_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_gaussian_process_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
The method *predict* from class *GaussianProcessClassifier*
may cache some results if it is called with parameter
@@ -196,140 +227,165 @@ def convert_gaussian_process_classifier(scope: Scope, operator: Operator,
raise RuntimeError("container.target_opset must not be None")
if OnnxEinsum is None or OnnxErf is None:
raise RuntimeError(
- "target opset must be >= 12 for operator 'einsum' and 'erf'.")
+ "target opset must be >= 12 for operator 'einsum' and 'erf'."
+ )
if LAMBDAS is None:
raise RuntimeError("Only scikit-learn>=0.22 is supported.")
outputs = []
options = container.get_options(op, dict(optim=None))
- if hasattr(op, 'kernel_') and op.kernel_ is not None:
+ if hasattr(op, "kernel_") and op.kernel_ is not None:
kernel = op.kernel_
elif op.kernel is None:
- kernel = (C(1.0, constant_value_bounds="fixed") *
- RBF(1.0, length_scale_bounds="fixed"))
+ kernel = C(1.0, constant_value_bounds="fixed") * RBF(
+ 1.0, length_scale_bounds="fixed"
+ )
else:
kernel = op.kernel
- if not hasattr(op_est, 'X_train_'):
+ if not hasattr(op_est, "X_train_"):
raise NotImplementedError("Only binary classification is iplemented.")
dtype = guess_numpy_type(X.type)
if dtype != np.float64:
dtype = np.float32
K_starT = convert_kernel(
- kernel, X, x_train=op_est.X_train_.astype(dtype), dtype=dtype,
- optim=options.get('optim', None), op_version=opv)
+ kernel,
+ X,
+ x_train=op_est.X_train_.astype(dtype),
+ dtype=dtype,
+ optim=options.get("optim", None),
+ op_version=opv,
+ )
K_star = OnnxTranspose(K_starT, op_version=opv)
- K_star.set_onnx_name_prefix('kstar')
+ K_star.set_onnx_name_prefix("kstar")
# common
# f_star = K_star.T.dot(self.y_train_ - self.pi_)
- f_star_right = (op_est.y_train_ - op_est.pi_).astype(
- dtype).reshape((-1, 1))
+ f_star_right = (op_est.y_train_ - op_est.pi_).astype(dtype).reshape((-1, 1))
f_star = OnnxMatMul(K_starT, f_star_right, op_version=opv)
- f_star.set_onnx_name_prefix('f_star')
+ f_star.set_onnx_name_prefix("f_star")
best = OnnxCast(
OnnxNot(
- OnnxLess(f_star, np.array([0], dtype=dtype), op_version=opv),
- op_version=opv),
- to=onnx_proto.TensorProto.INT64, op_version=opv)
+ OnnxLess(f_star, np.array([0], dtype=dtype), op_version=opv), op_version=opv
+ ),
+ to=onnx_proto.TensorProto.INT64,
+ op_version=opv,
+ )
classes = OnnxArrayFeatureExtractor(op.classes_.astype(np.int64), best)
labels = OnnxTranspose(classes, op_version=opv, output_names=out[:1])
- labels.set_onnx_name_prefix('labels')
+ labels.set_onnx_name_prefix("labels")
outputs.append(labels)
# predict_proba
# a x = b, x = a^-1 b
# v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star) # Line 5
- v = OnnxSolve(op_est.L_.astype(dtype),
- OnnxMul(op_est.W_sr_[:, np.newaxis].astype(dtype),
- K_star, op_version=opv),
- op_version=opv)
- v.set_onnx_name_prefix('solve')
+ v = OnnxSolve(
+ op_est.L_.astype(dtype),
+ OnnxMul(op_est.W_sr_[:, np.newaxis].astype(dtype), K_star, op_version=opv),
+ op_version=opv,
+ )
+ v.set_onnx_name_prefix("solve")
# var_f_star = self.kernel_.diag(X) - np.einsum("ij,ij->j", v, v)
var_f_star_kernel = convert_kernel_diag(
- kernel, X, dtype=dtype,
- optim=options.get('optim', None), op_version=opv)
- var_f_star_kernel.set_onnx_name_prefix('diag')
- var_f_star = OnnxSub(var_f_star_kernel,
- OnnxEinsum(v, v, equation="ij,ij->j",
- op_version=opv),
- op_version=opv)
- var_f_star.set_onnx_name_prefix('var_f_star')
+ kernel, X, dtype=dtype, optim=options.get("optim", None), op_version=opv
+ )
+ var_f_star_kernel.set_onnx_name_prefix("diag")
+ var_f_star = OnnxSub(
+ var_f_star_kernel,
+ OnnxEinsum(v, v, equation="ij,ij->j", op_version=opv),
+ op_version=opv,
+ )
+ var_f_star.set_onnx_name_prefix("var_f_star")
# alpha = 1 / (2 * var_f_star)
- alpha = OnnxReciprocal(OnnxMul(var_f_star, np.array([2], dtype=dtype),
- op_version=opv),
- op_version=opv)
- alpha.set_onnx_name_prefix('alpha')
+ alpha = OnnxReciprocal(
+ OnnxMul(var_f_star, np.array([2], dtype=dtype), op_version=opv), op_version=opv
+ )
+ alpha.set_onnx_name_prefix("alpha")
# gamma = LAMBDAS * f_star
- gamma = OnnxMul(LAMBDAS.astype(dtype),
- OnnxReshapeApi13(
- f_star, np.array([1, -1], dtype=np.int64),
- op_version=opv),
- op_version=opv)
- gamma.set_onnx_name_prefix('gamma')
+ gamma = OnnxMul(
+ LAMBDAS.astype(dtype),
+ OnnxReshapeApi13(f_star, np.array([1, -1], dtype=np.int64), op_version=opv),
+ op_version=opv,
+ )
+ gamma.set_onnx_name_prefix("gamma")
# integrals = np.sqrt(np.pi / alpha) *
# erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2))) /
# (2 * np.sqrt(var_f_star * 2 * np.pi))
- integrals_1 = OnnxSqrt(OnnxDiv(np.array([np.pi], dtype=dtype),
- alpha, op_version=opv),
- op_version=opv)
- integrals_1.set_onnx_name_prefix('int1')
-
- integrals_2_1 = OnnxAdd(alpha, OnnxPow(LAMBDAS.astype(dtype),
- np.array([2], dtype=dtype),
- op_version=opv),
- op_version=opv)
- integrals_2_1.set_onnx_name_prefix('int21')
-
- integrals_2_2 = OnnxSqrt(OnnxDiv(alpha, integrals_2_1, op_version=opv),
- op_version=opv)
- integrals_2_2.set_onnx_name_prefix('int22')
+ integrals_1 = OnnxSqrt(
+ OnnxDiv(np.array([np.pi], dtype=dtype), alpha, op_version=opv), op_version=opv
+ )
+ integrals_1.set_onnx_name_prefix("int1")
+
+ integrals_2_1 = OnnxAdd(
+ alpha,
+ OnnxPow(LAMBDAS.astype(dtype), np.array([2], dtype=dtype), op_version=opv),
+ op_version=opv,
+ )
+ integrals_2_1.set_onnx_name_prefix("int21")
+
+ integrals_2_2 = OnnxSqrt(
+ OnnxDiv(alpha, integrals_2_1, op_version=opv), op_version=opv
+ )
+ integrals_2_2.set_onnx_name_prefix("int22")
integrals_div = OnnxMul(
np.array([2], dtype=dtype),
OnnxSqrt(
OnnxMul(
- OnnxMul(var_f_star, np.array([2], dtype=dtype),
- op_version=opv),
- np.array([np.pi], dtype=dtype), op_version=opv),
- op_version=opv),
- op_version=opv)
- integrals_div.set_onnx_name_prefix('intdiv')
+ OnnxMul(var_f_star, np.array([2], dtype=dtype), op_version=opv),
+ np.array([np.pi], dtype=dtype),
+ op_version=opv,
+ ),
+ op_version=opv,
+ ),
+ op_version=opv,
+ )
+ integrals_div.set_onnx_name_prefix("intdiv")
integrals = OnnxMul(
integrals_1,
- OnnxDiv(OnnxErf(OnnxMul(gamma, integrals_2_2, op_version=opv),
- op_version=opv),
- integrals_div, op_version=opv),
- op_version=opv)
- integrals.set_onnx_name_prefix('integrals')
+ OnnxDiv(
+ OnnxErf(OnnxMul(gamma, integrals_2_2, op_version=opv), op_version=opv),
+ integrals_div,
+ op_version=opv,
+ ),
+ op_version=opv,
+ )
+ integrals.set_onnx_name_prefix("integrals")
# pi_star = (COEFS * integrals).sum(axis=0) + .5 * COEFS.sum()
- coef_sum = (.5 * COEFS.sum()).astype(dtype)
+ coef_sum = (0.5 * COEFS.sum()).astype(dtype)
if not isinstance(coef_sum, np.ndarray):
coef_sum = np.array([coef_sum])
pi_star = OnnxAdd(
OnnxReduceSumApi11(
OnnxMul(COEFS.astype(dtype), integrals, op_version=opv),
- op_version=opv, axes=[0]),
- coef_sum, op_version=opv)
- pi_star.set_onnx_name_prefix('pi_star')
+ op_version=opv,
+ axes=[0],
+ ),
+ coef_sum,
+ op_version=opv,
+ )
+ pi_star.set_onnx_name_prefix("pi_star")
pi_star = OnnxReshapeApi13(
- pi_star, np.array([-1, 1], dtype=np.int64),
- op_version=opv)
- pi_star.set_onnx_name_prefix('pi_star2')
+ pi_star, np.array([-1, 1], dtype=np.int64), op_version=opv
+ )
+ pi_star.set_onnx_name_prefix("pi_star2")
final = OnnxConcat(
- OnnxAdd(OnnxNeg(pi_star, op_version=opv),
- np.array([1], dtype=dtype),
- op_version=opv),
- pi_star, op_version=opv, axis=1,
- output_names=out[1:2])
+ OnnxAdd(
+ OnnxNeg(pi_star, op_version=opv), np.array([1], dtype=dtype), op_version=opv
+ ),
+ pi_star,
+ op_version=opv,
+ axis=1,
+ output_names=out[1:2],
+ )
outputs.append(final)
for o in outputs:
@@ -337,16 +393,24 @@ def convert_gaussian_process_classifier(scope: Scope, operator: Operator,
if OnnxConstantOfShape is not None:
- register_converter('SklearnGaussianProcessRegressor',
- convert_gaussian_process_regressor,
- options={'return_cov': [False, True],
- 'return_std': [False, True],
- 'optim': [None, 'cdist']})
+ register_converter(
+ "SklearnGaussianProcessRegressor",
+ convert_gaussian_process_regressor,
+ options={
+ "return_cov": [False, True],
+ "return_std": [False, True],
+ "optim": [None, "cdist"],
+ },
+ )
if OnnxEinsum is not None and OnnxErf is not None:
- register_converter('SklearnGaussianProcessClassifier',
- convert_gaussian_process_classifier,
- options={'optim': [None, 'cdist'],
- 'nocl': [False, True],
- 'output_class_labels': [False, True],
- 'zipmap': [False, True]})
+ register_converter(
+ "SklearnGaussianProcessClassifier",
+ convert_gaussian_process_classifier,
+ options={
+ "optim": [None, "cdist"],
+ "nocl": [False, True],
+ "output_class_labels": [False, True],
+ "zipmap": [False, True],
+ },
+ )
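
The regressor converter above registers the return_cov/return_std/optim options; a short conversion sketch using the per-class options mechanism (assuming skl2onnx.to_onnx and the documented option keys; return_cov is still unimplemented, as the code shows):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from skl2onnx import to_onnx

rng = np.random.RandomState(0)
X = rng.rand(20, 3).astype(np.float32)
y = X.sum(axis=1)
gpr = GaussianProcessRegressor().fit(X, y)

# 'return_std' adds the standard-deviation output built in the converter;
# 'optim': 'cdist' would switch the kernel to a CDist-based graph.
onx = to_onnx(gpr, X[:1], options={GaussianProcessRegressor: {"return_std": True}})
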
diff --git a/skl2onnx/operator_converters/gradient_boosting.py b/skl2onnx/operator_converters/gradient_boosting.py
index 1d6165849..49354a411 100644
--- a/skl2onnx/operator_converters/gradient_boosting.py
+++ b/skl2onnx/operator_converters/gradient_boosting.py
@@ -4,172 +4,217 @@
import numbers
import numpy as np
from ..common._apply_operation import apply_cast
-from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_numpy_type)
+from ..common.data_types import BooleanTensorType, Int64TensorType, guess_numpy_type
from ..common._registration import register_converter
from ..common.tree_ensemble import (
- add_tree_to_attribute_pairs, get_default_tree_classifier_attribute_pairs,
- get_default_tree_regressor_attribute_pairs)
+ add_tree_to_attribute_pairs,
+ get_default_tree_classifier_attribute_pairs,
+ get_default_tree_regressor_attribute_pairs,
+)
from ..proto import onnx_proto
def convert_sklearn_gradient_boosting_classifier(
- scope, operator, container, op_type='TreeEnsembleClassifier',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleClassifier",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
op = operator.raw_operator
- if op.loss not in ('deviance', 'log_loss'):
+ if op.loss not in ("deviance", "log_loss"):
raise NotImplementedError(
"Loss '{0}' is not supported yet. You "
"may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.".format(op.loss))
+ "https://github.com/onnx/sklearn-onnx/issues.".format(op.loss)
+ )
attrs = get_default_tree_classifier_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(op_type)
+ attrs["name"] = scope.get_unique_operator_name(op_type)
- transform = 'LOGISTIC' if op.n_classes_ == 2 else 'SOFTMAX'
- if op.init == 'zero':
+ transform = "LOGISTIC" if op.n_classes_ == 2 else "SOFTMAX"
+ if op.init == "zero":
loss = op._loss if hasattr(op, "_loss") else op.loss_
base_values = np.zeros(loss.K)
elif op.init is None:
- if hasattr(op.estimators_[0, 0], 'n_features_in_'):
+ if hasattr(op.estimators_[0, 0], "n_features_in_"):
# sklearn >= 1.2
n_features = op.estimators_[0, 0].n_features_in_
else:
# sklearn < 1.2
n_features = op.estimators_[0, 0].n_features_
x0 = np.zeros((1, n_features))
- if hasattr(op, '_raw_predict_init'):
+ if hasattr(op, "_raw_predict_init"):
# sklearn >= 0.21
base_values = op._raw_predict_init(x0).ravel()
- elif hasattr(op, '_init_decision_function'):
+ elif hasattr(op, "_init_decision_function"):
# sklearn >= 0.20 and sklearn < 0.21
base_values = op._init_decision_function(x0).ravel()
else:
raise RuntimeError("scikit-learn < 0.19 is not supported.")
else:
raise NotImplementedError(
- 'Setting init to an estimator is not supported, you may raise an '
- 'issue at https://github.com/onnx/sklearn-onnx/issues.')
+ "Setting init to an estimator is not supported, you may raise an "
+ "issue at https://github.com/onnx/sklearn-onnx/issues."
+ )
- attrs['base_values'] = [float(v) for v in base_values]
+ attrs["base_values"] = [float(v) for v in base_values]
options = container.get_options(op, dict(raw_scores=False))
- if not options['raw_scores']:
- attrs['post_transform'] = transform
+ if not options["raw_scores"]:
+ attrs["post_transform"] = transform
classes = op.classes_
if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes):
class_labels = [int(i) for i in classes]
- attrs['classlabels_int64s'] = class_labels
+ attrs["classlabels_int64s"] = class_labels
elif all(isinstance(i, str) for i in classes):
class_labels = [str(i) for i in classes]
- attrs['classlabels_strings'] = class_labels
+ attrs["classlabels_strings"] = class_labels
else:
- raise ValueError('Labels must be all integer or all strings.')
+ raise ValueError("Labels must be all integer or all strings.")
tree_weight = op.learning_rate
- n_est = (op.n_estimators_ if hasattr(op, 'n_estimators_') else
- op.n_estimators)
+ n_est = op.n_estimators_ if hasattr(op, "n_estimators_") else op.n_estimators
if op.n_classes_ == 2:
for tree_id in range(n_est):
tree = op.estimators_[tree_id][0].tree_
- add_tree_to_attribute_pairs(attrs, True, tree, tree_id,
- tree_weight, 0, False, True,
- dtype=dtype)
+ add_tree_to_attribute_pairs(
+ attrs, True, tree, tree_id, tree_weight, 0, False, True, dtype=dtype
+ )
else:
for i in range(n_est):
for c in range(op.n_classes_):
tree_id = i * op.n_classes_ + c
tree = op.estimators_[i][c].tree_
- add_tree_to_attribute_pairs(attrs, True, tree, tree_id,
- tree_weight, c, False, True,
- dtype=dtype)
+ add_tree_to_attribute_pairs(
+ attrs, True, tree, tree_id, tree_weight, c, False, True, dtype=dtype
+ )
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype)
input_name = operator.input_full_names
if isinstance(operator.inputs[0].type, BooleanTensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
-
- apply_cast(scope, input_name, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+
+ apply_cast(
+ scope,
+ input_name,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
container.add_node(
- op_type, input_name,
+ op_type,
+ input_name,
[operator.outputs[0].full_name, operator.outputs[1].full_name],
- op_domain=op_domain, op_version=op_version, **attrs)
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
def convert_sklearn_gradient_boosting_regressor(
- scope, operator, container, op_type='TreeEnsembleRegressor',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
op = operator.raw_operator
attrs = get_default_tree_regressor_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(op_type)
- attrs['n_targets'] = 1
+ attrs["name"] = scope.get_unique_operator_name(op_type)
+ attrs["n_targets"] = 1
- if op.init == 'zero':
+ if op.init == "zero":
loss = op._loss if hasattr(op, "_loss") else op.loss_
cst = np.zeros(loss.K)
elif op.init is None:
# constant_ was introduced in scikit-learn 0.21.
- if hasattr(op.init_, 'constant_'):
+ if hasattr(op.init_, "constant_"):
cst = [float(x) for x in op.init_.constant_]
- elif op.loss == 'ls':
+ elif op.loss == "ls":
cst = [op.init_.mean]
else:
cst = [op.init_.quantile]
else:
raise NotImplementedError(
- 'Setting init to an estimator is not supported, you may raise an '
- 'issue at https://github.com/onnx/sklearn-onnx/issues.')
+ "Setting init to an estimator is not supported, you may raise an "
+ "issue at https://github.com/onnx/sklearn-onnx/issues."
+ )
- attrs['base_values'] = [float(x) for x in cst]
+ attrs["base_values"] = [float(x) for x in cst]
tree_weight = op.learning_rate
- n_est = (op.n_estimators_ if hasattr(op, 'n_estimators_') else
- op.n_estimators)
+ n_est = op.n_estimators_ if hasattr(op, "n_estimators_") else op.n_estimators
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
for i in range(n_est):
tree = op.estimators_[i][0].tree_
tree_id = i
- add_tree_to_attribute_pairs(attrs, False, tree, tree_id, tree_weight,
- 0, False, True, dtype=dtype)
+ add_tree_to_attribute_pairs(
+ attrs, False, tree, tree_id, tree_weight, 0, False, True, dtype=dtype
+ )
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype)
input_name = operator.input_full_names
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
-
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
container.add_node(
- op_type, input_name, operator.output_full_names,
- op_domain=op_domain, op_version=op_version, **attrs)
-
-
-register_converter('SklearnGradientBoostingClassifier',
- convert_sklearn_gradient_boosting_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'raw_scores': [True, False],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnGradientBoostingRegressor',
- convert_sklearn_gradient_boosting_regressor)
+ op_type,
+ input_name,
+ operator.output_full_names,
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
+
+
+register_converter(
+ "SklearnGradientBoostingClassifier",
+ convert_sklearn_gradient_boosting_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "raw_scores": [True, False],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnGradientBoostingRegressor", convert_sklearn_gradient_boosting_regressor
+)
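
Both gradient-boosting converters emit a single TreeEnsemble* node whose attributes are filled by add_tree_to_attribute_pairs. A conversion sketch showing the classifier options registered above (assuming skl2onnx.to_onnx and the per-instance options dictionary):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
from skl2onnx import to_onnx

X, y = load_iris(return_X_y=True)
clf = GradientBoostingClassifier(n_estimators=5).fit(X, y)

# 'raw_scores' skips the LOGISTIC/SOFTMAX post_transform set above;
# 'zipmap' controls how the probability output is exposed.
onx = to_onnx(
    clf,
    X[:1].astype(np.float32),
    options={id(clf): {"zipmap": False, "raw_scores": False}},
)
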
diff --git a/skl2onnx/operator_converters/grid_search_cv.py b/skl2onnx/operator_converters/grid_search_cv.py
index 6bebad905..f2c8b6135 100644
--- a/skl2onnx/operator_converters/grid_search_cv.py
+++ b/skl2onnx/operator_converters/grid_search_cv.py
@@ -7,8 +7,9 @@
from .._supported_operators import sklearn_operator_name_map
-def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_grid_search_cv(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for scikit-learn's GridSearchCV.
"""
@@ -16,8 +17,7 @@ def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator,
grid_search_op = operator.raw_operator
best_estimator = grid_search_op.best_estimator_
op_type = sklearn_operator_name_map[type(best_estimator)]
- grid_search_operator = scope.declare_local_operator(
- op_type, best_estimator)
+ grid_search_operator = scope.declare_local_operator(op_type, best_estimator)
container.add_options(id(best_estimator), opts)
scope.add_options(id(best_estimator), opts)
grid_search_operator.inputs = operator.inputs
@@ -28,6 +28,6 @@ def convert_sklearn_grid_search_cv(scope: Scope, operator: Operator,
apply_identity(scope, v.full_name, o.full_name, container)
-register_converter('SklearnGridSearchCV',
- convert_sklearn_grid_search_cv,
- options="passthrough")
+register_converter(
+ "SklearnGridSearchCV", convert_sklearn_grid_search_cv, options="passthrough"
+)
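
The GridSearchCV converter above is a thin delegation: it looks up the converter of best_estimator_ and forwards the options, so the resulting graph is that of the winning model. A minimal sketch (assuming skl2onnx.to_onnx):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from skl2onnx import to_onnx

X, y = load_iris(return_X_y=True)
grid = GridSearchCV(LogisticRegression(max_iter=500), {"C": [0.1, 1.0]}).fit(X, y)

# Only best_estimator_ is converted, exactly as the delegation above implies.
onx = to_onnx(grid, X[:1].astype(np.float32))
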
diff --git a/skl2onnx/operator_converters/id_op.py b/skl2onnx/operator_converters/id_op.py
index f7318d4e8..5f0afd91c 100644
--- a/skl2onnx/operator_converters/id_op.py
+++ b/skl2onnx/operator_converters/id_op.py
@@ -7,12 +7,16 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_identity(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_identity(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
apply_identity(
- scope, operator.inputs[0].full_name,
- operator.outputs[0].full_name, container,
- operator_name=scope.get_unique_operator_name('CIdentity'))
+ scope,
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ container,
+ operator_name=scope.get_unique_operator_name("CIdentity"),
+ )
-register_converter('SklearnIdentity', convert_sklearn_identity)
+register_converter("SklearnIdentity", convert_sklearn_identity)
diff --git a/skl2onnx/operator_converters/imputer_op.py b/skl2onnx/operator_converters/imputer_op.py
index 82fdfe413..0aee5f656 100644
--- a/skl2onnx/operator_converters/imputer_op.py
+++ b/skl2onnx/operator_converters/imputer_op.py
@@ -11,31 +11,36 @@
from .common import concatenate_variables
-def convert_sklearn_imputer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
- op_type = 'Imputer'
- attrs = {'name': scope.get_unique_operator_name(op_type)}
+def convert_sklearn_imputer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
+ op_type = "Imputer"
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
op = operator.raw_operator
- if (hasattr(op, 'fill_value') and isinstance(op.fill_value, str) and
- op.fill_value.lower() != 'nan'):
- raise RuntimeError("Imputer cannot fill missing values with a "
- "string '%s'." % op.fill_value)
- if not hasattr(op, 'statistics_'):
- raise RuntimeError("Member statistics_ is not present, was the "
- "model fitted?")
+ if (
+ hasattr(op, "fill_value")
+ and isinstance(op.fill_value, str)
+ and op.fill_value.lower() != "nan"
+ ):
+ raise RuntimeError(
+ "Imputer cannot fill missing values with a " "string '%s'." % op.fill_value
+ )
+ if not hasattr(op, "statistics_"):
+ raise RuntimeError(
+ "Member statistics_ is not present, was the " "model fitted?"
+ )
if isinstance(operator.inputs[0].type, StringTensorType):
if not isinstance(op.missing_values, (str, np.str_)):
raise NotImplementedError(
"The converter is implemented when the missing values "
- "are string not %r." % type(op.missing_values))
+ "are string not %r." % type(op.missing_values)
+ )
zero = scope.get_unique_variable_name("zero")
- container.add_initializer(
- zero, onnx_proto.TensorProto.INT64, [1], [0])
+ container.add_initializer(zero, onnx_proto.TensorProto.INT64, [1], [0])
- concatenated_feature = concatenate_variables(
- scope, operator.inputs, container)
+ concatenated_feature = concatenate_variables(scope, operator.inputs, container)
names = []
for i in range(op.statistics_.size):
# loop on features
@@ -45,68 +50,83 @@ def convert_sklearn_imputer(scope: Scope, operator: Operator,
else:
skl_fill_value = op.fill_value
container.add_node(
- "LabelEncoder", [zero], [fill_value],
- keys_int64s=[0], values_strings=[op.statistics_[i]],
- default_string=skl_fill_value, op_domain='ai.onnx.ml',
- op_version=2)
+ "LabelEncoder",
+ [zero],
+ [fill_value],
+ keys_int64s=[0],
+ values_strings=[op.statistics_[i]],
+ default_string=skl_fill_value,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ )
init = scope.get_unique_variable_name("i%d" % i)
- container.add_initializer(
- init, onnx_proto.TensorProto.INT64, [1], [i])
+ container.add_initializer(init, onnx_proto.TensorProto.INT64, [1], [i])
name = scope.get_unique_variable_name("impi%d" % i)
container.add_node(
- "ArrayFeatureExtractor", [concatenated_feature, init], [name],
- op_domain='ai.onnx.ml')
+ "ArrayFeatureExtractor",
+ [concatenated_feature, init],
+ [name],
+ op_domain="ai.onnx.ml",
+ )
cond = scope.get_unique_variable_name("impc%d" % i)
container.add_node(
- "LabelEncoder", [name], [cond],
+ "LabelEncoder",
+ [name],
+ [cond],
keys_strings=[str(op.missing_values)],
- values_int64s=[1], default_int64=0,
- op_domain='ai.onnx.ml', op_version=2)
+ values_int64s=[1],
+ default_int64=0,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ )
condb = scope.get_unique_variable_name("impc%d" % i)
- container.add_node("Cast", [cond], [condb],
- to=onnx_proto.TensorProto.BOOL)
+ container.add_node("Cast", [cond], [condb], to=onnx_proto.TensorProto.BOOL)
repli = scope.get_unique_variable_name("nomiss%d" % i)
container.add_node("Where", [condb, fill_value, name], [repli])
names.append(repli)
- apply_concat(
- scope, names, operator.outputs[0].full_name, container, axis=1)
+ apply_concat(scope, names, operator.outputs[0].full_name, container, axis=1)
else:
if isinstance(operator.inputs[0].type, Int64TensorType):
- attrs['imputed_value_int64s'] = op.statistics_.astype(np.int64)
+ attrs["imputed_value_int64s"] = op.statistics_.astype(np.int64)
use_int = True
- delta = np.max(
- np.abs(attrs['imputed_value_int64s'] - op.statistics_))
+ delta = np.max(np.abs(attrs["imputed_value_int64s"] - op.statistics_))
if delta != 0:
raise RuntimeError(
"SimpleImputer takes integer as input but nan values are "
"replaced by float {} != {}.".format(
- attrs['imputed_value_int64s'], op.statistics_))
+ attrs["imputed_value_int64s"], op.statistics_
+ )
+ )
else:
- attrs['imputed_value_floats'] = op.statistics_.astype(np.float32)
+ attrs["imputed_value_floats"] = op.statistics_.astype(np.float32)
use_int = False
- if isinstance(op.missing_values, str) and op.missing_values == 'NaN':
- attrs['replaced_value_float'] = np.NaN
+ if isinstance(op.missing_values, str) and op.missing_values == "NaN":
+ attrs["replaced_value_float"] = np.NaN
elif isinstance(op.missing_values, float):
if use_int:
ar = np.array([op.missing_values]).astype(np.int64)
- attrs['replaced_value_int64'] = ar[0]
+ attrs["replaced_value_int64"] = ar[0]
else:
- attrs['replaced_value_float'] = float(op.missing_values)
+ attrs["replaced_value_float"] = float(op.missing_values)
else:
raise RuntimeError(
"Unsupported proposed value '{0}'. You may raise an issue at "
"https://github.com/onnx/sklearn-onnx/issues."
- "".format(op.missing_values))
+ "".format(op.missing_values)
+ )
- concatenated_feature = concatenate_variables(
- scope, operator.inputs, container)
+ concatenated_feature = concatenate_variables(scope, operator.inputs, container)
container.add_node(
- op_type, concatenated_feature,
- operator.outputs[0].full_name, op_domain='ai.onnx.ml', **attrs)
+ op_type,
+ concatenated_feature,
+ operator.outputs[0].full_name,
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
-register_converter('SklearnImputer', convert_sklearn_imputer)
-register_converter('SklearnSimpleImputer', convert_sklearn_imputer)
+register_converter("SklearnImputer", convert_sklearn_imputer)
+register_converter("SklearnSimpleImputer", convert_sklearn_imputer)
diff --git a/skl2onnx/operator_converters/isolation_forest.py b/skl2onnx/operator_converters/isolation_forest.py
index dfb63fbb4..8a17a6032 100644
--- a/skl2onnx/operator_converters/isolation_forest.py
+++ b/skl2onnx/operator_converters/isolation_forest.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
import numpy as np
+
try:
from sklearn.ensemble._iforest import _average_path_length
except ImportError:
@@ -8,31 +9,53 @@
from sklearn.ensemble.iforest import _average_path_length
from ..common._registration import register_converter
from ..common.data_types import (
- BooleanTensorType, Int64TensorType,
- guess_numpy_type, guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common.tree_ensemble import (
add_tree_to_attribute_pairs,
- get_default_tree_regressor_attribute_pairs)
+ get_default_tree_regressor_attribute_pairs,
+)
from ..proto import onnx_proto
from ..algebra.onnx_ops import (
- OnnxTreeEnsembleRegressor_1, OnnxLog,
- OnnxCast, OnnxLess, OnnxLabelEncoder, OnnxMul,
- OnnxGreater, OnnxAdd, OnnxDiv, OnnxSum, OnnxNeg,
- OnnxReshapeApi13, OnnxEqual, OnnxPow, OnnxGather, OnnxMax)
+ OnnxTreeEnsembleRegressor_1,
+ OnnxLog,
+ OnnxCast,
+ OnnxLess,
+ OnnxLabelEncoder,
+ OnnxMul,
+ OnnxGreater,
+ OnnxAdd,
+ OnnxDiv,
+ OnnxSum,
+ OnnxNeg,
+ OnnxReshapeApi13,
+ OnnxEqual,
+ OnnxPow,
+ OnnxGather,
+ OnnxMax,
+)
def convert_sklearn_isolation_forest(
- scope, operator, container, op_type='TreeEnsembleRegressor',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
op = operator.raw_operator
outputs = operator.outputs
opv = container.target_opset
- opvml = container.target_opset_any_domain('ai.onnx.ml')
+ opvml = container.target_opset_any_domain("ai.onnx.ml")
options = container.get_options(op, dict(score_samples=None))
if opvml < 2:
raise RuntimeError(
- "This converter requires at least opset 2 for "
- "domain 'ai.onnx.ml'.")
+ "This converter requires at least opset 2 for " "domain 'ai.onnx.ml'."
+ )
input_name = operator.inputs[0]
dtype = guess_numpy_type(operator.inputs[0].type)
@@ -44,27 +67,29 @@ def convert_sklearn_isolation_forest(
raise RuntimeError(
"Converter for IsolationForest does not support the case when "
"_max_features={} != number of given features {}.".format(
- op._max_features, operator.inputs[0].type.shape[1]))
+ op._max_features, operator.inputs[0].type.shape[1]
+ )
+ )
# decision_path
scores = []
- for i, (tree, features) in enumerate(
- zip(op.estimators_, op.estimators_features_)):
-
+ for i, (tree, features) in enumerate(zip(op.estimators_, op.estimators_features_)):
# X_subset = X[:, features]
- gather = OnnxGather(input_name, features.astype(np.int64),
- axis=1, op_version=opv)
+ gather = OnnxGather(
+ input_name, features.astype(np.int64), axis=1, op_version=opv
+ )
attrs = get_default_tree_regressor_attribute_pairs()
- attrs['n_targets'] = 1
- add_tree_to_attribute_pairs(attrs, False, tree.tree_, 0, 1., 0, False,
- True, dtype=dtype)
+ attrs["n_targets"] = 1
+ add_tree_to_attribute_pairs(
+ attrs, False, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype
+ )
# tree leave
- attrs['n_targets'] = 1
- attrs['post_transform'] = 'NONE'
- attrs['target_ids'] = [0 for _ in attrs['target_ids']]
- attrs['target_weights'] = [float(_) for _ in attrs['target_nodeids']]
+ attrs["n_targets"] = 1
+ attrs["post_transform"] = "NONE"
+ attrs["target_ids"] = [0 for _ in attrs["target_ids"]]
+ attrs["target_weights"] = [float(_) for _ in attrs["target_nodeids"]]
leave = OnnxTreeEnsembleRegressor_1(gather, op_version=1, **attrs)
# tree - retrieve node_sample
@@ -75,22 +100,25 @@ def convert_sklearn_isolation_forest(
keys = [float(_[0]) for _ in ordered]
node_sample = OnnxReshapeApi13(
OnnxLabelEncoder(
- leave, op_version=opvml,
- keys_floats=keys, values_floats=values),
+ leave, op_version=opvml, keys_floats=keys, values_floats=values
+ ),
np.array([-1, 1], dtype=np.int64),
- op_version=opv)
+ op_version=opv,
+ )
else:
keys = [int(_[0]) for _ in ordered]
values = [float(_[1]) for _ in ordered]
node_sample = OnnxReshapeApi13(
OnnxLabelEncoder(
- OnnxCast(leave, op_version=opv,
- to=onnx_proto.TensorProto.INT64),
+ OnnxCast(leave, op_version=opv, to=onnx_proto.TensorProto.INT64),
op_version=opvml,
- keys_int64s=keys, values_floats=values),
+ keys_int64s=keys,
+ values_floats=values,
+ ),
np.array([-1, 1], dtype=np.int64),
- op_version=opv)
- node_sample.set_onnx_name_prefix('node_sample%d' % i)
+ op_version=opv,
+ )
+ node_sample.set_onnx_name_prefix("node_sample%d" % i)
# tree - retrieve path_length
labels = _build_labels(tree.tree_, output="path_length")
@@ -101,121 +129,134 @@ def convert_sklearn_isolation_forest(
values = [float(_[1]) for _ in ordered]
path_length = OnnxReshapeApi13(
OnnxLabelEncoder(
- leave, op_version=opvml,
- keys_floats=keys, values_floats=values),
+ leave, op_version=opvml, keys_floats=keys, values_floats=values
+ ),
np.array([-1, 1], dtype=np.int64),
- op_version=opv)
+ op_version=opv,
+ )
else:
keys = [int(_[0]) for _ in ordered]
path_length = OnnxReshapeApi13(
OnnxLabelEncoder(
- OnnxCast(leave, op_version=opv,
- to=onnx_proto.TensorProto.INT64),
+ OnnxCast(leave, op_version=opv, to=onnx_proto.TensorProto.INT64),
op_version=opvml,
- keys_int64s=keys, values_floats=values),
+ keys_int64s=keys,
+ values_floats=values,
+ ),
np.array([-1, 1], dtype=np.int64),
- op_version=opv)
- path_length.set_onnx_name_prefix('path_length%d' % i)
+ op_version=opv,
+ )
+ path_length.set_onnx_name_prefix("path_length%d" % i)
# score
eq2 = OnnxCast(
- OnnxEqual(node_sample, np.array([2], dtype=np.float32),
- op_version=opv),
- to=proto_dtype, op_version=opv)
- eq2.set_onnx_name_prefix('eq2_%d' % i)
+ OnnxEqual(node_sample, np.array([2], dtype=np.float32), op_version=opv),
+ to=proto_dtype,
+ op_version=opv,
+ )
+ eq2.set_onnx_name_prefix("eq2_%d" % i)
# 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
eqp2p = OnnxCast(
- OnnxGreater(
- node_sample, np.array([2], dtype=np.float32),
- op_version=opv),
- to=proto_dtype, op_version=opv)
- eqp2p.set_onnx_name_prefix('plus2_%d' % i)
+ OnnxGreater(node_sample, np.array([2], dtype=np.float32), op_version=opv),
+ to=proto_dtype,
+ op_version=opv,
+ )
+ eqp2p.set_onnx_name_prefix("plus2_%d" % i)
eqp2ps = OnnxMul(eqp2p, node_sample, op_version=opv)
- eqp2ps.set_onnx_name_prefix('eqp2ps%d' % i)
+ eqp2ps.set_onnx_name_prefix("eqp2ps%d" % i)
- eqp2ps_1 = OnnxAdd(eqp2ps, np.array([-1], dtype=dtype),
- op_version=opv)
+ eqp2ps_1 = OnnxAdd(eqp2ps, np.array([-1], dtype=dtype), op_version=opv)
- eqp2p_m1 = OnnxMax(eqp2ps_1, np.array([1], dtype=dtype),
- op_version=opv)
- eqp2p_m1.set_onnx_name_prefix('eqp2p_m1_%d' % i)
+ eqp2p_m1 = OnnxMax(eqp2ps_1, np.array([1], dtype=dtype), op_version=opv)
+ eqp2p_m1.set_onnx_name_prefix("eqp2p_m1_%d" % i)
eqp_log = OnnxMul(
- OnnxAdd(OnnxLog(eqp2p_m1, op_version=opv),
- np.array([np.euler_gamma], dtype=dtype),
- op_version=opv),
- np.array([2], dtype=dtype), op_version=opv)
- eqp_log.set_onnx_name_prefix('eqp_log%d' % i)
+ OnnxAdd(
+ OnnxLog(eqp2p_m1, op_version=opv),
+ np.array([np.euler_gamma], dtype=dtype),
+ op_version=opv,
+ ),
+ np.array([2], dtype=dtype),
+ op_version=opv,
+ )
+ eqp_log.set_onnx_name_prefix("eqp_log%d" % i)
# - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]
- eqp2p_m0 = OnnxMax(eqp2ps_1, np.array([0], dtype=dtype),
- op_version=opv)
- eqp2p_m0.set_onnx_name_prefix('eqp2p_m1_%d' % i)
+ eqp2p_m0 = OnnxMax(eqp2ps_1, np.array([0], dtype=dtype), op_version=opv)
+ eqp2p_m0.set_onnx_name_prefix("eqp2p_m1_%d" % i)
eqp_ns = OnnxMul(
OnnxDiv(
eqp2p_m0,
- OnnxMax(eqp2ps, np.array([1], dtype=dtype),
- op_version=opv),
- op_version=opv),
- np.array([-2], dtype=dtype), op_version=opv)
- eqp_ns.set_onnx_name_prefix('eqp_ns%d' % i)
+ OnnxMax(eqp2ps, np.array([1], dtype=dtype), op_version=opv),
+ op_version=opv,
+ ),
+ np.array([-2], dtype=dtype),
+ op_version=opv,
+ )
+ eqp_ns.set_onnx_name_prefix("eqp_ns%d" % i)
# np.ravel(node_indicator.sum(axis=1))
# + _average_path_length(n_samples_leaf)
# - 1.0
av_path_length_log = OnnxMul(
- OnnxAdd(eqp_log, eqp_ns, op_version=opv),
- eqp2p, op_version=opv)
- av_path_length_log.set_onnx_name_prefix('avlog%d' % i)
+ OnnxAdd(eqp_log, eqp_ns, op_version=opv), eqp2p, op_version=opv
+ )
+ av_path_length_log.set_onnx_name_prefix("avlog%d" % i)
av_path_length = OnnxAdd(eq2, av_path_length_log, op_version=opv)
- av_path_length.set_onnx_name_prefix('avpl%d' % i)
+ av_path_length.set_onnx_name_prefix("avpl%d" % i)
depth = OnnxAdd(
OnnxAdd(path_length, av_path_length, op_version=opv),
np.array([-1], dtype=dtype),
- op_version=opv)
- depth.set_onnx_name_prefix('depth%d' % i)
+ op_version=opv,
+ )
+ depth.set_onnx_name_prefix("depth%d" % i)
scores.append(depth)
cst = len(op.estimators_) * _average_path_length([op.max_samples_])
- depths = OnnxDiv(OnnxSum(*scores, op_version=opv),
- np.array([cst], dtype=dtype),
- op_version=opv)
+ depths = OnnxDiv(
+ OnnxSum(*scores, op_version=opv), np.array([cst], dtype=dtype), op_version=opv
+ )
# decision_function
- output_names = outputs[2].full_name if options['score_samples'] else None
+ output_names = outputs[2].full_name if options["score_samples"] else None
score_samples = OnnxNeg(
- OnnxPow(np.array([2], dtype=dtype),
- OnnxNeg(depths, op_version=opv),
- op_version=opv),
- op_version=opv, output_names=output_names)
+ OnnxPow(
+ np.array([2], dtype=dtype), OnnxNeg(depths, op_version=opv), op_version=opv
+ ),
+ op_version=opv,
+ output_names=output_names,
+ )
decision = OnnxAdd(
- score_samples, np.array([-op.offset_], dtype=dtype),
- op_version=opv, output_names=outputs[1].full_name)
- decision.set_onnx_name_prefix('dec')
+ score_samples,
+ np.array([-op.offset_], dtype=dtype),
+ op_version=opv,
+ output_names=outputs[1].full_name,
+ )
+ decision.set_onnx_name_prefix("dec")
- less = OnnxLess(decision, np.array([0], dtype=dtype),
- op_version=opv)
+ less = OnnxLess(decision, np.array([0], dtype=dtype), op_version=opv)
predict = OnnxAdd(
OnnxMul(
- OnnxCast(less, op_version=opv,
- to=onnx_proto.TensorProto.INT64),
+ OnnxCast(less, op_version=opv, to=onnx_proto.TensorProto.INT64),
np.array([-2], dtype=np.int64),
- op_version=opv),
+ op_version=opv,
+ ),
np.array([1], dtype=np.int64),
op_version=opv,
- output_names=outputs[0].full_name)
- predict.set_onnx_name_prefix('predict')
+ output_names=outputs[0].full_name,
+ )
+ predict.set_onnx_name_prefix("predict")
predict.add_to(scope, container)
less.add_to(scope, container)
- if options['score_samples']:
+ if options["score_samples"]:
score_samples.add_to(scope, container)
@@ -225,25 +266,23 @@ def _recursive_build_labels(index, current):
if tree.children_left[index] == -1:
yield (index, current.copy())
else:
- for it in _recursive_build_labels(
- tree.children_left[index], current):
+ for it in _recursive_build_labels(tree.children_left[index], current):
yield it
- for it in _recursive_build_labels(
- tree.children_right[index], current):
+ for it in _recursive_build_labels(tree.children_right[index], current):
yield it
current[index] = False
paths = {}
current = {}
- if output == 'path_length':
+ if output == "path_length":
for leave_index, path in _recursive_build_labels(0, current):
spath = {}
for nodeid, b in path.items():
if b:
spath[nodeid] = 1
paths[leave_index] = sum(spath.values())
- elif output == 'node_sample':
+ elif output == "node_sample":
for leave_index, path in _recursive_build_labels(0, current):
spath = {}
for nodeid, b in path.items():
@@ -255,6 +294,8 @@ def _recursive_build_labels(index, current):
return paths
-register_converter('SklearnIsolationForest',
- convert_sklearn_isolation_forest,
- options={'score_samples': [True, False]})
+register_converter(
+ "SklearnIsolationForest",
+ convert_sklearn_isolation_forest,
+ options={"score_samples": [True, False]},
+)
diff --git a/skl2onnx/operator_converters/k_bins_discretiser.py b/skl2onnx/operator_converters/k_bins_discretiser.py
index 4d4f2f318..7f31c487e 100644
--- a/skl2onnx/operator_converters/k_bins_discretiser.py
+++ b/skl2onnx/operator_converters/k_bins_discretiser.py
@@ -5,101 +5,156 @@
from ..proto import onnx_proto
from ..common._apply_operation import (
- apply_cast, apply_concat, apply_reshape,
- apply_mul, apply_add
+ apply_cast,
+ apply_concat,
+ apply_reshape,
+ apply_mul,
+ apply_add,
)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-def convert_sklearn_k_bins_discretiser(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_k_bins_discretiser(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
- if op.encode == 'onehot':
- raise RuntimeError("onehot encoding not supported. "
- "ONNX does not support sparse tensors. "
- "with opset < 11. You may raise an isue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ if op.encode == "onehot":
+ raise RuntimeError(
+ "onehot encoding not supported. "
+ "ONNX does not support sparse tensors. "
+ "with opset < 11. You may raise an isue at "
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
- ranges = list(map(lambda e: e[1:-1] if len(e) > 2
- else [np.finfo(np.float32).max], op.bin_edges_))
+ ranges = list(
+ map(
+ lambda e: e[1:-1] if len(e) > 2 else [np.finfo(np.float32).max],
+ op.bin_edges_,
+ )
+ )
digitised_output_name = [None] * len(ranges)
last_column_name = None
for i, item in enumerate(ranges):
- digitised_output_name[i] = (
- scope.get_unique_variable_name('digitised_output_{}'.format(i)))
- column_index_name = scope.get_unique_variable_name('column_index')
- range_column_name = scope.get_unique_variable_name('range_column')
- column_name = scope.get_unique_variable_name('column')
- cast_column_name = scope.get_unique_variable_name('cast_column')
- less_result_name = scope.get_unique_variable_name('less_result')
- cast_result_name = scope.get_unique_variable_name('cast_result')
- concatenated_array_name = scope.get_unique_variable_name(
- 'concatenated_array')
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
+ digitised_output_name[i] = scope.get_unique_variable_name(
+ "digitised_output_{}".format(i)
+ )
+ column_index_name = scope.get_unique_variable_name("column_index")
+ range_column_name = scope.get_unique_variable_name("range_column")
+ column_name = scope.get_unique_variable_name("column")
+ cast_column_name = scope.get_unique_variable_name("cast_column")
+ less_result_name = scope.get_unique_variable_name("less_result")
+ cast_result_name = scope.get_unique_variable_name("cast_result")
+ concatenated_array_name = scope.get_unique_variable_name("concatenated_array")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
- container.add_initializer(column_index_name,
- onnx_proto.TensorProto.INT64, [], [i])
- container.add_initializer(range_column_name,
- onnx_proto.TensorProto.FLOAT,
- [len(item)], item)
+ container.add_initializer(
+ column_index_name, onnx_proto.TensorProto.INT64, [], [i]
+ )
+ container.add_initializer(
+ range_column_name, onnx_proto.TensorProto.FLOAT, [len(item)], item
+ )
container.add_node(
- 'ArrayFeatureExtractor',
- [operator.inputs[0].full_name, column_index_name], column_name,
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
- op_domain='ai.onnx.ml')
- apply_cast(scope, column_name, cast_column_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ "ArrayFeatureExtractor",
+ [operator.inputs[0].full_name, column_index_name],
+ column_name,
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ op_domain="ai.onnx.ml",
+ )
+ apply_cast(
+ scope,
+ column_name,
+ cast_column_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
container.add_node(
- 'Less', [cast_column_name, range_column_name],
+ "Less",
+ [cast_column_name, range_column_name],
less_result_name,
- name=scope.get_unique_operator_name('Less'))
- apply_cast(scope, less_result_name, cast_result_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ name=scope.get_unique_operator_name("Less"),
+ )
+ apply_cast(
+ scope,
+ less_result_name,
+ cast_result_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
if last_column_name is None:
- last_column_name = scope.get_unique_variable_name('last_column')
- zero_float = scope.get_unique_variable_name('zero_float')
- one_float = scope.get_unique_variable_name('one_float')
- zero_column = scope.get_unique_variable_name('zero_column')
+ last_column_name = scope.get_unique_variable_name("last_column")
+ zero_float = scope.get_unique_variable_name("zero_float")
+ one_float = scope.get_unique_variable_name("one_float")
+ zero_column = scope.get_unique_variable_name("zero_column")
container.add_initializer(
- one_float, onnx_proto.TensorProto.FLOAT,
- [1], np.ones(1))
+ one_float, onnx_proto.TensorProto.FLOAT, [1], np.ones(1)
+ )
container.add_initializer(
- zero_float, onnx_proto.TensorProto.FLOAT,
- [1], np.zeros(1))
- apply_mul(scope, [cast_column_name, zero_float], zero_column,
- container, broadcast=1)
- apply_add(scope, [zero_column, one_float], last_column_name,
- container, broadcast=1)
+ zero_float, onnx_proto.TensorProto.FLOAT, [1], np.zeros(1)
+ )
+ apply_mul(
+ scope,
+ [cast_column_name, zero_float],
+ zero_column,
+ container,
+ broadcast=1,
+ )
+ apply_add(
+ scope,
+ [zero_column, one_float],
+ last_column_name,
+ container,
+ broadcast=1,
+ )
- apply_concat(scope, [cast_result_name, last_column_name],
- concatenated_array_name, container, axis=1)
- container.add_node('ArgMax', concatenated_array_name,
- argmax_output_name, axis=1,
- name=scope.get_unique_operator_name('ArgMax'))
- if op.encode == 'onehot-dense':
- onehot_result_name = scope.get_unique_variable_name(
- 'onehot_result')
+ apply_concat(
+ scope,
+ [cast_result_name, last_column_name],
+ concatenated_array_name,
+ container,
+ axis=1,
+ )
+ container.add_node(
+ "ArgMax",
+ concatenated_array_name,
+ argmax_output_name,
+ axis=1,
+ name=scope.get_unique_operator_name("ArgMax"),
+ )
+ if op.encode == "onehot-dense":
+ onehot_result_name = scope.get_unique_variable_name("onehot_result")
container.add_node(
- 'OneHotEncoder', argmax_output_name,
+ "OneHotEncoder",
+ argmax_output_name,
onehot_result_name,
- name=scope.get_unique_operator_name('OneHotEncoder'),
+ name=scope.get_unique_operator_name("OneHotEncoder"),
cats_int64s=list(range(op.n_bins_[i])),
- op_domain='ai.onnx.ml')
- apply_reshape(scope, onehot_result_name, digitised_output_name[i],
- container, desired_shape=(-1, op.n_bins_[i]))
+ op_domain="ai.onnx.ml",
+ )
+ apply_reshape(
+ scope,
+ onehot_result_name,
+ digitised_output_name[i],
+ container,
+ desired_shape=(-1, op.n_bins_[i]),
+ )
else:
- apply_cast(scope, argmax_output_name, digitised_output_name[i],
- container, to=onnx_proto.TensorProto.FLOAT)
- apply_concat(scope, digitised_output_name,
- operator.outputs[0].full_name, container, axis=1)
+ apply_cast(
+ scope,
+ argmax_output_name,
+ digitised_output_name[i],
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
+ apply_concat(
+ scope, digitised_output_name, operator.outputs[0].full_name, container, axis=1
+ )
-register_converter('SklearnKBinsDiscretizer',
- convert_sklearn_k_bins_discretiser)
+register_converter("SklearnKBinsDiscretizer", convert_sklearn_k_bins_discretiser)
diff --git a/skl2onnx/operator_converters/k_means.py b/skl2onnx/operator_converters/k_means.py
index 10b9cabdf..901fade96 100644
--- a/skl2onnx/operator_converters/k_means.py
+++ b/skl2onnx/operator_converters/k_means.py
@@ -7,12 +7,20 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..algebra.onnx_ops import (
- OnnxReduceSumSquareApi18, OnnxGemm, OnnxMatMul,
- OnnxAdd, OnnxArgMin, OnnxCast, OnnxSqrt, OnnxMul)
-
-
-def convert_sklearn_kmeans(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ OnnxReduceSumSquareApi18,
+ OnnxGemm,
+ OnnxMatMul,
+ OnnxAdd,
+ OnnxArgMin,
+ OnnxCast,
+ OnnxSqrt,
+ OnnxMul,
+)
+
+
+def convert_sklearn_kmeans(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Computation graph of distances to all centroids for a batch of examples.
Note that a centriod is just the center of a cluster. We use ``[]`` to
@@ -81,32 +89,29 @@ def convert_sklearn_kmeans(scope: Scope, operator: Operator,
C2 = row_norms(C, squared=True).astype(dtype)
C = C.astype(dtype)
- rs = OnnxReduceSumSquareApi18(
- input_name, axes=[1], keepdims=1, op_version=opv)
+ rs = OnnxReduceSumSquareApi18(input_name, axes=[1], keepdims=1, op_version=opv)
- if options['gemm']:
+ if options["gemm"]:
N = X.get_first_dimension()
if isinstance(N, int):
- zeros = np.zeros((N, ), dtype=dtype)
+ zeros = np.zeros((N,), dtype=dtype)
else:
- zeros = OnnxMul(rs, np.array([0], dtype=dtype),
- op_version=opv)
- gemm_out = OnnxGemm(input_name, C, zeros, alpha=-2.,
- transB=1, op_version=opv)
+ zeros = OnnxMul(rs, np.array([0], dtype=dtype), op_version=opv)
+ gemm_out = OnnxGemm(input_name, C, zeros, alpha=-2.0, transB=1, op_version=opv)
else:
- gemm_out = OnnxMatMul(
- input_name, (C.T * (-2)).astype(dtype), op_version=opv)
+ gemm_out = OnnxMatMul(input_name, (C.T * (-2)).astype(dtype), op_version=opv)
z = OnnxAdd(rs, gemm_out, op_version=opv)
y2 = OnnxAdd(C2, z, op_version=opv)
- ll = OnnxArgMin(y2, axis=1, keepdims=0, output_names=out[:1],
- op_version=opv)
+ ll = OnnxArgMin(y2, axis=1, keepdims=0, output_names=out[:1], op_version=opv)
y2s = OnnxSqrt(y2, output_names=out[1:], op_version=opv)
ll.add_to(scope, container)
y2s.add_to(scope, container)
-register_converter('SklearnKMeans', convert_sklearn_kmeans,
- options={'gemm': [True, False]})
-register_converter('SklearnMiniBatchKMeans', convert_sklearn_kmeans,
- options={'gemm': [True, False]})
+register_converter(
+ "SklearnKMeans", convert_sklearn_kmeans, options={"gemm": [True, False]}
+)
+register_converter(
+ "SklearnMiniBatchKMeans", convert_sklearn_kmeans, options={"gemm": [True, False]}
+)
diff --git a/skl2onnx/operator_converters/kernel_pca.py b/skl2onnx/operator_converters/kernel_pca.py
index 6ffeab0ae..5f853b99e 100644
--- a/skl2onnx/operator_converters/kernel_pca.py
+++ b/skl2onnx/operator_converters/kernel_pca.py
@@ -4,9 +4,18 @@
from sklearn.preprocessing import normalize
from ..algebra.complex_functions import onnx_cdist
from ..algebra.onnx_ops import (
- OnnxMatMul, OnnxTranspose, OnnxDiv, OnnxSub, OnnxAdd,
- OnnxMul, OnnxPow, OnnxTanh, OnnxSqrt, OnnxExp,
- OnnxReduceSumApi11)
+ OnnxMatMul,
+ OnnxTranspose,
+ OnnxDiv,
+ OnnxSub,
+ OnnxAdd,
+ OnnxMul,
+ OnnxPow,
+ OnnxTanh,
+ OnnxSqrt,
+ OnnxExp,
+ OnnxReduceSumApi11,
+)
from ..algebra.onnx_operator import OnnxSubEstimator
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
@@ -14,8 +23,9 @@
from ..common.data_types import guess_numpy_type
-def kernel_centerer_converter(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def kernel_centerer_converter(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
op_version = container.target_opset
X = operator.inputs[0]
@@ -23,98 +33,121 @@ def kernel_centerer_converter(scope: Scope, operator: Operator,
N = np.array([op.K_fit_rows_.shape[0]], dtype=dtype)
K_pred_cols = OnnxDiv(
- OnnxReduceSumApi11(X, axes=[1], op_version=op_version),
- N, op_version=op_version)
+ OnnxReduceSumApi11(X, axes=[1], op_version=op_version), N, op_version=op_version
+ )
# K -= self.K_fit_rows_
# K -= K_pred_cols
# K += self.K_fit_all_
K1 = OnnxSub(X, op.K_fit_rows_.astype(dtype), op_version=op_version)
K2 = OnnxSub(K1, K_pred_cols, op_version=op_version)
- final = OnnxAdd(K2, np.array([op.K_fit_all_], dtype=dtype),
- op_version=op_version,
- output_names=operator.outputs[:1])
+ final = OnnxAdd(
+ K2,
+ np.array([op.K_fit_all_], dtype=dtype),
+ op_version=op_version,
+ output_names=operator.outputs[:1],
+ )
final.add_to(scope, container)
-def kernel_pca_converter(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def kernel_pca_converter(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
op_version = container.target_opset
X = operator.inputs[0]
dtype = guess_numpy_type(X.type)
options = container.get_options(op, dict(optim=None))
- optim = options['optim']
+ optim = options["optim"]
# def _get_kernel(self, X, Y=None):
# return pairwise_kernels(
# X, Y, metric=self.kernel, filter_params=True, **params)
if callable(op.kernel):
raise RuntimeError(
- "Unable to converter KernelPCA with a custom kernel %r."
- "" % op.kernel)
- if op.kernel == 'precomputed':
+ "Unable to converter KernelPCA with a custom kernel %r." "" % op.kernel
+ )
+ if op.kernel == "precomputed":
raise RuntimeError(
"The converter is not implemented when kernel=%r for "
- "type=%r." % (op.kernel, type(op)))
+ "type=%r." % (op.kernel, type(op))
+ )
kernel = op.kernel
params = {"gamma": op.gamma, "degree": op.degree, "coef0": op.coef0}
- if kernel == 'linear':
+ if kernel == "linear":
Y = op.X_fit_.astype(dtype)
dist = OnnxMatMul(
- X, OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
- op_version=op_version)
- elif kernel == 'cosine':
+ X,
+ OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
+ op_version=op_version,
+ )
+ elif kernel == "cosine":
yn = normalize(op.X_fit_, copy=True)
ynt = yn.astype(dtype)
norm = OnnxSqrt(
OnnxReduceSumApi11(
- OnnxPow(X, np.array([2], dtype=np.int64),
- op_version=op_version),
- axes=[1], op_version=op_version, keepdims=1),
- op_version=op_version)
+ OnnxPow(X, np.array([2], dtype=np.int64), op_version=op_version),
+ axes=[1],
+ op_version=op_version,
+ keepdims=1,
+ ),
+ op_version=op_version,
+ )
dist = OnnxMatMul(
OnnxDiv(X, norm, op_version=op_version),
OnnxTranspose(ynt, perm=[1, 0], op_version=op_version),
- op_version=op_version)
- elif kernel in ('poly', 'sigmoid'):
+ op_version=op_version,
+ )
+ elif kernel in ("poly", "sigmoid"):
Y = op.X_fit_.astype(dtype)
dot = OnnxMatMul(
- X, OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
- op_version=op_version)
- if params['gamma'] is None:
- gamma = np.array([1. / Y.shape[1]], dtype=dtype)
+ X,
+ OnnxTranspose(Y, perm=[1, 0], op_version=op_version),
+ op_version=op_version,
+ )
+ if params["gamma"] is None:
+ gamma = np.array([1.0 / Y.shape[1]], dtype=dtype)
else:
- gamma = np.array([params['gamma']], dtype=dtype)
+ gamma = np.array([params["gamma"]], dtype=dtype)
dot_g = OnnxMul(dot, gamma, op_version=op_version)
- dot_c = OnnxAdd(dot_g, np.array([params['coef0']], dtype=dtype),
- op_version=op_version)
- if kernel == 'poly':
- dist = OnnxPow(dot_c,
- np.array([params['degree']], dtype=np.int64),
- op_version=op_version)
+ dot_c = OnnxAdd(
+ dot_g, np.array([params["coef0"]], dtype=dtype), op_version=op_version
+ )
+ if kernel == "poly":
+ dist = OnnxPow(
+ dot_c,
+ np.array([params["degree"]], dtype=np.int64),
+ op_version=op_version,
+ )
else:
dist = OnnxTanh(dot_c, op_version=op_version)
- elif kernel == 'rbf':
- if optim == 'cdist':
+ elif kernel == "rbf":
+ if optim == "cdist":
from skl2onnx.algebra.custom_ops import OnnxCDist
+
Y = op.X_fit_.astype(dtype)
- pair = OnnxCDist(X, Y, metric='sqeuclidean', op_version=op_version)
+ pair = OnnxCDist(X, Y, metric="sqeuclidean", op_version=op_version)
elif optim is None:
Y = op.X_fit_.astype(dtype)
- dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
- dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
- pair = onnx_cdist(X, Y, metric='sqeuclidean', dtype=dtype,
- op_version=op_version,
- dim_in=dim_in, dim_out=dim_out)
+ dim_in = Y.shape[1] if hasattr(Y, "shape") else None
+ dim_out = Y.shape[0] if hasattr(Y, "shape") else None
+ pair = onnx_cdist(
+ X,
+ Y,
+ metric="sqeuclidean",
+ dtype=dtype,
+ op_version=op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ )
else:
raise ValueError("Unknown optimisation '{}'.".format(optim))
- if params['gamma'] is None:
- gamma = np.array([-1. / Y.shape[1]], dtype=dtype)
+ if params["gamma"] is None:
+ gamma = np.array([-1.0 / Y.shape[1]], dtype=dtype)
else:
- gamma = np.array([-params['gamma']], dtype=dtype)
+ gamma = np.array([-params["gamma"]], dtype=dtype)
pair_g = OnnxMul(pair, gamma, op_version=op_version)
dist = OnnxExp(pair_g, op_version=op_version)
else:
@@ -123,29 +156,34 @@ def kernel_pca_converter(scope: Scope, operator: Operator,
# K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
K = OnnxSubEstimator(op._centerer, dist, op_version=op_version)
- if hasattr(op, 'eigenvalues_'):
+ if hasattr(op, "eigenvalues_"):
# scikit-learn>=1.0
non_zeros = np.flatnonzero(op.eigenvalues_)
scaled_alphas = np.zeros_like(op.eigenvectors_)
- scaled_alphas[:, non_zeros] = (
- op.eigenvectors_[:, non_zeros] /
- np.sqrt(op.eigenvalues_[non_zeros]))
+ scaled_alphas[:, non_zeros] = op.eigenvectors_[:, non_zeros] / np.sqrt(
+ op.eigenvalues_[non_zeros]
+ )
else:
# scikit-learn<1.0
non_zeros = np.flatnonzero(op.lambdas_)
scaled_alphas = np.zeros_like(op.alphas_)
- scaled_alphas[:, non_zeros] = (
- op.alphas_[:, non_zeros] / np.sqrt(op.lambdas_[non_zeros]))
+ scaled_alphas[:, non_zeros] = op.alphas_[:, non_zeros] / np.sqrt(
+ op.lambdas_[non_zeros]
+ )
# np.dot(K, scaled_alphas)
- output = OnnxMatMul(K, scaled_alphas.astype(dtype),
- op_version=op_version,
- output_names=operator.outputs[:1])
+ output = OnnxMatMul(
+ K,
+ scaled_alphas.astype(dtype),
+ op_version=op_version,
+ output_names=operator.outputs[:1],
+ )
# register the output
output.add_to(scope, container)
-register_converter('SklearnKernelCenterer', kernel_centerer_converter)
-register_converter('SklearnKernelPCA', kernel_pca_converter,
- options={'optim': [None, 'cdist']})
+register_converter("SklearnKernelCenterer", kernel_centerer_converter)
+register_converter(
+ "SklearnKernelPCA", kernel_pca_converter, options={"optim": [None, "cdist"]}
+)
diff --git a/skl2onnx/operator_converters/label_binariser.py b/skl2onnx/operator_converters/label_binariser.py
index 7bbe840b6..35f35b9d0 100644
--- a/skl2onnx/operator_converters/label_binariser.py
+++ b/skl2onnx/operator_converters/label_binariser.py
@@ -9,87 +9,122 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_label_binariser(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_label_binariser(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""Converts Scikit Label Binariser model to onnx format."""
binariser_op = operator.raw_operator
classes = binariser_op.classes_
- if (hasattr(binariser_op, 'sparse_input_') and
- binariser_op.sparse_input_):
+ if hasattr(binariser_op, "sparse_input_") and binariser_op.sparse_input_:
raise RuntimeError("sparse is not supported for LabelBinarizer.")
- if (hasattr(binariser_op, 'y_type_') and
- binariser_op.y_type_ == "multilabel-indicator"):
+ if (
+ hasattr(binariser_op, "y_type_")
+ and binariser_op.y_type_ == "multilabel-indicator"
+ ):
if binariser_op.pos_label != 1:
- raise RuntimeError("pos_label != 1 is not supported "
- "for LabelBinarizer.")
+ raise RuntimeError("pos_label != 1 is not supported " "for LabelBinarizer.")
if list(classes) != list(range(len(classes))):
- raise RuntimeError("classes != [0, 1, ..., n_classes] is not "
- "supported for LabelBinarizer.")
- container.add_node('Identity', operator.inputs[0].full_name,
- operator.output_full_names,
- name=scope.get_unique_operator_name('identity'))
+ raise RuntimeError(
+ "classes != [0, 1, ..., n_classes] is not "
+ "supported for LabelBinarizer."
+ )
+ container.add_node(
+ "Identity",
+ operator.inputs[0].full_name,
+ operator.output_full_names,
+ name=scope.get_unique_operator_name("identity"),
+ )
else:
- zeros_tensor = np.full((1, len(classes)),
- binariser_op.neg_label, dtype=np.float32)
- unit_tensor = np.full((1, len(classes)),
- binariser_op.pos_label, dtype=np.float32)
+ zeros_tensor = np.full(
+ (1, len(classes)), binariser_op.neg_label, dtype=np.float32
+ )
+ unit_tensor = np.full(
+ (1, len(classes)), binariser_op.pos_label, dtype=np.float32
+ )
- classes_tensor_name = scope.get_unique_variable_name('classes_tensor')
+ classes_tensor_name = scope.get_unique_variable_name("classes_tensor")
equal_condition_tensor_name = scope.get_unique_variable_name(
- 'equal_condition_tensor')
- zeros_tensor_name = scope.get_unique_variable_name('zero_tensor')
- unit_tensor_name = scope.get_unique_variable_name('unit_tensor')
- where_result_name = scope.get_unique_variable_name('where_result')
+ "equal_condition_tensor"
+ )
+ zeros_tensor_name = scope.get_unique_variable_name("zero_tensor")
+ unit_tensor_name = scope.get_unique_variable_name("unit_tensor")
+ where_result_name = scope.get_unique_variable_name("where_result")
class_dtype = onnx_proto.TensorProto.STRING
- if (np.issubdtype(binariser_op.classes_.dtype, np.signedinteger) or
- binariser_op.classes_.dtype == np.bool_):
+ if (
+ np.issubdtype(binariser_op.classes_.dtype, np.signedinteger)
+ or binariser_op.classes_.dtype == np.bool_
+ ):
class_dtype = onnx_proto.TensorProto.INT64
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
- container.add_initializer(classes_tensor_name, class_dtype,
- [len(classes)], classes)
container.add_initializer(
- zeros_tensor_name, onnx_proto.TensorProto.FLOAT,
- zeros_tensor.shape, zeros_tensor.ravel())
+ classes_tensor_name, class_dtype, [len(classes)], classes
+ )
+ container.add_initializer(
+ zeros_tensor_name,
+ onnx_proto.TensorProto.FLOAT,
+ zeros_tensor.shape,
+ zeros_tensor.ravel(),
+ )
container.add_initializer(
- unit_tensor_name, onnx_proto.TensorProto.FLOAT,
- unit_tensor.shape, unit_tensor.ravel())
+ unit_tensor_name,
+ onnx_proto.TensorProto.FLOAT,
+ unit_tensor.shape,
+ unit_tensor.ravel(),
+ )
- reshaped_input_name = scope.get_unique_variable_name('reshaped_input')
- apply_reshape(scope, operator.inputs[0].full_name, reshaped_input_name,
- container, desired_shape=[-1, 1])
+ reshaped_input_name = scope.get_unique_variable_name("reshaped_input")
+ apply_reshape(
+ scope,
+ operator.inputs[0].full_name,
+ reshaped_input_name,
+ container,
+ desired_shape=[-1, 1],
+ )
# Models with classes_/inputs of string type would fail in the
# following step as Equal op does not support string comparison.
- container.add_node('Equal', [classes_tensor_name, reshaped_input_name],
- equal_condition_tensor_name,
- name=scope.get_unique_operator_name('equal'))
container.add_node(
- 'Where',
+ "Equal",
+ [classes_tensor_name, reshaped_input_name],
+ equal_condition_tensor_name,
+ name=scope.get_unique_operator_name("equal"),
+ )
+ container.add_node(
+ "Where",
[equal_condition_tensor_name, unit_tensor_name, zeros_tensor_name],
where_result_name,
- name=scope.get_unique_operator_name('where'))
+ name=scope.get_unique_operator_name("where"),
+ )
where_res = where_result_name
if len(binariser_op.classes_) == 2:
array_f_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
- pos_class_index_name = scope.get_unique_variable_name(
- 'pos_class_index')
+ "array_feature_extractor_result"
+ )
+ pos_class_index_name = scope.get_unique_variable_name("pos_class_index")
container.add_initializer(
- pos_class_index_name, onnx_proto.TensorProto.INT64, [], [1])
+ pos_class_index_name, onnx_proto.TensorProto.INT64, [], [1]
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[where_result_name, pos_class_index_name],
- array_f_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ array_f_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
where_res = array_f_name
- apply_cast(scope, where_res, operator.output_full_names, container,
- to=onnx_proto.TensorProto.INT64)
+ apply_cast(
+ scope,
+ where_res,
+ operator.output_full_names,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
-register_converter('SklearnLabelBinarizer', convert_sklearn_label_binariser)
+register_converter("SklearnLabelBinarizer", convert_sklearn_label_binariser)
diff --git a/skl2onnx/operator_converters/label_encoder.py b/skl2onnx/operator_converters/label_encoder.py
index 378090033..d6a807ea2 100644
--- a/skl2onnx/operator_converters/label_encoder.py
+++ b/skl2onnx/operator_converters/label_encoder.py
@@ -7,30 +7,36 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_label_encoder(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_label_encoder(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
- op_type = 'LabelEncoder'
- attrs = {'name': scope.get_unique_operator_name(op_type)}
+ op_type = "LabelEncoder"
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
classes = op.classes_
if np.issubdtype(classes.dtype, np.floating):
- attrs['keys_floats'] = classes
- elif (np.issubdtype(classes.dtype, np.signedinteger) or
- classes.dtype == np.bool_):
- attrs['keys_int64s'] = [int(i) for i in classes]
+ attrs["keys_floats"] = classes
+ elif np.issubdtype(classes.dtype, np.signedinteger) or classes.dtype == np.bool_:
+ attrs["keys_int64s"] = [int(i) for i in classes]
else:
- attrs['keys_strings'] = np.array([s.encode('utf-8') for s in classes])
- attrs['values_int64s'] = np.arange(len(classes))
+ attrs["keys_strings"] = np.array([s.encode("utf-8") for s in classes])
+ attrs["values_int64s"] = np.arange(len(classes))
- cop = container.target_opset_any_domain('ai.onnx.ml')
+ cop = container.target_opset_any_domain("ai.onnx.ml")
if cop is not None and cop < 2:
raise RuntimeError(
"LabelEncoder requires at least opset 2 for domain 'ai.onnx.ml' "
- "not {}".format(cop))
+ "not {}".format(cop)
+ )
- container.add_node(op_type, operator.input_full_names,
- operator.output_full_names, op_domain='ai.onnx.ml',
- op_version=2, **attrs)
+ container.add_node(
+ op_type,
+ operator.input_full_names,
+ operator.output_full_names,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ **attrs
+ )
-register_converter('SklearnLabelEncoder', convert_sklearn_label_encoder)
+register_converter("SklearnLabelEncoder", convert_sklearn_label_encoder)
diff --git a/skl2onnx/operator_converters/linear_classifier.py b/skl2onnx/operator_converters/linear_classifier.py
index 058a284e1..ce8d022e8 100644
--- a/skl2onnx/operator_converters/linear_classifier.py
+++ b/skl2onnx/operator_converters/linear_classifier.py
@@ -10,42 +10,48 @@
)
from sklearn.svm import LinearSVC
from ..common._apply_operation import (
- apply_cast, apply_add, apply_sigmoid, apply_softmax,
- apply_normalizer)
+ apply_cast,
+ apply_add,
+ apply_sigmoid,
+ apply_softmax,
+ apply_normalizer,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-from ..common.data_types import (
- BooleanTensorType, DoubleTensorType, guess_proto_type)
-from ..common.utils_classifier import (
- get_label_classes, _finalize_converter_classes)
+from ..common.data_types import BooleanTensorType, DoubleTensorType, guess_proto_type
+from ..common.utils_classifier import get_label_classes, _finalize_converter_classes
from ..proto import onnx_proto
-def apply_logistic(scope, input_name, output_name, container,
- proto_dtype):
+def apply_logistic(scope, input_name, output_name, container, proto_dtype):
sig_name = scope.get_unique_variable_name(input_name + "sig")
apply_sigmoid(scope, input_name, sig_name, container)
apply_normalizer(
- scope, sig_name, output_name, container, norm='L1',
- use_float=proto_dtype == onnx_proto.TensorProto.FLOAT)
+ scope,
+ sig_name,
+ output_name,
+ container,
+ norm="L1",
+ use_float=proto_dtype == onnx_proto.TensorProto.FLOAT,
+ )
-def convert_sklearn_linear_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_linear_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
coefficients = op.coef_.flatten().astype(float).tolist()
classes = get_label_classes(scope, op)
number_of_classes = len(classes)
- use_linear_op = container.is_allowed({'LinearClassifier'})
+ use_linear_op = container.is_allowed({"LinearClassifier"})
options = container.get_options(op, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
+ use_raw_scores = options["raw_scores"]
if isinstance(op.intercept_, (float, np.float32)) and op.intercept_ == 0:
# fit_intercept = False
- intercepts = ([0.0] * number_of_classes if number_of_classes != 2 else
- [0.0])
+ intercepts = [0.0] * number_of_classes if number_of_classes != 2 else [0.0]
else:
intercepts = op.intercept_.tolist()
@@ -54,187 +60,269 @@ def convert_sklearn_linear_classifier(scope: Scope, operator: Operator,
intercepts = list(map(lambda x: -1 * x, intercepts)) + intercepts
multi_class = 0
- if hasattr(op, 'multi_class'):
- if op.multi_class == 'ovr':
+ if hasattr(op, "multi_class"):
+ if op.multi_class == "ovr":
multi_class = 1
else:
multi_class = 2
- classifier_type = 'LinearClassifier'
- classifier_attrs = {
- 'name': scope.get_unique_operator_name(classifier_type)
- }
-
- classifier_attrs['coefficients'] = coefficients
- classifier_attrs['intercepts'] = intercepts
- classifier_attrs['multi_class'] = 1 if multi_class == 2 else 0
- if (use_raw_scores or
- isinstance(op, (LinearSVC, RidgeClassifier, RidgeClassifierCV))):
- classifier_attrs['post_transform'] = 'NONE'
+ classifier_type = "LinearClassifier"
+ classifier_attrs = {"name": scope.get_unique_operator_name(classifier_type)}
+
+ classifier_attrs["coefficients"] = coefficients
+ classifier_attrs["intercepts"] = intercepts
+ classifier_attrs["multi_class"] = 1 if multi_class == 2 else 0
+ if use_raw_scores or isinstance(
+ op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)
+ ):
+ classifier_attrs["post_transform"] = "NONE"
elif isinstance(op, LogisticRegression):
- ovr = (op.multi_class in ["ovr", "warn"] or
- (op.multi_class == 'auto' and (op.classes_.size <= 2 or
- op.solver == 'liblinear')))
- classifier_attrs['post_transform'] = (
- 'LOGISTIC' if ovr else 'SOFTMAX')
+ ovr = op.multi_class in ["ovr", "warn"] or (
+ op.multi_class == "auto"
+ and (op.classes_.size <= 2 or op.solver == "liblinear")
+ )
+ classifier_attrs["post_transform"] = "LOGISTIC" if ovr else "SOFTMAX"
else:
- classifier_attrs['post_transform'] = (
- 'LOGISTIC' if multi_class > 2 else 'SOFTMAX')
+ classifier_attrs["post_transform"] = (
+ "LOGISTIC" if multi_class > 2 else "SOFTMAX"
+ )
if all(isinstance(i, str) for i in classes):
class_labels = [str(i) for i in classes]
- classifier_attrs['classlabels_strings'] = class_labels
+ classifier_attrs["classlabels_strings"] = class_labels
elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes):
class_labels = [int(i) for i in classes]
- classifier_attrs['classlabels_ints'] = class_labels
+ classifier_attrs["classlabels_ints"] = class_labels
else:
- raise RuntimeError('Label vector must be a string or a integer '
- 'tensor.')
+ raise RuntimeError("Label vector must be a string or a integer " "tensor.")
- if (not use_linear_op or
- type(operator.inputs[0].type) in (DoubleTensorType, )):
+ if not use_linear_op or type(operator.inputs[0].type) in (DoubleTensorType,):
# Double -> double parameters not supported in ONNX LinearClassifier
proto_dtype = guess_proto_type(operator.inputs[0].type)
- coef = scope.get_unique_variable_name('coef')
- model_coef = np.array(
- classifier_attrs['coefficients'], dtype=np.float64)
+ coef = scope.get_unique_variable_name("coef")
+ model_coef = np.array(classifier_attrs["coefficients"], dtype=np.float64)
model_coef = model_coef.reshape((number_of_classes, -1)).T
container.add_initializer(
- coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist())
- intercept = scope.get_unique_variable_name('intercept')
- model_intercept = np.array(
- classifier_attrs['intercepts'], dtype=np.float64)
+ coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist()
+ )
+ intercept = scope.get_unique_variable_name("intercept")
+ model_intercept = np.array(classifier_attrs["intercepts"], dtype=np.float64)
model_intercept = model_intercept.reshape((number_of_classes, -1)).T
container.add_initializer(
- intercept, proto_dtype, model_intercept.shape,
- model_intercept.ravel().tolist())
- multiplied = scope.get_unique_variable_name('multiplied')
+ intercept,
+ proto_dtype,
+ model_intercept.shape,
+ model_intercept.ravel().tolist(),
+ )
+ multiplied = scope.get_unique_variable_name("multiplied")
container.add_node(
- 'MatMul', [operator.inputs[0].full_name, coef], multiplied,
- name=scope.get_unique_operator_name('MatMul'))
+ "MatMul",
+ [operator.inputs[0].full_name, coef],
+ multiplied,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
if use_raw_scores:
raw_score_name = operator.outputs[1].full_name
else:
- raw_score_name = scope.get_unique_variable_name('raw_scores')
+ raw_score_name = scope.get_unique_variable_name("raw_scores")
apply_add(scope, [multiplied, intercept], raw_score_name, container)
- argmax_output_name = scope.get_unique_variable_name('label')
- container.add_node('ArgMax', raw_score_name, argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'),
- axis=1)
+ argmax_output_name = scope.get_unique_variable_name("label")
+ container.add_node(
+ "ArgMax",
+ raw_score_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
_finalize_converter_classes(
- scope, argmax_output_name, operator.outputs[0].full_name,
- container, np.array(class_labels),
- onnx_proto.TensorProto.DOUBLE)
+ scope,
+ argmax_output_name,
+ operator.outputs[0].full_name,
+ container,
+ np.array(class_labels),
+ onnx_proto.TensorProto.DOUBLE,
+ )
if use_raw_scores:
return
- if classifier_attrs['post_transform'] == 'LOGISTIC':
- apply_logistic(scope, raw_score_name,
- operator.outputs[1].full_name, container,
- proto_dtype=onnx_proto.TensorProto.DOUBLE)
+ if classifier_attrs["post_transform"] == "LOGISTIC":
+ apply_logistic(
+ scope,
+ raw_score_name,
+ operator.outputs[1].full_name,
+ container,
+ proto_dtype=onnx_proto.TensorProto.DOUBLE,
+ )
return
- elif classifier_attrs['post_transform'] == 'SOFTMAX':
- apply_softmax(scope, raw_score_name,
- operator.outputs[1].full_name, container)
+ elif classifier_attrs["post_transform"] == "SOFTMAX":
+ apply_softmax(
+ scope, raw_score_name, operator.outputs[1].full_name, container
+ )
return
raise NotImplementedError(
"post_transform '{}' is not supported with double.".format(
- classifier_attrs['post_transform']))
+ classifier_attrs["post_transform"]
+ )
+ )
label_name = operator.outputs[0].full_name
input_name = operator.inputs[0].full_name
if isinstance(operator.inputs[0].type, BooleanTensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, input_name, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ apply_cast(
+ scope,
+ input_name,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
if use_raw_scores:
- container.add_node(classifier_type, input_name,
- [label_name, operator.outputs[1].full_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
- elif (isinstance(op, (LinearSVC, RidgeClassifier, RidgeClassifierCV))
- and op.classes_.shape[0] <= 2):
- raw_scores_tensor_name = scope.get_unique_variable_name(
- 'raw_scores_tensor')
+ container.add_node(
+ classifier_type,
+ input_name,
+ [label_name, operator.outputs[1].full_name],
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
+ elif (
+ isinstance(op, (LinearSVC, RidgeClassifier, RidgeClassifierCV))
+ and op.classes_.shape[0] <= 2
+ ):
+ raw_scores_tensor_name = scope.get_unique_variable_name("raw_scores_tensor")
positive_class_index_name = scope.get_unique_variable_name(
- 'positive_class_index')
+ "positive_class_index"
+ )
- container.add_initializer(positive_class_index_name,
- onnx_proto.TensorProto.INT64, [], [1])
+ container.add_initializer(
+ positive_class_index_name, onnx_proto.TensorProto.INT64, [], [1]
+ )
- if (hasattr(op, '_label_binarizer') and
- op._label_binarizer.y_type_ == 'multilabel-indicator'):
- y_pred_name = scope.get_unique_variable_name('y_pred')
- binarised_label_name = scope.get_unique_variable_name(
- 'binarised_label')
+ if (
+ hasattr(op, "_label_binarizer")
+ and op._label_binarizer.y_type_ == "multilabel-indicator"
+ ):
+ y_pred_name = scope.get_unique_variable_name("y_pred")
+ binarised_label_name = scope.get_unique_variable_name("binarised_label")
- container.add_node(classifier_type, input_name,
- [y_pred_name, raw_scores_tensor_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
container.add_node(
- 'Binarizer', raw_scores_tensor_name, binarised_label_name,
- op_domain='ai.onnx.ml')
+ classifier_type,
+ input_name,
+ [y_pred_name, raw_scores_tensor_name],
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
+ container.add_node(
+ "Binarizer",
+ raw_scores_tensor_name,
+ binarised_label_name,
+ op_domain="ai.onnx.ml",
+ )
apply_cast(
- scope, binarised_label_name, label_name,
- container, to=onnx_proto.TensorProto.INT64)
+ scope,
+ binarised_label_name,
+ label_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- container.add_node(classifier_type, input_name,
- [label_name, raw_scores_tensor_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
+ container.add_node(
+ classifier_type,
+ input_name,
+ [label_name, raw_scores_tensor_name],
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[raw_scores_tensor_name, positive_class_index_name],
- operator.outputs[1].full_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ operator.outputs[1].full_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
else:
# Make sure the probability sum is 1 over all classes
if multi_class > 0 and not isinstance(
- op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)):
+ op, (LinearSVC, RidgeClassifier, RidgeClassifierCV)
+ ):
probability_tensor_name = scope.get_unique_variable_name(
- 'probability_tensor')
- container.add_node(classifier_type, input_name,
- [label_name, probability_tensor_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
- use_float = type(operator.inputs[0].type) not in (
- DoubleTensorType, )
+ "probability_tensor"
+ )
+ container.add_node(
+ classifier_type,
+ input_name,
+ [label_name, probability_tensor_name],
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
+ use_float = type(operator.inputs[0].type) not in (DoubleTensorType,)
apply_normalizer(
- scope, probability_tensor_name, operator.outputs[1].full_name,
- container, norm='L1', use_float=use_float)
- elif (hasattr(op, '_label_binarizer') and
- op._label_binarizer.y_type_ == 'multilabel-indicator'):
- y_pred_name = scope.get_unique_variable_name('y_pred')
- binarised_label_name = scope.get_unique_variable_name(
- 'binarised_label')
+ scope,
+ probability_tensor_name,
+ operator.outputs[1].full_name,
+ container,
+ norm="L1",
+ use_float=use_float,
+ )
+ elif (
+ hasattr(op, "_label_binarizer")
+ and op._label_binarizer.y_type_ == "multilabel-indicator"
+ ):
+ y_pred_name = scope.get_unique_variable_name("y_pred")
+ binarised_label_name = scope.get_unique_variable_name("binarised_label")
container.add_node(
- classifier_type, input_name,
+ classifier_type,
+ input_name,
[y_pred_name, operator.outputs[1].full_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
container.add_node(
- 'Binarizer', operator.outputs[1].full_name,
- binarised_label_name, op_domain='ai.onnx.ml')
+ "Binarizer",
+ operator.outputs[1].full_name,
+ binarised_label_name,
+ op_domain="ai.onnx.ml",
+ )
apply_cast(
- scope, binarised_label_name, label_name,
- container, to=onnx_proto.TensorProto.INT64)
+ scope,
+ binarised_label_name,
+ label_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- container.add_node(classifier_type, input_name,
- [label_name, operator.outputs[1].full_name],
- op_domain='ai.onnx.ml', **classifier_attrs)
-
-
-register_converter('SklearnLinearClassifier',
- convert_sklearn_linear_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
-register_converter('SklearnLinearSVC', convert_sklearn_linear_classifier,
- options={'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
+ container.add_node(
+ classifier_type,
+ input_name,
+ [label_name, operator.outputs[1].full_name],
+ op_domain="ai.onnx.ml",
+ **classifier_attrs
+ )
+
+
+register_converter(
+ "SklearnLinearClassifier",
+ convert_sklearn_linear_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
+register_converter(
+ "SklearnLinearSVC",
+ convert_sklearn_linear_classifier,
+ options={
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
diff --git a/skl2onnx/operator_converters/linear_regressor.py b/skl2onnx/operator_converters/linear_regressor.py
index 8f210c5de..06055bfc2 100644
--- a/skl2onnx/operator_converters/linear_regressor.py
+++ b/skl2onnx/operator_converters/linear_regressor.py
@@ -2,140 +2,187 @@
import numpy as np
from ..common._apply_operation import (
- apply_cast, apply_add, apply_sqrt, apply_div, apply_sub,
- apply_reshape)
+ apply_cast,
+ apply_add,
+ apply_sqrt,
+ apply_div,
+ apply_sub,
+ apply_reshape,
+)
from ..common.data_types import (
- BooleanTensorType, Int64TensorType, DoubleTensorType,
- guess_numpy_type, guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ DoubleTensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..proto import onnx_proto
from ..algebra.onnx_ops import (
- OnnxAdd, OnnxCast, OnnxExp, OnnxIdentity, OnnxMatMul,
- OnnxReshape, OnnxSigmoid)
+ OnnxAdd,
+ OnnxCast,
+ OnnxExp,
+ OnnxIdentity,
+ OnnxMatMul,
+ OnnxReshape,
+ OnnxSigmoid,
+)
-def convert_sklearn_linear_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_linear_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
- use_linear_op = container.is_allowed({'LinearRegressor'})
+ use_linear_op = container.is_allowed({"LinearRegressor"})
- if (not use_linear_op or
- type(operator.inputs[0].type) in (DoubleTensorType, )):
+ if not use_linear_op or type(operator.inputs[0].type) in (DoubleTensorType,):
proto_dtype = guess_proto_type(operator.inputs[0].type)
- coef = scope.get_unique_variable_name('coef')
+ coef = scope.get_unique_variable_name("coef")
if len(op.coef_.shape) == 1:
model_coef = op.coef_.reshape((-1, 1))
else:
model_coef = op.coef_.T
container.add_initializer(
- coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist())
- intercept = scope.get_unique_variable_name('intercept')
- value_intercept = op.intercept_.reshape((-1, ))
+ coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist()
+ )
+ intercept = scope.get_unique_variable_name("intercept")
+ value_intercept = op.intercept_.reshape((-1,))
container.add_initializer(
- intercept, proto_dtype, value_intercept.shape,
- value_intercept.ravel().tolist())
- multiplied = scope.get_unique_variable_name('multiplied')
+ intercept,
+ proto_dtype,
+ value_intercept.shape,
+ value_intercept.ravel().tolist(),
+ )
+ multiplied = scope.get_unique_variable_name("multiplied")
container.add_node(
- 'MatMul', [operator.inputs[0].full_name, coef], multiplied,
- name=scope.get_unique_operator_name('MatMul'))
- resh = scope.get_unique_variable_name('resh')
- apply_add(scope, [multiplied, intercept],
- resh, container)
+ "MatMul",
+ [operator.inputs[0].full_name, coef],
+ multiplied,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ resh = scope.get_unique_variable_name("resh")
+ apply_add(scope, [multiplied, intercept], resh, container)
last_dim = 1 if len(model_coef.shape) == 1 else model_coef.shape[-1]
- apply_reshape(scope, resh, operator.outputs[0].full_name,
- container, desired_shape=(-1, last_dim))
+ apply_reshape(
+ scope,
+ resh,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=(-1, last_dim),
+ )
return
- op_type = 'LinearRegressor'
+ op_type = "LinearRegressor"
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype not in (np.float32, np.float64):
dtype = np.float32
- attrs = {'name': scope.get_unique_operator_name(op_type)}
- attrs['coefficients'] = op.coef_.astype(dtype).ravel()
- attrs['intercepts'] = np.array([op.intercept_], dtype=dtype).ravel()
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
+ attrs["coefficients"] = op.coef_.astype(dtype).ravel()
+ attrs["intercepts"] = np.array([op.intercept_], dtype=dtype).ravel()
if len(op.coef_.shape) == 2:
- attrs['targets'] = op.coef_.shape[0]
+ attrs["targets"] = op.coef_.shape[0]
input_name = operator.input_full_names
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container,
- to=(onnx_proto.TensorProto.DOUBLE
- if dtype == np.float64
- else onnx_proto.TensorProto.FLOAT))
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=(
+ onnx_proto.TensorProto.DOUBLE
+ if dtype == np.float64
+ else onnx_proto.TensorProto.FLOAT
+ ),
+ )
input_name = cast_input_name
- container.add_node(op_type, input_name,
- operator.outputs[0].full_name, op_domain='ai.onnx.ml',
- **attrs)
+ container.add_node(
+ op_type,
+ input_name,
+ operator.outputs[0].full_name,
+ op_domain="ai.onnx.ml",
+ **attrs,
+ )
-def convert_sklearn_bayesian_ridge(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_bayesian_ridge(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
convert_sklearn_linear_regressor(scope, operator, container)
op = operator.raw_operator
options = container.get_options(op, dict(return_std=False))
- return_std = options['return_std']
+ return_std = options["return_std"]
if not return_std:
return
proto_dtype = guess_proto_type(operator.inputs[0].type)
- if hasattr(op, 'normalize') and op.normalize:
+ if hasattr(op, "normalize") and op.normalize:
# if self.normalize:
# X = (X - self.X_offset_) / self.X_scale_
- offset = scope.get_unique_variable_name('offset')
+ offset = scope.get_unique_variable_name("offset")
container.add_initializer(
- offset, proto_dtype, op.X_offset_.shape,
- op.X_offset_.ravel().tolist())
- scale = scope.get_unique_variable_name('scale')
+ offset, proto_dtype, op.X_offset_.shape, op.X_offset_.ravel().tolist()
+ )
+ scale = scope.get_unique_variable_name("scale")
container.add_initializer(
- scale, proto_dtype, op.X_scale_.shape,
- op.X_scale_.ravel().tolist())
- centered = scope.get_unique_variable_name('centered')
- apply_sub(scope, [operator.inputs[0].full_name, offset],
- centered, container)
- scaled = scope.get_unique_variable_name('scaled')
+ scale, proto_dtype, op.X_scale_.shape, op.X_scale_.ravel().tolist()
+ )
+ centered = scope.get_unique_variable_name("centered")
+ apply_sub(scope, [operator.inputs[0].full_name, offset], centered, container)
+ scaled = scope.get_unique_variable_name("scaled")
apply_div(scope, [centered, scale], scaled, container)
input_name = scaled
else:
input_name = operator.inputs[0].full_name
# sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
- sigma = scope.get_unique_variable_name('sigma')
+ sigma = scope.get_unique_variable_name("sigma")
container.add_initializer(
- sigma, proto_dtype, op.sigma_.shape, op.sigma_.ravel().tolist())
- sigmaed0 = scope.get_unique_variable_name('sigma0')
+ sigma, proto_dtype, op.sigma_.shape, op.sigma_.ravel().tolist()
+ )
+ sigmaed0 = scope.get_unique_variable_name("sigma0")
container.add_node(
- 'MatMul', [input_name, sigma], sigmaed0,
- name=scope.get_unique_operator_name('MatMul'))
- sigmaed = scope.get_unique_variable_name('sigma')
+ "MatMul",
+ [input_name, sigma],
+ sigmaed0,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ sigmaed = scope.get_unique_variable_name("sigma")
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', sigmaed0, sigmaed, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ sigmaed0,
+ sigmaed,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [sigmaed0, axis_name], sigmaed,
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ [sigmaed0, axis_name],
+ sigmaed,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
# y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
# return y_mean, y_std
- std0 = scope.get_unique_variable_name('std0')
- alphainv = scope.get_unique_variable_name('alphainv')
+ std0 = scope.get_unique_variable_name("std0")
+ alphainv = scope.get_unique_variable_name("alphainv")
container.add_initializer(alphainv, proto_dtype, [1], [1 / op.alpha_])
apply_add(scope, [sigmaed, alphainv], std0, container)
apply_sqrt(scope, std0, operator.outputs[1].full_name, container)
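
For reference, the scikit-learn formula quoted in this converter's comments (the standard deviation produced when `return_std=True`) reads as follows in plain numpy. This is an illustrative sketch of the quoted formula, not part of the patch; the argument names simply mirror the fitted attributes used above.

```python
import numpy as np

def bayesian_ridge_std(X, sigma_, alpha_):
    # sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
    sigmas_squared_data = (X @ sigma_ * X).sum(axis=1)
    # y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
    return np.sqrt(sigmas_squared_data + 1.0 / alpha_)
```
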
-def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_poisson_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
X = operator.inputs[0]
out = operator.outputs
op = operator.raw_operator
@@ -149,16 +196,21 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator,
else:
input_var = X
- intercept = (op.intercept_.astype(dtype) if len(op.intercept_.shape) > 0
- else np.array([op.intercept_], dtype=dtype))
+ intercept = (
+ op.intercept_.astype(dtype)
+ if len(op.intercept_.shape) > 0
+ else np.array([op.intercept_], dtype=dtype)
+ )
eta = OnnxAdd(
OnnxMatMul(input_var, op.coef_.astype(dtype), op_version=opv),
- intercept, op_version=opv)
+ intercept,
+ op_version=opv,
+ )
if hasattr(op, "_link_instance"):
# scikit-learn < 1.1
- from sklearn.linear_model._glm.link import (
- IdentityLink, LogLink, LogitLink)
+ from sklearn.linear_model._glm.link import IdentityLink, LogLink, LogitLink
+
if isinstance(op._link_instance, IdentityLink):
Y = OnnxIdentity(eta, op_version=opv)
elif isinstance(op._link_instance, LogLink):
@@ -168,8 +220,8 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator,
else:
raise RuntimeError(
"Unexpected type %r for _link_instance "
- "in operator type %r." % (
- type(op._link_instance), type(op)))
+ "in operator type %r." % (type(op._link_instance), type(op))
+ )
else:
# scikit-learn >= 1.1
from sklearn._loss.loss import (
@@ -180,35 +232,40 @@ def convert_sklearn_poisson_regressor(scope: Scope, operator: Operator,
HalfSquaredError,
HalfTweedieLoss,
HalfTweedieLossIdentity,
- PinballLoss
+ PinballLoss,
)
+
loss = op._get_loss()
if isinstance(
loss,
- (AbsoluteError, HalfSquaredError,
- HalfTweedieLossIdentity, PinballLoss)):
+ (AbsoluteError, HalfSquaredError, HalfTweedieLossIdentity, PinballLoss),
+ ):
Y = OnnxIdentity(eta, op_version=opv)
- elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss,
- HalfTweedieLoss)):
+ elif isinstance(loss, (HalfPoissonLoss, HalfGammaLoss, HalfTweedieLoss)):
Y = OnnxExp(eta, op_version=opv)
elif isinstance(loss, HalfBinomialLoss):
Y = OnnxSigmoid(eta, op_version=opv)
else:
raise RuntimeError(
- f"Unexpected type of link for {loss!r} loss "
- "in operator type {op!r}.")
+                f"Unexpected type of link for {loss!r} loss in operator type {op!r}."
+ )
last_dim = 1 if len(op.coef_.shape) == 1 else op.coef_.shape[-1]
- final = OnnxReshape(Y, np.array([-1, last_dim], dtype=np.int64),
- op_version=opv, output_names=out[:1])
+ final = OnnxReshape(
+ Y,
+ np.array([-1, last_dim], dtype=np.int64),
+ op_version=opv,
+ output_names=out[:1],
+ )
final.add_to(scope, container)
-register_converter('SklearnLinearRegressor', convert_sklearn_linear_regressor)
-register_converter('SklearnLinearSVR', convert_sklearn_linear_regressor)
-register_converter('SklearnBayesianRidge', convert_sklearn_bayesian_ridge,
- options={'return_std': [True, False]})
-register_converter('SklearnPoissonRegressor',
- convert_sklearn_poisson_regressor)
-register_converter('SklearnTweedieRegressor',
- convert_sklearn_poisson_regressor)
+register_converter("SklearnLinearRegressor", convert_sklearn_linear_regressor)
+register_converter("SklearnLinearSVR", convert_sklearn_linear_regressor)
+register_converter(
+ "SklearnBayesianRidge",
+ convert_sklearn_bayesian_ridge,
+ options={"return_std": [True, False]},
+)
+register_converter("SklearnPoissonRegressor", convert_sklearn_poisson_regressor)
+register_converter("SklearnTweedieRegressor", convert_sklearn_poisson_regressor)
diff --git a/skl2onnx/operator_converters/local_outlier_factor.py b/skl2onnx/operator_converters/local_outlier_factor.py
index bc83cc1f3..2dcd0398a 100644
--- a/skl2onnx/operator_converters/local_outlier_factor.py
+++ b/skl2onnx/operator_converters/local_outlier_factor.py
@@ -5,26 +5,41 @@
from onnx import TensorProto
from ..common._registration import register_converter
from ..common.data_types import (
- BooleanTensorType, Int64TensorType,
- guess_numpy_type, guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..algebra.onnx_ops import (
- OnnxCast, OnnxLess, OnnxMul, OnnxAdd, OnnxDiv,
- OnnxGather, OnnxReduceMeanApi18, OnnxMax, OnnxSqueezeApi11)
+ OnnxCast,
+ OnnxLess,
+ OnnxMul,
+ OnnxAdd,
+ OnnxDiv,
+ OnnxGather,
+ OnnxReduceMeanApi18,
+ OnnxMax,
+ OnnxSqueezeApi11,
+)
from .nearest_neighbours import onnx_nearest_neighbors_indices_k
def convert_sklearn_local_outlier_factor(
- scope, operator, container, op_type='TreeEnsembleRegressor',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
op = operator.raw_operator
if not op.novelty:
raise RuntimeError(
- "The converter only converts the model %r is novelty is True."
- "" % type(op))
+            "The converter only converts the model %r if novelty is True." % type(op)
+ )
outputs = operator.outputs
opv = container.target_opset
- options = container.get_options(
- op, dict(score_samples=None, optim=None))
+ options = container.get_options(op, dict(score_samples=None, optim=None))
X = operator.inputs[0]
dtype = guess_numpy_type(operator.inputs[0].type)
@@ -32,68 +47,88 @@ def convert_sklearn_local_outlier_factor(
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
X = OnnxCast(X, to=proto_dtype, op_version=opv)
- metric = (op.effective_metric_ if hasattr(op, 'effective_metric_') else
- op.metric)
+ metric = op.effective_metric_ if hasattr(op, "effective_metric_") else op.metric
neighb = op._fit_X.astype(dtype)
k = op.n_neighbors_
kwargs = {}
if op.p != 2:
- if options['optim'] == 'cdist':
+ if options["optim"] == "cdist":
warnings.warn(
"Option p=%r may not be compatible with the runtime. "
"See https://github.com/microsoft/onnxruntime/blob/master/"
- "docs/ContribOperators.md#com.microsoft.CDist.")
- kwargs['p'] = op.p
+                "docs/ContribOperators.md#com.microsoft.CDist." % op.p
+ )
+ kwargs["p"] = op.p
top_k, dist = onnx_nearest_neighbors_indices_k(
- X, neighb, k, metric, dtype=dtype,
- op_version=opv, keep_distances=True,
- optim=options.get('optim', None),
- **kwargs)
+ X,
+ neighb,
+ k,
+ metric,
+ dtype=dtype,
+ op_version=opv,
+ keep_distances=True,
+ optim=options.get("optim", None),
+ **kwargs
+ )
# dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1]
# reach_dist_array = np.maximum(distances_X, dist_k)
- dist_k_ = OnnxGather(op._distances_fit_X_.astype(dtype),
- top_k, op_version=opv)
+ dist_k_ = OnnxGather(op._distances_fit_X_.astype(dtype), top_k, op_version=opv)
dist_k = OnnxSqueezeApi11(
- OnnxGather(dist_k_, np.array([op.n_neighbors_ - 1],
- dtype=np.int64),
- axis=2, op_version=opv),
- axes=[2], op_version=opv)
- dist_k.set_onnx_name_prefix('dist_k')
+ OnnxGather(
+ dist_k_,
+ np.array([op.n_neighbors_ - 1], dtype=np.int64),
+ axis=2,
+ op_version=opv,
+ ),
+ axes=[2],
+ op_version=opv,
+ )
+ dist_k.set_onnx_name_prefix("dist_k")
reach_dist_array = OnnxMax(
OnnxMul(dist, np.array([-1], dtype=dtype), op_version=opv),
- dist_k, op_version=opv)
+ dist_k,
+ op_version=opv,
+ )
     # X_lrd = 1.0 / (np.mean(reach_dist_array, axis=1) + 1e-10)
X_lrd = OnnxDiv(
np.array([1], dtype=dtype),
OnnxAdd(
- OnnxReduceMeanApi18(reach_dist_array, axes=[1],
- op_version=opv, keepdims=1),
- np.array([1e-10], dtype=dtype), op_version=opv),
- op_version=opv)
- X_lrd.set_onnx_name_prefix('X_lrd')
+ OnnxReduceMeanApi18(reach_dist_array, axes=[1], op_version=opv, keepdims=1),
+ np.array([1e-10], dtype=dtype),
+ op_version=opv,
+ ),
+ op_version=opv,
+ )
+ X_lrd.set_onnx_name_prefix("X_lrd")
# lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
lrd_ratios_array = OnnxDiv(
- OnnxGather(op._lrd.astype(dtype), top_k, op_version=opv),
- X_lrd, op_version=opv)
- lrd_ratios_array.set_onnx_name_prefix('lrd_ratios_array')
+ OnnxGather(op._lrd.astype(dtype), top_k, op_version=opv), X_lrd, op_version=opv
+ )
+ lrd_ratios_array.set_onnx_name_prefix("lrd_ratios_array")
# -np.mean(lrd_ratios_array, axis=1)
- if options['score_samples']:
+ if options["score_samples"]:
output_names_score_samples = [outputs[2]]
else:
output_names_score_samples = None
- score_samples = OnnxReduceMeanApi18(
- lrd_ratios_array, axes=[1], op_version=opv)
- score_samples.set_onnx_name_prefix('score_samples')
+ score_samples = OnnxReduceMeanApi18(lrd_ratios_array, axes=[1], op_version=opv)
+ score_samples.set_onnx_name_prefix("score_samples")
score_samples_neg = OnnxMul(
- score_samples, np.array([-1], dtype=dtype), op_version=opv,
- output_names=output_names_score_samples)
- final = OnnxAdd(score_samples_neg, np.array([-op.offset_], dtype=dtype),
- op_version=opv, output_names=[outputs[1]])
+ score_samples,
+ np.array([-1], dtype=dtype),
+ op_version=opv,
+ output_names=output_names_score_samples,
+ )
+ final = OnnxAdd(
+ score_samples_neg,
+ np.array([-op.offset_], dtype=dtype),
+ op_version=opv,
+ output_names=[outputs[1]],
+ )
# labels
# is_inlier = np.ones(X.shape[0], dtype=int)
@@ -103,19 +138,26 @@ def convert_sklearn_local_outlier_factor(
OnnxMul(
OnnxCast(
OnnxLess(final, np.array([0], dtype=dtype), op_version=opv),
- to=TensorProto.INT64, op_version=opv),
- np.array([-2], dtype=np.int64), op_version=opv),
- np.array([1], dtype=np.int64), op_version=opv,
- output_names=outputs[0].full_name)
- predict.set_onnx_name_prefix('predict')
+ to=TensorProto.INT64,
+ op_version=opv,
+ ),
+ np.array([-2], dtype=np.int64),
+ op_version=opv,
+ ),
+ np.array([1], dtype=np.int64),
+ op_version=opv,
+ output_names=outputs[0].full_name,
+ )
+ predict.set_onnx_name_prefix("predict")
predict.add_to(scope, container)
final.add_to(scope, container)
- if options['score_samples']:
+ if options["score_samples"]:
score_samples_neg.add_to(scope, container)
-register_converter('SklearnLocalOutlierFactor',
- convert_sklearn_local_outlier_factor,
- options={'score_samples': [True, False],
- 'optim': [None, 'cdist']})
+register_converter(
+ "SklearnLocalOutlierFactor",
+ convert_sklearn_local_outlier_factor,
+ options={"score_samples": [True, False], "optim": [None, "cdist"]},
+)
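
For reference, the scoring path this converter expresses with Gather, Max, ReduceMean, Div and Add nodes, written as a numpy sketch of the scikit-learn formulas quoted in its comments (an illustration only; `op` stands for the fitted `LocalOutlierFactor`).

```python
import numpy as np

def lof_scores(dist, top_k, op):
    # dist_k = self._distances_fit_X_[neighbors_indices, self.n_neighbors_ - 1]
    dist_k = op._distances_fit_X_[top_k, op.n_neighbors_ - 1]
    # reach_dist_array = np.maximum(distances_X, dist_k)
    reach_dist = np.maximum(dist, dist_k)
    x_lrd = 1.0 / (np.mean(reach_dist, axis=1) + 1e-10)
    lrd_ratios = op._lrd[top_k] / x_lrd[:, np.newaxis]
    score_samples = -np.mean(lrd_ratios, axis=1)
    decision = score_samples - op.offset_
    # Less -> Cast -> Mul(-2) -> Add(1): -1 for outliers, +1 for inliers
    labels = (decision < 0).astype(np.int64) * -2 + 1
    return labels, decision, score_samples
```
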
diff --git a/skl2onnx/operator_converters/multilayer_perceptron.py b/skl2onnx/operator_converters/multilayer_perceptron.py
index 8decac8d0..dc209b660 100644
--- a/skl2onnx/operator_converters/multilayer_perceptron.py
+++ b/skl2onnx/operator_converters/multilayer_perceptron.py
@@ -4,8 +4,13 @@
import numpy as np
from ..common.data_types import guess_proto_type
from ..common._apply_operation import (
- apply_add, apply_cast, apply_concat, apply_identity,
- apply_reshape, apply_sub)
+ apply_add,
+ apply_cast,
+ apply_concat,
+ apply_identity,
+ apply_reshape,
+ apply_sub,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -18,50 +23,70 @@ def _forward_pass(scope, container, model, activations, proto_dtype):
the neurons in the hidden layers and the output layer.
"""
activations_map = {
- 'identity': 'Identity', 'tanh': 'Tanh', 'logistic': 'Sigmoid',
- 'relu': 'Relu', 'softmax': 'Softmax'
+ "identity": "Identity",
+ "tanh": "Tanh",
+ "logistic": "Sigmoid",
+ "relu": "Relu",
+ "softmax": "Softmax",
}
out_activation_result_name = scope.get_unique_variable_name(
- 'out_activations_result')
+ "out_activations_result"
+ )
# Iterate over the hidden layers
for i in range(model.n_layers_ - 1):
- coefficient_name = scope.get_unique_variable_name('coefficient')
- intercepts_name = scope.get_unique_variable_name('intercepts')
- mul_result_name = scope.get_unique_variable_name('mul_result')
- add_result_name = scope.get_unique_variable_name('add_result')
+ coefficient_name = scope.get_unique_variable_name("coefficient")
+ intercepts_name = scope.get_unique_variable_name("intercepts")
+ mul_result_name = scope.get_unique_variable_name("mul_result")
+ add_result_name = scope.get_unique_variable_name("add_result")
container.add_initializer(
- coefficient_name, proto_dtype,
- model.coefs_[i].shape, model.coefs_[i].ravel())
+ coefficient_name,
+ proto_dtype,
+ model.coefs_[i].shape,
+ model.coefs_[i].ravel(),
+ )
container.add_initializer(
- intercepts_name, proto_dtype,
- [1, len(model.intercepts_[i])], model.intercepts_[i])
+ intercepts_name,
+ proto_dtype,
+ [1, len(model.intercepts_[i])],
+ model.intercepts_[i],
+ )
container.add_node(
- 'MatMul', [activations[i], coefficient_name],
- mul_result_name, name=scope.get_unique_operator_name('MatMul'))
- apply_add(scope, [mul_result_name, intercepts_name],
- add_result_name, container, broadcast=1)
+ "MatMul",
+ [activations[i], coefficient_name],
+ mul_result_name,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ apply_add(
+ scope,
+ [mul_result_name, intercepts_name],
+ add_result_name,
+ container,
+ broadcast=1,
+ )
# For the hidden layers
if (i + 1) != (model.n_layers_ - 1):
- activations_result_name = scope.get_unique_variable_name(
- 'next_activations')
+ activations_result_name = scope.get_unique_variable_name("next_activations")
container.add_node(
- activations_map[model.activation], add_result_name,
+ activations_map[model.activation],
+ add_result_name,
activations_result_name,
- name=scope.get_unique_operator_name(
- activations_map[model.activation]))
+ name=scope.get_unique_operator_name(activations_map[model.activation]),
+ )
activations.append(activations_result_name)
# For the last layer
container.add_node(
- activations_map[model.out_activation_], add_result_name,
+ activations_map[model.out_activation_],
+ add_result_name,
out_activation_result_name,
- name=scope.get_unique_operator_name(activations_map[model.activation]))
+ name=scope.get_unique_operator_name(activations_map[model.activation]),
+ )
activations.append(out_activation_result_name)
return activations
@@ -72,19 +97,18 @@ def _predict(scope, input_name, container, model, proto_dtype):
This function initialises the input layer, calls _forward_pass()
and returns the final layer.
"""
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, input_name, cast_input_name,
- container, to=proto_dtype)
+ apply_cast(scope, input_name, cast_input_name, container, to=proto_dtype)
# forward propagate
- activations = _forward_pass(scope, container, model, [cast_input_name],
- proto_dtype)
+ activations = _forward_pass(scope, container, model, [cast_input_name], proto_dtype)
return activations[-1]
-def convert_sklearn_mlp_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_mlp_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for MLPClassifier.
This function calls _predict() which returns the probability scores
@@ -97,79 +121,119 @@ def convert_sklearn_mlp_classifier(scope: Scope, operator: Operator,
classes = mlp_op.classes_
class_type = onnx_proto.TensorProto.STRING
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
- y_pred = _predict(scope, operator.inputs[0].full_name, container, mlp_op,
- proto_dtype)
+ y_pred = _predict(
+ scope, operator.inputs[0].full_name, container, mlp_op, proto_dtype
+ )
- if (np.issubdtype(mlp_op.classes_.dtype, np.floating) or
- mlp_op.classes_.dtype == np.bool_):
+ if (
+ np.issubdtype(mlp_op.classes_.dtype, np.floating)
+ or mlp_op.classes_.dtype == np.bool_
+ ):
class_type = onnx_proto.TensorProto.INT32
classes = classes.astype(np.int32)
elif np.issubdtype(mlp_op.classes_.dtype, np.integer):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
if len(classes) == 2:
- unity_name = scope.get_unique_variable_name('unity')
+ unity_name = scope.get_unique_variable_name("unity")
negative_class_proba_name = scope.get_unique_variable_name(
- 'negative_class_proba')
+ "negative_class_proba"
+ )
container.add_initializer(unity_name, proto_dtype, [], [1])
- apply_sub(scope, [unity_name, y_pred],
- negative_class_proba_name, container, broadcast=1)
- apply_concat(scope, [negative_class_proba_name, y_pred],
- operator.outputs[1].full_name, container, axis=1)
+ apply_sub(
+ scope,
+ [unity_name, y_pred],
+ negative_class_proba_name,
+ container,
+ broadcast=1,
+ )
+ apply_concat(
+ scope,
+ [negative_class_proba_name, y_pred],
+ operator.outputs[1].full_name,
+ container,
+ axis=1,
+ )
else:
- apply_identity(scope, y_pred,
- operator.outputs[1].full_name, container)
+ apply_identity(scope, y_pred, operator.outputs[1].full_name, container)
- if mlp_op._label_binarizer.y_type_ == 'multilabel-indicator':
- binariser_output_name = scope.get_unique_variable_name(
- 'binariser_output')
+ if mlp_op._label_binarizer.y_type_ == "multilabel-indicator":
+ binariser_output_name = scope.get_unique_variable_name("binariser_output")
- container.add_node('Binarizer', y_pred, binariser_output_name,
- threshold=0.5, op_domain='ai.onnx.ml')
+ container.add_node(
+ "Binarizer",
+ y_pred,
+ binariser_output_name,
+ threshold=0.5,
+ op_domain="ai.onnx.ml",
+ )
apply_cast(
- scope, binariser_output_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ scope,
+ binariser_output_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- classes_name = scope.get_unique_variable_name('classes')
- container.add_initializer(classes_name, class_type,
- classes.shape, classes)
+ classes_name = scope.get_unique_variable_name("classes")
+ container.add_initializer(classes_name, class_type, classes.shape, classes)
- container.add_node('ArgMax', operator.outputs[1].full_name,
- argmax_output_name, axis=1,
- name=scope.get_unique_operator_name('ArgMax'))
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArgMax",
+ operator.outputs[1].full_name,
+ argmax_output_name,
+ axis=1,
+ name=scope.get_unique_operator_name("ArgMax"),
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == onnx_proto.TensorProto.INT32:
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
-
- apply_reshape(scope, array_feature_extractor_result_name,
- reshaped_result_name, container,
- desired_shape=(-1,))
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(-1,),
+ )
apply_cast(
- scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=(-1,))
-
-
-def convert_sklearn_mlp_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=(-1,),
+ )
+
+
+def convert_sklearn_mlp_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for MLPRegressor.
This function calls _predict() which returns the scores.
@@ -180,16 +244,21 @@ def convert_sklearn_mlp_regressor(scope: Scope, operator: Operator,
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
- y_pred = _predict(scope, operator.inputs[0].full_name, container, mlp_op,
- proto_dtype=proto_dtype)
- apply_reshape(scope, y_pred, operator.output_full_names,
- container, desired_shape=(-1, 1))
-
-
-register_converter('SklearnMLPClassifier',
- convert_sklearn_mlp_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnMLPRegressor',
- convert_sklearn_mlp_regressor)
+ y_pred = _predict(
+ scope, operator.inputs[0].full_name, container, mlp_op, proto_dtype=proto_dtype
+ )
+ apply_reshape(
+ scope, y_pred, operator.output_full_names, container, desired_shape=(-1, 1)
+ )
+
+
+register_converter(
+ "SklearnMLPClassifier",
+ convert_sklearn_mlp_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter("SklearnMLPRegressor", convert_sklearn_mlp_regressor)
diff --git a/skl2onnx/operator_converters/multioutput.py b/skl2onnx/operator_converters/multioutput.py
index f28e67ed0..41c5b7124 100644
--- a/skl2onnx/operator_converters/multioutput.py
+++ b/skl2onnx/operator_converters/multioutput.py
@@ -4,8 +4,8 @@
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-from ..algebra.onnx_ops import (
- OnnxConcat, OnnxReshapeApi13, OnnxIdentity)
+from ..algebra.onnx_ops import OnnxConcat, OnnxReshapeApi13, OnnxIdentity
+
try:
from ..algebra.onnx_ops import OnnxSequenceConstruct
except ImportError:
@@ -15,7 +15,8 @@
def convert_multi_output_regressor_converter(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *MultiOutputRegressor* into *ONNX* format.
"""
@@ -26,59 +27,69 @@ def convert_multi_output_regressor_converter(
OnnxReshapeApi13(
OnnxSubEstimator(sub, inp, op_version=op_version),
np.array([-1, 1], dtype=np.int64),
- op_version=op_version)
- for sub in op.estimators_]
+ op_version=op_version,
+ )
+ for sub in op.estimators_
+ ]
- output = OnnxConcat(*y_list, axis=1, op_version=op_version,
- output_names=[operator.outputs[0]])
+ output = OnnxConcat(
+ *y_list, axis=1, op_version=op_version, output_names=[operator.outputs[0]]
+ )
output.add_to(scope=scope, container=container)
def convert_multi_output_classifier_converter(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *MultiOutputClassifier* into *ONNX* format.
"""
if OnnxSequenceConstruct is None:
- raise RuntimeError(
- "This converter requires opset>=11.")
+ raise RuntimeError("This converter requires opset>=11.")
op_version = container.target_opset
op_version = container.target_opset
op = operator.raw_operator
inp = operator.inputs[0]
options = scope.get_options(op)
- if options.get('nocl', True):
+ if options.get("nocl", True):
options = options.copy()
else:
options = {}
- options.update({'zipmap': False})
- y_list = [OnnxSubEstimator(sub, inp, op_version=op_version,
- options=options)
- for sub in op.estimators_]
+ options.update({"zipmap": False})
+ y_list = [
+ OnnxSubEstimator(sub, inp, op_version=op_version, options=options)
+ for sub in op.estimators_
+ ]
# labels
- label_list = [OnnxReshapeApi13(y[0], np.array([-1, 1], dtype=np.int64),
- op_version=op_version)
- for y in y_list]
+ label_list = [
+ OnnxReshapeApi13(y[0], np.array([-1, 1], dtype=np.int64), op_version=op_version)
+ for y in y_list
+ ]
# probabilities
- proba_list = [OnnxIdentity(y[1], op_version=op_version)
- for y in y_list]
- label = OnnxConcat(*label_list, axis=1, op_version=op_version,
- output_names=[operator.outputs[0]])
+ proba_list = [OnnxIdentity(y[1], op_version=op_version) for y in y_list]
+ label = OnnxConcat(
+ *label_list, axis=1, op_version=op_version, output_names=[operator.outputs[0]]
+ )
label.add_to(scope=scope, container=container)
proba = OnnxSequenceConstruct(
- *proba_list, op_version=op_version,
- output_names=[operator.outputs[1]])
+ *proba_list, op_version=op_version, output_names=[operator.outputs[1]]
+ )
proba.add_to(scope=scope, container=container)
-register_converter('SklearnMultiOutputRegressor',
- convert_multi_output_regressor_converter)
-register_converter('SklearnMultiOutputClassifier',
- convert_multi_output_classifier_converter,
- options={'nocl': [False, True],
- 'output_class_labels': [False, True],
- 'zipmap': [False, True]})
+register_converter(
+ "SklearnMultiOutputRegressor", convert_multi_output_regressor_converter
+)
+register_converter(
+ "SklearnMultiOutputClassifier",
+ convert_multi_output_classifier_converter,
+ options={
+ "nocl": [False, True],
+ "output_class_labels": [False, True],
+ "zipmap": [False, True],
+ },
+)
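
For reference, the numpy-level equivalent of the two graphs assembled above: per-estimator predictions reshaped to single columns and concatenated for `MultiOutputRegressor`, and concatenated labels plus a sequence of per-estimator probability tensors for `MultiOutputClassifier` (illustration only).

```python
import numpy as np

def multioutput_regressor(estimators, X):
    # one Reshape(-1, 1) per sub-estimator, then Concat(axis=1)
    return np.concatenate([e.predict(X).reshape(-1, 1) for e in estimators], axis=1)

def multioutput_classifier(estimators, X):
    labels = np.concatenate(
        [e.predict(X).reshape(-1, 1) for e in estimators], axis=1)
    # probabilities stay per estimator, as a SequenceConstruct of tensors
    probabilities = [e.predict_proba(X) for e in estimators]
    return labels, probabilities
```
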
diff --git a/skl2onnx/operator_converters/multiply_op.py b/skl2onnx/operator_converters/multiply_op.py
index 733e7c8d1..725e07f9c 100644
--- a/skl2onnx/operator_converters/multiply_op.py
+++ b/skl2onnx/operator_converters/multiply_op.py
@@ -8,16 +8,21 @@
from ..proto import onnx_proto
-def convert_sklearn_multiply(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
- operand_name = scope.get_unique_variable_name(
- 'operand')
+def convert_sklearn_multiply(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
+ operand_name = scope.get_unique_variable_name("operand")
- container.add_initializer(operand_name, onnx_proto.TensorProto.FLOAT,
- [], [operator.operand])
+ container.add_initializer(
+ operand_name, onnx_proto.TensorProto.FLOAT, [], [operator.operand]
+ )
- apply_mul(scope, [operator.inputs[0].full_name, operand_name],
- operator.outputs[0].full_name, container)
+ apply_mul(
+ scope,
+ [operator.inputs[0].full_name, operand_name],
+ operator.outputs[0].full_name,
+ container,
+ )
-register_converter('SklearnMultiply', convert_sklearn_multiply)
+register_converter("SklearnMultiply", convert_sklearn_multiply)
diff --git a/skl2onnx/operator_converters/naive_bayes.py b/skl2onnx/operator_converters/naive_bayes.py
index 08b7716dc..e1a63d3ae 100644
--- a/skl2onnx/operator_converters/naive_bayes.py
+++ b/skl2onnx/operator_converters/naive_bayes.py
@@ -4,13 +4,23 @@
import numpy as np
from ..proto import onnx_proto
from ..common._apply_operation import (
- apply_add, apply_cast, apply_div, apply_exp,
- apply_log, apply_mul, apply_pow, apply_sub, apply_reshape,
+ apply_add,
+ apply_cast,
+ apply_div,
+ apply_exp,
+ apply_log,
+ apply_mul,
+ apply_pow,
+ apply_sub,
+ apply_reshape,
apply_transpose,
)
from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_numpy_type,
- guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -18,205 +28,292 @@
def _joint_log_likelihood_bernoulli(
- scope, container, input_name, feature_log_prob_name,
- class_log_prior_name, binarize, feature_count, proto_dtype,
- sum_result_name):
+ scope,
+ container,
+ input_name,
+ feature_log_prob_name,
+ class_log_prior_name,
+ binarize,
+ feature_count,
+ proto_dtype,
+ sum_result_name,
+):
"""
Calculate joint log likelihood for Bernoulli Naive Bayes model.
"""
- constant_name = scope.get_unique_variable_name('constant')
- exp_result_name = scope.get_unique_variable_name('exp_result')
- sub_result_name = scope.get_unique_variable_name('sub_result')
- neg_prob_name = scope.get_unique_variable_name('neg_prob')
- sum_neg_prob_name = scope.get_unique_variable_name('sum_neg_prob')
- difference_matrix_name = scope.get_unique_variable_name(
- 'difference_matrix')
- dot_prod_name = scope.get_unique_variable_name('dot_prod')
- partial_sum_result_name = scope.get_unique_variable_name(
- 'partial_sum_result')
+ constant_name = scope.get_unique_variable_name("constant")
+ exp_result_name = scope.get_unique_variable_name("exp_result")
+ sub_result_name = scope.get_unique_variable_name("sub_result")
+ neg_prob_name = scope.get_unique_variable_name("neg_prob")
+ sum_neg_prob_name = scope.get_unique_variable_name("sum_neg_prob")
+ difference_matrix_name = scope.get_unique_variable_name("difference_matrix")
+ dot_prod_name = scope.get_unique_variable_name("dot_prod")
+ partial_sum_result_name = scope.get_unique_variable_name("partial_sum_result")
# Define constant slightly greater than 1 to avoid log 0
# scenarios when calculating log (1 - x) and x=1 in line 70
container.add_initializer(constant_name, proto_dtype, [], [1.000000001])
if binarize is not None:
- threshold_name = scope.get_unique_variable_name('threshold')
- condition_name = scope.get_unique_variable_name('condition')
- cast_values_name = scope.get_unique_variable_name('cast_values')
- zero_tensor_name = scope.get_unique_variable_name('zero_tensor')
- binarised_input_name = scope.get_unique_variable_name(
- 'binarised_input')
+ threshold_name = scope.get_unique_variable_name("threshold")
+ condition_name = scope.get_unique_variable_name("condition")
+ cast_values_name = scope.get_unique_variable_name("cast_values")
+ zero_tensor_name = scope.get_unique_variable_name("zero_tensor")
+ binarised_input_name = scope.get_unique_variable_name("binarised_input")
num_features = feature_count.shape[1]
- container.add_initializer(threshold_name, proto_dtype,
- [1], [binarize])
+ container.add_initializer(threshold_name, proto_dtype, [1], [binarize])
container.add_initializer(
zero_tensor_name,
- proto_dtype, [1, num_features],
- np.zeros((1, num_features)).ravel())
+ proto_dtype,
+ [1, num_features],
+ np.zeros((1, num_features)).ravel(),
+ )
container.add_node(
- 'Greater', [input_name, threshold_name],
- condition_name, name=scope.get_unique_operator_name('Greater'),
- op_version=9)
- apply_cast(scope, condition_name, cast_values_name, container,
- to=proto_dtype)
- apply_add(scope, [zero_tensor_name, cast_values_name],
- binarised_input_name, container, broadcast=1)
+ "Greater",
+ [input_name, threshold_name],
+ condition_name,
+ name=scope.get_unique_operator_name("Greater"),
+ op_version=9,
+ )
+ apply_cast(scope, condition_name, cast_values_name, container, to=proto_dtype)
+ apply_add(
+ scope,
+ [zero_tensor_name, cast_values_name],
+ binarised_input_name,
+ container,
+ broadcast=1,
+ )
input_name = binarised_input_name
apply_exp(scope, feature_log_prob_name, exp_result_name, container)
- apply_sub(scope, [constant_name, exp_result_name], sub_result_name,
- container, broadcast=1)
+ apply_sub(
+ scope, [constant_name, exp_result_name], sub_result_name, container, broadcast=1
+ )
apply_log(scope, sub_result_name, neg_prob_name, container)
if container.target_opset < 13:
- container.add_node('ReduceSum', neg_prob_name,
- sum_neg_prob_name, axes=[0],
- name=scope.get_unique_operator_name('ReduceSum'))
+ container.add_node(
+ "ReduceSum",
+ neg_prob_name,
+ sum_neg_prob_name,
+ axes=[0],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [0])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0])
container.add_node(
- 'ReduceSum', [neg_prob_name, axis_name], sum_neg_prob_name,
- name=scope.get_unique_operator_name('ReduceSum'))
- apply_sub(scope, [feature_log_prob_name, neg_prob_name],
- difference_matrix_name, container)
+ "ReduceSum",
+ [neg_prob_name, axis_name],
+ sum_neg_prob_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_sub(
+ scope, [feature_log_prob_name, neg_prob_name], difference_matrix_name, container
+ )
container.add_node(
- 'MatMul', [input_name, difference_matrix_name],
- dot_prod_name, name=scope.get_unique_operator_name('MatMul'))
-
- apply_add(scope, [dot_prod_name, sum_neg_prob_name],
- partial_sum_result_name, container)
- apply_add(scope, [partial_sum_result_name, class_log_prior_name],
- sum_result_name, container)
+ "MatMul",
+ [input_name, difference_matrix_name],
+ dot_prod_name,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+
+ apply_add(
+ scope, [dot_prod_name, sum_neg_prob_name], partial_sum_result_name, container
+ )
+ apply_add(
+ scope,
+ [partial_sum_result_name, class_log_prior_name],
+ sum_result_name,
+ container,
+ )
return sum_result_name
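
For reference, a numpy sketch of the Bernoulli joint log likelihood this helper rebuilds; the `1.000000001` constant matches the initializer above that guards against `log(0)`. Illustration only, written against the estimator's fitted attributes.

```python
import numpy as np

def bernoulli_jll(model, X):
    if model.binarize is not None:
        X = (X > model.binarize).astype(np.float32)
    neg_prob = np.log(1.000000001 - np.exp(model.feature_log_prob_))
    jll = X @ (model.feature_log_prob_ - neg_prob).T
    return jll + neg_prob.sum(axis=1) + model.class_log_prior_
```
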
def _joint_log_likelihood_gaussian(
- scope, container, input_name, model, proto_dtype, sum_result_name):
+ scope, container, input_name, model, proto_dtype, sum_result_name
+):
"""
Calculate joint log likelihood for Gaussian Naive Bayes model.
"""
features = model.theta_.shape[1]
jointi = np.log(model.class_prior_)
- var_sigma = model.var_ if hasattr(model, 'var_') else model.sigma_
- sigma_sum_log = - 0.5 * np.sum(np.log(2. * np.pi * var_sigma), axis=1)
- theta_name = scope.get_unique_variable_name('theta')
- sigma_name = scope.get_unique_variable_name('sigma')
- sigma_sum_log_name = scope.get_unique_variable_name('sigma_sum_log')
- jointi_name = scope.get_unique_variable_name('jointi')
- exponent_name = scope.get_unique_variable_name('exponent')
- prod_operand_name = scope.get_unique_variable_name('prod_operand')
- reshaped_input_name = scope.get_unique_variable_name('reshaped_input')
- subtracted_input_name = scope.get_unique_variable_name('subtracted_input')
- pow_result_name = scope.get_unique_variable_name('pow_result')
- div_result_name = scope.get_unique_variable_name('div_result')
- reduced_sum_name = scope.get_unique_variable_name('reduced_sum')
- mul_result_name = scope.get_unique_variable_name('mul_result')
- part_log_likelihood_name = scope.get_unique_variable_name(
- 'part_log_likelihood')
+ var_sigma = model.var_ if hasattr(model, "var_") else model.sigma_
+ sigma_sum_log = -0.5 * np.sum(np.log(2.0 * np.pi * var_sigma), axis=1)
+ theta_name = scope.get_unique_variable_name("theta")
+ sigma_name = scope.get_unique_variable_name("sigma")
+ sigma_sum_log_name = scope.get_unique_variable_name("sigma_sum_log")
+ jointi_name = scope.get_unique_variable_name("jointi")
+ exponent_name = scope.get_unique_variable_name("exponent")
+ prod_operand_name = scope.get_unique_variable_name("prod_operand")
+ reshaped_input_name = scope.get_unique_variable_name("reshaped_input")
+ subtracted_input_name = scope.get_unique_variable_name("subtracted_input")
+ pow_result_name = scope.get_unique_variable_name("pow_result")
+ div_result_name = scope.get_unique_variable_name("div_result")
+ reduced_sum_name = scope.get_unique_variable_name("reduced_sum")
+ mul_result_name = scope.get_unique_variable_name("mul_result")
+ part_log_likelihood_name = scope.get_unique_variable_name("part_log_likelihood")
theta = model.theta_.reshape((1, -1, features))
sigma = var_sigma.reshape((1, -1, features))
- container.add_initializer(theta_name, proto_dtype, theta.shape,
- theta.ravel())
- container.add_initializer(sigma_name, proto_dtype, sigma.shape,
- sigma.ravel())
- container.add_initializer(jointi_name, proto_dtype, [1, jointi.shape[0]],
- jointi)
+ container.add_initializer(theta_name, proto_dtype, theta.shape, theta.ravel())
+ container.add_initializer(sigma_name, proto_dtype, sigma.shape, sigma.ravel())
+ container.add_initializer(jointi_name, proto_dtype, [1, jointi.shape[0]], jointi)
container.add_initializer(
- sigma_sum_log_name, proto_dtype,
- [1, sigma_sum_log.shape[0]], sigma_sum_log.ravel())
+ sigma_sum_log_name,
+ proto_dtype,
+ [1, sigma_sum_log.shape[0]],
+ sigma_sum_log.ravel(),
+ )
container.add_initializer(exponent_name, proto_dtype, [], [2])
container.add_initializer(prod_operand_name, proto_dtype, [], [0.5])
- apply_reshape(scope, input_name, reshaped_input_name, container,
- desired_shape=[-1, 1, features])
- apply_sub(scope, [reshaped_input_name, theta_name], subtracted_input_name,
- container, broadcast=1)
- apply_pow(scope, [subtracted_input_name, exponent_name], pow_result_name,
- container, broadcast=1)
- apply_div(scope, [pow_result_name, sigma_name], div_result_name,
- container, broadcast=1)
+ apply_reshape(
+ scope,
+ input_name,
+ reshaped_input_name,
+ container,
+ desired_shape=[-1, 1, features],
+ )
+ apply_sub(
+ scope,
+ [reshaped_input_name, theta_name],
+ subtracted_input_name,
+ container,
+ broadcast=1,
+ )
+ apply_pow(
+ scope,
+ [subtracted_input_name, exponent_name],
+ pow_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_div(
+ scope, [pow_result_name, sigma_name], div_result_name, container, broadcast=1
+ )
if container.target_opset < 13:
- container.add_node('ReduceSum', div_result_name,
- reduced_sum_name, axes=[2], keepdims=0,
- name=scope.get_unique_operator_name('ReduceSum'))
+ container.add_node(
+ "ReduceSum",
+ div_result_name,
+ reduced_sum_name,
+ axes=[2],
+ keepdims=0,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [2])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [2])
container.add_node(
- 'ReduceSum', [div_result_name, axis_name], reduced_sum_name,
- keepdims=0, name=scope.get_unique_operator_name('ReduceSum'))
- apply_mul(scope, [reduced_sum_name, prod_operand_name], mul_result_name,
- container, broadcast=1)
- apply_sub(scope, [sigma_sum_log_name, mul_result_name],
- part_log_likelihood_name,
- container, broadcast=1)
- apply_add(scope, [jointi_name, part_log_likelihood_name],
- sum_result_name, container, broadcast=1)
+ "ReduceSum",
+ [div_result_name, axis_name],
+ reduced_sum_name,
+ keepdims=0,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
+ apply_mul(
+ scope,
+ [reduced_sum_name, prod_operand_name],
+ mul_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_sub(
+ scope,
+ [sigma_sum_log_name, mul_result_name],
+ part_log_likelihood_name,
+ container,
+ broadcast=1,
+ )
+ apply_add(
+ scope,
+ [jointi_name, part_log_likelihood_name],
+ sum_result_name,
+ container,
+ broadcast=1,
+ )
return sum_result_name
def _joint_log_likelihood_categorical(
- scope, container, input_name, model, sum_result_name):
+ scope, container, input_name, model, sum_result_name
+):
"""
Calculate joint log likelihood for Categorical Naive Bayes model.
"""
jll_list = []
- class_log_prior_name = scope.get_unique_variable_name('class_log_prior')
- summation_jll_name = scope.get_unique_variable_name('summation_jll')
+ class_log_prior_name = scope.get_unique_variable_name("class_log_prior")
+ summation_jll_name = scope.get_unique_variable_name("summation_jll")
container.add_initializer(
- class_log_prior_name, onnx_proto.TensorProto.FLOAT,
- model.class_log_prior_.shape, model.class_log_prior_)
+ class_log_prior_name,
+ onnx_proto.TensorProto.FLOAT,
+ model.class_log_prior_.shape,
+ model.class_log_prior_,
+ )
- n_features = (model.n_features_in_
- if hasattr(model, 'n_features_in_')
- else model.n_features_)
+ n_features = (
+ model.n_features_in_ if hasattr(model, "n_features_in_") else model.n_features_
+ )
for i in range(n_features):
- feature_index_name = scope.get_unique_variable_name('feature_index')
- indices_name = scope.get_unique_variable_name('indices')
- cast_indices_name = scope.get_unique_variable_name('cast_indices')
- feature_log_proba_name = scope.get_unique_variable_name(
- 'feature_log_proba')
- jll_name = scope.get_unique_variable_name('jll')
- transposed_jll_name = scope.get_unique_variable_name('transposed_jll')
+ feature_index_name = scope.get_unique_variable_name("feature_index")
+ indices_name = scope.get_unique_variable_name("indices")
+ cast_indices_name = scope.get_unique_variable_name("cast_indices")
+ feature_log_proba_name = scope.get_unique_variable_name("feature_log_proba")
+ jll_name = scope.get_unique_variable_name("jll")
+ transposed_jll_name = scope.get_unique_variable_name("transposed_jll")
container.add_initializer(
- feature_index_name, onnx_proto.TensorProto.INT64, [], [i])
+ feature_index_name, onnx_proto.TensorProto.INT64, [], [i]
+ )
container.add_initializer(
- feature_log_proba_name, onnx_proto.TensorProto.FLOAT,
+ feature_log_proba_name,
+ onnx_proto.TensorProto.FLOAT,
model.feature_log_prob_[i].shape,
- model.feature_log_prob_[i].ravel())
+ model.feature_log_prob_[i].ravel(),
+ )
container.add_node(
- 'ArrayFeatureExtractor', [input_name, feature_index_name],
- indices_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_cast(scope, indices_name, cast_indices_name,
- container, to=onnx_proto.TensorProto.INT64)
+ "ArrayFeatureExtractor",
+ [input_name, feature_index_name],
+ indices_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_cast(
+ scope,
+ indices_name,
+ cast_indices_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[feature_log_proba_name, cast_indices_name],
- jll_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_transpose(scope, jll_name, transposed_jll_name,
- container, perm=(1, 0))
+ jll_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_transpose(scope, jll_name, transposed_jll_name, container, perm=(1, 0))
jll_list.append(transposed_jll_name)
- container.add_node('Sum', jll_list,
- summation_jll_name,
- name=scope.get_unique_operator_name('Sum'))
- apply_add(scope, [summation_jll_name, class_log_prior_name],
- sum_result_name, container, broadcast=1)
+ container.add_node(
+ "Sum", jll_list, summation_jll_name, name=scope.get_unique_operator_name("Sum")
+ )
+ apply_add(
+ scope,
+ [summation_jll_name, class_log_prior_name],
+ sum_result_name,
+ container,
+ broadcast=1,
+ )
return sum_result_name
-def convert_sklearn_naive_bayes(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_naive_bayes(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# Computational graph:
#
# Note: In the following graph, variable names are in lower case
@@ -380,149 +477,233 @@ def convert_sklearn_naive_bayes(scope: Scope, operator: Operator,
classes = get_label_classes(scope, nb_op)
output_shape = (-1,)
- sum_result_name = scope.get_unique_variable_name('sum_result')
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
- cast2_result_name = scope.get_unique_variable_name('cast2_result')
- reshaped_result_name = scope.get_unique_variable_name('reshaped_result')
- classes_name = scope.get_unique_variable_name('classes')
+ sum_result_name = scope.get_unique_variable_name("sum_result")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ cast2_result_name = scope.get_unique_variable_name("cast2_result")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+ classes_name = scope.get_unique_variable_name("classes")
reduce_log_sum_exp_result_name = scope.get_unique_variable_name(
- 'reduce_log_sum_exp_result')
- log_prob_name = scope.get_unique_variable_name('log_prob')
+ "reduce_log_sum_exp_result"
+ )
+ log_prob_name = scope.get_unique_variable_name("log_prob")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
class_type = onnx_proto.TensorProto.STRING
- if (np.issubdtype(classes.dtype, np.floating) or
- classes.dtype == np.bool_):
+ if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_:
class_type = onnx_proto.TensorProto.INT32
classes = classes.astype(np.int32)
elif np.issubdtype(classes.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
container.add_initializer(classes_name, class_type, classes.shape, classes)
- if operator.type not in ('SklearnCategoricalNB', 'SklearnGaussianNB'):
- class_log_prior_name = scope.get_unique_variable_name(
- 'class_log_prior')
- feature_log_prob_name = scope.get_unique_variable_name(
- 'feature_log_prob')
+ if operator.type not in ("SklearnCategoricalNB", "SklearnGaussianNB"):
+ class_log_prior_name = scope.get_unique_variable_name("class_log_prior")
+ feature_log_prob_name = scope.get_unique_variable_name("feature_log_prob")
- class_log_prior = nb_op.class_log_prior_.astype(
- float_dtype).reshape((1, -1))
+ class_log_prior = nb_op.class_log_prior_.astype(float_dtype).reshape((1, -1))
feature_log_prob = nb_op.feature_log_prob_.T.astype(float_dtype)
container.add_initializer(
- feature_log_prob_name, proto_dtype,
- feature_log_prob.shape, feature_log_prob.flatten())
+ feature_log_prob_name,
+ proto_dtype,
+ feature_log_prob.shape,
+ feature_log_prob.flatten(),
+ )
container.add_initializer(
- class_log_prior_name, proto_dtype,
- class_log_prior.shape, class_log_prior.flatten())
+ class_log_prior_name,
+ proto_dtype,
+ class_log_prior.shape,
+ class_log_prior.flatten(),
+ )
input_name = operator.inputs[0].full_name
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_dtype)
+ apply_cast(
+ scope, operator.input_full_names, cast_input_name, container, to=proto_dtype
+ )
input_name = cast_input_name
- if operator.type == 'SklearnBernoulliNB':
+ if operator.type == "SklearnBernoulliNB":
sum_result_name = _joint_log_likelihood_bernoulli(
- scope, container, input_name, feature_log_prob_name,
- class_log_prior_name, nb_op.binarize, nb_op.feature_count_,
- proto_dtype, sum_result_name)
- elif operator.type == 'SklearnGaussianNB':
+ scope,
+ container,
+ input_name,
+ feature_log_prob_name,
+ class_log_prior_name,
+ nb_op.binarize,
+ nb_op.feature_count_,
+ proto_dtype,
+ sum_result_name,
+ )
+ elif operator.type == "SklearnGaussianNB":
sum_result_name = _joint_log_likelihood_gaussian(
- scope, container, input_name, nb_op,
- proto_dtype, sum_result_name)
- elif operator.type == 'SklearnCategoricalNB':
+ scope, container, input_name, nb_op, proto_dtype, sum_result_name
+ )
+ elif operator.type == "SklearnCategoricalNB":
sum_result_name = _joint_log_likelihood_categorical(
- scope, container, input_name, nb_op, sum_result_name)
+ scope, container, input_name, nb_op, sum_result_name
+ )
else:
# MultinomialNB or ComplementNB
matmul_result_name = (
- scope.get_unique_variable_name('matmul_result')
- if operator.type == 'SklearnMultinomialNB' or len(classes) == 1
- else sum_result_name)
+ scope.get_unique_variable_name("matmul_result")
+ if operator.type == "SklearnMultinomialNB" or len(classes) == 1
+ else sum_result_name
+ )
container.add_node(
- 'MatMul', [input_name, feature_log_prob_name],
- matmul_result_name, name=scope.get_unique_operator_name('MatMul'))
- if operator.type == 'SklearnMultinomialNB' or len(classes) == 1:
- apply_add(scope, [matmul_result_name, class_log_prior_name],
- sum_result_name, container, broadcast=1)
+ "MatMul",
+ [input_name, feature_log_prob_name],
+ matmul_result_name,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ if operator.type == "SklearnMultinomialNB" or len(classes) == 1:
+ apply_add(
+ scope,
+ [matmul_result_name, class_log_prior_name],
+ sum_result_name,
+ container,
+ broadcast=1,
+ )
- container.add_node('ArgMax', sum_result_name,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
+ container.add_node(
+ "ArgMax",
+ sum_result_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
# Calculation of class probability
log_prob_shape = [-1, 1]
- reshaped_log_prob_name = scope.get_unique_variable_name(
- 'reshaped_log_prob')
+ reshaped_log_prob_name = scope.get_unique_variable_name("reshaped_log_prob")
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceLogSumExp', [sum_result_name, axis_name],
+ "ReduceLogSumExp",
+ [sum_result_name, axis_name],
reduce_log_sum_exp_result_name,
- name=scope.get_unique_operator_name('ReduceLogSumExp'),
- keepdims=0)
+ name=scope.get_unique_operator_name("ReduceLogSumExp"),
+ keepdims=0,
+ )
else:
container.add_node(
- 'ReduceLogSumExp', sum_result_name,
+ "ReduceLogSumExp",
+ sum_result_name,
reduce_log_sum_exp_result_name,
- name=scope.get_unique_operator_name('ReduceLogSumExp'),
- axes=[1], keepdims=0)
- apply_reshape(scope, reduce_log_sum_exp_result_name,
- reshaped_log_prob_name, container,
- desired_shape=log_prob_shape)
- apply_sub(scope, [sum_result_name, reshaped_log_prob_name], log_prob_name,
- container, broadcast=1)
+ name=scope.get_unique_operator_name("ReduceLogSumExp"),
+ axes=[1],
+ keepdims=0,
+ )
+ apply_reshape(
+ scope,
+ reduce_log_sum_exp_result_name,
+ reshaped_log_prob_name,
+ container,
+ desired_shape=log_prob_shape,
+ )
+ apply_sub(
+ scope,
+ [sum_result_name, reshaped_log_prob_name],
+ log_prob_name,
+ container,
+ broadcast=1,
+ )
apply_exp(scope, log_prob_name, operator.outputs[1].full_name, container)
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
# Reshape op does not seem to handle INT64 tensor even though it is
# listed as one of the supported types in the doc, so Cast was
# required here.
if class_type == onnx_proto.TensorProto.INT32:
- apply_cast(scope, array_feature_extractor_result_name,
- cast2_result_name, container,
- to=proto_dtype)
- apply_reshape(scope, cast2_result_name, reshaped_result_name,
- container, desired_shape=output_shape)
- apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ apply_cast(
+ scope,
+ array_feature_extractor_result_name,
+ cast2_result_name,
+ container,
+ to=proto_dtype,
+ )
+ apply_reshape(
+ scope,
+ cast2_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=output_shape,
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else: # string labels
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=output_shape)
-
-
-register_converter('SklearnBernoulliNB', convert_sklearn_naive_bayes,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnCategoricalNB', convert_sklearn_naive_bayes,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnComplementNB', convert_sklearn_naive_bayes,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnGaussianNB', convert_sklearn_naive_bayes,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnMultinomialNB', convert_sklearn_naive_bayes,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=output_shape,
+ )
+
+
+register_converter(
+ "SklearnBernoulliNB",
+ convert_sklearn_naive_bayes,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnCategoricalNB",
+ convert_sklearn_naive_bayes,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnComplementNB",
+ convert_sklearn_naive_bayes,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnGaussianNB",
+ convert_sklearn_naive_bayes,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnMultinomialNB",
+ convert_sklearn_naive_bayes,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
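
For reference, a numpy sketch of the Gaussian joint log likelihood and of the probability normalisation (`exp(jll - logsumexp(jll, axis=1))`) that the converter assembles with ReduceSum, ReduceLogSumExp, Sub and Exp nodes (illustration only).

```python
import numpy as np

def gaussian_nb_proba(model, X):
    var = model.var_ if hasattr(model, "var_") else model.sigma_
    jointi = np.log(model.class_prior_)
    sigma_sum_log = -0.5 * np.sum(np.log(2.0 * np.pi * var), axis=1)
    diff = X[:, np.newaxis, :] - model.theta_[np.newaxis, :, :]
    jll = jointi + sigma_sum_log - 0.5 * np.sum(diff**2 / var, axis=2)
    # normalise: log_prob = jll - logsumexp(jll, axis=1)
    m = jll.max(axis=1, keepdims=True)
    log_prob = jll - (m + np.log(np.exp(jll - m).sum(axis=1, keepdims=True)))
    return np.exp(log_prob)
```
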
diff --git a/skl2onnx/operator_converters/nearest_neighbours.py b/skl2onnx/operator_converters/nearest_neighbours.py
index f74a56e41..2bc79ad78 100644
--- a/skl2onnx/operator_converters/nearest_neighbours.py
+++ b/skl2onnx/operator_converters/nearest_neighbours.py
@@ -29,6 +29,7 @@
OnnxTopK_1,
OnnxTranspose,
)
+
try:
from ..algebra.onnx_ops import (
OnnxConstantOfShape,
@@ -54,16 +55,27 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common.data_types import (
- Int64TensorType, DoubleTensorType,
- guess_numpy_type, guess_proto_type)
+ Int64TensorType,
+ DoubleTensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common.utils_classifier import get_label_classes
from ..proto import onnx_proto
from ._gp_kernels import py_make_float_array
-def onnx_nearest_neighbors_indices_k(X, Y, k, metric='euclidean', dtype=None,
- op_version=None, keep_distances=False,
- optim=None, **kwargs):
+def onnx_nearest_neighbors_indices_k(
+ X,
+ Y,
+ k,
+ metric="euclidean",
+ dtype=None,
+ op_version=None,
+ keep_distances=False,
+ optim=None,
+ **kwargs
+):
"""
     Retrieves the nearest neighbours in *ONNX*.
:param X: features or *OnnxOperatorMixin*
@@ -78,45 +90,66 @@ def onnx_nearest_neighbors_indices_k(X, Y, k, metric='euclidean', dtype=None,
:param kwargs: additional parameters for function @see fn onnx_cdist
:return: top indices, top distances
"""
- kwargs_dist = {k: v for k, v in kwargs.items() if k == 'p'}
- kwargs_topk = {k: v for k, v in kwargs.items() if k != 'p'}
- if optim == 'cdist':
+ kwargs_dist = {k: v for k, v in kwargs.items() if k == "p"}
+ kwargs_topk = {k: v for k, v in kwargs.items() if k != "p"}
+ if optim == "cdist":
from skl2onnx.algebra.custom_ops import OnnxCDist
- dist = OnnxCDist(X, Y, metric=metric, op_version=op_version,
- **kwargs_dist)
+
+ dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, **kwargs_dist)
elif optim is None:
- dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
- dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
- dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
- op_version=op_version,
- dim_in=dim_in, dim_out=dim_out,
- **kwargs_dist)
+ dim_in = Y.shape[1] if hasattr(Y, "shape") else None
+ dim_out = Y.shape[0] if hasattr(Y, "shape") else None
+ dist = onnx_cdist(
+ X,
+ Y,
+ metric=metric,
+ dtype=dtype,
+ op_version=op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs_dist
+ )
else:
raise ValueError("Unknown optimisation '{}'.".format(optim))
if op_version < 10:
- neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype),
- op_version=op_version)
+ neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version)
node = OnnxTopK_1(neg_dist, k=k, op_version=1, **kwargs_topk)
elif op_version < 11:
- neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype),
- op_version=op_version)
- node = OnnxTopK_10(neg_dist, np.array([k], dtype=np.int64),
- op_version=10, **kwargs_topk)
+ neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version)
+ node = OnnxTopK_10(
+ neg_dist, np.array([k], dtype=np.int64), op_version=10, **kwargs_topk
+ )
else:
- node = OnnxTopK_11(dist, np.array([k], dtype=np.int64),
- largest=0, sorted=1,
- op_version=11, **kwargs_topk)
+ node = OnnxTopK_11(
+ dist,
+ np.array([k], dtype=np.int64),
+ largest=0,
+ sorted=1,
+ op_version=11,
+ **kwargs_topk
+ )
if keep_distances:
- return (node[1], OnnxMul(
- node[0], np.array([-1], dtype=dtype), op_version=op_version))
+ return (
+ node[1],
+ OnnxMul(node[0], np.array([-1], dtype=dtype), op_version=op_version),
+ )
if keep_distances:
return (node[1], node[0])
return node[1]
def onnx_nearest_neighbors_indices_radius(
- X, Y, radius, metric='euclidean', dtype=None, op_version=None,
- keep_distances=False, optim=None, proto_dtype=None, **kwargs):
+ X,
+ Y,
+ radius,
+ metric="euclidean",
+ dtype=None,
+ op_version=None,
+ keep_distances=False,
+ optim=None,
+ proto_dtype=None,
+ **kwargs
+):
"""
    Retrieves the nearest neighbours in *ONNX*.
:param X: features or *OnnxOperatorMixin*
@@ -134,44 +167,54 @@ def onnx_nearest_neighbors_indices_radius(
binary weights
"""
opv = op_version
- if optim == 'cdist':
+ if optim == "cdist":
from skl2onnx.algebra.custom_ops import OnnxCDist
- dist = OnnxCDist(X, Y, metric=metric, op_version=op_version,
- **kwargs)
+
+ dist = OnnxCDist(X, Y, metric=metric, op_version=op_version, **kwargs)
elif optim is None:
- dim_in = Y.shape[1] if hasattr(Y, 'shape') else None
- dim_out = Y.shape[0] if hasattr(Y, 'shape') else None
- dist = onnx_cdist(X, Y, metric=metric, dtype=dtype,
- op_version=op_version,
- dim_in=dim_in, dim_out=dim_out,
- **kwargs)
+ dim_in = Y.shape[1] if hasattr(Y, "shape") else None
+ dim_out = Y.shape[0] if hasattr(Y, "shape") else None
+ dist = onnx_cdist(
+ X,
+ Y,
+ metric=metric,
+ dtype=dtype,
+ op_version=op_version,
+ dim_in=dim_in,
+ dim_out=dim_out,
+ **kwargs
+ )
else:
raise ValueError("Unknown optimisation '{}'.".format(optim))
less = OnnxLess(dist, np.array([radius], dtype=dtype), op_version=opv)
- less.set_onnx_name_prefix('cond')
+ less.set_onnx_name_prefix("cond")
shape = OnnxShape(dist, op_version=opv)
zero = OnnxCast(
- OnnxConstantOfShape(shape, op_version=opv),
- op_version=opv, to=proto_dtype)
+ OnnxConstantOfShape(shape, op_version=opv), op_version=opv, to=proto_dtype
+ )
tensor_value = py_make_float_array(-1, dtype=np.float32, as_tensor=True)
minus = OnnxCast(
- OnnxConstantOfShape(
- shape, op_version=opv, value=tensor_value),
- op_version=opv, to=onnx_proto.TensorProto.INT64)
+ OnnxConstantOfShape(shape, op_version=opv, value=tensor_value),
+ op_version=opv,
+ to=onnx_proto.TensorProto.INT64,
+ )
minus_range = OnnxAdd(
OnnxNeg(
OnnxCumSum(minus, np.array([1], dtype=np.int64), op_version=opv),
- op_version=opv),
- minus, op_version=opv)
- minus_range.set_onnx_name_prefix('arange')
+ op_version=opv,
+ ),
+ minus,
+ op_version=opv,
+ )
+ minus_range.set_onnx_name_prefix("arange")
dist_only = OnnxWhere(less, dist, zero, op_version=opv)
- dist_only.set_onnx_name_prefix('nndist')
+ dist_only.set_onnx_name_prefix("nndist")
indices = OnnxWhere(less, minus_range, minus, op_version=opv)
- indices.set_onnx_name_prefix('nnind')
+ indices.set_onnx_name_prefix("nnind")
binary = OnnxCast(less, to=proto_dtype, op_version=opv)
- binary.set_onnx_name_prefix('nnbin')
+ binary.set_onnx_name_prefix("nnbin")
return indices, dist_only, binary
@@ -211,65 +254,90 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None):
options = container.get_options(op, dict(optim=None))
- single_reg = (not hasattr(op, '_y') or len(op._y.shape) == 1 or
- len(op._y.shape) == 2 and op._y.shape[1] == 1)
+ single_reg = (
+ not hasattr(op, "_y")
+ or len(op._y.shape) == 1
+ or len(op._y.shape) == 2
+ and op._y.shape[1] == 1
+ )
ndim = 1 if single_reg else op._y.shape[1]
- metric = (op.effective_metric_ if hasattr(op, 'effective_metric_') else
- op.metric)
+ metric = op.effective_metric_ if hasattr(op, "effective_metric_") else op.metric
neighb = op._fit_X.astype(dtype)
- if (hasattr(op, 'n_neighbors') and op.n_neighbors is not None and
- hasattr(op, 'radius') and op.radius is not None):
+ if (
+ hasattr(op, "n_neighbors")
+ and op.n_neighbors is not None
+ and hasattr(op, "radius")
+ and op.radius is not None
+ ):
raise RuntimeError(
"The model defines radius and n_neighbors at the "
"same time ({} and {}). "
- "This case is not supported.".format(
- op.radius, op.n_neighbors))
+ "This case is not supported.".format(op.radius, op.n_neighbors)
+ )
- if hasattr(op, 'n_neighbors') and op.n_neighbors is not None:
+ if hasattr(op, "n_neighbors") and op.n_neighbors is not None:
k = op.n_neighbors if k is None else k
radius = None
- elif hasattr(op, 'radius') and op.radius is not None:
+ elif hasattr(op, "radius") and op.radius is not None:
k = None
radius = op.radius if radius is None else radius
else:
- raise RuntimeError(
- "Cannot convert class '{}'.".format(op.__class__.__name__))
+ raise RuntimeError("Cannot convert class '{}'.".format(op.__class__.__name__))
- training_labels = op._y if hasattr(op, '_y') else None
+ training_labels = op._y if hasattr(op, "_y") else None
distance_kwargs = {}
- if metric == 'minkowski':
+ if metric == "minkowski":
if op.p != 2:
- distance_kwargs['p'] = op.p
+ distance_kwargs["p"] = op.p
else:
metric = "euclidean"
- weights = op.weights if hasattr(op, 'weights') else 'distance'
+ weights = op.weights if hasattr(op, "weights") else "distance"
binary = None
- if weights == 'uniform' and radius is None:
+ if weights == "uniform" and radius is None:
top_indices = onnx_nearest_neighbors_indices_k(
- X, neighb, k, metric=metric, dtype=dtype,
- op_version=opv, optim=options.get('optim', None),
- **distance_kwargs)
+ X,
+ neighb,
+ k,
+ metric=metric,
+ dtype=dtype,
+ op_version=opv,
+ optim=options.get("optim", None),
+ **distance_kwargs
+ )
top_distances = None
elif radius is not None:
three = onnx_nearest_neighbors_indices_radius(
- X, neighb, radius, metric=metric, dtype=dtype,
- op_version=opv, keep_distances=True,
+ X,
+ neighb,
+ radius,
+ metric=metric,
+ dtype=dtype,
+ op_version=opv,
+ keep_distances=True,
proto_dtype=proto_type,
- optim=options.get('optim', None),
- **distance_kwargs)
+ optim=options.get("optim", None),
+ **distance_kwargs
+ )
top_indices, top_distances, binary = three
- elif weights == 'distance':
+ elif weights == "distance":
top_indices, top_distances = onnx_nearest_neighbors_indices_k(
- X, neighb, k, metric=metric, dtype=dtype,
- op_version=opv, keep_distances=True,
- optim=options.get('optim', None),
- **distance_kwargs)
+ X,
+ neighb,
+ k,
+ metric=metric,
+ dtype=dtype,
+ op_version=opv,
+ keep_distances=True,
+ optim=options.get("optim", None),
+ **distance_kwargs
+ )
else:
raise RuntimeError(
- "Unable to convert KNeighborsRegressor when weights is callable.")
+ "Unable to convert KNeighborsRegressor when weights is callable."
+ )
if training_labels is not None:
if ndim > 1:
@@ -287,35 +355,38 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None):
else:
raise RuntimeError(
"Conversion of a KNeighborsRegressor for multi regression "
- "requires opset >= 9.")
+ "requires opset >= 9."
+ )
if training_labels.dtype == np.int32:
training_labels = training_labels.astype(np.int64)
flattened = OnnxFlatten(top_indices, op_version=opv)
extracted = OnnxArrayFeatureExtractor(
- training_labels, flattened, op_version=opv)
+ training_labels, flattened, op_version=opv
+ )
reshaped = OnnxReshapeApi13(extracted, shape, op_version=opv)
if ndim > 1:
reshaped = OnnxTranspose(reshaped, op_version=opv, perm=[1, 0, 2])
- reshaped.set_onnx_name_prefix('knny')
+ reshaped.set_onnx_name_prefix("knny")
else:
reshaped = None
axis = 1
if binary is not None:
- if op.weights == 'uniform':
+ if op.weights == "uniform":
wei = binary
else:
- modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype),
- op_version=opv)
- wei = OnnxMul(binary, OnnxReciprocal(modified, op_version=opv),
- op_version=opv)
+ modified = OnnxMax(
+ top_distances, np.array([1e-6], dtype=dtype), op_version=opv
+ )
+ wei = OnnxMul(
+ binary, OnnxReciprocal(modified, op_version=opv), op_version=opv
+ )
norm = OnnxReduceSumApi11(wei, op_version=opv, axes=[1], keepdims=0)
elif top_distances is not None:
- modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype),
- op_version=opv)
+ modified = OnnxMax(top_distances, np.array([1e-6], dtype=dtype), op_version=opv)
wei = OnnxReciprocal(modified, op_version=opv)
norm = OnnxReduceSumApi11(wei, op_version=opv, axes=[1], keepdims=0)
else:
@@ -323,14 +394,15 @@ def _convert_nearest_neighbors(operator, container, k=None, radius=None):
wei = None
if wei is not None:
- wei.set_onnx_name_prefix('wei')
+ wei.set_onnx_name_prefix("wei")
if norm is not None:
- norm.set_onnx_name_prefix('norm')
+ norm.set_onnx_name_prefix("norm")
return top_indices, top_distances, reshaped, wei, norm, axis
-def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_nearest_neighbors_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *KNeighborsRegressor* into *ONNX*.
The converted model may return different predictions depending
@@ -349,18 +421,17 @@ def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator,
opv = container.target_opset
out = operator.outputs
- reshaped_cast = OnnxCast(
- reshaped, to=proto_type, op_version=opv)
+ reshaped_cast = OnnxCast(reshaped, to=proto_type, op_version=opv)
if top_distances is not None:
# Multi-target
- if (hasattr(operator.raw_operator, '_y') and
- len(operator.raw_operator._y.shape) > 1 and
- operator.raw_operator._y.shape[1] > 1):
- rs = OnnxTranspose(reshaped_cast, perm=[1, 0, 2],
- op_version=opv)
+ if (
+ hasattr(operator.raw_operator, "_y")
+ and len(operator.raw_operator._y.shape) > 1
+ and operator.raw_operator._y.shape[1] > 1
+ ):
+ rs = OnnxTranspose(reshaped_cast, perm=[1, 0, 2], op_version=opv)
weighted_rs = OnnxMul(rs, wei, op_version=opv)
- weighted = OnnxTranspose(weighted_rs, perm=[1, 0, 2],
- op_version=opv)
+ weighted = OnnxTranspose(weighted_rs, perm=[1, 0, 2], op_version=opv)
if OnnxIsNaN is not None:
            # This step sometimes produces nan (bug in onnxruntime)
@@ -371,38 +442,46 @@ def convert_nearest_neighbors_regressor(scope: Scope, operator: Operator,
weighted = OnnxWhere(isnan, csts0, weighted, op_version=opv)
# Back to original plan.
- res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv,
- keepdims=0)
- norm2 = OnnxReshapeApi13(norm, np.array([-1, 1], dtype=np.int64),
- op_version=opv)
+ res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, keepdims=0)
+ norm2 = OnnxReshapeApi13(
+ norm, np.array([-1, 1], dtype=np.int64), op_version=opv
+ )
res = OnnxDiv(res, norm2, op_version=opv, output_names=out)
else:
weighted = OnnxMul(reshaped_cast, wei, op_version=opv)
- res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv,
- keepdims=0)
- res.set_onnx_name_prefix('final')
+ res = OnnxReduceSumApi11(weighted, axes=[axis], op_version=opv, keepdims=0)
+ res.set_onnx_name_prefix("final")
if opv >= 12:
shape = OnnxShape(res, op_version=opv)
norm = OnnxReshapeApi13(norm, shape, op_version=opv)
- norm.set_onnx_name_prefix('normr')
+ norm.set_onnx_name_prefix("normr")
res = OnnxDiv(res, norm, op_version=opv)
- res = OnnxReshapeApi13(res, np.array([-1, 1], dtype=np.int64),
- output_names=out, op_version=opv)
+ res = OnnxReshapeApi13(
+ res, np.array([-1, 1], dtype=np.int64), output_names=out, op_version=opv
+ )
else:
- if (hasattr(operator.raw_operator, '_y') and
- len(np.squeeze(operator.raw_operator._y).shape) == 1):
+ if (
+ hasattr(operator.raw_operator, "_y")
+ and len(np.squeeze(operator.raw_operator._y).shape) == 1
+ ):
keepdims = 1
elif operator.raw_operator.n_neighbors == 1:
keepdims = 0
else:
keepdims = 0
- res = OnnxReduceMeanApi18(reshaped_cast, axes=[axis], op_version=opv,
- keepdims=keepdims, output_names=out)
+ res = OnnxReduceMeanApi18(
+ reshaped_cast,
+ axes=[axis],
+ op_version=opv,
+ keepdims=keepdims,
+ output_names=out,
+ )
res.add_to(scope, container)
-def get_proba_and_label(container, nb_classes, reshaped,
- wei, axis, opv, proto_type, keep_axis=True):
+def get_proba_and_label(
+ container, nb_classes, reshaped, wei, axis, opv, proto_type, keep_axis=True
+):
"""
This function calculates the label by choosing majority label
amongst the nearest neighbours.
@@ -411,25 +490,23 @@ def get_proba_and_label(container, nb_classes, reshaped,
for cl in range(nb_classes):
cst = np.array([cl], dtype=np.int64)
mat_cast = OnnxCast(
- OnnxEqual(reshaped, cst, op_version=opv),
- op_version=opv, to=proto_type)
+ OnnxEqual(reshaped, cst, op_version=opv), op_version=opv, to=proto_type
+ )
if wei is not None:
if not keep_axis:
- mat_cast = OnnxSqueezeApi11(mat_cast, axes=[-1],
- op_version=opv)
+ mat_cast = OnnxSqueezeApi11(mat_cast, axes=[-1], op_version=opv)
mat_cast = OnnxMul(mat_cast, wei, op_version=opv)
wh = OnnxReduceSumApi11(mat_cast, axes=[1], op_version=opv)
conc.append(wh)
all_together = OnnxConcat(*conc, axis=1, op_version=opv)
- sum_prob = OnnxReduceSumApi11(
- all_together, axes=[1], op_version=opv, keepdims=1)
- res = OnnxArgMax(all_together, axis=axis, op_version=opv,
- keepdims=0)
+ sum_prob = OnnxReduceSumApi11(all_together, axes=[1], op_version=opv, keepdims=1)
+ res = OnnxArgMax(all_together, axis=axis, op_version=opv, keepdims=0)
return all_together, sum_prob, res
-def convert_nearest_neighbors_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_nearest_neighbors_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *KNeighborsClassifier* into *ONNX*.
The converted model may return different predictions depending
@@ -453,70 +530,87 @@ def convert_nearest_neighbors_classifier(scope: Scope, operator: Operator,
if axis == 0:
raise RuntimeError(
"Binary classification not implemented in scikit-learn. "
- "Check this code is not reused for other libraries.")
+ "Check this code is not reused for other libraries."
+ )
classes = get_label_classes(scope, op)
- if hasattr(classes, 'dtype') and (
- np.issubdtype(classes.dtype, np.floating) or
- classes.dtype == np.bool_):
+ if hasattr(classes, "dtype") and (
+ np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_
+ ):
classes = classes.astype(np.int32)
is_integer = True
elif isinstance(classes[0], (int, np.int32, np.int64)):
is_integer = True
else:
is_integer = False
- if (isinstance(op.classes_, list)
- and isinstance(op.classes_[0], np.ndarray)):
+ if isinstance(op.classes_, list) and isinstance(op.classes_[0], np.ndarray):
# Multi-label
out_labels, out_probas = [], []
for index, cur_class in enumerate(op.classes_):
- transpose_result = OnnxTranspose(
- reshaped, op_version=opv, perm=[0, 2, 1])
+ transpose_result = OnnxTranspose(reshaped, op_version=opv, perm=[0, 2, 1])
extracted_name = OnnxArrayFeatureExtractor(
- transpose_result, np.array([index], dtype=np.int64),
- op_version=opv)
- extracted_name.set_onnx_name_prefix('tr%d' % index)
+ transpose_result, np.array([index], dtype=np.int64), op_version=opv
+ )
+ extracted_name.set_onnx_name_prefix("tr%d" % index)
all_together, sum_prob, res = get_proba_and_label(
- container, len(cur_class), extracted_name,
- wei, 1, opv, proto_type, keep_axis=False)
+ container,
+ len(cur_class),
+ extracted_name,
+ wei,
+ 1,
+ opv,
+ proto_type,
+ keep_axis=False,
+ )
probas = OnnxDiv(all_together, sum_prob, op_version=opv)
- res_name = OnnxArrayFeatureExtractor(
- cur_class, res, op_version=opv)
- res_name.set_onnx_name_prefix('div%d' % index)
+ res_name = OnnxArrayFeatureExtractor(cur_class, res, op_version=opv)
+ res_name.set_onnx_name_prefix("div%d" % index)
reshaped_labels = OnnxReshapeApi13(
- res_name, np.array([-1, 1], dtype=np.int64), op_version=opv)
+ res_name, np.array([-1, 1], dtype=np.int64), op_version=opv
+ )
reshaped_probas = OnnxReshapeApi13(
- probas, np.array([1, -1, len(cur_class)], dtype=np.int64),
- op_version=opv)
+ probas,
+ np.array([1, -1, len(cur_class)], dtype=np.int64),
+ op_version=opv,
+ )
out_labels.append(reshaped_labels)
out_probas.append(reshaped_probas)
- concatenated_labels = OnnxConcat(
- *out_labels, axis=1, op_version=opv)
+ concatenated_labels = OnnxConcat(*out_labels, axis=1, op_version=opv)
final_proba = OnnxConcat(
- *out_probas, axis=0, output_names=out[1:], op_version=opv)
+ *out_probas, axis=0, output_names=out[1:], op_version=opv
+ )
final_label = OnnxCast(
- concatenated_labels, to=onnx_proto.TensorProto.INT64,
- output_names=out[:1], op_version=opv)
+ concatenated_labels,
+ to=onnx_proto.TensorProto.INT64,
+ output_names=out[:1],
+ op_version=opv,
+ )
final_label.add_to(scope, container)
final_proba.add_to(scope, container)
else:
all_together, sum_prob, res = get_proba_and_label(
- container, nb_classes, reshaped, wei, axis, opv, proto_type)
- probas = OnnxDiv(all_together, sum_prob, op_version=opv,
- output_names=out[1:])
- probas.set_onnx_name_prefix('bprob')
+ container, nb_classes, reshaped, wei, axis, opv, proto_type
+ )
+ probas = OnnxDiv(all_together, sum_prob, op_version=opv, output_names=out[1:])
+ probas.set_onnx_name_prefix("bprob")
res_name = OnnxArrayFeatureExtractor(classes, res, op_version=opv)
if is_integer:
res_name = OnnxCast(
- res_name, to=onnx_proto.TensorProto.INT64, op_version=opv)
- out_labels = OnnxReshapeApi13(res_name, np.array([-1], dtype=np.int64),
- output_names=out[:1], op_version=opv)
- out_labels.set_onnx_name_prefix('blab')
+ res_name, to=onnx_proto.TensorProto.INT64, op_version=opv
+ )
+ out_labels = OnnxReshapeApi13(
+ res_name,
+ np.array([-1], dtype=np.int64),
+ output_names=out[:1],
+ op_version=opv,
+ )
+ out_labels.set_onnx_name_prefix("blab")
out_labels.add_to(scope, container)
probas.add_to(scope, container)
-def convert_nearest_neighbors_transform(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_nearest_neighbors_transform(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *NearestNeighbors* into *ONNX*.
"""
@@ -528,18 +622,23 @@ def convert_nearest_neighbors_transform(scope: Scope, operator: Operator,
out = operator.outputs
- ind = OnnxIdentity(top_indices, output_names=out[:1],
- op_version=container.target_opset)
+ ind = OnnxIdentity(
+ top_indices, output_names=out[:1], op_version=container.target_opset
+ )
dist = OnnxMul(
- top_distances, np.array([-1], dtype=dtype),
- output_names=out[1:], op_version=container.target_opset)
+ top_distances,
+ np.array([-1], dtype=dtype),
+ output_names=out[1:],
+ op_version=container.target_opset,
+ )
dist.add_to(scope, container)
ind.add_to(scope, container)
-def convert_k_neighbours_transformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_k_neighbours_transformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *KNeighborsTransformer* into *ONNX*.
"""
@@ -551,115 +650,148 @@ def convert_k_neighbours_transformer(scope: Scope, operator: Operator,
dtype = np.float32
transformer_op = operator.raw_operator
op_version = container.target_opset
- k = (transformer_op.n_neighbors + 1 if transformer_op.mode == 'distance'
- else transformer_op.n_neighbors)
+ k = (
+ transformer_op.n_neighbors + 1
+ if transformer_op.mode == "distance"
+ else transformer_op.n_neighbors
+ )
out = operator.outputs
- many = _convert_nearest_neighbors(
- operator, container, k=k)
+ many = _convert_nearest_neighbors(operator, container, k=k)
top_indices, top_dist = many[:2]
top_dist = (
OnnxReshapeApi13(
- OnnxMul(top_dist, np.array([-1], dtype=dtype),
- op_version=op_version),
+ OnnxMul(top_dist, np.array([-1], dtype=dtype), op_version=op_version),
np.array([-1, 1, k], dtype=np.int64),
- op_version=op_version)
- if transformer_op.mode == 'distance' else None)
+ op_version=op_version,
+ )
+ if transformer_op.mode == "distance"
+ else None
+ )
fit_samples_indices = np.array(
- np.arange(transformer_op.n_samples_fit_).reshape((1, -1, 1)),
- dtype=np.int64)
+ np.arange(transformer_op.n_samples_fit_).reshape((1, -1, 1)), dtype=np.int64
+ )
reshaped_ind = OnnxReshapeApi13(
- top_indices, np.array([-1, 1, k], dtype=np.int64),
- op_version=op_version)
+ top_indices, np.array([-1, 1, k], dtype=np.int64), op_version=op_version
+ )
comparison_res = OnnxCast(
OnnxEqual(fit_samples_indices, reshaped_ind, op_version=op_version),
- op_version=op_version, to=proto_type)
+ op_version=op_version,
+ to=proto_type,
+ )
if top_dist:
- comparison_res = OnnxMul(
- comparison_res, top_dist, op_version=op_version)
+ comparison_res = OnnxMul(comparison_res, top_dist, op_version=op_version)
res = OnnxReduceSumApi11(
- comparison_res, op_version=op_version, axes=[2],
- keepdims=0, output_names=out[:1])
+ comparison_res,
+ op_version=op_version,
+ axes=[2],
+ keepdims=0,
+ output_names=out[:1],
+ )
res.add_to(scope, container)
def _nan_euclidean_distance(
- container, model, input_name, op_version, optim, dtype, proto_type):
+ container, model, input_name, op_version, optim, dtype, proto_type
+):
training_data = model._fit_X.astype(dtype)
shape = OnnxShape(input_name, op_version=op_version)
zero = OnnxConstantOfShape(
- shape, value=make_tensor("value", proto_type, (1, ), [0]),
- op_version=op_version)
+ shape, value=make_tensor("value", proto_type, (1,), [0]), op_version=op_version
+ )
missing_input_name = OnnxIsNaN(input_name, op_version=op_version)
- masked_input_name = OnnxWhere(missing_input_name, zero, input_name,
- op_version=op_version)
+ masked_input_name = OnnxWhere(
+ missing_input_name, zero, input_name, op_version=op_version
+ )
missing_y = np.isnan(training_data)
training_data[missing_y] = 0
- d_in = training_data.shape[1] if hasattr(training_data, 'shape') else None
- d_out = training_data.shape[0] if hasattr(training_data, 'shape') else None
+ d_in = training_data.shape[1] if hasattr(training_data, "shape") else None
+ d_out = training_data.shape[0] if hasattr(training_data, "shape") else None
if optim is None:
dist = _onnx_cdist_sqeuclidean(
- masked_input_name, training_data, dtype=dtype,
- op_version=container.target_opset, dim_in=d_in, dim_out=d_out)
- elif optim == 'cdist':
+ masked_input_name,
+ training_data,
+ dtype=dtype,
+ op_version=container.target_opset,
+ dim_in=d_in,
+ dim_out=d_out,
+ )
+ elif optim == "cdist":
from skl2onnx.algebra.custom_ops import OnnxCDist
+
dist = OnnxCDist(
- masked_input_name, training_data, metric='sqeuclidean',
- op_version=container.target_opset)
+ masked_input_name,
+ training_data,
+ metric="sqeuclidean",
+ op_version=container.target_opset,
+ )
else:
raise RuntimeError("Unexpected optimization '{}'.".format(optim))
dist1 = OnnxMatMul(
OnnxMul(masked_input_name, masked_input_name, op_version=op_version),
- missing_y.T.astype(dtype), op_version=op_version)
+ missing_y.T.astype(dtype),
+ op_version=op_version,
+ )
dist2 = OnnxMatMul(
- OnnxCast(missing_input_name, to=proto_type,
- op_version=op_version),
+ OnnxCast(missing_input_name, to=proto_type, op_version=op_version),
(training_data * training_data).T.astype(dtype),
- op_version=op_version)
- distances = OnnxSub(dist, OnnxAdd(dist1, dist2, op_version=op_version),
- op_version=op_version)
+ op_version=op_version,
+ )
+ distances = OnnxSub(
+ dist, OnnxAdd(dist1, dist2, op_version=op_version), op_version=op_version
+ )
present_x = OnnxSub(
np.array([1], dtype=dtype),
- OnnxCast(missing_input_name, to=proto_type,
- op_version=op_version),
- op_version=op_version)
- present_y = (1. - missing_y).astype(dtype)
+ OnnxCast(missing_input_name, to=proto_type, op_version=op_version),
+ op_version=op_version,
+ )
+ present_y = (1.0 - missing_y).astype(dtype)
present_count = OnnxMatMul(
- present_x, present_y.T.astype(dtype), op_version=op_version)
- present_count = OnnxMax(np.array([1], dtype=dtype),
- present_count, op_version=op_version)
+ present_x, present_y.T.astype(dtype), op_version=op_version
+ )
+ present_count = OnnxMax(
+ np.array([1], dtype=dtype), present_count, op_version=op_version
+ )
dist = OnnxDiv(distances, present_count, op_version=op_version)
- return OnnxMul(
- dist, np.array([d_in], dtype=dtype),
- op_version=op_version), missing_input_name
+ return (
+ OnnxMul(dist, np.array([d_in], dtype=dtype), op_version=op_version),
+ missing_input_name,
+ )
-def _nearest_neighbours(container, model, input_name,
- op_version, optim, dtype, proto_type, **kwargs):
+def _nearest_neighbours(
+ container, model, input_name, op_version, optim, dtype, proto_type, **kwargs
+):
dist, missing_input_name = _nan_euclidean_distance(
- container, model, input_name, op_version, optim, dtype,
- proto_type)
+ container, model, input_name, op_version, optim, dtype, proto_type
+ )
if op_version < 10:
- neg_dist = OnnxMul(dist, np.array(
- [-1], dtype=dtype), op_version=op_version)
- node = OnnxTopK_1(
- neg_dist, k=model.n_neighbors, op_version=1, **kwargs)
+ neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version)
+ node = OnnxTopK_1(neg_dist, k=model.n_neighbors, op_version=1, **kwargs)
elif op_version < 11:
- neg_dist = OnnxMul(dist, np.array(
- [-1], dtype=dtype), op_version=op_version)
+ neg_dist = OnnxMul(dist, np.array([-1], dtype=dtype), op_version=op_version)
node = OnnxTopK_10(
- neg_dist, np.array([model.n_neighbors], dtype=np.int64),
- op_version=10, **kwargs)
+ neg_dist,
+ np.array([model.n_neighbors], dtype=np.int64),
+ op_version=10,
+ **kwargs
+ )
else:
node = OnnxTopK_11(
- dist, np.array([model.n_neighbors], dtype=np.int64),
- largest=0, sorted=1, op_version=11, **kwargs)
+ dist,
+ np.array([model.n_neighbors], dtype=np.int64),
+ largest=0,
+ sorted=1,
+ op_version=11,
+ **kwargs
+ )
return node[1], missing_input_name
-def convert_knn_imputer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_knn_imputer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *KNNImputer* into *ONNX*.
"""
@@ -670,17 +802,16 @@ def convert_knn_imputer(scope: Scope, operator: Operator,
if proto_type != onnx_proto.TensorProto.DOUBLE:
proto_type = onnx_proto.TensorProto.FLOAT
knn_op = operator.raw_operator
- if knn_op.metric != 'nan_euclidean':
- raise RuntimeError(
- "Unable to convert KNNImputer when metric is callable.")
- if knn_op.weights not in ('uniform', 'distance'):
- raise RuntimeError(
- "Unable to convert KNNImputer when weights is callable.")
- if knn_op.weights == 'distance':
+ if knn_op.metric != "nan_euclidean":
+ raise RuntimeError("Unable to convert KNNImputer when metric is callable.")
+ if knn_op.weights not in ("uniform", "distance"):
+ raise RuntimeError("Unable to convert KNNImputer when weights is callable.")
+ if knn_op.weights == "distance":
raise NotImplementedError(
- 'KNNImputer with distance as metric is not supported, '
- 'you may raise an issue at '
- 'https://github.com/onnx/sklearn-onnx/issues.')
+            "KNNImputer with distance as weights is not supported, "
+ "you may raise an issue at "
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
options = container.get_options(knn_op, dict(optim=None))
op_version = container.target_opset
input_name = operator.inputs[0]
@@ -688,40 +819,52 @@ def convert_knn_imputer(scope: Scope, operator: Operator,
training_data[np.isnan(training_data)] = 0
out = operator.outputs
top_indices, missing_input_name = _nearest_neighbours(
- container, knn_op, input_name, op_version, options['optim'],
- dtype, proto_type)
+ container, knn_op, input_name, op_version, options["optim"], dtype, proto_type
+ )
flattened = OnnxFlatten(top_indices, op_version=op_version)
extracted = OnnxArrayFeatureExtractor(
- training_data.T, flattened, op_version=op_version)
+ training_data.T, flattened, op_version=op_version
+ )
reshaped = OnnxReshapeApi13(
- extracted, np.array([training_data.shape[1], -1, knn_op.n_neighbors],
- dtype=np.int64),
- op_version=op_version)
- transpose_result = OnnxTranspose(
- reshaped, op_version=op_version, perm=[1, 2, 0])
+ extracted,
+ np.array([training_data.shape[1], -1, knn_op.n_neighbors], dtype=np.int64),
+ op_version=op_version,
+ )
+ transpose_result = OnnxTranspose(reshaped, op_version=op_version, perm=[1, 2, 0])
reduced = OnnxReduceSumApi11(
- transpose_result, op_version=op_version, axes=[1], keepdims=0)
+ transpose_result, op_version=op_version, axes=[1], keepdims=0
+ )
cast_res = OnnxCast(
- OnnxCast(transpose_result, to=onnx_proto.TensorProto.BOOL,
- op_version=op_version),
- to=proto_type, op_version=op_version)
- deno = OnnxReduceSumApi11(
- cast_res, op_version=op_version, axes=[1], keepdims=0)
+ OnnxCast(
+ transpose_result, to=onnx_proto.TensorProto.BOOL, op_version=op_version
+ ),
+ to=proto_type,
+ op_version=op_version,
+ )
+ deno = OnnxReduceSumApi11(cast_res, op_version=op_version, axes=[1], keepdims=0)
deno_updated = OnnxAdd(
- deno, OnnxCast(
- OnnxNot(OnnxCast(deno, to=onnx_proto.TensorProto.BOOL,
- op_version=op_version), op_version=op_version),
- to=proto_type, op_version=op_version),
- op_version=op_version)
+ deno,
+ OnnxCast(
+ OnnxNot(
+ OnnxCast(deno, to=onnx_proto.TensorProto.BOOL, op_version=op_version),
+ op_version=op_version,
+ ),
+ to=proto_type,
+ op_version=op_version,
+ ),
+ op_version=op_version,
+ )
imputed_out = OnnxWhere(
missing_input_name,
- OnnxDiv(reduced, deno_updated, op_version=op_version), input_name,
- output_names=out[:1], op_version=op_version)
+ OnnxDiv(reduced, deno_updated, op_version=op_version),
+ input_name,
+ output_names=out[:1],
+ op_version=op_version,
+ )
imputed_out.add_to(scope, container)
-def convert_nca(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_nca(scope: Scope, operator: Operator, container: ModelComponentContainer):
"""
Converts *NeighborhoodComponentsAnalysis* into *ONNX*.
"""
@@ -738,44 +881,57 @@ def convert_nca(scope: Scope, operator: Operator,
X = OnnxCast(X, to=onnx_proto.TensorProto.FLOAT, op_version=op_version)
elif isinstance(X.type, DoubleTensorType):
components = OnnxCast(
- components, to=onnx_proto.TensorProto.DOUBLE,
- op_version=op_version)
+ components, to=onnx_proto.TensorProto.DOUBLE, op_version=op_version
+ )
else:
components = components.astype(dtype)
- res = OnnxMatMul(
- X, components,
- output_names=out[:1], op_version=op_version)
+ res = OnnxMatMul(X, components, output_names=out[:1], op_version=op_version)
res.add_to(scope, container)
register_converter(
- 'SklearnKNeighborsClassifier', convert_nearest_neighbors_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'raw_scores': [True, False],
- 'output_class_labels': [False, True],
- 'optim': [None, 'cdist']})
-register_converter(
- 'SklearnRadiusNeighborsClassifier', convert_nearest_neighbors_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'raw_scores': [True, False],
- 'output_class_labels': [False, True],
- 'optim': [None, 'cdist']})
+ "SklearnKNeighborsClassifier",
+ convert_nearest_neighbors_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "raw_scores": [True, False],
+ "output_class_labels": [False, True],
+ "optim": [None, "cdist"],
+ },
+)
register_converter(
- 'SklearnKNeighborsRegressor', convert_nearest_neighbors_regressor,
- options={'optim': [None, 'cdist']})
+ "SklearnRadiusNeighborsClassifier",
+ convert_nearest_neighbors_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "raw_scores": [True, False],
+ "output_class_labels": [False, True],
+ "optim": [None, "cdist"],
+ },
+)
register_converter(
- 'SklearnRadiusNeighborsRegressor', convert_nearest_neighbors_regressor,
- options={'optim': [None, 'cdist']})
+ "SklearnKNeighborsRegressor",
+ convert_nearest_neighbors_regressor,
+ options={"optim": [None, "cdist"]},
+)
register_converter(
- 'SklearnKNeighborsTransformer', convert_k_neighbours_transformer,
- options={'optim': [None, 'cdist']})
+ "SklearnRadiusNeighborsRegressor",
+ convert_nearest_neighbors_regressor,
+ options={"optim": [None, "cdist"]},
+)
register_converter(
- 'SklearnNearestNeighbors', convert_nearest_neighbors_transform,
- options={'optim': [None, 'cdist']})
+ "SklearnKNeighborsTransformer",
+ convert_k_neighbours_transformer,
+ options={"optim": [None, "cdist"]},
+)
register_converter(
- 'SklearnKNNImputer', convert_knn_imputer,
- options={'optim': [None, 'cdist']})
+ "SklearnNearestNeighbors",
+ convert_nearest_neighbors_transform,
+ options={"optim": [None, "cdist"]},
+)
register_converter(
- 'SklearnNeighborhoodComponentsAnalysis', convert_nca)
+ "SklearnKNNImputer", convert_knn_imputer, options={"optim": [None, "cdist"]}
+)
+register_converter("SklearnNeighborhoodComponentsAnalysis", convert_nca)
diff --git a/skl2onnx/operator_converters/normaliser.py b/skl2onnx/operator_converters/normaliser.py
index 4dafe01ac..6479acc34 100644
--- a/skl2onnx/operator_converters/normaliser.py
+++ b/skl2onnx/operator_converters/normaliser.py
@@ -9,8 +9,9 @@
from .common import concatenate_variables
-def convert_sklearn_normalizer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_normalizer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
if len(operator.inputs) > 1:
# If there are multiple input tensors,
# we combine them using a FeatureVectorizer
@@ -19,17 +20,24 @@ def convert_sklearn_normalizer(scope: Scope, operator: Operator,
# No concatenation is needed, we just use the first variable's name
feature_name = operator.inputs[0].full_name
op = operator.raw_operator
- norm_map = {'max': 'MAX', 'l1': 'L1', 'l2': 'L2'}
+ norm_map = {"max": "MAX", "l1": "L1", "l2": "L2"}
if op.norm in norm_map:
norm = norm_map[op.norm]
else:
- raise RuntimeError("Invalid norm '%s'. You may raise an issue"
- "at https://github.com/onnx/sklearn-onnx/"
- "issues." % op.norm)
- use_float = type(operator.inputs[0].type) not in (DoubleTensorType, )
+ raise RuntimeError(
+            "Invalid norm '%s'. You may raise an issue "
+ "at https://github.com/onnx/sklearn-onnx/"
+ "issues." % op.norm
+ )
+ use_float = type(operator.inputs[0].type) not in (DoubleTensorType,)
apply_normalizer(
- scope, feature_name, operator.outputs[0].full_name, container,
- norm=norm, use_float=use_float)
+ scope,
+ feature_name,
+ operator.outputs[0].full_name,
+ container,
+ norm=norm,
+ use_float=use_float,
+ )
-register_converter('SklearnNormalizer', convert_sklearn_normalizer)
+register_converter("SklearnNormalizer", convert_sklearn_normalizer)
diff --git a/skl2onnx/operator_converters/one_hot_encoder.py b/skl2onnx/operator_converters/one_hot_encoder.py
index 9b4b0f356..815736139 100644
--- a/skl2onnx/operator_converters/one_hot_encoder.py
+++ b/skl2onnx/operator_converters/one_hot_encoder.py
@@ -4,8 +4,11 @@
import numpy as np
from ..common._apply_operation import apply_cast, apply_concat, apply_reshape
from ..common.data_types import (
- Int64TensorType, StringTensorType, Int32TensorType,
- FloatTensorType, DoubleTensorType
+ Int64TensorType,
+ StringTensorType,
+ Int32TensorType,
+ FloatTensorType,
+ DoubleTensorType,
)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
@@ -13,8 +16,9 @@
from ..proto import onnx_proto
-def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_one_hot_encoder(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts *OneHotEncoder* into ONNX.
It supports multiple inputs of types
@@ -28,74 +32,92 @@ def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator,
raise RuntimeError(
"Shapes must be known when OneHotEncoder is converted. "
"There are {} inputs with the following number of columns "
- "{}.".format(len(operator.inputs), all_shapes))
+ "{}.".format(len(operator.inputs), all_shapes)
+ )
total = sum(all_shapes)
if total != len(ohe_op.categories_):
raise RuntimeError(
"Mismatch between the number of sets of categories {} and "
"the total number of inputs columns {}.".format(
- len(ohe_op.categories_), total))
+ len(ohe_op.categories_), total
+ )
+ )
enum_cats = []
index_inputs = 0
for index, cats in enumerate(ohe_op.categories_):
- while sum(all_shapes[:index_inputs + 1]) <= index:
+ while sum(all_shapes[: index_inputs + 1]) <= index:
index_inputs += 1
index_in_input = index - sum(all_shapes[:index_inputs])
inp = operator.inputs[index_inputs]
if not isinstance(
- inp.type,
- (Int64TensorType, StringTensorType, Int32TensorType,
- FloatTensorType, DoubleTensorType)):
+ inp.type,
+ (
+ Int64TensorType,
+ StringTensorType,
+ Int32TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+ ),
+ ):
raise NotImplementedError(
"{} input datatype not yet supported. "
"You may raise an issue at "
"https://github.com/onnx/sklearn-onnx/issues"
- "".format(type(inp.type)))
+ "".format(type(inp.type))
+ )
if all_shapes[index_inputs] == 1:
assert index_in_input == 0
afeat = False
else:
afeat = True
- enum_cats.append(
- (afeat, index_in_input, inp.full_name, cats, inp.type))
+ enum_cats.append((afeat, index_in_input, inp.full_name, cats, inp.type))
else:
inp = operator.inputs[0]
- enum_cats = [(True, i, inp.full_name, cats, inp.type)
- for i, cats in enumerate(ohe_op.categories_)]
+ enum_cats = [
+ (True, i, inp.full_name, cats, inp.type)
+ for i, cats in enumerate(ohe_op.categories_)
+ ]
result, categories_len = [], 0
for index, enum_c in enumerate(enum_cats):
afeat, index_in, name, categories, inp_type = enum_c
container.debug(
"[conv.OneHotEncoder] cat %r/%r name=%r type=%r",
- index + 1, len(enum_cats), name, inp_type)
+ index + 1,
+ len(enum_cats),
+ name,
+ inp_type,
+ )
if len(categories) == 0:
continue
if afeat:
- index_name = scope.get_unique_variable_name(
- name + str(index_in))
+ index_name = scope.get_unique_variable_name(name + str(index_in))
container.add_initializer(
- index_name, onnx_proto.TensorProto.INT64, [], [index_in])
- out_name = scope.get_unique_variable_name(
- name + str(index_in))
+ index_name, onnx_proto.TensorProto.INT64, [], [index_in]
+ )
+ out_name = scope.get_unique_variable_name(name + str(index_in))
container.add_node(
- 'ArrayFeatureExtractor', [name, index_name],
- out_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [name, index_name],
+ out_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
name = out_name
- attrs = {'name': scope.get_unique_operator_name('OneHotEncoder')}
- attrs['zeros'] = 1 if ohe_op.handle_unknown == 'ignore' else 0
+ attrs = {"name": scope.get_unique_operator_name("OneHotEncoder")}
+ attrs["zeros"] = 1 if ohe_op.handle_unknown == "ignore" else 0
if isinstance(inp_type, (Int64TensorType, Int32TensorType)):
- attrs['cats_int64s'] = categories.astype(np.int64)
+ attrs["cats_int64s"] = categories.astype(np.int64)
elif isinstance(inp_type, StringTensorType):
- attrs['cats_strings'] = np.array(
- [str(s).encode('utf-8') for s in categories])
+ attrs["cats_strings"] = np.array(
+ [str(s).encode("utf-8") for s in categories]
+ )
elif isinstance(inp_type, (FloatTensorType, DoubleTensorType)):
# The converter checks that categories can be casted into
# integers. String is not allowed here.
@@ -105,60 +127,78 @@ def convert_sklearn_one_hot_encoder(scope: Scope, operator: Operator,
ci = int(c)
except TypeError:
raise RuntimeError(
- "Category '{}' cannot be casted into int.".format(c))
+                        "Category '{}' cannot be cast into int.".format(c)
+ )
if ci != c:
raise RuntimeError(
"Category %r is not an int64. "
"The converter only supports string and int64 "
- "categories not %r." % (c, type(c)))
- attrs['cats_int64s'] = categories.astype(np.int64)
+ "categories not %r." % (c, type(c))
+ )
+ attrs["cats_int64s"] = categories.astype(np.int64)
else:
raise RuntimeError(
"Input type {} is not supported for OneHotEncoder. "
- "Ideally, it should either be integer or strings.".format(
- inp_type))
+ "Ideally, it should either be integer or strings.".format(inp_type)
+ )
- ohe_output = scope.get_unique_variable_name(name + 'out')
+ ohe_output = scope.get_unique_variable_name(name + "out")
- if 'cats_int64s' in attrs:
+ if "cats_int64s" in attrs:
# Let's cast this input in int64.
- cast_feature = scope.get_unique_variable_name(name + 'cast')
- apply_cast(scope, name, cast_feature, container,
- to=onnx_proto.TensorProto.INT64)
+ cast_feature = scope.get_unique_variable_name(name + "cast")
+ apply_cast(
+ scope, name, cast_feature, container, to=onnx_proto.TensorProto.INT64
+ )
name = cast_feature
- container.add_node('OneHotEncoder', name,
- ohe_output, op_domain='ai.onnx.ml',
- **attrs)
- if hasattr(ohe_op, 'drop_idx_') and ohe_op.drop_idx_ is not None:
- extracted_outputs_name = scope.get_unique_variable_name(
- 'extracted_outputs')
- indices_to_keep_name = scope.get_unique_variable_name(
- 'indices_to_keep')
+ container.add_node(
+ "OneHotEncoder", name, ohe_output, op_domain="ai.onnx.ml", **attrs
+ )
+ if hasattr(ohe_op, "drop_idx_") and ohe_op.drop_idx_ is not None:
+ extracted_outputs_name = scope.get_unique_variable_name("extracted_outputs")
+ indices_to_keep_name = scope.get_unique_variable_name("indices_to_keep")
indices_to_keep = np.delete(
- np.arange(len(categories)), ohe_op.drop_idx_[index])
+ np.arange(len(categories)), ohe_op.drop_idx_[index]
+ )
container.add_initializer(
- indices_to_keep_name, onnx_proto.TensorProto.INT64,
- indices_to_keep.shape, indices_to_keep)
+ indices_to_keep_name,
+ onnx_proto.TensorProto.INT64,
+ indices_to_keep.shape,
+ indices_to_keep,
+ )
container.add_node(
- 'ArrayFeatureExtractor', [ohe_output, indices_to_keep_name],
- extracted_outputs_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [ohe_output, indices_to_keep_name],
+ extracted_outputs_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
ohe_output, categories = extracted_outputs_name, indices_to_keep
result.append(ohe_output)
categories_len += len(categories)
- concat_result_name = scope.get_unique_variable_name('concat_result')
+ concat_result_name = scope.get_unique_variable_name("concat_result")
apply_concat(scope, result, concat_result_name, container, axis=2)
reshape_input = concat_result_name
if np.issubdtype(ohe_op.dtype, np.signedinteger):
- reshape_input = scope.get_unique_variable_name('cast')
- apply_cast(scope, concat_result_name, reshape_input,
- container, to=onnx_proto.TensorProto.INT64)
- apply_reshape(scope, reshape_input, operator.output_full_names,
- container, desired_shape=(-1, categories_len))
-
-
-register_converter('SklearnOneHotEncoder', convert_sklearn_one_hot_encoder)
+ reshape_input = scope.get_unique_variable_name("cast")
+ apply_cast(
+ scope,
+ concat_result_name,
+ reshape_input,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
+ apply_reshape(
+ scope,
+ reshape_input,
+ operator.output_full_names,
+ container,
+ desired_shape=(-1, categories_len),
+ )
+
+
+register_converter("SklearnOneHotEncoder", convert_sklearn_one_hot_encoder)
diff --git a/skl2onnx/operator_converters/one_vs_one_classifier.py b/skl2onnx/operator_converters/one_vs_one_classifier.py
index 2c716268b..80dcb3a04 100644
--- a/skl2onnx/operator_converters/one_vs_one_classifier.py
+++ b/skl2onnx/operator_converters/one_vs_one_classifier.py
@@ -10,101 +10,104 @@
from .._supported_operators import sklearn_operator_name_map
-def _iteration_one_versus(scope, container, inputs, i, estimator, cl_type,
- proto_dtype, use_raw_scores=True, prob_shape=None):
+def _iteration_one_versus(
+ scope,
+ container,
+ inputs,
+ i,
+ estimator,
+ cl_type,
+ proto_dtype,
+ use_raw_scores=True,
+ prob_shape=None,
+):
op_type = sklearn_operator_name_map[type(estimator)]
this_operator = scope.declare_local_operator(op_type, raw_model=estimator)
this_operator.inputs = inputs
if is_regressor(estimator):
- score_name = scope.declare_local_variable('score_%d' % i, cl_type())
+ score_name = scope.declare_local_variable("score_%d" % i, cl_type())
this_operator.outputs.append(score_name)
- if hasattr(estimator, 'coef_') and len(estimator.coef_.shape) == 2:
+ if hasattr(estimator, "coef_") and len(estimator.coef_.shape) == 2:
raise RuntimeError(
"OneVsRestClassifier or OneVsOneClassifier accepts "
- "regressor with only one target.")
+ "regressor with only one target."
+ )
p1 = score_name.onnx_name
return None, None, p1
- if container.has_options(estimator, 'raw_scores'):
- options = {'raw_scores': use_raw_scores}
- elif container.has_options(estimator, 'zipmap'):
- options = {'zipmap': False}
+ if container.has_options(estimator, "raw_scores"):
+ options = {"raw_scores": use_raw_scores}
+ elif container.has_options(estimator, "zipmap"):
+ options = {"zipmap": False}
else:
options = None
if options is not None:
container.add_options(id(estimator), options)
scope.add_options(id(estimator), options)
- label_name = scope.declare_local_variable(
- 'label_%d' % i, Int64TensorType())
- prob_name = scope.declare_local_variable(
- 'proba_%d' % i, inputs[0].type.__class__())
+ label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType())
+ prob_name = scope.declare_local_variable("proba_%d" % i, inputs[0].type.__class__())
this_operator.outputs.append(label_name)
this_operator.outputs.append(prob_name)
# gets the label for the class 1
- label = scope.get_unique_variable_name('lab_%d' % i)
- apply_reshape(scope, label_name.onnx_name, label, container,
- desired_shape=(-1, 1))
- cast_label = scope.get_unique_variable_name('cast_lab_%d' % i)
- apply_cast(scope, label, cast_label, container,
- to=proto_dtype)
+ label = scope.get_unique_variable_name("lab_%d" % i)
+ apply_reshape(scope, label_name.onnx_name, label, container, desired_shape=(-1, 1))
+ cast_label = scope.get_unique_variable_name("cast_lab_%d" % i)
+ apply_cast(scope, label, cast_label, container, to=proto_dtype)
# get the probability for the class 1
if prob_shape is None:
# shape to use to reshape score
- cst0 = scope.get_unique_variable_name('cst0')
+ cst0 = scope.get_unique_variable_name("cst0")
container.add_initializer(cst0, onnx_proto.TensorProto.INT64, [1], [0])
- shape = scope.get_unique_variable_name('shape')
- container.add_node('Shape', [inputs[0].full_name], [shape])
- first_dim = scope.get_unique_variable_name('dim')
- container.add_node('Gather', [shape, cst0], [first_dim])
- cst_1 = scope.get_unique_variable_name('cst_1')
- container.add_initializer(
- cst_1, onnx_proto.TensorProto.INT64, [1], [-1])
- prob_shape = scope.get_unique_variable_name('shape')
+ shape = scope.get_unique_variable_name("shape")
+ container.add_node("Shape", [inputs[0].full_name], [shape])
+ first_dim = scope.get_unique_variable_name("dim")
+ container.add_node("Gather", [shape, cst0], [first_dim])
+ cst_1 = scope.get_unique_variable_name("cst_1")
+ container.add_initializer(cst_1, onnx_proto.TensorProto.INT64, [1], [-1])
+ prob_shape = scope.get_unique_variable_name("shape")
apply_concat(scope, [first_dim, cst_1], prob_shape, container, axis=0)
- prob_reshaped = scope.get_unique_variable_name('prob_%d' % i)
- container.add_node('Reshape', [prob_name.onnx_name, prob_shape],
- [prob_reshaped])
+ prob_reshaped = scope.get_unique_variable_name("prob_%d" % i)
+ container.add_node("Reshape", [prob_name.onnx_name, prob_shape], [prob_reshaped])
- cst1 = scope.get_unique_variable_name('cst1')
+ cst1 = scope.get_unique_variable_name("cst1")
container.add_initializer(cst1, onnx_proto.TensorProto.INT64, [1], [1])
- cst2 = scope.get_unique_variable_name('cst2')
+ cst2 = scope.get_unique_variable_name("cst2")
container.add_initializer(cst2, onnx_proto.TensorProto.INT64, [1], [2])
- prob1 = scope.get_unique_variable_name('prob1_%d' % i)
- container.add_node(
- 'Slice', [prob_reshaped, cst1, cst2, cst1], prob1)
+ prob1 = scope.get_unique_variable_name("prob1_%d" % i)
+ container.add_node("Slice", [prob_reshaped, cst1, cst2, cst1], prob1)
return prob_shape, cast_label, prob1
-def convert_one_vs_one_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
-
+def convert_one_vs_one_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
# shape to use to reshape score
- cst0 = scope.get_unique_variable_name('cst0')
+ cst0 = scope.get_unique_variable_name("cst0")
container.add_initializer(cst0, onnx_proto.TensorProto.INT64, [1], [0])
- cst1 = scope.get_unique_variable_name('cst1')
+ cst1 = scope.get_unique_variable_name("cst1")
container.add_initializer(cst1, onnx_proto.TensorProto.INT64, [1], [1])
- cst2 = scope.get_unique_variable_name('cst2')
+ cst2 = scope.get_unique_variable_name("cst2")
container.add_initializer(cst2, onnx_proto.TensorProto.INT64, [1], [2])
- shape = scope.get_unique_variable_name('shape')
- container.add_node('Shape', [operator.inputs[0].full_name], [shape])
- first_dim = scope.get_unique_variable_name('dim')
- container.add_node('Gather', [shape, cst0], [first_dim])
- cst_1 = scope.get_unique_variable_name('cst_1')
+ shape = scope.get_unique_variable_name("shape")
+ container.add_node("Shape", [operator.inputs[0].full_name], [shape])
+ first_dim = scope.get_unique_variable_name("dim")
+ container.add_node("Gather", [shape, cst0], [first_dim])
+ cst_1 = scope.get_unique_variable_name("cst_1")
container.add_initializer(cst_1, onnx_proto.TensorProto.INT64, [1], [-1])
- prob_shape = scope.get_unique_variable_name('shape')
+ prob_shape = scope.get_unique_variable_name("shape")
apply_concat(scope, [first_dim, cst_1], prob_shape, container, axis=0)
label_names = []
@@ -113,42 +116,52 @@ def convert_one_vs_one_classifier(scope: Scope, operator: Operator,
cl_type = operator.inputs[0].type.__class__
for i, estimator in enumerate(op.estimators_):
prob_shape, cast_label, prob1 = _iteration_one_versus(
- scope, container, operator.inputs, i, estimator, cl_type,
- proto_dtype, True, prob_shape=prob_shape)
+ scope,
+ container,
+ operator.inputs,
+ i,
+ estimator,
+ cl_type,
+ proto_dtype,
+ True,
+ prob_shape=prob_shape,
+ )
label_names.append(cast_label)
prob_names.append(prob1)
- conc_lab_name = scope.get_unique_variable_name('concat_out_ovo_label')
+ conc_lab_name = scope.get_unique_variable_name("concat_out_ovo_label")
apply_concat(scope, label_names, conc_lab_name, container, axis=1)
- conc_prob_name = scope.get_unique_variable_name('concat_out_ovo_prob')
+ conc_prob_name = scope.get_unique_variable_name("concat_out_ovo_prob")
apply_concat(scope, prob_names, conc_prob_name, container, axis=1)
# calls _ovr_decision_function
- this_operator = scope.declare_local_operator(
- "SklearnOVRDecisionFunction", op)
+ this_operator = scope.declare_local_operator("SklearnOVRDecisionFunction", op)
cl_type = operator.inputs[0].type.__class__
label = scope.declare_local_variable("label", cl_type())
- container.add_node('Identity', [conc_lab_name], [label.onnx_name])
+ container.add_node("Identity", [conc_lab_name], [label.onnx_name])
prob_score = scope.declare_local_variable("prob_score", cl_type())
- container.add_node('Identity', [conc_prob_name], [prob_score.onnx_name])
+ container.add_node("Identity", [conc_prob_name], [prob_score.onnx_name])
this_operator.inputs.append(label)
this_operator.inputs.append(prob_score)
- ovr_name = scope.declare_local_variable('ovr_output', cl_type())
+ ovr_name = scope.declare_local_variable("ovr_output", cl_type())
this_operator.outputs.append(ovr_name)
output_name = operator.outputs[1].full_name
- container.add_node('Identity', [ovr_name.onnx_name], [output_name])
+ container.add_node("Identity", [ovr_name.onnx_name], [output_name])
- container.add_node(
- 'ArgMax', 'ovr_output', operator.outputs[0].full_name, axis=1)
+ container.add_node("ArgMax", "ovr_output", operator.outputs[0].full_name, axis=1)
-register_converter('SklearnOneVsOneClassifier',
- convert_one_vs_one_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True]})
+register_converter(
+ "SklearnOneVsOneClassifier",
+ convert_one_vs_one_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ },
+)
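
Illustration (not part of the patch): a hedged sketch of exercising the OneVsOneClassifier converter registered above; the option names mirror the register_converter call ("zipmap", "nocl", "output_class_labels") and the estimator and data below are placeholders.

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.multiclass import OneVsOneClassifier
    from skl2onnx import to_onnx

    X = np.random.randn(30, 4).astype(np.float32)
    y = np.arange(30) % 3
    ovo = OneVsOneClassifier(LogisticRegression()).fit(X, y)

    # zipmap=False returns the aggregated decision scores as a plain 2D tensor
    # instead of a list of dictionaries.
    onx = to_onnx(ovo, X, options={id(ovo): {"zipmap": False}})
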
diff --git a/skl2onnx/operator_converters/one_vs_rest_classifier.py b/skl2onnx/operator_converters/one_vs_rest_classifier.py
index 7a5287cb3..3709460cf 100644
--- a/skl2onnx/operator_converters/one_vs_rest_classifier.py
+++ b/skl2onnx/operator_converters/one_vs_rest_classifier.py
@@ -5,81 +5,114 @@
from sklearn.svm import LinearSVC
from ..proto import onnx_proto
from ..common._apply_operation import (
- apply_concat, apply_identity, apply_mul, apply_reshape)
+ apply_concat,
+ apply_identity,
+ apply_mul,
+ apply_reshape,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common._apply_operation import apply_normalization
from ..common._apply_operation import (
- apply_slice, apply_sub, apply_cast, apply_abs, apply_add, apply_div)
+ apply_slice,
+ apply_sub,
+ apply_cast,
+ apply_abs,
+ apply_add,
+ apply_div,
+)
from ..common.utils_classifier import _finalize_converter_classes
from ..common.data_types import guess_proto_type, Int64TensorType
-from ..algebra.onnx_ops import (
- OnnxReshape, OnnxShape, OnnxSlice, OnnxTile)
+from ..algebra.onnx_ops import OnnxReshape, OnnxShape, OnnxSlice, OnnxTile
from .._supported_operators import sklearn_operator_name_map
-def _iteration_one_versus(scope, container, inputs, i, estimator, cl_type,
- proto_dtype, use_raw_scores=True, prob_shape=None):
+def _iteration_one_versus(
+ scope,
+ container,
+ inputs,
+ i,
+ estimator,
+ cl_type,
+ proto_dtype,
+ use_raw_scores=True,
+ prob_shape=None,
+):
op_type = sklearn_operator_name_map[type(estimator)]
- this_operator = scope.declare_local_operator(
- op_type, raw_model=estimator)
+ this_operator = scope.declare_local_operator(op_type, raw_model=estimator)
this_operator.inputs = inputs
if is_regressor(estimator):
- score_name = scope.declare_local_variable('score_%d' % i, cl_type())
+ score_name = scope.declare_local_variable("score_%d" % i, cl_type())
this_operator.outputs.append(score_name)
- if hasattr(estimator, 'coef_') and len(estimator.coef_.shape) == 2:
+ if hasattr(estimator, "coef_") and len(estimator.coef_.shape) == 2:
raise RuntimeError(
"OneVsRestClassifier or OneVsOneClassifier accepts "
- "regressor with only one target.")
+ "regressor with only one target."
+ )
p1 = score_name.onnx_name
else:
- if container.has_options(estimator, 'raw_scores'):
- container.add_options(
- id(estimator), {'raw_scores': use_raw_scores})
- scope.add_options(
- id(estimator), {'raw_scores': use_raw_scores})
- label_name = scope.declare_local_variable(
- 'label_%d' % i, Int64TensorType())
- prob_name = scope.declare_local_variable('proba_%d' % i, cl_type())
+ if container.has_options(estimator, "raw_scores"):
+ container.add_options(id(estimator), {"raw_scores": use_raw_scores})
+ scope.add_options(id(estimator), {"raw_scores": use_raw_scores})
+ label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType())
+ prob_name = scope.declare_local_variable("proba_%d" % i, cl_type())
this_operator.outputs.append(label_name)
this_operator.outputs.append(prob_name)
# gets the probability for the class 1
- p1 = scope.get_unique_variable_name('probY_%d' % i)
+ p1 = scope.get_unique_variable_name("probY_%d" % i)
if isinstance(estimator, LinearSVC):
apply_identity(scope, prob_name.onnx_name, p1, container)
else:
- apply_slice(scope, prob_name.onnx_name, p1, container, starts=[1],
- ends=[2], axes=[1])
+ apply_slice(
+ scope,
+ prob_name.onnx_name,
+ p1,
+ container,
+ starts=[1],
+ ends=[2],
+ axes=[1],
+ )
return None, None, p1
-def convert_one_vs_rest_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_one_vs_rest_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *OneVsRestClassifier* into *ONNX* format.
"""
- if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
+ if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]:
raise RuntimeError(
"Option 'nocl' is not implemented for operator '{}'.".format(
- operator.raw_operator.__class__.__name__))
+ operator.raw_operator.__class__.__name__
+ )
+ )
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
options = container.get_options(op, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
+ use_raw_scores = options["raw_scores"]
probs_names = []
cl_type = operator.inputs[0].type.__class__
prob_shape = None
for i, estimator in enumerate(op.estimators_):
prob_shape, _, p1 = _iteration_one_versus(
- scope, container, operator.inputs, i, estimator, cl_type,
- proto_dtype, use_raw_scores, prob_shape=prob_shape)
+ scope,
+ container,
+ operator.inputs,
+ i,
+ estimator,
+ cl_type,
+ proto_dtype,
+ use_raw_scores,
+ prob_shape=prob_shape,
+ )
probs_names.append(p1)
if op.multilabel_:
@@ -93,90 +126,122 @@ def convert_one_vs_rest_classifier(scope: Scope, operator: Operator,
# https://github.com/scikit-learn/scikit-learn/sklearn/
# multiclass.py#L290
# Raw score would mean: scores = conc_name.
- thresh_name = scope.get_unique_variable_name('thresh')
+ thresh_name = scope.get_unique_variable_name("thresh")
container.add_initializer(
- thresh_name, proto_dtype,
- [1, len(op.classes_)], [.5] * len(op.classes_))
- scores = scope.get_unique_variable_name('threshed')
+ thresh_name, proto_dtype, [1, len(op.classes_)], [0.5] * len(op.classes_)
+ )
+ scores = scope.get_unique_variable_name("threshed")
apply_sub(scope, [conc_name, thresh_name], scores, container)
# sign
- signed_input = scope.get_unique_variable_name('signed')
- container.add_node('Sign', [scores], [signed_input],
- name=scope.get_unique_operator_name('Sign'))
+ signed_input = scope.get_unique_variable_name("signed")
+ container.add_node(
+ "Sign",
+ [scores],
+ [signed_input],
+ name=scope.get_unique_operator_name("Sign"),
+ )
# clip
- signed_input_cast = scope.get_unique_variable_name('signed_int64')
- apply_cast(scope, signed_input, signed_input_cast,
- container, to=onnx_proto.TensorProto.INT64)
+ signed_input_cast = scope.get_unique_variable_name("signed_int64")
+ apply_cast(
+ scope,
+ signed_input,
+ signed_input_cast,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
- label_name = scope.get_unique_variable_name('label')
+ label_name = scope.get_unique_variable_name("label")
if container.target_opset <= 11:
- abs_name = scope.get_unique_variable_name('abs')
- add_name = scope.get_unique_variable_name('add')
- cst_2 = scope.get_unique_variable_name('cst2')
- container.add_initializer(
- cst_2, onnx_proto.TensorProto.INT64, [1], [2])
+ abs_name = scope.get_unique_variable_name("abs")
+ add_name = scope.get_unique_variable_name("add")
+ cst_2 = scope.get_unique_variable_name("cst2")
+ container.add_initializer(cst_2, onnx_proto.TensorProto.INT64, [1], [2])
apply_abs(scope, [signed_input_cast], [abs_name], container)
- apply_add(scope, [signed_input_cast, abs_name], [add_name],
- container)
- apply_div(
- scope, [add_name, cst_2], [label_name],
- container)
+ apply_add(scope, [signed_input_cast, abs_name], [add_name], container)
+ apply_div(scope, [add_name, cst_2], [label_name], container)
else:
- zero_cst = scope.get_unique_variable_name('zero')
- container.add_initializer(
- zero_cst, onnx_proto.TensorProto.INT64, [], [0])
+ zero_cst = scope.get_unique_variable_name("zero")
+ container.add_initializer(zero_cst, onnx_proto.TensorProto.INT64, [], [0])
container.add_node(
- 'Clip', [signed_input_cast, zero_cst],
+ "Clip",
+ [signed_input_cast, zero_cst],
[label_name],
- name=scope.get_unique_operator_name('Clip'))
- apply_reshape(scope, [label_name], [operator.outputs[0].full_name],
- container, desired_shape=(-1, op.n_classes_))
+ name=scope.get_unique_operator_name("Clip"),
+ )
+ apply_reshape(
+ scope,
+ [label_name],
+ [operator.outputs[0].full_name],
+ container,
+ desired_shape=(-1, op.n_classes_),
+ )
else:
# concatenates outputs
- conc_name = scope.get_unique_variable_name('concatenated')
+ conc_name = scope.get_unique_variable_name("concatenated")
apply_concat(scope, probs_names, conc_name, container, axis=1)
if len(op.estimators_) == 1:
- zeroth_col_name = scope.get_unique_variable_name('zeroth_col')
- merged_prob_name = scope.get_unique_variable_name('merged_prob')
- unit_float_tensor_name = scope.get_unique_variable_name(
- 'unit_float_tensor')
+ zeroth_col_name = scope.get_unique_variable_name("zeroth_col")
+ merged_prob_name = scope.get_unique_variable_name("merged_prob")
+ unit_float_tensor_name = scope.get_unique_variable_name("unit_float_tensor")
if use_raw_scores:
container.add_initializer(
- unit_float_tensor_name, proto_dtype, [], [-1.0])
- apply_mul(scope, [unit_float_tensor_name, conc_name],
- zeroth_col_name, container, broadcast=1)
+ unit_float_tensor_name, proto_dtype, [], [-1.0]
+ )
+ apply_mul(
+ scope,
+ [unit_float_tensor_name, conc_name],
+ zeroth_col_name,
+ container,
+ broadcast=1,
+ )
else:
container.add_initializer(
- unit_float_tensor_name, proto_dtype, [], [1.0])
- apply_sub(scope, [unit_float_tensor_name, conc_name],
- zeroth_col_name, container, broadcast=1)
- apply_concat(scope, [zeroth_col_name, conc_name],
- merged_prob_name, container, axis=1)
+ unit_float_tensor_name, proto_dtype, [], [1.0]
+ )
+ apply_sub(
+ scope,
+ [unit_float_tensor_name, conc_name],
+ zeroth_col_name,
+ container,
+ broadcast=1,
+ )
+ apply_concat(
+ scope, [zeroth_col_name, conc_name], merged_prob_name, container, axis=1
+ )
conc_name = merged_prob_name
if use_raw_scores:
- apply_identity(scope, conc_name,
- operator.outputs[1].full_name, container)
+ apply_identity(scope, conc_name, operator.outputs[1].full_name, container)
else:
# normalizes the outputs
apply_normalization(
- scope, conc_name, operator.outputs[1].full_name,
- container, axis=1, p=1)
+ scope, conc_name, operator.outputs[1].full_name, container, axis=1, p=1
+ )
# extracts the labels
- label_name = scope.get_unique_variable_name('label_name')
- container.add_node('ArgMax', conc_name, label_name,
- name=scope.get_unique_operator_name('ArgMax'),
- axis=1)
+ label_name = scope.get_unique_variable_name("label_name")
+ container.add_node(
+ "ArgMax",
+ conc_name,
+ label_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
- _finalize_converter_classes(scope, label_name,
- operator.outputs[0].full_name, container,
- op.classes_, proto_dtype)
+ _finalize_converter_classes(
+ scope,
+ label_name,
+ operator.outputs[0].full_name,
+ container,
+ op.classes_,
+ proto_dtype,
+ )
-def convert_constant_predictor_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_constant_predictor_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *_ConstantPredictor* into *ONNX* format.
"""
@@ -185,35 +250,51 @@ def convert_constant_predictor_classifier(scope: Scope, operator: Operator,
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
- dtype = {onnx_proto.TensorProto.DOUBLE: np.float64,
- onnx_proto.TensorProto.FLOAT: np.float32}
+ dtype = {
+ onnx_proto.TensorProto.DOUBLE: np.float64,
+ onnx_proto.TensorProto.FLOAT: np.float32,
+ }
shape = OnnxShape(operator.inputs[0].full_name, op_version=op_version)
- first = OnnxSlice(shape, np.array([0], dtype=np.int64),
- np.array([1], dtype=np.int64), op_version=op_version)
+ first = OnnxSlice(
+ shape,
+ np.array([0], dtype=np.int64),
+ np.array([1], dtype=np.int64),
+ op_version=op_version,
+ )
y = op.y_.astype(dtype[proto_dtype]).ravel()
- labels = OnnxTile(y.astype(np.int64),
- first, op_version=op_version,
- output_names=[operator.outputs[0].full_name])
+ labels = OnnxTile(
+ y.astype(np.int64),
+ first,
+ op_version=op_version,
+ output_names=[operator.outputs[0].full_name],
+ )
cst = np.hstack([(1 - y).astype(y.dtype), y])
proba_flat = OnnxTile(cst, first, op_version=op_version)
proba_reshape = OnnxReshape(
- proba_flat, np.array([-1, 2], dtype=np.int64),
+ proba_flat,
+ np.array([-1, 2], dtype=np.int64),
output_names=[operator.outputs[1].full_name],
- op_version=op_version)
+ op_version=op_version,
+ )
labels.add_to(scope, container)
proba_reshape.add_to(scope, container)
-register_converter('SklearnOneVsRestClassifier',
- convert_one_vs_rest_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
+register_converter(
+ "SklearnOneVsRestClassifier",
+ convert_one_vs_rest_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
-register_converter('Sklearn_ConstantPredictor',
- convert_constant_predictor_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False]})
+register_converter(
+ "Sklearn_ConstantPredictor",
+ convert_constant_predictor_classifier,
+ options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
+)
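
The two converters above are registered with the `zipmap`, `nocl`, `output_class_labels` and `raw_scores` options. A minimal sketch of how an exported OneVsRestClassifier is typically produced and scored, assuming the usual skl2onnx entry points; the estimator, data and option values below are illustrative, not taken from this change:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.multiclass import OneVsRestClassifier
    from skl2onnx import to_onnx
    import onnxruntime as rt

    X, y = load_iris(return_X_y=True)
    X = X.astype(np.float32)
    model = OneVsRestClassifier(LogisticRegression(max_iter=500)).fit(X, y)

    # zipmap=False keeps the second output as a plain probability tensor,
    # which is what the converter concatenates and normalizes.
    onx = to_onnx(model, X, options={id(model): {"zipmap": False}})

    sess = rt.InferenceSession(
        onx.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    labels, proba = sess.run(None, {"X": X[:5]})
    print(labels, proba.shape)
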
diff --git a/skl2onnx/operator_converters/ordinal_encoder.py b/skl2onnx/operator_converters/ordinal_encoder.py
index 00d80f056..6eec3c521 100644
--- a/skl2onnx/operator_converters/ordinal_encoder.py
+++ b/skl2onnx/operator_converters/ordinal_encoder.py
@@ -10,27 +10,33 @@
from ..proto import onnx_proto
-def convert_sklearn_ordinal_encoder(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_ordinal_encoder(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
ordinal_op = operator.raw_operator
result = []
concatenated_input_name = operator.inputs[0].full_name
- concat_result_name = scope.get_unique_variable_name('concat_result')
+ concat_result_name = scope.get_unique_variable_name("concat_result")
if len(operator.inputs) > 1:
- concatenated_input_name = scope.get_unique_variable_name(
- 'concatenated_input')
- if all(isinstance(inp.type, type(operator.inputs[0].type))
- for inp in operator.inputs):
+ concatenated_input_name = scope.get_unique_variable_name("concatenated_input")
+ if all(
+ isinstance(inp.type, type(operator.inputs[0].type))
+ for inp in operator.inputs
+ ):
input_names = list(map(lambda x: x.full_name, operator.inputs))
else:
input_names = []
for inp in operator.inputs:
if isinstance(inp.type, Int64TensorType):
- input_names.append(scope.get_unique_variable_name(
- 'cast_input'))
- apply_cast(scope, inp.full_name, input_names[-1],
- container, to=onnx_proto.TensorProto.STRING)
+ input_names.append(scope.get_unique_variable_name("cast_input"))
+ apply_cast(
+ scope,
+ inp.full_name,
+ input_names[-1],
+ container,
+ to=onnx_proto.TensorProto.STRING,
+ )
elif isinstance(inp.type, StringTensorType):
input_names.append(inp.full_name)
else:
@@ -38,57 +44,72 @@ def convert_sklearn_ordinal_encoder(scope: Scope, operator: Operator,
"{} input datatype not yet supported. "
"You may raise an issue at "
"https://github.com/onnx/sklearn-onnx/issues"
- "".format(type(inp.type)))
+ "".format(type(inp.type))
+ )
- apply_concat(scope, input_names,
- concatenated_input_name, container, axis=1)
+ apply_concat(scope, input_names, concatenated_input_name, container, axis=1)
if len(ordinal_op.categories_) == 0:
raise RuntimeError(
- "No categories found in type=%r, encoder=%r." % (
- type(ordinal_op), ordinal_op))
+ "No categories found in type=%r, encoder=%r."
+ % (type(ordinal_op), ordinal_op)
+ )
for index, categories in enumerate(ordinal_op.categories_):
- attrs = {'name': scope.get_unique_operator_name('LabelEncoder')}
+ attrs = {"name": scope.get_unique_operator_name("LabelEncoder")}
if len(categories) > 0:
- if (np.issubdtype(categories.dtype, np.floating) or
- categories.dtype == np.bool_):
- attrs['keys_floats'] = categories
+ if (
+ np.issubdtype(categories.dtype, np.floating)
+ or categories.dtype == np.bool_
+ ):
+ attrs["keys_floats"] = categories
elif np.issubdtype(categories.dtype, np.signedinteger):
- attrs['keys_int64s'] = categories
+ attrs["keys_int64s"] = categories
else:
- attrs['keys_strings'] = np.array(
- [str(s).encode('utf-8') for s in categories])
- attrs['values_int64s'] = np.arange(
- len(categories)).astype(np.int64)
+ attrs["keys_strings"] = np.array(
+ [str(s).encode("utf-8") for s in categories]
+ )
+ attrs["values_int64s"] = np.arange(len(categories)).astype(np.int64)
- index_name = scope.get_unique_variable_name('index')
- feature_column_name = scope.get_unique_variable_name(
- 'feature_column')
- result.append(scope.get_unique_variable_name('ordinal_output'))
- label_encoder_output = scope.get_unique_variable_name(
- 'label_encoder')
+ index_name = scope.get_unique_variable_name("index")
+ feature_column_name = scope.get_unique_variable_name("feature_column")
+ result.append(scope.get_unique_variable_name("ordinal_output"))
+ label_encoder_output = scope.get_unique_variable_name("label_encoder")
container.add_initializer(
- index_name, onnx_proto.TensorProto.INT64, [], [index])
+ index_name, onnx_proto.TensorProto.INT64, [], [index]
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[concatenated_input_name, index_name],
- feature_column_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ feature_column_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
- container.add_node('LabelEncoder', feature_column_name,
- label_encoder_output, op_domain='ai.onnx.ml',
- op_version=2, **attrs)
- apply_reshape(scope, label_encoder_output, result[-1],
- container, desired_shape=(-1, 1))
- apply_concat(scope, result, concat_result_name,
- container, axis=1)
+ container.add_node(
+ "LabelEncoder",
+ feature_column_name,
+ label_encoder_output,
+ op_domain="ai.onnx.ml",
+ op_version=2,
+ **attrs
+ )
+ apply_reshape(
+ scope,
+ label_encoder_output,
+ result[-1],
+ container,
+ desired_shape=(-1, 1),
+ )
+ apply_concat(scope, result, concat_result_name, container, axis=1)
cast_type = (
onnx_proto.TensorProto.FLOAT
if np.issubdtype(ordinal_op.dtype, np.floating)
- else onnx_proto.TensorProto.INT64)
- apply_cast(scope, concat_result_name, operator.output_full_names,
- container, to=cast_type)
+ else onnx_proto.TensorProto.INT64
+ )
+ apply_cast(
+ scope, concat_result_name, operator.output_full_names, container, to=cast_type
+ )
-register_converter('SklearnOrdinalEncoder', convert_sklearn_ordinal_encoder)
+register_converter("SklearnOrdinalEncoder", convert_sklearn_ordinal_encoder)
diff --git a/skl2onnx/operator_converters/ovr_decision_function.py b/skl2onnx/operator_converters/ovr_decision_function.py
index 11e9ed0fd..a45834598 100644
--- a/skl2onnx/operator_converters/ovr_decision_function.py
+++ b/skl2onnx/operator_converters/ovr_decision_function.py
@@ -1,8 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
from ..common._apply_operation import (
- apply_concat, apply_abs,
- apply_add, apply_mul, apply_div)
+ apply_concat,
+ apply_abs,
+ apply_add,
+ apply_mul,
+ apply_div,
+)
+
try:
from ..common._apply_operation import apply_less
except ImportError:
@@ -16,9 +21,8 @@
def convert_sklearn_ovr_decision_function(
- scope: Scope, operator: Operator,
- container: ModelComponentContainer):
-
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# Applies _ovr_decision_function.
# See https://github.com/scikit-learn/scikit-learn/blob/
# master/sklearn/utils/multiclass.py#L407:
@@ -44,88 +48,102 @@ def convert_sklearn_ovr_decision_function(
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
- cst3 = scope.get_unique_variable_name('cst3')
+ cst3 = scope.get_unique_variable_name("cst3")
container.add_initializer(cst3, proto_dtype, [], [3])
- cst1 = scope.get_unique_variable_name('cst1')
+ cst1 = scope.get_unique_variable_name("cst1")
container.add_initializer(cst1, proto_dtype, [], [1])
iprediction = operator.inputs[0].full_name
score_name = operator.inputs[1].full_name
n_classes = len(op.classes_)
- sumc_name = [scope.get_unique_variable_name('svcsumc_%d' % i)
- for i in range(n_classes)]
- vote_name = [scope.get_unique_variable_name('svcvote_%d' % i)
- for i in range(n_classes)]
+ sumc_name = [
+ scope.get_unique_variable_name("svcsumc_%d" % i) for i in range(n_classes)
+ ]
+ vote_name = [
+ scope.get_unique_variable_name("svcvote_%d" % i) for i in range(n_classes)
+ ]
sumc_add = {n: [] for n in sumc_name}
vote_add = {n: [] for n in vote_name}
k = 0
for i in range(n_classes):
for j in range(i + 1, n_classes):
- ind = scope.get_unique_variable_name('Cind_%d' % k)
- container.add_initializer(
- ind, onnx_proto.TensorProto.INT64, [], [k])
+ ind = scope.get_unique_variable_name("Cind_%d" % k)
+ container.add_initializer(ind, onnx_proto.TensorProto.INT64, [], [k])
# confidences
- ext = scope.get_unique_variable_name('Csvc_%d' % k)
+ ext = scope.get_unique_variable_name("Csvc_%d" % k)
container.add_node(
- 'ArrayFeatureExtractor', [score_name, ind],
- ext, op_domain='ai.onnx.ml')
+ "ArrayFeatureExtractor", [score_name, ind], ext, op_domain="ai.onnx.ml"
+ )
sumc_add[sumc_name[j]].append(ext)
- neg = scope.get_unique_variable_name('Cneg_%d' % k)
- container.add_node('Neg', ext, neg, op_domain='', op_version=6)
+ neg = scope.get_unique_variable_name("Cneg_%d" % k)
+ container.add_node("Neg", ext, neg, op_domain="", op_version=6)
sumc_add[sumc_name[i]].append(neg)
# votes
- ext = scope.get_unique_variable_name('Vsvcv_%d' % k)
+ ext = scope.get_unique_variable_name("Vsvcv_%d" % k)
container.add_node(
- 'ArrayFeatureExtractor', [iprediction, ind],
- ext, op_domain='ai.onnx.ml')
+ "ArrayFeatureExtractor", [iprediction, ind], ext, op_domain="ai.onnx.ml"
+ )
vote_add[vote_name[j]].append(ext)
- neg = scope.get_unique_variable_name('Vnegv_%d' % k)
- container.add_node('Neg', ext, neg, op_domain='', op_version=6)
- neg1 = scope.get_unique_variable_name('Vnegv1_%d' % k)
- apply_add(scope, [neg, cst1], neg1, container, broadcast=1,
- operator_name='AddCl_%d_%d' % (i, j))
+ neg = scope.get_unique_variable_name("Vnegv_%d" % k)
+ container.add_node("Neg", ext, neg, op_domain="", op_version=6)
+ neg1 = scope.get_unique_variable_name("Vnegv1_%d" % k)
+ apply_add(
+ scope,
+ [neg, cst1],
+ neg1,
+ container,
+ broadcast=1,
+ operator_name="AddCl_%d_%d" % (i, j),
+ )
vote_add[vote_name[i]].append(neg1)
# next
k += 1
for k, v in sumc_add.items():
- name = scope.get_unique_operator_name('Sum')
- container.add_node(
- 'Sum', v, k, op_domain='', name=name, op_version=8)
+ name = scope.get_unique_operator_name("Sum")
+ container.add_node("Sum", v, k, op_domain="", name=name, op_version=8)
for k, v in vote_add.items():
- name = scope.get_unique_operator_name('Sum')
- container.add_node(
- 'Sum', v, k, op_domain='', name=name, op_version=8)
+ name = scope.get_unique_operator_name("Sum")
+ container.add_node("Sum", v, k, op_domain="", name=name, op_version=8)
- conc = scope.get_unique_variable_name('Csvcconc')
+ conc = scope.get_unique_variable_name("Csvcconc")
apply_concat(scope, sumc_name, conc, container, axis=1)
- conc_vote = scope.get_unique_variable_name('Vsvcconcv')
+ conc_vote = scope.get_unique_variable_name("Vsvcconcv")
apply_concat(scope, vote_name, conc_vote, container, axis=1)
- conc_abs = scope.get_unique_variable_name('Cabs')
+ conc_abs = scope.get_unique_variable_name("Cabs")
apply_abs(scope, conc, conc_abs, container)
- conc_abs1 = scope.get_unique_variable_name('Cconc_abs1')
- apply_add(scope, [conc_abs, cst1], conc_abs1, container, broadcast=1,
- operator_name='AddF0')
- conc_abs3 = scope.get_unique_variable_name('Cconc_abs3')
+ conc_abs1 = scope.get_unique_variable_name("Cconc_abs1")
+ apply_add(
+ scope,
+ [conc_abs, cst1],
+ conc_abs1,
+ container,
+ broadcast=1,
+ operator_name="AddF0",
+ )
+ conc_abs3 = scope.get_unique_variable_name("Cconc_abs3")
apply_mul(scope, [conc_abs1, cst3], conc_abs3, container, broadcast=1)
- final = scope.get_unique_variable_name('Csvcfinal')
- apply_div(
- scope, [conc, conc_abs3], final, container, broadcast=0)
+ final = scope.get_unique_variable_name("Csvcfinal")
+ apply_div(scope, [conc, conc_abs3], final, container, broadcast=0)
output_name = operator.outputs[0].full_name
apply_add(
- scope, [conc_vote, final], output_name, container, broadcast=0,
- operator_name='AddF1')
+ scope,
+ [conc_vote, final],
+ output_name,
+ container,
+ broadcast=0,
+ operator_name="AddF1",
+ )
-register_converter('SklearnOVRDecisionFunction',
- convert_sklearn_ovr_decision_function)
+register_converter("SklearnOVRDecisionFunction", convert_sklearn_ovr_decision_function)
diff --git a/skl2onnx/operator_converters/pipelines.py b/skl2onnx/operator_converters/pipelines.py
index c7b3ba34d..9c0ded09f 100644
--- a/skl2onnx/operator_converters/pipelines.py
+++ b/skl2onnx/operator_converters/pipelines.py
@@ -7,43 +7,48 @@
from .._parse import _parse_sklearn
-def convert_pipeline(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_pipeline(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
model = operator.raw_operator
inputs = operator.inputs
for step in model.steps:
step_model = step[1]
if is_classifier(step_model):
- scope.add_options(id(step_model), options={'zipmap': False})
- container.add_options(id(step_model), options={'zipmap': False})
- outputs = _parse_sklearn(scope, step_model, inputs,
- custom_parsers=None)
+ scope.add_options(id(step_model), options={"zipmap": False})
+ container.add_options(id(step_model), options={"zipmap": False})
+ outputs = _parse_sklearn(scope, step_model, inputs, custom_parsers=None)
inputs = outputs
if len(outputs) != len(operator.outputs):
raise RuntimeError(
"Mismatch between pipeline output %d and "
- "last step outputs %d." % (
- len(outputs), len(operator.outputs)))
+ "last step outputs %d." % (len(outputs), len(operator.outputs))
+ )
for fr, to in zip(outputs, operator.outputs):
container.add_node(
- 'Identity', fr.full_name, to.full_name,
- name=scope.get_unique_operator_name("Id" + operator.onnx_name))
+ "Identity",
+ fr.full_name,
+ to.full_name,
+ name=scope.get_unique_operator_name("Id" + operator.onnx_name),
+ )
-def convert_feature_union(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_feature_union(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
raise NotImplementedError(
- "This converter not needed so far. It is usually handled "
- "during parsing.")
+ "This converter not needed so far. It is usually handled " "during parsing."
+ )
-def convert_column_transformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_column_transformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
raise NotImplementedError(
- "This converter not needed so far. It is usually handled "
- "during parsing.")
+ "This converter not needed so far. It is usually handled " "during parsing."
+ )
-register_converter('SklearnPipeline', convert_pipeline)
-register_converter('SklearnFeatureUnion', convert_feature_union)
-register_converter('SklearnColumnTransformer', convert_column_transformer)
+register_converter("SklearnPipeline", convert_pipeline)
+register_converter("SklearnFeatureUnion", convert_feature_union)
+register_converter("SklearnColumnTransformer", convert_column_transformer)
diff --git a/skl2onnx/operator_converters/polynomial_features.py b/skl2onnx/operator_converters/polynomial_features.py
index 640310e20..27b0c9533 100644
--- a/skl2onnx/operator_converters/polynomial_features.py
+++ b/skl2onnx/operator_converters/polynomial_features.py
@@ -10,87 +10,123 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_polynomial_features(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_polynomial_features(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
transformed_columns = [None] * (op.n_output_features_)
- n_features = (op.n_features_in_
- if hasattr(op, 'n_features_in_')
- else op.n_features_)
- if hasattr(op, '_min_degree'):
+ n_features = op.n_features_in_ if hasattr(op, "n_features_in_") else op.n_features_
+ if hasattr(op, "_min_degree"):
# scikit-learn >= 1.0
combinations = op._combinations(
- n_features, op._min_degree, op._max_degree, op.interaction_only,
- op.include_bias)
+ n_features,
+ op._min_degree,
+ op._max_degree,
+ op.interaction_only,
+ op.include_bias,
+ )
else:
combinations = op._combinations(
- n_features, op.degree, op.interaction_only, op.include_bias)
+ n_features, op.degree, op.interaction_only, op.include_bias
+ )
unit_name = None
last_feat = None
for i, comb in enumerate(combinations):
if len(comb) == 0:
- unit_name = scope.get_unique_variable_name('unit')
+ unit_name = scope.get_unique_variable_name("unit")
transformed_columns[i] = unit_name
else:
- comb_name = scope.get_unique_variable_name('comb')
- col_name = scope.get_unique_variable_name('col')
- prod_name = scope.get_unique_variable_name('prod')
+ comb_name = scope.get_unique_variable_name("comb")
+ col_name = scope.get_unique_variable_name("col")
+ prod_name = scope.get_unique_variable_name("prod")
- container.add_initializer(comb_name, onnx_proto.TensorProto.INT64,
- [len(comb)], list(comb))
+ container.add_initializer(
+ comb_name, onnx_proto.TensorProto.INT64, [len(comb)], list(comb)
+ )
container.add_node(
- 'ArrayFeatureExtractor',
- [operator.inputs[0].full_name, comb_name], col_name,
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
- op_domain='ai.onnx.ml')
+ "ArrayFeatureExtractor",
+ [operator.inputs[0].full_name, comb_name],
+ col_name,
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ op_domain="ai.onnx.ml",
+ )
reduce_prod_input = col_name
- if (operator.inputs[0].type._get_element_onnx_type()
- == onnx_proto.TensorProto.INT64):
- float_col_name = scope.get_unique_variable_name('col')
+ if (
+ operator.inputs[0].type._get_element_onnx_type()
+ == onnx_proto.TensorProto.INT64
+ ):
+ float_col_name = scope.get_unique_variable_name("col")
- apply_cast(scope, col_name, float_col_name, container,
- to=onnx_proto.TensorProto.FLOAT)
+ apply_cast(
+ scope,
+ col_name,
+ float_col_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
reduce_prod_input = float_col_name
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
+ axis_name = scope.get_unique_variable_name("axis")
container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name, onnx_proto.TensorProto.INT64, [1], [1]
+ )
container.add_node(
- 'ReduceProd', [reduce_prod_input, axis_name], prod_name,
- name=scope.get_unique_operator_name('ReduceProd'))
+ "ReduceProd",
+ [reduce_prod_input, axis_name],
+ prod_name,
+ name=scope.get_unique_operator_name("ReduceProd"),
+ )
else:
container.add_node(
- 'ReduceProd', reduce_prod_input, prod_name,
+ "ReduceProd",
+ reduce_prod_input,
+ prod_name,
axes=[1],
- name=scope.get_unique_operator_name('ReduceProd'))
+ name=scope.get_unique_operator_name("ReduceProd"),
+ )
transformed_columns[i] = prod_name
last_feat = prod_name
if unit_name is not None:
- shape_name = scope.get_unique_variable_name('shape')
- container.add_node('Shape', last_feat, shape_name)
- container.add_node('ConstantOfShape', shape_name, unit_name,
- value=make_tensor(
- 'ONE', TensorProto.FLOAT, [1], [1.]),
- op_version=9)
+ shape_name = scope.get_unique_variable_name("shape")
+ container.add_node("Shape", last_feat, shape_name)
+ container.add_node(
+ "ConstantOfShape",
+ shape_name,
+ unit_name,
+ value=make_tensor("ONE", TensorProto.FLOAT, [1], [1.0]),
+ op_version=9,
+ )
- if (operator.inputs[0].type._get_element_onnx_type()
- == onnx_proto.TensorProto.INT64):
- concat_result_name = scope.get_unique_variable_name('concat_result')
+ if operator.inputs[0].type._get_element_onnx_type() == onnx_proto.TensorProto.INT64:
+ concat_result_name = scope.get_unique_variable_name("concat_result")
- apply_concat(scope, [t for t in transformed_columns],
- concat_result_name, container, axis=1)
- apply_cast(scope, concat_result_name,
- operator.outputs[0].full_name, container,
- to=onnx_proto.TensorProto.INT64)
+ apply_concat(
+ scope,
+ [t for t in transformed_columns],
+ concat_result_name,
+ container,
+ axis=1,
+ )
+ apply_cast(
+ scope,
+ concat_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- apply_concat(scope, [t for t in transformed_columns],
- operator.outputs[0].full_name, container, axis=1)
+ apply_concat(
+ scope,
+ [t for t in transformed_columns],
+ operator.outputs[0].full_name,
+ container,
+ axis=1,
+ )
-register_converter('SklearnPolynomialFeatures',
- convert_sklearn_polynomial_features)
+register_converter("SklearnPolynomialFeatures", convert_sklearn_polynomial_features)
diff --git a/skl2onnx/operator_converters/power_transformer.py b/skl2onnx/operator_converters/power_transformer.py
index d2c69baed..461900cbe 100644
--- a/skl2onnx/operator_converters/power_transformer.py
+++ b/skl2onnx/operator_converters/power_transformer.py
@@ -7,13 +7,25 @@
from ..common._container import ModelComponentContainer
from ..common.data_types import guess_numpy_type
from ..algebra.onnx_ops import (
- OnnxAdd, OnnxSub, OnnxPow, OnnxDiv, OnnxMul,
- OnnxCast, OnnxNot, OnnxLess, OnnxLog, OnnxNeg,
- OnnxImputer, OnnxIdentity, OnnxScaler)
-
-
-def convert_powertransformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ OnnxAdd,
+ OnnxSub,
+ OnnxPow,
+ OnnxDiv,
+ OnnxMul,
+ OnnxCast,
+ OnnxNot,
+ OnnxLess,
+ OnnxLog,
+ OnnxNeg,
+ OnnxImputer,
+ OnnxIdentity,
+ OnnxScaler,
+)
+
+
+def convert_powertransformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""Converter for PowerTransformer"""
op_in = operator.inputs[0]
op_out = operator.outputs[0].full_name
@@ -30,14 +42,14 @@ def convert_powertransformer(scope: Scope, operator: Operator,
# logical masks for input
less_than_zero = OnnxLess(op_in, zeros_, op_version=opv)
- less_mask = OnnxCast(less_than_zero,
- to=getattr(TensorProto, 'FLOAT'),
- op_version=opv)
+ less_mask = OnnxCast(
+ less_than_zero, to=getattr(TensorProto, "FLOAT"), op_version=opv
+ )
greater_than_zero = OnnxNot(less_than_zero, op_version=opv)
- greater_mask = OnnxCast(greater_than_zero,
- to=getattr(TensorProto, 'FLOAT'),
- op_version=opv)
+ greater_mask = OnnxCast(
+ greater_than_zero, to=getattr(TensorProto, "FLOAT"), op_version=opv
+ )
# logical masks for lambdas
lambda_zero_mask = np.float32(lambdas == 0)
@@ -45,7 +57,7 @@ def convert_powertransformer(scope: Scope, operator: Operator,
lambda_two_mask = np.float32(lambdas == 2)
lambda_nontwo_mask = np.float32(lambdas != 2)
- if 'yeo-johnson' in op.method:
+ if "yeo-johnson" in op.method:
y0 = OnnxAdd(op_in, ones_, op_version=opv) # For positive input
y1 = OnnxSub(ones_, op_in, op_version=opv) # For negative input
@@ -54,31 +66,37 @@ def convert_powertransformer(scope: Scope, operator: Operator,
y_gr0_l_ne0 = OnnxSub(y_gr0_l_ne0, ones_, op_version=opv)
y_gr0_l_ne0 = OnnxDiv(y_gr0_l_ne0, lambdas, op_version=opv)
y_gr0_l_ne0 = OnnxImputer(
- y_gr0_l_ne0, imputed_value_floats=[0.0],
- replaced_value_float=np.inf, op_version=opv)
- y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask,
- op_version=opv)
+ y_gr0_l_ne0,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.inf,
+ op_version=opv,
+ )
+ y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, op_version=opv)
# positive input, lambda == 0
y_gr0_l_eq0 = OnnxLog(y0, op_version=opv)
- y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask,
- op_version=opv)
+ y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask, op_version=opv)
# positive input, an arbitrary lambda
y_gr0 = OnnxAdd(y_gr0_l_ne0, y_gr0_l_eq0, op_version=opv)
- y_gr0 = OnnxImputer(y_gr0, imputed_value_floats=[0.0],
- replaced_value_float=np.NAN,
- op_version=opv)
+ y_gr0 = OnnxImputer(
+ y_gr0,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.NAN,
+ op_version=opv,
+ )
y_gr0 = OnnxMul(y_gr0, greater_mask, op_version=opv)
# negative input, lambda != 2
y_le0_l_ne2 = OnnxPow(y1, 2 - lambdas, op_version=opv)
y_le0_l_ne2 = OnnxSub(ones_, y_le0_l_ne2, op_version=opv)
- y_le0_l_ne2 = OnnxDiv(
- y_le0_l_ne2, (2 - lambdas).astype(dtype), op_version=opv)
+ y_le0_l_ne2 = OnnxDiv(y_le0_l_ne2, (2 - lambdas).astype(dtype), op_version=opv)
y_le0_l_ne2 = OnnxImputer(
- y_le0_l_ne2, imputed_value_floats=[0.0],
- replaced_value_float=np.inf, op_version=opv)
+ y_le0_l_ne2,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.inf,
+ op_version=opv,
+ )
y_le0_l_ne2 = OnnxMul(y_le0_l_ne2, lambda_nontwo_mask, op_version=opv)
# negative input, lambda == 2
@@ -87,32 +105,38 @@ def convert_powertransformer(scope: Scope, operator: Operator,
# negative input, an arbitrary lambda
y_le0 = OnnxAdd(y_le0_l_ne2, y_le0_l_eq2, op_version=opv)
- y_le0 = OnnxImputer(y_le0, imputed_value_floats=[0.0],
- replaced_value_float=np.NAN,
- op_version=opv)
+ y_le0 = OnnxImputer(
+ y_le0,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.NAN,
+ op_version=opv,
+ )
y_le0 = OnnxMul(y_le0, less_mask, op_version=opv)
# Arbitrary input and lambda
y = OnnxAdd(y_gr0, y_le0, op_version=opv)
- elif 'box-cox' in op.method:
+ elif "box-cox" in op.method:
# positive input, lambda != 0
y_gr0_l_ne0 = OnnxPow(op_in, lambdas, op_version=opv)
y_gr0_l_ne0 = OnnxSub(y_gr0_l_ne0, ones_, op_version=opv)
y_gr0_l_ne0 = OnnxDiv(y_gr0_l_ne0, lambdas, op_version=opv)
- y_gr0_l_ne0 = OnnxImputer(y_gr0_l_ne0,
- imputed_value_floats=[0.0],
- replaced_value_float=np.inf,
- op_version=opv)
- y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask,
- op_version=opv)
+ y_gr0_l_ne0 = OnnxImputer(
+ y_gr0_l_ne0,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.inf,
+ op_version=opv,
+ )
+ y_gr0_l_ne0 = OnnxMul(y_gr0_l_ne0, lambda_nonzero_mask, op_version=opv)
# positive input, lambda == 0
y_gr0_l_eq0 = OnnxLog(op_in, op_version=opv)
- y_gr0_l_eq0 = OnnxImputer(y_gr0_l_eq0,
- imputed_value_floats=[0.0],
- replaced_value_float=np.NAN,
- op_version=opv)
+ y_gr0_l_eq0 = OnnxImputer(
+ y_gr0_l_eq0,
+ imputed_value_floats=[0.0],
+ replaced_value_float=np.NAN,
+ op_version=opv,
+ )
y_gr0_l_eq0 = OnnxMul(y_gr0_l_eq0, lambda_zero_mask, op_version=opv)
# positive input, arbitrary lambda
@@ -123,26 +147,32 @@ def convert_powertransformer(scope: Scope, operator: Operator,
# raises ValueError.
# Therefore we cannot use convert_sklearn() for that model
else:
- raise NotImplementedError(
- 'Method {} is not supported'.format(op.method))
+ raise NotImplementedError("Method {} is not supported".format(op.method))
- y.set_onnx_name_prefix('pref')
+ y.set_onnx_name_prefix("pref")
if op.standardize:
- use_scaler_op = container.is_allowed({'Scaler'})
+ use_scaler_op = container.is_allowed({"Scaler"})
if not use_scaler_op or dtype != np.float32:
sub = OnnxSub(y, op._scaler.mean_.astype(dtype), op_version=opv)
- final = OnnxDiv(sub, op._scaler.scale_.astype(dtype),
- op_version=opv, output_names=[op_out])
+ final = OnnxDiv(
+ sub,
+ op._scaler.scale_.astype(dtype),
+ op_version=opv,
+ output_names=[op_out],
+ )
else:
final = OnnxScaler(
- y, offset=op._scaler.mean_.astype(dtype),
+ y,
+ offset=op._scaler.mean_.astype(dtype),
scale=(1.0 / op._scaler.scale_).astype(dtype),
- op_version=opv, output_names=[op_out])
+ op_version=opv,
+ output_names=[op_out],
+ )
else:
final = OnnxIdentity(y, op_version=opv, output_names=[op_out])
final.add_to(scope, container)
-register_converter('SklearnPowerTransformer', convert_powertransformer)
+register_converter("SklearnPowerTransformer", convert_powertransformer)
diff --git a/skl2onnx/operator_converters/quadratic_discriminant_analysis.py b/skl2onnx/operator_converters/quadratic_discriminant_analysis.py
index 8615cee9c..d68bb9289 100644
--- a/skl2onnx/operator_converters/quadratic_discriminant_analysis.py
+++ b/skl2onnx/operator_converters/quadratic_discriminant_analysis.py
@@ -2,11 +2,22 @@
from ..common._apply_operation import (
- apply_add, apply_argmax, apply_cast, apply_concat, apply_div, apply_exp,
- apply_log, apply_matmul, apply_mul, apply_pow,
- apply_reducesum, apply_reshape, apply_sub, apply_transpose)
-from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_proto_type)
+ apply_add,
+ apply_argmax,
+ apply_cast,
+ apply_concat,
+ apply_div,
+ apply_exp,
+ apply_log,
+ apply_matmul,
+ apply_mul,
+ apply_pow,
+ apply_reducesum,
+ apply_reshape,
+ apply_sub,
+ apply_transpose,
+)
+from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -14,8 +25,8 @@
def convert_quadratic_discriminant_analysis_classifier(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
-
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
input_name = operator.inputs[0].full_name
model = operator.raw_operator
@@ -25,132 +36,158 @@ def convert_quadratic_discriminant_analysis_classifier(
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
- if isinstance(operator.inputs[0].type,
- (BooleanTensorType, Int64TensorType)):
- cast_input_name = scope.get_unique_variable_name('cast_input')
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_dtype)
+ if isinstance(operator.inputs[0].type, (BooleanTensorType, Int64TensorType)):
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+ apply_cast(
+ scope, operator.input_full_names, cast_input_name, container, to=proto_dtype
+ )
input_name = cast_input_name
norm_array_name = []
sum_array_name = []
- const_n05 = scope.get_unique_variable_name('const_n05')
- const_p2 = scope.get_unique_variable_name('const_p2')
+ const_n05 = scope.get_unique_variable_name("const_n05")
+ const_p2 = scope.get_unique_variable_name("const_p2")
container.add_initializer(const_n05, proto_dtype, [], [-0.5])
container.add_initializer(const_p2, proto_dtype, [], [2])
for i in range(n_classes):
R = model.rotations_[i]
- rotation_name = scope.get_unique_variable_name('rotations')
- container.add_initializer(rotation_name, proto_dtype,
- [R.shape[0], R.shape[1]], R)
+ rotation_name = scope.get_unique_variable_name("rotations")
+ container.add_initializer(
+ rotation_name, proto_dtype, [R.shape[0], R.shape[1]], R
+ )
S = model.scalings_[i]
- scaling_name = scope.get_unique_variable_name('scalings')
+ scaling_name = scope.get_unique_variable_name("scalings")
container.add_initializer(
- scaling_name, proto_dtype, [S.shape[0], ], S)
+ scaling_name,
+ proto_dtype,
+ [
+ S.shape[0],
+ ],
+ S,
+ )
mean = model.means_[i]
- mean_name = scope.get_unique_variable_name('means')
+ mean_name = scope.get_unique_variable_name("means")
container.add_initializer(mean_name, proto_dtype, mean.shape, mean)
- Xm_name = scope.get_unique_variable_name('Xm')
+ Xm_name = scope.get_unique_variable_name("Xm")
apply_sub(scope, [input_name, mean_name], [Xm_name], container)
- s_pow_name = scope.get_unique_variable_name('s_pow_n05')
+ s_pow_name = scope.get_unique_variable_name("s_pow_n05")
apply_pow(scope, [scaling_name, const_n05], [s_pow_name], container)
- mul_name = scope.get_unique_variable_name('mul')
+ mul_name = scope.get_unique_variable_name("mul")
apply_mul(scope, [rotation_name, s_pow_name], [mul_name], container)
- x2_name = scope.get_unique_variable_name('matmul')
+ x2_name = scope.get_unique_variable_name("matmul")
apply_matmul(scope, [Xm_name, mul_name], [x2_name], container)
- pow_x2_name = scope.get_unique_variable_name('pow_x2')
+ pow_x2_name = scope.get_unique_variable_name("pow_x2")
apply_pow(scope, [x2_name, const_p2], [pow_x2_name], container)
- sum_name = scope.get_unique_variable_name('sum')
- apply_reducesum(scope, [pow_x2_name], [sum_name],
- container, axes=[1], keepdims=1)
+ sum_name = scope.get_unique_variable_name("sum")
+ apply_reducesum(
+ scope, [pow_x2_name], [sum_name], container, axes=[1], keepdims=1
+ )
norm_array_name.append(sum_name)
- log_name = scope.get_unique_variable_name('log')
+ log_name = scope.get_unique_variable_name("log")
apply_log(scope, [scaling_name], [log_name], container)
- sum_log_name = scope.get_unique_variable_name('sum_log')
- apply_reducesum(
- scope, [log_name], [sum_log_name], container, keepdims=1)
+ sum_log_name = scope.get_unique_variable_name("sum_log")
+ apply_reducesum(scope, [log_name], [sum_log_name], container, keepdims=1)
sum_array_name.append(sum_log_name)
- concat_norm_name = scope.get_unique_variable_name('concat_norm')
+ concat_norm_name = scope.get_unique_variable_name("concat_norm")
apply_concat(scope, norm_array_name, [concat_norm_name], container)
- reshape_norm_name = scope.get_unique_variable_name('reshape_concat_norm')
- apply_reshape(scope, [concat_norm_name], [reshape_norm_name],
- container, desired_shape=[n_classes, -1])
+ reshape_norm_name = scope.get_unique_variable_name("reshape_concat_norm")
+ apply_reshape(
+ scope,
+ [concat_norm_name],
+ [reshape_norm_name],
+ container,
+ desired_shape=[n_classes, -1],
+ )
- transpose_norm_name = scope.get_unique_variable_name('transpose_norm')
- apply_transpose(scope, [reshape_norm_name], [transpose_norm_name],
- container, perm=(1, 0))
+ transpose_norm_name = scope.get_unique_variable_name("transpose_norm")
+ apply_transpose(
+ scope, [reshape_norm_name], [transpose_norm_name], container, perm=(1, 0)
+ )
- concat_logsum = scope.get_unique_variable_name('concat_logsum')
+ concat_logsum = scope.get_unique_variable_name("concat_logsum")
apply_concat(scope, sum_array_name, [concat_logsum], container)
- add_norm2_u_name = scope.get_unique_variable_name('add_norm2_u')
- apply_add(scope, [transpose_norm_name, concat_logsum],
- [add_norm2_u_name], container)
+ add_norm2_u_name = scope.get_unique_variable_name("add_norm2_u")
+ apply_add(
+ scope, [transpose_norm_name, concat_logsum], [add_norm2_u_name], container
+ )
- norm2_u_n05_name = scope.get_unique_variable_name('norm2_u_n05')
- apply_mul(
- scope, [const_n05, add_norm2_u_name], [norm2_u_n05_name], container)
+ norm2_u_n05_name = scope.get_unique_variable_name("norm2_u_n05")
+ apply_mul(scope, [const_n05, add_norm2_u_name], [norm2_u_n05_name], container)
- priors = scope.get_unique_variable_name('priors')
+ priors = scope.get_unique_variable_name("priors")
container.add_initializer(
- priors, proto_dtype, [n_classes, ], model.priors_)
- log_p = scope.get_unique_variable_name('log_p')
+ priors,
+ proto_dtype,
+ [
+ n_classes,
+ ],
+ model.priors_,
+ )
+ log_p = scope.get_unique_variable_name("log_p")
apply_log(scope, [priors], [log_p], container)
- decision_fun = scope.get_unique_variable_name('decision_fun')
+ decision_fun = scope.get_unique_variable_name("decision_fun")
apply_add(scope, [norm2_u_n05_name, log_p], [decision_fun], container)
- argmax_out = scope.get_unique_variable_name('argmax_out')
+ argmax_out = scope.get_unique_variable_name("argmax_out")
apply_argmax(scope, [decision_fun], [argmax_out], container, axis=1)
- classes = scope.get_unique_variable_name('classes')
+ classes = scope.get_unique_variable_name("classes")
container.add_initializer(
- classes, onnx_proto.TensorProto.INT64, [n_classes], model.classes_)
+ classes, onnx_proto.TensorProto.INT64, [n_classes], model.classes_
+ )
container.add_node(
- 'ArrayFeatureExtractor',
+ "ArrayFeatureExtractor",
[classes, argmax_out],
[operator.outputs[0].full_name],
- op_domain='ai.onnx.ml'
+ op_domain="ai.onnx.ml",
)
- df_max = scope.get_unique_variable_name('df_max')
+ df_max = scope.get_unique_variable_name("df_max")
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
- container.add_node(
- 'ReduceMax', [decision_fun, axis_name], [df_max])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ container.add_node("ReduceMax", [decision_fun, axis_name], [df_max])
else:
- container.add_node(
- 'ReduceMax', [decision_fun], [df_max], axes=[1])
- df_sub_max = scope.get_unique_variable_name('df_sub_max')
+ container.add_node("ReduceMax", [decision_fun], [df_max], axes=[1])
+ df_sub_max = scope.get_unique_variable_name("df_sub_max")
apply_sub(scope, [decision_fun, df_max], [df_sub_max], container)
- likelihood = scope.get_unique_variable_name('likelihood')
+ likelihood = scope.get_unique_variable_name("likelihood")
apply_exp(scope, [df_sub_max], [likelihood], container)
- likelihood_sum = scope.get_unique_variable_name('likelihood_sum')
- apply_reducesum(scope, [likelihood], [likelihood_sum], container,
- axes=[1], keepdims=1)
- apply_div(scope, [likelihood, likelihood_sum],
- [operator.outputs[1].full_name], container, )
-
-
-register_converter('SklearnQuadraticDiscriminantAnalysis',
- convert_quadratic_discriminant_analysis_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True]})
+ likelihood_sum = scope.get_unique_variable_name("likelihood_sum")
+ apply_reducesum(
+ scope, [likelihood], [likelihood_sum], container, axes=[1], keepdims=1
+ )
+ apply_div(
+ scope,
+ [likelihood, likelihood_sum],
+ [operator.outputs[1].full_name],
+ container,
+ )
+
+
+register_converter(
+ "SklearnQuadraticDiscriminantAnalysis",
+ convert_quadratic_discriminant_analysis_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ },
+)
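
The QDA converter above assembles the per-class decision function from the rotations, scalings, means and priors, then turns it into probabilities with a max-shifted softmax (ReduceMax, Exp, ReduceSum, Div). A minimal sketch of exporting the model and comparing probabilities; the data set is illustrative:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
    from skl2onnx import to_onnx
    import onnxruntime as rt

    X, y = load_iris(return_X_y=True)
    X = X.astype(np.float32)
    qda = QuadraticDiscriminantAnalysis().fit(X, y)

    # zipmap=False keeps the probability output as a plain tensor.
    onx = to_onnx(qda, X, options={id(qda): {"zipmap": False}})
    sess = rt.InferenceSession(
        onx.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    labels, proba = sess.run(None, {"X": X[:5]})
    # Expected to be small, on the order of float32 rounding error.
    print(np.abs(proba - qda.predict_proba(X[:5])).max())
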
diff --git a/skl2onnx/operator_converters/random_forest.py b/skl2onnx/operator_converters/random_forest.py
index 4c873eb0f..d04c22ac4 100644
--- a/skl2onnx/operator_converters/random_forest.py
+++ b/skl2onnx/operator_converters/random_forest.py
@@ -10,19 +10,22 @@
apply_reshape,
apply_transpose,
)
-from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_numpy_type)
+from ..common.data_types import BooleanTensorType, Int64TensorType, guess_numpy_type
from ..common._registration import register_converter
from ..common.tree_ensemble import (
add_tree_to_attribute_pairs,
add_tree_to_attribute_pairs_hist_gradient_boosting,
get_default_tree_classifier_attribute_pairs,
- get_default_tree_regressor_attribute_pairs
+ get_default_tree_regressor_attribute_pairs,
)
from ..common.utils_classifier import get_label_classes
from ..proto import onnx_proto
from .decision_tree import (
- predict, _build_labels_path, _build_labels_leaf, _append_decision_output)
+ predict,
+ _build_labels_path,
+ _build_labels_leaf,
+ _append_decision_output,
+)
def _num_estimators(op):
@@ -33,487 +36,655 @@ def _num_estimators(op):
# ...
# classifier.fit(X_tmp, y_tmp)
# classifier.n_estimators += 30
- if hasattr(op, 'estimators_'):
+ if hasattr(op, "estimators_"):
return len(op.estimators_)
- elif hasattr(op, '_predictors'):
+ elif hasattr(op, "_predictors"):
# HistGradientBoosting*
return len(op._predictors)
raise NotImplementedError(
- "Model should have attribute 'estimators_' or '_predictors'.")
+ "Model should have attribute 'estimators_' or '_predictors'."
+ )
def _calculate_labels(scope, container, model, proba):
predictions = []
- transposed_result_name = scope.get_unique_variable_name(
- 'transposed_result')
- apply_transpose(scope, proba, transposed_result_name,
- container, perm=(1, 2, 0))
+ transposed_result_name = scope.get_unique_variable_name("transposed_result")
+ apply_transpose(scope, proba, transposed_result_name, container, perm=(1, 2, 0))
for k in range(model.n_outputs_):
- preds_name = scope.get_unique_variable_name('preds')
- reshaped_preds_name = scope.get_unique_variable_name(
- 'reshaped_preds')
- k_name = scope.get_unique_variable_name('k_column')
- out_k_name = scope.get_unique_variable_name('out_k_column')
- argmax_output_name = scope.get_unique_variable_name(
- 'argmax_output')
- classes_name = scope.get_unique_variable_name('classes')
- reshaped_result_name = scope.get_unique_variable_name(
- 'reshaped_result')
-
- container.add_initializer(
- k_name, onnx_proto.TensorProto.INT64,
- [], [k])
+ preds_name = scope.get_unique_variable_name("preds")
+ reshaped_preds_name = scope.get_unique_variable_name("reshaped_preds")
+ k_name = scope.get_unique_variable_name("k_column")
+ out_k_name = scope.get_unique_variable_name("out_k_column")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ classes_name = scope.get_unique_variable_name("classes")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
+
+ container.add_initializer(k_name, onnx_proto.TensorProto.INT64, [], [k])
container.add_initializer(
- classes_name, onnx_proto.TensorProto.INT64,
- model.classes_[k].shape, model.classes_[k])
+ classes_name,
+ onnx_proto.TensorProto.INT64,
+ model.classes_[k].shape,
+ model.classes_[k],
+ )
container.add_node(
- 'ArrayFeatureExtractor', [transposed_result_name, k_name],
- out_k_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArrayFeatureExtractor",
+ [transposed_result_name, k_name],
+ out_k_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
container.add_node(
- 'ArgMax', out_k_name, argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
- apply_reshape(scope, argmax_output_name, reshaped_result_name,
- container, desired_shape=(1, -1))
+ "ArgMax",
+ out_k_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ apply_reshape(
+ scope,
+ argmax_output_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(1, -1),
+ )
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, reshaped_result_name],
- preds_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
- apply_reshape(scope, preds_name, reshaped_preds_name,
- container, desired_shape=(-1, 1))
+ "ArrayFeatureExtractor",
+ [classes_name, reshaped_result_name],
+ preds_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
+ apply_reshape(
+ scope, preds_name, reshaped_preds_name, container, desired_shape=(-1, 1)
+ )
predictions.append(reshaped_preds_name)
return predictions
def convert_sklearn_random_forest_classifier(
- scope, operator, container, op_type='TreeEnsembleClassifier',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleClassifier",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
attr_dtype = dtype if op_version >= 3 else np.float32
op = operator.raw_operator
- if hasattr(op, 'n_outputs_'):
+ if hasattr(op, "n_outputs_"):
n_outputs = int(op.n_outputs_)
options = container.get_options(
- op, dict(raw_scores=False, decision_path=False,
- decision_leaf=False))
- elif hasattr(op, 'n_trees_per_iteration_'):
+ op, dict(raw_scores=False, decision_path=False, decision_leaf=False)
+ )
+ elif hasattr(op, "n_trees_per_iteration_"):
# HistGradientBoostingClassifier
n_outputs = op.n_trees_per_iteration_
options = container.get_options(op, dict(raw_scores=False))
else:
raise NotImplementedError(
- "Model should have attribute 'n_outputs_' or "
- "'n_trees_per_iteration_'.")
+ "Model should have attribute 'n_outputs_' or " "'n_trees_per_iteration_'."
+ )
- use_raw_scores = options['raw_scores']
+ use_raw_scores = options["raw_scores"]
- if n_outputs == 1 or hasattr(op, 'loss_') or hasattr(op, '_loss'):
+ if n_outputs == 1 or hasattr(op, "loss_") or hasattr(op, "_loss"):
classes = get_label_classes(scope, op)
if all(isinstance(i, np.ndarray) for i in classes):
classes = np.concatenate(classes)
attr_pairs = get_default_tree_classifier_attribute_pairs()
- attr_pairs['name'] = scope.get_unique_operator_name(op_type)
+ attr_pairs["name"] = scope.get_unique_operator_name(op_type)
if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in classes):
class_labels = [int(i) for i in classes]
- attr_pairs['classlabels_int64s'] = class_labels
+ attr_pairs["classlabels_int64s"] = class_labels
elif all(isinstance(i, str) for i in classes):
class_labels = [str(i) for i in classes]
- attr_pairs['classlabels_strings'] = class_labels
+ attr_pairs["classlabels_strings"] = class_labels
else:
- raise ValueError(
- 'Only string and integer class labels are allowed.')
+ raise ValueError("Only string and integer class labels are allowed.")
# random forest calculate the final score by averaging over all trees'
# outcomes, so all trees' weights are identical.
- if hasattr(op, 'estimators_'):
+ if hasattr(op, "estimators_"):
estimator_count = len(op.estimators_)
- tree_weight = 1. / estimator_count
- elif hasattr(op, '_predictors'):
+ tree_weight = 1.0 / estimator_count
+ elif hasattr(op, "_predictors"):
# HistGradientBoostingRegressor
estimator_count = len(op._predictors)
- tree_weight = 1.
+ tree_weight = 1.0
else:
raise NotImplementedError(
- "Model should have attribute 'estimators_' or '_predictors'.")
+ "Model should have attribute 'estimators_' or '_predictors'."
+ )
for tree_id in range(estimator_count):
-
- if hasattr(op, 'estimators_'):
+ if hasattr(op, "estimators_"):
tree = op.estimators_[tree_id].tree_
add_tree_to_attribute_pairs(
- attr_pairs, True, tree, tree_id,
- tree_weight, 0, True, True,
- dtype=dtype)
+ attr_pairs,
+ True,
+ tree,
+ tree_id,
+ tree_weight,
+ 0,
+ True,
+ True,
+ dtype=dtype,
+ )
else:
# HistGradientBoostClassifier
if len(op._predictors[tree_id]) == 1:
tree = op._predictors[tree_id][0]
add_tree_to_attribute_pairs_hist_gradient_boosting(
- attr_pairs, True, tree, tree_id, tree_weight, 0,
- False, False, dtype=dtype)
+ attr_pairs,
+ True,
+ tree,
+ tree_id,
+ tree_weight,
+ 0,
+ False,
+ False,
+ dtype=dtype,
+ )
else:
for cl, tree in enumerate(op._predictors[tree_id]):
add_tree_to_attribute_pairs_hist_gradient_boosting(
- attr_pairs, True, tree, tree_id * n_outputs + cl,
- tree_weight, cl, False, False,
- dtype=dtype)
-
- if hasattr(op, '_baseline_prediction'):
+ attr_pairs,
+ True,
+ tree,
+ tree_id * n_outputs + cl,
+ tree_weight,
+ cl,
+ False,
+ False,
+ dtype=dtype,
+ )
+
+ if hasattr(op, "_baseline_prediction"):
if isinstance(op._baseline_prediction, np.ndarray):
- attr_pairs['base_values'] = list(
- op._baseline_prediction.ravel())
+ attr_pairs["base_values"] = list(op._baseline_prediction.ravel())
else:
- attr_pairs['base_values'] = [op._baseline_prediction]
+ attr_pairs["base_values"] = [op._baseline_prediction]
- if hasattr(op, 'loss_'):
+ if hasattr(op, "loss_"):
loss = op.loss_
- elif hasattr(op, '_loss'):
+ elif hasattr(op, "_loss"):
# scikit-learn >= 0.24
loss = op._loss
else:
loss = None
if loss is not None:
if use_raw_scores:
- attr_pairs['post_transform'] = "NONE"
+ attr_pairs["post_transform"] = "NONE"
+ elif loss.__class__.__name__ in ("BinaryCrossEntropy", "HalfBinomialLoss"):
+ attr_pairs["post_transform"] = "LOGISTIC"
elif loss.__class__.__name__ in (
- "BinaryCrossEntropy", "HalfBinomialLoss"):
- attr_pairs['post_transform'] = "LOGISTIC"
- elif loss.__class__.__name__ in (
- "CategoricalCrossEntropy", "HalfMultinomialLoss"):
- attr_pairs['post_transform'] = "SOFTMAX"
+ "CategoricalCrossEntropy",
+ "HalfMultinomialLoss",
+ ):
+ attr_pairs["post_transform"] = "SOFTMAX"
else:
raise NotImplementedError(
"There is no corresponding post_transform for "
- "'{}'.".format(loss.__class__.__name__))
+ "'{}'.".format(loss.__class__.__name__)
+ )
elif use_raw_scores:
raise RuntimeError(
"The converter cannot implement decision_function for "
- "'{}' and loss '{}'.".format(type(op), loss))
+ "'{}' and loss '{}'.".format(type(op), loss)
+ )
input_name = operator.input_full_names
if isinstance(operator.inputs[0].type, BooleanTensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
-
- apply_cast(scope, input_name, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+
+ apply_cast(
+ scope,
+ input_name,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
if dtype is not None:
for k in attr_pairs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
- attr_pairs[k] = np.array(
- attr_pairs[k], dtype=attr_dtype).ravel()
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
+ attr_pairs[k] = np.array(attr_pairs[k], dtype=attr_dtype).ravel()
container.add_node(
- op_type, input_name,
+ op_type,
+ input_name,
[operator.outputs[0].full_name, operator.outputs[1].full_name],
- op_domain=op_domain, op_version=op_version, **attr_pairs)
-
- if (not options.get('decision_path', False) and
- not options.get('decision_leaf', False)):
+ op_domain=op_domain,
+ op_version=op_version,
+ **attr_pairs
+ )
+
+ if not options.get("decision_path", False) and not options.get(
+ "decision_leaf", False
+ ):
return
# decision_path
tree_paths = []
tree_leaves = []
for i, tree in enumerate(op.estimators_):
-
attrs = get_default_tree_classifier_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(
- "%s_%d" % (op_type, i))
- attrs['n_targets'] = int(op.n_outputs_)
+ attrs["name"] = scope.get_unique_operator_name("%s_%d" % (op_type, i))
+ attrs["n_targets"] = int(op.n_outputs_)
add_tree_to_attribute_pairs(
- attrs, True, tree.tree_, 0, 1., 0, False,
- True, dtype=dtype)
-
- attrs['n_targets'] = 1
- attrs['post_transform'] = 'NONE'
- attrs['target_ids'] = [0 for _ in attrs['class_ids']]
- attrs['target_weights'] = [
- float(_) for _ in attrs['class_nodeids']]
- attrs['target_nodeids'] = attrs['class_nodeids']
- attrs['target_treeids'] = attrs['class_treeids']
- rem = [k for k in attrs if k.startswith('class')]
+ attrs, True, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype
+ )
+
+ attrs["n_targets"] = 1
+ attrs["post_transform"] = "NONE"
+ attrs["target_ids"] = [0 for _ in attrs["class_ids"]]
+ attrs["target_weights"] = [float(_) for _ in attrs["class_nodeids"]]
+ attrs["target_nodeids"] = attrs["class_nodeids"]
+ attrs["target_treeids"] = attrs["class_treeids"]
+ rem = [k for k in attrs if k.startswith("class")]
for k in rem:
del attrs[k]
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=attr_dtype).ravel()
- if options['decision_path']:
+ if options["decision_path"]:
# decision_path
tree_paths.append(
_append_decision_output(
- input_name, attrs, _build_labels_path, None,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True,
- overwrite_tree=tree.tree_))
- if options['decision_leaf']:
+ input_name,
+ attrs,
+ _build_labels_path,
+ None,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ overwrite_tree=tree.tree_,
+ )
+ )
+ if options["decision_leaf"]:
            # decision_leaf
tree_leaves.append(
_append_decision_output(
- input_name, attrs, _build_labels_leaf, None,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True,
- cast_encode=True))
+ input_name,
+ attrs,
+ _build_labels_leaf,
+ None,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ cast_encode=True,
+ )
+ )
# merges everything
n_out = 2
- if options['decision_path']:
+ if options["decision_path"]:
apply_concat(
- scope, tree_paths, operator.outputs[n_out].full_name,
- container, axis=1,
- operator_name=scope.get_unique_operator_name('concat'))
+ scope,
+ tree_paths,
+ operator.outputs[n_out].full_name,
+ container,
+ axis=1,
+ operator_name=scope.get_unique_operator_name("concat"),
+ )
n_out += 1
- if options['decision_leaf']:
+ if options["decision_leaf"]:
        # decision_leaf
apply_concat(
- scope, tree_leaves, operator.outputs[n_out].full_name,
- container, axis=1,
- operator_name=scope.get_unique_operator_name('concat'))
+ scope,
+ tree_leaves,
+ operator.outputs[n_out].full_name,
+ container,
+ axis=1,
+ operator_name=scope.get_unique_operator_name("concat"),
+ )
n_out += 1
else:
if use_raw_scores:
raise RuntimeError(
"The converter cannot implement decision_function for "
- "'{}'.".format(type(op)))
- concatenated_proba_name = scope.get_unique_variable_name(
- 'concatenated_proba')
+ "'{}'.".format(type(op))
+ )
+ concatenated_proba_name = scope.get_unique_variable_name("concatenated_proba")
proba = []
for est in op.estimators_:
reshaped_est_proba_name = scope.get_unique_variable_name(
- 'reshaped_est_proba')
+ "reshaped_est_proba"
+ )
est_proba = predict(
- est, scope, operator, container, op_type, op_domain,
- op_version, is_ensemble=True)
+ est,
+ scope,
+ operator,
+ container,
+ op_type,
+ op_domain,
+ op_version,
+ is_ensemble=True,
+ )
apply_reshape(
- scope, est_proba, reshaped_est_proba_name, container,
- desired_shape=(
- 1, n_outputs, -1, max([len(x) for x in op.classes_])))
+ scope,
+ est_proba,
+ reshaped_est_proba_name,
+ container,
+ desired_shape=(1, n_outputs, -1, max([len(x) for x in op.classes_])),
+ )
proba.append(reshaped_est_proba_name)
- apply_concat(scope, proba, concatenated_proba_name,
- container, axis=0)
+ apply_concat(scope, proba, concatenated_proba_name, container, axis=0)
if container.target_opset >= 18:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [0])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [0])
container.add_node(
- 'ReduceMean', [concatenated_proba_name, axis_name],
+ "ReduceMean",
+ [concatenated_proba_name, axis_name],
operator.outputs[1].full_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- keepdims=0)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ keepdims=0,
+ )
else:
container.add_node(
- 'ReduceMean', concatenated_proba_name,
+ "ReduceMean",
+ concatenated_proba_name,
operator.outputs[1].full_name,
- name=scope.get_unique_operator_name('ReduceMean'),
- axes=[0], keepdims=0)
+ name=scope.get_unique_operator_name("ReduceMean"),
+ axes=[0],
+ keepdims=0,
+ )
predictions = _calculate_labels(
- scope, container, op, operator.outputs[1].full_name)
- apply_concat(scope, predictions, operator.outputs[0].full_name,
- container, axis=1)
+ scope, container, op, operator.outputs[1].full_name
+ )
+ apply_concat(
+ scope, predictions, operator.outputs[0].full_name, container, axis=1
+ )
- if (options.get('decision_path', False) or
- options.get('decision_leaf', False)):
+ if options.get("decision_path", False) or options.get("decision_leaf", False):
raise RuntimeError(
- "Decision output for multi-outputs is not implemented yet.")
+ "Decision output for multi-outputs is not implemented yet."
+ )
def convert_sklearn_random_forest_regressor_converter(
- scope, operator, container, op_type='TreeEnsembleRegressor',
- op_domain='ai.onnx.ml', op_version=1):
+ scope,
+ operator,
+ container,
+ op_type="TreeEnsembleRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
op = operator.raw_operator
attrs = get_default_tree_regressor_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name(op_type)
+ attrs["name"] = scope.get_unique_operator_name(op_type)
- if hasattr(op, 'n_outputs_'):
- attrs['n_targets'] = int(op.n_outputs_)
- elif hasattr(op, 'n_trees_per_iteration_'):
+ if hasattr(op, "n_outputs_"):
+ attrs["n_targets"] = int(op.n_outputs_)
+ elif hasattr(op, "n_trees_per_iteration_"):
# HistGradientBoostingRegressor
- attrs['n_targets'] = op.n_trees_per_iteration_
+ attrs["n_targets"] = op.n_trees_per_iteration_
else:
raise NotImplementedError(
- "Model should have attribute 'n_outputs_' or "
- "'n_trees_per_iteration_'.")
+            "Model should have attribute 'n_outputs_' or 'n_trees_per_iteration_'."
+ )
- if hasattr(op, 'estimators_'):
+ if hasattr(op, "estimators_"):
estimator_count = len(op.estimators_)
- tree_weight = 1. / estimator_count
- elif hasattr(op, '_predictors'):
+ tree_weight = 1.0 / estimator_count
+ elif hasattr(op, "_predictors"):
# HistGradientBoostingRegressor
estimator_count = len(op._predictors)
- tree_weight = 1.
+ tree_weight = 1.0
else:
raise NotImplementedError(
- "Model should have attribute 'estimators_' or '_predictors'.")
+ "Model should have attribute 'estimators_' or '_predictors'."
+ )
    # A random forest computes the final score by averaging over all trees'
    # outcomes, so all trees' weights are identical.
for tree_id in range(estimator_count):
- if hasattr(op, 'estimators_'):
+ if hasattr(op, "estimators_"):
tree = op.estimators_[tree_id].tree_
- add_tree_to_attribute_pairs(attrs, False, tree, tree_id,
- tree_weight, 0, False, True,
- dtype=dtype)
+ add_tree_to_attribute_pairs(
+ attrs, False, tree, tree_id, tree_weight, 0, False, True, dtype=dtype
+ )
else:
# HistGradientBoostingRegressor
if len(op._predictors[tree_id]) != 1:
raise NotImplementedError(
"The converter does not work when the number of trees "
- "is not 1 but {}.".format(len(op._predictors[tree_id])))
+ "is not 1 but {}.".format(len(op._predictors[tree_id]))
+ )
tree = op._predictors[tree_id][0]
add_tree_to_attribute_pairs_hist_gradient_boosting(
- attrs, False, tree, tree_id, tree_weight, 0, False,
- False, dtype=dtype)
+ attrs, False, tree, tree_id, tree_weight, 0, False, False, dtype=dtype
+ )
- if hasattr(op, '_baseline_prediction'):
+ if hasattr(op, "_baseline_prediction"):
if isinstance(op._baseline_prediction, np.ndarray):
- attrs['base_values'] = list(op._baseline_prediction)
+ attrs["base_values"] = list(op._baseline_prediction)
else:
- attrs['base_values'] = [op._baseline_prediction]
+ attrs["base_values"] = [op._baseline_prediction]
input_name = operator.input_full_names
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
-
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
input_name = cast_input_name
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype).ravel()
container.add_node(
- op_type, input_name,
- operator.outputs[0].full_name, op_domain=op_domain,
- op_version=op_version, **attrs)
-
- if hasattr(op, 'n_trees_per_iteration_'):
+ op_type,
+ input_name,
+ operator.outputs[0].full_name,
+ op_domain=op_domain,
+ op_version=op_version,
+ **attrs
+ )
+
+ if hasattr(op, "n_trees_per_iteration_"):
# HistGradientBoostingRegressor does not implement decision_path.
return
if isinstance(op, RandomTreesEmbedding):
options = scope.get_options(op)
else:
- options = scope.get_options(
- op, dict(decision_path=False, decision_leaf=False))
+ options = scope.get_options(op, dict(decision_path=False, decision_leaf=False))
- if (not options.get('decision_path', False) and
- not options.get('decision_leaf', False)):
+ if not options.get("decision_path", False) and not options.get(
+ "decision_leaf", False
+ ):
return
# decision_path
tree_paths = []
tree_leaves = []
for i, tree in enumerate(op.estimators_):
-
attrs = get_default_tree_regressor_attribute_pairs()
- attrs['name'] = scope.get_unique_operator_name("%s_%d" % (op_type, i))
- attrs['n_targets'] = int(op.n_outputs_)
- add_tree_to_attribute_pairs(attrs, False, tree.tree_, 0, 1., 0, False,
- True, dtype=dtype)
+ attrs["name"] = scope.get_unique_operator_name("%s_%d" % (op_type, i))
+ attrs["n_targets"] = int(op.n_outputs_)
+ add_tree_to_attribute_pairs(
+ attrs, False, tree.tree_, 0, 1.0, 0, False, True, dtype=dtype
+ )
- attrs['n_targets'] = 1
- attrs['post_transform'] = 'NONE'
- attrs['target_ids'] = [0 for _ in attrs['target_ids']]
- attrs['target_weights'] = [float(_) for _ in attrs['target_nodeids']]
+ attrs["n_targets"] = 1
+ attrs["post_transform"] = "NONE"
+ attrs["target_ids"] = [0 for _ in attrs["target_ids"]]
+ attrs["target_weights"] = [float(_) for _ in attrs["target_nodeids"]]
if dtype is not None:
for k in attrs:
- if k in ('nodes_values', 'class_weights',
- 'target_weights', 'nodes_hitrates',
- 'base_values'):
+ if k in (
+ "nodes_values",
+ "class_weights",
+ "target_weights",
+ "nodes_hitrates",
+ "base_values",
+ ):
attrs[k] = np.array(attrs[k], dtype=dtype).ravel()
- if options.get('decision_path', False):
+ if options.get("decision_path", False):
# decision_path
tree_paths.append(
_append_decision_output(
- input_name, attrs, _build_labels_path, None,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True,
- overwrite_tree=tree.tree_))
- if options.get('decision_leaf', False):
+ input_name,
+ attrs,
+ _build_labels_path,
+ None,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ overwrite_tree=tree.tree_,
+ )
+ )
+ if options.get("decision_leaf", False):
            # decision_leaf
tree_leaves.append(
_append_decision_output(
- input_name, attrs, _build_labels_leaf, None,
- scope, operator, container,
- op_type=op_type, op_domain=op_domain,
- op_version=op_version, regression=True, cast_encode=True))
+ input_name,
+ attrs,
+ _build_labels_leaf,
+ None,
+ scope,
+ operator,
+ container,
+ op_type=op_type,
+ op_domain=op_domain,
+ op_version=op_version,
+ regression=True,
+ cast_encode=True,
+ )
+ )
# merges everything
n_out = 1
- if options.get('decision_path', False):
+ if options.get("decision_path", False):
apply_concat(
- scope, tree_paths, operator.outputs[n_out].full_name, container,
- axis=1, operator_name=scope.get_unique_operator_name('concat'))
+ scope,
+ tree_paths,
+ operator.outputs[n_out].full_name,
+ container,
+ axis=1,
+ operator_name=scope.get_unique_operator_name("concat"),
+ )
n_out += 1
- if options.get('decision_leaf', False):
+ if options.get("decision_leaf", False):
        # decision_leaf
apply_concat(
- scope, tree_leaves, operator.outputs[n_out].full_name, container,
- axis=1, operator_name=scope.get_unique_operator_name('concat'))
+ scope,
+ tree_leaves,
+ operator.outputs[n_out].full_name,
+ container,
+ axis=1,
+ operator_name=scope.get_unique_operator_name("concat"),
+ )
n_out += 1
-register_converter('SklearnRandomForestClassifier',
- convert_sklearn_random_forest_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'raw_scores': [True, False],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnRandomForestRegressor',
- convert_sklearn_random_forest_regressor_converter,
- options={'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnExtraTreesClassifier',
- convert_sklearn_random_forest_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'raw_scores': [True, False],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnExtraTreesRegressor',
- convert_sklearn_random_forest_regressor_converter,
- options={'decision_path': [True, False],
- 'decision_leaf': [True, False]})
-register_converter('SklearnHistGradientBoostingClassifier',
- convert_sklearn_random_forest_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'raw_scores': [True, False],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
-register_converter('SklearnHistGradientBoostingRegressor',
- convert_sklearn_random_forest_regressor_converter,
- options={'zipmap': [True, False, 'columns'],
- 'raw_scores': [True, False],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
+register_converter(
+ "SklearnRandomForestClassifier",
+ convert_sklearn_random_forest_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "raw_scores": [True, False],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "decision_path": [True, False],
+ "decision_leaf": [True, False],
+ },
+)
+register_converter(
+ "SklearnRandomForestRegressor",
+ convert_sklearn_random_forest_regressor_converter,
+ options={"decision_path": [True, False], "decision_leaf": [True, False]},
+)
+register_converter(
+ "SklearnExtraTreesClassifier",
+ convert_sklearn_random_forest_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "raw_scores": [True, False],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "decision_path": [True, False],
+ "decision_leaf": [True, False],
+ },
+)
+register_converter(
+ "SklearnExtraTreesRegressor",
+ convert_sklearn_random_forest_regressor_converter,
+ options={"decision_path": [True, False], "decision_leaf": [True, False]},
+)
+register_converter(
+ "SklearnHistGradientBoostingClassifier",
+ convert_sklearn_random_forest_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "raw_scores": [True, False],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
+register_converter(
+ "SklearnHistGradientBoostingRegressor",
+ convert_sklearn_random_forest_regressor_converter,
+ options={
+ "zipmap": [True, False, "columns"],
+ "raw_scores": [True, False],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
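
Both forest converters above reproduce scikit-learn's averaging: each tree is added with weight 1/n_estimators and the per-tree outputs are summed (or reduced with ReduceMean for the multi-output case). The snippet below is only an illustrative sketch of how to verify that end to end; the input name "X" is the default chosen by to_onnx for a numpy input and the zipmap option is used to keep probabilities as a plain tensor.

    import numpy as np
    import onnxruntime as rt
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from skl2onnx import to_onnx

    X, y = make_classification(n_samples=100, n_features=4, random_state=0)
    X = X.astype(np.float32)
    rf = RandomForestClassifier(n_estimators=5, random_state=0).fit(X, y)

    # zipmap=False keeps the probability output as a single float tensor.
    onx = to_onnx(rf, X[:1], options={id(rf): {"zipmap": False}})
    sess = rt.InferenceSession(onx.SerializeToString(),
                               providers=["CPUExecutionProvider"])
    label, proba = sess.run(None, {"X": X})
    assert np.allclose(proba, rf.predict_proba(X), atol=1e-5)
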
diff --git a/skl2onnx/operator_converters/random_projection.py b/skl2onnx/operator_converters/random_projection.py
index e9f4a2933..1f1db77f9 100644
--- a/skl2onnx/operator_converters/random_projection.py
+++ b/skl2onnx/operator_converters/random_projection.py
@@ -8,8 +8,9 @@
from ..algebra.onnx_ops import OnnxMatMul
-def convert_random_projection(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_random_projection(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""Converter for PowerTransformer"""
op_in = operator.inputs[0]
op_out = operator.outputs[0].full_name
@@ -19,10 +20,10 @@ def convert_random_projection(scope: Scope, operator: Operator,
if dtype != np.float64:
dtype = np.float32
- y = OnnxMatMul(op_in, op.components_.T.astype(dtype),
- op_version=opv, output_names=[op_out])
+ y = OnnxMatMul(
+ op_in, op.components_.T.astype(dtype), op_version=opv, output_names=[op_out]
+ )
y.add_to(scope, container)
-register_converter(
- 'SklearnGaussianRandomProjection', convert_random_projection)
+register_converter("SklearnGaussianRandomProjection", convert_random_projection)
diff --git a/skl2onnx/operator_converters/random_trees_embedding.py b/skl2onnx/operator_converters/random_trees_embedding.py
index e082e014e..6e3305d63 100644
--- a/skl2onnx/operator_converters/random_trees_embedding.py
+++ b/skl2onnx/operator_converters/random_trees_embedding.py
@@ -9,8 +9,8 @@
def convert_sklearn_random_tree_embedding(
- scope: Scope, operator: Operator, container: ModelComponentContainer):
-
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
X = operator.inputs[0]
out = operator.outputs
op = operator.raw_operator
@@ -18,21 +18,21 @@ def convert_sklearn_random_tree_embedding(
if op.sparse_output:
raise RuntimeError(
- "The converter cannot convert the model with sparse outputs.")
+ "The converter cannot convert the model with sparse outputs."
+ )
outputs = []
for est in op.estimators_:
- leave = OnnxSubEstimator(est, X, op_version=opv,
- options={'decision_leaf': True})
- outputs.append(OnnxReshape(leave[1],
- np.array([-1, 1], dtype=np.int64),
- op_version=opv))
+ leave = OnnxSubEstimator(
+ est, X, op_version=opv, options={"decision_leaf": True}
+ )
+ outputs.append(
+ OnnxReshape(leave[1], np.array([-1, 1], dtype=np.int64), op_version=opv)
+ )
merged = OnnxConcat(*outputs, axis=1, op_version=opv)
ohe = OnnxSubEstimator(op.one_hot_encoder_, merged, op_version=opv)
- y = OnnxIdentity(ohe, op_version=opv,
- output_names=out)
+ y = OnnxIdentity(ohe, op_version=opv, output_names=out)
y.add_to(scope, container)
-register_converter('SklearnRandomTreesEmbedding',
- convert_sklearn_random_tree_embedding)
+register_converter("SklearnRandomTreesEmbedding", convert_sklearn_random_tree_embedding)
diff --git a/skl2onnx/operator_converters/ransac_regressor.py b/skl2onnx/operator_converters/ransac_regressor.py
index 096ac7f02..48ab2ab5a 100644
--- a/skl2onnx/operator_converters/ransac_regressor.py
+++ b/skl2onnx/operator_converters/ransac_regressor.py
@@ -8,8 +8,9 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_ransac_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_ransac_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for RANSACRegressor.
"""
@@ -18,11 +19,12 @@ def convert_sklearn_ransac_regressor(scope: Scope, operator: Operator,
this_operator = scope.declare_local_operator(op_type, ransac_op.estimator_)
this_operator.inputs = operator.inputs
label_name = scope.declare_local_variable(
- 'label', operator.inputs[0].type.__class__())
+ "label", operator.inputs[0].type.__class__()
+ )
this_operator.outputs.append(label_name)
- apply_identity(scope, label_name.full_name,
- operator.outputs[0].full_name, container)
+ apply_identity(
+ scope, label_name.full_name, operator.outputs[0].full_name, container
+ )
-register_converter('SklearnRANSACRegressor',
- convert_sklearn_ransac_regressor)
+register_converter("SklearnRANSACRegressor", convert_sklearn_ransac_regressor)
diff --git a/skl2onnx/operator_converters/replace_op.py b/skl2onnx/operator_converters/replace_op.py
index 7b654e0ff..951b04c06 100644
--- a/skl2onnx/operator_converters/replace_op.py
+++ b/skl2onnx/operator_converters/replace_op.py
@@ -7,29 +7,34 @@
from ..common.data_types import guess_proto_type
-def convert_sklearn_replace_transformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_replace_transformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
op = operator.raw_operator
input_name = operator.inputs[0].full_name
output_name = operator.outputs[0].full_name
proto_dtype = guess_proto_type(operator.inputs[0].type)
- cst_nan_name = scope.get_unique_variable_name('nan_name')
+ cst_nan_name = scope.get_unique_variable_name("nan_name")
container.add_initializer(cst_nan_name, proto_dtype, [1], [op.to_value])
- cst_zero_name = scope.get_unique_variable_name('zero_name')
- container.add_initializer(
- cst_zero_name, proto_dtype, [1], [op.from_value])
-
- mask_name = scope.get_unique_variable_name('mask_name')
- container.add_node('Equal', [input_name, cst_zero_name],
- mask_name,
- name=scope.get_unique_operator_name('Equal'))
-
- container.add_node('Where', [mask_name, cst_nan_name, input_name],
- output_name,
- name=scope.get_unique_operator_name('Where'))
-
-
-register_converter(
- 'SklearnReplaceTransformer', convert_sklearn_replace_transformer)
+ cst_zero_name = scope.get_unique_variable_name("zero_name")
+ container.add_initializer(cst_zero_name, proto_dtype, [1], [op.from_value])
+
+ mask_name = scope.get_unique_variable_name("mask_name")
+ container.add_node(
+ "Equal",
+ [input_name, cst_zero_name],
+ mask_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+
+ container.add_node(
+ "Where",
+ [mask_name, cst_nan_name, input_name],
+ output_name,
+ name=scope.get_unique_operator_name("Where"),
+ )
+
+
+register_converter("SklearnReplaceTransformer", convert_sklearn_replace_transformer)
diff --git a/skl2onnx/operator_converters/scaler_op.py b/skl2onnx/operator_converters/scaler_op.py
index f219f35d9..14d1b7e4b 100644
--- a/skl2onnx/operator_converters/scaler_op.py
+++ b/skl2onnx/operator_converters/scaler_op.py
@@ -4,8 +4,7 @@
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler, StandardScaler
-from ..algebra.onnx_ops import (
- OnnxSub, OnnxDiv, OnnxCast, OnnxMul, OnnxClip, OnnxAdd)
+from ..algebra.onnx_ops import OnnxSub, OnnxDiv, OnnxCast, OnnxMul, OnnxClip, OnnxAdd
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -14,8 +13,9 @@
from .common import concatenate_variables
-def convert_sklearn_scaler(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_scaler(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# If there are multiple input variables, we need to combine them as a
# whole tensor. Integer(s) would be converted to float(s).
    # Option 'div' uses true division instead of the Scaler operator
@@ -28,8 +28,8 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator,
C = operator.outputs[0].get_second_dimension()
op = operator.raw_operator
- op_type = 'Scaler'
- attrs = {'name': scope.get_unique_operator_name(op_type)}
+ op_type = "Scaler"
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
if isinstance(op, StandardScaler):
model_C = None
@@ -41,22 +41,20 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator,
model_C = op.var_.shape[0]
if model_C is None:
# Identity
- container.add_node(
- 'Identity', feature_name,
- operator.outputs[0].full_name)
+ container.add_node("Identity", feature_name, operator.outputs[0].full_name)
return
if C is not None and C != model_C:
raise RuntimeError(
"Unable Mismatch between expected shape %r and model (., %r)"
- " in operator %r." % (
- operator.outputs[0].type.shape, model_C, operator))
+ " in operator %r." % (operator.outputs[0].type.shape, model_C, operator)
+ )
C = model_C
- attrs['offset'] = (
- op.mean_ if op.with_mean else
- np.array([0.0] * C, dtype=np.float32))
- attrs['scale'] = (
- 1.0 / op.scale_ if op.with_std else
- np.array([1.0] * C, dtype=np.float32))
+ attrs["offset"] = (
+ op.mean_ if op.with_mean else np.array([0.0] * C, dtype=np.float32)
+ )
+ attrs["scale"] = (
+ 1.0 / op.scale_ if op.with_std else np.array([1.0] * C, dtype=np.float32)
+ )
inv_scale = op.scale_ if op.with_std else None
elif isinstance(op, RobustScaler):
model_C = None
@@ -66,22 +64,22 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator,
model_C = op.scale_.shape[0]
if model_C is None:
# Identity
- container.add_node(
- 'Identity', feature_name,
- operator.outputs[0].full_name)
+ container.add_node("Identity", feature_name, operator.outputs[0].full_name)
return
if C is not None and C != model_C:
raise RuntimeError(
"Unable Mismatch between expected shape %r and model (., %r)"
- " in operator %r." % (
- operator.outputs[0].type.shape, model_C, operator))
+ " in operator %r." % (operator.outputs[0].type.shape, model_C, operator)
+ )
C = model_C
- attrs['offset'] = (
- op.center_ if op.with_centering else
- np.array([0.0] * C, dtype=np.float32))
- attrs['scale'] = (
- 1.0 / op.scale_ if op.with_scaling else
- np.array([1.0] * C, dtype=np.float32))
+ attrs["offset"] = (
+ op.center_ if op.with_centering else np.array([0.0] * C, dtype=np.float32)
+ )
+ attrs["scale"] = (
+ 1.0 / op.scale_
+ if op.with_scaling
+ else np.array([1.0] * C, dtype=np.float32)
+ )
inv_scale = op.scale_ if op.with_scaling else None
elif isinstance(op, MaxAbsScaler):
model_C = None
@@ -91,25 +89,25 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator,
model_C = op.scale_.shape[0]
if model_C is None:
# Identity
- container.add_node(
- 'Identity', feature_name,
- operator.outputs[0].full_name)
+ container.add_node("Identity", feature_name, operator.outputs[0].full_name)
return
if C is not None and C != model_C:
raise RuntimeError(
"Unable Mismatch between expected shape %r and model (., %r)"
- " in operator %r." % (
- operator.outputs[0].type.shape, model_C, operator))
+ " in operator %r." % (operator.outputs[0].type.shape, model_C, operator)
+ )
C = model_C
- attrs['scale'] = 1.0 / op.scale_
- attrs['offset'] = np.array([0.] * C, dtype=np.float32)
+ attrs["scale"] = 1.0 / op.scale_
+ attrs["offset"] = np.array([0.0] * C, dtype=np.float32)
inv_scale = op.scale_
else:
- raise ValueError('Only scikit-learn StandardScaler and RobustScaler '
- 'are supported but got %s. You may raise '
- 'an issue at '
- 'https://github.com/onnx/sklearn-onnx/issues.'
- '' % type(op))
+        raise ValueError(
+            "Only scikit-learn StandardScaler, RobustScaler and "
+            "MaxAbsScaler are supported but got %s. You may raise "
+            "an issue at "
+            "https://github.com/onnx/sklearn-onnx/issues." % type(op)
+        )
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
@@ -123,65 +121,77 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator,
if isinstance(v, np.ndarray) and v.dtype != dtype:
attrs[k] = v.astype(dtype)
- use_scaler_op = container.is_allowed({'Scaler'})
+ use_scaler_op = container.is_allowed({"Scaler"})
if not use_scaler_op or dtype == np.float64:
opv = container.target_opset
if inv_scale is None:
sub = OnnxSub(
- feature_name, attrs['offset'].astype(dtype),
+ feature_name,
+ attrs["offset"].astype(dtype),
op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ output_names=[operator.outputs[0].full_name],
+ )
sub.add_to(scope, container)
else:
- sub = OnnxSub(
- feature_name, attrs['offset'].astype(dtype),
- op_version=opv)
- div = OnnxDiv(sub, inv_scale.astype(dtype),
- op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ sub = OnnxSub(feature_name, attrs["offset"].astype(dtype), op_version=opv)
+ div = OnnxDiv(
+ sub,
+ inv_scale.astype(dtype),
+ op_version=opv,
+ output_names=[operator.outputs[0].full_name],
+ )
div.add_to(scope, container)
return
if inv_scale is not None:
- options = container.get_options(op, dict(div='std'))
- div = options['div']
- if div == 'div':
+ options = container.get_options(op, dict(div="std"))
+ div = options["div"]
+ if div == "div":
opv = container.target_opset
- sub = OnnxSub(
- feature_name, attrs['offset'].astype(dtype),
- op_version=opv)
- div = OnnxDiv(sub, inv_scale.astype(dtype),
- op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ sub = OnnxSub(feature_name, attrs["offset"].astype(dtype), op_version=opv)
+ div = OnnxDiv(
+ sub,
+ inv_scale.astype(dtype),
+ op_version=opv,
+ output_names=[operator.outputs[0].full_name],
+ )
div.add_to(scope, container)
return
- if div == 'div_cast':
+ if div == "div_cast":
opv = container.target_opset
- cast = OnnxCast(feature_name, to=onnx_proto.TensorProto.DOUBLE,
- op_version=opv)
- sub = OnnxSub(cast, attrs['offset'].astype(np.float64),
- op_version=opv)
+ cast = OnnxCast(
+ feature_name, to=onnx_proto.TensorProto.DOUBLE, op_version=opv
+ )
+ sub = OnnxSub(cast, attrs["offset"].astype(np.float64), op_version=opv)
div = OnnxDiv(sub, inv_scale.astype(np.float64), op_version=opv)
- cast = OnnxCast(div, to=proto_dtype, op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ cast = OnnxCast(
+ div,
+ to=proto_dtype,
+ op_version=opv,
+ output_names=[operator.outputs[0].full_name],
+ )
cast.add_to(scope, container)
return
- if attrs['offset'].size != attrs['scale'].size:
+ if attrs["offset"].size != attrs["scale"].size:
        # Scaler does not accept different sizes for offset and scale.
- size = max(attrs['offset'].size, attrs['scale'].size)
- ones = np.ones(size, dtype=attrs['offset'].dtype)
- attrs['offset'] = attrs['offset'] * ones
- attrs['scale'] = attrs['scale'] * ones
+ size = max(attrs["offset"].size, attrs["scale"].size)
+ ones = np.ones(size, dtype=attrs["offset"].dtype)
+ attrs["offset"] = attrs["offset"] * ones
+ attrs["scale"] = attrs["scale"] * ones
container.add_node(
- op_type, feature_name, operator.outputs[0].full_name,
- op_domain='ai.onnx.ml', **attrs)
+ op_type,
+ feature_name,
+ operator.outputs[0].full_name,
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
def convert_sklearn_min_max_scaler(
- scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# If there are multiple input variables, we need to combine them as a
# whole tensor. Integer(s) would be converted to float(s).
    # Option 'div' uses true division instead of the Scaler operator
@@ -208,30 +218,41 @@ def convert_sklearn_min_max_scaler(
# if self.clip:
# np.clip(X, self.feature_range[0], self.feature_range[1], out=X)
casted = OnnxCast(feature_name, to=proto_dtype, op_version=opv)
- scaled = OnnxMul(casted, op.scale_.astype(dtype),
- op_version=opv)
+ scaled = OnnxMul(casted, op.scale_.astype(dtype), op_version=opv)
- if getattr(op, 'clip', False):
+ if getattr(op, "clip", False):
# parameter clip was introduced in scikit-learn 0.24
- offset = OnnxAdd(scaled, op.min_.astype(dtype),
- op_version=opv)
+ offset = OnnxAdd(scaled, op.min_.astype(dtype), op_version=opv)
- clipped = OnnxClip(offset, np.array(op.feature_range[0], dtype=dtype),
- np.array(op.feature_range[1], dtype=dtype),
- op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ clipped = OnnxClip(
+ offset,
+ np.array(op.feature_range[0], dtype=dtype),
+ np.array(op.feature_range[1], dtype=dtype),
+ op_version=opv,
+ output_names=[operator.outputs[0].full_name],
+ )
clipped.add_to(scope, container)
else:
- offset = OnnxAdd(scaled, op.min_.astype(dtype),
- op_version=opv,
- output_names=[operator.outputs[0].full_name])
+ offset = OnnxAdd(
+ scaled,
+ op.min_.astype(dtype),
+ op_version=opv,
+ output_names=[operator.outputs[0].full_name],
+ )
offset.add_to(scope, container)
-register_converter('SklearnRobustScaler', convert_sklearn_scaler,
- options={'div': ['std', 'div', 'div_cast']})
-register_converter('SklearnScaler', convert_sklearn_scaler,
- options={'div': ['std', 'div', 'div_cast']})
-register_converter('SklearnMinMaxScaler', convert_sklearn_min_max_scaler)
-register_converter('SklearnMaxAbsScaler', convert_sklearn_scaler,
- options={'div': ['std', 'div', 'div_cast']})
+register_converter(
+ "SklearnRobustScaler",
+ convert_sklearn_scaler,
+ options={"div": ["std", "div", "div_cast"]},
+)
+register_converter(
+ "SklearnScaler", convert_sklearn_scaler, options={"div": ["std", "div", "div_cast"]}
+)
+register_converter("SklearnMinMaxScaler", convert_sklearn_min_max_scaler)
+register_converter(
+ "SklearnMaxAbsScaler",
+ convert_sklearn_scaler,
+ options={"div": ["std", "div", "div_cast"]},
+)
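
For a StandardScaler the ai.onnx.ml Scaler node receives offset = mean_ and scale = 1 / scale_, so it computes (X - offset) * scale; the 'div' and 'div_cast' options replace the multiplication with a true division by scale_ (with an extra cast to double for 'div_cast'). A numpy sketch of both paths:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    X = np.random.randn(8, 3).astype(np.float32)
    scaler = StandardScaler().fit(X)

    offset = scaler.mean_.astype(np.float32)
    scale = (1.0 / scaler.scale_).astype(np.float32)

    scaler_path = (X - offset) * scale                           # default Scaler node
    div_path = (X - offset) / scaler.scale_.astype(np.float32)   # options={'div': 'div'}
    assert np.allclose(scaler_path, div_path, atol=1e-5)
    assert np.allclose(scaler_path, scaler.transform(X), atol=1e-5)
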
diff --git a/skl2onnx/operator_converters/sequence.py b/skl2onnx/operator_converters/sequence.py
index 77b937016..534bcfcee 100644
--- a/skl2onnx/operator_converters/sequence.py
+++ b/skl2onnx/operator_converters/sequence.py
@@ -6,26 +6,30 @@
from ..common._container import ModelComponentContainer
-def convert_sklearn_sequence_at(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_sequence_at(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
i_index = operator.index
index_name = scope.get_unique_variable_name("seq_at%d" % i_index)
- container.add_initializer(
- index_name, onnx_proto.TensorProto.INT64, [], [i_index])
+ container.add_initializer(index_name, onnx_proto.TensorProto.INT64, [], [i_index])
container.add_node(
- 'SequenceAt', [operator.inputs[0].full_name, index_name],
+ "SequenceAt",
+ [operator.inputs[0].full_name, index_name],
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('SequenceAt%d' % i_index))
+ name=scope.get_unique_operator_name("SequenceAt%d" % i_index),
+ )
-def convert_sklearn_sequence_construct(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_sequence_construct(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
container.add_node(
- 'SequenceConstruct', [i.full_name for i in operator.inputs],
+ "SequenceConstruct",
+ [i.full_name for i in operator.inputs],
operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('SequenceConstruct'))
+ name=scope.get_unique_operator_name("SequenceConstruct"),
+ )
-register_converter('SklearnSequenceAt', convert_sklearn_sequence_at)
-register_converter(
- 'SklearnSequenceConstruct', convert_sklearn_sequence_construct)
+register_converter("SklearnSequenceAt", convert_sklearn_sequence_at)
+register_converter("SklearnSequenceConstruct", convert_sklearn_sequence_construct)
diff --git a/skl2onnx/operator_converters/sgd_classifier.py b/skl2onnx/operator_converters/sgd_classifier.py
index adeb023b5..485bc03a3 100644
--- a/skl2onnx/operator_converters/sgd_classifier.py
+++ b/skl2onnx/operator_converters/sgd_classifier.py
@@ -3,11 +3,24 @@
import numpy as np
from ..common._apply_operation import (
- apply_add, apply_cast, apply_clip, apply_concat, apply_div, apply_exp,
- apply_identity, apply_mul, apply_reciprocal, apply_reshape, apply_sub)
+ apply_add,
+ apply_cast,
+ apply_clip,
+ apply_concat,
+ apply_div,
+ apply_exp,
+ apply_identity,
+ apply_mul,
+ apply_reciprocal,
+ apply_reshape,
+ apply_sub,
+)
from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_numpy_type,
- guess_proto_type)
+ BooleanTensorType,
+ Int64TensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -19,106 +32,150 @@ def _decision_function(scope, operator, container, model, proto_type):
"""Predict for linear model.
score = X * coefficient + intercept
"""
- coef_name = scope.get_unique_variable_name('coef')
- intercept_name = scope.get_unique_variable_name('intercept')
- matmul_result_name = scope.get_unique_variable_name(
- 'matmul_result')
- score_name = scope.get_unique_variable_name('score')
+ coef_name = scope.get_unique_variable_name("coef")
+ intercept_name = scope.get_unique_variable_name("intercept")
+ matmul_result_name = scope.get_unique_variable_name("matmul_result")
+ score_name = scope.get_unique_variable_name("score")
coef = model.coef_.T
- container.add_initializer(coef_name, proto_type,
- coef.shape, coef.ravel())
- container.add_initializer(intercept_name, proto_type,
- model.intercept_.shape, model.intercept_)
+ container.add_initializer(coef_name, proto_type, coef.shape, coef.ravel())
+ container.add_initializer(
+ intercept_name, proto_type, model.intercept_.shape, model.intercept_
+ )
input_name = operator.inputs[0].full_name
if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
+ cast_input_name = scope.get_unique_variable_name("cast_input")
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_type)
+ apply_cast(
+ scope, operator.input_full_names, cast_input_name, container, to=proto_type
+ )
input_name = cast_input_name
container.add_node(
- 'MatMul', [input_name, coef_name],
+ "MatMul",
+ [input_name, coef_name],
matmul_result_name,
- name=scope.get_unique_operator_name('MatMul'))
- apply_add(scope, [matmul_result_name, intercept_name],
- score_name, container, broadcast=0)
+ name=scope.get_unique_operator_name("MatMul"),
+ )
+ apply_add(
+ scope, [matmul_result_name, intercept_name], score_name, container, broadcast=0
+ )
return score_name
-def _handle_zeros(scope, container, scores, proba, reduced_proba, num_classes,
- proto_type):
+def _handle_zeros(
+ scope, container, scores, proba, reduced_proba, num_classes, proto_type
+):
"""Handle cases where reduced_proba values are zeros to avoid NaNs in
class probability scores because of divide by 0 when we calculate
proba / reduced_proba in _normalise_proba().
This is done by replacing reduced_proba values of 0s with
num_classes and corresponding proba values with 1.
"""
- num_classes_name = scope.get_unique_variable_name('num_classes')
- bool_reduced_proba_name = scope.get_unique_variable_name(
- 'bool_reduced_proba')
+ num_classes_name = scope.get_unique_variable_name("num_classes")
+ bool_reduced_proba_name = scope.get_unique_variable_name("bool_reduced_proba")
bool_not_reduced_proba_name = scope.get_unique_variable_name(
- 'bool_not_reduced_proba')
- not_reduced_proba_name = scope.get_unique_variable_name(
- 'not_reduced_proba')
- proba_updated_name = scope.get_unique_variable_name('proba_updated')
- mask_name = scope.get_unique_variable_name('mask')
- reduced_proba_updated_name = scope.get_unique_variable_name(
- 'reduced_proba_updated')
-
- container.add_initializer(num_classes_name, proto_type,
- [], [num_classes])
-
- apply_cast(scope, reduced_proba, bool_reduced_proba_name, container,
- to=onnx_proto.TensorProto.BOOL)
- container.add_node('Not', bool_reduced_proba_name,
- bool_not_reduced_proba_name,
- name=scope.get_unique_operator_name('Not'))
- apply_cast(scope, bool_not_reduced_proba_name, not_reduced_proba_name,
- container, to=proto_type)
- apply_add(scope, [proba, not_reduced_proba_name],
- proba_updated_name, container, broadcast=1)
- apply_mul(scope, [not_reduced_proba_name, num_classes_name],
- mask_name, container, broadcast=1)
- apply_add(scope, [reduced_proba, mask_name],
- reduced_proba_updated_name, container, broadcast=0)
+ "bool_not_reduced_proba"
+ )
+ not_reduced_proba_name = scope.get_unique_variable_name("not_reduced_proba")
+ proba_updated_name = scope.get_unique_variable_name("proba_updated")
+ mask_name = scope.get_unique_variable_name("mask")
+ reduced_proba_updated_name = scope.get_unique_variable_name("reduced_proba_updated")
+
+ container.add_initializer(num_classes_name, proto_type, [], [num_classes])
+
+ apply_cast(
+ scope,
+ reduced_proba,
+ bool_reduced_proba_name,
+ container,
+ to=onnx_proto.TensorProto.BOOL,
+ )
+ container.add_node(
+ "Not",
+ bool_reduced_proba_name,
+ bool_not_reduced_proba_name,
+ name=scope.get_unique_operator_name("Not"),
+ )
+ apply_cast(
+ scope,
+ bool_not_reduced_proba_name,
+ not_reduced_proba_name,
+ container,
+ to=proto_type,
+ )
+ apply_add(
+ scope,
+ [proba, not_reduced_proba_name],
+ proba_updated_name,
+ container,
+ broadcast=1,
+ )
+ apply_mul(
+ scope,
+ [not_reduced_proba_name, num_classes_name],
+ mask_name,
+ container,
+ broadcast=1,
+ )
+ apply_add(
+ scope,
+ [reduced_proba, mask_name],
+ reduced_proba_updated_name,
+ container,
+ broadcast=0,
+ )
return proba_updated_name, reduced_proba_updated_name
-def _normalise_proba(scope, operator, container, scores, proba, num_classes,
- unity_name, proto_type):
- reduced_proba_name = scope.get_unique_variable_name('reduced_proba')
- sub_result_name = scope.get_unique_variable_name('sub_result')
+def _normalise_proba(
+ scope, operator, container, scores, proba, num_classes, unity_name, proto_type
+):
+ reduced_proba_name = scope.get_unique_variable_name("reduced_proba")
+ sub_result_name = scope.get_unique_variable_name("sub_result")
if num_classes == 2:
- apply_sub(scope, [unity_name, proba],
- sub_result_name, container, broadcast=1)
- apply_concat(scope, [sub_result_name, proba],
- operator.outputs[1].full_name, container, axis=1)
+ apply_sub(scope, [unity_name, proba], sub_result_name, container, broadcast=1)
+ apply_concat(
+ scope,
+ [sub_result_name, proba],
+ operator.outputs[1].full_name,
+ container,
+ axis=1,
+ )
else:
if container.target_opset < 13:
container.add_node(
- 'ReduceSum', proba, reduced_proba_name, axes=[1],
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ proba,
+ reduced_proba_name,
+ axes=[1],
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
else:
- axis_name = scope.get_unique_variable_name('axis')
- container.add_initializer(
- axis_name, onnx_proto.TensorProto.INT64, [1], [1])
+ axis_name = scope.get_unique_variable_name("axis")
+ container.add_initializer(axis_name, onnx_proto.TensorProto.INT64, [1], [1])
container.add_node(
- 'ReduceSum', [proba, axis_name], reduced_proba_name,
- name=scope.get_unique_operator_name('ReduceSum'))
+ "ReduceSum",
+ [proba, axis_name],
+ reduced_proba_name,
+ name=scope.get_unique_operator_name("ReduceSum"),
+ )
proba_updated, reduced_proba_updated = _handle_zeros(
- scope, container, scores, proba, reduced_proba_name, num_classes,
- proto_type)
- apply_div(scope, [proba_updated, reduced_proba_updated],
- operator.outputs[1].full_name, container, broadcast=1)
+ scope, container, scores, proba, reduced_proba_name, num_classes, proto_type
+ )
+ apply_div(
+ scope,
+ [proba_updated, reduced_proba_updated],
+ operator.outputs[1].full_name,
+ container,
+ broadcast=1,
+ )
return operator.outputs[1].full_name
-def _predict_proba_log(scope, operator, container, scores, num_classes,
- proto_type):
+def _predict_proba_log(scope, operator, container, scores, num_classes, proto_type):
"""Probability estimation for SGDClassifier with loss=log (or log_loss)
and Logistic Regression.
Positive class probabilities are computed as
@@ -126,47 +183,68 @@ def _predict_proba_log(scope, operator, container, scores, num_classes,
multiclass is handled by normalising that over all classes.
"""
if num_classes >= 3 or container.target_opset < 13:
- negated_scores_name = scope.get_unique_variable_name('negated_scores')
- negate_name = scope.get_unique_variable_name('negate')
- exp_result_name = scope.get_unique_variable_name('exp_result')
- unity_name = scope.get_unique_variable_name('unity')
- add_result_name = scope.get_unique_variable_name('add_result')
- proba_name = scope.get_unique_variable_name('proba')
+ negated_scores_name = scope.get_unique_variable_name("negated_scores")
+ negate_name = scope.get_unique_variable_name("negate")
+ exp_result_name = scope.get_unique_variable_name("exp_result")
+ unity_name = scope.get_unique_variable_name("unity")
+ add_result_name = scope.get_unique_variable_name("add_result")
+ proba_name = scope.get_unique_variable_name("proba")
container.add_initializer(negate_name, proto_type, [], [-1])
container.add_initializer(unity_name, proto_type, [], [1])
- apply_mul(scope, [scores, negate_name],
- negated_scores_name, container, broadcast=1)
+ apply_mul(
+ scope, [scores, negate_name], negated_scores_name, container, broadcast=1
+ )
apply_exp(scope, negated_scores_name, exp_result_name, container)
- apply_add(scope, [exp_result_name, unity_name],
- add_result_name, container, broadcast=1)
+ apply_add(
+ scope,
+ [exp_result_name, unity_name],
+ add_result_name,
+ container,
+ broadcast=1,
+ )
apply_reciprocal(scope, add_result_name, proba_name, container)
- return _normalise_proba(scope, operator, container, scores, proba_name,
- num_classes, unity_name, proto_type)
+ return _normalise_proba(
+ scope,
+ operator,
+ container,
+ scores,
+ proba_name,
+ num_classes,
+ unity_name,
+ proto_type,
+ )
# Sigmoid cannot be used for num_classes > 2 because
# onnxruntime has a different implementation than numpy.
# It introduces discrepancies when x < 1e16.
# Below that threshold, Sigmoid must be replaced by Exp
# because Sigmoid is not an increasing function.
- sigmo = scope.get_unique_variable_name('sigmoid')
- container.add_node('Sigmoid', [scores], [sigmo],
- name=scope.get_unique_operator_name('Sigmoid'))
+ sigmo = scope.get_unique_variable_name("sigmoid")
+ container.add_node(
+ "Sigmoid", [scores], [sigmo], name=scope.get_unique_operator_name("Sigmoid")
+ )
- unity_name = scope.get_unique_variable_name('unity')
+ unity_name = scope.get_unique_variable_name("unity")
container.add_initializer(unity_name, proto_type, [1], [1])
- sigmo_0 = scope.get_unique_variable_name('sigmo_0')
- container.add_node('Sub', [unity_name, sigmo], [sigmo_0],
- name=scope.get_unique_operator_name('Sub'))
- apply_concat(scope, [sigmo_0, sigmo], [operator.outputs[1].full_name],
- container, axis=1)
+ sigmo_0 = scope.get_unique_variable_name("sigmo_0")
+ container.add_node(
+ "Sub",
+ [unity_name, sigmo],
+ [sigmo_0],
+ name=scope.get_unique_operator_name("Sub"),
+ )
+ apply_concat(
+ scope, [sigmo_0, sigmo], [operator.outputs[1].full_name], container, axis=1
+ )
return operator.outputs[1].full_name
-def _predict_proba_modified_huber(scope, operator, container,
- scores, num_classes, proto_type):
+def _predict_proba_modified_huber(
+ scope, operator, container, scores, num_classes, proto_type
+):
"""Probability estimation for SGDClassifier with
loss=modified_huber.
Multiclass probability estimates are derived from binary
@@ -177,30 +255,48 @@ def _predict_proba_modified_huber(scope, operator, container,
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
dtype = np.float32
- unity_name = scope.get_unique_variable_name('unity')
- constant_name = scope.get_unique_variable_name('constant')
- add_result_name = scope.get_unique_variable_name('add_result')
- proba_name = scope.get_unique_variable_name('proba')
- clipped_scores_name = scope.get_unique_variable_name('clipped_scores')
-
- container.add_initializer(unity_name, proto_type,
- [], [1])
- container.add_initializer(constant_name, proto_type,
- [], [2])
-
- apply_clip(scope, scores, clipped_scores_name, container,
- max=np.array(1, dtype=dtype),
- min=np.array(-1, dtype=dtype))
- apply_add(scope, [clipped_scores_name, unity_name],
- add_result_name, container, broadcast=1)
- apply_div(scope, [add_result_name, constant_name],
- proba_name, container, broadcast=1)
- return _normalise_proba(scope, operator, container, scores, proba_name,
- num_classes, unity_name, proto_type)
-
-
-def convert_sklearn_sgd_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ unity_name = scope.get_unique_variable_name("unity")
+ constant_name = scope.get_unique_variable_name("constant")
+ add_result_name = scope.get_unique_variable_name("add_result")
+ proba_name = scope.get_unique_variable_name("proba")
+ clipped_scores_name = scope.get_unique_variable_name("clipped_scores")
+
+ container.add_initializer(unity_name, proto_type, [], [1])
+ container.add_initializer(constant_name, proto_type, [], [2])
+
+ apply_clip(
+ scope,
+ scores,
+ clipped_scores_name,
+ container,
+ max=np.array(1, dtype=dtype),
+ min=np.array(-1, dtype=dtype),
+ )
+ apply_add(
+ scope,
+ [clipped_scores_name, unity_name],
+ add_result_name,
+ container,
+ broadcast=1,
+ )
+ apply_div(
+ scope, [add_result_name, constant_name], proba_name, container, broadcast=1
+ )
+ return _normalise_proba(
+ scope,
+ operator,
+ container,
+ scores,
+ proba_name,
+ num_classes,
+ unity_name,
+ proto_type,
+ )
+
+
+def convert_sklearn_sgd_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""Converter for SGDClassifier."""
sgd_op = operator.raw_operator
classes = get_label_classes(scope, sgd_op)
@@ -209,78 +305,107 @@ def convert_sklearn_sgd_classifier(scope: Scope, operator: Operator,
if proto_type != onnx_proto.TensorProto.DOUBLE:
proto_type = onnx_proto.TensorProto.FLOAT
- if (np.issubdtype(classes.dtype, np.floating) or
- classes.dtype == np.bool_):
+ if np.issubdtype(classes.dtype, np.floating) or classes.dtype == np.bool_:
class_type = onnx_proto.TensorProto.INT32
classes = classes.astype(np.int32)
elif np.issubdtype(classes.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
- classes_name = scope.get_unique_variable_name('classes')
- predicted_label_name = scope.get_unique_variable_name(
- 'predicted_label')
- final_label_name = scope.get_unique_variable_name('final_label')
+ classes_name = scope.get_unique_variable_name("classes")
+ predicted_label_name = scope.get_unique_variable_name("predicted_label")
+ final_label_name = scope.get_unique_variable_name("final_label")
- container.add_initializer(classes_name, class_type,
- classes.shape, classes)
+ container.add_initializer(classes_name, class_type, classes.shape, classes)
scores = _decision_function(scope, operator, container, sgd_op, proto_type)
options = container.get_options(sgd_op, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
- if sgd_op.loss in ('log', 'log_loss') and not use_raw_scores:
- proba = _predict_proba_log(scope, operator, container, scores,
- len(classes), proto_type)
- elif sgd_op.loss == 'modified_huber' and not use_raw_scores:
+ use_raw_scores = options["raw_scores"]
+ if sgd_op.loss in ("log", "log_loss") and not use_raw_scores:
+ proba = _predict_proba_log(
+ scope, operator, container, scores, len(classes), proto_type
+ )
+ elif sgd_op.loss == "modified_huber" and not use_raw_scores:
proba = _predict_proba_modified_huber(
- scope, operator, container, scores, len(classes),
- proto_type)
+ scope, operator, container, scores, len(classes), proto_type
+ )
else:
if len(classes) == 2:
- negate_name = scope.get_unique_variable_name('negate')
- negated_scores_name = scope.get_unique_variable_name(
- 'negated_scores')
-
- container.add_initializer(
- negate_name, proto_type, [], [-1])
-
- apply_mul(scope, [scores, negate_name],
- negated_scores_name, container, broadcast=1)
- apply_concat(scope, [negated_scores_name, scores],
- operator.outputs[1].full_name, container, axis=1)
+ negate_name = scope.get_unique_variable_name("negate")
+ negated_scores_name = scope.get_unique_variable_name("negated_scores")
+
+ container.add_initializer(negate_name, proto_type, [], [-1])
+
+ apply_mul(
+ scope,
+ [scores, negate_name],
+ negated_scores_name,
+ container,
+ broadcast=1,
+ )
+ apply_concat(
+ scope,
+ [negated_scores_name, scores],
+ operator.outputs[1].full_name,
+ container,
+ axis=1,
+ )
else:
- apply_identity(scope, scores,
- operator.outputs[1].full_name, container)
+ apply_identity(scope, scores, operator.outputs[1].full_name, container)
proba = operator.outputs[1].full_name
- container.add_node('ArgMax', proba,
- predicted_label_name,
- name=scope.get_unique_operator_name('ArgMax'),
- axis=1,
- keepdims=1)
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, predicted_label_name],
- final_label_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArgMax",
+ proba,
+ predicted_label_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ keepdims=1,
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
+ [classes_name, predicted_label_name],
+ final_label_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == onnx_proto.TensorProto.INT32:
reshaped_final_label_name = scope.get_unique_variable_name(
- 'reshaped_final_label')
-
- apply_reshape(scope, final_label_name, reshaped_final_label_name,
- container, desired_shape=(-1,))
- apply_cast(scope, reshaped_final_label_name,
- operator.outputs[0].full_name, container,
- to=onnx_proto.TensorProto.INT64)
+ "reshaped_final_label"
+ )
+
+ apply_reshape(
+ scope,
+ final_label_name,
+ reshaped_final_label_name,
+ container,
+ desired_shape=(-1,),
+ )
+ apply_cast(
+ scope,
+ reshaped_final_label_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- apply_reshape(scope, final_label_name,
- operator.outputs[0].full_name, container,
- desired_shape=(-1,))
-
-
-register_converter('SklearnSGDClassifier',
- convert_sklearn_sgd_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
+ apply_reshape(
+ scope,
+ final_label_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=(-1,),
+ )
+
+
+register_converter(
+ "SklearnSGDClassifier",
+ convert_sklearn_sgd_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
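
The helpers above follow scikit-learn's own formulas: _decision_function computes X @ coef_.T + intercept_, _predict_proba_log maps the score through a sigmoid (concatenating 1 - p and p in the binary case), and _predict_proba_modified_huber uses (clip(score, -1, 1) + 1) / 2 before normalisation. A binary log-loss sketch (the loss name is 'log' on older scikit-learn releases):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import SGDClassifier

    X, y = make_classification(n_samples=200, n_features=5, random_state=0)
    clf = SGDClassifier(loss="log_loss", random_state=0).fit(X, y)

    scores = X @ clf.coef_.T + clf.intercept_     # _decision_function
    p = 1.0 / (1.0 + np.exp(-scores))             # sigmoid of the score
    proba = np.hstack([1.0 - p, p])               # concat(1 - p, p) for two classes
    assert np.allclose(proba, clf.predict_proba(X), atol=1e-6)
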
diff --git a/skl2onnx/operator_converters/sgd_oneclass_svm.py b/skl2onnx/operator_converters/sgd_oneclass_svm.py
index b4dc7778b..6c32c08e6 100644
--- a/skl2onnx/operator_converters/sgd_oneclass_svm.py
+++ b/skl2onnx/operator_converters/sgd_oneclass_svm.py
@@ -1,19 +1,17 @@
# SPDX-License-Identifier: Apache-2.0
-from ..common._apply_operation import (
- apply_cast, apply_sub)
-from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_proto_type)
+from ..common._apply_operation import apply_cast, apply_sub
+from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..proto import onnx_proto
-def convert_sklearn_sgd_oneclass_svm(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
-
+def convert_sklearn_sgd_oneclass_svm(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
input_name = operator.inputs[0].full_name
output_names = operator.output_full_names
model = operator.raw_operator
@@ -24,31 +22,38 @@ def convert_sklearn_sgd_oneclass_svm(scope: Scope, operator: Operator,
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
- if isinstance(operator.inputs[0].type,
- (BooleanTensorType, Int64TensorType)):
- cast_input_name = scope.get_unique_variable_name('cast_input')
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_dtype)
+ if isinstance(operator.inputs[0].type, (BooleanTensorType, Int64TensorType)):
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+ apply_cast(
+ scope, operator.input_full_names, cast_input_name, container, to=proto_dtype
+ )
input_name = cast_input_name
- coef_name = scope.get_unique_variable_name('coef')
- container.add_initializer(coef_name, proto_dtype,
- coef.shape, coef.ravel())
+ coef_name = scope.get_unique_variable_name("coef")
+ container.add_initializer(coef_name, proto_dtype, coef.shape, coef.ravel())
- offset_name = scope.get_unique_variable_name('offset')
+ offset_name = scope.get_unique_variable_name("offset")
container.add_initializer(offset_name, proto_dtype, offset.shape, offset)
- matmul_result_name = scope.get_unique_variable_name('matmul_result')
- container.add_node('MatMul', [input_name, coef_name], matmul_result_name,
- name=scope.get_unique_operator_name('MatMul'))
+ matmul_result_name = scope.get_unique_variable_name("matmul_result")
+ container.add_node(
+ "MatMul",
+ [input_name, coef_name],
+ matmul_result_name,
+ name=scope.get_unique_operator_name("MatMul"),
+ )
- apply_sub(scope, [matmul_result_name, offset_name], output_names[1],
- container, broadcast=0)
+ apply_sub(
+ scope,
+ [matmul_result_name, offset_name],
+ output_names[1],
+ container,
+ broadcast=0,
+ )
- pred = scope.get_unique_variable_name('class_prediction')
- container.add_node('Sign', output_names[1], pred, op_version=9)
- apply_cast(scope, pred, output_names[0],
- container, to=onnx_proto.TensorProto.INT64)
+ pred = scope.get_unique_variable_name("class_prediction")
+ container.add_node("Sign", output_names[1], pred, op_version=9)
+ apply_cast(scope, pred, output_names[0], container, to=onnx_proto.TensorProto.INT64)
-register_converter('SklearnSGDOneClassSVM', convert_sklearn_sgd_oneclass_svm)
+register_converter("SklearnSGDOneClassSVM", convert_sklearn_sgd_oneclass_svm)
diff --git a/skl2onnx/operator_converters/stacking.py b/skl2onnx/operator_converters/stacking.py
index c03264db0..889081c63 100644
--- a/skl2onnx/operator_converters/stacking.py
+++ b/skl2onnx/operator_converters/stacking.py
@@ -4,8 +4,7 @@
import numpy as np
from ..proto import onnx_proto
-from ..common._apply_operation import (
- apply_cast, apply_concat, apply_reshape)
+from ..common._apply_operation import apply_cast, apply_concat, apply_reshape
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -13,30 +12,28 @@
from .._supported_operators import sklearn_operator_name_map
-def _fetch_scores(scope, container, model, inputs, raw_scores=False,
- is_regressor=False):
+def _fetch_scores(
+ scope, container, model, inputs, raw_scores=False, is_regressor=False
+):
op_type = sklearn_operator_name_map[type(model)]
this_operator = scope.declare_local_operator(op_type, model)
- if container.has_options(model, 'raw_scores'):
- container.add_options(id(model), {'raw_scores': raw_scores})
+ if container.has_options(model, "raw_scores"):
+ container.add_options(id(model), {"raw_scores": raw_scores})
this_operator.inputs.append(inputs)
if is_regressor:
- output_proba = scope.declare_local_variable(
- 'variable', inputs.type.__class__())
+ output_proba = scope.declare_local_variable("variable", inputs.type.__class__())
this_operator.outputs.append(output_proba)
else:
- label_name = scope.declare_local_variable(
- 'label', Int64TensorType())
+ label_name = scope.declare_local_variable("label", Int64TensorType())
this_operator.outputs.append(label_name)
output_proba = scope.declare_local_variable(
- 'probability_tensor', inputs.type.__class__())
+ "probability_tensor", inputs.type.__class__()
+ )
this_operator.outputs.append(output_proba)
proto_type = guess_proto_type(inputs.type)
- new_name = scope.get_unique_variable_name(
- output_proba.full_name + '_castio')
- apply_cast(scope, output_proba.full_name, new_name,
- container, to=proto_type)
+ new_name = scope.get_unique_variable_name(output_proba.full_name + "_castio")
+ apply_cast(scope, output_proba.full_name, new_name, container, to=proto_type)
return new_name
@@ -46,61 +43,66 @@ def _add_passthrough_connection(operator, predictions):
def _transform_regressor(scope, operator, container, model):
- merged_prob_tensor = scope.get_unique_variable_name(
- 'merged_probability_tensor')
+ merged_prob_tensor = scope.get_unique_variable_name("merged_probability_tensor")
predictions = [
- _fetch_scores(
- scope, container, est, operator.inputs[0], is_regressor=True)
+ _fetch_scores(scope, container, est, operator.inputs[0], is_regressor=True)
for est in model.estimators_
]
_add_passthrough_connection(operator, predictions)
- apply_concat(
- scope, predictions, merged_prob_tensor, container, axis=1)
+ apply_concat(scope, predictions, merged_prob_tensor, container, axis=1)
return merged_prob_tensor
def _transform(scope, operator, container, model):
- merged_prob_tensor = scope.get_unique_variable_name(
- 'merged_probability_tensor')
+ merged_prob_tensor = scope.get_unique_variable_name("merged_probability_tensor")
predictions = [
- _fetch_scores(scope, container, est, operator.inputs[0],
- raw_scores=meth == 'decision_function')
+ _fetch_scores(
+ scope,
+ container,
+ est,
+ operator.inputs[0],
+ raw_scores=meth == "decision_function",
+ )
for est, meth in zip(model.estimators_, model.stack_method_)
- if est != 'drop'
+ if est != "drop"
]
op = operator.raw_operator
- select_lact_column = (len(op.classes_) == 2 and all(
- op.stack_method_[est_idx] == 'predict_proba'
- for est_idx in range(0, len(op.estimators_))))
+ select_lact_column = len(op.classes_) == 2 and all(
+ op.stack_method_[est_idx] == "predict_proba"
+ for est_idx in range(0, len(op.estimators_))
+ )
if select_lact_column:
- column_index_name = scope.get_unique_variable_name('column_index')
- container.add_initializer(column_index_name,
- onnx_proto.TensorProto.INT64, [], [1])
+ column_index_name = scope.get_unique_variable_name("column_index")
+ container.add_initializer(
+ column_index_name, onnx_proto.TensorProto.INT64, [], [1]
+ )
new_predictions = []
for ipred, pred in enumerate(predictions):
- prob1 = scope.get_unique_variable_name('stack_prob%d' % ipred)
+ prob1 = scope.get_unique_variable_name("stack_prob%d" % ipred)
container.add_node(
- 'ArrayFeatureExtractor',
- [pred, column_index_name], prob1,
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
- op_domain='ai.onnx.ml')
+ "ArrayFeatureExtractor",
+ [pred, column_index_name],
+ prob1,
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ op_domain="ai.onnx.ml",
+ )
new_predictions.append(prob1)
predictions = new_predictions
_add_passthrough_connection(operator, predictions)
- apply_concat(
- scope, predictions, merged_prob_tensor, container, axis=1)
+ apply_concat(scope, predictions, merged_prob_tensor, container, axis=1)
return merged_prob_tensor
-def convert_sklearn_stacking_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_stacking_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for StackingClassifier. It invokes converters for each
estimator, concatenating their results before calling converter
@@ -109,58 +111,89 @@ def convert_sklearn_stacking_classifier(scope: Scope, operator: Operator,
stacking_op = operator.raw_operator
classes = stacking_op.classes_
options = container.get_options(stacking_op, dict(raw_scores=False))
- use_raw_scores = options['raw_scores']
+ use_raw_scores = options["raw_scores"]
class_type = onnx_proto.TensorProto.STRING
- if (np.issubdtype(stacking_op.classes_.dtype, np.floating) or
- stacking_op.classes_.dtype == np.bool_):
+ if (
+ np.issubdtype(stacking_op.classes_.dtype, np.floating)
+ or stacking_op.classes_.dtype == np.bool_
+ ):
class_type = onnx_proto.TensorProto.INT32
classes = classes.astype(np.int32)
elif np.issubdtype(stacking_op.classes_.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
else:
- classes = np.array([s.encode('utf-8') for s in classes])
+ classes = np.array([s.encode("utf-8") for s in classes])
- classes_name = scope.get_unique_variable_name('classes')
- argmax_output_name = scope.get_unique_variable_name('argmax_output')
- reshaped_result_name = scope.get_unique_variable_name('reshaped_result')
+ classes_name = scope.get_unique_variable_name("classes")
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ reshaped_result_name = scope.get_unique_variable_name("reshaped_result")
array_feature_extractor_result_name = scope.get_unique_variable_name(
- 'array_feature_extractor_result')
+ "array_feature_extractor_result"
+ )
container.add_initializer(classes_name, class_type, classes.shape, classes)
- merged_proba_tensor = _transform(
- scope, operator, container, stacking_op)
+ merged_proba_tensor = _transform(scope, operator, container, stacking_op)
merge_proba = scope.declare_local_variable(
- 'merged_stacked_proba', operator.inputs[0].type.__class__())
- container.add_node(
- 'Identity', [merged_proba_tensor], [merge_proba.onnx_name])
+ "merged_stacked_proba", operator.inputs[0].type.__class__()
+ )
+ container.add_node("Identity", [merged_proba_tensor], [merge_proba.onnx_name])
prob = _fetch_scores(
- scope, container, stacking_op.final_estimator_, merge_proba,
- raw_scores=use_raw_scores)
- container.add_node('Identity', prob, operator.outputs[1].onnx_name,
- name=scope.get_unique_operator_name('OpProb'))
- container.add_node('ArgMax', prob,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
+ scope,
+ container,
+ stacking_op.final_estimator_,
+ merge_proba,
+ raw_scores=use_raw_scores,
+ )
+ container.add_node(
+ "Identity",
+ prob,
+ operator.outputs[1].onnx_name,
+ name=scope.get_unique_operator_name("OpProb"),
+ )
container.add_node(
- 'ArrayFeatureExtractor', [classes_name, argmax_output_name],
- array_feature_extractor_result_name, op_domain='ai.onnx.ml',
- name=scope.get_unique_operator_name('ArrayFeatureExtractor'))
+ "ArgMax",
+ prob,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ container.add_node(
+ "ArrayFeatureExtractor",
+ [classes_name, argmax_output_name],
+ array_feature_extractor_result_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("ArrayFeatureExtractor"),
+ )
if class_type == onnx_proto.TensorProto.INT32:
- apply_reshape(scope, array_feature_extractor_result_name,
- reshaped_result_name, container,
- desired_shape=(-1,))
- apply_cast(scope, reshaped_result_name, operator.outputs[0].full_name,
- container, to=onnx_proto.TensorProto.INT64)
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ reshaped_result_name,
+ container,
+ desired_shape=(-1,),
+ )
+ apply_cast(
+ scope,
+ reshaped_result_name,
+ operator.outputs[0].full_name,
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- apply_reshape(scope, array_feature_extractor_result_name,
- operator.outputs[0].full_name, container,
- desired_shape=(-1,))
-
-
-def convert_sklearn_stacking_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+ apply_reshape(
+ scope,
+ array_feature_extractor_result_name,
+ operator.outputs[0].full_name,
+ container,
+ desired_shape=(-1,),
+ )
+
+
+def convert_sklearn_stacking_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for StackingRegressor. It invokes converters for each
estimator, concatenating their results before calling converter
@@ -168,24 +201,30 @@ def convert_sklearn_stacking_regressor(scope: Scope, operator: Operator,
"""
stacking_op = operator.raw_operator
- merged_proba_tensor = _transform_regressor(
- scope, operator, container, stacking_op)
+ merged_proba_tensor = _transform_regressor(scope, operator, container, stacking_op)
merge_proba = scope.declare_local_variable(
- 'merged_stacked_proba', operator.inputs[0].type.__class__())
- container.add_node(
- 'Identity', [merged_proba_tensor], [merge_proba.onnx_name])
+ "merged_stacked_proba", operator.inputs[0].type.__class__()
+ )
+ container.add_node("Identity", [merged_proba_tensor], [merge_proba.onnx_name])
prob = _fetch_scores(
- scope, container, stacking_op.final_estimator_, merge_proba,
- is_regressor=True)
- container.add_node('Identity', prob, operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('Identity'))
-
-
-register_converter('SklearnStackingClassifier',
- convert_sklearn_stacking_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
-register_converter('SklearnStackingRegressor',
- convert_sklearn_stacking_regressor)
+ scope, container, stacking_op.final_estimator_, merge_proba, is_regressor=True
+ )
+ container.add_node(
+ "Identity",
+ prob,
+ operator.outputs[0].full_name,
+ name=scope.get_unique_operator_name("Identity"),
+ )
+
+
+register_converter(
+ "SklearnStackingClassifier",
+ convert_sklearn_stacking_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
+register_converter("SklearnStackingRegressor", convert_sklearn_stacking_regressor)
diff --git a/skl2onnx/operator_converters/support_vector_machines.py b/skl2onnx/operator_converters/support_vector_machines.py
index 32631ddf6..6f57771d2 100644
--- a/skl2onnx/operator_converters/support_vector_machines.py
+++ b/skl2onnx/operator_converters/support_vector_machines.py
@@ -5,12 +5,12 @@
from scipy.sparse import isspmatrix
from sklearn.svm import SVC, NuSVC, SVR, NuSVR, OneClassSVM
from ..common._apply_operation import apply_cast
-from ..common.data_types import (
- BooleanTensorType, Int64TensorType, guess_proto_type)
+from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type
from ..common._registration import register_converter
from ..proto import onnx_proto
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
+
try:
from ..common._apply_operation import apply_less
except ImportError:
@@ -19,9 +19,13 @@
def convert_sklearn_svm_regressor(
- scope: Scope, operator: Operator,
- container: ModelComponentContainer,
- op_type='SVMRegressor', op_domain='ai.onnx.ml', op_version=1):
+ scope: Scope,
+ operator: Operator,
+ container: ModelComponentContainer,
+ op_type="SVMRegressor",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
"""
Converter for model
    `SVR `_. *onnxruntime* returns
    the raw score from the *svm* algorithm as a *matrix[N, C(C-1)/2]*.
"""
- svm_attrs = {'name': scope.get_unique_operator_name('SVM')}
+ svm_attrs = {"name": scope.get_unique_operator_name("SVM")}
op = operator.raw_operator
if isinstance(op.dual_coef_, np.ndarray):
coef = op.dual_coef_.ravel()
@@ -51,79 +55,107 @@ def convert_sklearn_svm_regressor(
else:
support_vectors = op.support_vectors_
- svm_attrs['kernel_type'] = op.kernel.upper()
- svm_attrs['kernel_params'] = [np.float32(_) for _ in
- [op._gamma, op.coef0, op.degree]]
+ svm_attrs["kernel_type"] = op.kernel.upper()
+ svm_attrs["kernel_params"] = [
+ np.float32(_) for _ in [op._gamma, op.coef0, op.degree]
+ ]
if isspmatrix(support_vectors):
- svm_attrs['support_vectors'] = support_vectors.toarray().ravel()
+ svm_attrs["support_vectors"] = support_vectors.toarray().ravel()
else:
- svm_attrs['support_vectors'] = support_vectors
+ svm_attrs["support_vectors"] = support_vectors
if isspmatrix(coef):
- svm_attrs['coefficients'] = coef.toarray().ravel()
+ svm_attrs["coefficients"] = coef.toarray().ravel()
else:
- svm_attrs['coefficients'] = coef
- svm_attrs['rho'] = intercept.astype(np.float32)
- svm_attrs['coefficients'] = svm_attrs['coefficients'].astype(np.float32)
- svm_attrs['support_vectors'] = svm_attrs['support_vectors'].astype(
- np.float32)
+ svm_attrs["coefficients"] = coef
+ svm_attrs["rho"] = intercept.astype(np.float32)
+ svm_attrs["coefficients"] = svm_attrs["coefficients"].astype(np.float32)
+ svm_attrs["support_vectors"] = svm_attrs["support_vectors"].astype(np.float32)
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
- if operator.type in ['SklearnSVR', 'SklearnNuSVR'] or isinstance(
- op, (SVR, NuSVR)):
- svm_attrs['post_transform'] = 'NONE'
- svm_attrs['n_supports'] = len(op.support_)
+ if operator.type in ["SklearnSVR", "SklearnNuSVR"] or isinstance(op, (SVR, NuSVR)):
+ svm_attrs["post_transform"] = "NONE"
+ svm_attrs["n_supports"] = len(op.support_)
input_name = operator.input_full_names
- if type(operator.inputs[0].type) in (
- BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_dtype)
+ if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=proto_dtype,
+ )
input_name = cast_input_name
- svm_out = scope.get_unique_variable_name('SVM03')
+ svm_out = scope.get_unique_variable_name("SVM03")
container.add_node(
- op_type, input_name, svm_out,
- op_domain=op_domain, op_version=op_version, **svm_attrs)
- apply_cast(scope, svm_out, operator.output_full_names,
- container, to=proto_dtype)
- elif (operator.type in ['SklearnOneClassSVM'] or
- isinstance(op, OneClassSVM)):
- svm_attrs['post_transform'] = 'NONE'
- svm_attrs['n_supports'] = len(op.support_)
+ op_type,
+ input_name,
+ svm_out,
+ op_domain=op_domain,
+ op_version=op_version,
+ **svm_attrs
+ )
+ apply_cast(
+ scope, svm_out, operator.output_full_names, container, to=proto_dtype
+ )
+ elif operator.type in ["SklearnOneClassSVM"] or isinstance(op, OneClassSVM):
+ svm_attrs["post_transform"] = "NONE"
+ svm_attrs["n_supports"] = len(op.support_)
input_name = operator.input_full_names
- if type(operator.inputs[0].type) in (
- BooleanTensorType, Int64TensorType):
- cast_input_name = scope.get_unique_variable_name('cast_input')
- apply_cast(scope, operator.input_full_names, cast_input_name,
- container, to=proto_dtype)
+ if type(operator.inputs[0].type) in (BooleanTensorType, Int64TensorType):
+ cast_input_name = scope.get_unique_variable_name("cast_input")
+ apply_cast(
+ scope,
+ operator.input_full_names,
+ cast_input_name,
+ container,
+ to=proto_dtype,
+ )
input_name = cast_input_name
- svm_out0 = scope.get_unique_variable_name('SVMO1')
+ svm_out0 = scope.get_unique_variable_name("SVMO1")
container.add_node(
- op_type, input_name, svm_out0,
- op_domain=op_domain, op_version=op_version, **svm_attrs)
+ op_type,
+ input_name,
+ svm_out0,
+ op_domain=op_domain,
+ op_version=op_version,
+ **svm_attrs
+ )
svm_out = operator.output_full_names[1]
apply_cast(scope, svm_out0, svm_out, container, to=proto_dtype)
- pred = scope.get_unique_variable_name('float_prediction')
- container.add_node('Sign', svm_out, pred, op_version=9)
- apply_cast(scope, pred, operator.output_full_names[0],
- container, to=onnx_proto.TensorProto.INT64)
+ pred = scope.get_unique_variable_name("float_prediction")
+ container.add_node("Sign", svm_out, pred, op_version=9)
+ apply_cast(
+ scope,
+ pred,
+ operator.output_full_names[0],
+ container,
+ to=onnx_proto.TensorProto.INT64,
+ )
else:
- raise ValueError("Unknown support vector machine model type found "
- "'{0}'.".format(operator.type))
+ raise ValueError(
+ "Unknown support vector machine model type found "
+ "'{0}'.".format(operator.type)
+ )
def convert_sklearn_svm_classifier(
- scope: Scope, operator: Operator,
- container: ModelComponentContainer,
- op_type='SVMClassifier', op_domain='ai.onnx.ml', op_version=1):
+ scope: Scope,
+ operator: Operator,
+ container: ModelComponentContainer,
+ op_type="SVMClassifier",
+ op_domain="ai.onnx.ml",
+ op_version=1,
+):
"""
Converter for model
`SVC 0:
- svm_attrs['prob_a'] = op.probA_.astype(np.float32)
+ svm_attrs["prob_a"] = op.probA_.astype(np.float32)
else:
handles_ovr = True
if len(op.probB_) > 0:
- svm_attrs['prob_b'] = op.probB_.astype(np.float32)
-
- if (hasattr(op, 'decision_function_shape') and
- op.decision_function_shape == 'ovr' and handles_ovr and
- len(op.classes_) > 2):
- output_name = scope.get_unique_variable_name('before_ovr')
+ svm_attrs["prob_b"] = op.probB_.astype(np.float32)
+
+ if (
+ hasattr(op, "decision_function_shape")
+ and op.decision_function_shape == "ovr"
+ and handles_ovr
+ and len(op.classes_) > 2
+ ):
+ output_name = scope.get_unique_variable_name("before_ovr")
elif len(op.classes_) == 2 and use_raw_scores:
- output_name = scope.get_unique_variable_name('raw_scores')
+ output_name = scope.get_unique_variable_name("raw_scores")
else:
output_name = operator.outputs[1].full_name
- svm_attrs['post_transform'] = 'NONE'
- svm_attrs['vectors_per_class'] = op.n_support_.tolist()
+ svm_attrs["post_transform"] = "NONE"
+ svm_attrs["vectors_per_class"] = op.n_support_.tolist()
label_name = operator.outputs[0].full_name
probability_tensor_name = output_name
- if all(isinstance(i, (numbers.Real, bool, np.bool_))
- for i in op.classes_):
+ if all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in op.classes_):
labels = [int(i) for i in op.classes_]
- svm_attrs['classlabels_ints'] = labels
+ svm_attrs["classlabels_ints"] = labels
elif all(isinstance(i, str) for i in op.classes_):
labels = [str(i) for i in op.classes_]
- svm_attrs['classlabels_strings'] = labels
+ svm_attrs["classlabels_strings"] = labels
else:
raise RuntimeError("Invalid class label type '%s'." % op.classes_)
- svm_out = scope.get_unique_variable_name('SVM02')
+ svm_out = scope.get_unique_variable_name("SVM02")
container.add_node(
- op_type, operator.inputs[0].full_name,
+ op_type,
+ operator.inputs[0].full_name,
[label_name, svm_out],
- op_domain=op_domain, op_version=op_version, **svm_attrs)
- apply_cast(scope, svm_out, probability_tensor_name,
- container, to=proto_dtype)
+ op_domain=op_domain,
+ op_version=op_version,
+ **svm_attrs
+ )
+ apply_cast(scope, svm_out, probability_tensor_name, container, to=proto_dtype)
if len(op.classes_) == 2 and use_raw_scores:
- minus_one = scope.get_unique_variable_name('minus_one')
+ minus_one = scope.get_unique_variable_name("minus_one")
container.add_initializer(minus_one, proto_dtype, [], [-1])
container.add_node(
- 'Mul', [output_name, minus_one], operator.outputs[1].full_name,
- name=scope.get_unique_operator_name('MulRawScores'))
+ "Mul",
+ [output_name, minus_one],
+ operator.outputs[1].full_name,
+ name=scope.get_unique_operator_name("MulRawScores"),
+ )
else:
- raise ValueError("Unknown support vector machine model type found "
- "'{0}'.".format(operator.type))
-
- if (hasattr(op, 'decision_function_shape') and
- op.decision_function_shape == 'ovr' and handles_ovr and
- len(op.classes_) > 2):
+ raise ValueError(
+ "Unknown support vector machine model type found "
+ "'{0}'.".format(operator.type)
+ )
+
+ if (
+ hasattr(op, "decision_function_shape")
+ and op.decision_function_shape == "ovr"
+ and handles_ovr
+ and len(op.classes_) > 2
+ ):
# Applies _ovr_decision_function.
# See https://github.com/scikit-learn/scikit-learn/blob/
# master/sklearn/utils/multiclass.py#L407:
@@ -248,19 +291,18 @@ def convert_sklearn_svm_classifier(
if apply_less is None:
raise RuntimeError(
- "Function apply_less is missing. "
- "onnxconverter-common is too old.")
+ "Function apply_less is missing. " "onnxconverter-common is too old."
+ )
- cst0 = scope.get_unique_variable_name('cst0')
- negative = scope.get_unique_variable_name('negative')
+ cst0 = scope.get_unique_variable_name("cst0")
+ negative = scope.get_unique_variable_name("negative")
container.add_initializer(cst0, proto_dtype, [], [0])
apply_less(scope, [output_name, cst0], negative, container)
- inegative = scope.get_unique_variable_name('inegative')
- apply_cast(scope, negative, inegative, container,
- to=proto_dtype)
+ inegative = scope.get_unique_variable_name("inegative")
+ apply_cast(scope, negative, inegative, container, to=proto_dtype)
- score_name = scope.get_unique_variable_name('neg')
- container.add_node('Neg', [output_name], score_name)
+ score_name = scope.get_unique_variable_name("neg")
+ container.add_node("Neg", [output_name], score_name)
#
# ...
@@ -281,29 +323,33 @@ def convert_sklearn_svm_classifier(
# sum_of_confidences / (3 * (np.abs(sum_of_confidences) + 1)))
# return votes + transformed_confidences
- this_operator = scope.declare_local_operator(
- "SklearnOVRDecisionFunction", op)
+ this_operator = scope.declare_local_operator("SklearnOVRDecisionFunction", op)
cl_type = operator.inputs[0].type.__class__
prob_sign = scope.declare_local_variable("prob_sign", cl_type())
- container.add_node('Identity', [inegative], [prob_sign.onnx_name])
+ container.add_node("Identity", [inegative], [prob_sign.onnx_name])
prob_score = scope.declare_local_variable("prob_sign", cl_type())
- container.add_node('Identity', [score_name], [prob_score.onnx_name])
+ container.add_node("Identity", [score_name], [prob_score.onnx_name])
this_operator.inputs.append(prob_sign)
this_operator.inputs.append(prob_score)
- ovr_name = scope.declare_local_variable('ovr_output', cl_type())
+ ovr_name = scope.declare_local_variable("ovr_output", cl_type())
this_operator.outputs.append(ovr_name)
output_name = operator.outputs[1].full_name
- container.add_node('Identity', [ovr_name.onnx_name], [output_name])
-
-
-register_converter('SklearnOneClassSVM', convert_sklearn_svm_regressor)
-register_converter('SklearnSVC', convert_sklearn_svm_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
-register_converter('SklearnSVR', convert_sklearn_svm_regressor)
+ container.add_node("Identity", [ovr_name.onnx_name], [output_name])
+
+
+register_converter("SklearnOneClassSVM", convert_sklearn_svm_regressor)
+register_converter(
+ "SklearnSVC",
+ convert_sklearn_svm_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "nocl": [True, False],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+)
+register_converter("SklearnSVR", convert_sklearn_svm_regressor)
diff --git a/skl2onnx/operator_converters/text_vectoriser.py b/skl2onnx/operator_converters/text_vectoriser.py
index 9b72d6e3a..5d5ba7eba 100644
--- a/skl2onnx/operator_converters/text_vectoriser.py
+++ b/skl2onnx/operator_converters/text_vectoriser.py
@@ -4,8 +4,7 @@
import warnings
from collections import OrderedDict, Counter
import numpy as np
-from ..common._apply_operation import (
- apply_cast, apply_reshape, apply_identity)
+from ..common._apply_operation import apply_cast, apply_reshape, apply_identity
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
@@ -24,7 +23,7 @@ def _intelligent_split(text, op, tokenizer, existing):
`_.
"""
- if op.analyzer == 'word':
+ if op.analyzer == "word":
if op.ngram_range[0] == op.ngram_range[1] == 1:
spl = [text]
elif op.ngram_range[0] == 1 and len(text) >= 2:
@@ -46,8 +45,7 @@ def _intelligent_split(text, op, tokenizer, existing):
if len(spl) == 1:
pass
elif len(spl) == 2:
- if (spl[0] not in op.vocabulary_ or
- spl[1] not in op.vocabulary_):
+ if spl[0] not in op.vocabulary_ or spl[1] not in op.vocabulary_:
            # This is necessarily a single token.
spl = [text]
elif spl[0] in op.vocabulary_ and spl[1] in op.vocabulary_:
@@ -57,12 +55,16 @@ def _intelligent_split(text, op, tokenizer, existing):
pass
elif len(spl) == 3:
stok = (all([s in op.vocabulary_ for s in spl]), spl)
- spl12 = (spl[2] in op.vocabulary_ and
- (spl[0] + ' ' + spl[1]) in op.vocabulary_,
- [spl[0] + ' ' + spl[1], spl[2]])
- spl23 = (spl[0] in op.vocabulary_ and
- (spl[1] + ' ' + spl[2]) in op.vocabulary_,
- [spl[0], spl[1] + ' ' + spl[2]])
+ spl12 = (
+ spl[2] in op.vocabulary_
+ and (spl[0] + " " + spl[1]) in op.vocabulary_,
+ [spl[0] + " " + spl[1], spl[2]],
+ )
+ spl23 = (
+ spl[0] in op.vocabulary_
+ and (spl[1] + " " + spl[2]) in op.vocabulary_,
+ [spl[0], spl[1] + " " + spl[2]],
+ )
c = Counter(map(lambda t: t[0], [stok, spl12, spl23]))
if c.get(True, -1) == 0:
spl = [text]
@@ -77,8 +79,10 @@ def _intelligent_split(text, op, tokenizer, existing):
pass
else:
exc = (
- "More than one decomposition in tokens: [" +
- ", ".join(map(lambda t: "-".join(t), found)) + "].")
+ "More than one decomposition in tokens: ["
+ + ", ".join(map(lambda t: "-".join(t), found))
+ + "]."
+ )
elif any(map(lambda g: g in op.vocabulary_, spl)):
# TODO: handle this case with an algorithm
# which is able to break a string into
@@ -89,7 +93,8 @@ def _intelligent_split(text, op, tokenizer, existing):
"Unable to split n-grams '{}' into tokens. "
"{} This happens when a token contain "
"spaces. Token '{}' may be a token or a n-gram '{}'."
- "".format(text, exc, text, spl))
+ "".format(text, exc, text, spl)
+ )
else:
            # We reuse the tokenizer, hoping that it will clear
            # ambiguities, but this might be slow.
@@ -102,9 +107,9 @@ def _intelligent_split(text, op, tokenizer, existing):
raise RuntimeError(
f"The converter cannot guess how to split expression "
f"{text!r} into tokens. This case happens when tokens have "
- f"spaces.")
- if (op.ngram_range[0] == 1 and
- (len(op.ngram_range) == 1 or op.ngram_range[1] > 1)):
+ f"spaces."
+ )
+ if op.ngram_range[0] == 1 and (len(op.ngram_range) == 1 or op.ngram_range[1] > 1):
        # All grams should already exist in the vocabulary.
for g in spl:
if g not in op.vocabulary_:
@@ -112,13 +117,15 @@ def _intelligent_split(text, op, tokenizer, existing):
"Unable to split n-grams '{}' into tokens {} "
"existing in the vocabulary. Token '{}' does not "
"exist in the vocabulary."
- ".".format(text, spl, g))
+ ".".format(text, spl, g)
+ )
existing.add(spl)
return spl
-def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_text_vectorizer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converters for class
`TfidfVectorizer = 9."
- "".format(op.__class__.__name__))
+ "".format(op.__class__.__name__)
+ )
if op.analyzer == "char_wb":
raise NotImplementedError(
"CountVectorizer cannot be converted, "
"only tokenizer='word' is fully supported. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
if op.analyzer == "char":
warnings.warn(
"The conversion of CountVectorizer may not work. "
"only tokenizer='word' is fully supported. "
"You may raise an issue at "
"https://github.com/onnx/sklearn-onnx/issues.",
- UserWarning)
+ UserWarning,
+ )
if op.strip_accents is not None:
raise NotImplementedError(
"CountVectorizer cannot be converted, "
"only strip_accents=None is supported. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
options = container.get_options(
- op, dict(separators="DEFAULT",
- tokenexp=None,
- nan=False,
- keep_empty_string=False))
- if set(options) != {'separators', 'tokenexp', 'nan', 'keep_empty_string'}:
- raise RuntimeError("Unknown option {} for {}".format(
- set(options) - {'separators'}, type(op)))
-
- if op.analyzer == 'word':
- default_pattern = '(?u)\\b\\w\\w+\\b'
- if options['separators'] == "DEFAULT" and options['tokenexp'] is None:
+ op,
+ dict(separators="DEFAULT", tokenexp=None, nan=False, keep_empty_string=False),
+ )
+ if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string"}:
+ raise RuntimeError(
+ "Unknown option {} for {}".format(set(options) - {"separators"}, type(op))
+ )
+
+ if op.analyzer == "word":
+ default_pattern = "(?u)\\b\\w\\w+\\b"
+ if options["separators"] == "DEFAULT" and options["tokenexp"] is None:
regex = op.token_pattern
if regex == default_pattern:
- regex = '[a-zA-Z0-9_]+'
+ regex = "[a-zA-Z0-9_]+"
default_separators = None
- elif options['tokenexp'] is not None:
- if options['tokenexp']:
- regex = options['tokenexp']
+ elif options["tokenexp"] is not None:
+ if options["tokenexp"]:
+ regex = options["tokenexp"]
else:
regex = op.token_pattern
if regex == default_pattern:
- regex = '[a-zA-Z0-9_]+'
+ regex = "[a-zA-Z0-9_]+"
default_separators = None
else:
regex = None
- default_separators = options['separators']
+ default_separators = options["separators"]
else:
- if options['separators'] != 'DEFAULT':
- raise RuntimeError("Option separators has no effect "
- "if analyser != 'word'.")
- regex = options['tokenexp'] if options['tokenexp'] else '.'
+ if options["separators"] != "DEFAULT":
+ raise RuntimeError(
+ "Option separators has no effect " "if analyser != 'word'."
+ )
+ regex = options["tokenexp"] if options["tokenexp"] else "."
default_separators = None
if op.preprocessor is not None:
raise NotImplementedError(
"Custom preprocessor cannot be converted into ONNX. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
if op.tokenizer is not None:
raise NotImplementedError(
"Custom tokenizer cannot be converted into ONNX. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
if op.strip_accents is not None:
raise NotImplementedError(
"Operator StringNormalizer cannot remove accents. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
if hasattr(op, "stop_words_"):
- stop_words = op.stop_words_ | (
- set(op.stop_words) if op.stop_words else set())
+ stop_words = op.stop_words_ | (set(op.stop_words) if op.stop_words else set())
else:
stop_words = set()
for w in stop_words:
if not isinstance(w, str):
raise TypeError(
- f"One stop word is not a string {w!r} "
- f"in stop_words={stop_words}.")
+ f"One stop word is not a string {w!r} " f"in stop_words={stop_words}."
+ )
if op.lowercase or stop_words:
if len(operator.input_full_names) != 1:
- raise RuntimeError("Only one input is allowed, found {}.".format(
- operator.input_full_names))
+ raise RuntimeError(
+ "Only one input is allowed, found {}.".format(operator.input_full_names)
+ )
# StringNormalizer
- op_type = 'StringNormalizer'
- attrs = {'name': scope.get_unique_operator_name(op_type)}
- normalized = scope.get_unique_variable_name('normalized')
+ op_type = "StringNormalizer"
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
+ normalized = scope.get_unique_variable_name("normalized")
if container.target_opset >= 10:
- attrs.update({
- 'case_change_action': 'LOWER',
- 'is_case_sensitive': not op.lowercase,
- })
+ attrs.update(
+ {
+ "case_change_action": "LOWER",
+ "is_case_sensitive": not op.lowercase,
+ }
+ )
op_version = 10
- domain = ''
+ domain = ""
else:
- attrs.update({
- 'casechangeaction': 'LOWER',
- 'is_case_sensitive': not op.lowercase,
- })
+ attrs.update(
+ {
+ "casechangeaction": "LOWER",
+ "is_case_sensitive": not op.lowercase,
+ }
+ )
op_version = 9
- domain = 'com.microsoft'
- opvs = 1 if domain == 'com.microsoft' else op_version
+ domain = "com.microsoft"
+ opvs = 1 if domain == "com.microsoft" else op_version
if stop_words:
- attrs['stopwords'] = list(sorted(stop_words))
+ attrs["stopwords"] = list(sorted(stop_words))
- if options['keep_empty_string']:
- del attrs['name']
+ if options["keep_empty_string"]:
+ del attrs["name"]
op_norm = OnnxStringNormalizer(
- 'text_in', op_version=container.target_opset,
- output_names=['text_out'], **attrs)
+ "text_in",
+ op_version=container.target_opset,
+ output_names=["text_out"],
+ **attrs,
+ )
scan_body = op_norm.to_onnx(
- OrderedDict([('text_in', StringTensorType())]),
- outputs=[('text_out', StringTensorType())],
- target_opset=op_version)
-
- vector = scope.get_unique_variable_name('vector')
- apply_reshape(scope, operator.input_full_names[0],
- vector, container,
- desired_shape=(-1, 1))
- container.add_node('Scan', vector, normalized,
- body=scan_body.graph, num_scan_inputs=1)
+ OrderedDict([("text_in", StringTensorType())]),
+ outputs=[("text_out", StringTensorType())],
+ target_opset=op_version,
+ )
+
+ vector = scope.get_unique_variable_name("vector")
+ apply_reshape(
+ scope,
+ operator.input_full_names[0],
+ vector,
+ container,
+ desired_shape=(-1, 1),
+ )
+ container.add_node(
+ "Scan", vector, normalized, body=scan_body.graph, num_scan_inputs=1
+ )
else:
- flatten = scope.get_unique_variable_name('flattened')
- apply_reshape(scope, operator.input_full_names[0],
- flatten, container,
- desired_shape=(-1, ))
- container.add_node(op_type, flatten,
- normalized, op_version=opvs,
- op_domain=domain, **attrs)
+ flatten = scope.get_unique_variable_name("flattened")
+ apply_reshape(
+ scope,
+ operator.input_full_names[0],
+ flatten,
+ container,
+ desired_shape=(-1,),
+ )
+ container.add_node(
+ op_type, flatten, normalized, op_version=opvs, op_domain=domain, **attrs
+ )
else:
normalized = operator.input_full_names
@@ -327,29 +358,36 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
while padvalue in op.vocabulary_:
padvalue += "#"
- op_type = 'Tokenizer'
- attrs = {'name': scope.get_unique_operator_name(op_type)}
- attrs.update({
- 'pad_value': padvalue,
- 'mark': False,
- 'mincharnum': 1,
- })
+ op_type = "Tokenizer"
+ attrs = {"name": scope.get_unique_operator_name(op_type)}
+ attrs.update(
+ {
+ "pad_value": padvalue,
+ "mark": False,
+ "mincharnum": 1,
+ }
+ )
if regex is None:
- attrs['separators'] = default_separators
+ attrs["separators"] = default_separators
else:
- attrs['tokenexp'] = regex
+ attrs["tokenexp"] = regex
- tokenized = scope.get_unique_variable_name('tokenized')
- container.add_node(op_type, normalized, tokenized,
- op_domain='com.microsoft', **attrs)
+ tokenized = scope.get_unique_variable_name("tokenized")
+ container.add_node(
+ op_type, normalized, tokenized, op_domain="com.microsoft", **attrs
+ )
# Flatten
# Tokenizer outputs shape {1, C} or {1, 1, C}.
# Second shape is not allowed by TfIdfVectorizer.
# We use Flatten which produces {1, C} in both cases.
- flatt_tokenized = scope.get_unique_variable_name('flattened')
- container.add_node("Flatten", tokenized, flatt_tokenized,
- name=scope.get_unique_operator_name('Flatten'))
+ flatt_tokenized = scope.get_unique_variable_name("flattened")
+ container.add_node(
+ "Flatten",
+ tokenized,
+ flatt_tokenized,
+ name=scope.get_unique_operator_name("Flatten"),
+ )
tokenized = flatt_tokenized
# Ngram - TfIdfVectorizer
@@ -358,8 +396,8 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
weights = [0 for i in range(C)]
for k, v in op.vocabulary_.items():
words[v] = k
- weights[v] = 1.
- mode = 'TF'
+ weights[v] = 1.0
+ mode = "TF"
    # Scikit-learn sorts n-grams in alphabetical order.
    # ONNX assumes they are sorted by n.
@@ -371,7 +409,7 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
if isinstance(w, tuple):
# TraceableCountVectorizer, TraceableTfIdfVectorizer
spl = list(w)
- w = ' '.join(w)
+ w = " ".join(w)
else:
# CountVectorizer, TfIdfVectorizer
try:
@@ -387,10 +425,10 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
f"{len(errors)} errors occurred. You can fix it by using "
f"class Traceable{op.__class__.__name__}.\n"
f"You can learn more at https://github.com/scikit-learn/"
- f"scikit-learn/issues/13733.\n{err}")
+ f"scikit-learn/issues/13733.\n{err}"
+ )
- ng_split_words = sorted([(len(a[0]), a[0], i)
- for i, a in enumerate(split_words)])
+ ng_split_words = sorted([(len(a[0]), a[0], i) for i, a in enumerate(split_words)])
key_indices = [a[2] for a in ng_split_words]
ngcounts = [0 for i in range(op.ngram_range[0])]
@@ -406,75 +444,96 @@ def convert_sklearn_text_vectorizer(scope: Scope, operator: Operator,
weights[ind] = weights_[i]
# Create the node.
- attrs = {'name': scope.get_unique_operator_name("TfIdfVectorizer")}
- attrs.update({
- 'min_gram_length': op.ngram_range[0],
- 'max_gram_length': op.ngram_range[1],
- 'mode': mode,
- 'max_skip_count': 0,
- 'pool_strings': words,
- 'ngram_indexes': key_indices,
- 'ngram_counts': ngcounts,
- 'weights': list(map(np.float32, weights)),
- })
- output = scope.get_unique_variable_name('output')
+ attrs = {"name": scope.get_unique_operator_name("TfIdfVectorizer")}
+ attrs.update(
+ {
+ "min_gram_length": op.ngram_range[0],
+ "max_gram_length": op.ngram_range[1],
+ "mode": mode,
+ "max_skip_count": 0,
+ "pool_strings": words,
+ "ngram_indexes": key_indices,
+ "ngram_counts": ngcounts,
+ "weights": list(map(np.float32, weights)),
+ }
+ )
+ output = scope.get_unique_variable_name("output")
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
if proto_dtype == onnx_proto.TensorProto.DOUBLE:
- output_tf = scope.get_unique_variable_name('cast_result')
+ output_tf = scope.get_unique_variable_name("cast_result")
else:
output_tf = output
if container.target_opset < 9:
- op_type = 'Ngram'
- container.add_node(op_type, tokenized, output_tf,
- op_domain='com.microsoft', **attrs)
+ op_type = "Ngram"
+ container.add_node(
+ op_type, tokenized, output_tf, op_domain="com.microsoft", **attrs
+ )
else:
- op_type = 'TfIdfVectorizer'
- container.add_node(op_type, tokenized, output_tf, op_domain='',
- op_version=9, **attrs)
+ op_type = "TfIdfVectorizer"
+ container.add_node(
+ op_type, tokenized, output_tf, op_domain="", op_version=9, **attrs
+ )
if proto_dtype == onnx_proto.TensorProto.DOUBLE:
- apply_cast(scope, output_tf, output,
- container, to=proto_dtype)
+ apply_cast(scope, output_tf, output, container, to=proto_dtype)
if op.binary:
- cast_result_name = scope.get_unique_variable_name('cast_result')
- output_name = scope.get_unique_variable_name('output_name')
-
- apply_cast(scope, output, cast_result_name, container,
- to=onnx_proto.TensorProto.BOOL)
- apply_cast(scope, cast_result_name, output_name,
- container, to=onnx_proto.TensorProto.FLOAT)
+ cast_result_name = scope.get_unique_variable_name("cast_result")
+ output_name = scope.get_unique_variable_name("output_name")
+
+ apply_cast(
+ scope, output, cast_result_name, container, to=onnx_proto.TensorProto.BOOL
+ )
+ apply_cast(
+ scope,
+ cast_result_name,
+ output_name,
+ container,
+ to=onnx_proto.TensorProto.FLOAT,
+ )
output = output_name
options = container.get_options(op, dict(nan=False))
- replace_by_nan = options.get('nan', False)
+ replace_by_nan = options.get("nan", False)
if replace_by_nan:
# This part replaces all null values by nan.
- cst_nan_name = scope.get_unique_variable_name('nan_name')
+ cst_nan_name = scope.get_unique_variable_name("nan_name")
container.add_initializer(cst_nan_name, proto_dtype, [1], [np.nan])
- cst_zero_name = scope.get_unique_variable_name('zero_name')
+ cst_zero_name = scope.get_unique_variable_name("zero_name")
container.add_initializer(cst_zero_name, proto_dtype, [1], [0])
- mask_name = scope.get_unique_variable_name('mask_name')
- container.add_node('Equal', [output, cst_zero_name],
- mask_name,
- name=scope.get_unique_operator_name('Equal'))
-
- where_name = scope.get_unique_variable_name('where_name')
- container.add_node('Where', [mask_name, cst_nan_name, output],
- where_name,
- name=scope.get_unique_operator_name('Where'))
+ mask_name = scope.get_unique_variable_name("mask_name")
+ container.add_node(
+ "Equal",
+ [output, cst_zero_name],
+ mask_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+
+ where_name = scope.get_unique_variable_name("where_name")
+ container.add_node(
+ "Where",
+ [mask_name, cst_nan_name, output],
+ where_name,
+ name=scope.get_unique_operator_name("Where"),
+ )
output = where_name
apply_identity(scope, output, operator.output_full_names, container)
-register_converter('SklearnCountVectorizer', convert_sklearn_text_vectorizer,
- options={'tokenexp': None, 'separators': None,
- 'nan': [True, False],
- 'keep_empty_string': [True, False]})
+register_converter(
+ "SklearnCountVectorizer",
+ convert_sklearn_text_vectorizer,
+ options={
+ "tokenexp": None,
+ "separators": None,
+ "nan": [True, False],
+ "keep_empty_string": [True, False],
+ },
+)
diff --git a/skl2onnx/operator_converters/tfidf_transformer.py b/skl2onnx/operator_converters/tfidf_transformer.py
index e755e78c8..fe6c20239 100644
--- a/skl2onnx/operator_converters/tfidf_transformer.py
+++ b/skl2onnx/operator_converters/tfidf_transformer.py
@@ -8,11 +8,17 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common._apply_operation import (
- apply_add, apply_log, apply_mul, apply_identity, apply_normalizer)
+ apply_add,
+ apply_log,
+ apply_mul,
+ apply_identity,
+ apply_normalizer,
+)
-def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_tfidf_transformer(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
# TODO: use sparse containers when available
dtype = guess_numpy_type(operator.inputs[0].type)
if dtype != np.float64:
@@ -24,7 +30,7 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator,
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
data = operator.input_full_names
- output_name = scope.get_unique_variable_name('tfidftr_output')
+ output_name = scope.get_unique_variable_name("tfidftr_output")
if op.sublinear_tf:
        # scikit-learn code:
@@ -46,7 +52,8 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator,
# sparse containers have not yet been implemented.
raise RuntimeError(
"ONNX does not support sparse tensors before opset < 11, "
- "sublinear_tf must be False.")
+ "sublinear_tf must be False."
+ )
if op.use_idf:
cst = op.idf_.astype(float_type)
@@ -54,42 +61,55 @@ def convert_sklearn_tfidf_transformer(scope: Scope, operator: Operator,
cst = np.diag(cst)
cst = cst.ravel().flatten()
shape = [len(cst)]
- idfcst = scope.get_unique_variable_name('idfcst')
+ idfcst = scope.get_unique_variable_name("idfcst")
container.add_initializer(idfcst, proto_dtype, shape, cst)
apply_mul(scope, data + [idfcst], output_name, container, broadcast=1)
else:
output_name = data[0]
if op.norm is not None:
- norm_name = scope.get_unique_variable_name('tfidftr_norm')
+ norm_name = scope.get_unique_variable_name("tfidftr_norm")
apply_normalizer(
- scope, output_name, norm_name, container,
- norm=op.norm.upper(), use_float=float_type == np.float32)
+ scope,
+ output_name,
+ norm_name,
+ container,
+ norm=op.norm.upper(),
+ use_float=float_type == np.float32,
+ )
output_name = norm_name
options = container.get_options(op, dict(nan=False))
- replace_by_nan = options.get('nan', False)
+ replace_by_nan = options.get("nan", False)
if replace_by_nan:
# This part replaces all null values by nan.
- cst_nan_name = scope.get_unique_variable_name('nan_name')
+ cst_nan_name = scope.get_unique_variable_name("nan_name")
container.add_initializer(cst_nan_name, proto_dtype, [1], [np.nan])
- cst_zero_name = scope.get_unique_variable_name('zero_name')
+ cst_zero_name = scope.get_unique_variable_name("zero_name")
container.add_initializer(cst_zero_name, proto_dtype, [1], [0])
- mask_name = scope.get_unique_variable_name('mask_name')
- container.add_node('Equal', [output_name, cst_zero_name],
- mask_name,
- name=scope.get_unique_operator_name('Equal'))
+ mask_name = scope.get_unique_variable_name("mask_name")
+ container.add_node(
+ "Equal",
+ [output_name, cst_zero_name],
+ mask_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
- where_name = scope.get_unique_variable_name('where_name')
- container.add_node('Where', [mask_name, cst_nan_name, output_name],
- where_name,
- name=scope.get_unique_operator_name('Where'))
+ where_name = scope.get_unique_variable_name("where_name")
+ container.add_node(
+ "Where",
+ [mask_name, cst_nan_name, output_name],
+ where_name,
+ name=scope.get_unique_operator_name("Where"),
+ )
output_name = where_name
apply_identity(scope, output_name, operator.output_full_names, container)
-register_converter('SklearnTfidfTransformer',
- convert_sklearn_tfidf_transformer,
- options={'nan': [True, False]})
+register_converter(
+ "SklearnTfidfTransformer",
+ convert_sklearn_tfidf_transformer,
+ options={"nan": [True, False]},
+)
diff --git a/skl2onnx/operator_converters/tfidf_vectoriser.py b/skl2onnx/operator_converters/tfidf_vectoriser.py
index c5c9bcaba..a624b90b9 100644
--- a/skl2onnx/operator_converters/tfidf_vectoriser.py
+++ b/skl2onnx/operator_converters/tfidf_vectoriser.py
@@ -3,16 +3,16 @@
from onnx import onnx_pb as onnx_proto
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from ..common._apply_operation import apply_identity
-from ..common.data_types import (
- FloatTensorType, DoubleTensorType, guess_proto_type)
+from ..common.data_types import FloatTensorType, DoubleTensorType, guess_proto_type
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from .._supported_operators import sklearn_operator_name_map
-def convert_sklearn_tfidf_vectoriser(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_tfidf_vectoriser(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converter for scikit-learn's TfidfVectoriser.
"""
@@ -30,23 +30,31 @@ def convert_sklearn_tfidf_vectoriser(scope: Scope, operator: Operator,
clr = DoubleTensorType
else:
raise RuntimeError(
- "Unexpected dtype '{}'. Float or double expected.".format(
- proto_dtype))
+ "Unexpected dtype '{}'. Float or double expected.".format(proto_dtype)
+ )
cv_output_name = scope.declare_local_variable(
- 'count_vec_output', clr([None, columns]))
+ "count_vec_output", clr([None, columns])
+ )
cv_operator.outputs.append(cv_output_name)
op_type = sklearn_operator_name_map[TfidfTransformer]
tfidf_operator = scope.declare_local_operator(op_type, tfidf_op)
tfidf_operator.inputs.append(cv_output_name)
- tfidf_output_name = scope.declare_local_variable('tfidf_output', clr())
+ tfidf_output_name = scope.declare_local_variable("tfidf_output", clr())
tfidf_operator.outputs.append(tfidf_output_name)
- apply_identity(scope, tfidf_output_name.full_name,
- operator.outputs[0].full_name, container)
+ apply_identity(
+ scope, tfidf_output_name.full_name, operator.outputs[0].full_name, container
+ )
-register_converter('SklearnTfidfVectorizer', convert_sklearn_tfidf_vectoriser,
- options={'tokenexp': None, 'separators': None,
- 'nan': [True, False],
- 'keep_empty_string': [True, False]})
+register_converter(
+ "SklearnTfidfVectorizer",
+ convert_sklearn_tfidf_vectoriser,
+ options={
+ "tokenexp": None,
+ "separators": None,
+ "nan": [True, False],
+ "keep_empty_string": [True, False],
+ },
+)
diff --git a/skl2onnx/operator_converters/voting_classifier.py b/skl2onnx/operator_converters/voting_classifier.py
index 429ecd5d9..a2388998b 100644
--- a/skl2onnx/operator_converters/voting_classifier.py
+++ b/skl2onnx/operator_converters/voting_classifier.py
@@ -12,8 +12,9 @@
from ..proto import onnx_proto
-def convert_voting_classifier(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_voting_classifier(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *VotingClassifier* into *ONNX* format.
@@ -26,19 +27,25 @@ def convert_voting_classifier(scope: Scope, operator: Operator,
distinction and always creates two outputs, labels
and probabilities.
"""
- if scope.get_options(operator.raw_operator, dict(nocl=False))['nocl']:
+ if scope.get_options(operator.raw_operator, dict(nocl=False))["nocl"]:
raise RuntimeError(
"Option 'nocl' is not implemented for operator '{}'.".format(
- operator.raw_operator.__class__.__name__))
+ operator.raw_operator.__class__.__name__
+ )
+ )
proto_dtype = guess_proto_type(operator.inputs[0].type)
if proto_dtype != onnx_proto.TensorProto.DOUBLE:
proto_dtype = onnx_proto.TensorProto.FLOAT
op = operator.raw_operator
n_classes = len(op.classes_)
- classes_ind_name = scope.get_unique_variable_name('classes_ind')
- container.add_initializer(classes_ind_name, onnx_proto.TensorProto.INT64,
- (1, n_classes), list(range(n_classes)))
+ classes_ind_name = scope.get_unique_variable_name("classes_ind")
+ container.add_initializer(
+ classes_ind_name,
+ onnx_proto.TensorProto.INT64,
+ (1, n_classes),
+ list(range(n_classes)),
+ )
probs_names = []
one_name = None
@@ -51,48 +58,65 @@ def convert_voting_classifier(scope: Scope, operator: Operator,
this_operator = scope.declare_local_operator(op_type, estimator)
this_operator.inputs = operator.inputs
- label_name = scope.declare_local_variable(
- 'label_%d' % i, Int64TensorType())
+ label_name = scope.declare_local_variable("label_%d" % i, Int64TensorType())
prob_name = scope.declare_local_variable(
- 'voting_proba_%d' % i, operator.inputs[0].type.__class__())
+ "voting_proba_%d" % i, operator.inputs[0].type.__class__()
+ )
this_operator.outputs.append(label_name)
this_operator.outputs.append(prob_name)
- if op.voting == 'hard':
+ if op.voting == "hard":
if one_name is None:
- shape_name = scope.get_unique_variable_name('shape')
+ shape_name = scope.get_unique_variable_name("shape")
container.add_node(
- 'Shape', prob_name.onnx_name, shape_name,
- name=scope.get_unique_operator_name('Shape'))
- zero_name = scope.get_unique_variable_name('zero')
+ "Shape",
+ prob_name.onnx_name,
+ shape_name,
+ name=scope.get_unique_operator_name("Shape"),
+ )
+ zero_name = scope.get_unique_variable_name("zero")
container.add_node(
- 'ConstantOfShape', shape_name, zero_name,
- name=scope.get_unique_operator_name('CoSA'),
- value=make_tensor("value", proto_dtype,
- (1, ), [0.]), op_version=9)
- one_name = scope.get_unique_variable_name('one')
+ "ConstantOfShape",
+ shape_name,
+ zero_name,
+ name=scope.get_unique_operator_name("CoSA"),
+ value=make_tensor("value", proto_dtype, (1,), [0.0]),
+ op_version=9,
+ )
+ one_name = scope.get_unique_variable_name("one")
container.add_node(
- 'ConstantOfShape', shape_name, one_name,
- name=scope.get_unique_operator_name('CoSB'),
- value=make_tensor("value", proto_dtype,
- (1, ), [1.]), op_version=9)
-
- argmax_output_name = scope.get_unique_variable_name(
- 'argmax_output')
- container.add_node('ArgMax', prob_name.onnx_name,
- argmax_output_name,
- name=scope.get_unique_operator_name('ArgMax'),
- axis=1)
-
- equal_name = scope.get_unique_variable_name('equal')
- container.add_node('Equal', [argmax_output_name, classes_ind_name],
- equal_name,
- name=scope.get_unique_operator_name('Equal'))
-
- max_proba_name = scope.get_unique_variable_name('probsmax')
- container.add_node('Where', [equal_name, one_name, zero_name],
- max_proba_name,
- name=scope.get_unique_operator_name('Where'))
+ "ConstantOfShape",
+ shape_name,
+ one_name,
+ name=scope.get_unique_operator_name("CoSB"),
+ value=make_tensor("value", proto_dtype, (1,), [1.0]),
+ op_version=9,
+ )
+
+ argmax_output_name = scope.get_unique_variable_name("argmax_output")
+ container.add_node(
+ "ArgMax",
+ prob_name.onnx_name,
+ argmax_output_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+
+ equal_name = scope.get_unique_variable_name("equal")
+ container.add_node(
+ "Equal",
+ [argmax_output_name, classes_ind_name],
+ equal_name,
+ name=scope.get_unique_operator_name("Equal"),
+ )
+
+ max_proba_name = scope.get_unique_variable_name("probsmax")
+ container.add_node(
+ "Where",
+ [equal_name, one_name, zero_name],
+ max_proba_name,
+ name=scope.get_unique_operator_name("Where"),
+ )
prob_name = max_proba_name
else:
prob_name = prob_name.onnx_name
@@ -100,37 +124,53 @@ def convert_voting_classifier(scope: Scope, operator: Operator,
if op.weights is not None:
val = op.weights[i] / op.weights.sum()
else:
- val = 1. / len(op.estimators_)
-
- weights_name = scope.get_unique_variable_name('w%d' % i)
- container.add_initializer(
- weights_name, proto_dtype, [1], [val])
- wprob_name = scope.get_unique_variable_name('wprob_name')
- apply_mul(scope, [prob_name, weights_name],
- wprob_name, container, broadcast=1)
+ val = 1.0 / len(op.estimators_)
+
+ weights_name = scope.get_unique_variable_name("w%d" % i)
+ container.add_initializer(weights_name, proto_dtype, [1], [val])
+ wprob_name = scope.get_unique_variable_name("wprob_name")
+ apply_mul(scope, [prob_name, weights_name], wprob_name, container, broadcast=1)
probs_names.append(wprob_name)
if op.flatten_transform in (False, None):
- container.add_node('Sum', probs_names,
- operator.outputs[1].full_name,
- name=scope.get_unique_operator_name('Sum'))
+ container.add_node(
+ "Sum",
+ probs_names,
+ operator.outputs[1].full_name,
+ name=scope.get_unique_operator_name("Sum"),
+ )
else:
raise NotImplementedError(
"flatten_transform==True is not implemented yet. "
"You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
# labels
- label_name = scope.get_unique_variable_name('label_name')
- container.add_node('ArgMax', operator.outputs[1].full_name, label_name,
- name=scope.get_unique_operator_name('ArgMax'), axis=1)
- _finalize_converter_classes(scope, label_name,
- operator.outputs[0].full_name, container,
- op.classes_, proto_dtype)
-
-
-register_converter('SklearnVotingClassifier',
- convert_voting_classifier,
- options={'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'nocl': [True, False]})
+ label_name = scope.get_unique_variable_name("label_name")
+ container.add_node(
+ "ArgMax",
+ operator.outputs[1].full_name,
+ label_name,
+ name=scope.get_unique_operator_name("ArgMax"),
+ axis=1,
+ )
+ _finalize_converter_classes(
+ scope,
+ label_name,
+ operator.outputs[0].full_name,
+ container,
+ op.classes_,
+ proto_dtype,
+ )
+
+
+register_converter(
+ "SklearnVotingClassifier",
+ convert_voting_classifier,
+ options={
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "nocl": [True, False],
+ },
+)
diff --git a/skl2onnx/operator_converters/voting_regressor.py b/skl2onnx/operator_converters/voting_regressor.py
index 73da321ac..c7ce01690 100644
--- a/skl2onnx/operator_converters/voting_regressor.py
+++ b/skl2onnx/operator_converters/voting_regressor.py
@@ -5,23 +5,22 @@
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
from ..common._apply_operation import apply_mul
-from ..common.data_types import (
- guess_proto_type, FloatTensorType, DoubleTensorType)
+from ..common.data_types import guess_proto_type, FloatTensorType, DoubleTensorType
from .._supported_operators import sklearn_operator_name_map
-def convert_voting_regressor(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_voting_regressor(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
"""
Converts a *VotingRegressor* into *ONNX* format.
"""
op = operator.raw_operator
- if not isinstance(operator.inputs[0].type,
- (FloatTensorType, DoubleTensorType)):
- this_operator = scope.declare_local_operator('SklearnCast')
+ if not isinstance(operator.inputs[0].type, (FloatTensorType, DoubleTensorType)):
+ this_operator = scope.declare_local_operator("SklearnCast")
this_operator.inputs = operator.inputs
- var_name = scope.declare_local_variable('cast', FloatTensorType())
+ var_name = scope.declare_local_variable("cast", FloatTensorType())
this_operator.outputs.append(var_name)
inputs = this_operator.outputs
else:
@@ -38,30 +37,32 @@ def convert_voting_regressor(scope: Scope, operator: Operator,
this_operator.inputs = inputs
var_name = scope.declare_local_variable(
- 'var_%d' % i, inputs[0].type.__class__())
+ "var_%d" % i, inputs[0].type.__class__()
+ )
this_operator.outputs.append(var_name)
var_name = var_name.onnx_name
if op.weights is not None:
val = op.weights[i] / op.weights.sum()
else:
- val = 1. / len(op.estimators_)
+ val = 1.0 / len(op.estimators_)
- weights_name = scope.get_unique_variable_name('w%d' % i)
+ weights_name = scope.get_unique_variable_name("w%d" % i)
proto_dtype = guess_proto_type(inputs[0].type)
- container.add_initializer(
- weights_name, proto_dtype, [1], [val])
- wvar_name = scope.get_unique_variable_name('wvar_%d' % i)
- apply_mul(scope, [var_name, weights_name],
- wvar_name, container, broadcast=1)
+ container.add_initializer(weights_name, proto_dtype, [1], [val])
+ wvar_name = scope.get_unique_variable_name("wvar_%d" % i)
+ apply_mul(scope, [var_name, weights_name], wvar_name, container, broadcast=1)
- flat_name = scope.get_unique_variable_name('fvar_%d' % i)
- container.add_node('Flatten', wvar_name, flat_name)
+ flat_name = scope.get_unique_variable_name("fvar_%d" % i)
+ container.add_node("Flatten", wvar_name, flat_name)
vars_names.append(flat_name)
- container.add_node('Sum', vars_names,
- operator.outputs[0].full_name,
- name=scope.get_unique_operator_name('Sum'))
+ container.add_node(
+ "Sum",
+ vars_names,
+ operator.outputs[0].full_name,
+ name=scope.get_unique_operator_name("Sum"),
+ )
-register_converter('SklearnVotingRegressor', convert_voting_regressor)
+register_converter("SklearnVotingRegressor", convert_voting_regressor)
diff --git a/skl2onnx/operator_converters/zip_map.py b/skl2onnx/operator_converters/zip_map.py
index 85e6fb0c4..914ea0529 100644
--- a/skl2onnx/operator_converters/zip_map.py
+++ b/skl2onnx/operator_converters/zip_map.py
@@ -2,69 +2,105 @@
from ..proto import onnx_proto
from ..common._apply_operation import (
- apply_slice, apply_cast, apply_identity, apply_reshape)
+ apply_slice,
+ apply_cast,
+ apply_identity,
+ apply_reshape,
+)
from ..common._registration import register_converter
from ..common._topology import Scope, Operator
from ..common._container import ModelComponentContainer
-def _common_convert_sklearn_zipmap(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
- zipmap_attrs = {'name': scope.get_unique_operator_name('ZipMap')}
+def _common_convert_sklearn_zipmap(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
+ zipmap_attrs = {"name": scope.get_unique_operator_name("ZipMap")}
to_type = onnx_proto.TensorProto.INT64
- if hasattr(operator, 'classlabels_int64s'):
- zipmap_attrs['classlabels_int64s'] = operator.classlabels_int64s
- elif hasattr(operator, 'classlabels_strings'):
- zipmap_attrs['classlabels_strings'] = operator.classlabels_strings
+ if hasattr(operator, "classlabels_int64s"):
+ zipmap_attrs["classlabels_int64s"] = operator.classlabels_int64s
+ elif hasattr(operator, "classlabels_strings"):
+ zipmap_attrs["classlabels_strings"] = operator.classlabels_strings
to_type = onnx_proto.TensorProto.STRING
if to_type == onnx_proto.TensorProto.STRING:
- apply_identity(scope, operator.inputs[0].full_name,
- operator.outputs[0].full_name, container)
+ apply_identity(
+ scope,
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ container,
+ )
else:
- apply_cast(scope, operator.inputs[0].full_name,
- operator.outputs[0].full_name, container, to=to_type)
+ apply_cast(
+ scope,
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ container,
+ to=to_type,
+ )
return zipmap_attrs
-def convert_sklearn_zipmap(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_zipmap(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
if len(operator.inputs) == 2:
- zipmap_attrs = _common_convert_sklearn_zipmap(
- scope, operator, container)
- container.add_node('ZipMap', operator.inputs[1].full_name,
- operator.outputs[1].full_name,
- op_domain='ai.onnx.ml', **zipmap_attrs)
+ zipmap_attrs = _common_convert_sklearn_zipmap(scope, operator, container)
+ container.add_node(
+ "ZipMap",
+ operator.inputs[1].full_name,
+ operator.outputs[1].full_name,
+ op_domain="ai.onnx.ml",
+ **zipmap_attrs
+ )
return
- if hasattr(operator, 'classlabels_int64s'):
+ if hasattr(operator, "classlabels_int64s"):
zipmap_attrs = dict(classlabels_int64s=operator.classlabels_int64s)
- elif hasattr(operator, 'classlabels_strings'):
+ elif hasattr(operator, "classlabels_strings"):
zipmap_attrs = dict(classlabels_strings=operator.classlabels_strings)
else:
raise RuntimeError(
"operator should have attribute 'classlabels_int64s' or "
- "'classlabels_strings'.")
- container.add_node('ZipMap', operator.inputs[0].full_name,
- operator.outputs[0].full_name,
- op_domain='ai.onnx.ml', **zipmap_attrs)
+ "'classlabels_strings'."
+ )
+ container.add_node(
+ "ZipMap",
+ operator.inputs[0].full_name,
+ operator.outputs[0].full_name,
+ op_domain="ai.onnx.ml",
+ **zipmap_attrs
+ )
-def convert_sklearn_zipmap_columns(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def convert_sklearn_zipmap_columns(
+ scope: Scope, operator: Operator, container: ModelComponentContainer
+):
_common_convert_sklearn_zipmap(scope, operator, container)
probs = operator.inputs[1].full_name
for i in range(1, len(operator.outputs)):
out = operator.outputs[i].full_name
flat = scope.get_unique_variable_name(out)
apply_slice(
- scope, probs, flat, container, starts=[i - 1], ends=[i], axes=[1],
- operator_name=scope.get_unique_operator_name('Slice'))
+ scope,
+ probs,
+ flat,
+ container,
+ starts=[i - 1],
+ ends=[i],
+ axes=[1],
+ operator_name=scope.get_unique_operator_name("Slice"),
+ )
apply_reshape(
- scope, flat, out, container, desired_shape=(-1, ),
- operator_name=scope.get_unique_operator_name('reshape'))
+ scope,
+ flat,
+ out,
+ container,
+ desired_shape=(-1,),
+ operator_name=scope.get_unique_operator_name("reshape"),
+ )
-register_converter('SklearnZipMap', convert_sklearn_zipmap)
-register_converter('SklearnZipMapColumns', convert_sklearn_zipmap_columns)
+register_converter("SklearnZipMap", convert_sklearn_zipmap)
+register_converter("SklearnZipMapColumns", convert_sklearn_zipmap_columns)
diff --git a/skl2onnx/proto/__init__.py b/skl2onnx/proto/__init__.py
index bd23169ca..443dfa451 100644
--- a/skl2onnx/proto/__init__.py
+++ b/skl2onnx/proto/__init__.py
@@ -12,6 +12,7 @@
# (string tensor get assigned twice)
from onnx import mapping
from onnx.onnx_pb import TensorProto, ValueInfoProto # noqa
+
try:
from onnx.onnx_pb import SparseTensorProto # noqa
except ImportError:
@@ -21,25 +22,25 @@
def make_tensor_fixed(name, data_type, dims, vals, raw=False):
- '''
+ """
Make a TensorProto with specified arguments. If raw is False, this
function will choose the corresponding proto field to store the
values based on data_type. If raw is True, use "raw_data" proto
field to store the values, and values should be of type bytes in
this case.
- '''
+ """
tensor = TensorProto()
tensor.data_type = data_type
tensor.name = name
- if (data_type == TensorProto.COMPLEX64 or
- data_type == TensorProto.COMPLEX128):
+ if data_type == TensorProto.COMPLEX64 or data_type == TensorProto.COMPLEX128:
vals = split_complex_to_pairs(vals)
if raw:
tensor.raw_data = vals
else:
field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
- mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
+ mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]
+ ]
getattr(tensor, field).extend(vals)
tensor.dims.extend(dims)
@@ -63,4 +64,5 @@ def get_latest_tested_opset_version():
(return by `onnx.defs.onnx_opset_version()`).
"""
from .. import __max_supported_opset__
+
return min(__max_supported_opset__, get_opset_number_from_onnx())
diff --git a/skl2onnx/shape_calculators/array_feature_extractor.py b/skl2onnx/shape_calculators/array_feature_extractor.py
index 1c578911a..e1ac00b09 100644
--- a/skl2onnx/shape_calculators/array_feature_extractor.py
+++ b/skl2onnx/shape_calculators/array_feature_extractor.py
@@ -13,5 +13,6 @@ def calculate_sklearn_array_feature_extractor(operator):
operator.outputs[0].type = i.type.__class__([N, C])
-register_shape_calculator('SklearnArrayFeatureExtractor',
- calculate_sklearn_array_feature_extractor)
+register_shape_calculator(
+ "SklearnArrayFeatureExtractor", calculate_sklearn_array_feature_extractor
+)
diff --git a/skl2onnx/shape_calculators/cast_op.py b/skl2onnx/shape_calculators/cast_op.py
index 6667e0d50..20927f45e 100644
--- a/skl2onnx/shape_calculators/cast_op.py
+++ b/skl2onnx/shape_calculators/cast_op.py
@@ -8,21 +8,18 @@
def calculate_sklearn_cast(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
def calculate_sklearn_cast_transformer(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
op = operator.raw_operator
otype = _guess_numpy_type(op.dtype, operator.inputs[0].type.shape)
operator.outputs[0].type = otype
+register_shape_calculator("SklearnCast", calculate_sklearn_cast)
+register_shape_calculator("SklearnCastTransformer", calculate_sklearn_cast_transformer)
register_shape_calculator(
- 'SklearnCast', calculate_sklearn_cast)
-register_shape_calculator(
- 'SklearnCastTransformer', calculate_sklearn_cast_transformer)
-register_shape_calculator('SklearnCastRegressor',
- calculate_linear_regressor_output_shapes)
+ "SklearnCastRegressor", calculate_linear_regressor_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/class_labels.py b/skl2onnx/shape_calculators/class_labels.py
index b8944be83..3df221a40 100644
--- a/skl2onnx/shape_calculators/class_labels.py
+++ b/skl2onnx/shape_calculators/class_labels.py
@@ -9,5 +9,4 @@ def calculate_sklearn_class_labels(operator):
check_input_and_output_numbers(operator, output_count_range=1)
-register_shape_calculator(
- 'SklearnClassLabels', calculate_sklearn_class_labels)
+register_shape_calculator("SklearnClassLabels", calculate_sklearn_class_labels)
diff --git a/skl2onnx/shape_calculators/concat.py b/skl2onnx/shape_calculators/concat.py
index 78bd062b0..9f242b283 100644
--- a/skl2onnx/shape_calculators/concat.py
+++ b/skl2onnx/shape_calculators/concat.py
@@ -3,10 +3,17 @@
from ..common._registration import register_shape_calculator
from ..common.data_types import (
- FloatType, Int64Type, StringType, TensorType,
- DoubleType, BooleanTensorType, FloatTensorType,
- Int64TensorType, StringTensorType,
- DoubleTensorType)
+ FloatType,
+ Int64Type,
+ StringType,
+ TensorType,
+ DoubleType,
+ BooleanTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ StringTensorType,
+ DoubleTensorType,
+)
from ..common.utils import check_input_and_output_numbers
@@ -27,8 +34,7 @@ def calculate_sklearn_concat(operator):
C = None
else:
C += i.type.shape[1]
- elif isinstance(i.type, (
- Int64Type, FloatType, StringType, DoubleType)):
+ elif isinstance(i.type, (Int64Type, FloatType, StringType, DoubleType)):
C += 1
else:
C = None
@@ -41,19 +47,28 @@ def more_generic(t1, t2):
raise RuntimeError(
"Cannot merge columns with types {} and {}."
"Inputs:\n{}\nOutputs:\n{}".format(
- t1, t2, operator.inputs, operator.outputs))
- for ts in [StringTensorType, DoubleTensorType, FloatTensorType,
- Int64TensorType, BooleanTensorType]:
+ t1, t2, operator.inputs, operator.outputs
+ )
+ )
+ for ts in [
+ StringTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ BooleanTensorType,
+ ]:
if isinstance(t1, ts) or isinstance(t2, ts):
return ts
raise RuntimeError(
"Cannot merge columns with types {} and {}."
"Inputs:\n{}\nOutputs:\n{}".format(
- t1, t2, operator.inputs, operator.outputs))
+ t1, t2, operator.inputs, operator.outputs
+ )
+ )
raise NotImplementedError(
"Columns must be tensors."
- "Inputs:\n{}\nOutputs:\n{}".format(
- operator.inputs, operator.outputs))
+ "Inputs:\n{}\nOutputs:\n{}".format(operator.inputs, operator.outputs)
+ )
# Let's determine the resulting type
final_type = None
@@ -69,24 +84,23 @@ def more_generic(t1, t2):
raise NotImplementedError(
"Columns must be tensors.\n"
"- Inputs: {}\n- Outputs: {}\n- types: {}"
- "".format(
- operator.inputs, operator.outputs, seen_types))
+ "".format(operator.inputs, operator.outputs, seen_types)
+ )
if final_type != operator.outputs[0].type:
operator.outputs[0].type = type(final_type)([N, C])
else:
operator.outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnConcat', calculate_sklearn_concat)
-register_shape_calculator('SklearnGenericUnivariateSelect',
- calculate_sklearn_concat)
-register_shape_calculator('SklearnMultiply', calculate_sklearn_concat)
-register_shape_calculator('SklearnRFE', calculate_sklearn_concat)
-register_shape_calculator('SklearnRFECV', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectFdr', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectFpr', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectFromModel', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectFwe', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectKBest', calculate_sklearn_concat)
-register_shape_calculator('SklearnSelectPercentile', calculate_sklearn_concat)
-register_shape_calculator('SklearnVarianceThreshold', calculate_sklearn_concat)
+register_shape_calculator("SklearnConcat", calculate_sklearn_concat)
+register_shape_calculator("SklearnGenericUnivariateSelect", calculate_sklearn_concat)
+register_shape_calculator("SklearnMultiply", calculate_sklearn_concat)
+register_shape_calculator("SklearnRFE", calculate_sklearn_concat)
+register_shape_calculator("SklearnRFECV", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectFdr", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectFpr", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectFromModel", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectFwe", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectKBest", calculate_sklearn_concat)
+register_shape_calculator("SklearnSelectPercentile", calculate_sklearn_concat)
+register_shape_calculator("SklearnVarianceThreshold", calculate_sklearn_concat)
diff --git a/skl2onnx/shape_calculators/cross_decomposition.py b/skl2onnx/shape_calculators/cross_decomposition.py
index ec7a3faa8..ddeb9e152 100644
--- a/skl2onnx/shape_calculators/cross_decomposition.py
+++ b/skl2onnx/shape_calculators/cross_decomposition.py
@@ -2,20 +2,18 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType)
-from ..common.utils import (
- check_input_and_output_numbers, check_input_and_output_types)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
+from ..common.utils import check_input_and_output_numbers, check_input_and_output_types
def calculate_pls_regression_output_shapes(operator):
check_input_and_output_numbers(operator, input_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
if len(operator.inputs[0].type.shape) != 2:
- raise RuntimeError('Input must be a [N, C]-tensor')
+ raise RuntimeError("Input must be a [N, C]-tensor")
op = operator.raw_operator
cls_type = operator.inputs[0].type.__class__
@@ -25,5 +23,6 @@ def calculate_pls_regression_output_shapes(operator):
operator.outputs[0].type = cls_type([N, op.coef_.shape[1]])
-register_shape_calculator('SklearnPLSRegression',
- calculate_pls_regression_output_shapes)
+register_shape_calculator(
+ "SklearnPLSRegression", calculate_pls_regression_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/dict_vectorizer.py b/skl2onnx/shape_calculators/dict_vectorizer.py
index 59dce8105..d5b0f232f 100644
--- a/skl2onnx/shape_calculators/dict_vectorizer.py
+++ b/skl2onnx/shape_calculators/dict_vectorizer.py
@@ -6,17 +6,17 @@
def calculate_sklearn_dict_vectorizer_output_shapes(operator):
- '''
+ """
Allowed input/output patterns are
1. Map ---> [1, C]
C is the total number of allowed keys in the input dictionary.
- '''
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ """
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
C = len(operator.raw_operator.feature_names_)
operator.outputs[0].type.shape = [None, C]
-register_shape_calculator('SklearnDictVectorizer',
- calculate_sklearn_dict_vectorizer_output_shapes)
+register_shape_calculator(
+ "SklearnDictVectorizer", calculate_sklearn_dict_vectorizer_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/ensemble_shapes.py b/skl2onnx/shape_calculators/ensemble_shapes.py
index de7fbbd37..c9d955a88 100644
--- a/skl2onnx/shape_calculators/ensemble_shapes.py
+++ b/skl2onnx/shape_calculators/ensemble_shapes.py
@@ -2,12 +2,12 @@
from ..common._registration import register_shape_calculator
-from ..common.utils import (
- check_input_and_output_numbers, check_input_and_output_types)
+from ..common.utils import check_input_and_output_numbers, check_input_and_output_types
from ..common.shape_calculator import (
calculate_linear_regressor_output_shapes,
calculate_linear_classifier_output_shapes,
- _calculate_linear_classifier_output_shapes)
+ _calculate_linear_classifier_output_shapes,
+)
from ..common.data_types import (
BooleanTensorType,
DoubleTensorType,
@@ -25,24 +25,29 @@ def calculate_tree_regressor_output_shapes(operator):
batch. If the input batch size is N, the output shape may be
[N, 1].
"""
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=[1, 3])
- check_input_and_output_types(operator, good_input_types=[
- BooleanTensorType, DoubleTensorType,
- FloatTensorType, Int64TensorType])
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=[1, 3]
+ )
+ check_input_and_output_types(
+ operator,
+ good_input_types=[
+ BooleanTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ ],
+ )
N = operator.inputs[0].get_first_dimension()
if operator.outputs[0].type is None:
- raise RuntimeError(
- "Output type is unknown for operator %r." % operator)
+ raise RuntimeError("Output type is unknown for operator %r." % operator)
operator.outputs[0].type.shape = [N, 1]
# decision_path, decision_leaf
for n in range(2, len(operator.outputs)):
- if hasattr(operator.raw_operator, 'estimators_'):
+ if hasattr(operator.raw_operator, "estimators_"):
# random forest
- operator.outputs[n].type.shape = [
- N, len(operator.raw_operator.estimators_)]
+ operator.outputs[n].type.shape = [N, len(operator.raw_operator.estimators_)]
else:
# single tree
operator.outputs[n].type.shape = [N, 1]
@@ -55,39 +60,49 @@ def calculate_tree_classifier_output_shapes(operator):
# decision_path, decision_leaf
for n in range(2, len(operator.outputs)):
if operator.outputs[n].type is None:
- raise RuntimeError(
- "Output type is unknown for operator %r." % operator)
- if hasattr(operator.raw_operator, 'estimators_'):
+ raise RuntimeError("Output type is unknown for operator %r." % operator)
+ if hasattr(operator.raw_operator, "estimators_"):
# random forest
- operator.outputs[n].type.shape = [
- N, len(operator.raw_operator.estimators_)]
+ operator.outputs[n].type.shape = [N, len(operator.raw_operator.estimators_)]
else:
# single tree
operator.outputs[n].type.shape = [N, 1]
-register_shape_calculator('SklearnDecisionTreeRegressor',
- calculate_tree_regressor_output_shapes)
-register_shape_calculator('SklearnExtraTreeRegressor',
- calculate_tree_regressor_output_shapes)
-register_shape_calculator('SklearnExtraTreesRegressor',
- calculate_tree_regressor_output_shapes)
-register_shape_calculator('SklearnGradientBoostingRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnHistGradientBoostingRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnRandomForestRegressor',
- calculate_tree_regressor_output_shapes)
+register_shape_calculator(
+ "SklearnDecisionTreeRegressor", calculate_tree_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnExtraTreeRegressor", calculate_tree_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnExtraTreesRegressor", calculate_tree_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnGradientBoostingRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnHistGradientBoostingRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnRandomForestRegressor", calculate_tree_regressor_output_shapes
+)
-register_shape_calculator('SklearnDecisionTreeClassifier',
- calculate_tree_classifier_output_shapes)
-register_shape_calculator('SklearnExtraTreeClassifier',
- calculate_tree_classifier_output_shapes)
-register_shape_calculator('SklearnExtraTreesClassifier',
- calculate_tree_classifier_output_shapes)
-register_shape_calculator('SklearnGradientBoostingClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnHistGradientBoostingClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnRandomForestClassifier',
- calculate_tree_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnDecisionTreeClassifier", calculate_tree_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnExtraTreeClassifier", calculate_tree_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnExtraTreesClassifier", calculate_tree_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnGradientBoostingClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnHistGradientBoostingClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnRandomForestClassifier", calculate_tree_classifier_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/feature_hasher.py b/skl2onnx/shape_calculators/feature_hasher.py
index 1c8409d41..9ba186e48 100644
--- a/skl2onnx/shape_calculators/feature_hasher.py
+++ b/skl2onnx/shape_calculators/feature_hasher.py
@@ -2,8 +2,11 @@
import numpy as np
from ..common.data_types import (
- StringTensorType, Int64TensorType, FloatTensorType,
- DoubleTensorType)
+ StringTensorType,
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+)
from ..common._registration import register_shape_calculator
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -12,7 +15,8 @@
def calculate_sklearn_feature_hasher(operator):
check_input_and_output_numbers(operator, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[StringTensorType, Int64TensorType])
+ operator, good_input_types=[StringTensorType, Int64TensorType]
+ )
N = operator.inputs[0].get_first_dimension()
model = operator.raw_operator
@@ -25,9 +29,8 @@ def calculate_sklearn_feature_hasher(operator):
operator.outputs[0].type = Int64TensorType(shape=shape)
else:
raise RuntimeError(
- f"Converter is not implemented for "
- f"FeatureHasher.dtype={model.dtype}.")
+ f"Converter is not implemented for " f"FeatureHasher.dtype={model.dtype}."
+ )
-register_shape_calculator('SklearnFeatureHasher',
- calculate_sklearn_feature_hasher)
+register_shape_calculator("SklearnFeatureHasher", calculate_sklearn_feature_hasher)
diff --git a/skl2onnx/shape_calculators/flatten.py b/skl2onnx/shape_calculators/flatten.py
index 27ffcd625..910d69c78 100644
--- a/skl2onnx/shape_calculators/flatten.py
+++ b/skl2onnx/shape_calculators/flatten.py
@@ -7,8 +7,7 @@
def calculate_sklearn_flatten(operator):
- check_input_and_output_numbers(operator, output_count_range=1,
- input_count_range=1)
+ check_input_and_output_numbers(operator, output_count_range=1, input_count_range=1)
i = operator.inputs[0]
N = i.get_first_dimension()
if isinstance(i.type, TensorType):
@@ -26,4 +25,4 @@ def calculate_sklearn_flatten(operator):
operator.outputs[0].type.shape = [N * C]
-register_shape_calculator('SklearnFlatten', calculate_sklearn_flatten)
+register_shape_calculator("SklearnFlatten", calculate_sklearn_flatten)
diff --git a/skl2onnx/shape_calculators/function_transformer.py b/skl2onnx/shape_calculators/function_transformer.py
index c4ab2a132..2d3628be8 100644
--- a/skl2onnx/shape_calculators/function_transformer.py
+++ b/skl2onnx/shape_calculators/function_transformer.py
@@ -11,10 +11,12 @@ def calculate_sklearn_function_transformer_output_shapes(operator):
Only identity function is supported.
"""
if operator.raw_operator.func is not None:
- raise RuntimeError("FunctionTransformer is not supported unless the "
- "transform function is None (= identity). "
- "You may raise an issue at "
- "https://github.com/onnx/sklearn-onnx/issues.")
+ raise RuntimeError(
+ "FunctionTransformer is not supported unless the "
+ "transform function is None (= identity). "
+ "You may raise an issue at "
+ "https://github.com/onnx/sklearn-onnx/issues."
+ )
N = operator.inputs[0].get_first_dimension()
C = 0
for variable in operator.inputs:
@@ -28,5 +30,6 @@ def calculate_sklearn_function_transformer_output_shapes(operator):
operator.outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnFunctionTransformer',
- calculate_sklearn_function_transformer_output_shapes)
+register_shape_calculator(
+ "SklearnFunctionTransformer", calculate_sklearn_function_transformer_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/gaussian_process.py b/skl2onnx/shape_calculators/gaussian_process.py
index d38653381..3a4083d54 100644
--- a/skl2onnx/shape_calculators/gaussian_process.py
+++ b/skl2onnx/shape_calculators/gaussian_process.py
@@ -9,14 +9,16 @@
def calculate_sklearn_gaussian_process_regressor_shape(operator):
check_input_and_output_types(
- operator, good_input_types=[FloatTensorType, DoubleTensorType],
- good_output_types=[FloatTensorType, DoubleTensorType])
+ operator,
+ good_input_types=[FloatTensorType, DoubleTensorType],
+ good_output_types=[FloatTensorType, DoubleTensorType],
+ )
if len(operator.inputs) != 1:
- raise RuntimeError("Only one input vector is allowed for "
- "GaussianProcessRegressor.")
+ raise RuntimeError(
+ "Only one input vector is allowed for " "GaussianProcessRegressor."
+ )
if len(operator.outputs) not in (1, 2):
- raise RuntimeError("One output is expected for "
- "GaussianProcessRegressor.")
+ raise RuntimeError("One output is expected for " "GaussianProcessRegressor.")
variable = operator.inputs[0]
@@ -25,14 +27,17 @@ def calculate_sklearn_gaussian_process_regressor_shape(operator):
# Output 1 is mean
# Output 2 is cov or std
- if hasattr(op, 'y_train_') and op.y_train_ is not None:
+ if hasattr(op, "y_train_") and op.y_train_ is not None:
dim = 1 if len(op.y_train_.shape) == 1 else op.y_train_.shape[1]
else:
dim = 1
operator.outputs[0].type.shape = [N, dim]
-register_shape_calculator('SklearnGaussianProcessRegressor',
- calculate_sklearn_gaussian_process_regressor_shape)
-register_shape_calculator('SklearnGaussianProcessClassifier',
- calculate_linear_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnGaussianProcessRegressor",
+ calculate_sklearn_gaussian_process_regressor_shape,
+)
+register_shape_calculator(
+ "SklearnGaussianProcessClassifier", calculate_linear_classifier_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/grid_search_cv.py b/skl2onnx/shape_calculators/grid_search_cv.py
index 011e866c5..bde460fce 100644
--- a/skl2onnx/shape_calculators/grid_search_cv.py
+++ b/skl2onnx/shape_calculators/grid_search_cv.py
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
import logging
-from ..common._registration import (
- register_shape_calculator,
- get_shape_calculator)
+from ..common._registration import register_shape_calculator, get_shape_calculator
from .._supported_operators import sklearn_operator_name_map
@@ -12,9 +10,12 @@ def convert_sklearn_grid_search_cv(operator):
best_estimator = grid_search_op.best_estimator_
name = sklearn_operator_name_map.get(type(best_estimator), None)
if name is None:
- logger = logging.getLogger('skl2onnx')
- logger.warn("[convert_sklearn_grid_search_cv] failed to find alias "
- "to model type %r.", type(best_estimator))
+ logger = logging.getLogger("skl2onnx")
+ logger.warn(
+ "[convert_sklearn_grid_search_cv] failed to find alias "
+ "to model type %r.",
+ type(best_estimator),
+ )
return
op = operator.new_raw_operator(best_estimator, name)
shape_calc = get_shape_calculator(name)
@@ -22,5 +23,4 @@ def convert_sklearn_grid_search_cv(operator):
operator.outputs = op.outputs
-register_shape_calculator('SklearnGridSearchCV',
- convert_sklearn_grid_search_cv)
+register_shape_calculator("SklearnGridSearchCV", convert_sklearn_grid_search_cv)
diff --git a/skl2onnx/shape_calculators/identity.py b/skl2onnx/shape_calculators/identity.py
index 5bba5ec77..aadd0160b 100644
--- a/skl2onnx/shape_calculators/identity.py
+++ b/skl2onnx/shape_calculators/identity.py
@@ -6,9 +6,8 @@
def calculate_sklearn_identity(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
operator.outputs[0].type = operator.inputs[0].type
-register_shape_calculator('SklearnIdentity', calculate_sklearn_identity)
+register_shape_calculator("SklearnIdentity", calculate_sklearn_identity)
diff --git a/skl2onnx/shape_calculators/imputer.py b/skl2onnx/shape_calculators/imputer.py
index 22effc1d7..441611ca9 100644
--- a/skl2onnx/shape_calculators/imputer.py
+++ b/skl2onnx/shape_calculators/imputer.py
@@ -3,7 +3,11 @@
from ..common._registration import register_shape_calculator
from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType, StringTensorType)
+ FloatTensorType,
+ Int64TensorType,
+ DoubleTensorType,
+ StringTensorType,
+)
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -17,18 +21,22 @@ def calculate_sklearn_imputer_output_shapes(operator):
them along C-axis. The produced tensor's shape is used as the
output shape.
"""
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType,
- StringTensorType])
+ operator,
+ good_input_types=[
+ FloatTensorType,
+ Int64TensorType,
+ DoubleTensorType,
+ StringTensorType,
+ ],
+ )
if not isinstance(operator.inputs[0].type, type(operator.outputs[0].type)): # noqa
raise RuntimeError(
"Inputs and outputs should have the same type "
- "%r != %r." % (
- type(operator.inputs[0].type),
- type(operator.outputs[0].type)))
+ "%r != %r."
+ % (type(operator.inputs[0].type), type(operator.outputs[0].type))
+ )
N = operator.inputs[0].get_first_dimension()
C = 0
@@ -42,9 +50,8 @@ def calculate_sklearn_imputer_output_shapes(operator):
operator.outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnImputer',
- calculate_sklearn_imputer_output_shapes)
-register_shape_calculator('SklearnSimpleImputer',
- calculate_sklearn_imputer_output_shapes)
-register_shape_calculator('SklearnBinarizer',
- calculate_sklearn_imputer_output_shapes)
+register_shape_calculator("SklearnImputer", calculate_sklearn_imputer_output_shapes)
+register_shape_calculator(
+ "SklearnSimpleImputer", calculate_sklearn_imputer_output_shapes
+)
+register_shape_calculator("SklearnBinarizer", calculate_sklearn_imputer_output_shapes)
diff --git a/skl2onnx/shape_calculators/isolation_forest.py b/skl2onnx/shape_calculators/isolation_forest.py
index 5371a45bd..476101163 100644
--- a/skl2onnx/shape_calculators/isolation_forest.py
+++ b/skl2onnx/shape_calculators/isolation_forest.py
@@ -11,4 +11,5 @@ def calculate_isolation_forest_output_shapes(operator):
register_shape_calculator(
- 'SklearnIsolationForest', calculate_isolation_forest_output_shapes)
+ "SklearnIsolationForest", calculate_isolation_forest_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/k_bins_discretiser.py b/skl2onnx/shape_calculators/k_bins_discretiser.py
index 6a928ec3c..7973e49f9 100644
--- a/skl2onnx/shape_calculators/k_bins_discretiser.py
+++ b/skl2onnx/shape_calculators/k_bins_discretiser.py
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common._registration import register_shape_calculator
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -12,14 +10,15 @@
def calculate_sklearn_k_bins_discretiser(operator):
check_input_and_output_numbers(operator, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
M = operator.inputs[0].get_first_dimension()
model = operator.raw_operator
- N = len(model.n_bins_) if model.encode == 'ordinal' else sum(model.n_bins_)
+ N = len(model.n_bins_) if model.encode == "ordinal" else sum(model.n_bins_)
operator.outputs[0].type.shape = [M, N]
-register_shape_calculator('SklearnKBinsDiscretizer',
- calculate_sklearn_k_bins_discretiser)
+register_shape_calculator(
+ "SklearnKBinsDiscretizer", calculate_sklearn_k_bins_discretiser
+)
diff --git a/skl2onnx/shape_calculators/k_means.py b/skl2onnx/shape_calculators/k_means.py
index 6c060ffd5..9428d633d 100644
--- a/skl2onnx/shape_calculators/k_means.py
+++ b/skl2onnx/shape_calculators/k_means.py
@@ -2,9 +2,7 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common.utils import check_input_and_output_types
@@ -12,7 +10,8 @@ def calculate_sklearn_kmeans_output_shapes(operator):
check_input_and_output_types(
operator,
good_input_types=[Int64TensorType, FloatTensorType, DoubleTensorType],
- good_output_types=[Int64TensorType, FloatTensorType, DoubleTensorType])
+ good_output_types=[Int64TensorType, FloatTensorType, DoubleTensorType],
+ )
if len(operator.inputs) != 1:
raise RuntimeError("Only one input vector is allowed for KMeans.")
if len(operator.outputs) != 2:
@@ -25,7 +24,7 @@ def calculate_sklearn_kmeans_output_shapes(operator):
operator.outputs[1].type.shape = [N, op.n_clusters]
-register_shape_calculator('SklearnKMeans',
- calculate_sklearn_kmeans_output_shapes)
-register_shape_calculator('SklearnMiniBatchKMeans',
- calculate_sklearn_kmeans_output_shapes)
+register_shape_calculator("SklearnKMeans", calculate_sklearn_kmeans_output_shapes)
+register_shape_calculator(
+ "SklearnMiniBatchKMeans", calculate_sklearn_kmeans_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/kernel_pca.py b/skl2onnx/shape_calculators/kernel_pca.py
index 568a23cf2..d93a32cb1 100644
--- a/skl2onnx/shape_calculators/kernel_pca.py
+++ b/skl2onnx/shape_calculators/kernel_pca.py
@@ -2,38 +2,39 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, DoubleTensorType)
-from ..common.utils import (
- check_input_and_output_numbers,
- check_input_and_output_types)
+from ..common.data_types import FloatTensorType, DoubleTensorType
+from ..common.utils import check_input_and_output_numbers, check_input_and_output_types
def calculate_sklearn_kernel_pca_output_shapes(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[FloatTensorType, DoubleTensorType],
- good_output_types=[FloatTensorType, DoubleTensorType])
+ operator,
+ good_input_types=[FloatTensorType, DoubleTensorType],
+ good_output_types=[FloatTensorType, DoubleTensorType],
+ )
N = operator.inputs[0].get_first_dimension()
op = operator.raw_operator
- lbd = op.eigenvalues_ if hasattr(op, 'eigenvalues_') else op.lambdas_
+ lbd = op.eigenvalues_ if hasattr(op, "eigenvalues_") else op.lambdas_
C = lbd.shape[0]
operator.outputs[0].type.shape = [N, C]
def calculate_sklearn_kernel_centerer_output_shapes(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[FloatTensorType, DoubleTensorType],
- good_output_types=[FloatTensorType, DoubleTensorType])
+ operator,
+ good_input_types=[FloatTensorType, DoubleTensorType],
+ good_output_types=[FloatTensorType, DoubleTensorType],
+ )
N = operator.inputs[0].get_first_dimension()
C = operator.raw_operator.K_fit_rows_.shape[0]
operator.outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnKernelCenterer',
- calculate_sklearn_kernel_centerer_output_shapes)
-register_shape_calculator('SklearnKernelPCA',
- calculate_sklearn_kernel_pca_output_shapes)
+register_shape_calculator(
+ "SklearnKernelCenterer", calculate_sklearn_kernel_centerer_output_shapes
+)
+register_shape_calculator(
+ "SklearnKernelPCA", calculate_sklearn_kernel_pca_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/label_binariser.py b/skl2onnx/shape_calculators/label_binariser.py
index be360094f..1a210a4db 100644
--- a/skl2onnx/shape_calculators/label_binariser.py
+++ b/skl2onnx/shape_calculators/label_binariser.py
@@ -9,13 +9,14 @@
def calculate_sklearn_label_binariser_output_shapes(operator):
check_input_and_output_numbers(operator, output_count_range=1)
- check_input_and_output_types(operator, good_input_types=[
- Int64TensorType, StringTensorType])
+ check_input_and_output_types(
+ operator, good_input_types=[Int64TensorType, StringTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
- operator.outputs[0].type = Int64TensorType(
- [N, len(operator.raw_operator.classes_)])
+ operator.outputs[0].type = Int64TensorType([N, len(operator.raw_operator.classes_)])
-register_shape_calculator('SklearnLabelBinarizer',
- calculate_sklearn_label_binariser_output_shapes)
+register_shape_calculator(
+ "SklearnLabelBinarizer", calculate_sklearn_label_binariser_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/label_encoder.py b/skl2onnx/shape_calculators/label_encoder.py
index 72601f8ff..6c7fe17d2 100644
--- a/skl2onnx/shape_calculators/label_encoder.py
+++ b/skl2onnx/shape_calculators/label_encoder.py
@@ -15,13 +15,14 @@ def calculate_sklearn_label_encoder_output_shapes(operator):
encoder only alters input features' values, not their shape.
"""
check_input_and_output_numbers(operator, output_count_range=1)
- check_input_and_output_types(operator, good_input_types=[
- FloatTensorType, Int64TensorType,
- StringTensorType])
+ check_input_and_output_types(
+ operator, good_input_types=[FloatTensorType, Int64TensorType, StringTensorType]
+ )
input_shape = copy.deepcopy(operator.inputs[0].type.shape)
operator.outputs[0].type = Int64TensorType(copy.deepcopy(input_shape))
-register_shape_calculator('SklearnLabelEncoder',
- calculate_sklearn_label_encoder_output_shapes)
+register_shape_calculator(
+ "SklearnLabelEncoder", calculate_sklearn_label_encoder_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/linear_classifier.py b/skl2onnx/shape_calculators/linear_classifier.py
index 118836512..b4068bcf0 100644
--- a/skl2onnx/shape_calculators/linear_classifier.py
+++ b/skl2onnx/shape_calculators/linear_classifier.py
@@ -5,29 +5,40 @@
from ..common.shape_calculator import calculate_linear_classifier_output_shapes
-register_shape_calculator('SklearnLinearClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnLinearSVC',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnAdaBoostClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnBaggingClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnBernoulliNB',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnCategoricalNB',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnComplementNB',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnGaussianNB',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnMultinomialNB',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnCalibratedClassifierCV',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnMLPClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnSGDClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnStackingClassifier',
- calculate_linear_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnLinearClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator("SklearnLinearSVC", calculate_linear_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnAdaBoostClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnBaggingClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnBernoulliNB", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnCategoricalNB", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnComplementNB", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnGaussianNB", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnMultinomialNB", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnCalibratedClassifierCV", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnMLPClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnSGDClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnStackingClassifier", calculate_linear_classifier_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/linear_regressor.py b/skl2onnx/shape_calculators/linear_regressor.py
index fa4c10111..1fdab33ea 100644
--- a/skl2onnx/shape_calculators/linear_regressor.py
+++ b/skl2onnx/shape_calculators/linear_regressor.py
@@ -2,12 +2,14 @@
from ..common._registration import register_shape_calculator
-from ..common.utils import (
- check_input_and_output_numbers, check_input_and_output_types)
+from ..common.utils import check_input_and_output_numbers, check_input_and_output_types
from ..common.shape_calculator import calculate_linear_regressor_output_shapes
from ..common.data_types import (
- BooleanTensorType, DoubleTensorType,
- FloatTensorType, Int64TensorType)
+ BooleanTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+)
def calculate_bayesian_ridge_output_shapes(operator):
@@ -19,11 +21,18 @@ def calculate_bayesian_ridge_output_shapes(operator):
batch. If the input batch size is N, the output shape may be
[N, 1].
"""
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=[1, 2])
- check_input_and_output_types(operator, good_input_types=[
- BooleanTensorType, DoubleTensorType,
- FloatTensorType, Int64TensorType])
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=[1, 2]
+ )
+ check_input_and_output_types(
+ operator,
+ good_input_types=[
+ BooleanTensorType,
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ ],
+ )
inp0 = operator.inputs[0].type
if isinstance(inp0, (FloatTensorType, DoubleTensorType)):
@@ -32,10 +41,11 @@ def calculate_bayesian_ridge_output_shapes(operator):
cls_type = FloatTensorType
N = operator.inputs[0].get_first_dimension()
- if (hasattr(operator.raw_operator, 'coef_') and
- len(operator.raw_operator.coef_.shape) > 1):
- operator.outputs[0].type = cls_type([
- N, operator.raw_operator.coef_.shape[1]])
+ if (
+ hasattr(operator.raw_operator, "coef_")
+ and len(operator.raw_operator.coef_.shape) > 1
+ ):
+ operator.outputs[0].type = cls_type([N, operator.raw_operator.coef_.shape[1]])
else:
operator.outputs[0].type = cls_type([N, 1])
@@ -44,25 +54,34 @@ def calculate_bayesian_ridge_output_shapes(operator):
operator.outputs[1].type = cls_type([N, 1])
-register_shape_calculator('SklearnAdaBoostRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnBaggingRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnBayesianRidge',
- calculate_bayesian_ridge_output_shapes)
-register_shape_calculator('SklearnLinearRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnLinearSVR',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnMLPRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnPoissonRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnRANSACRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnStackingRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnTweedieRegressor',
- calculate_linear_regressor_output_shapes)
-register_shape_calculator('SklearnGammaRegressor',
- calculate_linear_regressor_output_shapes)
+register_shape_calculator(
+ "SklearnAdaBoostRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnBaggingRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnBayesianRidge", calculate_bayesian_ridge_output_shapes
+)
+register_shape_calculator(
+ "SklearnLinearRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator("SklearnLinearSVR", calculate_linear_regressor_output_shapes)
+register_shape_calculator(
+ "SklearnMLPRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnPoissonRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnRANSACRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnStackingRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnTweedieRegressor", calculate_linear_regressor_output_shapes
+)
+register_shape_calculator(
+ "SklearnGammaRegressor", calculate_linear_regressor_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/local_outlier_factor.py b/skl2onnx/shape_calculators/local_outlier_factor.py
index 14929b78d..6bd1e2bd4 100644
--- a/skl2onnx/shape_calculators/local_outlier_factor.py
+++ b/skl2onnx/shape_calculators/local_outlier_factor.py
@@ -11,4 +11,5 @@ def calculate_local_outlier_factor_output_shapes(operator):
register_shape_calculator(
- 'SklearnLocalOutlierFactor', calculate_local_outlier_factor_output_shapes)
+ "SklearnLocalOutlierFactor", calculate_local_outlier_factor_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/mixture.py b/skl2onnx/shape_calculators/mixture.py
index 4f523d176..9a9842655 100644
--- a/skl2onnx/shape_calculators/mixture.py
+++ b/skl2onnx/shape_calculators/mixture.py
@@ -2,24 +2,20 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
-from ..common.utils import (
- check_input_and_output_numbers,
- check_input_and_output_types
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
+from ..common.utils import check_input_and_output_numbers, check_input_and_output_types
def calculate_gaussian_mixture_output_shapes(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=[2, 3])
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=[2, 3]
+ )
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
if len(operator.inputs[0].type.shape) != 2:
- raise RuntimeError('Input must be a [N, C]-tensor')
+ raise RuntimeError("Input must be a [N, C]-tensor")
op = operator.raw_operator
N = operator.inputs[0].get_first_dimension()
@@ -29,7 +25,9 @@ def calculate_gaussian_mixture_output_shapes(operator):
operator.outputs[2].type.shape = [N, 1]
-register_shape_calculator('SklearnGaussianMixture',
- calculate_gaussian_mixture_output_shapes)
-register_shape_calculator('SklearnBayesianGaussianMixture',
- calculate_gaussian_mixture_output_shapes)
+register_shape_calculator(
+ "SklearnGaussianMixture", calculate_gaussian_mixture_output_shapes
+)
+register_shape_calculator(
+ "SklearnBayesianGaussianMixture", calculate_gaussian_mixture_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/multioutput.py b/skl2onnx/shape_calculators/multioutput.py
index 55ded66de..fe275dfb4 100644
--- a/skl2onnx/shape_calculators/multioutput.py
+++ b/skl2onnx/shape_calculators/multioutput.py
@@ -10,8 +10,7 @@
def multioutput_regressor_shape_calculator(operator):
"""Shape calculator for MultiOutputRegressor"""
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
i = operator.inputs[0]
o = operator.outputs[0]
N = i.get_first_dimension()
@@ -21,12 +20,11 @@ def multioutput_regressor_shape_calculator(operator):
def multioutput_classifier_shape_calculator(operator):
"""Shape calculator for MultiOutputClassifier"""
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=2)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=2)
if not isinstance(operator.outputs[1].type, SequenceType):
raise RuntimeError(
- "Probabilites should be a sequence not %r."
- "" % operator.outputs[1].type)
+            "Probabilities should be a sequence not %r." "" % operator.outputs[1].type
+ )
i = operator.inputs[0]
outputs = operator.outputs
N = i.get_first_dimension()
@@ -34,7 +32,9 @@ def multioutput_classifier_shape_calculator(operator):
outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnMultiOutputRegressor',
- multioutput_regressor_shape_calculator)
-register_shape_calculator('SklearnMultiOutputClassifier',
- multioutput_classifier_shape_calculator)
+register_shape_calculator(
+ "SklearnMultiOutputRegressor", multioutput_regressor_shape_calculator
+)
+register_shape_calculator(
+ "SklearnMultiOutputClassifier", multioutput_classifier_shape_calculator
+)
diff --git a/skl2onnx/shape_calculators/nearest_neighbours.py b/skl2onnx/shape_calculators/nearest_neighbours.py
index 3e5a6bdfe..ddbde8f6a 100644
--- a/skl2onnx/shape_calculators/nearest_neighbours.py
+++ b/skl2onnx/shape_calculators/nearest_neighbours.py
@@ -5,19 +5,16 @@
import numpy as np
from ..common._registration import register_shape_calculator
from ..common.shape_calculator import calculate_linear_classifier_output_shapes
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
def calculate_sklearn_neighbours_transformer(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
n_samples_fit = operator.raw_operator.n_samples_fit_
@@ -30,11 +27,12 @@ def calculate_sklearn_neighbours_transformer(operator):
def calculate_sklearn_nearest_neighbours(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=[1, 2])
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=[1, 2]
+ )
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
neighbours = operator.raw_operator.n_neighbors
@@ -43,15 +41,18 @@ def calculate_sklearn_nearest_neighbours(operator):
def calculate_sklearn_nearest_neighbours_regressor(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=[1, 2])
+ check_input_and_output_numbers(
+ operator, input_count_range=1, output_count_range=[1, 2]
+ )
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
- if (hasattr(operator.raw_operator, '_y') and
- len(np.squeeze(operator.raw_operator._y).shape) == 1):
+ if (
+ hasattr(operator.raw_operator, "_y")
+ and len(np.squeeze(operator.raw_operator._y).shape) == 1
+ ):
C = 1
else:
C = operator.raw_operator._y.shape[-1]
@@ -59,11 +60,10 @@ def calculate_sklearn_nearest_neighbours_regressor(operator):
def calculate_sklearn_nca(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
output_type = (
@@ -76,28 +76,32 @@ def calculate_sklearn_nca(operator):
def calculate_sklearn_knn_imputer(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
- check_input_and_output_types(
- operator, good_input_types=[FloatTensorType])
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_types(operator, good_input_types=[FloatTensorType])
operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type)
operator.outputs[0].type.shape = operator.inputs[0].type.shape
-register_shape_calculator('SklearnKNeighborsRegressor',
- calculate_sklearn_nearest_neighbours_regressor)
-register_shape_calculator('SklearnRadiusNeighborsRegressor',
- calculate_sklearn_nearest_neighbours_regressor)
-register_shape_calculator('SklearnKNeighborsClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnRadiusNeighborsClassifier',
- calculate_linear_classifier_output_shapes)
-register_shape_calculator('SklearnKNNImputer',
- calculate_sklearn_knn_imputer)
-register_shape_calculator('SklearnKNeighborsTransformer',
- calculate_sklearn_neighbours_transformer)
-register_shape_calculator('SklearnNearestNeighbors',
- calculate_sklearn_nearest_neighbours)
register_shape_calculator(
- 'SklearnNeighborhoodComponentsAnalysis', calculate_sklearn_nca)
+ "SklearnKNeighborsRegressor", calculate_sklearn_nearest_neighbours_regressor
+)
+register_shape_calculator(
+ "SklearnRadiusNeighborsRegressor", calculate_sklearn_nearest_neighbours_regressor
+)
+register_shape_calculator(
+ "SklearnKNeighborsClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator(
+ "SklearnRadiusNeighborsClassifier", calculate_linear_classifier_output_shapes
+)
+register_shape_calculator("SklearnKNNImputer", calculate_sklearn_knn_imputer)
+register_shape_calculator(
+ "SklearnKNeighborsTransformer", calculate_sklearn_neighbours_transformer
+)
+register_shape_calculator(
+ "SklearnNearestNeighbors", calculate_sklearn_nearest_neighbours
+)
+register_shape_calculator(
+ "SklearnNeighborhoodComponentsAnalysis", calculate_sklearn_nca
+)
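
The regressor branch in the hunk above decides between one and several output columns by inspecting the fitted estimator's private `_y` attribute. A minimal sketch of that same check against scikit-learn's `KNeighborsRegressor` (random data, purely illustrative):

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

rng = np.random.RandomState(0)
X = rng.rand(20, 4)

for y in (X[:, 0], X[:, :2]):  # single target, then two targets
    knn = KNeighborsRegressor(n_neighbors=3).fit(X, y)
    # Same test as calculate_sklearn_nearest_neighbours_regressor:
    # a squeezed 1-D _y means one output column, otherwise use its width.
    if hasattr(knn, "_y") and len(np.squeeze(knn._y).shape) == 1:
        C = 1
    else:
        C = knn._y.shape[-1]
    print(C)  # prints 1, then 2
```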
diff --git a/skl2onnx/shape_calculators/one_hot_encoder.py b/skl2onnx/shape_calculators/one_hot_encoder.py
index 6734b6e42..64a8c0ad0 100644
--- a/skl2onnx/shape_calculators/one_hot_encoder.py
+++ b/skl2onnx/shape_calculators/one_hot_encoder.py
@@ -10,9 +10,8 @@ def calculate_sklearn_one_hot_encoder_output_shapes(operator):
op = operator.raw_operator
categories_len = 0
for index, categories in enumerate(op.categories_):
- if hasattr(op, 'drop_idx_') and op.drop_idx_ is not None:
- categories = (categories[np.arange(len(categories)) !=
- op.drop_idx_[index]])
+ if hasattr(op, "drop_idx_") and op.drop_idx_ is not None:
+ categories = categories[np.arange(len(categories)) != op.drop_idx_[index]]
categories_len += len(categories)
instances = operator.inputs[0].get_first_dimension()
if np.issubdtype(op.dtype, np.signedinteger):
@@ -21,5 +20,6 @@ def calculate_sklearn_one_hot_encoder_output_shapes(operator):
operator.outputs[0].type = FloatTensorType([instances, categories_len])
-register_shape_calculator('SklearnOneHotEncoder',
- calculate_sklearn_one_hot_encoder_output_shapes)
+register_shape_calculator(
+ "SklearnOneHotEncoder", calculate_sklearn_one_hot_encoder_output_shapes
+)
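
The loop above derives the encoded width by removing the dropped category, when there is one, from each column. A short sketch reproducing that count with scikit-learn's `OneHotEncoder` (toy data, illustrative only):

```python
import numpy as np
from sklearn.preprocessing import OneHotEncoder

X = np.array([["a", "x"], ["b", "y"], ["a", "z"]], dtype=object)
enc = OneHotEncoder(drop="first").fit(X)

width = 0
for index, categories in enumerate(enc.categories_):
    # Skip the dropped category exactly like the shape calculator does.
    if getattr(enc, "drop_idx_", None) is not None:
        categories = categories[np.arange(len(categories)) != enc.drop_idx_[index]]
    width += len(categories)

print(width, enc.transform(X).shape[1])  # both are 3
```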
diff --git a/skl2onnx/shape_calculators/one_vs_one_classifier.py b/skl2onnx/shape_calculators/one_vs_one_classifier.py
index 4f9c8e0e6..bea837f33 100644
--- a/skl2onnx/shape_calculators/one_vs_one_classifier.py
+++ b/skl2onnx/shape_calculators/one_vs_one_classifier.py
@@ -4,5 +4,6 @@
from ..common.shape_calculator import calculate_linear_classifier_output_shapes
-register_shape_calculator('SklearnOneVsOneClassifier',
- calculate_linear_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnOneVsOneClassifier", calculate_linear_classifier_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/one_vs_rest_classifier.py b/skl2onnx/shape_calculators/one_vs_rest_classifier.py
index 579db17d3..c54e686be 100644
--- a/skl2onnx/shape_calculators/one_vs_rest_classifier.py
+++ b/skl2onnx/shape_calculators/one_vs_rest_classifier.py
@@ -11,8 +11,10 @@ def calculate_constant_predictor_output_shapes(operator):
operator.outputs[1].type.shape = [N, 2]
-register_shape_calculator('Sklearn_ConstantPredictor',
- calculate_constant_predictor_output_shapes)
+register_shape_calculator(
+ "Sklearn_ConstantPredictor", calculate_constant_predictor_output_shapes
+)
-register_shape_calculator('SklearnOneVsRestClassifier',
- calculate_linear_classifier_output_shapes)
+register_shape_calculator(
+ "SklearnOneVsRestClassifier", calculate_linear_classifier_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/ordinal_encoder.py b/skl2onnx/shape_calculators/ordinal_encoder.py
index 907f86a63..a08ab3bd4 100644
--- a/skl2onnx/shape_calculators/ordinal_encoder.py
+++ b/skl2onnx/shape_calculators/ordinal_encoder.py
@@ -11,11 +11,14 @@ def calculate_sklearn_ordinal_encoder_output_shapes(operator):
op_features = sum(list(map(lambda x: x.type.shape[1], operator.inputs)))
if np.issubdtype(ordinal_op.dtype, np.floating):
operator.outputs[0].type = FloatTensorType(
- [operator.inputs[0].get_first_dimension(), op_features])
+ [operator.inputs[0].get_first_dimension(), op_features]
+ )
else:
operator.outputs[0].type = Int64TensorType(
- [operator.inputs[0].get_first_dimension(), op_features])
+ [operator.inputs[0].get_first_dimension(), op_features]
+ )
-register_shape_calculator('SklearnOrdinalEncoder',
- calculate_sklearn_ordinal_encoder_output_shapes)
+register_shape_calculator(
+ "SklearnOrdinalEncoder", calculate_sklearn_ordinal_encoder_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/ovr_decision_function.py b/skl2onnx/shape_calculators/ovr_decision_function.py
index 3649aa36b..a5d1d087b 100644
--- a/skl2onnx/shape_calculators/ovr_decision_function.py
+++ b/skl2onnx/shape_calculators/ovr_decision_function.py
@@ -7,8 +7,10 @@
def calculate_sklearn_ovr_decision_function(operator):
N = operator.inputs[0].get_first_dimension()
operator.outputs[0].type = operator.inputs[0].type.__class__(
- [N, len(operator.raw_operator.classes_)])
+ [N, len(operator.raw_operator.classes_)]
+ )
-register_shape_calculator('SklearnOVRDecisionFunction',
- calculate_sklearn_ovr_decision_function)
+register_shape_calculator(
+ "SklearnOVRDecisionFunction", calculate_sklearn_ovr_decision_function
+)
diff --git a/skl2onnx/shape_calculators/pipelines.py b/skl2onnx/shape_calculators/pipelines.py
index 7384c8c55..12ce7fd85 100644
--- a/skl2onnx/shape_calculators/pipelines.py
+++ b/skl2onnx/shape_calculators/pipelines.py
@@ -15,9 +15,8 @@ def column_transformer_shape_calculator(operator):
pass
+register_shape_calculator("SklearnPipeline", pipeline_shape_calculator)
+register_shape_calculator("SklearnFeatureUnion", feature_union_shape_calculator)
register_shape_calculator(
- 'SklearnPipeline', pipeline_shape_calculator)
-register_shape_calculator(
- 'SklearnFeatureUnion', feature_union_shape_calculator)
-register_shape_calculator(
- 'SklearnColumnTransformer', column_transformer_shape_calculator)
+ "SklearnColumnTransformer", column_transformer_shape_calculator
+)
diff --git a/skl2onnx/shape_calculators/polynomial_features.py b/skl2onnx/shape_calculators/polynomial_features.py
index 719e9f3a1..871619872 100644
--- a/skl2onnx/shape_calculators/polynomial_features.py
+++ b/skl2onnx/shape_calculators/polynomial_features.py
@@ -2,9 +2,7 @@
import copy
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common._registration import register_shape_calculator
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -13,8 +11,8 @@
def calculate_sklearn_polynomial_features(operator):
check_input_and_output_numbers(operator, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType])
+ operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType]
+ )
N = operator.inputs[0].get_first_dimension()
model = operator.raw_operator
@@ -22,5 +20,6 @@ def calculate_sklearn_polynomial_features(operator):
operator.outputs[0].type.shape = [N, model.n_output_features_]
-register_shape_calculator('SklearnPolynomialFeatures',
- calculate_sklearn_polynomial_features)
+register_shape_calculator(
+ "SklearnPolynomialFeatures", calculate_sklearn_polynomial_features
+)
diff --git a/skl2onnx/shape_calculators/power_transformer.py b/skl2onnx/shape_calculators/power_transformer.py
index 72dd70cae..ce61ffa0b 100644
--- a/skl2onnx/shape_calculators/power_transformer.py
+++ b/skl2onnx/shape_calculators/power_transformer.py
@@ -2,9 +2,7 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType
-)
+from ..common.data_types import FloatTensorType
def powertransformer_shape_calculator(operator):
@@ -15,5 +13,4 @@ def powertransformer_shape_calculator(operator):
output.type = FloatTensorType([n, c])
-register_shape_calculator('SklearnPowerTransformer',
- powertransformer_shape_calculator)
+register_shape_calculator("SklearnPowerTransformer", powertransformer_shape_calculator)
diff --git a/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py b/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py
index cf66973bd..3dab73c60 100644
--- a/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py
+++ b/skl2onnx/shape_calculators/quadratic_discriminant_analysis.py
@@ -11,5 +11,6 @@ def calculate_quadratic_discriminant_analysis_shapes(operator):
register_shape_calculator(
- 'SklearnQuadraticDiscriminantAnalysis',
- calculate_quadratic_discriminant_analysis_shapes)
+ "SklearnQuadraticDiscriminantAnalysis",
+ calculate_quadratic_discriminant_analysis_shapes,
+)
diff --git a/skl2onnx/shape_calculators/random_projection.py b/skl2onnx/shape_calculators/random_projection.py
index 805180afb..fd211cea1 100644
--- a/skl2onnx/shape_calculators/random_projection.py
+++ b/skl2onnx/shape_calculators/random_projection.py
@@ -13,5 +13,6 @@ def random_projection_shape_calculator(operator):
operator.outputs[0].type.shape = [n, c]
-register_shape_calculator('SklearnGaussianRandomProjection',
- random_projection_shape_calculator)
+register_shape_calculator(
+ "SklearnGaussianRandomProjection", random_projection_shape_calculator
+)
diff --git a/skl2onnx/shape_calculators/random_trees_embedding.py b/skl2onnx/shape_calculators/random_trees_embedding.py
index e7b07c49e..aa5490026 100644
--- a/skl2onnx/shape_calculators/random_trees_embedding.py
+++ b/skl2onnx/shape_calculators/random_trees_embedding.py
@@ -10,9 +10,8 @@ def calculate_sklearn_random_trees_embedding_output_shapes(operator):
op = operator.raw_operator.one_hot_encoder_
categories_len = 0
for index, categories in enumerate(op.categories_):
- if hasattr(op, 'drop_idx_') and op.drop_idx_ is not None:
- categories = (categories[np.arange(len(categories)) !=
- op.drop_idx_[index]])
+ if hasattr(op, "drop_idx_") and op.drop_idx_ is not None:
+ categories = categories[np.arange(len(categories)) != op.drop_idx_[index]]
categories_len += len(categories)
instances = operator.inputs[0].get_first_dimension()
if np.issubdtype(op.dtype, np.signedinteger):
@@ -22,5 +21,6 @@ def calculate_sklearn_random_trees_embedding_output_shapes(operator):
register_shape_calculator(
- 'SklearnRandomTreesEmbedding',
- calculate_sklearn_random_trees_embedding_output_shapes)
+ "SklearnRandomTreesEmbedding",
+ calculate_sklearn_random_trees_embedding_output_shapes,
+)
diff --git a/skl2onnx/shape_calculators/replace_op.py b/skl2onnx/shape_calculators/replace_op.py
index 506adf906..a34167340 100644
--- a/skl2onnx/shape_calculators/replace_op.py
+++ b/skl2onnx/shape_calculators/replace_op.py
@@ -6,10 +6,10 @@
def calculate_sklearn_replace_transformer(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
operator.outputs[0].type = operator.inputs[0].type
register_shape_calculator(
- 'SklearnReplaceTransformer', calculate_sklearn_replace_transformer)
+ "SklearnReplaceTransformer", calculate_sklearn_replace_transformer
+)
diff --git a/skl2onnx/shape_calculators/scaler.py b/skl2onnx/shape_calculators/scaler.py
index af299f123..12f7077e3 100644
--- a/skl2onnx/shape_calculators/scaler.py
+++ b/skl2onnx/shape_calculators/scaler.py
@@ -3,9 +3,7 @@
import numbers
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
-)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -18,19 +16,19 @@ def calculate_sklearn_scaler_output_shapes(operator):
Similar to imputer, this operator can take multiple input feature
tensors and concatenate them along C-axis.
"""
- check_input_and_output_numbers(operator, input_count_range=[1, None],
- output_count_range=1)
+ check_input_and_output_numbers(
+ operator, input_count_range=[1, None], output_count_range=1
+ )
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType],
- good_output_types=[FloatTensorType, DoubleTensorType])
+ operator,
+ good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType],
+ good_output_types=[FloatTensorType, DoubleTensorType],
+ )
# Inputs: multiple float- and integer-tensors
# Output: one float tensor
for variable in operator.inputs:
- if (len(set(variable.get_first_dimension()
- for variable in operator.inputs))
- > 1):
- raise RuntimeError('Batch size must be identical across inputs.')
+ if len(set(variable.get_first_dimension() for variable in operator.inputs)) > 1:
+ raise RuntimeError("Batch size must be identical across inputs.")
N = operator.inputs[0].get_first_dimension()
C = 0
@@ -45,13 +43,8 @@ def calculate_sklearn_scaler_output_shapes(operator):
operator.outputs[0].type.shape = [N, C]
-register_shape_calculator('SklearnRobustScaler',
- calculate_sklearn_scaler_output_shapes)
-register_shape_calculator('SklearnScaler',
- calculate_sklearn_scaler_output_shapes)
-register_shape_calculator('SklearnNormalizer',
- calculate_sklearn_scaler_output_shapes)
-register_shape_calculator('SklearnMinMaxScaler',
- calculate_sklearn_scaler_output_shapes)
-register_shape_calculator('SklearnMaxAbsScaler',
- calculate_sklearn_scaler_output_shapes)
+register_shape_calculator("SklearnRobustScaler", calculate_sklearn_scaler_output_shapes)
+register_shape_calculator("SklearnScaler", calculate_sklearn_scaler_output_shapes)
+register_shape_calculator("SklearnNormalizer", calculate_sklearn_scaler_output_shapes)
+register_shape_calculator("SklearnMinMaxScaler", calculate_sklearn_scaler_output_shapes)
+register_shape_calculator("SklearnMaxAbsScaler", calculate_sklearn_scaler_output_shapes)
diff --git a/skl2onnx/shape_calculators/sequence.py b/skl2onnx/shape_calculators/sequence.py
index 4ef12cea2..b423b66aa 100644
--- a/skl2onnx/shape_calculators/sequence.py
+++ b/skl2onnx/shape_calculators/sequence.py
@@ -11,6 +11,7 @@ def calculate_sklearn_sequence_construct(operator):
pass
-register_shape_calculator('SklearnSequenceAt', calculate_sklearn_sequence_at)
+register_shape_calculator("SklearnSequenceAt", calculate_sklearn_sequence_at)
register_shape_calculator(
- 'SklearnSequenceConstruct', calculate_sklearn_sequence_construct)
+ "SklearnSequenceConstruct", calculate_sklearn_sequence_construct
+)
diff --git a/skl2onnx/shape_calculators/sgd_oneclass_svm.py b/skl2onnx/shape_calculators/sgd_oneclass_svm.py
index 3e3d5a5dd..763b5e75f 100644
--- a/skl2onnx/shape_calculators/sgd_oneclass_svm.py
+++ b/skl2onnx/shape_calculators/sgd_oneclass_svm.py
@@ -6,9 +6,16 @@
def calculate_sgd_oneclass_svm_output_shapes(operator):
N = operator.inputs[0].get_first_dimension()
- operator.outputs[0].type = Int64TensorType([N, ])
- operator.outputs[1].type.shape = [N, ]
+ operator.outputs[0].type = Int64TensorType(
+ [
+ N,
+ ]
+ )
+ operator.outputs[1].type.shape = [
+ N,
+ ]
register_shape_calculator(
- 'SklearnSGDOneClassSVM', calculate_sgd_oneclass_svm_output_shapes)
+ "SklearnSGDOneClassSVM", calculate_sgd_oneclass_svm_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/support_vector_machines.py b/skl2onnx/shape_calculators/support_vector_machines.py
index da60ddd8b..f15b5934e 100644
--- a/skl2onnx/shape_calculators/support_vector_machines.py
+++ b/skl2onnx/shape_calculators/support_vector_machines.py
@@ -32,38 +32,40 @@ def calculate_sklearn_svm_output_shapes(operator):
op = operator.raw_operator
N = operator.inputs[0].get_first_dimension()
- if operator.type in ['SklearnOneClassSVM']:
+ if operator.type in ["SklearnOneClassSVM"]:
operator.outputs[0].type = Int64TensorType([N, 1])
operator.outputs[1].type.shape = [N, 1]
- elif operator.type in ['SklearnSVC'] or isinstance(op, (SVC, NuSVC)):
+ elif operator.type in ["SklearnSVC"] or isinstance(op, (SVC, NuSVC)):
number_of_classes = len(op.classes_)
- check_input_and_output_numbers(operator, input_count_range=[1, None],
- output_count_range=[1, 2])
+ check_input_and_output_numbers(
+ operator, input_count_range=[1, None], output_count_range=[1, 2]
+ )
if all(isinstance(i, str) for i in op.classes_):
operator.outputs[0].type = StringTensorType([N])
operator.outputs[1].type.shape = [N, number_of_classes]
- elif all(isinstance(i, (numbers.Real, bool, np.bool_))
- for i in op.classes_):
+ elif all(isinstance(i, (numbers.Real, bool, np.bool_)) for i in op.classes_):
operator.outputs[0].type = Int64TensorType([N])
operator.outputs[1].type.shape = [N, number_of_classes]
else:
- raise RuntimeError('Class labels should be either all strings or '
- 'all integers. C++ backends do not support '
- 'mixed types.')
+ raise RuntimeError(
+ "Class labels should be either all strings or "
+ "all integers. C++ backends do not support "
+ "mixed types."
+ )
- elif operator.type in ['SklearnSVR']:
- check_input_and_output_numbers(operator, input_count_range=[1, None],
- output_count_range=1)
+ elif operator.type in ["SklearnSVR"]:
+ check_input_and_output_numbers(
+ operator, input_count_range=[1, None], output_count_range=1
+ )
operator.outputs[0].type.shape = [N, 1]
else:
raise RuntimeError(
- "New kind of SVM, no shape calculator exist for '{}'.".format(
- operator.type))
+ "New kind of SVM, no shape calculator exist for '{}'.".format(operator.type)
+ )
-register_shape_calculator(
- 'SklearnOneClassSVM', calculate_sklearn_svm_output_shapes)
-register_shape_calculator('SklearnSVC', calculate_sklearn_svm_output_shapes)
-register_shape_calculator('SklearnSVR', calculate_sklearn_svm_output_shapes)
+register_shape_calculator("SklearnOneClassSVM", calculate_sklearn_svm_output_shapes)
+register_shape_calculator("SklearnSVC", calculate_sklearn_svm_output_shapes)
+register_shape_calculator("SklearnSVR", calculate_sklearn_svm_output_shapes)
diff --git a/skl2onnx/shape_calculators/svd.py b/skl2onnx/shape_calculators/svd.py
index a2d6a7352..78a9c3cab 100644
--- a/skl2onnx/shape_calculators/svd.py
+++ b/skl2onnx/shape_calculators/svd.py
@@ -2,8 +2,7 @@
from ..common._registration import register_shape_calculator
-from ..common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType)
+from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType
from ..common.utils import check_input_and_output_numbers
from ..common.utils import check_input_and_output_types
@@ -15,30 +14,33 @@ def calculate_sklearn_truncated_svd_output_shapes(operator):
Transform feature dimension from C to K
"""
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
check_input_and_output_types(
- operator, good_input_types=[
- FloatTensorType, Int64TensorType, DoubleTensorType],
- good_output_types=[FloatTensorType, DoubleTensorType])
+ operator,
+ good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType],
+ good_output_types=[FloatTensorType, DoubleTensorType],
+ )
if len(operator.inputs[0].type.shape) != 2:
- raise RuntimeError('Only 2-D tensor(s) can be input(s).')
+ raise RuntimeError("Only 2-D tensor(s) can be input(s).")
cls_type = operator.inputs[0].type.__class__
if cls_type != DoubleTensorType:
cls_type = FloatTensorType
N = operator.inputs[0].get_first_dimension()
- K = (operator.raw_operator.n_components
- if operator.type == 'SklearnTruncatedSVD'
- else operator.raw_operator.n_components_)
+ K = (
+ operator.raw_operator.n_components
+ if operator.type == "SklearnTruncatedSVD"
+ else operator.raw_operator.n_components_
+ )
operator.outputs[0].type = cls_type([N, K])
-register_shape_calculator('SklearnIncrementalPCA',
- calculate_sklearn_truncated_svd_output_shapes)
-register_shape_calculator('SklearnPCA',
- calculate_sklearn_truncated_svd_output_shapes)
-register_shape_calculator('SklearnTruncatedSVD',
- calculate_sklearn_truncated_svd_output_shapes)
+register_shape_calculator(
+ "SklearnIncrementalPCA", calculate_sklearn_truncated_svd_output_shapes
+)
+register_shape_calculator("SklearnPCA", calculate_sklearn_truncated_svd_output_shapes)
+register_shape_calculator(
+ "SklearnTruncatedSVD", calculate_sklearn_truncated_svd_output_shapes
+)
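
The `K` selection above exists because `TruncatedSVD` only exposes the requested `n_components`, while PCA-like models expose the fitted `n_components_`. A quick sketch of that difference (random data, illustrative):

```python
import numpy as np
from sklearn.decomposition import PCA, TruncatedSVD

X = np.random.RandomState(0).rand(20, 6)
pca = PCA(n_components=3).fit(X)
svd = TruncatedSVD(n_components=3).fit(X)

# The shape calculator reads K from n_components for SklearnTruncatedSVD
# and from the fitted n_components_ attribute otherwise.
print(pca.n_components_, svd.n_components)              # 3 3
print(pca.transform(X).shape, svd.transform(X).shape)   # (20, 3) (20, 3)
```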
diff --git a/skl2onnx/shape_calculators/text_vectorizer.py b/skl2onnx/shape_calculators/text_vectorizer.py
index d58f1eb99..895f7eea3 100644
--- a/skl2onnx/shape_calculators/text_vectorizer.py
+++ b/skl2onnx/shape_calculators/text_vectorizer.py
@@ -6,20 +6,21 @@
def calculate_sklearn_text_vectorizer_output_shapes(operator):
- '''
+ """
Allowed input/output patterns are
1. Map ---> [1, C]
C is the total number of allowed keys in the input dictionary.
- '''
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ """
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
C = max(operator.raw_operator.vocabulary_.values()) + 1
operator.outputs[0].type.shape = [None, C]
-register_shape_calculator('SklearnCountVectorizer',
- calculate_sklearn_text_vectorizer_output_shapes)
-register_shape_calculator('SklearnTfidfVectorizer',
- calculate_sklearn_text_vectorizer_output_shapes)
+register_shape_calculator(
+ "SklearnCountVectorizer", calculate_sklearn_text_vectorizer_output_shapes
+)
+register_shape_calculator(
+ "SklearnTfidfVectorizer", calculate_sklearn_text_vectorizer_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/tfidf_transformer.py b/skl2onnx/shape_calculators/tfidf_transformer.py
index d9b5d20a1..443cc9c76 100644
--- a/skl2onnx/shape_calculators/tfidf_transformer.py
+++ b/skl2onnx/shape_calculators/tfidf_transformer.py
@@ -6,11 +6,11 @@
def calculate_sklearn_tfidf_transformer_output_shapes(operator):
- check_input_and_output_numbers(operator, input_count_range=1,
- output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
C = operator.inputs[0].type.shape[1]
operator.outputs[0].type.shape = [1, C]
-register_shape_calculator('SklearnTfidfTransformer',
- calculate_sklearn_tfidf_transformer_output_shapes)
+register_shape_calculator(
+ "SklearnTfidfTransformer", calculate_sklearn_tfidf_transformer_output_shapes
+)
diff --git a/skl2onnx/shape_calculators/voting_classifier.py b/skl2onnx/shape_calculators/voting_classifier.py
index e02dcf12b..7aaecbf9a 100644
--- a/skl2onnx/shape_calculators/voting_classifier.py
+++ b/skl2onnx/shape_calculators/voting_classifier.py
@@ -1,14 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
from ..common._registration import register_shape_calculator
-from ..common.shape_calculator import (
- _calculate_linear_classifier_output_shapes)
+from ..common.shape_calculator import _calculate_linear_classifier_output_shapes
def voting_classifier_shape_calculator(operator):
return _calculate_linear_classifier_output_shapes(
- operator, enable_type_checking=False)
+ operator, enable_type_checking=False
+ )
-register_shape_calculator(
- 'SklearnVotingClassifier', voting_classifier_shape_calculator)
+register_shape_calculator("SklearnVotingClassifier", voting_classifier_shape_calculator)
diff --git a/skl2onnx/shape_calculators/voting_regressor.py b/skl2onnx/shape_calculators/voting_regressor.py
index 66e044956..bc91be75a 100644
--- a/skl2onnx/shape_calculators/voting_regressor.py
+++ b/skl2onnx/shape_calculators/voting_regressor.py
@@ -7,8 +7,8 @@
def voting_regressor_shape_calculator(operator):
return _calculate_linear_regressor_output_shapes(
- operator, enable_type_checking=False)
+ operator, enable_type_checking=False
+ )
-register_shape_calculator(
- 'SklearnVotingRegressor', voting_regressor_shape_calculator)
+register_shape_calculator("SklearnVotingRegressor", voting_regressor_shape_calculator)
diff --git a/skl2onnx/shape_calculators/zip_map.py b/skl2onnx/shape_calculators/zip_map.py
index 9e0b7f8ed..7dcc15b39 100644
--- a/skl2onnx/shape_calculators/zip_map.py
+++ b/skl2onnx/shape_calculators/zip_map.py
@@ -5,26 +5,31 @@
def calculate_sklearn_zipmap(operator):
- if (len(operator.inputs) != len(operator.outputs) or
- len(operator.inputs) not in (1, 2)):
+ if len(operator.inputs) != len(operator.outputs) or len(operator.inputs) not in (
+ 1,
+ 2,
+ ):
raise RuntimeError(
- "SklearnZipMap expects the same number of inputs and outputs.")
+ "SklearnZipMap expects the same number of inputs and outputs."
+ )
if len(operator.inputs) == 2:
operator.outputs[0].type = operator.inputs[0].type.__class__(
- operator.inputs[0].type.shape)
+ operator.inputs[0].type.shape
+ )
if operator.outputs[1].type is not None:
- operator.outputs[1].type.element_type.value_type = \
- operator.inputs[1].type.__class__([])
+ operator.outputs[1].type.element_type.value_type = operator.inputs[
+ 1
+ ].type.__class__([])
def calculate_sklearn_zipmap_columns(operator):
N = operator.inputs[0].get_first_dimension()
operator.outputs[0].type = operator.inputs[0].type.__class__(
- operator.inputs[0].type.shape)
+ operator.inputs[0].type.shape
+ )
for i in range(1, len(operator.outputs)):
operator.outputs[i].type.shape = [N]
-register_shape_calculator('SklearnZipMap', calculate_sklearn_zipmap)
-register_shape_calculator(
- 'SklearnZipMapColumns', calculate_sklearn_zipmap_columns)
+register_shape_calculator("SklearnZipMap", calculate_sklearn_zipmap)
+register_shape_calculator("SklearnZipMapColumns", calculate_sklearn_zipmap_columns)
diff --git a/skl2onnx/sklapi/cast_regressor.py b/skl2onnx/sklapi/cast_regressor.py
index 4da4a4ebb..37e83da41 100644
--- a/skl2onnx/sklapi/cast_regressor.py
+++ b/skl2onnx/sklapi/cast_regressor.py
@@ -2,10 +2,13 @@
import numpy as np
from sklearn.base import RegressorMixin, BaseEstimator
+
try:
from sklearn.utils.validation import _deprecate_positional_args
except ImportError:
- def _deprecate_positional_args(x): return x # noqa
+
+ def _deprecate_positional_args(x):
+ return x # noqa
class CastRegressor(RegressorMixin, BaseEstimator): # noqa
@@ -34,8 +37,8 @@ def _cast(self, a, name):
a2 = a.astype(self.dtype)
except ValueError:
raise ValueError(
- "Unable to cast {} from {} into {}.".format(
- name, a.dtype, self.dtype))
+ "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype)
+ )
return a2
def fit(self, X, y=None, sample_weight=None):
@@ -49,15 +52,15 @@ def predict(self, X, y=None):
"""
Predicts and casts the prediction.
"""
- return self._cast(self.estimator.predict(X), 'predict(X)')
+ return self._cast(self.estimator.predict(X), "predict(X)")
def decision_function(self, X, y=None):
"""
Calls *decision_function* and casts the outputs.
"""
- if not hasattr(self.estimator, 'decision_function'):
+ if not hasattr(self.estimator, "decision_function"):
raise AttributeError(
- "%r object has no attribute 'decision_function'." %
- self.estimator.__class__.__name__)
- return self._cast(self.estimator.decision_function(X),
- 'decision_function(X)')
+ "%r object has no attribute 'decision_function'."
+ % self.estimator.__class__.__name__
+ )
+ return self._cast(self.estimator.decision_function(X), "decision_function(X)")
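
`CastRegressor` simply wraps an estimator and casts its predictions. A short usage sketch, assuming the constructor takes the wrapped estimator plus a `dtype` keyword as the attributes above suggest:

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from skl2onnx.sklapi import CastRegressor

rng = np.random.RandomState(0)
X = rng.rand(40, 3)
y = X.sum(axis=1)

# Predictions are cast to float32, which helps keep scikit-learn and ONNX
# outputs numerically comparable.
reg = CastRegressor(GradientBoostingRegressor(n_estimators=10), dtype=np.float32)
reg.fit(X, y)
print(reg.predict(X[:3]).dtype)  # float32
```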
diff --git a/skl2onnx/sklapi/cast_transformer.py b/skl2onnx/sklapi/cast_transformer.py
index 1c9c13f6f..703b8b5c0 100644
--- a/skl2onnx/sklapi/cast_transformer.py
+++ b/skl2onnx/sklapi/cast_transformer.py
@@ -2,10 +2,13 @@
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
+
try:
from sklearn.utils.validation import _deprecate_positional_args
except ImportError:
- def _deprecate_positional_args(x): return x # noqa
+
+ def _deprecate_positional_args(x):
+ return x # noqa
class CastTransformer(TransformerMixin, BaseEstimator):
@@ -27,30 +30,28 @@ def __init__(self, *, dtype=np.float32):
def _cast(self, a, name):
if not isinstance(a, np.ndarray):
- if hasattr(a, 'values') and hasattr(a, 'iloc'):
+ if hasattr(a, "values") and hasattr(a, "iloc"):
# dataframe
a = a.values
- elif not hasattr(a, 'astype'):
- raise TypeError(
- "{} must be a numpy array or a dataframe.".format(
- name))
+ elif not hasattr(a, "astype"):
+ raise TypeError("{} must be a numpy array or a dataframe.".format(name))
try:
a2 = a.astype(self.dtype)
except ValueError:
raise ValueError(
- "Unable to cast {} from {} into {}.".format(
- name, a.dtype, self.dtype))
+ "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype)
+ )
return a2
def fit(self, X, y=None, sample_weight=None):
"""
Does nothing except checking *dtype* may be applied.
"""
- self._cast(X, 'X')
+ self._cast(X, "X")
return self
def transform(self, X, y=None):
"""
Casts array X.
"""
- return self._cast(X, 'X')
+ return self._cast(X, "X")
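
`CastTransformer` is typically placed inside a pipeline to force a dtype early and reduce float32/float64 discrepancies after conversion; a minimal sketch with illustrative data:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from skl2onnx.sklapi import CastTransformer

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
y = X @ np.array([1.0, 2.0, 3.0])

pipe = Pipeline([
    ("cast32", CastTransformer(dtype=np.float32)),  # cast features first
    ("reg", LinearRegression()),
])
pipe.fit(X, y)
print(pipe.predict(X[:2]))
```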
diff --git a/skl2onnx/sklapi/replace_transformer.py b/skl2onnx/sklapi/replace_transformer.py
index f135ab0c9..46083455a 100644
--- a/skl2onnx/sklapi/replace_transformer.py
+++ b/skl2onnx/sklapi/replace_transformer.py
@@ -2,10 +2,13 @@
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
+
try:
from sklearn.utils.validation import _deprecate_positional_args
except ImportError:
- def _deprecate_positional_args(x): return x # noqa
+
+ def _deprecate_positional_args(x):
+ return x # noqa
class ReplaceTransformer(TransformerMixin, BaseEstimator):
@@ -29,13 +32,13 @@ def __init__(self, *, from_value=0, to_value=np.nan, dtype=np.float32):
self.to_value = to_value
def _replace(self, a):
- if hasattr(a, 'todense'):
+ if hasattr(a, "todense"):
if np.isnan(self.to_value) and self.from_value == 0:
# implicit
return a
raise RuntimeError(
- "Unable to replace 0 by nan one value by another "
- "in sparse matrix.")
+ "Unable to replace 0 by nan one value by another " "in sparse matrix."
+ )
return np.where(a == self.from_value, self.to_value, a)
def fit(self, X, y=None, sample_weight=None):
diff --git a/skl2onnx/sklapi/sklearn_text.py b/skl2onnx/sklapi/sklearn_text.py
index a9efa8efe..709398dfc 100644
--- a/skl2onnx/sklapi/sklearn_text.py
+++ b/skl2onnx/sklapi/sklearn_text.py
@@ -3,9 +3,9 @@
@brief Overloads :epkg:`TfidfVectorizer` and :epkg:`CountVectorizer`.
"""
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+
try:
- from sklearn.feature_extraction.text import (
- _VectorizerMixin as VectorizerMixin)
+ from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin
except ImportError: # pragma: no cover
# scikit-learn < 0.23
from sklearn.feature_extraction.text import VectorizerMixin
@@ -34,12 +34,12 @@ def _word_ngrams(self, tokens, stop_words=None):
for token in tokens:
val = (token,) if isinstance(token, str) else token
if not isinstance(val, tuple):
- raise TypeError(
- f"Unexpected type {type(val)}:{val!r} for a token.")
+ raise TypeError(f"Unexpected type {type(val)}:{val!r} for a token.")
if any(map(lambda x: not isinstance(x, str), val)):
raise TypeError(
f"Unexpected type {val!r}, one part of a "
- f"token is not a string.")
+ f"token is not a string."
+ )
new_tokens.append(val)
tokens = new_tokens
@@ -69,13 +69,13 @@ def space_join(tokens):
new_tokens.extend(token)
else:
raise TypeError( # pragma: no cover
- f"Unable to build a n-grams out of {tokens}.")
+ f"Unable to build a n-grams out of {tokens}."
+ )
return tuple(new_tokens)
- for n in range(min_n,
- min(max_n + 1, n_original_tokens + 1)):
+ for n in range(min_n, min(max_n + 1, n_original_tokens + 1)):
for i in range(n_original_tokens - n + 1):
- tokens_append(space_join(original_tokens[i: i + n]))
+ tokens_append(space_join(original_tokens[i : i + n]))
return tokens
@staticmethod
@@ -83,8 +83,7 @@ def _fix_vocabulary(expected, new_voc):
update = {}
for w, wid in new_voc.items():
if not isinstance(w, tuple):
- raise TypeError(
- f"Tuple is expected for a token not {type(w)}.")
+ raise TypeError(f"Tuple is expected for a token not {type(w)}.")
s = " ".join(w)
if s in expected:
if expected[s] != wid:
@@ -148,8 +147,7 @@ class TraceableCountVectorizer(CountVectorizer, NGramsMixin):
"""
def _word_ngrams(self, tokens, stop_words=None):
- return NGramsMixin._word_ngrams(
- self, tokens=tokens, stop_words=stop_words)
+ return NGramsMixin._word_ngrams(self, tokens=tokens, stop_words=stop_words)
def fit(self, X, y=None):
# scikit-learn implements fit_transform and fit calls it.
@@ -164,8 +162,8 @@ def fit(self, X, y=None):
self.same_ = same
if self.stop_words != same.stop_words:
raise AssertionError(
- f"Different stop_words {self.stop_words} "
- f"!= {same.stop_words}.")
+ f"Different stop_words {self.stop_words} " f"!= {same.stop_words}."
+ )
update, dups = self._fix_vocabulary(same.vocabulary_, self.vocabulary_)
self.updated_vocabulary_ = update
self.duplicated_vocabulary_ = dups
@@ -218,11 +216,10 @@ class TraceableTfidfVectorizer(TfidfVectorizer, NGramsMixin):
scikit-learn cannot distinguish between bi gram ("a b", "c") and
("a", "b c"). Therefore, there are merged into the same
column by scikit-learn. This class, even if it is able to distinguish
- between them, keeps the same ambiguity. """
+ between them, keeps the same ambiguity."""
def _word_ngrams(self, tokens, stop_words=None):
- return NGramsMixin._word_ngrams(
- self, tokens=tokens, stop_words=stop_words)
+ return NGramsMixin._word_ngrams(self, tokens=tokens, stop_words=stop_words)
def fit(self, X, y=None):
super().fit(X, y=y)
@@ -231,8 +228,8 @@ def fit(self, X, y=None):
self.same_ = same
if self.stop_words != same.stop_words:
raise AssertionError(
- f"Different stop_words {self.stop_words} "
- f"!= {same.stop_words}.")
+ f"Different stop_words {self.stop_words} " f"!= {same.stop_words}."
+ )
update, dups = self._fix_vocabulary(same.vocabulary_, self.vocabulary_)
self.updated_vocabulary_ = update
self.duplicated_vocabulary_ = dups
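
The traceable vectorizers keep n-grams as tuples of tokens so the fitted vocabulary stays unambiguous. A short sketch, assuming `TraceableCountVectorizer` is exported by `skl2onnx.sklapi` as in the current package layout:

```python
from skl2onnx.sklapi import TraceableCountVectorizer

corpus = [
    "the first document",
    "the second document is longer",
]
mod = TraceableCountVectorizer(ngram_range=(1, 2)).fit(corpus)
# Keys are tuples of tokens, e.g. ("the", "first"), rather than joined
# strings, so ("a b", "c") and ("a", "b c") cannot collide.
print(sorted(mod.vocabulary_)[:5])
```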
diff --git a/skl2onnx/sklapi/sklearn_text_onnx.py b/skl2onnx/sklapi/sklearn_text_onnx.py
index 611200e88..e3ae5e32b 100644
--- a/skl2onnx/sklapi/sklearn_text_onnx.py
+++ b/skl2onnx/sklapi/sklearn_text_onnx.py
@@ -2,11 +2,10 @@
from .. import update_registered_converter
from ..shape_calculators.text_vectorizer import (
- calculate_sklearn_text_vectorizer_output_shapes)
-from ..operator_converters.text_vectoriser import (
- convert_sklearn_text_vectorizer)
-from ..operator_converters.tfidf_vectoriser import (
- convert_sklearn_tfidf_vectoriser)
+ calculate_sklearn_text_vectorizer_output_shapes,
+)
+from ..operator_converters.text_vectoriser import convert_sklearn_text_vectorizer
+from ..operator_converters.tfidf_vectoriser import convert_sklearn_tfidf_vectoriser
from .sklearn_text import TraceableCountVectorizer, TraceableTfidfVectorizer
@@ -14,17 +13,27 @@ def register():
"""Register converter for TraceableCountVectorizer,
TraceableTfidfVectorizer."""
update_registered_converter(
- TraceableCountVectorizer, "Skl2onnxTraceableCountVectorizer",
+ TraceableCountVectorizer,
+ "Skl2onnxTraceableCountVectorizer",
calculate_sklearn_text_vectorizer_output_shapes,
convert_sklearn_text_vectorizer,
- options={'tokenexp': None, 'separators': None,
- 'nan': [True, False],
- 'keep_empty_string': [True, False]})
+ options={
+ "tokenexp": None,
+ "separators": None,
+ "nan": [True, False],
+ "keep_empty_string": [True, False],
+ },
+ )
update_registered_converter(
- TraceableTfidfVectorizer, "Skl2onnxTraceableTfidfVectorizer",
+ TraceableTfidfVectorizer,
+ "Skl2onnxTraceableTfidfVectorizer",
calculate_sklearn_text_vectorizer_output_shapes,
convert_sklearn_tfidf_vectoriser,
- options={'tokenexp': None, 'separators': None,
- 'nan': [True, False],
- 'keep_empty_string': [True, False]})
+ options={
+ "tokenexp": None,
+ "separators": None,
+ "nan": [True, False],
+ "keep_empty_string": [True, False],
+ },
+ )
diff --git a/skl2onnx/sklapi/woe_transformer.py b/skl2onnx/sklapi/woe_transformer.py
index 2ddf21196..46ade73ef 100644
--- a/skl2onnx/sklapi/woe_transformer.py
+++ b/skl2onnx/sklapi/woe_transformer.py
@@ -2,10 +2,13 @@
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
+
try:
from sklearn.utils.validation import _deprecate_positional_args
except ImportError:
- def _deprecate_positional_args(x): return x # noqa
+
+ def _deprecate_positional_args(x):
+ return x # noqa
class WOETransformer(TransformerMixin, BaseEstimator):
@@ -70,7 +73,7 @@ def fit(self, X, y=None, sample_weight=None):
dim += 1
continue
intervals = self.intervals[i]
- if intervals == 'passthrough':
+ if intervals == "passthrough":
self.intervals_.append(None)
self.weights_.append(None)
self.indices_.append((dim, dim + 1))
@@ -78,18 +81,19 @@ def fit(self, X, y=None, sample_weight=None):
continue
if not isinstance(intervals, list):
raise TypeError(
- "Intervals for column %d must be a list not %r."
- "" % (i, intervals))
+ "Intervals for column %d must be a list not %r." "" % (i, intervals)
+ )
inlist = []
inweight = []
for index, interval in enumerate(intervals):
if not isinstance(interval, tuple):
raise TypeError(
- "Interval %d is not a tuple but %r." % (i, interval))
+ "Interval %d is not a tuple but %r." % (i, interval)
+ )
if len(interval) < 2:
raise ValueError(
- "Interval %d should have at least two values "
- "%r." % interval)
+ "Interval %d should have at least two values " "%r." % interval
+ )
res = []
for j in range(0, 2):
try:
@@ -97,13 +101,15 @@ def fit(self, X, y=None, sample_weight=None):
except (TypeError, ValueError) as e:
raise TypeError(
"Value at index %d in %r must be a float."
- "" % (j, interval)) from e
+ "" % (j, interval)
+ ) from e
res.append(fv)
if len(interval) >= 3:
if not isinstance(interval[2], bool):
raise TypeError(
"Value at index %i in %r must be a boolean."
- "" % (2, interval))
+ "" % (2, interval)
+ )
res.append(interval[2])
else:
res.append(False)
@@ -111,13 +117,17 @@ def fit(self, X, y=None, sample_weight=None):
if not isinstance(interval[3], bool):
raise TypeError(
"Value at index %i in %r must be a boolean."
- "" % (3, interval))
+ "" % (3, interval)
+ )
res.append(interval[3])
else:
res.append(True)
inlist.append(tuple(res))
- if (self.weights is None or i >= len(self.weights) or
- index >= len(self.weights[i])):
+ if (
+ self.weights is None
+ or i >= len(self.weights)
+ or index >= len(self.weights[i])
+ ):
inweight.append(1)
else:
inweight.append(self.weights[i][index])
@@ -147,8 +157,7 @@ def _transform_column(self, X, column_index):
right = col <= interval[1]
else:
right = col < interval[1]
- res[:, i] = ((left * right).astype(X.dtype) *
- self.weights_[column_index][i])
+ res[:, i] = (left * right).astype(X.dtype) * self.weights_[column_index][i]
if self.onehot:
return res
return res.sum(axis=1, keepdims=0)
@@ -164,7 +173,7 @@ def transform(self, X, y=None):
for i in range(X.shape[1]):
a, b = self.indices_[i]
if self.onehot:
- res[:, a: b] = self._transform_column(X, i)
+ res[:, a:b] = self._transform_column(X, i)
else:
res[:, i] = self._transform_column(X, i)
return res
@@ -181,8 +190,11 @@ def get_feature_names(self):
for interval in intervals:
name = [
"[" if interval[2] else "]",
- str(interval[0]), ",", str(interval[1]),
- "]" if interval[3] else "["]
+ str(interval[0]),
+ ",",
+ str(interval[1]),
+ "]" if interval[3] else "[",
+ ]
names.append("".join(name))
return names
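
The fit logic above normalizes each interval into `(low, high, closed_left, closed_right)` with defaults `(False, True)`. A toy usage sketch, assuming the constructor accepts `intervals`, `weights` and `onehot` keywords matching the attributes it stores:

```python
import numpy as np
from skl2onnx.sklapi import WOETransformer

# One input column, two right-closed intervals, illustrative weights.
intervals = [[(0.0, 0.5), (0.5, 1.0)]]
weights = [[-1.0, 2.0]]

woe = WOETransformer(intervals=intervals, weights=weights, onehot=False)
X = np.array([[0.1], [0.6], [0.9]], dtype=np.float32)
woe.fit(X)
print(woe.get_feature_names())  # expected ']0.0,0.5]' and ']0.5,1.0]'
print(woe.transform(X))         # expected [-1., 2., 2.]
```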
diff --git a/skl2onnx/sklapi/woe_transformer_onnx.py b/skl2onnx/sklapi/woe_transformer_onnx.py
index be5de3df4..8b550c8f1 100644
--- a/skl2onnx/sklapi/woe_transformer_onnx.py
+++ b/skl2onnx/sklapi/woe_transformer_onnx.py
@@ -4,36 +4,57 @@
from typing import List
import numpy as np
from onnx.helper import (
- make_node, make_graph, make_model, make_tensor_value_info,
- TensorProto)
+ make_node,
+ make_graph,
+ make_model,
+ make_tensor_value_info,
+ TensorProto,
+)
from onnx.numpy_helper import from_array
from onnx import onnx_pb as onnx_proto
from sklearn.base import BaseEstimator
from ..common.data_types import (
- Int64TensorType, FloatTensorType, DoubleTensorType,
- guess_numpy_type, guess_proto_type)
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+ guess_numpy_type,
+ guess_proto_type,
+)
from ..common._topology import Scope, Operator, Variable, OPSET_TO_IR_VERSION
from ..common._container import ModelComponentContainer
from ..common.utils import (
check_input_and_output_types,
check_input_and_output_numbers,
- get_producer, get_producer_version,
- get_domain, get_model_version)
+ get_producer,
+ get_producer_version,
+ get_domain,
+ get_model_version,
+)
from .. import update_registered_converter
from .._supported_operators import _get_sklearn_operator_name
from ..algebra.onnx_ops import (
- OnnxIdentity, OnnxMatMul, OnnxGather, OnnxConcat, OnnxReshapeApi13,
- OnnxTreeEnsembleRegressor_1, OnnxOneHotEncoder, OnnxCast)
+ OnnxIdentity,
+ OnnxMatMul,
+ OnnxGather,
+ OnnxConcat,
+ OnnxReshapeApi13,
+ OnnxTreeEnsembleRegressor_1,
+ OnnxOneHotEncoder,
+ OnnxCast,
+)
from .woe_transformer import WOETransformer
-def woe_parser(scope: Scope, model: BaseEstimator,
- inputs: List[Variable], custom_parsers: dict = None):
+def woe_parser(
+ scope: Scope,
+ model: BaseEstimator,
+ inputs: List[Variable],
+ custom_parsers: dict = None,
+):
"ONNX parser for WOETransformer: defines the output type."
alias = _get_sklearn_operator_name(type(model))
this_operator = scope.declare_local_operator(alias, model)
- output = scope.declare_local_variable(
- "encoding", inputs[0].type.__class__())
+ output = scope.declare_local_variable("encoding", inputs[0].type.__class__())
this_operator.inputs = inputs
this_operator.outputs.append(output)
return this_operator.outputs
@@ -43,9 +64,9 @@ def woe_shape_calculator(operator: Operator):
"ONNX shape calculator for WOETransformer: defines the output shape."
type_list = [Int64TensorType, FloatTensorType, DoubleTensorType]
check_input_and_output_types(
- operator, good_input_types=type_list, good_output_types=type_list)
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ operator, good_input_types=type_list, good_output_types=type_list
+ )
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
op = operator.raw_operator
x = operator.inputs[0]
N = x.get_first_dimension()
@@ -63,11 +84,10 @@ def woe_shape_calculator(operator: Operator):
class Tree:
-
class Node:
-
- def __init__(self, parent, is_left, is_leaf, feature,
- threshold, value, index=-1):
+ def __init__(
+ self, parent, is_left, is_leaf, feature, threshold, value, index=-1
+ ):
self.parent = parent
self.is_left = is_left
self.is_leaf = is_leaf
@@ -79,19 +99,34 @@ def __init__(self, parent, is_left, is_leaf, feature,
def __str__(self):
return (
"Node(%s, is_left=%r, is_leaf=%r, feature=%r, "
- "threshold=%r, value=%r, index=%r)%s" % (
- self.parent if isinstance(self.parent, int)
+ "threshold=%r, value=%r, index=%r)%s"
+ % (
+ self.parent
+ if isinstance(self.parent, int)
else "id%r" % id(self.parent),
- self.is_left, self.is_leaf, self.feature,
- self.threshold, self.value, self.index,
- " # %s %r -> %r%s%s%s" % (
- self.onnx_mode, self.onnx_threshold, self.onnx_value,
+ self.is_left,
+ self.is_leaf,
+ self.feature,
+ self.threshold,
+ self.value,
+ self.index,
+ " # %s %r -> %r%s%s%s"
+ % (
+ self.onnx_mode,
+ self.onnx_threshold,
+ self.onnx_value,
" -- %r" % self.intervals_
- if hasattr(self, 'intervals_') else '',
+ if hasattr(self, "intervals_")
+ else "",
" LL %r" % self.intervals_left_
- if hasattr(self, 'intervals_left_') else '',
+ if hasattr(self, "intervals_left_")
+ else "",
" RR %r" % self.intervals_right_
- if hasattr(self, 'intervals_right_') else '')))
+ if hasattr(self, "intervals_right_")
+ else "",
+ ),
+ )
+ )
@property
def onnx_value(self):
@@ -108,16 +143,15 @@ def onnx_mode(self):
# 'BRANCH_LEQ', 'BRANCH_LT', 'BRANCH_GTE', 'BRANCH_GT',
# 'BRANCH_EQ', 'BRANCH_NEQ', 'LEAF'
if self.is_leaf:
- return 'LEAF'
+ return "LEAF"
if self.threshold[1]:
- return 'BRANCH_LEQ'
- return 'BRANCH_LT'
+ return "BRANCH_LEQ"
+ return "BRANCH_LT"
@staticmethod
def _is_on_left_side(th, kind, x, leq, left_right_extremity):
- if kind not in ('BRANCH_LEQ', 'BRANCH_LT'):
- raise NotImplementedError(
- "Not implemented for mode %r." % kind)
+ if kind not in ("BRANCH_LEQ", "BRANCH_LT"):
+ raise NotImplementedError("Not implemented for mode %r." % kind)
if x < th:
return False
if x > th:
@@ -126,17 +160,16 @@ def _is_on_left_side(th, kind, x, leq, left_right_extremity):
return True
if left_right_extremity and not leq:
return False
- if kind == 'BRANCH_LEQ' and leq:
+ if kind == "BRANCH_LEQ" and leq:
return False
- if kind == 'BRANCH_LT' and not leq:
+ if kind == "BRANCH_LT" and not leq:
return False
return True
def is_on_left_side(self, x, leq, left_right_extremity):
th = self.threshold[0]
kind = self.onnx_mode
- res = Tree.Node._is_on_left_side(
- th, kind, x, leq, left_right_extremity)
+ res = Tree.Node._is_on_left_side(th, kind, x, leq, left_right_extremity)
return res
def __init__(self):
@@ -155,21 +188,23 @@ def __str__(self):
res = res.replace("id" + k, "n" + v)
return res
- def add_node(self, parent, is_left, is_leaf, feature, threshold,
- value=None, index=-1):
+ def add_node(
+ self, parent, is_left, is_leaf, feature, threshold, value=None, index=-1
+ ):
if is_leaf and value is None:
raise ValueError("value must be specified when is_leaf=True.")
if not is_leaf and value is not None:
raise ValueError("value must not be specified when is_leaf=False.")
- node = Tree.Node(parent, is_left, is_leaf, feature, threshold,
- value, index=index)
+ node = Tree.Node(
+ parent, is_left, is_leaf, feature, threshold, value, index=index
+ )
self.nodes.append(node)
if is_leaf:
if value in self.leave_values:
raise RuntimeError(
"The tree must contain unique tree value, %r "
- "already in %r.\n%s" % (
- value, self.leave_values, str(self)))
+ "already in %r.\n%s" % (value, self.leave_values, str(self))
+ )
self.leave_values.add(value)
return node
@@ -185,7 +220,7 @@ def onnx_attributes(self):
Operators-ml.md#ai.onnx.ml.TreeEnsembleRegressor>`_.
"""
atts = dict(
- aggregate_function='SUM',
+ aggregate_function="SUM",
base_values=[float(0)],
n_targets=1,
nodes_featureids=[n.feature for n in self.nodes],
@@ -194,14 +229,16 @@ def onnx_attributes(self):
nodes_nodeids=[i for i in range(len(self.nodes))],
nodes_treeids=[0 for n in self.nodes],
nodes_values=[float(n.onnx_threshold) for n in self.nodes],
- post_transform='NONE',
- target_ids=[0 for n in self.nodes if n.onnx_mode == 'LEAF'],
- target_nodeids=[i for i, n in enumerate(self.nodes)
- if n.onnx_mode == 'LEAF'],
- target_treeids=[0 for n in self.nodes
- if n.onnx_mode == 'LEAF'],
- target_weights=[float(n.onnx_value) for n in self.nodes
- if n.onnx_mode == 'LEAF'])
+ post_transform="NONE",
+ target_ids=[0 for n in self.nodes if n.onnx_mode == "LEAF"],
+ target_nodeids=[
+ i for i, n in enumerate(self.nodes) if n.onnx_mode == "LEAF"
+ ],
+ target_treeids=[0 for n in self.nodes if n.onnx_mode == "LEAF"],
+ target_weights=[
+ float(n.onnx_value) for n in self.nodes if n.onnx_mode == "LEAF"
+ ],
+ )
ids = {id(n): (i, n) for i, n in enumerate(self.nodes)}
nodes_truenodeids = [0 for n in self.nodes] # right
@@ -215,12 +252,14 @@ def onnx_attributes(self):
nodes_truenodeids[val[0]] = i
else:
nodes_falsenodeids[val[0]] = i
- atts.update(dict(
- nodes_falsenodeids=nodes_falsenodeids,
- nodes_truenodeids=nodes_truenodeids))
- if len(atts['target_weights']) != len(set(atts['target_weights'])):
- warnings.warn(
- "All targets should be unique %r." % atts['target_weights'])
+ atts.update(
+ dict(
+ nodes_falsenodeids=nodes_falsenodeids,
+ nodes_truenodeids=nodes_truenodeids,
+ )
+ )
+ if len(atts["target_weights"]) != len(set(atts["target_weights"])):
+ warnings.warn("All targets should be unique %r." % atts["target_weights"])
return atts
def mapping(self, intervals):
@@ -230,18 +269,21 @@ def mapping(self, intervals):
`intervals_rights_` as dictionary `{idx: interval}`
each side intersects.
"""
+
def process(node, intervals):
- if hasattr(node, 'intervals_'):
+ if hasattr(node, "intervals_"):
return 0
if node.parent is None or node.parent == -1:
node.intervals_ = intervals
else:
- if not hasattr(node.parent, 'intervals_'):
+ if not hasattr(node.parent, "intervals_"):
return 0
node.intervals_ = (
- node.parent.intervals_left_ if node.is_left
- else node.parent.intervals_right_)
+ node.parent.intervals_left_
+ if node.is_left
+ else node.parent.intervals_right_
+ )
if node.value is not None:
# leaf
@@ -267,7 +309,7 @@ def process(node, intervals):
return 1
for node in self.nodes:
- for at in ['intervals_', 'intervals_left_', 'intervals_right_']:
+ for at in ["intervals_", "intervals_left_", "intervals_right_"]:
if hasattr(node, at):
delattr(node, at)
@@ -307,8 +349,8 @@ def digitize2tree(bins, right=False, feature=0):
def add_root(index):
if index < 0 or index >= len(bins):
raise IndexError( # pragma: no cover
- "Unexpected index %d / len(bins)=%d." % (
- index, len(bins)))
+ "Unexpected index %d / len(bins)=%d." % (index, len(bins))
+ )
parent = -1
is_left = False
is_leaf = False
@@ -325,8 +367,7 @@ def add_nodes(parent, i, j, is_left):
if i == j:
# leaf
value = parent.index * 2
- n = tree.add_node(
- parent, is_left, True, 0, 0, value=value, index=i)
+ n = tree.add_node(parent, is_left, True, 0, 0, value=value, index=i)
n_nodes.append(n)
values.append(i)
return n
@@ -355,8 +396,7 @@ def add_nodes(parent, i, j, is_left):
# leaf
value = parent.index * 2 + 1
values.append(j)
- n = tree.add_node(
- parent, is_left, True, 0, 0, value=value, index=j)
+ n = tree.add_node(parent, is_left, True, 0, 0, value=value, index=j)
n_nodes.append(n)
return n
if i + 1 < j:
@@ -370,8 +410,8 @@ def add_nodes(parent, i, j, is_left):
add_nodes(n, index, j, False)
return n
raise NotImplementedError( # pragma: no cover
- "Unexpected case where i=%r, j=%r, is_left=%r." % (
- i, j, is_left))
+ "Unexpected case where i=%r, j=%r, is_left=%r." % (i, j, is_left)
+ )
index = len(bins) // 2
root = add_root(index)
@@ -401,15 +441,13 @@ def _mapping_to_key_value(mapping, weights):
if len(v) == 0:
continue
if len(v) != 1:
- raise RuntimeError(
- 'Intervals overlops in mapping %r.' % mapping)
+ raise RuntimeError("Intervals overlops in mapping %r." % mapping)
value = list(v)[0]
key_value[float(k)] = float(weights[value])
return key_value
-def woe_converter(scope: Scope, operator: Operator,
- container: ModelComponentContainer):
+def woe_converter(scope: Scope, operator: Operator, container: ModelComponentContainer):
"""
ONNX Converter for WOETransformer.
It follows *skl2onnx* API.
@@ -426,7 +464,7 @@ def woe_converter(scope: Scope, operator: Operator,
vector_shape = np.array([-1], dtype=np.int64)
dtype = guess_numpy_type(X.type)
proto_type = guess_proto_type(X.type)
- verbose = getattr(container, 'verbose', 0)
+ verbose = getattr(container, "verbose", 0)
columns = []
@@ -439,7 +477,10 @@ def woe_converter(scope: Scope, operator: Operator,
columns.append(
OnnxReshapeApi13(
OnnxGather(X, index, op_version=opv, axis=1),
- new_shape, op_version=opv))
+ new_shape,
+ op_version=opv,
+ )
+ )
continue
# encoding columns
@@ -450,29 +491,34 @@ def woe_converter(scope: Scope, operator: Operator,
if op.onehot:
node = OnnxTreeEnsembleRegressor_1(
- X, op_version=1, domain='ai.onnx.ml', **atts)
- cats = list(sorted(set(int(n.onnx_value)
- for n in tree.nodes if n.is_leaf)))
+ X, op_version=1, domain="ai.onnx.ml", **atts
+ )
+ cats = list(sorted(set(int(n.onnx_value) for n in tree.nodes if n.is_leaf)))
mat_mapping = _mapping2matrix(mapping, cats, op.weights_[i], dtype)
if verbose > 1:
print("[woe_converter] mapping=%r" % mapping)
ohe = OnnxOneHotEncoder(
- OnnxReshapeApi13(
- node, vector_shape, op_version=opv),
- op_version=opv, cats_int64s=cats)
+ OnnxReshapeApi13(node, vector_shape, op_version=opv),
+ op_version=opv,
+ cats_int64s=cats,
+ )
ren = OnnxMatMul(
OnnxCast(ohe, op_version=opv, to=proto_type),
- mat_mapping, op_version=opv)
+ mat_mapping,
+ op_version=opv,
+ )
columns.append(ren)
else:
key_value = _mapping_to_key_value(mapping, op.weights_[i])
- atts['target_weights'] = [
- key_value.get(v, 0.) for v in atts['target_weights']]
+ atts["target_weights"] = [
+ key_value.get(v, 0.0) for v in atts["target_weights"]
+ ]
if verbose > 1:
print("[woe_converter] mapping=%r" % mapping)
print("[woe_converter] key_value=%r" % key_value)
node = OnnxTreeEnsembleRegressor_1(
- X, op_version=1, domain='ai.onnx.ml', **atts)
+ X, op_version=1, domain="ai.onnx.ml", **atts
+ )
lab = OnnxReshapeApi13(node, new_shape, op_version=opv)
columns.append(lab)
@@ -504,28 +550,29 @@ def woe_transformer_to_onnx(op, opset=None):
C = len(op.intervals_)
# inputs
- X = make_tensor_value_info(
- 'X', TensorProto.FLOAT, [None, len(op.intervals_)])
- Y = make_tensor_value_info(
- 'Y', TensorProto.FLOAT, [None, C])
+ X = make_tensor_value_info("X", TensorProto.FLOAT, [None, len(op.intervals_)])
+ Y = make_tensor_value_info("Y", TensorProto.FLOAT, [None, C])
# nodes
nodes = []
columns = []
- inits = [from_array(np.array([-1, 1], dtype=np.int64), name='new_shape'),
- from_array(np.array([-1], dtype=np.int64), name='vector_shape')]
+ inits = [
+ from_array(np.array([-1, 1], dtype=np.int64), name="new_shape"),
+ from_array(np.array([-1], dtype=np.int64), name="vector_shape"),
+ ]
thresholds = op._decision_thresholds(add_index=False)
for i, threshold in enumerate(thresholds):
if threshold is None:
# Passthrough columns
- inits.append(from_array(
- np.array([i], dtype=np.int64), name='index%d' % i))
- nodes.append(make_node(
- 'Gather', ['X', 'index%d' % i], ['col%d' % i], axis=1))
- nodes.append(make_node(
- 'Reshape', ['col%d' % i, 'new_shape'], ['reshr%d' % i]))
- columns.append('reshr%d' % i)
+ inits.append(from_array(np.array([i], dtype=np.int64), name="index%d" % i))
+ nodes.append(
+ make_node("Gather", ["X", "index%d" % i], ["col%d" % i], axis=1)
+ )
+ nodes.append(
+ make_node("Reshape", ["col%d" % i, "new_shape"], ["reshr%d" % i])
+ )
+ columns.append("reshr%d" % i)
continue
# encoding columns
@@ -535,49 +582,67 @@ def woe_transformer_to_onnx(op, opset=None):
atts = tree.onnx_attributes()
if op.onehot:
- nodes.append(make_node(
- 'TreeEnsembleRegressor', ['X'], ['rf%d' % i],
- domain='ai.onnx.ml', **atts))
- cats = list(sorted(set(int(n.onnx_value)
- for n in tree.nodes if n.is_leaf)))
- mat_mapping = _mapping2matrix(
- mapping, cats, op.weights_[i], np.float32)
- nodes.append(make_node(
- 'Reshape', ['rf%d' % i, 'vector_shape'], ['resh%d' % i]))
- nodes.append(make_node(
- 'OneHotEncoder', ['resh%d' % i], ['ohe%d' % i],
- domain='ai.onnx.ml', cats_int64s=cats))
- nodes.append(make_node(
- 'Cast', ['ohe%d' % i], ['cast%d' % i], to=TensorProto.FLOAT))
- inits.append(from_array(mat_mapping, 'mat_map%i' % i))
- nodes.append(make_node(
- 'MatMul', ['cast%d' % i, 'mat_map%i' % i], ["mul%d" % i]))
+ nodes.append(
+ make_node(
+ "TreeEnsembleRegressor",
+ ["X"],
+ ["rf%d" % i],
+ domain="ai.onnx.ml",
+ **atts
+ )
+ )
+ cats = list(sorted(set(int(n.onnx_value) for n in tree.nodes if n.is_leaf)))
+ mat_mapping = _mapping2matrix(mapping, cats, op.weights_[i], np.float32)
+ nodes.append(
+ make_node("Reshape", ["rf%d" % i, "vector_shape"], ["resh%d" % i])
+ )
+ nodes.append(
+ make_node(
+ "OneHotEncoder",
+ ["resh%d" % i],
+ ["ohe%d" % i],
+ domain="ai.onnx.ml",
+ cats_int64s=cats,
+ )
+ )
+ nodes.append(
+ make_node("Cast", ["ohe%d" % i], ["cast%d" % i], to=TensorProto.FLOAT)
+ )
+ inits.append(from_array(mat_mapping, "mat_map%i" % i))
+ nodes.append(
+ make_node("MatMul", ["cast%d" % i, "mat_map%i" % i], ["mul%d" % i])
+ )
columns.append("mul%d" % i)
else:
key_value = _mapping_to_key_value(mapping, op.weights_[i])
- atts['target_weights'] = [
- key_value.get(v, 0.) for v in atts['target_weights']]
- nodes.append(make_node(
- 'TreeEnsembleRegressor', ['X'], ['rf%d' % i],
- domain='ai.onnx.ml', **atts))
- nodes.append(make_node(
- 'Reshape', ['rf%d' % i, 'new_shape'], ['lab%d' % i]))
+ atts["target_weights"] = [
+ key_value.get(v, 0.0) for v in atts["target_weights"]
+ ]
+ nodes.append(
+ make_node(
+ "TreeEnsembleRegressor",
+ ["X"],
+ ["rf%d" % i],
+ domain="ai.onnx.ml",
+ **atts
+ )
+ )
+ nodes.append(make_node("Reshape", ["rf%d" % i, "new_shape"], ["lab%d" % i]))
columns.append("lab%d" % i)
- nodes.append(make_node(
- 'Concat', columns, ['Y'], axis=1))
+ nodes.append(make_node("Concat", columns, ["Y"], axis=1))
# final graph
- graph_def = make_graph(nodes, 't1', [X], [Y], inits)
- model_def = make_model(graph_def, producer_name='skl2onnx')
+ graph_def = make_graph(nodes, "t1", [X], [Y], inits)
+ model_def = make_model(graph_def, producer_name="skl2onnx")
if opset is not None:
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
- op_set.domain = ''
+ op_set.domain = ""
op_set.version = opset
op_set = model_def.opset_import.add()
- op_set.domain = 'ai.onnx.ml'
+ op_set.domain = "ai.onnx.ml"
op_set.version = 2
irv = OPSET_TO_IR_VERSION.get(opset, onnx_proto.IR_VERSION)
model_def.ir_version = irv
@@ -593,5 +658,9 @@ def woe_transformer_to_onnx(op, opset=None):
def register():
"Register converter for WOETransformer."
update_registered_converter(
- WOETransformer, "Skl2onnxWOETransformer",
- woe_shape_calculator, woe_converter, parser=woe_parser)
+ WOETransformer,
+ "Skl2onnxWOETransformer",
+ woe_shape_calculator,
+ woe_converter,
+ parser=woe_parser,
+ )
diff --git a/skl2onnx/tutorial/benchmark.py b/skl2onnx/tutorial/benchmark.py
index 7d8ccc443..411342bc9 100644
--- a/skl2onnx/tutorial/benchmark.py
+++ b/skl2onnx/tutorial/benchmark.py
@@ -38,8 +38,14 @@ def measure_time(stmt, context, repeat=10, number=50, div_by_number=False):
if div_by_number:
res /= number
mean = numpy.mean(res)
- dev = numpy.mean(res ** 2)
+ dev = numpy.mean(res**2)
dev = (dev - mean**2) ** 0.5
- mes = dict(average=mean, deviation=dev, min_exec=numpy.min(res),
- max_exec=numpy.max(res), repeat=repeat, number=number)
+ mes = dict(
+ average=mean,
+ deviation=dev,
+ min_exec=numpy.min(res),
+ max_exec=numpy.max(res),
+ repeat=repeat,
+ number=number,
+ )
return mes
diff --git a/skl2onnx/tutorial/imagenet_classes.py b/skl2onnx/tutorial/imagenet_classes.py
index 567c60650..bdeb97969 100644
--- a/skl2onnx/tutorial/imagenet_classes.py
+++ b/skl2onnx/tutorial/imagenet_classes.py
@@ -5,1029 +5,1020 @@
"""
class_names = {
- 0: 'tench, Tinca tinca',
- 1: 'goldfish, Carassius auratus',
- 2: 'great white shark, white shark, man-eater, man-eating shark, '
- 'Carcharodon carcharias',
- 3: 'tiger shark, Galeocerdo cuvieri',
- 4: 'hammerhead, hammerhead shark',
- 5: 'electric ray, crampfish, numbfish, torpedo',
- 6: 'stingray',
- 7: 'cock',
- 8: 'hen',
- 9: 'ostrich, Struthio camelus',
- 10: 'brambling, Fringilla montifringilla',
- 11: 'goldfinch, Carduelis carduelis',
- 12: 'house finch, linnet, Carpodacus mexicanus',
- 13: 'junco, snowbird',
- 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
- 15: 'robin, American robin, Turdus migratorius',
- 16: 'bulbul',
- 17: 'jay',
- 18: 'magpie',
- 19: 'chickadee',
- 20: 'water ouzel, dipper',
- 21: 'kite',
- 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
- 23: 'vulture',
- 24: 'great grey owl, great gray owl, Strix nebulosa',
- 25: 'European fire salamander, Salamandra salamandra',
- 26: 'common newt, Triturus vulgaris',
- 27: 'eft',
- 28: 'spotted salamander, Ambystoma maculatum',
- 29: 'axolotl, mud puppy, Ambystoma mexicanum',
- 30: 'bullfrog, Rana catesbeiana',
- 31: 'tree frog, tree-frog',
- 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
- 33: 'loggerhead, loggerhead turtle, Caretta caretta',
- 34: 'leatherback turtle, leatherback, leathery turtle, '
- 'Dermochelys coriacea',
- 35: 'mud turtle',
- 36: 'terrapin',
- 37: 'box turtle, box tortoise',
- 38: 'banded gecko',
- 39: 'common iguana, iguana, Iguana iguana',
- 40: 'American chameleon, anole, Anolis carolinensis',
- 41: 'whiptail, whiptail lizard',
- 42: 'agama',
- 43: 'frilled lizard, Chlamydosaurus kingi',
- 44: 'alligator lizard',
- 45: 'Gila monster, Heloderma suspectum',
- 46: 'green lizard, Lacerta viridis',
- 47: 'African chameleon, Chamaeleo chamaeleon',
- 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, '
- 'Varanus komodoensis',
- 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
- 50: 'American alligator, Alligator mississipiensis',
- 51: 'triceratops',
- 52: 'thunder snake, worm snake, Carphophis amoenus',
- 53: 'ringneck snake, ring-necked snake, ring snake',
- 54: 'hognose snake, puff adder, sand viper',
- 55: 'green snake, grass snake',
- 56: 'king snake, kingsnake',
- 57: 'garter snake, grass snake',
- 58: 'water snake',
- 59: 'vine snake',
- 60: 'night snake, Hypsiglena torquata',
- 61: 'boa constrictor, Constrictor constrictor',
- 62: 'rock python, rock snake, Python sebae',
- 63: 'Indian cobra, Naja naja',
- 64: 'green mamba',
- 65: 'sea snake',
- 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
- 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
- 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
- 69: 'trilobite',
- 70: 'harvestman, daddy longlegs, Phalangium opilio',
- 71: 'scorpion',
- 72: 'black and gold garden spider, Argiope aurantia',
- 73: 'barn spider, Araneus cavaticus',
- 74: 'garden spider, Aranea diademata',
- 75: 'black widow, Latrodectus mactans',
- 76: 'tarantula',
- 77: 'wolf spider, hunting spider',
- 78: 'tick',
- 79: 'centipede',
- 80: 'black grouse',
- 81: 'ptarmigan',
- 82: 'ruffed grouse, partridge, Bonasa umbellus',
- 83: 'prairie chicken, prairie grouse, prairie fowl',
- 84: 'peacock',
- 85: 'quail',
- 86: 'partridge',
- 87: 'African grey, African gray, Psittacus erithacus',
- 88: 'macaw',
- 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
- 90: 'lorikeet',
- 91: 'coucal',
- 92: 'bee eater',
- 93: 'hornbill',
- 94: 'hummingbird',
- 95: 'jacamar',
- 96: 'toucan',
- 97: 'drake',
- 98: 'red-breasted merganser, Mergus serrator',
- 99: 'goose',
- 100: 'black swan, Cygnus atratus',
- 101: 'tusker',
- 102: 'echidna, spiny anteater, anteater',
- 103: 'platypus, duckbill, duckbilled platypus, duck-billed '
- 'platypus, Ornithorhynchus anatinus',
- 104: 'wallaby, brush kangaroo',
- 105: 'koala, koala bear, kangaroo bear, native bear, '
- 'Phascolarctos cinereus',
- 106: 'wombat',
- 107: 'jellyfish',
- 108: 'sea anemone, anemone',
- 109: 'brain coral',
- 110: 'flatworm, platyhelminth',
- 111: 'nematode, nematode worm, roundworm',
- 112: 'conch',
- 113: 'snail',
- 114: 'slug',
- 115: 'sea slug, nudibranch',
- 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
- 117: 'chambered nautilus, pearly nautilus, nautilus',
- 118: 'Dungeness crab, Cancer magister',
- 119: 'rock crab, Cancer irroratus',
- 120: 'fiddler crab',
- 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king '
- 'crab, Paralithodes camtschatica',
- 122: 'American lobster, Northern lobster, Maine lobster, '
- 'Homarus americanus',
- 123: 'spiny lobster, langouste, rock lobster, crawfish, '
- 'crayfish, sea crawfish',
- 124: 'crayfish, crawfish, crawdad, crawdaddy',
- 125: 'hermit crab',
- 126: 'isopod',
- 127: 'white stork, Ciconia ciconia',
- 128: 'black stork, Ciconia nigra',
- 129: 'spoonbill',
- 130: 'flamingo',
- 131: 'little blue heron, Egretta caerulea',
- 132: 'American egret, great white heron, Egretta albus',
- 133: 'bittern',
- 134: 'crane',
- 135: 'limpkin, Aramus pictus',
- 136: 'European gallinule, Porphyrio porphyrio',
- 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
- 138: 'bustard',
- 139: 'ruddy turnstone, Arenaria interpres',
- 140: 'red-backed sandpiper, dunlin, Erolia alpina',
- 141: 'redshank, Tringa totanus',
- 142: 'dowitcher',
- 143: 'oystercatcher, oyster catcher',
- 144: 'pelican',
- 145: 'king penguin, Aptenodytes patagonica',
- 146: 'albatross, mollymawk',
- 147: 'grey whale, gray whale, devilfish, Eschrichtius '
- 'gibbosus, Eschrichtius robustus',
- 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
- 149: 'dugong, Dugong dugon',
- 150: 'sea lion',
- 151: 'Chihuahua',
- 152: 'Japanese spaniel',
- 153: 'Maltese dog, Maltese terrier, Maltese',
- 154: 'Pekinese, Pekingese, Peke',
- 155: 'Shih-Tzu',
- 156: 'Blenheim spaniel',
- 157: 'papillon',
- 158: 'toy terrier',
- 159: 'Rhodesian ridgeback',
- 160: 'Afghan hound, Afghan',
- 161: 'basset, basset hound',
- 162: 'beagle',
- 163: 'bloodhound, sleuthhound',
- 164: 'bluetick',
- 165: 'black-and-tan coonhound',
- 166: 'Walker hound, Walker foxhound',
- 167: 'English foxhound',
- 168: 'redbone',
- 169: 'borzoi, Russian wolfhound',
- 170: 'Irish wolfhound',
- 171: 'Italian greyhound',
- 172: 'whippet',
- 173: 'Ibizan hound, Ibizan Podenco',
- 174: 'Norwegian elkhound, elkhound',
- 175: 'otterhound, otter hound',
- 176: 'Saluki, gazelle hound',
- 177: 'Scottish deerhound, deerhound',
- 178: 'Weimaraner',
- 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
- 180: 'American Staffordshire terrier, Staffordshire terrier, '
- 'American pit bull terrier, pit bull terrier',
- 181: 'Bedlington terrier',
- 182: 'Border terrier',
- 183: 'Kerry blue terrier',
- 184: 'Irish terrier',
- 185: 'Norfolk terrier',
- 186: 'Norwich terrier',
- 187: 'Yorkshire terrier',
- 188: 'wire-haired fox terrier',
- 189: 'Lakeland terrier',
- 190: 'Sealyham terrier, Sealyham',
- 191: 'Airedale, Airedale terrier',
- 192: 'cairn, cairn terrier',
- 193: 'Australian terrier',
- 194: 'Dandie Dinmont, Dandie Dinmont terrier',
- 195: 'Boston bull, Boston terrier',
- 196: 'miniature schnauzer',
- 197: 'giant schnauzer',
- 198: 'standard schnauzer',
- 199: 'Scotch terrier, Scottish terrier, Scottie',
- 200: 'Tibetan terrier, chrysanthemum dog',
- 201: 'silky terrier, Sydney silky',
- 202: 'soft-coated wheaten terrier',
- 203: 'West Highland white terrier',
- 204: 'Lhasa, Lhasa apso',
- 205: 'flat-coated retriever',
- 206: 'curly-coated retriever',
- 207: 'golden retriever',
- 208: 'Labrador retriever',
- 209: 'Chesapeake Bay retriever',
- 210: 'German short-haired pointer',
- 211: 'vizsla, Hungarian pointer',
- 212: 'English setter',
- 213: 'Irish setter, red setter',
- 214: 'Gordon setter',
- 215: 'Brittany spaniel',
- 216: 'clumber, clumber spaniel',
- 217: 'English springer, English springer spaniel',
- 218: 'Welsh springer spaniel',
- 219: 'cocker spaniel, English cocker spaniel, cocker',
- 220: 'Sussex spaniel',
- 221: 'Irish water spaniel',
- 222: 'kuvasz',
- 223: 'schipperke',
- 224: 'groenendael',
- 225: 'malinois',
- 226: 'briard',
- 227: 'kelpie',
- 228: 'komondor',
- 229: 'Old English sheepdog, bobtail',
- 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
- 231: 'collie',
- 232: 'Border collie',
- 233: 'Bouvier des Flandres, Bouviers des Flandres',
- 234: 'Rottweiler',
- 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
- 236: 'Doberman, Doberman pinscher',
- 237: 'miniature pinscher',
- 238: 'Greater Swiss Mountain dog',
- 239: 'Bernese mountain dog',
- 240: 'Appenzeller',
- 241: 'EntleBucher',
- 242: 'boxer',
- 243: 'bull mastiff',
- 244: 'Tibetan mastiff',
- 245: 'French bulldog',
- 246: 'Great Dane',
- 247: 'Saint Bernard, St Bernard',
- 248: 'Eskimo dog, husky',
- 249: 'malamute, malemute, Alaskan malamute',
- 250: 'Siberian husky',
- 251: 'dalmatian, coach dog, carriage dog',
- 252: 'affenpinscher, monkey pinscher, monkey dog',
- 253: 'basenji',
- 254: 'pug, pug-dog',
- 255: 'Leonberg',
- 256: 'Newfoundland, Newfoundland dog',
- 257: 'Great Pyrenees',
- 258: 'Samoyed, Samoyede',
- 259: 'Pomeranian',
- 260: 'chow, chow chow',
- 261: 'keeshond',
- 262: 'Brabancon griffon',
- 263: 'Pembroke, Pembroke Welsh corgi',
- 264: 'Cardigan, Cardigan Welsh corgi',
- 265: 'toy poodle',
- 266: 'miniature poodle',
- 267: 'standard poodle',
- 268: 'Mexican hairless',
- 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
- 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
- 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
- 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
- 273: 'dingo, warrigal, warragal, Canis dingo',
- 274: 'dhole, Cuon alpinus',
- 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
- 276: 'hyena, hyaena',
- 277: 'red fox, Vulpes vulpes',
- 278: 'kit fox, Vulpes macrotis',
- 279: 'Arctic fox, white fox, Alopex lagopus',
- 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
- 281: 'tabby, tabby cat',
- 282: 'tiger cat',
- 283: 'Persian cat',
- 284: 'Siamese cat, Siamese',
- 285: 'Egyptian cat',
- 286: 'cougar, puma, catamount, mountain lion, painter, '
- 'panther, Felis concolor',
- 287: 'lynx, catamount',
- 288: 'leopard, Panthera pardus',
- 289: 'snow leopard, ounce, Panthera uncia',
- 290: 'jaguar, panther, Panthera onca, Felis onca',
- 291: 'lion, king of beasts, Panthera leo',
- 292: 'tiger, Panthera tigris',
- 293: 'cheetah, chetah, Acinonyx jubatus',
- 294: 'brown bear, bruin, Ursus arctos',
- 295: 'American black bear, black bear, Ursus americanus, '
- 'Euarctos americanus',
- 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
- 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
- 298: 'mongoose',
- 299: 'meerkat, mierkat',
- 300: 'tiger beetle',
- 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
- 302: 'ground beetle, carabid beetle',
- 303: 'long-horned beetle, longicorn, longicorn beetle',
- 304: 'leaf beetle, chrysomelid',
- 305: 'dung beetle',
- 306: 'rhinoceros beetle',
- 307: 'weevil',
- 308: 'fly',
- 309: 'bee',
- 310: 'ant, emmet, pismire',
- 311: 'grasshopper, hopper',
- 312: 'cricket',
- 313: 'walking stick, walkingstick, stick insect',
- 314: 'cockroach, roach',
- 315: 'mantis, mantid',
- 316: 'cicada, cicala',
- 317: 'leafhopper',
- 318: 'lacewing, lacewing fly',
+ 0: "tench, Tinca tinca",
+ 1: "goldfish, Carassius auratus",
+ 2: "great white shark, white shark, man-eater, man-eating shark, "
+ "Carcharodon carcharias",
+ 3: "tiger shark, Galeocerdo cuvieri",
+ 4: "hammerhead, hammerhead shark",
+ 5: "electric ray, crampfish, numbfish, torpedo",
+ 6: "stingray",
+ 7: "cock",
+ 8: "hen",
+ 9: "ostrich, Struthio camelus",
+ 10: "brambling, Fringilla montifringilla",
+ 11: "goldfinch, Carduelis carduelis",
+ 12: "house finch, linnet, Carpodacus mexicanus",
+ 13: "junco, snowbird",
+ 14: "indigo bunting, indigo finch, indigo bird, Passerina cyanea",
+ 15: "robin, American robin, Turdus migratorius",
+ 16: "bulbul",
+ 17: "jay",
+ 18: "magpie",
+ 19: "chickadee",
+ 20: "water ouzel, dipper",
+ 21: "kite",
+ 22: "bald eagle, American eagle, Haliaeetus leucocephalus",
+ 23: "vulture",
+ 24: "great grey owl, great gray owl, Strix nebulosa",
+ 25: "European fire salamander, Salamandra salamandra",
+ 26: "common newt, Triturus vulgaris",
+ 27: "eft",
+ 28: "spotted salamander, Ambystoma maculatum",
+ 29: "axolotl, mud puppy, Ambystoma mexicanum",
+ 30: "bullfrog, Rana catesbeiana",
+ 31: "tree frog, tree-frog",
+ 32: "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
+ 33: "loggerhead, loggerhead turtle, Caretta caretta",
+ 34: "leatherback turtle, leatherback, leathery turtle, " "Dermochelys coriacea",
+ 35: "mud turtle",
+ 36: "terrapin",
+ 37: "box turtle, box tortoise",
+ 38: "banded gecko",
+ 39: "common iguana, iguana, Iguana iguana",
+ 40: "American chameleon, anole, Anolis carolinensis",
+ 41: "whiptail, whiptail lizard",
+ 42: "agama",
+ 43: "frilled lizard, Chlamydosaurus kingi",
+ 44: "alligator lizard",
+ 45: "Gila monster, Heloderma suspectum",
+ 46: "green lizard, Lacerta viridis",
+ 47: "African chameleon, Chamaeleo chamaeleon",
+ 48: "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, "
+ "Varanus komodoensis",
+ 49: "African crocodile, Nile crocodile, Crocodylus niloticus",
+ 50: "American alligator, Alligator mississipiensis",
+ 51: "triceratops",
+ 52: "thunder snake, worm snake, Carphophis amoenus",
+ 53: "ringneck snake, ring-necked snake, ring snake",
+ 54: "hognose snake, puff adder, sand viper",
+ 55: "green snake, grass snake",
+ 56: "king snake, kingsnake",
+ 57: "garter snake, grass snake",
+ 58: "water snake",
+ 59: "vine snake",
+ 60: "night snake, Hypsiglena torquata",
+ 61: "boa constrictor, Constrictor constrictor",
+ 62: "rock python, rock snake, Python sebae",
+ 63: "Indian cobra, Naja naja",
+ 64: "green mamba",
+ 65: "sea snake",
+ 66: "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus",
+ 67: "diamondback, diamondback rattlesnake, Crotalus adamanteus",
+ 68: "sidewinder, horned rattlesnake, Crotalus cerastes",
+ 69: "trilobite",
+ 70: "harvestman, daddy longlegs, Phalangium opilio",
+ 71: "scorpion",
+ 72: "black and gold garden spider, Argiope aurantia",
+ 73: "barn spider, Araneus cavaticus",
+ 74: "garden spider, Aranea diademata",
+ 75: "black widow, Latrodectus mactans",
+ 76: "tarantula",
+ 77: "wolf spider, hunting spider",
+ 78: "tick",
+ 79: "centipede",
+ 80: "black grouse",
+ 81: "ptarmigan",
+ 82: "ruffed grouse, partridge, Bonasa umbellus",
+ 83: "prairie chicken, prairie grouse, prairie fowl",
+ 84: "peacock",
+ 85: "quail",
+ 86: "partridge",
+ 87: "African grey, African gray, Psittacus erithacus",
+ 88: "macaw",
+ 89: "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita",
+ 90: "lorikeet",
+ 91: "coucal",
+ 92: "bee eater",
+ 93: "hornbill",
+ 94: "hummingbird",
+ 95: "jacamar",
+ 96: "toucan",
+ 97: "drake",
+ 98: "red-breasted merganser, Mergus serrator",
+ 99: "goose",
+ 100: "black swan, Cygnus atratus",
+ 101: "tusker",
+ 102: "echidna, spiny anteater, anteater",
+ 103: "platypus, duckbill, duckbilled platypus, duck-billed "
+ "platypus, Ornithorhynchus anatinus",
+ 104: "wallaby, brush kangaroo",
+ 105: "koala, koala bear, kangaroo bear, native bear, " "Phascolarctos cinereus",
+ 106: "wombat",
+ 107: "jellyfish",
+ 108: "sea anemone, anemone",
+ 109: "brain coral",
+ 110: "flatworm, platyhelminth",
+ 111: "nematode, nematode worm, roundworm",
+ 112: "conch",
+ 113: "snail",
+ 114: "slug",
+ 115: "sea slug, nudibranch",
+ 116: "chiton, coat-of-mail shell, sea cradle, polyplacophore",
+ 117: "chambered nautilus, pearly nautilus, nautilus",
+ 118: "Dungeness crab, Cancer magister",
+ 119: "rock crab, Cancer irroratus",
+ 120: "fiddler crab",
+ 121: "king crab, Alaska crab, Alaskan king crab, Alaska king "
+ "crab, Paralithodes camtschatica",
+ 122: "American lobster, Northern lobster, Maine lobster, " "Homarus americanus",
+ 123: "spiny lobster, langouste, rock lobster, crawfish, " "crayfish, sea crawfish",
+ 124: "crayfish, crawfish, crawdad, crawdaddy",
+ 125: "hermit crab",
+ 126: "isopod",
+ 127: "white stork, Ciconia ciconia",
+ 128: "black stork, Ciconia nigra",
+ 129: "spoonbill",
+ 130: "flamingo",
+ 131: "little blue heron, Egretta caerulea",
+ 132: "American egret, great white heron, Egretta albus",
+ 133: "bittern",
+ 134: "crane",
+ 135: "limpkin, Aramus pictus",
+ 136: "European gallinule, Porphyrio porphyrio",
+ 137: "American coot, marsh hen, mud hen, water hen, Fulica americana",
+ 138: "bustard",
+ 139: "ruddy turnstone, Arenaria interpres",
+ 140: "red-backed sandpiper, dunlin, Erolia alpina",
+ 141: "redshank, Tringa totanus",
+ 142: "dowitcher",
+ 143: "oystercatcher, oyster catcher",
+ 144: "pelican",
+ 145: "king penguin, Aptenodytes patagonica",
+ 146: "albatross, mollymawk",
+ 147: "grey whale, gray whale, devilfish, Eschrichtius "
+ "gibbosus, Eschrichtius robustus",
+ 148: "killer whale, killer, orca, grampus, sea wolf, Orcinus orca",
+ 149: "dugong, Dugong dugon",
+ 150: "sea lion",
+ 151: "Chihuahua",
+ 152: "Japanese spaniel",
+ 153: "Maltese dog, Maltese terrier, Maltese",
+ 154: "Pekinese, Pekingese, Peke",
+ 155: "Shih-Tzu",
+ 156: "Blenheim spaniel",
+ 157: "papillon",
+ 158: "toy terrier",
+ 159: "Rhodesian ridgeback",
+ 160: "Afghan hound, Afghan",
+ 161: "basset, basset hound",
+ 162: "beagle",
+ 163: "bloodhound, sleuthhound",
+ 164: "bluetick",
+ 165: "black-and-tan coonhound",
+ 166: "Walker hound, Walker foxhound",
+ 167: "English foxhound",
+ 168: "redbone",
+ 169: "borzoi, Russian wolfhound",
+ 170: "Irish wolfhound",
+ 171: "Italian greyhound",
+ 172: "whippet",
+ 173: "Ibizan hound, Ibizan Podenco",
+ 174: "Norwegian elkhound, elkhound",
+ 175: "otterhound, otter hound",
+ 176: "Saluki, gazelle hound",
+ 177: "Scottish deerhound, deerhound",
+ 178: "Weimaraner",
+ 179: "Staffordshire bullterrier, Staffordshire bull terrier",
+ 180: "American Staffordshire terrier, Staffordshire terrier, "
+ "American pit bull terrier, pit bull terrier",
+ 181: "Bedlington terrier",
+ 182: "Border terrier",
+ 183: "Kerry blue terrier",
+ 184: "Irish terrier",
+ 185: "Norfolk terrier",
+ 186: "Norwich terrier",
+ 187: "Yorkshire terrier",
+ 188: "wire-haired fox terrier",
+ 189: "Lakeland terrier",
+ 190: "Sealyham terrier, Sealyham",
+ 191: "Airedale, Airedale terrier",
+ 192: "cairn, cairn terrier",
+ 193: "Australian terrier",
+ 194: "Dandie Dinmont, Dandie Dinmont terrier",
+ 195: "Boston bull, Boston terrier",
+ 196: "miniature schnauzer",
+ 197: "giant schnauzer",
+ 198: "standard schnauzer",
+ 199: "Scotch terrier, Scottish terrier, Scottie",
+ 200: "Tibetan terrier, chrysanthemum dog",
+ 201: "silky terrier, Sydney silky",
+ 202: "soft-coated wheaten terrier",
+ 203: "West Highland white terrier",
+ 204: "Lhasa, Lhasa apso",
+ 205: "flat-coated retriever",
+ 206: "curly-coated retriever",
+ 207: "golden retriever",
+ 208: "Labrador retriever",
+ 209: "Chesapeake Bay retriever",
+ 210: "German short-haired pointer",
+ 211: "vizsla, Hungarian pointer",
+ 212: "English setter",
+ 213: "Irish setter, red setter",
+ 214: "Gordon setter",
+ 215: "Brittany spaniel",
+ 216: "clumber, clumber spaniel",
+ 217: "English springer, English springer spaniel",
+ 218: "Welsh springer spaniel",
+ 219: "cocker spaniel, English cocker spaniel, cocker",
+ 220: "Sussex spaniel",
+ 221: "Irish water spaniel",
+ 222: "kuvasz",
+ 223: "schipperke",
+ 224: "groenendael",
+ 225: "malinois",
+ 226: "briard",
+ 227: "kelpie",
+ 228: "komondor",
+ 229: "Old English sheepdog, bobtail",
+ 230: "Shetland sheepdog, Shetland sheep dog, Shetland",
+ 231: "collie",
+ 232: "Border collie",
+ 233: "Bouvier des Flandres, Bouviers des Flandres",
+ 234: "Rottweiler",
+ 235: "German shepherd, German shepherd dog, German police dog, alsatian",
+ 236: "Doberman, Doberman pinscher",
+ 237: "miniature pinscher",
+ 238: "Greater Swiss Mountain dog",
+ 239: "Bernese mountain dog",
+ 240: "Appenzeller",
+ 241: "EntleBucher",
+ 242: "boxer",
+ 243: "bull mastiff",
+ 244: "Tibetan mastiff",
+ 245: "French bulldog",
+ 246: "Great Dane",
+ 247: "Saint Bernard, St Bernard",
+ 248: "Eskimo dog, husky",
+ 249: "malamute, malemute, Alaskan malamute",
+ 250: "Siberian husky",
+ 251: "dalmatian, coach dog, carriage dog",
+ 252: "affenpinscher, monkey pinscher, monkey dog",
+ 253: "basenji",
+ 254: "pug, pug-dog",
+ 255: "Leonberg",
+ 256: "Newfoundland, Newfoundland dog",
+ 257: "Great Pyrenees",
+ 258: "Samoyed, Samoyede",
+ 259: "Pomeranian",
+ 260: "chow, chow chow",
+ 261: "keeshond",
+ 262: "Brabancon griffon",
+ 263: "Pembroke, Pembroke Welsh corgi",
+ 264: "Cardigan, Cardigan Welsh corgi",
+ 265: "toy poodle",
+ 266: "miniature poodle",
+ 267: "standard poodle",
+ 268: "Mexican hairless",
+ 269: "timber wolf, grey wolf, gray wolf, Canis lupus",
+ 270: "white wolf, Arctic wolf, Canis lupus tundrarum",
+ 271: "red wolf, maned wolf, Canis rufus, Canis niger",
+ 272: "coyote, prairie wolf, brush wolf, Canis latrans",
+ 273: "dingo, warrigal, warragal, Canis dingo",
+ 274: "dhole, Cuon alpinus",
+ 275: "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus",
+ 276: "hyena, hyaena",
+ 277: "red fox, Vulpes vulpes",
+ 278: "kit fox, Vulpes macrotis",
+ 279: "Arctic fox, white fox, Alopex lagopus",
+ 280: "grey fox, gray fox, Urocyon cinereoargenteus",
+ 281: "tabby, tabby cat",
+ 282: "tiger cat",
+ 283: "Persian cat",
+ 284: "Siamese cat, Siamese",
+ 285: "Egyptian cat",
+ 286: "cougar, puma, catamount, mountain lion, painter, " "panther, Felis concolor",
+ 287: "lynx, catamount",
+ 288: "leopard, Panthera pardus",
+ 289: "snow leopard, ounce, Panthera uncia",
+ 290: "jaguar, panther, Panthera onca, Felis onca",
+ 291: "lion, king of beasts, Panthera leo",
+ 292: "tiger, Panthera tigris",
+ 293: "cheetah, chetah, Acinonyx jubatus",
+ 294: "brown bear, bruin, Ursus arctos",
+ 295: "American black bear, black bear, Ursus americanus, " "Euarctos americanus",
+ 296: "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus",
+ 297: "sloth bear, Melursus ursinus, Ursus ursinus",
+ 298: "mongoose",
+ 299: "meerkat, mierkat",
+ 300: "tiger beetle",
+ 301: "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle",
+ 302: "ground beetle, carabid beetle",
+ 303: "long-horned beetle, longicorn, longicorn beetle",
+ 304: "leaf beetle, chrysomelid",
+ 305: "dung beetle",
+ 306: "rhinoceros beetle",
+ 307: "weevil",
+ 308: "fly",
+ 309: "bee",
+ 310: "ant, emmet, pismire",
+ 311: "grasshopper, hopper",
+ 312: "cricket",
+ 313: "walking stick, walkingstick, stick insect",
+ 314: "cockroach, roach",
+ 315: "mantis, mantid",
+ 316: "cicada, cicala",
+ 317: "leafhopper",
+ 318: "lacewing, lacewing fly",
319: "dragonfly, darning needle, devil's darning needle, "
- "sewing needle, snake feeder, snake doctor, mosquito "
- "hawk, skeeter hawk",
- 320: 'damselfly',
- 321: 'admiral',
- 322: 'ringlet, ringlet butterfly',
- 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
- 324: 'cabbage butterfly',
- 325: 'sulphur butterfly, sulfur butterfly',
- 326: 'lycaenid, lycaenid butterfly',
- 327: 'starfish, sea star',
- 328: 'sea urchin',
- 329: 'sea cucumber, holothurian',
- 330: 'wood rabbit, cottontail, cottontail rabbit',
- 331: 'hare',
- 332: 'Angora, Angora rabbit',
- 333: 'hamster',
- 334: 'porcupine, hedgehog',
- 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
- 336: 'marmot',
- 337: 'beaver',
- 338: 'guinea pig, Cavia cobaya',
- 339: 'sorrel',
- 340: 'zebra',
- 341: 'hog, pig, grunter, squealer, Sus scrofa',
- 342: 'wild boar, boar, Sus scrofa',
- 343: 'warthog',
- 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
- 345: 'ox',
- 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
- 347: 'bison',
- 348: 'ram, tup',
- 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain '
- 'bighorn, Rocky Mountain sheep, Ovis canadensis',
- 350: 'ibex, Capra ibex',
- 351: 'hartebeest',
- 352: 'impala, Aepyceros melampus',
- 353: 'gazelle',
- 354: 'Arabian camel, dromedary, Camelus dromedarius',
- 355: 'llama',
- 356: 'weasel',
- 357: 'mink',
- 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
- 359: 'black-footed ferret, ferret, Mustela nigripes',
- 360: 'otter',
- 361: 'skunk, polecat, wood pussy',
- 362: 'badger',
- 363: 'armadillo',
- 364: 'three-toed sloth, ai, Bradypus tridactylus',
- 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
- 366: 'gorilla, Gorilla gorilla',
- 367: 'chimpanzee, chimp, Pan troglodytes',
- 368: 'gibbon, Hylobates lar',
- 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
- 370: 'guenon, guenon monkey',
- 371: 'patas, hussar monkey, Erythrocebus patas',
- 372: 'baboon',
- 373: 'macaque',
- 374: 'langur',
- 375: 'colobus, colobus monkey',
- 376: 'proboscis monkey, Nasalis larvatus',
- 377: 'marmoset',
- 378: 'capuchin, ringtail, Cebus capucinus',
- 379: 'howler monkey, howler',
- 380: 'titi, titi monkey',
- 381: 'spider monkey, Ateles geoffroyi',
- 382: 'squirrel monkey, Saimiri sciureus',
- 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
- 384: 'indri, indris, Indri indri, Indri brevicaudatus',
- 385: 'Indian elephant, Elephas maximus',
- 386: 'African elephant, Loxodonta africana',
- 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
- 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
- 389: 'barracouta, snoek',
- 390: 'eel',
- 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, '
- 'Oncorhynchus kisutch',
- 392: 'rock beauty, Holocanthus tricolor',
- 393: 'anemone fish',
- 394: 'sturgeon',
- 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
- 396: 'lionfish',
- 397: 'puffer, pufferfish, blowfish, globefish',
- 398: 'abacus',
- 399: 'abaya',
+ "sewing needle, snake feeder, snake doctor, mosquito "
+ "hawk, skeeter hawk",
+ 320: "damselfly",
+ 321: "admiral",
+ 322: "ringlet, ringlet butterfly",
+ 323: "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus",
+ 324: "cabbage butterfly",
+ 325: "sulphur butterfly, sulfur butterfly",
+ 326: "lycaenid, lycaenid butterfly",
+ 327: "starfish, sea star",
+ 328: "sea urchin",
+ 329: "sea cucumber, holothurian",
+ 330: "wood rabbit, cottontail, cottontail rabbit",
+ 331: "hare",
+ 332: "Angora, Angora rabbit",
+ 333: "hamster",
+ 334: "porcupine, hedgehog",
+ 335: "fox squirrel, eastern fox squirrel, Sciurus niger",
+ 336: "marmot",
+ 337: "beaver",
+ 338: "guinea pig, Cavia cobaya",
+ 339: "sorrel",
+ 340: "zebra",
+ 341: "hog, pig, grunter, squealer, Sus scrofa",
+ 342: "wild boar, boar, Sus scrofa",
+ 343: "warthog",
+ 344: "hippopotamus, hippo, river horse, Hippopotamus amphibius",
+ 345: "ox",
+ 346: "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis",
+ 347: "bison",
+ 348: "ram, tup",
+ 349: "bighorn, bighorn sheep, cimarron, Rocky Mountain "
+ "bighorn, Rocky Mountain sheep, Ovis canadensis",
+ 350: "ibex, Capra ibex",
+ 351: "hartebeest",
+ 352: "impala, Aepyceros melampus",
+ 353: "gazelle",
+ 354: "Arabian camel, dromedary, Camelus dromedarius",
+ 355: "llama",
+ 356: "weasel",
+ 357: "mink",
+ 358: "polecat, fitch, foulmart, foumart, Mustela putorius",
+ 359: "black-footed ferret, ferret, Mustela nigripes",
+ 360: "otter",
+ 361: "skunk, polecat, wood pussy",
+ 362: "badger",
+ 363: "armadillo",
+ 364: "three-toed sloth, ai, Bradypus tridactylus",
+ 365: "orangutan, orang, orangutang, Pongo pygmaeus",
+ 366: "gorilla, Gorilla gorilla",
+ 367: "chimpanzee, chimp, Pan troglodytes",
+ 368: "gibbon, Hylobates lar",
+ 369: "siamang, Hylobates syndactylus, Symphalangus syndactylus",
+ 370: "guenon, guenon monkey",
+ 371: "patas, hussar monkey, Erythrocebus patas",
+ 372: "baboon",
+ 373: "macaque",
+ 374: "langur",
+ 375: "colobus, colobus monkey",
+ 376: "proboscis monkey, Nasalis larvatus",
+ 377: "marmoset",
+ 378: "capuchin, ringtail, Cebus capucinus",
+ 379: "howler monkey, howler",
+ 380: "titi, titi monkey",
+ 381: "spider monkey, Ateles geoffroyi",
+ 382: "squirrel monkey, Saimiri sciureus",
+ 383: "Madagascar cat, ring-tailed lemur, Lemur catta",
+ 384: "indri, indris, Indri indri, Indri brevicaudatus",
+ 385: "Indian elephant, Elephas maximus",
+ 386: "African elephant, Loxodonta africana",
+ 387: "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens",
+ 388: "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca",
+ 389: "barracouta, snoek",
+ 390: "eel",
+ 391: "coho, cohoe, coho salmon, blue jack, silver salmon, " "Oncorhynchus kisutch",
+ 392: "rock beauty, Holocanthus tricolor",
+ 393: "anemone fish",
+ 394: "sturgeon",
+ 395: "gar, garfish, garpike, billfish, Lepisosteus osseus",
+ 396: "lionfish",
+ 397: "puffer, pufferfish, blowfish, globefish",
+ 398: "abacus",
+ 399: "abaya",
400: "academic gown, academic robe, judge's robe",
- 401: 'accordion, piano accordion, squeeze box',
- 402: 'acoustic guitar',
- 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
- 404: 'airliner',
- 405: 'airship, dirigible',
- 406: 'altar',
- 407: 'ambulance',
- 408: 'amphibian, amphibious vehicle',
- 409: 'analog clock',
- 410: 'apiary, bee house',
- 411: 'apron',
- 412: 'ashcan, trash can, garbage can, wastebin, ash bin, '
- 'ash-bin, ashbin, dustbin, trash barrel, trash bin',
- 413: 'assault rifle, assault gun',
- 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
- 415: 'bakery, bakeshop, bakehouse',
- 416: 'balance beam, beam',
- 417: 'balloon',
- 418: 'ballpoint, ballpoint pen, ballpen, Biro',
- 419: 'Band Aid',
- 420: 'banjo',
- 421: 'bannister, banister, balustrade, balusters, handrail',
- 422: 'barbell',
- 423: 'barber chair',
- 424: 'barbershop',
- 425: 'barn',
- 426: 'barometer',
- 427: 'barrel, cask',
- 428: 'barrow, garden cart, lawn cart, wheelbarrow',
- 429: 'baseball',
- 430: 'basketball',
- 431: 'bassinet',
- 432: 'bassoon',
- 433: 'bathing cap, swimming cap',
- 434: 'bath towel',
- 435: 'bathtub, bathing tub, bath, tub',
- 436: 'beach wagon, station wagon, wagon, estate car, '
- 'beach waggon, station waggon, waggon',
- 437: 'beacon, lighthouse, beacon light, pharos',
- 438: 'beaker',
- 439: 'bearskin, busby, shako',
- 440: 'beer bottle',
- 441: 'beer glass',
- 442: 'bell cote, bell cot',
- 443: 'bib',
- 444: 'bicycle-built-for-two, tandem bicycle, tandem',
- 445: 'bikini, two-piece',
- 446: 'binder, ring-binder',
- 447: 'binoculars, field glasses, opera glasses',
- 448: 'birdhouse',
- 449: 'boathouse',
- 450: 'bobsled, bobsleigh, bob',
- 451: 'bolo tie, bolo, bola tie, bola',
- 452: 'bonnet, poke bonnet',
- 453: 'bookcase',
- 454: 'bookshop, bookstore, bookstall',
- 455: 'bottlecap',
- 456: 'bow',
- 457: 'bow tie, bow-tie, bowtie',
- 458: 'brass, memorial tablet, plaque',
- 459: 'brassiere, bra, bandeau',
- 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
- 461: 'breastplate, aegis, egis',
- 462: 'broom',
- 463: 'bucket, pail',
- 464: 'buckle',
- 465: 'bulletproof vest',
- 466: 'bullet train, bullet',
- 467: 'butcher shop, meat market',
- 468: 'cab, hack, taxi, taxicab',
- 469: 'caldron, cauldron',
- 470: 'candle, taper, wax light',
- 471: 'cannon',
- 472: 'canoe',
- 473: 'can opener, tin opener',
- 474: 'cardigan',
- 475: 'car mirror',
- 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
+ 401: "accordion, piano accordion, squeeze box",
+ 402: "acoustic guitar",
+ 403: "aircraft carrier, carrier, flattop, attack aircraft carrier",
+ 404: "airliner",
+ 405: "airship, dirigible",
+ 406: "altar",
+ 407: "ambulance",
+ 408: "amphibian, amphibious vehicle",
+ 409: "analog clock",
+ 410: "apiary, bee house",
+ 411: "apron",
+ 412: "ashcan, trash can, garbage can, wastebin, ash bin, "
+ "ash-bin, ashbin, dustbin, trash barrel, trash bin",
+ 413: "assault rifle, assault gun",
+ 414: "backpack, back pack, knapsack, packsack, rucksack, haversack",
+ 415: "bakery, bakeshop, bakehouse",
+ 416: "balance beam, beam",
+ 417: "balloon",
+ 418: "ballpoint, ballpoint pen, ballpen, Biro",
+ 419: "Band Aid",
+ 420: "banjo",
+ 421: "bannister, banister, balustrade, balusters, handrail",
+ 422: "barbell",
+ 423: "barber chair",
+ 424: "barbershop",
+ 425: "barn",
+ 426: "barometer",
+ 427: "barrel, cask",
+ 428: "barrow, garden cart, lawn cart, wheelbarrow",
+ 429: "baseball",
+ 430: "basketball",
+ 431: "bassinet",
+ 432: "bassoon",
+ 433: "bathing cap, swimming cap",
+ 434: "bath towel",
+ 435: "bathtub, bathing tub, bath, tub",
+ 436: "beach wagon, station wagon, wagon, estate car, "
+ "beach waggon, station waggon, waggon",
+ 437: "beacon, lighthouse, beacon light, pharos",
+ 438: "beaker",
+ 439: "bearskin, busby, shako",
+ 440: "beer bottle",
+ 441: "beer glass",
+ 442: "bell cote, bell cot",
+ 443: "bib",
+ 444: "bicycle-built-for-two, tandem bicycle, tandem",
+ 445: "bikini, two-piece",
+ 446: "binder, ring-binder",
+ 447: "binoculars, field glasses, opera glasses",
+ 448: "birdhouse",
+ 449: "boathouse",
+ 450: "bobsled, bobsleigh, bob",
+ 451: "bolo tie, bolo, bola tie, bola",
+ 452: "bonnet, poke bonnet",
+ 453: "bookcase",
+ 454: "bookshop, bookstore, bookstall",
+ 455: "bottlecap",
+ 456: "bow",
+ 457: "bow tie, bow-tie, bowtie",
+ 458: "brass, memorial tablet, plaque",
+ 459: "brassiere, bra, bandeau",
+ 460: "breakwater, groin, groyne, mole, bulwark, seawall, jetty",
+ 461: "breastplate, aegis, egis",
+ 462: "broom",
+ 463: "bucket, pail",
+ 464: "buckle",
+ 465: "bulletproof vest",
+ 466: "bullet train, bullet",
+ 467: "butcher shop, meat market",
+ 468: "cab, hack, taxi, taxicab",
+ 469: "caldron, cauldron",
+ 470: "candle, taper, wax light",
+ 471: "cannon",
+ 472: "canoe",
+ 473: "can opener, tin opener",
+ 474: "cardigan",
+ 475: "car mirror",
+ 476: "carousel, carrousel, merry-go-round, roundabout, whirligig",
477: "carpenter's kit, tool kit",
- 478: 'carton',
- 479: 'car wheel',
- 480: 'cash machine, cash dispenser, automated teller '
- 'machine, automatic teller machine, automated teller, '
- 'automatic teller, ATM',
- 481: 'cassette',
- 482: 'cassette player',
- 483: 'castle',
- 484: 'catamaran',
- 485: 'CD player',
- 486: 'cello, violoncello',
- 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
- 488: 'chain',
- 489: 'chainlink fence',
- 490: 'chain mail, ring mail, mail, chain armor, chain '
- 'armour, ring armor, ring armour',
- 491: 'chain saw, chainsaw',
- 492: 'chest',
- 493: 'chiffonier, commode',
- 494: 'chime, bell, gong',
- 495: 'china cabinet, china closet',
- 496: 'Christmas stocking',
- 497: 'church, church building',
- 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
- 499: 'cleaver, meat cleaver, chopper',
- 500: 'cliff dwelling',
- 501: 'cloak',
- 502: 'clog, geta, patten, sabot',
- 503: 'cocktail shaker',
- 504: 'coffee mug',
- 505: 'coffeepot',
- 506: 'coil, spiral, volute, whorl, helix',
- 507: 'combination lock',
- 508: 'computer keyboard, keypad',
- 509: 'confectionery, confectionary, candy store',
- 510: 'container ship, containership, container vessel',
- 511: 'convertible',
- 512: 'corkscrew, bottle screw',
- 513: 'cornet, horn, trumpet, trump',
- 514: 'cowboy boot',
- 515: 'cowboy hat, ten-gallon hat',
- 516: 'cradle',
- 517: 'crane',
- 518: 'crash helmet',
- 519: 'crate',
- 520: 'crib, cot',
- 521: 'Crock Pot',
- 522: 'croquet ball',
- 523: 'crutch',
- 524: 'cuirass',
- 525: 'dam, dike, dyke',
- 526: 'desk',
- 527: 'desktop computer',
- 528: 'dial telephone, dial phone',
- 529: 'diaper, nappy, napkin',
- 530: 'digital clock',
- 531: 'digital watch',
- 532: 'dining table, board',
- 533: 'dishrag, dishcloth',
- 534: 'dishwasher, dish washer, dishwashing machine',
- 535: 'disk brake, disc brake',
- 536: 'dock, dockage, docking facility',
- 537: 'dogsled, dog sled, dog sleigh',
- 538: 'dome',
- 539: 'doormat, welcome mat',
- 540: 'drilling platform, offshore rig',
- 541: 'drum, membranophone, tympan',
- 542: 'drumstick',
- 543: 'dumbbell',
- 544: 'Dutch oven',
- 545: 'electric fan, blower',
- 546: 'electric guitar',
- 547: 'electric locomotive',
- 548: 'entertainment center',
- 549: 'envelope',
- 550: 'espresso maker',
- 551: 'face powder',
- 552: 'feather boa, boa',
- 553: 'file, file cabinet, filing cabinet',
- 554: 'fireboat',
- 555: 'fire engine, fire truck',
- 556: 'fire screen, fireguard',
- 557: 'flagpole, flagstaff',
- 558: 'flute, transverse flute',
- 559: 'folding chair',
- 560: 'football helmet',
- 561: 'forklift',
- 562: 'fountain',
- 563: 'fountain pen',
- 564: 'four-poster',
- 565: 'freight car',
- 566: 'French horn, horn',
- 567: 'frying pan, frypan, skillet',
- 568: 'fur coat',
- 569: 'garbage truck, dustcart',
- 570: 'gasmask, respirator, gas helmet',
- 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
- 572: 'goblet',
- 573: 'go-kart',
- 574: 'golf ball',
- 575: 'golfcart, golf cart',
- 576: 'gondola',
- 577: 'gong, tam-tam',
- 578: 'gown',
- 579: 'grand piano, grand',
- 580: 'greenhouse, nursery, glasshouse',
- 581: 'grille, radiator grille',
- 582: 'grocery store, grocery, food market, market',
- 583: 'guillotine',
- 584: 'hair slide',
- 585: 'hair spray',
- 586: 'half track',
- 587: 'hammer',
- 588: 'hamper',
- 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
- 590: 'hand-held computer, hand-held microcomputer',
- 591: 'handkerchief, hankie, hanky, hankey',
- 592: 'hard disc, hard disk, fixed disk',
- 593: 'harmonica, mouth organ, harp, mouth harp',
- 594: 'harp',
- 595: 'harvester, reaper',
- 596: 'hatchet',
- 597: 'holster',
- 598: 'home theater, home theatre',
- 599: 'honeycomb',
- 600: 'hook, claw',
- 601: 'hoopskirt, crinoline',
- 602: 'horizontal bar, high bar',
- 603: 'horse cart, horse-cart',
- 604: 'hourglass',
- 605: 'iPod',
- 606: 'iron, smoothing iron',
+ 478: "carton",
+ 479: "car wheel",
+ 480: "cash machine, cash dispenser, automated teller "
+ "machine, automatic teller machine, automated teller, "
+ "automatic teller, ATM",
+ 481: "cassette",
+ 482: "cassette player",
+ 483: "castle",
+ 484: "catamaran",
+ 485: "CD player",
+ 486: "cello, violoncello",
+ 487: "cellular telephone, cellular phone, cellphone, cell, mobile phone",
+ 488: "chain",
+ 489: "chainlink fence",
+ 490: "chain mail, ring mail, mail, chain armor, chain "
+ "armour, ring armor, ring armour",
+ 491: "chain saw, chainsaw",
+ 492: "chest",
+ 493: "chiffonier, commode",
+ 494: "chime, bell, gong",
+ 495: "china cabinet, china closet",
+ 496: "Christmas stocking",
+ 497: "church, church building",
+ 498: "cinema, movie theater, movie theatre, movie house, picture palace",
+ 499: "cleaver, meat cleaver, chopper",
+ 500: "cliff dwelling",
+ 501: "cloak",
+ 502: "clog, geta, patten, sabot",
+ 503: "cocktail shaker",
+ 504: "coffee mug",
+ 505: "coffeepot",
+ 506: "coil, spiral, volute, whorl, helix",
+ 507: "combination lock",
+ 508: "computer keyboard, keypad",
+ 509: "confectionery, confectionary, candy store",
+ 510: "container ship, containership, container vessel",
+ 511: "convertible",
+ 512: "corkscrew, bottle screw",
+ 513: "cornet, horn, trumpet, trump",
+ 514: "cowboy boot",
+ 515: "cowboy hat, ten-gallon hat",
+ 516: "cradle",
+ 517: "crane",
+ 518: "crash helmet",
+ 519: "crate",
+ 520: "crib, cot",
+ 521: "Crock Pot",
+ 522: "croquet ball",
+ 523: "crutch",
+ 524: "cuirass",
+ 525: "dam, dike, dyke",
+ 526: "desk",
+ 527: "desktop computer",
+ 528: "dial telephone, dial phone",
+ 529: "diaper, nappy, napkin",
+ 530: "digital clock",
+ 531: "digital watch",
+ 532: "dining table, board",
+ 533: "dishrag, dishcloth",
+ 534: "dishwasher, dish washer, dishwashing machine",
+ 535: "disk brake, disc brake",
+ 536: "dock, dockage, docking facility",
+ 537: "dogsled, dog sled, dog sleigh",
+ 538: "dome",
+ 539: "doormat, welcome mat",
+ 540: "drilling platform, offshore rig",
+ 541: "drum, membranophone, tympan",
+ 542: "drumstick",
+ 543: "dumbbell",
+ 544: "Dutch oven",
+ 545: "electric fan, blower",
+ 546: "electric guitar",
+ 547: "electric locomotive",
+ 548: "entertainment center",
+ 549: "envelope",
+ 550: "espresso maker",
+ 551: "face powder",
+ 552: "feather boa, boa",
+ 553: "file, file cabinet, filing cabinet",
+ 554: "fireboat",
+ 555: "fire engine, fire truck",
+ 556: "fire screen, fireguard",
+ 557: "flagpole, flagstaff",
+ 558: "flute, transverse flute",
+ 559: "folding chair",
+ 560: "football helmet",
+ 561: "forklift",
+ 562: "fountain",
+ 563: "fountain pen",
+ 564: "four-poster",
+ 565: "freight car",
+ 566: "French horn, horn",
+ 567: "frying pan, frypan, skillet",
+ 568: "fur coat",
+ 569: "garbage truck, dustcart",
+ 570: "gasmask, respirator, gas helmet",
+ 571: "gas pump, gasoline pump, petrol pump, island dispenser",
+ 572: "goblet",
+ 573: "go-kart",
+ 574: "golf ball",
+ 575: "golfcart, golf cart",
+ 576: "gondola",
+ 577: "gong, tam-tam",
+ 578: "gown",
+ 579: "grand piano, grand",
+ 580: "greenhouse, nursery, glasshouse",
+ 581: "grille, radiator grille",
+ 582: "grocery store, grocery, food market, market",
+ 583: "guillotine",
+ 584: "hair slide",
+ 585: "hair spray",
+ 586: "half track",
+ 587: "hammer",
+ 588: "hamper",
+ 589: "hand blower, blow dryer, blow drier, hair dryer, hair drier",
+ 590: "hand-held computer, hand-held microcomputer",
+ 591: "handkerchief, hankie, hanky, hankey",
+ 592: "hard disc, hard disk, fixed disk",
+ 593: "harmonica, mouth organ, harp, mouth harp",
+ 594: "harp",
+ 595: "harvester, reaper",
+ 596: "hatchet",
+ 597: "holster",
+ 598: "home theater, home theatre",
+ 599: "honeycomb",
+ 600: "hook, claw",
+ 601: "hoopskirt, crinoline",
+ 602: "horizontal bar, high bar",
+ 603: "horse cart, horse-cart",
+ 604: "hourglass",
+ 605: "iPod",
+ 606: "iron, smoothing iron",
607: "jack-o'-lantern",
- 608: 'jean, blue jean, denim',
- 609: 'jeep, landrover',
- 610: 'jersey, T-shirt, tee shirt',
- 611: 'jigsaw puzzle',
- 612: 'jinrikisha, ricksha, rickshaw',
- 613: 'joystick',
- 614: 'kimono',
- 615: 'knee pad',
- 616: 'knot',
- 617: 'lab coat, laboratory coat',
- 618: 'ladle',
- 619: 'lampshade, lamp shade',
- 620: 'laptop, laptop computer',
- 621: 'lawn mower, mower',
- 622: 'lens cap, lens cover',
- 623: 'letter opener, paper knife, paperknife',
- 624: 'library',
- 625: 'lifeboat',
- 626: 'lighter, light, igniter, ignitor',
- 627: 'limousine, limo',
- 628: 'liner, ocean liner',
- 629: 'lipstick, lip rouge',
- 630: 'Loafer',
- 631: 'lotion',
- 632: 'loudspeaker, speaker, speaker unit, loudspeaker '
- 'system, speaker system',
+ 608: "jean, blue jean, denim",
+ 609: "jeep, landrover",
+ 610: "jersey, T-shirt, tee shirt",
+ 611: "jigsaw puzzle",
+ 612: "jinrikisha, ricksha, rickshaw",
+ 613: "joystick",
+ 614: "kimono",
+ 615: "knee pad",
+ 616: "knot",
+ 617: "lab coat, laboratory coat",
+ 618: "ladle",
+ 619: "lampshade, lamp shade",
+ 620: "laptop, laptop computer",
+ 621: "lawn mower, mower",
+ 622: "lens cap, lens cover",
+ 623: "letter opener, paper knife, paperknife",
+ 624: "library",
+ 625: "lifeboat",
+ 626: "lighter, light, igniter, ignitor",
+ 627: "limousine, limo",
+ 628: "liner, ocean liner",
+ 629: "lipstick, lip rouge",
+ 630: "Loafer",
+ 631: "lotion",
+ 632: "loudspeaker, speaker, speaker unit, loudspeaker " "system, speaker system",
633: "loupe, jeweler's loupe",
- 634: 'lumbermill, sawmill',
- 635: 'magnetic compass',
- 636: 'mailbag, postbag',
- 637: 'mailbox, letter box',
- 638: 'maillot',
- 639: 'maillot, tank suit',
- 640: 'manhole cover',
- 641: 'maraca',
- 642: 'marimba, xylophone',
- 643: 'mask',
- 644: 'matchstick',
- 645: 'maypole',
- 646: 'maze, labyrinth',
- 647: 'measuring cup',
- 648: 'medicine chest, medicine cabinet',
- 649: 'megalith, megalithic structure',
- 650: 'microphone, mike',
- 651: 'microwave, microwave oven',
- 652: 'military uniform',
- 653: 'milk can',
- 654: 'minibus',
- 655: 'miniskirt, mini',
- 656: 'minivan',
- 657: 'missile',
- 658: 'mitten',
- 659: 'mixing bowl',
- 660: 'mobile home, manufactured home',
- 661: 'Model T',
- 662: 'modem',
- 663: 'monastery',
- 664: 'monitor',
- 665: 'moped',
- 666: 'mortar',
- 667: 'mortarboard',
- 668: 'mosque',
- 669: 'mosquito net',
- 670: 'motor scooter, scooter',
- 671: 'mountain bike, all-terrain bike, off-roader',
- 672: 'mountain tent',
- 673: 'mouse, computer mouse',
- 674: 'mousetrap',
- 675: 'moving van',
- 676: 'muzzle',
- 677: 'nail',
- 678: 'neck brace',
- 679: 'necklace',
- 680: 'nipple',
- 681: 'notebook, notebook computer',
- 682: 'obelisk',
- 683: 'oboe, hautboy, hautbois',
- 684: 'ocarina, sweet potato',
- 685: 'odometer, hodometer, mileometer, milometer',
- 686: 'oil filter',
- 687: 'organ, pipe organ',
- 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
- 689: 'overskirt',
- 690: 'oxcart',
- 691: 'oxygen mask',
- 692: 'packet',
- 693: 'paddle, boat paddle',
- 694: 'paddlewheel, paddle wheel',
- 695: 'padlock',
- 696: 'paintbrush',
+ 634: "lumbermill, sawmill",
+ 635: "magnetic compass",
+ 636: "mailbag, postbag",
+ 637: "mailbox, letter box",
+ 638: "maillot",
+ 639: "maillot, tank suit",
+ 640: "manhole cover",
+ 641: "maraca",
+ 642: "marimba, xylophone",
+ 643: "mask",
+ 644: "matchstick",
+ 645: "maypole",
+ 646: "maze, labyrinth",
+ 647: "measuring cup",
+ 648: "medicine chest, medicine cabinet",
+ 649: "megalith, megalithic structure",
+ 650: "microphone, mike",
+ 651: "microwave, microwave oven",
+ 652: "military uniform",
+ 653: "milk can",
+ 654: "minibus",
+ 655: "miniskirt, mini",
+ 656: "minivan",
+ 657: "missile",
+ 658: "mitten",
+ 659: "mixing bowl",
+ 660: "mobile home, manufactured home",
+ 661: "Model T",
+ 662: "modem",
+ 663: "monastery",
+ 664: "monitor",
+ 665: "moped",
+ 666: "mortar",
+ 667: "mortarboard",
+ 668: "mosque",
+ 669: "mosquito net",
+ 670: "motor scooter, scooter",
+ 671: "mountain bike, all-terrain bike, off-roader",
+ 672: "mountain tent",
+ 673: "mouse, computer mouse",
+ 674: "mousetrap",
+ 675: "moving van",
+ 676: "muzzle",
+ 677: "nail",
+ 678: "neck brace",
+ 679: "necklace",
+ 680: "nipple",
+ 681: "notebook, notebook computer",
+ 682: "obelisk",
+ 683: "oboe, hautboy, hautbois",
+ 684: "ocarina, sweet potato",
+ 685: "odometer, hodometer, mileometer, milometer",
+ 686: "oil filter",
+ 687: "organ, pipe organ",
+ 688: "oscilloscope, scope, cathode-ray oscilloscope, CRO",
+ 689: "overskirt",
+ 690: "oxcart",
+ 691: "oxygen mask",
+ 692: "packet",
+ 693: "paddle, boat paddle",
+ 694: "paddlewheel, paddle wheel",
+ 695: "padlock",
+ 696: "paintbrush",
697: "pajama, pyjama, pj's, jammies",
- 698: 'palace',
- 699: 'panpipe, pandean pipe, syrinx',
- 700: 'paper towel',
- 701: 'parachute, chute',
- 702: 'parallel bars, bars',
- 703: 'park bench',
- 704: 'parking meter',
- 705: 'passenger car, coach, carriage',
- 706: 'patio, terrace',
- 707: 'pay-phone, pay-station',
- 708: 'pedestal, plinth, footstall',
- 709: 'pencil box, pencil case',
- 710: 'pencil sharpener',
- 711: 'perfume, essence',
- 712: 'Petri dish',
- 713: 'photocopier',
- 714: 'pick, plectrum, plectron',
- 715: 'pickelhaube',
- 716: 'picket fence, paling',
- 717: 'pickup, pickup truck',
- 718: 'pier',
- 719: 'piggy bank, penny bank',
- 720: 'pill bottle',
- 721: 'pillow',
- 722: 'ping-pong ball',
- 723: 'pinwheel',
- 724: 'pirate, pirate ship',
- 725: 'pitcher, ewer',
+ 698: "palace",
+ 699: "panpipe, pandean pipe, syrinx",
+ 700: "paper towel",
+ 701: "parachute, chute",
+ 702: "parallel bars, bars",
+ 703: "park bench",
+ 704: "parking meter",
+ 705: "passenger car, coach, carriage",
+ 706: "patio, terrace",
+ 707: "pay-phone, pay-station",
+ 708: "pedestal, plinth, footstall",
+ 709: "pencil box, pencil case",
+ 710: "pencil sharpener",
+ 711: "perfume, essence",
+ 712: "Petri dish",
+ 713: "photocopier",
+ 714: "pick, plectrum, plectron",
+ 715: "pickelhaube",
+ 716: "picket fence, paling",
+ 717: "pickup, pickup truck",
+ 718: "pier",
+ 719: "piggy bank, penny bank",
+ 720: "pill bottle",
+ 721: "pillow",
+ 722: "ping-pong ball",
+ 723: "pinwheel",
+ 724: "pirate, pirate ship",
+ 725: "pitcher, ewer",
726: "plane, carpenter's plane, woodworking plane",
- 727: 'planetarium',
- 728: 'plastic bag',
- 729: 'plate rack',
- 730: 'plow, plough',
+ 727: "planetarium",
+ 728: "plastic bag",
+ 729: "plate rack",
+ 730: "plow, plough",
731: "plunger, plumber's helper",
- 732: 'Polaroid camera, Polaroid Land camera',
- 733: 'pole',
- 734: 'police van, police wagon, paddy wagon, patrol wagon, '
- 'wagon, black Maria',
- 735: 'poncho',
- 736: 'pool table, billiard table, snooker table',
- 737: 'pop bottle, soda bottle',
- 738: 'pot, flowerpot',
+ 732: "Polaroid camera, Polaroid Land camera",
+ 733: "pole",
+ 734: "police van, police wagon, paddy wagon, patrol wagon, " "wagon, black Maria",
+ 735: "poncho",
+ 736: "pool table, billiard table, snooker table",
+ 737: "pop bottle, soda bottle",
+ 738: "pot, flowerpot",
739: "potter's wheel",
- 740: 'power drill',
- 741: 'prayer rug, prayer mat',
- 742: 'printer',
- 743: 'prison, prison house',
- 744: 'projectile, missile',
- 745: 'projector',
- 746: 'puck, hockey puck',
- 747: 'punching bag, punch bag, punching ball, punchball',
- 748: 'purse',
- 749: 'quill, quill pen',
- 750: 'quilt, comforter, comfort, puff',
- 751: 'racer, race car, racing car',
- 752: 'racket, racquet',
- 753: 'radiator',
- 754: 'radio, wireless',
- 755: 'radio telescope, radio reflector',
- 756: 'rain barrel',
- 757: 'recreational vehicle, RV, R.V.',
- 758: 'reel',
- 759: 'reflex camera',
- 760: 'refrigerator, icebox',
- 761: 'remote control, remote',
- 762: 'restaurant, eating house, eating place, eatery',
- 763: 'revolver, six-gun, six-shooter',
- 764: 'rifle',
- 765: 'rocking chair, rocker',
- 766: 'rotisserie',
- 767: 'rubber eraser, rubber, pencil eraser',
- 768: 'rugby ball',
- 769: 'rule, ruler',
- 770: 'running shoe',
- 771: 'safe',
- 772: 'safety pin',
- 773: 'saltshaker, salt shaker',
- 774: 'sandal',
- 775: 'sarong',
- 776: 'sax, saxophone',
- 777: 'scabbard',
- 778: 'scale, weighing machine',
- 779: 'school bus',
- 780: 'schooner',
- 781: 'scoreboard',
- 782: 'screen, CRT screen',
- 783: 'screw',
- 784: 'screwdriver',
- 785: 'seat belt, seatbelt',
- 786: 'sewing machine',
- 787: 'shield, buckler',
- 788: 'shoe shop, shoe-shop, shoe store',
- 789: 'shoji',
- 790: 'shopping basket',
- 791: 'shopping cart',
- 792: 'shovel',
- 793: 'shower cap',
- 794: 'shower curtain',
- 795: 'ski',
- 796: 'ski mask',
- 797: 'sleeping bag',
- 798: 'slide rule, slipstick',
- 799: 'sliding door',
- 800: 'slot, one-armed bandit',
- 801: 'snorkel',
- 802: 'snowmobile',
- 803: 'snowplow, snowplough',
- 804: 'soap dispenser',
- 805: 'soccer ball',
- 806: 'sock',
- 807: 'solar dish, solar collector, solar furnace',
- 808: 'sombrero',
- 809: 'soup bowl',
- 810: 'space bar',
- 811: 'space heater',
- 812: 'space shuttle',
- 813: 'spatula',
- 814: 'speedboat',
+ 740: "power drill",
+ 741: "prayer rug, prayer mat",
+ 742: "printer",
+ 743: "prison, prison house",
+ 744: "projectile, missile",
+ 745: "projector",
+ 746: "puck, hockey puck",
+ 747: "punching bag, punch bag, punching ball, punchball",
+ 748: "purse",
+ 749: "quill, quill pen",
+ 750: "quilt, comforter, comfort, puff",
+ 751: "racer, race car, racing car",
+ 752: "racket, racquet",
+ 753: "radiator",
+ 754: "radio, wireless",
+ 755: "radio telescope, radio reflector",
+ 756: "rain barrel",
+ 757: "recreational vehicle, RV, R.V.",
+ 758: "reel",
+ 759: "reflex camera",
+ 760: "refrigerator, icebox",
+ 761: "remote control, remote",
+ 762: "restaurant, eating house, eating place, eatery",
+ 763: "revolver, six-gun, six-shooter",
+ 764: "rifle",
+ 765: "rocking chair, rocker",
+ 766: "rotisserie",
+ 767: "rubber eraser, rubber, pencil eraser",
+ 768: "rugby ball",
+ 769: "rule, ruler",
+ 770: "running shoe",
+ 771: "safe",
+ 772: "safety pin",
+ 773: "saltshaker, salt shaker",
+ 774: "sandal",
+ 775: "sarong",
+ 776: "sax, saxophone",
+ 777: "scabbard",
+ 778: "scale, weighing machine",
+ 779: "school bus",
+ 780: "schooner",
+ 781: "scoreboard",
+ 782: "screen, CRT screen",
+ 783: "screw",
+ 784: "screwdriver",
+ 785: "seat belt, seatbelt",
+ 786: "sewing machine",
+ 787: "shield, buckler",
+ 788: "shoe shop, shoe-shop, shoe store",
+ 789: "shoji",
+ 790: "shopping basket",
+ 791: "shopping cart",
+ 792: "shovel",
+ 793: "shower cap",
+ 794: "shower curtain",
+ 795: "ski",
+ 796: "ski mask",
+ 797: "sleeping bag",
+ 798: "slide rule, slipstick",
+ 799: "sliding door",
+ 800: "slot, one-armed bandit",
+ 801: "snorkel",
+ 802: "snowmobile",
+ 803: "snowplow, snowplough",
+ 804: "soap dispenser",
+ 805: "soccer ball",
+ 806: "sock",
+ 807: "solar dish, solar collector, solar furnace",
+ 808: "sombrero",
+ 809: "soup bowl",
+ 810: "space bar",
+ 811: "space heater",
+ 812: "space shuttle",
+ 813: "spatula",
+ 814: "speedboat",
815: "spider web, spider's web",
- 816: 'spindle',
- 817: 'sports car, sport car',
- 818: 'spotlight, spot',
- 819: 'stage',
- 820: 'steam locomotive',
- 821: 'steel arch bridge',
- 822: 'steel drum',
- 823: 'stethoscope',
- 824: 'stole',
- 825: 'stone wall',
- 826: 'stopwatch, stop watch',
- 827: 'stove',
- 828: 'strainer',
- 829: 'streetcar, tram, tramcar, trolley, trolley car',
- 830: 'stretcher',
- 831: 'studio couch, day bed',
- 832: 'stupa, tope',
- 833: 'submarine, pigboat, sub, U-boat',
- 834: 'suit, suit of clothes',
- 835: 'sundial',
- 836: 'sunglass',
- 837: 'sunglasses, dark glasses, shades',
- 838: 'sunscreen, sunblock, sun blocker',
- 839: 'suspension bridge',
- 840: 'swab, swob, mop',
- 841: 'sweatshirt',
- 842: 'swimming trunks, bathing trunks',
- 843: 'swing',
- 844: 'switch, electric switch, electrical switch',
- 845: 'syringe',
- 846: 'table lamp',
- 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
- 848: 'tape player',
- 849: 'teapot',
- 850: 'teddy, teddy bear',
- 851: 'television, television system',
- 852: 'tennis ball',
- 853: 'thatch, thatched roof',
- 854: 'theater curtain, theatre curtain',
- 855: 'thimble',
- 856: 'thresher, thrasher, threshing machine',
- 857: 'throne',
- 858: 'tile roof',
- 859: 'toaster',
- 860: 'tobacco shop, tobacconist shop, tobacconist',
- 861: 'toilet seat',
- 862: 'torch',
- 863: 'totem pole',
- 864: 'tow truck, tow car, wrecker',
- 865: 'toyshop',
- 866: 'tractor',
- 867: 'trailer truck, tractor trailer, trucking rig, rig, '
- 'articulated lorry, semi',
- 868: 'tray',
- 869: 'trench coat',
- 870: 'tricycle, trike, velocipede',
- 871: 'trimaran',
- 872: 'tripod',
- 873: 'triumphal arch',
- 874: 'trolleybus, trolley coach, trackless trolley',
- 875: 'trombone',
- 876: 'tub, vat',
- 877: 'turnstile',
- 878: 'typewriter keyboard',
- 879: 'umbrella',
- 880: 'unicycle, monocycle',
- 881: 'upright, upright piano',
- 882: 'vacuum, vacuum cleaner',
- 883: 'vase',
- 884: 'vault',
- 885: 'velvet',
- 886: 'vending machine',
- 887: 'vestment',
- 888: 'viaduct',
- 889: 'violin, fiddle',
- 890: 'volleyball',
- 891: 'waffle iron',
- 892: 'wall clock',
- 893: 'wallet, billfold, notecase, pocketbook',
- 894: 'wardrobe, closet, press',
- 895: 'warplane, military plane',
- 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
- 897: 'washer, automatic washer, washing machine',
- 898: 'water bottle',
- 899: 'water jug',
- 900: 'water tower',
- 901: 'whiskey jug',
- 902: 'whistle',
- 903: 'wig',
- 904: 'window screen',
- 905: 'window shade',
- 906: 'Windsor tie',
- 907: 'wine bottle',
- 908: 'wing',
- 909: 'wok',
- 910: 'wooden spoon',
- 911: 'wool, woolen, woollen',
- 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
- 913: 'wreck',
- 914: 'yawl',
- 915: 'yurt',
- 916: 'web site, website, internet site, site',
- 917: 'comic book',
- 918: 'crossword puzzle, crossword',
- 919: 'street sign',
- 920: 'traffic light, traffic signal, stoplight',
- 921: 'book jacket, dust cover, dust jacket, dust wrapper',
- 922: 'menu',
- 923: 'plate',
- 924: 'guacamole',
- 925: 'consomme',
- 926: 'hot pot, hotpot',
- 927: 'trifle',
- 928: 'ice cream, icecream',
- 929: 'ice lolly, lolly, lollipop, popsicle',
- 930: 'French loaf',
- 931: 'bagel, beigel',
- 932: 'pretzel',
- 933: 'cheeseburger',
- 934: 'hotdog, hot dog, red hot',
- 935: 'mashed potato',
- 936: 'head cabbage',
- 937: 'broccoli',
- 938: 'cauliflower',
- 939: 'zucchini, courgette',
- 940: 'spaghetti squash',
- 941: 'acorn squash',
- 942: 'butternut squash',
- 943: 'cucumber, cuke',
- 944: 'artichoke, globe artichoke',
- 945: 'bell pepper',
- 946: 'cardoon',
- 947: 'mushroom',
- 948: 'Granny Smith',
- 949: 'strawberry',
- 950: 'orange',
- 951: 'lemon',
- 952: 'fig',
- 953: 'pineapple, ananas',
- 954: 'banana',
- 955: 'jackfruit, jak, jack',
- 956: 'custard apple',
- 957: 'pomegranate',
- 958: 'hay',
- 959: 'carbonara',
- 960: 'chocolate sauce, chocolate syrup',
- 961: 'dough',
- 962: 'meat loaf, meatloaf',
- 963: 'pizza, pizza pie',
- 964: 'potpie',
- 965: 'burrito',
- 966: 'red wine',
- 967: 'espresso',
- 968: 'cup',
- 969: 'eggnog',
- 970: 'alp',
- 971: 'bubble',
- 972: 'cliff, drop, drop-off',
- 973: 'coral reef',
- 974: 'geyser',
- 975: 'lakeside, lakeshore',
- 976: 'promontory, headland, head, foreland',
- 977: 'sandbar, sand bar',
- 978: 'seashore, coast, seacoast, sea-coast',
- 979: 'valley, vale',
- 980: 'volcano',
- 981: 'ballplayer, baseball player',
- 982: 'groom, bridegroom',
- 983: 'scuba diver',
- 984: 'rapeseed',
- 985: 'daisy',
+ 816: "spindle",
+ 817: "sports car, sport car",
+ 818: "spotlight, spot",
+ 819: "stage",
+ 820: "steam locomotive",
+ 821: "steel arch bridge",
+ 822: "steel drum",
+ 823: "stethoscope",
+ 824: "stole",
+ 825: "stone wall",
+ 826: "stopwatch, stop watch",
+ 827: "stove",
+ 828: "strainer",
+ 829: "streetcar, tram, tramcar, trolley, trolley car",
+ 830: "stretcher",
+ 831: "studio couch, day bed",
+ 832: "stupa, tope",
+ 833: "submarine, pigboat, sub, U-boat",
+ 834: "suit, suit of clothes",
+ 835: "sundial",
+ 836: "sunglass",
+ 837: "sunglasses, dark glasses, shades",
+ 838: "sunscreen, sunblock, sun blocker",
+ 839: "suspension bridge",
+ 840: "swab, swob, mop",
+ 841: "sweatshirt",
+ 842: "swimming trunks, bathing trunks",
+ 843: "swing",
+ 844: "switch, electric switch, electrical switch",
+ 845: "syringe",
+ 846: "table lamp",
+ 847: "tank, army tank, armored combat vehicle, armoured combat vehicle",
+ 848: "tape player",
+ 849: "teapot",
+ 850: "teddy, teddy bear",
+ 851: "television, television system",
+ 852: "tennis ball",
+ 853: "thatch, thatched roof",
+ 854: "theater curtain, theatre curtain",
+ 855: "thimble",
+ 856: "thresher, thrasher, threshing machine",
+ 857: "throne",
+ 858: "tile roof",
+ 859: "toaster",
+ 860: "tobacco shop, tobacconist shop, tobacconist",
+ 861: "toilet seat",
+ 862: "torch",
+ 863: "totem pole",
+ 864: "tow truck, tow car, wrecker",
+ 865: "toyshop",
+ 866: "tractor",
+ 867: "trailer truck, tractor trailer, trucking rig, rig, "
+ "articulated lorry, semi",
+ 868: "tray",
+ 869: "trench coat",
+ 870: "tricycle, trike, velocipede",
+ 871: "trimaran",
+ 872: "tripod",
+ 873: "triumphal arch",
+ 874: "trolleybus, trolley coach, trackless trolley",
+ 875: "trombone",
+ 876: "tub, vat",
+ 877: "turnstile",
+ 878: "typewriter keyboard",
+ 879: "umbrella",
+ 880: "unicycle, monocycle",
+ 881: "upright, upright piano",
+ 882: "vacuum, vacuum cleaner",
+ 883: "vase",
+ 884: "vault",
+ 885: "velvet",
+ 886: "vending machine",
+ 887: "vestment",
+ 888: "viaduct",
+ 889: "violin, fiddle",
+ 890: "volleyball",
+ 891: "waffle iron",
+ 892: "wall clock",
+ 893: "wallet, billfold, notecase, pocketbook",
+ 894: "wardrobe, closet, press",
+ 895: "warplane, military plane",
+ 896: "washbasin, handbasin, washbowl, lavabo, wash-hand basin",
+ 897: "washer, automatic washer, washing machine",
+ 898: "water bottle",
+ 899: "water jug",
+ 900: "water tower",
+ 901: "whiskey jug",
+ 902: "whistle",
+ 903: "wig",
+ 904: "window screen",
+ 905: "window shade",
+ 906: "Windsor tie",
+ 907: "wine bottle",
+ 908: "wing",
+ 909: "wok",
+ 910: "wooden spoon",
+ 911: "wool, woolen, woollen",
+ 912: "worm fence, snake fence, snake-rail fence, Virginia fence",
+ 913: "wreck",
+ 914: "yawl",
+ 915: "yurt",
+ 916: "web site, website, internet site, site",
+ 917: "comic book",
+ 918: "crossword puzzle, crossword",
+ 919: "street sign",
+ 920: "traffic light, traffic signal, stoplight",
+ 921: "book jacket, dust cover, dust jacket, dust wrapper",
+ 922: "menu",
+ 923: "plate",
+ 924: "guacamole",
+ 925: "consomme",
+ 926: "hot pot, hotpot",
+ 927: "trifle",
+ 928: "ice cream, icecream",
+ 929: "ice lolly, lolly, lollipop, popsicle",
+ 930: "French loaf",
+ 931: "bagel, beigel",
+ 932: "pretzel",
+ 933: "cheeseburger",
+ 934: "hotdog, hot dog, red hot",
+ 935: "mashed potato",
+ 936: "head cabbage",
+ 937: "broccoli",
+ 938: "cauliflower",
+ 939: "zucchini, courgette",
+ 940: "spaghetti squash",
+ 941: "acorn squash",
+ 942: "butternut squash",
+ 943: "cucumber, cuke",
+ 944: "artichoke, globe artichoke",
+ 945: "bell pepper",
+ 946: "cardoon",
+ 947: "mushroom",
+ 948: "Granny Smith",
+ 949: "strawberry",
+ 950: "orange",
+ 951: "lemon",
+ 952: "fig",
+ 953: "pineapple, ananas",
+ 954: "banana",
+ 955: "jackfruit, jak, jack",
+ 956: "custard apple",
+ 957: "pomegranate",
+ 958: "hay",
+ 959: "carbonara",
+ 960: "chocolate sauce, chocolate syrup",
+ 961: "dough",
+ 962: "meat loaf, meatloaf",
+ 963: "pizza, pizza pie",
+ 964: "potpie",
+ 965: "burrito",
+ 966: "red wine",
+ 967: "espresso",
+ 968: "cup",
+ 969: "eggnog",
+ 970: "alp",
+ 971: "bubble",
+ 972: "cliff, drop, drop-off",
+ 973: "coral reef",
+ 974: "geyser",
+ 975: "lakeside, lakeshore",
+ 976: "promontory, headland, head, foreland",
+ 977: "sandbar, sand bar",
+ 978: "seashore, coast, seacoast, sea-coast",
+ 979: "valley, vale",
+ 980: "volcano",
+ 981: "ballplayer, baseball player",
+ 982: "groom, bridegroom",
+ 983: "scuba diver",
+ 984: "rapeseed",
+ 985: "daisy",
986: "yellow lady's slipper, yellow lady-slipper, Cypripedium "
- "calceolus, Cypripedium parviflorum",
- 987: 'corn',
- 988: 'acorn',
- 989: 'hip, rose hip, rosehip',
- 990: 'buckeye, horse chestnut, conker',
- 991: 'coral fungus',
- 992: 'agaric',
- 993: 'gyromitra',
- 994: 'stinkhorn, carrion fungus',
- 995: 'earthstar',
- 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, '
- 'Grifola frondosa',
- 997: 'bolete',
- 998: 'ear, spike, capitulum',
- 999: 'toilet tissue, toilet paper, bathroom tissue'}
+ "calceolus, Cypripedium parviflorum",
+ 987: "corn",
+ 988: "acorn",
+ 989: "hip, rose hip, rosehip",
+ 990: "buckeye, horse chestnut, conker",
+ 991: "coral fungus",
+ 992: "agaric",
+ 993: "gyromitra",
+ 994: "stinkhorn, carrion fungus",
+ 995: "earthstar",
+ 996: "hen-of-the-woods, hen of the woods, Polyporus frondosus, " "Grifola frondosa",
+ 997: "bolete",
+ 998: "ear, spike, capitulum",
+ 999: "toilet tissue, toilet paper, bathroom tissue",
+}
diff --git a/tests/benchmark.py b/tests/benchmark.py
index 107288311..fc93dd6b2 100644
--- a/tests/benchmark.py
+++ b/tests/benchmark.py
@@ -70,8 +70,7 @@ def run_all_tests(folder=None, verbose=True):
print(t.__class__.__name__)
break
except TypeError as e:
- raise RuntimeError(
- "Unable to run test '{}'.".format(ts)) from e
+ raise RuntimeError("Unable to run test '{}'.".format(ts)) from e
runner.run(ts)
from test_utils.tests_helper import make_report_backend
diff --git a/tests/test_algebra_cascade.py b/tests/test_algebra_cascade.py
index d0901c2ad..0ba83580b 100644
--- a/tests/test_algebra_cascade.py
+++ b/tests/test_algebra_cascade.py
@@ -4,9 +4,13 @@
import numpy as np
from numpy.testing import assert_almost_equal
from onnx.defs import onnx_opset_version
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import (
- InvalidGraph, Fail, InvalidArgument)
+ InvalidGraph,
+ Fail,
+ InvalidArgument,
+ )
except ImportError:
InvalidGraph = RuntimeError
InvalidArgument = RuntimeError
@@ -18,69 +22,72 @@
from skl2onnx import to_onnx, convert_sklearn
from skl2onnx.proto import get_latest_tested_opset_version
from test_utils import (
- fit_regression_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ fit_regression_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
class TestOnnxOperatorsCascade(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_cascade_add(self):
-
- def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None):
+ def generate_onnx_graph(dim, nbnode, input_name="X1", opv=None):
i1 = input_name
for i in range(nbnode - 1):
i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
node = OnnxAdd(i1, i2, op_version=opv)
i1 = node
i2 = (np.ones((1, dim)) * nbnode * 10).astype(np.float32)
- node = OnnxAdd(i1, i2, output_names=['Y'], op_version=opv)
- onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
- outputs=[('Y', FloatTensorType())],
- target_opset=opv)
+ node = OnnxAdd(i1, i2, output_names=["Y"], op_version=opv)
+ onx = node.to_onnx(
+ [(input_name, FloatTensorType((None, dim)))],
+ outputs=[("Y", FloatTensorType())],
+ target_opset=opv,
+ )
return onx
- exp = [np.array([[11., 11., 11., 11., 11.]]),
- np.array([[42., 42., 42., 42., 42.]]),
- np.array([[93., 93., 93., 93., 93.]]),
- np.array([[100100., 100100., 100100., 100100., 100100.]])]
- for opv in ({'': 10}, 9, 10, 11, 12, onnx_opset_version()):
+ exp = [
+ np.array([[11.0, 11.0, 11.0, 11.0, 11.0]]),
+ np.array([[42.0, 42.0, 42.0, 42.0, 42.0]]),
+ np.array([[93.0, 93.0, 93.0, 93.0, 93.0]]),
+ np.array([[100100.0, 100100.0, 100100.0, 100100.0, 100100.0]]),
+ ]
+ for opv in ({"": 10}, 9, 10, 11, 12, onnx_opset_version()):
if isinstance(opv, dict):
- if opv[''] > get_latest_tested_opset_version():
+ if opv[""] > get_latest_tested_opset_version():
continue
elif opv is not None and opv > get_latest_tested_opset_version():
continue
for i, nbnode in enumerate((1, 2, 3, 100)):
with self.subTest(n_nodes=nbnode):
onx = generate_onnx_graph(5, nbnode, opv=opv)
- if opv == {'': 10}:
+ if opv == {"": 10}:
for im in onx.opset_import:
if im.version > 10:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
else:
for im in onx.opset_import:
if im.version > opv:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except (InvalidGraph, InvalidArgument) as e:
- if (isinstance(opv, dict) and
- opv[''] >= onnx_opset_version()):
+ if isinstance(opv, dict) and opv[""] >= onnx_opset_version():
continue
- if (isinstance(opv, int) and
- opv >= onnx_opset_version()):
+ if isinstance(opv, int) and opv >= onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
X = (np.ones((1, 5)) * nbnode).astype(np.float32)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
assert_almost_equal(exp[i], res)
@@ -89,37 +96,34 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=None):
dim = 10
onx = generate_onnx_graph(dim, 300, opv=11)
as_string = onx.SerializeToString()
- ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ ort = InferenceSession(as_string, providers=["CPUExecutionProvider"])
X = (np.ones((1, dim)) * nbnode).astype(np.float32)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
assert res.shape[1] == dim
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_cascade_scaler(self):
-
- def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1):
+ def generate_onnx_graph(dim, nbnode, input_name="X1", opv=1):
i1 = input_name
scale = list(np.ones((1, dim)).ravel())
for i in range(nbnode - 1):
- i2 = list(map(float, np.ones((1, dim)).astype(
- np.float32).ravel()))
+ i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel()))
node = OnnxScaler(i1, offset=i2, scale=scale, op_version=opv)
i1 = node
i2 = list(map(float, np.ones((1, dim)).astype(np.float32).ravel()))
- node = OnnxScaler(i1, offset=i2, scale=scale, output_names=['Y'],
- op_version=opv)
- onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
- outputs=[('Y', FloatTensorType((None, dim)))],
- target_opset=TARGET_OPSET)
+ node = OnnxScaler(
+ i1, offset=i2, scale=scale, output_names=["Y"], op_version=opv
+ )
+ onx = node.to_onnx(
+ [(input_name, FloatTensorType((None, dim)))],
+ outputs=[("Y", FloatTensorType((None, dim)))],
+ target_opset=TARGET_OPSET,
+ )
return onx
- exp = [np.zeros((1, 5)),
- np.zeros((1, 5)),
- np.zeros((1, 5)),
- np.zeros((1, 5))]
+ exp = [np.zeros((1, 5)), np.zeros((1, 5)), np.zeros((1, 5)), np.zeros((1, 5))]
for opv in (1, 2, 3):
if opv > get_latest_tested_opset_version():
continue
@@ -128,17 +132,18 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1):
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
- if opv in (3, ):
+ if opv in (3,):
continue
if opv >= onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
X = (np.ones((1, 5)) * nbnode).astype(np.float32)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
assert_almost_equal(exp[i], res)
@@ -146,10 +151,9 @@ def generate_onnx_graph(dim, nbnode, input_name='X1', opv=1):
dim = 10
onx = generate_onnx_graph(dim, 300)
as_string = onx.SerializeToString()
- ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ ort = InferenceSession(as_string, providers=["CPUExecutionProvider"])
X = (np.ones((1, dim)) * nbnode).astype(np.float32)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
assert res.shape[1] == dim
@@ -168,49 +172,53 @@ def test_scaler_converted(self):
try:
onx = to_onnx(st, X.astype(np.float32), target_opset=opv)
except RuntimeError as e:
- if ("is higher than the number of the "
- "installed onnx package") in str(e):
+ if (
+ "is higher than the number of the " "installed onnx package"
+ ) in str(e):
continue
raise e
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
if opv > onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
- res_out = ort.run(None, {'X': X.astype(np.float32)})
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
+ res_out = ort.run(None, {"X": X.astype(np.float32)})
assert len(res_out) == 1
res = res_out[0]
assert_almost_equal(exp, res)
for opv in [1, 2] + list(range(10, onnx_opset_version() + 1)):
with self.subTest(opvml=opv):
- onx = to_onnx(st, X.astype(np.float32),
- target_opset={'ai.onnx.ml': opv,
- '': TARGET_OPSET})
+ onx = to_onnx(
+ st,
+ X.astype(np.float32),
+ target_opset={"ai.onnx.ml": opv, "": TARGET_OPSET},
+ )
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
if opv > onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
- res_out = ort.run(None, {'X': X.astype(np.float32)})
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
+ res_out = ort.run(None, {"X": X.astype(np.float32)})
assert len(res_out) == 1
res = res_out[0]
assert_almost_equal(exp, res)
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_mlp_regressor_default(self):
- model, X_test = fit_regression_model(
- MLPRegressor(random_state=42))
+ model, X_test = fit_regression_model(MLPRegressor(random_state=42))
exp = model.predict(X_test)
for opv in (1, 2, 7, 8, 9, 10, 11, 12, 13, onnx_opset_version()):
if opv is not None and opv > TARGET_OPSET:
@@ -218,31 +226,36 @@ def test_model_mlp_regressor_default(self):
with self.subTest(opv=opv):
try:
onx = convert_sklearn(
- model, "scikit-learn MLPRegressor",
+ model,
+ "scikit-learn MLPRegressor",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=opv)
+ target_opset=opv,
+ )
except RuntimeError as e:
- if ("is higher than the number of the "
- "installed onnx package") in str(e):
+ if (
+ "is higher than the number of the " "installed onnx package"
+ ) in str(e):
continue
raise e
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string, providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except (RuntimeError, InvalidGraph, Fail) as e:
if opv in (None, 1, 2):
continue
if opv >= onnx_opset_version():
continue
- if ("No suitable kernel definition found for "
- "op Cast(9)") in str(e):
+ if ("No suitable kernel definition found for " "op Cast(9)") in str(
+ e
+ ):
# too old onnxruntime
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
- res_out = ort.run(None, {'input': X_test})
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
+ res_out = ort.run(None, {"input": X_test})
assert len(res_out) == 1
res = res_out[0]
assert_almost_equal(exp.ravel(), res.ravel(), decimal=4)
diff --git a/tests/test_algebra_complex.py b/tests/test_algebra_complex.py
index 87da48521..330192b6f 100644
--- a/tests/test_algebra_complex.py
+++ b/tests/test_algebra_complex.py
@@ -2,51 +2,59 @@
import numpy as np
from numpy.testing import assert_almost_equal
from onnxruntime import InferenceSession
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import (
- InvalidGraph, Fail, InvalidArgument)
+ InvalidGraph,
+ Fail,
+ InvalidArgument,
+ )
except ImportError:
InvalidGraph = RuntimeError
InvalidArgument = RuntimeError
Fail = RuntimeError
-from skl2onnx.common.data_types import (
- Complex64TensorType, Complex128TensorType)
+from skl2onnx.common.data_types import Complex64TensorType, Complex128TensorType
from skl2onnx.algebra.onnx_ops import OnnxAdd
from test_utils import TARGET_OPSET
class TestAlgebraComplex(unittest.TestCase):
-
- @unittest.skipIf(Complex64TensorType is None,
- reason="not available")
+ @unittest.skipIf(Complex64TensorType is None, reason="not available")
@unittest.skipIf(TARGET_OPSET < 13, reason="not implemented")
def test_complex(self):
- for dt, var, pr in ((np.complex64, Complex64TensorType, 14),
- (np.complex128, Complex128TensorType, 15)):
- X = np.array([[1 - 2j, -12j],
- [-1 - 2j, 1 + 2j]]).astype(dt)
+ for dt, var, pr in (
+ (np.complex64, Complex64TensorType, 14),
+ (np.complex128, Complex128TensorType, 15),
+ ):
+ X = np.array([[1 - 2j, -12j], [-1 - 2j, 1 + 2j]]).astype(dt)
for opv in range(10, 20):
if opv > TARGET_OPSET:
continue
with self.subTest(dt=dt, opset=opv):
- out = OnnxAdd('X', np.array([1 + 2j], dtype=dt),
- output_names=['Y'], op_version=opv)
- onx = out.to_onnx([('X', var((None, 2)))],
- outputs=[('Y', var())],
- target_opset=opv)
- self.assertIn('elem_type: %d' % pr, str(onx))
+ out = OnnxAdd(
+ "X",
+ np.array([1 + 2j], dtype=dt),
+ output_names=["Y"],
+ op_version=opv,
+ )
+ onx = out.to_onnx(
+ [("X", var((None, 2)))],
+ outputs=[("Y", var())],
+ target_opset=opv,
+ )
+ self.assertIn("elem_type: %d" % pr, str(onx))
try:
ort = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
if "Type Error: Type 'tensor(complex" in str(e):
continue
raise e
assert ort is not None
- got = ort.run(None, {'X': X})[0]
+ got = ort.run(None, {"X": X})[0]
assert_almost_equal(X + np.array([1 + 2j]), got)
diff --git a/tests/test_algebra_converters.py b/tests/test_algebra_converters.py
index ae1114c30..ed13b15c5 100644
--- a/tests/test_algebra_converters.py
+++ b/tests/test_algebra_converters.py
@@ -5,6 +5,7 @@
from numpy.testing import assert_almost_equal
from sklearn.preprocessing import StandardScaler
from skl2onnx.algebra.onnx_ops import OnnxMatMul, OnnxExp, OnnxAdd, OnnxDiv
+
try:
from skl2onnx.algebra.sklearn_ops import OnnxSklearnStandardScaler
from skl2onnx import wrap_as_onnx_mixin
@@ -14,12 +15,12 @@
class TestAlgebraConverters(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
- @unittest.skipIf(OnnxSklearnStandardScaler is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(
+ OnnxSklearnStandardScaler is None,
+ reason="Cannot infer operators with current ONNX",
+ )
def test_algebra_converter(self):
-
X = numpy.array([[1, 2], [2, 3]])
op = OnnxSklearnStandardScaler()
op.fit(X)
@@ -28,13 +29,13 @@ def test_algebra_converter(self):
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
raise RuntimeError("Unable to read\n{}".format(onx)) from e
X = numpy.array([[0, 1], [-1, -2]])
try:
- Y = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"X": X.astype(numpy.float32)})[0]
except RuntimeError as e:
raise RuntimeError("Unable to run\n{}".format(onx)) from e
assert_almost_equal(Y, op.transform(X))
@@ -51,13 +52,13 @@ def test_algebra_converter(self):
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
raise RuntimeError("Unable to read\n{}".format(onx)) from e
X = numpy.array([[0, 1], [-1, -2]])
try:
- Y = sess.run(None, {'X': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"X": X.astype(numpy.float32)})[0]
except RuntimeError as e:
raise RuntimeError("Unable to run\n{}".format(onx)) from e
assert_almost_equal(Y, op.transform(X))
@@ -68,12 +69,12 @@ def test_algebra_to_onnx(self):
beta = numpy.array([1, 2, 3, 4]) / 10
beta32 = beta.astype(numpy.float32)
onnxExpM = OnnxExp(
- OnnxMatMul('X', beta32, op_version=TARGET_OPSET),
- op_version=TARGET_OPSET)
+ OnnxMatMul("X", beta32, op_version=TARGET_OPSET), op_version=TARGET_OPSET
+ )
cst = numpy.ones((1, 3), dtype=numpy.float32)
onnxExpM1 = OnnxAdd(onnxExpM, cst, op_version=TARGET_OPSET)
onnxPred = OnnxDiv(onnxExpM, onnxExpM1, op_version=TARGET_OPSET)
- inputs = {'X': X[:1].astype(numpy.float32)}
+ inputs = {"X": X[:1].astype(numpy.float32)}
model_onnx = onnxPred.to_onnx(inputs, target_opset=TARGET_OPSET)
s1 = str(model_onnx)
model_onnx = onnxPred.to_onnx(inputs, target_opset=TARGET_OPSET)
@@ -88,14 +89,13 @@ def test_algebra_to_onnx(self):
def test_add_12(self):
idi = numpy.identity(2, dtype=numpy.float32)
- onx = OnnxAdd('X', idi, output_names=['Y'], op_version=12)
- model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
- target_opset=12)
+ onx = OnnxAdd("X", idi, output_names=["Y"], op_version=12)
+ model_def = onx.to_onnx({"X": idi.astype(numpy.float32)}, target_opset=12)
X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32)
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X})
exp = idi + X
self.assertEqual(exp.shape, got[0].shape)
self.assertEqual(list(exp.ravel()), list(got[0].ravel()))
diff --git a/tests/test_algebra_custom_model.py b/tests/test_algebra_custom_model.py
index fa5163195..c6f78f8ba 100644
--- a/tests/test_algebra_custom_model.py
+++ b/tests/test_algebra_custom_model.py
@@ -18,9 +18,7 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-class CustomOpTransformer(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
-
+class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -35,23 +33,28 @@ def fit(self, X, y=None):
def transform(self, X):
return (X - self.W_) / self.S_
- def to_onnx_operator(self, inputs=None, outputs=None,
- target_opset=None, **kwargs):
+ def to_onnx_operator(self, inputs=None, outputs=None, target_opset=None, **kwargs):
if inputs is None:
raise RuntimeError("inputs should contain one name")
i0 = self.get_inputs(inputs, 0)
W = self.W_.astype(np.float32)
S = self.S_.astype(np.float32)
# case if there are multiple output nodes
- return OnnxDiv(OnnxSub(i0, W, op_version=self.op_version), S,
- output_names=outputs, op_version=self.op_version)
+ return OnnxDiv(
+ OnnxSub(i0, W, op_version=self.op_version),
+ S,
+ output_names=outputs,
+ op_version=self.op_version,
+ )
class CustomOpTransformerShape(CustomOpTransformer):
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
+ shape=operator.inputs[0].type.shape
+ )
+
return shape_calculator
@@ -60,7 +63,6 @@ class CustomOpScaler(StandardScaler, OnnxOperatorMixin):
class TestCustomModelAlgebra(unittest.TestCase):
-
def test_base_api(self):
model = CustomOpScaler()
data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
@@ -73,7 +75,7 @@ def test_base_api(self):
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_custom_scaler(self):
- mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
+ mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]])
tr = CustomOpTransformerShape(op_version=TARGET_OPSET)
tr.fit(mat)
z = tr.transform(mat)
@@ -83,15 +85,15 @@ def test_custom_scaler(self):
model_onnx = tr.to_onnx(matf)
onnx.checker.check_model(model_onnx)
dump_data_and_model(
- mat.astype(np.float32), tr, model_onnx,
- basename="CustomTransformerAlgebra")
+ mat.astype(np.float32), tr, model_onnx, basename="CustomTransformerAlgebra"
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_custom_scaler_pipeline_right(self):
pipe = make_pipeline(
- StandardScaler(),
- CustomOpTransformerShape(op_version=TARGET_OPSET))
- mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
+ StandardScaler(), CustomOpTransformerShape(op_version=TARGET_OPSET)
+ )
+ mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]])
pipe.fit(mat)
z = pipe.transform(mat)
assert z is not None
@@ -100,15 +102,18 @@ def test_custom_scaler_pipeline_right(self):
model_onnx = to_onnx(pipe, matf, target_opset=TARGET_OPSET)
onnx.checker.check_model(model_onnx)
dump_data_and_model(
- mat.astype(np.float32), pipe, model_onnx,
- basename="CustomTransformerPipelineRightAlgebra")
+ mat.astype(np.float32),
+ pipe,
+ model_onnx,
+ basename="CustomTransformerPipelineRightAlgebra",
+ )
@unittest.skipIf(TARGET_OPSET < 8, reason="not available")
def test_custom_scaler_pipeline_left(self):
pipe = make_pipeline(
- CustomOpTransformer(op_version=TARGET_OPSET),
- StandardScaler())
- mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
+ CustomOpTransformer(op_version=TARGET_OPSET), StandardScaler()
+ )
+ mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]])
pipe.fit(mat)
z = pipe.transform(mat)
@@ -120,9 +125,9 @@ def test_custom_scaler_pipeline_left(self):
assert "inputs should contain one name" in str(e)
pipe = make_pipeline(
- CustomOpTransformerShape(op_version=TARGET_OPSET),
- StandardScaler())
- mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
+ CustomOpTransformerShape(op_version=TARGET_OPSET), StandardScaler()
+ )
+ mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]])
pipe.fit(mat)
z = pipe.transform(mat)
assert z is not None
@@ -136,8 +141,11 @@ def test_custom_scaler_pipeline_left(self):
onnx.checker.check_model(model_onnx)
dump_data_and_model(
- mat.astype(np.float32), pipe, model_onnx,
- basename="CustomTransformerPipelineLeftAlgebra")
+ mat.astype(np.float32),
+ pipe,
+ model_onnx,
+ basename="CustomTransformerPipelineLeftAlgebra",
+ )
if __name__ == "__main__":
diff --git a/tests/test_algebra_custom_model_sub_estimator.py b/tests/test_algebra_custom_model_sub_estimator.py
index e304a8409..cea2fd1ae 100644
--- a/tests/test_algebra_custom_model_sub_estimator.py
+++ b/tests/test_algebra_custom_model_sub_estimator.py
@@ -7,6 +7,7 @@
import warnings
import numpy as np
from numpy.testing import assert_almost_equal
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
except ImportError:
@@ -17,6 +18,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -26,21 +28,19 @@
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx import to_onnx, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
-from skl2onnx.common.shape_calculator import (
- calculate_linear_classifier_output_shapes)
+from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
from skl2onnx.algebra.onnx_ops import (
OnnxArgMax,
OnnxConcat,
OnnxIdentity,
OnnxReshape,
- OnnxSoftmax)
+ OnnxSoftmax,
+)
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
-class CustomOpTransformer1(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
-
+class CustomOpTransformer1(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -54,26 +54,26 @@ def fit(self, X, y=None):
def transform(self, X):
return self.norm_.transform(X)
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
raise RuntimeError("inputs should contain one name")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
out = OnnxSubEstimator(self.norm_, i0, op_version=opv)
- return OnnxIdentity(out, op_version=self.op_version,
- output_names=outputs)
+ return OnnxIdentity(out, op_version=self.op_version, output_names=outputs)
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
- return shape_calculator
+ shape=operator.inputs[0].type.shape
+ )
+ return shape_calculator
-class CustomOpTransformer1w(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
+class CustomOpTransformer1w(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -87,25 +87,24 @@ def fit(self, X, y=None):
def transform(self, X):
return self.norm_.transform(X)
- def to_onnx_operator(self, inputs=None, outputs=('Y', )):
+ def to_onnx_operator(self, inputs=None, outputs=("Y",)):
if inputs is None:
raise RuntimeError("inputs should contain one name")
opv = self.op_version
i0 = self.get_inputs(inputs, 0)
out = OnnxSubEstimator(self.norm_, i0, op_version=opv)
- return OnnxIdentity(out, op_version=self.op_version,
- output_names=outputs)
+ return OnnxIdentity(out, op_version=self.op_version, output_names=outputs)
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
- return shape_calculator
+ shape=operator.inputs[0].type.shape
+ )
+ return shape_calculator
-class CustomOpTransformer2(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
+class CustomOpTransformer2(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -119,26 +118,26 @@ def fit(self, X, y=None):
def transform(self, X):
return self.norm_.transform(X)
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
raise RuntimeError("inputs should contain one name")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
- out = OnnxSubEstimator(self.norm_, i0, op_version=opv,
- output_names=outputs)
+ out = OnnxSubEstimator(self.norm_, i0, op_version=opv, output_names=outputs)
return out
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
- return shape_calculator
+ shape=operator.inputs[0].type.shape
+ )
+ return shape_calculator
-class CustomOpTransformer3(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
+class CustomOpTransformer3(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -152,27 +151,28 @@ def fit(self, X, y=None):
def transform(self, X):
return self.norm_.predict_proba(X)
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
raise RuntimeError("inputs should contain one name")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
- out = OnnxSubEstimator(self.norm_, i0, op_version=opv,
- options={'zipmap': False})
- return OnnxIdentity(
- out[1], output_names=outputs, op_version=self.op_version)
+ out = OnnxSubEstimator(
+ self.norm_, i0, op_version=opv, options={"zipmap": False}
+ )
+ return OnnxIdentity(out[1], output_names=outputs, op_version=self.op_version)
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
- return shape_calculator
+ shape=operator.inputs[0].type.shape
+ )
+ return shape_calculator
-class CustomOpTransformer4(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
+class CustomOpTransformer4(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=None):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -186,25 +186,26 @@ def fit(self, X, y=None):
def transform(self, X):
return self.norm_.predict_proba(X)
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
raise RuntimeError("inputs should contain one name")
opv = target_opset or self.op_version
i0 = self.get_inputs(inputs, 0)
out = OnnxSubEstimator(self.norm_, i0, op_version=opv)
- return OnnxIdentity(
- out[1], output_names=outputs, op_version=opv)
+ return OnnxIdentity(out[1], output_names=outputs, op_version=opv)
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
+ shape=operator.inputs[0].type.shape
+ )
+
return shape_calculator
class Custom2OpTransformer1(BaseEstimator, TransformerMixin):
-
def __init__(self):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -218,8 +219,7 @@ def transform(self, X):
def custom_shape_calculator(operator):
- operator.outputs[0].type = FloatTensorType(
- shape=operator.inputs[0].type.shape)
+ operator.outputs[0].type = FloatTensorType(shape=operator.inputs[0].type.shape)
def custom_transformer_converter1(scope, operator, container):
@@ -228,8 +228,7 @@ def custom_transformer_converter1(scope, operator, container):
op = operator.raw_operator
opv = container.target_opset
out = OnnxSubEstimator(op.norm_, i0, op_version=opv)
- final = OnnxIdentity(out, op_version=opv,
- output_names=outputs)
+ final = OnnxIdentity(out, op_version=opv, output_names=outputs)
final.add_to(scope, container)
@@ -243,8 +242,7 @@ def custom_transformer_converter1w(scope, operator, container):
op = operator.raw_operator
opv = container.target_opset
out = OnnxSubEstimator(op.norm_, i0, op_version=opv)
- final = OnnxIdentity(out, op_version=opv,
- output_names=outputs)
+ final = OnnxIdentity(out, op_version=opv, output_names=outputs)
final.add_to(scope, container)
@@ -259,8 +257,7 @@ def custom_transformer_converter1ww(scope, operator, container):
opv = container.target_opset
idin = OnnxIdentity(i0, op_version=opv)
out = OnnxSubEstimator(op.norm_, idin, op_version=opv)
- final = OnnxIdentity(out, op_version=opv,
- output_names=outputs)
+ final = OnnxIdentity(out, op_version=opv, output_names=outputs)
final.add_to(scope, container)
@@ -273,13 +270,11 @@ def custom_transformer_converter2(scope, operator, container):
outputs = operator.outputs
op = operator.raw_operator
opv = container.target_opset
- out = OnnxSubEstimator(op.norm_, i0, op_version=opv,
- output_names=outputs)
+ out = OnnxSubEstimator(op.norm_, i0, op_version=opv, output_names=outputs)
out.add_to(scope, container)
class Custom2OpTransformer3(Custom2OpTransformer1):
-
def fit(self, X, y=None):
self.norm_ = LogisticRegression().fit(X, y)
return self
@@ -293,10 +288,8 @@ def custom_transformer_converter3(scope, operator, container):
outputs = operator.outputs
op = operator.raw_operator
opv = container.target_opset
- out = OnnxSubEstimator(op.norm_, i0, op_version=opv,
- options={'zipmap': False})
- final = OnnxIdentity(
- out[1], output_names=outputs, op_version=opv)
+ out = OnnxSubEstimator(op.norm_, i0, op_version=opv, options={"zipmap": False})
+ final = OnnxIdentity(out[1], output_names=outputs, op_version=opv)
final.add_to(scope, container)
@@ -310,13 +303,11 @@ def custom_transformer_converter4(scope, operator, container):
op = operator.raw_operator
opv = container.target_opset
out = OnnxSubEstimator(op.norm_, i0, op_version=opv)
- final = OnnxIdentity(
- out[1], output_names=outputs, op_version=opv)
+ final = OnnxIdentity(out[1], output_names=outputs, op_version=opv)
final.add_to(scope, container)
class CustomOpClassifier(BaseEstimator, ClassifierMixin):
-
def __init__(self):
BaseEstimator.__init__(self)
ClassifierMixin.__init__(self)
@@ -355,66 +346,69 @@ def custom_classifier_converter(scope, operator, container):
y_list = [
OnnxReshape(
OnnxSubEstimator(est, X, op_version=opv)[1],
- np.array([-1, 1], dtype=np.int64), op_version=opv)
- for est in op.estimators_]
+ np.array([-1, 1], dtype=np.int64),
+ op_version=opv,
+ )
+ for est in op.estimators_
+ ]
y_matrix = OnnxConcat(*y_list, axis=1, op_version=opv)
- probs = OnnxSoftmax(y_matrix, axis=1, op_version=opv,
- output_names=[outputs[1]])
+ probs = OnnxSoftmax(y_matrix, axis=1, op_version=opv, output_names=[outputs[1]])
probs.add_to(scope, container)
- labels = OnnxArgMax(probs, axis=1, keepdims=0, op_version=opv,
- output_names=[outputs[0]])
+ labels = OnnxArgMax(
+ probs, axis=1, keepdims=0, op_version=opv, output_names=[outputs[0]]
+ )
labels.add_to(scope, container)
class TestCustomModelAlgebraSubEstimator(unittest.TestCase):
-
def check_transform(self, obj, X):
expected = obj.transform(X)
onx = to_onnx(obj, X, target_opset=TARGET_OPSET)
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Issue %r with\n%s" % (e, str(onx))) from e
- got = sess.run(None, {'X': X})[0]
+ raise AssertionError("Issue %r with\n%s" % (e, str(onx))) from e
+ got = sess.run(None, {"X": X})[0]
assert_almost_equal(expected, got, decimal=5)
def check_classifier(self, obj, X):
expected_labels = obj.predict(X)
expected_probas = obj.predict_proba(X)
- onx = to_onnx(obj, X, target_opset=TARGET_OPSET,
- options={id(obj): {'zipmap': False}})
+ onx = to_onnx(
+ obj, X, target_opset=TARGET_OPSET, options={id(obj): {"zipmap": False}}
+ )
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Issue %r with\n%s" % (e, str(onx))) from e
- got = sess.run(None, {'X': X})
+ raise AssertionError("Issue %r with\n%s" % (e, str(onx))) from e
+ got = sess.run(None, {"X": X})
assert_almost_equal(expected_probas, got[1], decimal=5)
assert_almost_equal(expected_labels, got[0])
def test_custom_scaler_1(self):
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = CustomOpTransformer1(op_version=TARGET_OPSET)
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_1_classic(self):
update_registered_converter(
- Custom2OpTransformer1, 'Custom2OpTransformer1',
+ Custom2OpTransformer1,
+ "Custom2OpTransformer1",
custom_shape_calculator,
- custom_transformer_converter1)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter1,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = Custom2OpTransformer1()
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_1w(self):
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = CustomOpTransformer1w(op_version=TARGET_OPSET)
tr.fit(X)
with warnings.catch_warnings(record=True) as w:
@@ -426,42 +420,48 @@ def test_custom_scaler_1w(self):
def test_custom_scaler_1w_classic(self):
update_registered_converter(
- Custom2OpTransformer1w, 'Custom2OpTransformer1w',
+ Custom2OpTransformer1w,
+ "Custom2OpTransformer1w",
custom_shape_calculator,
- custom_transformer_converter1w)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter1w,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = Custom2OpTransformer1w()
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_1ww_classic(self):
update_registered_converter(
- Custom2OpTransformer1ww, 'Custom2OpTransformer1ww',
+ Custom2OpTransformer1ww,
+ "Custom2OpTransformer1ww",
custom_shape_calculator,
- custom_transformer_converter1ww)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter1ww,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = Custom2OpTransformer1ww()
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_2(self):
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = CustomOpTransformer2(op_version=TARGET_OPSET)
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_2_classic(self):
update_registered_converter(
- Custom2OpTransformer2, 'Custom2OpTransformer2',
+ Custom2OpTransformer2,
+ "Custom2OpTransformer2",
custom_shape_calculator,
- custom_transformer_converter2)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter2,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = Custom2OpTransformer2()
tr.fit(X)
self.check_transform(tr, X)
def test_custom_scaler_3(self):
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
y = np.array([0, 0, 1], dtype=np.int64)
tr = CustomOpTransformer3(op_version=TARGET_OPSET)
tr.fit(X, y)
@@ -469,17 +469,19 @@ def test_custom_scaler_3(self):
def test_custom_scaler_3_classic(self):
update_registered_converter(
- Custom2OpTransformer3, 'Custom2OpTransformer3',
+ Custom2OpTransformer3,
+ "Custom2OpTransformer3",
custom_shape_calculator,
- custom_transformer_converter3)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter3,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
y = np.array([0, 0, 1], dtype=np.int64)
tr = Custom2OpTransformer3()
tr.fit(X, y)
self.check_transform(tr, X)
def test_custom_scaler_4(self):
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
y = np.array([0, 0, 1], dtype=np.int64)
tr = CustomOpTransformer4(op_version=TARGET_OPSET)
tr.fit(X, y)
@@ -487,10 +489,12 @@ def test_custom_scaler_4(self):
def test_custom_scaler_4_classic(self):
update_registered_converter(
- Custom2OpTransformer4, 'Custom2OpTransformer4',
+ Custom2OpTransformer4,
+ "Custom2OpTransformer4",
custom_shape_calculator,
- custom_transformer_converter4)
- X = np.array([[0., 1.], [0., 1.], [2., 2.]], dtype=np.float32)
+ custom_transformer_converter4,
+ )
+ X = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
tr = Custom2OpTransformer1()
tr.fit(X)
self.check_transform(tr, X)
@@ -498,11 +502,12 @@ def test_custom_scaler_4_classic(self):
@ignore_warnings(category=ConvergenceWarning)
def test_custom_classifier(self):
update_registered_converter(
- CustomOpClassifier, 'CustomOpClassifier',
+ CustomOpClassifier,
+ "CustomOpClassifier",
calculate_linear_classifier_output_shapes,
custom_classifier_converter,
- options={'zipmap': [False, True],
- 'nocl': [False, True]})
+ options={"zipmap": [False, True], "nocl": [False, True]},
+ )
data = load_iris()
X, y = data.data, data.target
X = X.astype(np.float32)
diff --git a/tests/test_algebra_deprecation.py b/tests/test_algebra_deprecation.py
index daa8c4ff5..bf95741c4 100644
--- a/tests/test_algebra_deprecation.py
+++ b/tests/test_algebra_deprecation.py
@@ -15,8 +15,7 @@
class DecorrelateTransformer(TransformerMixin, BaseEstimator):
-
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
@@ -54,13 +53,14 @@ def decorrelate_transformer_converter(scope, operator, container):
class TestOnnxDeprecation(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_converter)
+ decorrelate_transformer_converter,
+ )
def test_decorrelate_transformer(self):
data = load_iris()
@@ -71,7 +71,7 @@ def test_decorrelate_transformer(self):
pred = dec.transform(X)
cov = pred.T @ pred
for i in range(cov.shape[0]):
- cov[i, i] = 1.
+ cov[i, i] = 1.0
assert_almost_equal(np.identity(4), cov)
st = BytesIO()
@@ -80,7 +80,6 @@ def test_decorrelate_transformer(self):
assert_almost_equal(dec.transform(X), dec2.transform(X))
def test_sub_operator(self):
-
data = load_iris()
X = data.data
@@ -89,22 +88,23 @@ def test_sub_operator(self):
with warnings.catch_warnings(record=True) as ws:
warnings.simplefilter("always")
- onx = to_onnx(dec, X.astype(np.float32),
- target_opset=TARGET_OPSET)
+ onx = to_onnx(dec, X.astype(np.float32), target_opset=TARGET_OPSET)
mes = None
for w in ws:
- if (w.category == DeprecationWarning and
- 'numpy' not in str(w.message).lower()):
+ if (
+ w.category == DeprecationWarning
+ and "numpy" not in str(w.message).lower()
+ ):
mes = w.message
self.assertTrue(mes is not None)
- self.assertIn('will be removed', str(mes))
+ self.assertIn("will be removed", str(mes))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
exp = dec.transform(X.astype(np.float32))
- got = sess.run(None, {'X': X.astype(np.float32)})[0]
+ got = sess.run(None, {"X": X.astype(np.float32)})[0]
def diff(p1, p2):
p1 = p1.ravel()
diff --git a/tests/test_algebra_double.py b/tests/test_algebra_double.py
index e156bf28b..c02f1fff9 100644
--- a/tests/test_algebra_double.py
+++ b/tests/test_algebra_double.py
@@ -11,31 +11,29 @@
class TestAlgebraDouble(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(onnxruntime.__version__)
- <= pv.Version("0.4.0"),
- reason="Sub(7) not available")
+ @unittest.skipIf(
+ pv.Version(onnxruntime.__version__) <= pv.Version("0.4.0"),
+ reason="Sub(7) not available",
+ )
def test_algebra_converter(self):
-
coef = numpy.array([[1, 2], [3, 4]], dtype=numpy.float64)
intercept = 1
X_test = numpy.array([[1, -2], [3, -4]], dtype=numpy.float64)
onnx_fct = OnnxSub(
- OnnxMatMul('X', coef, op_version=TARGET_OPSET),
+ OnnxMatMul("X", coef, op_version=TARGET_OPSET),
numpy.array([intercept], dtype=numpy.float64),
- output_names=['Y'],
- op_version=TARGET_OPSET)
- onnx_model = onnx_fct.to_onnx(
- {'X': X_test}, target_opset=TARGET_OPSET)
+ output_names=["Y"],
+ op_version=TARGET_OPSET,
+ )
+ onnx_model = onnx_fct.to_onnx({"X": X_test}, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- ort_pred = sess.run(None, {'X': X_test})[0]
- assert_almost_equal(ort_pred,
- numpy.array([[-6., -7.], [-10., -11.]]))
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ ort_pred = sess.run(None, {"X": X_test})[0]
+ assert_almost_equal(ort_pred, numpy.array([[-6.0, -7.0], [-10.0, -11.0]]))
if __name__ == "__main__":
diff --git a/tests/test_algebra_onnx_doc.py b/tests/test_algebra_onnx_doc.py
index be3e0a072..1a2d8b722 100644
--- a/tests/test_algebra_onnx_doc.py
+++ b/tests/test_algebra_onnx_doc.py
@@ -11,15 +11,15 @@
class TestAlgebraOnnxDoc(unittest.TestCase):
-
def setUp(self):
self._algebra = dynamic_class_creation()
def predict_with_onnxruntime(self, model_def, *inputs):
import onnxruntime as ort
+
sess = ort.InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [i.name for i in sess.get_inputs()]
input = {name: input for name, input in zip(names, inputs)}
res = sess.run(None, input)
@@ -31,33 +31,36 @@ def test_transpose2(self):
from skl2onnx.algebra.onnx_ops import OnnxTranspose
node = OnnxTranspose(
- OnnxTranspose(
- 'X', perm=[1, 0, 2],
- op_version=TARGET_OPSET),
- perm=[1, 0, 2], output_names=['Y'],
- op_version=TARGET_OPSET)
+ OnnxTranspose("X", perm=[1, 0, 2], op_version=TARGET_OPSET),
+ perm=[1, 0, 2],
+ output_names=["Y"],
+ op_version=TARGET_OPSET,
+ )
X = np.arange(2 * 3 * 4).reshape((2, 3, 4)).astype(np.float32)
- model_def = node.to_onnx({'X': X})
+ model_def = node.to_onnx({"X": X})
onnx.checker.check_model(model_def)
res = self.predict_with_onnxruntime(model_def, X)
- assert_almost_equal(res['Y'], X)
+ assert_almost_equal(res["Y"], X)
- @unittest.skipIf(sys.platform.startswith("win"),
- reason="onnx schema are incorrect on Windows")
+ @unittest.skipIf(
+ sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows"
+ )
def test_doc_onnx(self):
rst = get_rst_doc()
assert "**Summary**" in rst
- @unittest.skipIf(sys.platform.startswith("win"),
- reason="onnx schema are incorrect on Windows")
+ @unittest.skipIf(
+ sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows"
+ )
def test_doc_sklearn(self):
try:
rst = get_rst_doc_sklearn()
assert ".. _l-sklops-OnnxSklearnBernoulliNB:" in rst
except KeyError as e:
- assert ("SklearnGaussianProcessRegressor" in str(e) or
- "SklearnGaussianProcessClassifier" in str(e))
+ assert "SklearnGaussianProcessRegressor" in str(
+ e
+ ) or "SklearnGaussianProcessClassifier" in str(e)
if __name__ == "__main__":
diff --git a/tests/test_algebra_onnx_operator_mixin_syntax.py b/tests/test_algebra_onnx_operator_mixin_syntax.py
index 1a42348f5..f5d508a28 100644
--- a/tests/test_algebra_onnx_operator_mixin_syntax.py
+++ b/tests/test_algebra_onnx_operator_mixin_syntax.py
@@ -15,9 +15,7 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-class CustomOpTransformer(BaseEstimator, TransformerMixin,
- OnnxOperatorMixin):
-
+class CustomOpTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
def __init__(self, op_version=TARGET_OPSET):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
@@ -34,61 +32,62 @@ def transform(self, X):
def onnx_shape_calculator(self):
def shape_calculator(operator):
operator.outputs[0].type = operator.inputs[0].type
+
return shape_calculator
- def to_onnx_operator(self, inputs=None, outputs=('Y', ),
- target_opset=None, **kwargs):
+ def to_onnx_operator(
+ self, inputs=None, outputs=("Y",), target_opset=None, **kwargs
+ ):
if inputs is None:
raise RuntimeError("inputs should contain one name")
i0 = self.get_inputs(inputs, 0)
W = self.W_.astype(np.float32)
S = self.S_.astype(np.float32)
return OnnxDiv(
- OnnxSub(
- i0, W, op_version=self.op_version),
- S, output_names=outputs, op_version=self.op_version)
+ OnnxSub(i0, W, op_version=self.op_version),
+ S,
+ output_names=outputs,
+ op_version=self.op_version,
+ )
class TestOnnxOperatorMixinSyntax(unittest.TestCase):
-
def test_way1_convert_sklearn(self):
-
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2, n_init=10)
tr.fit(X)
onx = convert_sklearn(
- tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
- target_opset=TARGET_OPSET)
+ tr,
+ initial_types=[("X", FloatTensorType((None, X.shape[1])))],
+ target_opset=TARGET_OPSET,
+ )
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinWay1ConvertSklearn")
+ X.astype(np.float32), tr, onx, basename="MixinWay1ConvertSklearn"
+ )
def test_way2_to_onnx(self):
-
X = np.arange(20).reshape(10, 2)
tr = KMeans(n_clusters=2, n_init=10)
tr.fit(X)
- onx = to_onnx(tr, X.astype(np.float32),
- target_opset=TARGET_OPSET)
+ onx = to_onnx(tr, X.astype(np.float32), target_opset=TARGET_OPSET)
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
- dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinWay2ToOnnx")
+ dump_data_and_model(X.astype(np.float32), tr, onx, basename="MixinWay2ToOnnx")
def test_way3_mixin(self):
-
X = np.arange(20).reshape(10, 2)
# avoids point of different cluster to be very close
# and avoid a small discrepancy due to double/float
@@ -100,8 +99,9 @@ def test_way3_mixin(self):
try:
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=TARGET_OPSET)
except KeyError as e:
- assert ("SklearnGaussianProcessRegressor" in str(e) or
- "SklearnGaussianProcessClassifier" in str(e))
+ assert "SklearnGaussianProcessRegressor" in str(
+ e
+ ) or "SklearnGaussianProcessClassifier" in str(e)
return
try:
@@ -111,18 +111,17 @@ def test_way3_mixin(self):
onx = tr_mixin.to_onnx(X.astype(np.float32))
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinWay3OnnxMixin")
+ X.astype(np.float32), tr, onx, basename="MixinWay3OnnxMixin"
+ )
def test_way4_mixin_fit(self):
-
X = np.arange(20).reshape(10, 2)
try:
- tr = wrap_as_onnx_mixin(KMeans(n_clusters=2),
- target_opset=TARGET_OPSET)
+ tr = wrap_as_onnx_mixin(KMeans(n_clusters=2), target_opset=TARGET_OPSET)
except KeyError as e:
- assert ("SklearnGaussianProcessRegressor" in str(e) or
- "SklearnGaussianProcessClassifier" in str(e))
+ assert "SklearnGaussianProcessRegressor" in str(
+ e
+ ) or "SklearnGaussianProcessClassifier" in str(e)
return
tr.fit(X)
@@ -130,65 +129,71 @@ def test_way4_mixin_fit(self):
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinWay4OnnxMixin2")
+ X.astype(np.float32), tr, onx, basename="MixinWay4OnnxMixin2"
+ )
def test_pipe_way1_convert_sklearn(self):
-
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(
CustomOpTransformer(op_version=TARGET_OPSET),
- KMeans(n_clusters=2, n_init=10))
+ KMeans(n_clusters=2, n_init=10),
+ )
tr.fit(X)
onx = convert_sklearn(
- tr, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
- target_opset=TARGET_OPSET)
+ tr,
+ initial_types=[("X", FloatTensorType((None, X.shape[1])))],
+ target_opset=TARGET_OPSET,
+ )
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinPipeWay1ConvertSklearn")
+ X.astype(np.float32), tr, onx, basename="MixinPipeWay1ConvertSklearn"
+ )
def test_pipe_way2_to_onnx(self):
-
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(
CustomOpTransformer(op_version=TARGET_OPSET),
- KMeans(n_clusters=2, n_init=10))
+ KMeans(n_clusters=2, n_init=10),
+ )
tr.fit(X)
onx = to_onnx(tr, X.astype(np.float32), target_opset=TARGET_OPSET)
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinPipeWay2ToOnnx")
+ X.astype(np.float32), tr, onx, basename="MixinPipeWay2ToOnnx"
+ )
def test_pipe_way3_mixin(self):
-
X = np.arange(20).reshape(10, 2)
tr = make_pipeline(
CustomOpTransformer(op_version=TARGET_OPSET),
- KMeans(n_clusters=2, n_init=10))
+ KMeans(n_clusters=2, n_init=10),
+ )
tr.fit(X)
try:
tr_mixin = wrap_as_onnx_mixin(tr, target_opset=TARGET_OPSET)
except KeyError as e:
- assert ("SklearnGaussianProcessRegressor" in str(e) or
- "SklearnGaussianProcessClassifier" in str(e))
+ assert "SklearnGaussianProcessRegressor" in str(
+ e
+ ) or "SklearnGaussianProcessClassifier" in str(e)
return
try:
@@ -199,24 +204,25 @@ def test_pipe_way3_mixin(self):
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinPipeWay3OnnxMixin")
+ X.astype(np.float32), tr, onx, basename="MixinPipeWay3OnnxMixin"
+ )
def test_pipe_way4_mixin_fit(self):
-
X = np.arange(20).reshape(10, 2)
try:
tr = wrap_as_onnx_mixin(
- make_pipeline(CustomOpTransformer(),
- KMeans(n_clusters=2, n_init=10)),
- target_opset=TARGET_OPSET)
+ make_pipeline(CustomOpTransformer(), KMeans(n_clusters=2, n_init=10)),
+ target_opset=TARGET_OPSET,
+ )
except KeyError as e:
- assert ("SklearnGaussianProcessRegressor" in str(e) or
- "SklearnGaussianProcessClassifier" in str(e))
+ assert "SklearnGaussianProcessRegressor" in str(
+ e
+ ) or "SklearnGaussianProcessClassifier" in str(e)
return
tr.fit(X)
@@ -225,38 +231,41 @@ def test_pipe_way4_mixin_fit(self):
if TARGET_OPSET == 11:
sonx = str(onx)
if "version: 11" not in sonx or "ir_version: 6" not in sonx:
- raise AssertionError("Issue with TARGET_OPSET: {}\n{}".format(
- TARGET_OPSET, sonx))
+ raise AssertionError(
+ "Issue with TARGET_OPSET: {}\n{}".format(TARGET_OPSET, sonx)
+ )
dump_data_and_model(
- X.astype(np.float32), tr, onx,
- basename="MixinPipeWay4OnnxMixin2")
+ X.astype(np.float32), tr, onx, basename="MixinPipeWay4OnnxMixin2"
+ )
- def common_test_onnxt_runtime_unary(self, onnx_cl, np_fct,
- op_version=None, debug=False):
- onx = onnx_cl('X', output_names=['Y'])
+ def common_test_onnxt_runtime_unary(
+ self, onnx_cl, np_fct, op_version=None, debug=False
+ ):
+ onx = onnx_cl("X", output_names=["Y"])
X = np.array([[1, 2], [3, -4]], dtype=np.float64)
- model_def = onx.to_onnx(
- {'X': X.astype(np.float32)}, target_opset=op_version)
+ model_def = onx.to_onnx({"X": X.astype(np.float32)}, target_opset=op_version)
if debug:
print(model_def)
try:
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
- if ("Could not find an implementation for the node "
- "Cl_Clip:Clip(11)" in str(e)):
+ if (
+ "Could not find an implementation for the node "
+ "Cl_Clip:Clip(11)" in str(e)
+ ):
# Not yet implemented in onnxruntime
return
raise e
X = X.astype(np.float32)
try:
- got = oinf.run(None, {'X': X})[0]
+ got = oinf.run(None, {"X": X})[0]
except Exception as e:
raise AssertionError(
- "Cannot run model due to %r\n%r\n%s" % (
- e, onx, str(model_def))) from e
+ "Cannot run model due to %r\n%r\n%s" % (e, onx, str(model_def))
+ ) from e
assert_almost_equal(np_fct(X), got, decimal=6)
@unittest.skipIf(onnx.defs.onnx_opset_version() < 10, "irrelevant")
@@ -264,31 +273,35 @@ def test_onnx_clip_10(self):
with self.subTest(name="OnnxClip_6[1e-5, 1e5]"):
self.common_test_onnxt_runtime_unary(
lambda x, output_names=None: OnnxClip_6(
- x, min=1e-5, max=1e5, output_names=output_names),
+ x, min=1e-5, max=1e5, output_names=output_names
+ ),
lambda x: np.clip(x, 1e-5, 1e5),
- op_version=10)
+ op_version=10,
+ )
with self.subTest(name="OnnxClip-10[1e-5, 1e5]"):
self.common_test_onnxt_runtime_unary(
lambda x, output_names=None: OnnxClip(
- x, min=1e-5, max=1e5, output_names=output_names,
- op_version=10),
+ x, min=1e-5, max=1e5, output_names=output_names, op_version=10
+ ),
lambda x: np.clip(x, 1e-5, 1e5),
- op_version=10)
+ op_version=10,
+ )
with self.subTest(name="OnnxClip-10[-1e5, 1e-5]"):
self.common_test_onnxt_runtime_unary(
lambda x, output_names=None: OnnxClip(
- x, max=1e-5, output_names=output_names,
- op_version=10),
+ x, max=1e-5, output_names=output_names, op_version=10
+ ),
lambda x: np.clip(x, -1e5, 1e-5),
- op_version=10)
+ op_version=10,
+ )
with self.subTest(name="OnnxClip-10[0.1, 2.1]"):
self.common_test_onnxt_runtime_unary(
lambda x, output_names=None: OnnxClip(
- x, min=0.1, max=2.1,
- output_names=output_names,
- op_version=10),
+ x, min=0.1, max=2.1, output_names=output_names, op_version=10
+ ),
lambda x: np.clip(x, 0.1, 2.1),
- op_version=10)
+ op_version=10,
+ )
if __name__ == "__main__":
diff --git a/tests/test_algebra_onnx_operators.py b/tests/test_algebra_onnx_operators.py
index 990c498c3..ed52d72b8 100644
--- a/tests/test_algebra_onnx_operators.py
+++ b/tests/test_algebra_onnx_operators.py
@@ -6,35 +6,42 @@
import numpy as np
from numpy.testing import assert_almost_equal
import onnx
-from onnx import (
- helper, TensorProto, load_model)
+from onnx import helper, TensorProto, load_model
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.utils.extmath import row_norms
from skl2onnx import convert_sklearn
from skl2onnx.common._topology import Variable
-from skl2onnx.common.data_types import (
- FloatTensorType, guess_numpy_type)
+from skl2onnx.common.data_types import FloatTensorType, guess_numpy_type
from skl2onnx.algebra.onnx_operator import OnnxOperator
from skl2onnx.algebra.onnx_ops import (
- OnnxSub, OnnxDiv, OnnxReshapeApi13,
- OnnxReduceSumSquareApi18, OnnxGemm,
- OnnxAdd, OnnxArgMin, OnnxSqrt,
- OnnxArrayFeatureExtractor, OnnxMul,
- OnnxPad, OnnxBatchNormalization,
- OnnxConstantOfShape, OnnxMatMul, OnnxSoftmax)
+ OnnxSub,
+ OnnxDiv,
+ OnnxReshapeApi13,
+ OnnxReduceSumSquareApi18,
+ OnnxGemm,
+ OnnxAdd,
+ OnnxArgMin,
+ OnnxSqrt,
+ OnnxArrayFeatureExtractor,
+ OnnxMul,
+ OnnxPad,
+ OnnxBatchNormalization,
+ OnnxConstantOfShape,
+ OnnxMatMul,
+ OnnxSoftmax,
+)
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
class TestOnnxOperators(unittest.TestCase):
-
def test_sub(self):
-
class CustomOpTransformer(BaseEstimator, TransformerMixin):
-
def __init__(self, op_version=None):
self.op_version = op_version
@@ -45,7 +52,7 @@ def fit(self, X, y=None):
def transform(self, X):
return X - self.W
- mat = np.array([[0., 1.], [1., 2.], [3., 4.]])
+ mat = np.array([[0.0, 1.0], [1.0, 2.0], [3.0, 4.0]])
tr = CustomOpTransformer(op_version=None)
tr.fit(mat)
z = tr.transform(mat)
@@ -54,21 +61,23 @@ def conv(scope, operator, container):
dtype = guess_numpy_type(operator.inputs[0].type)
W = operator.raw_operator.W.astype(dtype)
op = OnnxSub(
- operator.inputs[0], W, output_names=operator.outputs,
- op_version=TARGET_OPSET)
+ operator.inputs[0],
+ W,
+ output_names=operator.outputs,
+ op_version=TARGET_OPSET,
+ )
op.add_to(scope, container)
text = str(container)
if 'name:"Su_Sub"' not in text:
- raise AssertionError(
- "Unnamed operator: '{}'".format(text))
+ raise AssertionError("Unnamed operator: '{}'".format(text))
nin = list(op.enumerate_initial_types())
nno = list(op.enumerate_nodes())
nva = list(op.enumerate_variables())
self.assertEqual(len(nin), 1)
- self.assertEqual(nin[0][0], 'input')
+ self.assertEqual(nin[0][0], "input")
self.assertEqual(nin[0][1].shape, [None, 2])
self.assertEqual(len(nno), 1)
- self.assertEqual(nno[0].output_names[0].onnx_name, 'variable')
+ self.assertEqual(nno[0].output_names[0].onnx_name, "variable")
self.assertEqual(len(nva), 1)
assert isinstance(nva[0], tuple)
self.assertEqual(nva[0][1], 0)
@@ -79,21 +88,22 @@ def shape(operator):
operator.outputs[0].type.shape = [N, W.shape[0]]
model_onnx = convert_sklearn(
- tr, 'a-sub', [('input', FloatTensorType([None, 2]))],
+ tr,
+ "a-sub",
+ [("input", FloatTensorType([None, 2]))],
custom_shape_calculators={CustomOpTransformer: shape},
custom_conversion_functions={CustomOpTransformer: conv},
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ z2 = sess.run(None, {"input": mat.astype(np.float32)})[0]
assert_almost_equal(z, z2)
def test_sub_div(self):
-
class CustomOpTransformer(BaseEstimator, TransformerMixin):
-
def __init__(self):
pass
@@ -105,7 +115,7 @@ def fit(self, X, y=None):
def transform(self, X):
return (X - self.W) / self.S
- mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
+ mat = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 2.0]])
tr = CustomOpTransformer()
tr.fit(mat)
z = tr.transform(mat)
@@ -117,8 +127,10 @@ def conv(scope, operator, container):
out = operator.outputs
op = OnnxDiv(
OnnxSub(X, W, op_version=container.target_opset),
- S, output_names=out,
- op_version=container.target_opset)
+ S,
+ output_names=out,
+ op_version=container.target_opset,
+ )
op.add_to(scope, container)
def shape(operator):
@@ -127,23 +139,26 @@ def shape(operator):
operator.outputs[0].type.shape = [N, W.shape[0]]
model_onnx = convert_sklearn(
- tr, 'a-sub-div', [('input', FloatTensorType([None, 2]))],
+ tr,
+ "a-sub-div",
+ [("input", FloatTensorType([None, 2]))],
custom_shape_calculators={CustomOpTransformer: shape},
custom_conversion_functions={CustomOpTransformer: conv},
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
raise AssertionError(
- "Cannot load model\n---\n{}\n---".format(model_onnx)) from e
- z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
+ "Cannot load model\n---\n{}\n---".format(model_onnx)
+ ) from e
+ z2 = sess.run(None, {"input": mat.astype(np.float32)})[0]
assert_almost_equal(z, z2)
def test_sub_kmeans(self):
-
def conv(scope, operator, container):
X = operator.inputs[0]
out = operator.outputs
@@ -155,30 +170,35 @@ def conv(scope, operator, container):
C = C.astype(dtype)
rs = OnnxReduceSumSquareApi18(
- X, axes=[1], keepdims=1,
- op_version=container.target_opset)
+ X, axes=[1], keepdims=1, op_version=container.target_opset
+ )
N = X.type.shape[0]
if isinstance(N, int):
- zeros = np.zeros((N, ))
+ zeros = np.zeros((N,))
else:
zeros = OnnxMul(
- rs, np.array([0], dtype=np.float32),
- op_version=container.target_opset)
+ rs,
+ np.array([0], dtype=np.float32),
+ op_version=container.target_opset,
+ )
z = OnnxAdd(
rs,
OnnxGemm(
- X, C, zeros, alpha=-2., transB=1,
- op_version=container.target_opset),
- op_version=container.target_opset)
+ X, C, zeros, alpha=-2.0, transB=1, op_version=container.target_opset
+ ),
+ op_version=container.target_opset,
+ )
y2 = OnnxAdd(C2, z, op_version=container.target_opset)
lo = OnnxArgMin(
- y2, axis=1, keepdims=0, output_names=out[:1],
- op_version=container.target_opset)
- y2s = OnnxSqrt(
- y2, output_names=out[1:],
- op_version=container.target_opset)
+ y2,
+ axis=1,
+ keepdims=0,
+ output_names=out[:1],
+ op_version=container.target_opset,
+ )
+ y2s = OnnxSqrt(y2, output_names=out[1:], op_version=container.target_opset)
lo.add_to(scope, container)
y2s.add_to(scope, container)
@@ -188,13 +208,19 @@ def conv(scope, operator, container):
model = KMeans(n_clusters=3)
model.fit(X)
model_onnx = convert_sklearn(
- model, 'a-kmeans',
- [('input', FloatTensorType([None, X.shape[1]]))],
+ model,
+ "a-kmeans",
+ [("input", FloatTensorType([None, X.shape[1]]))],
custom_conversion_functions={KMeans: conv},
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
- dump_data_and_model(X.astype(np.float32)[40:60], model, model_onnx,
- basename="SklearnKMeansCustom-Dec4")
+ dump_data_and_model(
+ X.astype(np.float32)[40:60],
+ model,
+ model_onnx,
+ basename="SklearnKMeansCustom-Dec4",
+ )
def test_unscoped(self):
var2 = OnnxOperator.UnscopedVariable("a")
@@ -211,48 +237,56 @@ def test_constant_of_shape(self):
for opset in range(20, 8, -1):
if opset > TARGET_OPSET:
continue
- for value in [np.array([5], dtype=np.float32),
- np.array(5, dtype=np.float32)]:
+ for value in [
+ np.array([5], dtype=np.float32),
+ np.array(5, dtype=np.float32),
+ ]:
with self.subTest(opset=opset, value=value):
tensor_value = onnx.helper.make_tensor(
- "value", onnx.TensorProto.FLOAT,
- [1], [5])
+ "value", onnx.TensorProto.FLOAT, [1], [5]
+ )
cst = OnnxConstantOfShape(
- 'X', value=tensor_value, op_version=opset,
- output_names=['Y'])
+ "X", value=tensor_value, op_version=opset, output_names=["Y"]
+ )
shape = np.array([3, 4], dtype=np.int64)
onx = cst.to_onnx(
- {'X': shape}, target_opset=opset,
- outputs=[('Y', FloatTensorType())])
+ {"X": shape},
+ target_opset=opset,
+ outputs=[("Y", FloatTensorType())],
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': shape})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": shape})
assert_almost_equal(
- res[0], np.full(tuple(shape), 5, dtype=np.float32))
+ res[0], np.full(tuple(shape), 5, dtype=np.float32)
+ )
cst = OnnxConstantOfShape(
- 'X', value=value, op_version=opset,
- output_names=['Y'])
+ "X", value=value, op_version=opset, output_names=["Y"]
+ )
shape = np.array([3, 4], dtype=np.int64)
onx = cst.to_onnx(
- {'X': shape}, target_opset=opset,
- outputs=[('Y', FloatTensorType())])
+ {"X": shape},
+ target_opset=opset,
+ outputs=[("Y", FloatTensorType())],
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': shape})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": shape})
assert_almost_equal(
- res[0], np.full(tuple(shape), 5, dtype=np.float32))
+ res[0], np.full(tuple(shape), 5, dtype=np.float32)
+ )
for opset in [TARGET_OPSET]:
for value in [5, np.float32(5)]:
with self.subTest(opset=opset, value=value):
with self.assertRaises(TypeError):
OnnxConstantOfShape(
- 'X', value=value, op_version=opset,
- output_names=['Y'])
+ "X", value=value, op_version=opset, output_names=["Y"]
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_onnx_reversed_order(self):
@@ -260,16 +294,20 @@ def test_onnx_reversed_order(self):
idi2 = np.identity(2) * 2
onx = OnnxAdd(
- OnnxAdd('X', idi.astype(np.float32), op_version=TARGET_OPSET),
- idi2.astype(np.float32), output_names=['Y'],
- op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'X': idi.astype(np.float32)})
+ OnnxAdd("X", idi.astype(np.float32), op_version=TARGET_OPSET),
+ idi2.astype(np.float32),
+ output_names=["Y"],
+ op_version=TARGET_OPSET,
+ )
+ model_def = onx.to_onnx({"X": idi.astype(np.float32)})
self.assertEqual(len(model_def.graph.output), 1)
onx = OnnxAdd(
idi2.astype(np.float32),
- OnnxAdd('X', idi.astype(np.float32), op_version=TARGET_OPSET),
- output_names=['Y'], op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'X': idi.astype(np.float32)})
+ OnnxAdd("X", idi.astype(np.float32), op_version=TARGET_OPSET),
+ output_names=["Y"],
+ op_version=TARGET_OPSET,
+ )
+ model_def = onx.to_onnx({"X": idi.astype(np.float32)})
onnx2 = model_def.SerializeToString()
self.assertIsInstance(onx.outputs, list)
self.assertEqual(len(onx.outputs), 1)
@@ -286,74 +324,79 @@ def test_onnx_reversed_order(self):
assert reload is not None
def test_onnx_reversed_order_second(self):
- X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [2, 2])
- Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [2, 2])
+ X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 2])
+ Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2])
nodes = [
- helper.make_node('Add', ['X', 'idi'], ['temp']),
- helper.make_node('Add', ['temp', 'idi2'], ['Y'])
+ helper.make_node("Add", ["X", "idi"], ["temp"]),
+ helper.make_node("Add", ["temp", "idi2"], ["Y"]),
]
- graph_def = helper.make_graph(nodes, 't1', [X], [Y])
- model_def = helper.make_model(graph_def, producer_name='A')
+ graph_def = helper.make_graph(nodes, "t1", [X], [Y])
+ model_def = helper.make_model(graph_def, producer_name="A")
self.assertEqual(len(model_def.graph.output), 1)
nodes = [
- helper.make_node('Add', ['X', 'idi'], ['temp']),
- helper.make_node('Add', ['idi2', 'temp'], ['Y'])
+ helper.make_node("Add", ["X", "idi"], ["temp"]),
+ helper.make_node("Add", ["idi2", "temp"], ["Y"]),
]
- graph_def = helper.make_graph(nodes, 't1', [X], [Y])
- model_def = helper.make_model(graph_def, producer_name='A')
+ graph_def = helper.make_graph(nodes, "t1", [X], [Y])
+ model_def = helper.make_model(graph_def, producer_name="A")
self.assertEqual(len(model_def.graph.output), 1)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_onnxt_array_feature_extractor(self):
onx = OnnxArrayFeatureExtractor(
- 'X', np.array([1], dtype=np.int64),
- output_names=['Y'], op_version=1)
+ "X", np.array([1], dtype=np.int64), output_names=["Y"], op_version=1
+ )
X = np.array([[1, 2], [3, 4]], dtype=np.float32)
- model_def = onx.to_onnx({'X': X},
- outputs=[('Y', FloatTensorType([2]))],
- target_opset=TARGET_OPSET)
+ model_def = onx.to_onnx(
+ {"X": X}, outputs=[("Y", FloatTensorType([2]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X})[0]
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X})[0]
self.assertEqual(got.shape, (2, 1))
assert_almost_equal(X[:, 1:2], got)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_container_init(self):
onx = OnnxReshapeApi13(
- OnnxReshapeApi13('X', np.array([1, -1], dtype=np.int64),
- op_version=TARGET_OPSET),
+ OnnxReshapeApi13(
+ "X", np.array([1, -1], dtype=np.int64), op_version=TARGET_OPSET
+ ),
np.array([1, -1], dtype=np.int64),
- output_names=['Y'], op_version=TARGET_OPSET)
+ output_names=["Y"],
+ op_version=TARGET_OPSET,
+ )
X = np.array([[1, 2], [3, 4]], dtype=np.float32)
- model_def = onx.to_onnx({'X': X},
- outputs=[('Y', FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_def = onx.to_onnx(
+ {"X": X},
+ outputs=[("Y", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X})[0]
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X})[0]
assert_almost_equal(X.reshape((1, -1)), got)
- inits = [row for row in str(model_def).split('\n')
- if row.startswith(" initializer {")]
+ inits = [
+ row
+ for row in str(model_def).split("\n")
+ if row.startswith(" initializer {")
+ ]
self.assertEqual(len(inits), 1)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_default(self):
- pad = OnnxPad(mode='constant', value=1.5,
- pads=[0, 1, 0, 1], op_version=10)
+ pad = OnnxPad(mode="constant", value=1.5, pads=[0, 1, 0, 1], op_version=10)
- X = helper.make_tensor_value_info(
- 'X', onnx.TensorProto.FLOAT, [None, 2])
+ X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None, 2])
model_def = pad.to_onnx({pad.inputs[0].name: X}, target_opset=10)
onnx.checker.check_model(model_def)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_batch_normalization(self):
-
def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5):
dims_x = len(x.shape)
dim_ones = (1,) * (dims_x - 2)
@@ -372,14 +415,13 @@ def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5):
y = _batchnorm_test_mode(x, s, bias, mean, var).astype(np.float32)
onx = OnnxBatchNormalization(
- 'X', s, bias, mean, var, output_names=['Y'],
- op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'X': x.astype(np.float32)},
- target_opset=TARGET_OPSET)
+ "X", s, bias, mean, var, output_names=["Y"], op_version=TARGET_OPSET
+ )
+ model_def = onx.to_onnx({"X": x.astype(np.float32)}, target_opset=TARGET_OPSET)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = oinf.run(None, {'X': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = oinf.run(None, {"X": x})
assert_almost_equal(y, got[0], decimal=5)
# input size: (2, 3, 4, 5)
@@ -389,106 +431,112 @@ def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5):
mean = np.random.randn(3).astype(np.float32)
var = np.random.rand(3).astype(np.float32)
epsilon = 1e-2
- y = _batchnorm_test_mode(
- x, s, bias, mean, var, epsilon).astype(np.float32)
+ y = _batchnorm_test_mode(x, s, bias, mean, var, epsilon).astype(np.float32)
onx = OnnxBatchNormalization(
- 'X', s, bias, mean, var,
- output_names=['Y'], epsilon=epsilon,
- op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'X': x.astype(np.float32)},
- target_opset=TARGET_OPSET)
+ "X",
+ s,
+ bias,
+ mean,
+ var,
+ output_names=["Y"],
+ epsilon=epsilon,
+ op_version=TARGET_OPSET,
+ )
+ model_def = onx.to_onnx({"X": x.astype(np.float32)}, target_opset=TARGET_OPSET)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = oinf.run(None, {'X': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = oinf.run(None, {"X": x})
assert_almost_equal(y, got[0], decimal=5)
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_onnxt_runtime_pad(self):
- data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]],
- dtype=np.float32)
+ data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32)
pads = np.array([0, 2, 0, 0], dtype=np.int64)
constant_value = np.array([0.0], dtype=np.float32)
- exp = np.array([[0.0, 0.0, 1.0, 1.2],
- [0.0, 0.0, 2.3, 3.4],
- [0.0, 0.0, 4.5, 5.7]], dtype=np.float32)
+ exp = np.array(
+ [[0.0, 0.0, 1.0, 1.2], [0.0, 0.0, 2.3, 3.4], [0.0, 0.0, 4.5, 5.7]],
+ dtype=np.float32,
+ )
onx = OnnxPad(
- 'data', 'pads', constant_value, output_names=['Y'],
- op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'data': data, 'pads': pads},
- target_opset=TARGET_OPSET)
+ "data", "pads", constant_value, output_names=["Y"], op_version=TARGET_OPSET
+ )
+ model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = oinf.run(None, {'data': data, 'pads': pads})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = oinf.run(None, {"data": data, "pads": pads})
assert_almost_equal(exp, got[0])
- data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]],
- dtype=np.float32)
+ data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32)
pads = np.array([0, 2, 0, 0], dtype=np.int64)
constant_value = np.array([0.0], dtype=np.float32)
- exp = np.array([[0, 1.2, 1.0, 1.2],
- [0, 3.4, 2.3, 3.4],
- [0, 5.7, 4.5, 5.7]], dtype=np.float32)
+ exp = np.array(
+ [[0, 1.2, 1.0, 1.2], [0, 3.4, 2.3, 3.4], [0, 5.7, 4.5, 5.7]],
+ dtype=np.float32,
+ )
onx = OnnxPad(
- 'data', 'pads', output_names=['Y'],
- mode='reflect', op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'data': data, 'pads': pads},
- target_opset=TARGET_OPSET)
+ "data", "pads", output_names=["Y"], mode="reflect", op_version=TARGET_OPSET
+ )
+ model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = oinf.run(None, {'data': data, 'pads': pads})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = oinf.run(None, {"data": data, "pads": pads})
try:
assert_almost_equal(exp, got[0])
except AssertionError as e:
warnings.warn(e)
- data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]],
- dtype=np.float32)
+ data = np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np.float32)
pads = np.array([0, 2, 0, 0], dtype=np.int64)
constant_value = np.array([0.0], dtype=np.float32)
- exp = np.array([[1.0, 1.0, 1.0, 1.2],
- [2.3, 2.3, 2.3, 3.4],
- [4.5, 4.5, 4.5, 5.7]], dtype=np.float32)
+ exp = np.array(
+ [[1.0, 1.0, 1.0, 1.2], [2.3, 2.3, 2.3, 3.4], [4.5, 4.5, 4.5, 5.7]],
+ dtype=np.float32,
+ )
onx = OnnxPad(
- 'data', 'pads', output_names=['Y'],
- mode='edge', op_version=TARGET_OPSET)
- model_def = onx.to_onnx({'data': data, 'pads': pads},
- target_opset=TARGET_OPSET)
+ "data", "pads", output_names=["Y"], mode="edge", op_version=TARGET_OPSET
+ )
+ model_def = onx.to_onnx({"data": data, "pads": pads}, target_opset=TARGET_OPSET)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = oinf.run(None, {'data': data, 'pads': pads})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = oinf.run(None, {"data": data, "pads": pads})
assert_almost_equal(exp, got[0])
def test_softmax(self):
X = np.random.randn(100, 4).astype(np.float32)
y = X.sum(axis=1) + np.random.randn(100) / 10
y = y.astype(np.float32)
- self.assertEqual(y.shape, (100, ))
+ self.assertEqual(y.shape, (100,))
weight = np.random.randn(4, 1).astype(np.float32)
intercept = np.random.randn(1).astype(np.float32)
node = OnnxAdd(
- OnnxMatMul('X', weight, op_version=TARGET_OPSET),
- intercept, op_version=TARGET_OPSET)
- nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET)
+ OnnxMatMul("X", weight, op_version=TARGET_OPSET),
+ intercept,
+ op_version=TARGET_OPSET,
+ )
+ nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET)
with open("debug_ort_add.onnx", "wb") as f:
f.write(nn_onnx.SerializeToString())
self.assertEqual(len(nn_onnx.graph.output), 1)
- node = OnnxMatMul('X', weight, op_version=TARGET_OPSET)
- nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET)
+ node = OnnxMatMul("X", weight, op_version=TARGET_OPSET)
+ nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET)
self.assertEqual(len(nn_onnx.graph.output), 1)
node = OnnxSoftmax(
OnnxAdd(
- OnnxMatMul('X', weight, op_version=TARGET_OPSET),
- intercept, op_version=TARGET_OPSET),
- op_version=TARGET_OPSET)
- nn_onnx = node.to_onnx({'X': X}, target_opset=TARGET_OPSET)
+ OnnxMatMul("X", weight, op_version=TARGET_OPSET),
+ intercept,
+ op_version=TARGET_OPSET,
+ ),
+ op_version=TARGET_OPSET,
+ )
+ nn_onnx = node.to_onnx({"X": X}, target_opset=TARGET_OPSET)
self.assertEqual(len(nn_onnx.graph.output), 1)
diff --git a/tests/test_algebra_onnx_operators_if.py b/tests/test_algebra_onnx_operators_if.py
index 97e8a14e6..4fd34b1b6 100644
--- a/tests/test_algebra_onnx_operators_if.py
+++ b/tests/test_algebra_onnx_operators_if.py
@@ -8,6 +8,7 @@
import onnx.helper
from onnx import TensorProto
from onnxruntime import __version__ as ort_version
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -16,319 +17,381 @@
from sklearn.utils.testing import ignore_warnings
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import (
- OnnxAdd, OnnxSub, OnnxIf, OnnxGreater,
- OnnxReduceSum, OnnxMul, OnnxReduceMin)
-from test_utils import (
- TARGET_OPSET, TARGET_IR,
- InferenceSessionEx as InferenceSession)
+ OnnxAdd,
+ OnnxSub,
+ OnnxIf,
+ OnnxGreater,
+ OnnxReduceSum,
+ OnnxMul,
+ OnnxReduceMin,
+)
+from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestOnnxOperatorsIf(unittest.TestCase):
-
@ignore_warnings(category=DeprecationWarning)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.5.0'),
- reason="too old onnxruntime")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.5.0"), reason="too old onnxruntime"
+ )
def test_onnx_if_test1(self):
-
then_out = onnx.helper.make_tensor_value_info(
- 'then_out', onnx.TensorProto.FLOAT, [5])
+ "then_out", onnx.TensorProto.FLOAT, [5]
+ )
else_out = onnx.helper.make_tensor_value_info(
- 'else_out', onnx.TensorProto.FLOAT, [5])
+ "else_out", onnx.TensorProto.FLOAT, [5]
+ )
x = np.array([1, 2, 3, 4, 5]).astype(np.float32)
y = np.array([5, 4, 3, 2, 1]).astype(np.float32)
then_const_node = onnx.helper.make_node(
- 'Constant', inputs=[], outputs=['then_out'],
- value=onnx.numpy_helper.from_array(x))
+ "Constant",
+ inputs=[],
+ outputs=["then_out"],
+ value=onnx.numpy_helper.from_array(x),
+ )
else_const_node = onnx.helper.make_node(
- 'Constant', inputs=[], outputs=['else_out'],
- value=onnx.numpy_helper.from_array(y))
+ "Constant",
+ inputs=[],
+ outputs=["else_out"],
+ value=onnx.numpy_helper.from_array(y),
+ )
then_body = onnx.helper.make_graph(
- [then_const_node], 'then_body', [], [then_out])
+ [then_const_node], "then_body", [], [then_out]
+ )
else_body = onnx.helper.make_graph(
- [else_const_node], 'else_body', [], [else_out])
+ [else_const_node], "else_body", [], [else_out]
+ )
if_node = onnx.helper.make_node(
- 'If', inputs=['cond'], outputs=['Z'],
- then_branch=then_body, else_branch=else_body)
-
- cond = onnx.helper.make_tensor_value_info('cond', TensorProto.BOOL, [])
- Z = onnx.helper.make_tensor_value_info('Z', TensorProto.FLOAT, [None])
- graph_def = onnx.helper.make_graph([if_node], 'example', [cond], [Z])
- model_def = onnx.helper.make_model(graph_def, producer_name='skl2onnx')
+ "If",
+ inputs=["cond"],
+ outputs=["Z"],
+ then_branch=then_body,
+ else_branch=else_body,
+ )
+
+ cond = onnx.helper.make_tensor_value_info("cond", TensorProto.BOOL, [])
+ Z = onnx.helper.make_tensor_value_info("Z", TensorProto.FLOAT, [None])
+ graph_def = onnx.helper.make_graph([if_node], "example", [cond], [Z])
+ model_def = onnx.helper.make_model(graph_def, producer_name="skl2onnx")
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
- op_set.domain = ''
+ op_set.domain = ""
op_set.version = TARGET_OPSET
model_def.ir_version = TARGET_IR
cond = np.array(1).astype(bool)
expected = x if cond else y
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'cond': cond})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"cond": cond})
assert_almost_equal(expected, res[0])
@ignore_warnings(category=DeprecationWarning)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.5.0'),
- reason="too old onnxruntime")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.5.0"), reason="too old onnxruntime"
+ )
def test_onnx_if_test2(self):
-
then_out = onnx.helper.make_tensor_value_info(
- 'then_out', onnx.TensorProto.FLOAT, [5])
+ "then_out", onnx.TensorProto.FLOAT, [5]
+ )
else_out = onnx.helper.make_tensor_value_info(
- 'else_out', onnx.TensorProto.FLOAT, [5])
+ "else_out", onnx.TensorProto.FLOAT, [5]
+ )
x = np.array([1, 2, 3, 4, 5]).astype(np.float32)
y = np.array([5, 4, 3, 2, 1]).astype(np.float32)
then_const_node = onnx.helper.make_node(
- 'Constant', inputs=[], outputs=['then_out'],
- value=onnx.numpy_helper.from_array(x))
+ "Constant",
+ inputs=[],
+ outputs=["then_out"],
+ value=onnx.numpy_helper.from_array(x),
+ )
else_const_node = onnx.helper.make_node(
- 'Identity', inputs=['Y'], outputs=['else_out'])
+ "Identity", inputs=["Y"], outputs=["else_out"]
+ )
then_body = onnx.helper.make_graph(
- [then_const_node], 'then_body', [], [then_out])
+ [then_const_node], "then_body", [], [then_out]
+ )
else_body = onnx.helper.make_graph(
- [else_const_node], 'else_body', [], [else_out])
+ [else_const_node], "else_body", [], [else_out]
+ )
if_node = onnx.helper.make_node(
- 'If', inputs=['cond'], outputs=['Z'],
- then_branch=then_body, else_branch=else_body)
-
- cond = onnx.helper.make_tensor_value_info('cond', TensorProto.BOOL, [])
- Y = onnx.helper.make_tensor_value_info('Y', TensorProto.FLOAT, [None])
- Z = onnx.helper.make_tensor_value_info('Z', TensorProto.FLOAT, [None])
- graph_def = onnx.helper.make_graph(
- [if_node], 'example', [cond, Y], [Z])
- model_def = onnx.helper.make_model(graph_def, producer_name='skl2onnx')
+ "If",
+ inputs=["cond"],
+ outputs=["Z"],
+ then_branch=then_body,
+ else_branch=else_body,
+ )
+
+ cond = onnx.helper.make_tensor_value_info("cond", TensorProto.BOOL, [])
+ Y = onnx.helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None])
+ Z = onnx.helper.make_tensor_value_info("Z", TensorProto.FLOAT, [None])
+ graph_def = onnx.helper.make_graph([if_node], "example", [cond, Y], [Z])
+ model_def = onnx.helper.make_model(graph_def, producer_name="skl2onnx")
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
- op_set.domain = ''
+ op_set.domain = ""
op_set.version = TARGET_OPSET
model_def.ir_version = TARGET_IR
cond = np.array(1).astype(bool)
expected = x if cond else y
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'cond': cond, 'Y': y})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"cond": cond, "Y": y})
assert_almost_equal(expected, res[0])
@ignore_warnings(category=DeprecationWarning)
def test_onnx_if_algebra_direct(self):
-
opv = TARGET_OPSET
x1 = np.array([[0, 3], [7, 0]], dtype=np.float32)
x2 = np.array([[1, 0], [2, 0]], dtype=np.float32)
- node = OnnxAdd(
- 'x1', 'x2', output_names=['absxythen'], op_version=opv)
+ node = OnnxAdd("x1", "x2", output_names=["absxythen"], op_version=opv)
then_body = node.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxythen', FloatTensorType())])
- node = OnnxSub(
- 'x1', 'x2', output_names=['absxyelse'], op_version=opv)
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxythen", FloatTensorType())],
+ )
+ node = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv)
else_body = node.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxyelse', FloatTensorType())])
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxyelse", FloatTensorType())],
+ )
del else_body.graph.input[:]
del then_body.graph.input[:]
cond = OnnxGreater(
- OnnxReduceSum('x1', op_version=opv),
- OnnxReduceSum('x2', op_version=opv),
- op_version=opv)
- ifnode = OnnxIf(cond, then_branch=then_body.graph,
- else_branch=else_body.graph,
- op_version=opv, output_names=['y'])
+ OnnxReduceSum("x1", op_version=opv),
+ OnnxReduceSum("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode = OnnxIf(
+ cond,
+ then_branch=then_body.graph,
+ else_branch=else_body.graph,
+ op_version=opv,
+ output_names=["y"],
+ )
model_def = ifnode.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('y', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())]
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x1': x1, 'x2': x2})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x1": x1, "x2": x2})
assert_almost_equal(x1 + x2, res[0])
@ignore_warnings(category=DeprecationWarning)
def test_onnx_if_algebra_indirect(self):
-
opv = TARGET_OPSET
x1 = np.array([[0, 3], [7, 0]], dtype=np.float32)
x2 = np.array([[1, 0], [2, 0]], dtype=np.float32)
- node_xy = OnnxMul(
- 'x1', 'x2', op_version=opv, output_names=['xy'])
- node_then = OnnxAdd(
- 'x1', 'xy', output_names=['absxythen'], op_version=opv)
+ node_xy = OnnxMul("x1", "x2", op_version=opv, output_names=["xy"])
+ node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv)
then_body = node_then.to_onnx(
- {'x1': x1, 'xy': x2}, target_opset=opv,
- outputs=[('absxythen', FloatTensorType())])
- node_else = OnnxSub(
- 'x1', 'x2', output_names=['absxyelse'], op_version=opv)
+ {"x1": x1, "xy": x2},
+ target_opset=opv,
+ outputs=[("absxythen", FloatTensorType())],
+ )
+ node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv)
else_body = node_else.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxyelse', FloatTensorType())])
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxyelse", FloatTensorType())],
+ )
del else_body.graph.input[:]
del then_body.graph.input[:]
cond = OnnxGreater(
- OnnxReduceSum('x1', op_version=opv),
- OnnxReduceSum('x2', op_version=opv),
- op_version=opv)
- ifnode = OnnxIf(cond, then_branch=then_body.graph,
- else_branch=else_body.graph,
- op_version=opv, output_names=['y'],
- global_context={'xy': node_xy})
+ OnnxReduceSum("x1", op_version=opv),
+ OnnxReduceSum("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode = OnnxIf(
+ cond,
+ then_branch=then_body.graph,
+ else_branch=else_body.graph,
+ op_version=opv,
+ output_names=["y"],
+ global_context={"xy": node_xy},
+ )
model_def = ifnode.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('y', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())]
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x1': x1, 'x2': x2})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x1": x1, "x2": x2})
assert_almost_equal(x1 + x1 * x2, res[0])
@ignore_warnings(category=DeprecationWarning)
def test_onnx_if_algebra_indirect_unnamed(self):
-
opv = TARGET_OPSET
x1 = np.array([[0, 3], [7, 0]], dtype=np.float32)
x2 = np.array([[1, 0], [2, 0]], dtype=np.float32)
- node_xy = OnnxMul('x1', 'x2', op_version=opv)
- node_then = OnnxAdd(
- 'x1', 'xy', output_names=['absxythen'], op_version=opv)
+ node_xy = OnnxMul("x1", "x2", op_version=opv)
+ node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv)
then_body = node_then.to_onnx(
- {'x1': x1, 'xy': x2}, target_opset=opv,
- outputs=[('absxythen', FloatTensorType())])
- node_else = OnnxSub(
- 'x1', 'x2', output_names=['absxyelse'], op_version=opv)
+ {"x1": x1, "xy": x2},
+ target_opset=opv,
+ outputs=[("absxythen", FloatTensorType())],
+ )
+ node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv)
else_body = node_else.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxyelse', FloatTensorType())])
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxyelse", FloatTensorType())],
+ )
del else_body.graph.input[:]
del then_body.graph.input[:]
cond = OnnxGreater(
- OnnxReduceSum('x1', op_version=opv),
- OnnxReduceSum('x2', op_version=opv),
- op_version=opv)
- ifnode = OnnxIf(cond, then_branch=then_body.graph,
- else_branch=else_body.graph,
- op_version=opv, output_names=['y'],
- global_context={'xy': node_xy})
+ OnnxReduceSum("x1", op_version=opv),
+ OnnxReduceSum("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode = OnnxIf(
+ cond,
+ then_branch=then_body.graph,
+ else_branch=else_body.graph,
+ op_version=opv,
+ output_names=["y"],
+ global_context={"xy": node_xy},
+ )
model_def = ifnode.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('y', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())]
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x1': x1, 'x2': x2})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x1": x1, "x2": x2})
assert_almost_equal(x1 + x1 * x2, res[0])
@ignore_warnings(category=DeprecationWarning)
def test_onnx_if_algebra_indirect_unnamed_clear_input(self):
-
opv = TARGET_OPSET
x1 = np.array([[0, 3], [7, 0]], dtype=np.float32)
x2 = np.array([[1, 0], [2, 0]], dtype=np.float32)
- node_xy = OnnxMul('x1', 'x2', op_version=opv)
- node_then = OnnxAdd(
- 'x1', 'xy', output_names=['absxythen'], op_version=opv)
+ node_xy = OnnxMul("x1", "x2", op_version=opv)
+ node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv)
then_body = node_then.to_onnx(
- {'x1': x1, 'xy': x2}, target_opset=opv,
- outputs=[('absxythen', FloatTensorType())])
- node_else = OnnxSub(
- 'x1', 'x2', output_names=['absxyelse'], op_version=opv)
+ {"x1": x1, "xy": x2},
+ target_opset=opv,
+ outputs=[("absxythen", FloatTensorType())],
+ )
+ node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv)
else_body = node_else.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxyelse', FloatTensorType())])
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxyelse", FloatTensorType())],
+ )
cond = OnnxGreater(
- OnnxReduceSum('x1', op_version=opv),
- OnnxReduceSum('x2', op_version=opv),
- op_version=opv)
- ifnode = OnnxIf(cond, then_branch=then_body.graph,
- else_branch=else_body.graph,
- op_version=opv, output_names=['y'],
- global_context={'xy': node_xy},
- clear_subgraph_inputs=True)
+ OnnxReduceSum("x1", op_version=opv),
+ OnnxReduceSum("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode = OnnxIf(
+ cond,
+ then_branch=then_body.graph,
+ else_branch=else_body.graph,
+ op_version=opv,
+ output_names=["y"],
+ global_context={"xy": node_xy},
+ clear_subgraph_inputs=True,
+ )
model_def = ifnode.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('y', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())]
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x1': x1, 'x2': x2})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x1": x1, "x2": x2})
assert_almost_equal(x1 + x1 * x2, res[0])
@ignore_warnings(category=DeprecationWarning)
def test_onnx_if_algebra_indirect_unnamed_clear_input_recursive(self):
-
opv = TARGET_OPSET
x1 = np.array([[0, 3], [7, 0]], dtype=np.float32)
x2 = np.array([[1, 0], [2, 0]], dtype=np.float32)
- node_xy = OnnxMul('x1', 'x2', op_version=opv)
- node_then = OnnxAdd(
- 'x1', 'xy', output_names=['absxythen'], op_version=opv)
+ node_xy = OnnxMul("x1", "x2", op_version=opv)
+ node_then = OnnxAdd("x1", "xy", output_names=["absxythen"], op_version=opv)
then_body = node_then.to_onnx(
- {'x1': x1, 'xy': x2}, target_opset=opv,
- outputs=[('absxythen', FloatTensorType())])
- node_else = OnnxSub(
- 'x1', 'x2', output_names=['absxyelse'], op_version=opv)
+ {"x1": x1, "xy": x2},
+ target_opset=opv,
+ outputs=[("absxythen", FloatTensorType())],
+ )
+ node_else = OnnxSub("x1", "x2", output_names=["absxyelse"], op_version=opv)
else_body = node_else.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('absxyelse', FloatTensorType())])
+ {"x1": x1, "x2": x2},
+ target_opset=opv,
+ outputs=[("absxyelse", FloatTensorType())],
+ )
cond = OnnxGreater(
- OnnxReduceSum('x1', op_version=opv),
- OnnxReduceSum('x2', op_version=opv),
- op_version=opv)
- ifnode = OnnxIf(cond, then_branch=then_body.graph,
- else_branch=else_body.graph,
- op_version=opv, output_names=['yt'],
- clear_subgraph_inputs=True)
+ OnnxReduceSum("x1", op_version=opv),
+ OnnxReduceSum("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode = OnnxIf(
+ cond,
+ then_branch=then_body.graph,
+ else_branch=else_body.graph,
+ op_version=opv,
+ output_names=["yt"],
+ clear_subgraph_inputs=True,
+ )
subgraph = ifnode.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('yt', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("yt", FloatTensorType())]
+ )
cond2 = OnnxGreater(
- OnnxReduceMin('x1', op_version=opv),
- OnnxReduceMin('x2', op_version=opv),
- op_version=opv)
- ifnode2 = OnnxIf(cond2, then_branch=then_body.graph,
- else_branch=subgraph.graph,
- op_version=opv, output_names=['y'],
- global_context={'xy': node_xy},
- clear_subgraph_inputs=True)
+ OnnxReduceMin("x1", op_version=opv),
+ OnnxReduceMin("x2", op_version=opv),
+ op_version=opv,
+ )
+ ifnode2 = OnnxIf(
+ cond2,
+ then_branch=then_body.graph,
+ else_branch=subgraph.graph,
+ op_version=opv,
+ output_names=["y"],
+ global_context={"xy": node_xy},
+ clear_subgraph_inputs=True,
+ )
model_def = ifnode2.to_onnx(
- {'x1': x1, 'x2': x2}, target_opset=opv,
- outputs=[('y', FloatTensorType())])
+ {"x1": x1, "x2": x2}, target_opset=opv, outputs=[("y", FloatTensorType())]
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x1': x1, 'x2': x2})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x1": x1, "x2": x2})
assert_almost_equal(x1 + x1 * x2, res[0])
diff --git a/tests/test_algebra_onnx_operators_opset.py b/tests/test_algebra_onnx_operators_opset.py
index 08fa6d3b3..719509753 100644
--- a/tests/test_algebra_onnx_operators_opset.py
+++ b/tests/test_algebra_onnx_operators_opset.py
@@ -9,23 +9,25 @@
class TestOnnxOperatorsOpset(unittest.TestCase):
-
@unittest.skipIf(onnx.defs.onnx_opset_version() < 10, "irrelevant")
def test_pad_opset_10(self):
-
- pad = OnnxPad('X', output_names=['Y'],
- mode='constant', value=1.5,
- pads=[0, 1, 0, 1],
- op_version=2)
+ pad = OnnxPad(
+ "X",
+ output_names=["Y"],
+ mode="constant",
+ value=1.5,
+ pads=[0, 1, 0, 1],
+ op_version=2,
+ )
X = np.array([[0, 1]], dtype=np.float32)
- model_def = pad.to_onnx({'X': X}, target_opset=10)
+ model_def = pad.to_onnx({"X": X}, target_opset=10)
onnx.checker.check_model(model_def)
def predict_with_onnxruntime(model_def, *inputs):
sess = ort.InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [i.name for i in sess.get_inputs()]
dinputs = {name: input for name, input in zip(names, inputs)}
res = sess.run(None, dinputs)
@@ -33,8 +35,7 @@ def predict_with_onnxruntime(model_def, *inputs):
return {name: output for name, output in zip(names, res)}
Y = predict_with_onnxruntime(model_def, X)
- assert_almost_equal(
- np.array([[1.5, 0., 1., 1.5]], dtype=np.float32), Y['Y'])
+ assert_almost_equal(np.array([[1.5, 0.0, 1.0, 1.5]], dtype=np.float32), Y["Y"])
if __name__ == "__main__":
diff --git a/tests/test_algebra_onnx_operators_scan.py b/tests/test_algebra_onnx_operators_scan.py
index b26d98934..a03c9f0f3 100644
--- a/tests/test_algebra_onnx_operators_scan.py
+++ b/tests/test_algebra_onnx_operators_scan.py
@@ -10,6 +10,7 @@
import onnx
from onnx.onnx_cpp2py_export.checker import ValidationError
from onnxruntime import __version__ as ort_version
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -18,88 +19,97 @@
from sklearn.utils.testing import ignore_warnings
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import (
- OnnxAdd, OnnxIdentity, OnnxScan,
- OnnxSub, OnnxReduceSumSquareApi18,
- OnnxSqueezeApi11, OnnxShape)
+ OnnxAdd,
+ OnnxIdentity,
+ OnnxScan,
+ OnnxSub,
+ OnnxReduceSumSquareApi18,
+ OnnxSqueezeApi11,
+ OnnxShape,
+)
from skl2onnx.algebra.custom_ops import OnnxCDist
+
try:
from skl2onnx.algebra.onnx_ops import OnnxConstantOfShape
except ImportError:
# onnx is too old
OnnxConstantOfShape = None
-from onnx import (
- helper, TensorProto, __version__ as onnx__version__)
-from skl2onnx.algebra.complex_functions import (
- onnx_squareform_pdist, onnx_cdist)
+from onnx import helper, TensorProto, __version__ as onnx__version__
+from skl2onnx.algebra.complex_functions import onnx_squareform_pdist, onnx_cdist
from skl2onnx.proto import get_latest_tested_opset_version
-from test_utils import (
- TARGET_OPSET, TARGET_IR,
- InferenceSessionEx as InferenceSession)
+from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession
_TARGET_OPSET_ = min(get_latest_tested_opset_version(), TARGET_OPSET)
THRESHOLD = "0.4.0"
THRESHOLD2 = "0.5.0"
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestOnnxOperatorsScan(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example(self):
sum_in = onnx.helper.make_tensor_value_info(
- 'sum_in', onnx.TensorProto.FLOAT, [2])
- next = onnx.helper.make_tensor_value_info(
- 'next', onnx.TensorProto.FLOAT, [2])
+ "sum_in", onnx.TensorProto.FLOAT, [2]
+ )
+ next = onnx.helper.make_tensor_value_info("next", onnx.TensorProto.FLOAT, [2])
sum_out = onnx.helper.make_tensor_value_info(
- 'sum_out', onnx.TensorProto.FLOAT, [2])
+ "sum_out", onnx.TensorProto.FLOAT, [2]
+ )
scan_out = onnx.helper.make_tensor_value_info(
- 'scan_out', onnx.TensorProto.FLOAT, [2])
+ "scan_out", onnx.TensorProto.FLOAT, [2]
+ )
add_node = onnx.helper.make_node(
- 'Add',
- inputs=['sum_in', 'next'],
- outputs=['sum_out']
+ "Add", inputs=["sum_in", "next"], outputs=["sum_out"]
)
id_node = onnx.helper.make_node(
- 'Identity',
- inputs=['sum_out'],
- outputs=['scan_out']
+ "Identity", inputs=["sum_out"], outputs=["scan_out"]
)
scan_body = onnx.helper.make_graph(
- [add_node, id_node],
- 'scan_body',
- [sum_in, next],
- [sum_out, scan_out]
+ [add_node, id_node], "scan_body", [sum_in, next], [sum_out, scan_out]
)
node = onnx.helper.make_node(
- 'Scan',
- inputs=['initial', 'x'],
- outputs=['y', 'z'],
+ "Scan",
+ inputs=["initial", "x"],
+ outputs=["y", "z"],
num_scan_inputs=1,
- body=scan_body
+ body=scan_body,
)
initial = helper.make_tensor_value_info(
- 'initial', TensorProto.FLOAT, [2, ])
- X = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 2])
- Y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, ])
- Z = helper.make_tensor_value_info('z', TensorProto.FLOAT, [3, 2])
+ "initial",
+ TensorProto.FLOAT,
+ [
+ 2,
+ ],
+ )
+ X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [3, 2])
+ Y = helper.make_tensor_value_info(
+ "y",
+ TensorProto.FLOAT,
+ [
+ 2,
+ ],
+ )
+ Z = helper.make_tensor_value_info("z", TensorProto.FLOAT, [3, 2])
graph_def = helper.make_graph(
[node],
- 'test-model',
+ "test-model",
[initial, X],
[Y, Z],
)
- model_def = helper.make_model(graph_def, producer_name='onnx-example')
+ model_def = helper.make_model(graph_def, producer_name="onnx-example")
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
- op_set.domain = ''
+ op_set.domain = ""
op_set.version = TARGET_OPSET
model_def.ir_version = TARGET_IR
@@ -112,13 +122,13 @@ def test_onnx_example(self):
try:
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except Exception as e:
if "Current official support for domain ai.onnx" in str(e):
return
raise e
- res = sess.run(None, {'initial': initial, 'x': x})
+ res = sess.run(None, {"initial": initial, "x": x})
y = np.array([9, 12]).astype(np.float32).reshape((2,))
z = np.array([1, 2, 4, 6, 9, 12]).astype(np.float32).reshape((3, 2))
@@ -126,37 +136,40 @@ def test_onnx_example(self):
assert_almost_equal(z, res[1])
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_algebra(self):
initial = np.array([0, 0]).astype(np.float32).reshape((2,))
x = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32).reshape((3, 2))
opv = _TARGET_OPSET_
- add_node = OnnxAdd(
- 'sum_in', 'next', output_names=['sum_out'],
- op_version=opv)
- id_node = OnnxIdentity(
- add_node, output_names=['scan_out'],
- op_version=opv)
+ add_node = OnnxAdd("sum_in", "next", output_names=["sum_out"], op_version=opv)
+ id_node = OnnxIdentity(add_node, output_names=["scan_out"], op_version=opv)
scan_body = id_node.to_onnx(
- {'sum_in': initial, 'next': initial},
- outputs=[('sum_out', FloatTensorType()),
- ('scan_out', FloatTensorType())])
+ {"sum_in": initial, "next": initial},
+ outputs=[("sum_out", FloatTensorType()), ("scan_out", FloatTensorType())],
+ )
- node = OnnxScan('initial', 'x', output_names=['y', 'z'],
- num_scan_inputs=1, body=scan_body.graph,
- op_version=opv)
+ node = OnnxScan(
+ "initial",
+ "x",
+ output_names=["y", "z"],
+ num_scan_inputs=1,
+ body=scan_body.graph,
+ op_version=opv,
+ )
model_def = node.to_onnx(
- {'initial': initial, 'x': x},
- outputs=[('y', FloatTensorType()),
- ('z', FloatTensorType())])
+ {"initial": initial, "x": x},
+ outputs=[("y", FloatTensorType()), ("z", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'initial': initial, 'x': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"initial": initial, "x": x})
y = np.array([9, 12]).astype(np.float32).reshape((2,))
z = np.array([1, 2, 4, 6, 9, 12]).astype(np.float32).reshape((3, 2))
@@ -164,46 +177,53 @@ def test_onnx_example_algebra(self):
assert_almost_equal(z, res[1])
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_pdist(self):
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
opv = _TARGET_OPSET_
- diff = OnnxSub('next_in', 'next', output_names=['diff'],
- op_version=opv)
- id_next = OnnxIdentity(
- 'next_in', output_names=['next_out'],
- op_version=opv)
+ diff = OnnxSub("next_in", "next", output_names=["diff"], op_version=opv)
+ id_next = OnnxIdentity("next_in", output_names=["next_out"], op_version=opv)
norm = OnnxReduceSumSquareApi18(
- diff, output_names=['norm'], axes=[1],
- op_version=opv)
+ diff, output_names=["norm"], axes=[1], op_version=opv
+ )
flat = OnnxSqueezeApi11(
- norm, output_names=['scan_out'], axes=[1],
- op_version=opv)
+ norm, output_names=["scan_out"], axes=[1], op_version=opv
+ )
scan_body = id_next.to_onnx(
- OrderedDict([('next_in', x), ('next', FloatTensorType())]),
- outputs=[('next_out', FloatTensorType([3, 2])),
- ('scan_out', FloatTensorType([3]))],
+ OrderedDict([("next_in", x), ("next", FloatTensorType())]),
+ outputs=[
+ ("next_out", FloatTensorType([3, 2])),
+ ("scan_out", FloatTensorType([3])),
+ ],
other_outputs=[flat],
- target_opset=opv)
+ target_opset=opv,
+ )
sess = InferenceSession(
- scan_body.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'next_in': x, 'next': x[:1]})
+ scan_body.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"next_in": x, "next": x[:1]})
assert_almost_equal(x, res[0])
- exp = np.array([0., 18., 20.], dtype=np.float32)
+ exp = np.array([0.0, 18.0, 20.0], dtype=np.float32)
assert_almost_equal(exp, res[1])
node = OnnxScan(
- 'x', 'x', output_names=['y', 'z'],
- num_scan_inputs=1, body=scan_body.graph,
- op_version=opv)
- model_def = node.to_onnx({'x': x},
- outputs=[('y', FloatTensorType([3, 2])),
- ('z', FloatTensorType([3, 3]))])
+ "x",
+ "x",
+ output_names=["y", "z"],
+ num_scan_inputs=1,
+ body=scan_body.graph,
+ op_version=opv,
+ )
+ model_def = node.to_onnx(
+ {"x": x},
+ outputs=[("y", FloatTensorType([3, 2])), ("z", FloatTensorType([3, 3]))],
+ )
try:
onnx.checker.check_model(model_def)
except ValidationError as e:
@@ -213,257 +233,294 @@ def test_onnx_example_pdist(self):
raise e
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'x': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"x": x})
exp = squareform(pdist(x, metric="sqeuclidean"))
assert_almost_equal(x, res[0])
assert_almost_equal(exp, res[1])
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_pdist_in(self):
opv = _TARGET_OPSET_
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- onnx_squareform_pdist(
- cop, dtype=np.float32,
- op_version=opv),
- output_names=['pdist'],
- op_version=opv)
+ onnx_squareform_pdist(cop, dtype=np.float32, op_version=opv),
+ output_names=["pdist"],
+ op_version=opv,
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('pdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("pdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = squareform(pdist(x * 2, metric="sqeuclidean"))
assert_almost_equal(exp, res[0])
x = np.array([1, 2, 4, 5]).astype(np.float32).reshape((2, 2))
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = squareform(pdist(x * 2, metric="sqeuclidean"))
assert_almost_equal(exp, res[0])
x = np.array([1, 2, 4, 5, 5, 6]).astype(np.float32).reshape((2, 3))
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((2, 3))
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = squareform(pdist(x * 2, metric="sqeuclidean"))
assert_almost_equal(exp, res[0])
- @unittest.skipIf((OnnxConstantOfShape is None or
- pv.Version(ort_version) <= pv.Version(THRESHOLD)),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ (
+ OnnxConstantOfShape is None
+ or pv.Version(ort_version) <= pv.Version(THRESHOLD)
+ ),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_constant_of_shape(self):
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
opv = _TARGET_OPSET_
cop2 = OnnxConstantOfShape(
- OnnxShape('input', op_version=opv),
- output_names=['mat'], op_version=opv)
- model_def = cop2.to_onnx({'input': x},
- outputs=[('mat', FloatTensorType())])
+ OnnxShape("input", op_version=opv), output_names=["mat"], op_version=opv
+ )
+ model_def = cop2.to_onnx({"input": x}, outputs=[("mat", FloatTensorType())])
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = np.zeros((3, 2), dtype=np.float32)
assert_almost_equal(exp, res[0])
- tensor_value = onnx.helper.make_tensor("value", onnx.TensorProto.FLOAT,
- (1,), [-5])
+ tensor_value = onnx.helper.make_tensor(
+ "value", onnx.TensorProto.FLOAT, (1,), [-5]
+ )
cop2 = OnnxConstantOfShape(
- OnnxShape('input', op_version=opv),
- value=tensor_value, output_names=['mat'],
- op_version=opv)
- model_def = cop2.to_onnx({'input': x},
- outputs=[('mat', FloatTensorType())])
+ OnnxShape("input", op_version=opv),
+ value=tensor_value,
+ output_names=["mat"],
+ op_version=opv,
+ )
+ model_def = cop2.to_onnx({"input": x}, outputs=[("mat", FloatTensorType())])
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
- exp = np.full((3, 2), -5.)
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
+ exp = np.full((3, 2), -5.0)
assert_almost_equal(exp, res[0])
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_cdist_in(self):
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
- x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]).astype(
- np.float32).reshape((4, 2))
+ x2 = (
+ np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0])
+ .astype(np.float32)
+ .reshape((4, 2))
+ )
opv = _TARGET_OPSET_
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- onnx_cdist(cop, x2, dtype=np.float32,
- op_version=opv),
- output_names=['cdist'], op_version=opv)
+ onnx_cdist(cop, x2, dtype=np.float32, op_version=opv),
+ output_names=["cdist"],
+ op_version=opv,
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x2, metric="sqeuclidean")
assert_almost_equal(exp, res[0], decimal=5)
- x = np.array([[6.1, 2.8, 4.7, 1.2],
- [5.7, 3.8, 1.7, 0.3],
- [7.7, 2.6, 6.9, 2.3],
- [6.0, 2.9, 4.5, 1.5],
- [6.8, 2.8, 4.8, 1.4],
- [5.4, 3.4, 1.5, 0.4],
- [5.6, 2.9, 3.6, 1.3],
- [6.9, 3.1, 5.1, 2.3]], dtype=np.float32)
- cop = OnnxAdd('input', 'input', op_version=opv)
+ x = np.array(
+ [
+ [6.1, 2.8, 4.7, 1.2],
+ [5.7, 3.8, 1.7, 0.3],
+ [7.7, 2.6, 6.9, 2.3],
+ [6.0, 2.9, 4.5, 1.5],
+ [6.8, 2.8, 4.8, 1.4],
+ [5.4, 3.4, 1.5, 0.4],
+ [5.6, 2.9, 3.6, 1.3],
+ [6.9, 3.1, 5.1, 2.3],
+ ],
+ dtype=np.float32,
+ )
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
onnx_cdist(cop, x, dtype=np.float32, op_version=opv),
- output_names=['cdist'],
- op_version=opv)
+ output_names=["cdist"],
+ op_version=opv,
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x, metric="sqeuclidean")
assert_almost_equal(exp, res[0], decimal=4)
assert "u_scan0_" not in str(model_def)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD2),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD2),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_cdist_in_mink(self):
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
- x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]).astype(
- np.float32).reshape((4, 2))
+ x2 = (
+ np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0])
+ .astype(np.float32)
+ .reshape((4, 2))
+ )
opv = _TARGET_OPSET_
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- onnx_cdist(cop, x2, dtype=np.float32,
- metric="minkowski", p=2,
- op_version=opv),
- output_names=['cdist'],
- op_version=opv)
+ onnx_cdist(
+ cop, x2, dtype=np.float32, metric="minkowski", p=2, op_version=opv
+ ),
+ output_names=["cdist"],
+ op_version=opv,
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x2, metric="minkowski")
assert_almost_equal(exp, res[0], decimal=5)
- x = np.array([[6.1, 2.8, 4.7, 1.2],
- [5.7, 3.8, 1.7, 0.3],
- [7.7, 2.6, 6.9, 2.3],
- [6.0, 2.9, 4.5, 1.5],
- [6.8, 2.8, 4.8, 1.4],
- [5.4, 3.4, 1.5, 0.4],
- [5.6, 2.9, 3.6, 1.3],
- [6.9, 3.1, 5.1, 2.3]], dtype=np.float32)
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ x = np.array(
+ [
+ [6.1, 2.8, 4.7, 1.2],
+ [5.7, 3.8, 1.7, 0.3],
+ [7.7, 2.6, 6.9, 2.3],
+ [6.0, 2.9, 4.5, 1.5],
+ [6.8, 2.8, 4.8, 1.4],
+ [5.4, 3.4, 1.5, 0.4],
+ [5.6, 2.9, 3.6, 1.3],
+ [6.9, 3.1, 5.1, 2.3],
+ ],
+ dtype=np.float32,
+ )
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- onnx_cdist(cop, x, dtype=np.float32,
- op_version=opv),
- output_names=['cdist'],
- op_version=opv)
+ onnx_cdist(cop, x, dtype=np.float32, op_version=opv),
+ output_names=["cdist"],
+ op_version=opv,
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x, metric="sqeuclidean")
assert_almost_equal(exp, res[0], decimal=4)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version(THRESHOLD2),
- reason="fails with onnxruntime 0.4.0")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version(THRESHOLD2),
+ reason="fails with onnxruntime 0.4.0",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_example_cdist_in_custom_ops(self):
x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
- x2 = np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0]).astype(
- np.float32).reshape((4, 2))
+ x2 = (
+ np.array([1.1, 2.1, 4.01, 5.01, 5.001, 4.001, 0, 0])
+ .astype(np.float32)
+ .reshape((4, 2))
+ )
opv = _TARGET_OPSET_
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- OnnxCDist(cop, x2, op_version=opv),
- output_names=['cdist'],
- op_version=opv)
+ OnnxCDist(cop, x2, op_version=opv), output_names=["cdist"], op_version=opv
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
try:
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
if "CDist is not a registered" in str(e):
return
- res = sess.run(None, {'input': x})
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x2, metric="sqeuclidean")
assert_almost_equal(exp, res[0], decimal=5)
- x = np.array([[6.1, 2.8, 4.7, 1.2],
- [5.7, 3.8, 1.7, 0.3],
- [7.7, 2.6, 6.9, 2.3],
- [6.0, 2.9, 4.5, 1.5],
- [6.8, 2.8, 4.8, 1.4],
- [5.4, 3.4, 1.5, 0.4],
- [5.6, 2.9, 3.6, 1.3],
- [6.9, 3.1, 5.1, 2.3]], dtype=np.float32)
- cop = OnnxAdd(
- 'input', 'input', op_version=opv)
+ x = np.array(
+ [
+ [6.1, 2.8, 4.7, 1.2],
+ [5.7, 3.8, 1.7, 0.3],
+ [7.7, 2.6, 6.9, 2.3],
+ [6.0, 2.9, 4.5, 1.5],
+ [6.8, 2.8, 4.8, 1.4],
+ [5.4, 3.4, 1.5, 0.4],
+ [5.6, 2.9, 3.6, 1.3],
+ [6.9, 3.1, 5.1, 2.3],
+ ],
+ dtype=np.float32,
+ )
+ cop = OnnxAdd("input", "input", op_version=opv)
cop2 = OnnxIdentity(
- OnnxCDist(cop, x,
- op_version=opv),
- output_names=['cdist'],
- op_version=opv)
+ OnnxCDist(cop, x, op_version=opv), output_names=["cdist"], op_version=opv
+ )
model_def = cop2.to_onnx(
- inputs=[('input', FloatTensorType([None, None]))],
- outputs=[('cdist', FloatTensorType())])
+ inputs=[("input", FloatTensorType([None, None]))],
+ outputs=[("cdist", FloatTensorType())],
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
exp = scipy_cdist(x * 2, x, metric="sqeuclidean")
assert_almost_equal(exp, res[0], decimal=4)
diff --git a/tests/test_algebra_onnx_operators_sparse.py b/tests/test_algebra_onnx_operators_sparse.py
index c8ad90a27..d52d50f44 100644
--- a/tests/test_algebra_onnx_operators_sparse.py
+++ b/tests/test_algebra_onnx_operators_sparse.py
@@ -7,14 +7,16 @@
from numpy.testing import assert_almost_equal
from scipy.sparse import coo_matrix
from onnxruntime import InferenceSession, __version__ as ort_version
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import (
- InvalidArgument as OrtInvalidArgument
+ InvalidArgument as OrtInvalidArgument,
)
except ImportError:
OrtInvalidArgument = None
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.algebra.onnx_ops import OnnxAdd
+
try:
from skl2onnx.algebra.onnx_ops import OnnxConstantOfShape
except ImportError:
@@ -26,57 +28,54 @@
class TestOnnxOperatorsSparse(unittest.TestCase):
-
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="only available for opset >= 11")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version(THRESHOLD),
- reason="fails with onnxruntime < %s" % THRESHOLD)
+ @unittest.skipIf(TARGET_OPSET < 11, reason="only available for opset >= 11")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version(THRESHOLD),
+ reason="fails with onnxruntime < %s" % THRESHOLD,
+ )
def test_onnx_init_dense(self):
X = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32).reshape((3, 2))
- node = OnnxAdd('X', X, output_names=['Y'], op_version=TARGET_OPSET)
+ node = OnnxAdd("X", X, output_names=["Y"], op_version=TARGET_OPSET)
- model_def = node.to_onnx({'X': X},
- outputs=[('Y', FloatTensorType())])
+ model_def = node.to_onnx({"X": X}, outputs=[("Y", FloatTensorType())])
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X})[0]
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X})[0]
assert_almost_equal(X + X, res)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="only available for opset >= 11")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version(THRESHOLD),
- reason="fails with onnxruntime < %s" % THRESHOLD)
+ @unittest.skipIf(TARGET_OPSET < 11, reason="only available for opset >= 11")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version(THRESHOLD),
+ reason="fails with onnxruntime < %s" % THRESHOLD,
+ )
def test_onnx_init_sparse_coo(self):
row = np.array([0, 0, 1, 3, 1], dtype=np.float32)
col = np.array([0, 2, 1, 3, 1], dtype=np.float32)
data = np.array([1, 1, 1, 1, 1], dtype=np.float32)
X = coo_matrix((data, (row, col)), shape=(4, 4))
- node = OnnxAdd(
- 'X', X, output_names=['Y'],
- op_version=TARGET_OPSET)
+ node = OnnxAdd("X", X, output_names=["Y"], op_version=TARGET_OPSET)
- model_def = node.to_onnx(
- {'X': X}, outputs=[('Y', FloatTensorType())])
+ model_def = node.to_onnx({"X": X}, outputs=[("Y", FloatTensorType())])
try:
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except (RuntimeError, OrtInvalidArgument):
# Sparse tensor is not supported for constant.
return
try:
- res = sess.run(None, {'X': X})[0]
+ res = sess.run(None, {"X": X})[0]
except RuntimeError as e:
# Sparse tensor is not supported for constant.
warnings.warn(
- "Unable to run with %r\n---\n%s\n%s" % (
- {'X': X}, model_def, e))
+ "Unable to run with %r\n---\n%s\n%s" % ({"X": X}, model_def, e)
+ )
return
assert_almost_equal(X + X, res)
diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py
index 81a53ffa7..231d7f7cd 100644
--- a/tests/test_algebra_onnx_operators_sub_estimator.py
+++ b/tests/test_algebra_onnx_operators_sub_estimator.py
@@ -5,40 +5,42 @@
import packaging.version as pv
import numpy as np
from numpy.testing import assert_almost_equal
-from sklearn.base import (
- BaseEstimator, ClassifierMixin, clone, TransformerMixin)
+from sklearn.base import BaseEstimator, ClassifierMixin, clone, TransformerMixin
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from onnxruntime import __version__ as ort_version
from skl2onnx.algebra.onnx_ops import (
- OnnxIdentity, OnnxCast, OnnxReduceMaxApi18, OnnxGreater,
- OnnxExp)
+ OnnxIdentity,
+ OnnxCast,
+ OnnxReduceMaxApi18,
+ OnnxGreater,
+ OnnxExp,
+)
from skl2onnx import update_registered_converter
from skl2onnx import to_onnx, get_model_alias
from skl2onnx.proto import onnx_proto
-from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType)
+from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
class ValidatorClassifier(BaseEstimator, ClassifierMixin):
-
def __init__(self, estimator=None, threshold=0.75):
ClassifierMixin.__init__(self)
BaseEstimator.__init__(self)
if estimator is None:
- estimator = LogisticRegression(solver='liblinear')
+ estimator = LogisticRegression(solver="liblinear")
self.estimator = estimator
self.threshold = threshold
def fit(self, X, y, sample_weight=None):
sig = inspect.signature(self.estimator.fit)
- if 'sample_weight' in sig.parameters:
+ if "sample_weight" in sig.parameters:
self.estimator_ = clone(self.estimator).fit(
- X, y, sample_weight=sample_weight)
+ X, y, sample_weight=sample_weight
+ )
else:
self.estimator_ = clone(self.estimator).fit(X, y)
return self
@@ -56,45 +58,40 @@ def validate(self, X):
def validator_classifier_shape_calculator(operator):
-
input = operator.inputs[0] # inputs in ONNX graph
outputs = operator.outputs # outputs in ONNX graph
op = operator.raw_operator # scikit-learn model (mmust be fitted)
if len(outputs) != 3:
raise RuntimeError("3 outputs expected not {}.".format(len(outputs)))
- N = input.type.shape[0] # number of observations
- C = op.estimator_.classes_.shape[0] # dimension of outputs
+ N = input.type.shape[0] # number of observations
+ C = op.estimator_.classes_.shape[0] # dimension of outputs
- outputs[0].type = Int64TensorType([N]) # label
+ outputs[0].type = Int64TensorType([N]) # label
outputs[1].type = FloatTensorType([N, C]) # probabilities
- outputs[2].type = Int64TensorType([C]) # validation
+ outputs[2].type = Int64TensorType([C]) # validation
def validator_classifier_converter(scope, operator, container):
- input = operator.inputs[0] # input in ONNX graph
- outputs = operator.outputs # outputs in ONNX graph
- op = operator.raw_operator # scikit-learn model (mmust be fitted)
+ input = operator.inputs[0] # input in ONNX graph
+ outputs = operator.outputs # outputs in ONNX graph
+ op = operator.raw_operator # scikit-learn model (must be fitted)
opv = container.target_opset
# We reuse existing converter and declare it as local
# operator.
model = op.estimator_
- onnx_op = OnnxSubEstimator(model, input, op_version=opv,
- options={'zipmap': False})
+ onnx_op = OnnxSubEstimator(model, input, op_version=opv, options={"zipmap": False})
rmax = OnnxReduceMaxApi18(onnx_op[1], axes=[1], keepdims=0, op_version=opv)
- great = OnnxGreater(rmax, np.array([op.threshold], dtype=np.float32),
- op_version=opv)
- valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64,
- op_version=opv)
-
- r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name],
- op_version=opv)
- r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name],
- op_version=opv)
- r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name],
- op_version=opv)
+ great = OnnxGreater(
+ rmax, np.array([op.threshold], dtype=np.float32), op_version=opv
+ )
+ valid = OnnxCast(great, to=onnx_proto.TensorProto.INT64, op_version=opv)
+
+ r1 = OnnxIdentity(onnx_op[0], output_names=[outputs[0].full_name], op_version=opv)
+ r2 = OnnxIdentity(onnx_op[1], output_names=[outputs[1].full_name], op_version=opv)
+ r3 = OnnxIdentity(valid, output_names=[outputs[2].full_name], op_version=opv)
r1.add_to(scope, container)
r2.add_to(scope, container)
@@ -109,9 +106,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
this_operator.inputs.append(inputs[0])
# outputs
- val_label = scope.declare_local_variable('val_label', Int64TensorType())
- val_prob = scope.declare_local_variable('val_prob', FloatTensorType())
- val_val = scope.declare_local_variable('val_val', Int64TensorType())
+ val_label = scope.declare_local_variable("val_label", Int64TensorType())
+ val_prob = scope.declare_local_variable("val_prob", FloatTensorType())
+ val_val = scope.declare_local_variable("val_val", Int64TensorType())
this_operator.outputs.append(val_label)
this_operator.outputs.append(val_prob)
this_operator.outputs.append(val_val)
@@ -141,7 +138,6 @@ def dummy_conv_2(scope, operator):
class MinMaxScalerTwo(BaseEstimator, TransformerMixin):
-
def __init__(self):
pass
@@ -161,7 +157,7 @@ def subsub_mmtwo_parser(scope, model, inputs, custom_parsers=None):
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs.append(inputs[0])
cls_type = inputs[0].type.__class__
- val = scope.declare_local_variable('variable', cls_type())
+ val = scope.declare_local_variable("variable", cls_type())
this_operator.outputs.append(val)
return this_operator.outputs
@@ -176,19 +172,18 @@ def subsub_mmtwo_converter(scope, operator, container):
out = operator.outputs
X = operator.inputs[0]
x2 = OnnxSubEstimator(op.est1_, X, op_version=opv)
- x2.set_onnx_name_prefix('AAA')
+ x2.set_onnx_name_prefix("AAA")
x2_exp = OnnxExp(x2, op_version=opv)
x3 = OnnxSubEstimator(op.est2_, x2_exp, op_version=opv)
- x3.set_onnx_name_prefix('BBB')
+ x3.set_onnx_name_prefix("BBB")
final = OnnxIdentity(x3, op_version=opv, output_names=out[:1])
final.add_to(scope, container)
class TestOnnxOperatorSubEstimator(unittest.TestCase):
-
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.0"),
- reason="Cast not available.")
+ pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available."
+ )
def test_sub_estimator_exc(self):
data = load_iris()
X, y = data.data, data.target
@@ -201,20 +196,24 @@ def test_sub_estimator_exc(self):
try:
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
validator_classifier_shape_calculator,
validator_classifier_converter,
- parser=dummy1_parser)
+ parser=dummy1_parser,
+ )
raise AssertionError("exception not raised")
except TypeError:
pass
try:
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
validator_classifier_shape_calculator,
validator_classifier_converter,
- parser=dummy1_parser)
+ parser=dummy1_parser,
+ )
raise AssertionError("exception not raised")
except TypeError:
pass
@@ -223,10 +222,12 @@ def test_sub_estimator_exc(self):
try:
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
dummy_val_2,
validator_classifier_converter,
- parser=validator_classifier_parser)
+ parser=validator_classifier_parser,
+ )
raise AssertionError("exception not raised")
except TypeError:
pass
@@ -235,27 +236,31 @@ def test_sub_estimator_exc(self):
try:
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
validator_classifier_shape_calculator,
dummy_conv_1,
- parser=validator_classifier_parser)
+ parser=validator_classifier_parser,
+ )
raise AssertionError("exception not raised")
except NameError:
pass
try:
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
validator_classifier_shape_calculator,
dummy_conv_2,
- parser=validator_classifier_parser)
+ parser=validator_classifier_parser,
+ )
raise AssertionError("exception not raised")
except TypeError:
pass
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.0"),
- reason="Cast not available.")
+ pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available."
+ )
def test_sub_estimator(self):
data = load_iris()
X, y = data.data, data.target
@@ -265,24 +270,26 @@ def test_sub_estimator(self):
model.fit(X_train, y_train)
update_registered_converter(
- ValidatorClassifier, 'CustomValidatorClassifier',
+ ValidatorClassifier,
+ "CustomValidatorClassifier",
validator_classifier_shape_calculator,
validator_classifier_converter,
- parser=validator_classifier_parser)
+ parser=validator_classifier_parser,
+ )
X32 = X_test[:5].astype(np.float32)
- model_onnx = to_onnx(
- model, X32, target_opset=TARGET_OPSET)
- sess = InferenceSession(model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X32})
+ model_onnx = to_onnx(model, X32, target_opset=TARGET_OPSET)
+ sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X32})
assert_almost_equal(model.predict(X32), res[0])
assert_almost_equal(model.predict_proba(X32), res[1], decimal=4)
assert_almost_equal(model.validate(X32), res[2])
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.0"),
- reason="Cast not available.")
+ pv.Version(ort_version) < pv.Version("1.0"), reason="Cast not available."
+ )
def test_sub_sub_estimator(self):
data = load_iris()
X, y = data.data, data.target
@@ -292,17 +299,19 @@ def test_sub_sub_estimator(self):
model.fit(X_train, y_train)
update_registered_converter(
- MinMaxScalerTwo, "SubSubDummy",
+ MinMaxScalerTwo,
+ "SubSubDummy",
subsub_mmtwo_shape_calculator,
subsub_mmtwo_converter,
- parser=subsub_mmtwo_parser)
+ parser=subsub_mmtwo_parser,
+ )
X32 = X_test[:5].astype(np.float32)
- model_onnx = to_onnx(
- model, X32, target_opset=TARGET_OPSET)
- sess = InferenceSession(model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X32})
+ model_onnx = to_onnx(model, X32, target_opset=TARGET_OPSET)
+ sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X32})
assert_almost_equal(model.transform(X32), res[0], decimal=5)
diff --git a/tests/test_algebra_onnx_operators_wrapped.py b/tests/test_algebra_onnx_operators_wrapped.py
index 8e9a046bb..d0de8dfa1 100644
--- a/tests/test_algebra_onnx_operators_wrapped.py
+++ b/tests/test_algebra_onnx_operators_wrapped.py
@@ -17,8 +17,7 @@
class DecorrelateTransformer(TransformerMixin, BaseEstimator):
-
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
@@ -33,8 +32,7 @@ def transform(self, X):
class DecorrelateTransformer2(TransformerMixin, BaseEstimator):
-
- def __init__(self, alpha=0.):
+ def __init__(self, alpha=0.0):
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
self.alpha = alpha
@@ -76,97 +74,104 @@ def decorrelate_transformer_convertor2(scope, operator, container):
class TestOnnxOperatorsWrapped(unittest.TestCase):
-
- @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'),
- reason="onnxruntime too old")
+ @unittest.skipIf(
+ pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old"
+ )
def test_sub(self):
-
data = load_iris()
X = data.data
dec = DecorrelateTransformer()
dec.fit(X)
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_convertor)
+ decorrelate_transformer_convertor,
+ )
onx = to_onnx(dec, X.astype(np.float32), target_opset=TARGET_OPSET)
self.assertIn('output: "variable"', str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
exp = dec.transform(X.astype(np.float32))
- got = sess.run(None, {'X': X.astype(np.float32)})[0]
+ got = sess.run(None, {"X": X.astype(np.float32)})[0]
assert_almost_equal(got, exp, decimal=4)
- @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'),
- reason="onnxruntime too old")
+ @unittest.skipIf(
+ pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old"
+ )
def test_sub_double(self):
-
data = load_iris()
X = data.data
dec = DecorrelateTransformer()
dec.fit(X)
update_registered_converter(
- DecorrelateTransformer, "SklearnDecorrelateTransformer",
+ DecorrelateTransformer,
+ "SklearnDecorrelateTransformer",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_convertor)
+ decorrelate_transformer_convertor,
+ )
onx = to_onnx(dec, X.astype(np.float64), target_opset=TARGET_OPSET)
self.assertIn('output: "variable"', str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
exp = dec.transform(X.astype(np.float64))
- got = sess.run(None, {'X': X.astype(np.float64)})[0]
+ got = sess.run(None, {"X": X.astype(np.float64)})[0]
assert_almost_equal(got, exp, decimal=4)
- @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'),
- reason="onnxruntime too old")
+ @unittest.skipIf(
+ pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old"
+ )
def test_sub_output(self):
-
data = load_iris()
X = data.data
dec = DecorrelateTransformer2()
dec.fit(X)
update_registered_converter(
- DecorrelateTransformer2, "SklearnDecorrelateTransformer2",
+ DecorrelateTransformer2,
+ "SklearnDecorrelateTransformer2",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_convertor2)
+ decorrelate_transformer_convertor2,
+ )
onx = to_onnx(dec, X.astype(np.float32), target_opset=TARGET_OPSET)
self.assertIn('output: "variable"', str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
exp = dec.transform(X.astype(np.float32))
- got = sess.run(None, {'X': X.astype(np.float32)})[0]
+ got = sess.run(None, {"X": X.astype(np.float32)})[0]
assert_almost_equal(got, exp, decimal=4)
- @unittest.skipIf(pv.Version(ortv) < pv.Version('0.5.0'),
- reason="onnxruntime too old")
+ @unittest.skipIf(
+ pv.Version(ortv) < pv.Version("0.5.0"), reason="onnxruntime too old"
+ )
def test_sub_output_double(self):
-
data = load_iris()
X = data.data
dec = DecorrelateTransformer2()
dec.fit(X)
update_registered_converter(
- DecorrelateTransformer2, "SklearnDecorrelateTransformer2",
+ DecorrelateTransformer2,
+ "SklearnDecorrelateTransformer2",
decorrelate_transformer_shape_calculator,
- decorrelate_transformer_convertor2)
+ decorrelate_transformer_convertor2,
+ )
onx = to_onnx(dec, X.astype(np.float64), target_opset=TARGET_OPSET)
self.assertIn('output: "variable"', str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
exp = dec.transform(X.astype(np.float64))
- got = sess.run(None, {'X': X.astype(np.float64)})[0]
+ got = sess.run(None, {"X": X.astype(np.float64)})[0]
assert_almost_equal(got, exp, decimal=4)
diff --git a/tests/test_algebra_symbolic.py b/tests/test_algebra_symbolic.py
index 923bda0e7..f84a35974 100644
--- a/tests/test_algebra_symbolic.py
+++ b/tests/test_algebra_symbolic.py
@@ -5,179 +5,165 @@
import numpy
from numpy.random import rand
from numpy.testing import assert_almost_equal
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph, Fail
except ImportError:
InvalidGraph = RuntimeError
Fail = RuntimeError
from skl2onnx.common.data_types import FloatTensorType
+
try:
from skl2onnx.algebra.onnx_ops import OnnxAbs, OnnxNormalizer, OnnxArgMin
from skl2onnx.algebra.onnx_ops import OnnxSplitApi18, OnnxScaler
except ImportError:
- warnings.warn(
- 'Unable to test OnnxAbs, OnnxNormalizer, OnnxArgMin, OnnxSplit.')
+ warnings.warn("Unable to test OnnxAbs, OnnxNormalizer, OnnxArgMin, OnnxSplit.")
OnnxAbs = None
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
class TestAlgebraSymbolic(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_abs(self):
-
- op = OnnxAbs('I0', op_version=TARGET_OPSET)
- onx = op.to_onnx({'I0': numpy.empty((1, 2), dtype=numpy.float32)})
+ op = OnnxAbs("I0", op_version=TARGET_OPSET)
+ onx = op.to_onnx({"I0": numpy.empty((1, 2), dtype=numpy.float32)})
assert onx is not None
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
raise RuntimeError("Unable to read\n{}".format(onx)) from e
X = numpy.array([[0, 1], [-1, -2]])
try:
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
except RuntimeError as e:
raise RuntimeError("Unable to run\n{}".format(onx)) from e
assert_almost_equal(Y, numpy.abs(X))
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="shape inference fails for Normalizer")
+ @unittest.skipIf(OnnxAbs is None, reason="shape inference fails for Normalizer")
def test_algebra_normalizer(self):
- op = OnnxNormalizer('I0', norm='L1', op_version=1,
- output_names=['Y'])
- onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)},
- outputs=[('Y', FloatTensorType())],
- target_opset={'': 10})
+ op = OnnxNormalizer("I0", norm="L1", op_version=1, output_names=["Y"])
+ onx = op.to_onnx(
+ {"I0": numpy.ones((1, 2), dtype=numpy.float32)},
+ outputs=[("Y", FloatTensorType())],
+ target_opset={"": 10},
+ )
assert onx is not None
sonx = str(onx)
assert "ai.onnx.ml" in sonx
assert "version: 1" in sonx
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.array([[0, 2], [0, -2]])
exp = numpy.array([[0, 1], [0, -1]])
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
assert_almost_equal(exp, Y)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_normalizer_shape(self):
-
- op = OnnxNormalizer('I0', norm='L1', op_version=1, output_names=['O0'])
- onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)},
- outputs=[('O0', FloatTensorType((None, 2)))])
+ op = OnnxNormalizer("I0", norm="L1", op_version=1, output_names=["O0"])
+ onx = op.to_onnx(
+ {"I0": numpy.ones((1, 2), dtype=numpy.float32)},
+ outputs=[("O0", FloatTensorType((None, 2)))],
+ )
assert onx is not None
sonx = str(onx)
assert "ai.onnx.ml" in sonx
assert "version: 1" in sonx
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.array([[0, 2], [0, -2]])
exp = numpy.array([[0, 1], [0, -1]])
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
assert_almost_equal(exp, Y)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_argmin(self):
-
- op = OnnxArgMin('I0', op_version=TARGET_OPSET)
- onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)})
+ op = OnnxArgMin("I0", op_version=TARGET_OPSET)
+ onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)})
assert onx is not None
sonx = str(onx)
assert len(sonx) > 0
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.array([[0, 2], [0, -2]])
exp = numpy.array([[0, 1]])
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
assert_almost_equal(exp, Y)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_normalizer_argmin_named_output(self):
-
op = OnnxArgMin(
- OnnxNormalizer('I0', norm='L1', output_names=['Y']),
- op_version=TARGET_OPSET)
- onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)})
+ OnnxNormalizer("I0", norm="L1", output_names=["Y"]), op_version=TARGET_OPSET
+ )
+ onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)})
assert onx is not None
sonx = str(onx)
assert len(sonx) > 0
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.array([[0, 2], [0, -2]])
exp = numpy.array([[0, 1]])
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
assert_almost_equal(exp, Y)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_normalizer_argmin(self):
-
- op = OnnxArgMin(
- OnnxNormalizer(
- 'I0', norm='L1'),
- op_version=TARGET_OPSET)
- onx = op.to_onnx({'I0': numpy.ones((1, 2), dtype=numpy.float32)})
+ op = OnnxArgMin(OnnxNormalizer("I0", norm="L1"), op_version=TARGET_OPSET)
+ onx = op.to_onnx({"I0": numpy.ones((1, 2), dtype=numpy.float32)})
assert onx is not None
sonx = str(onx)
assert len(sonx) > 0
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.array([[0, 2], [0, -2]])
exp = numpy.array([[0, 1]])
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})[0]
assert_almost_equal(exp, Y)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_algebra_split(self):
-
- op = OnnxSplitApi18('I0', axis=0, output_names=['O1', 'O2'],
- op_version=TARGET_OPSET)
- onx = op.to_onnx({'I0': numpy.arange(6, dtype=numpy.float32)})
+ op = OnnxSplitApi18(
+ "I0", axis=0, output_names=["O1", "O2"], op_version=TARGET_OPSET
+ )
+ onx = op.to_onnx({"I0": numpy.arange(6, dtype=numpy.float32)})
assert onx is not None
sonx = str(onx)
assert len(sonx) > 0
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = numpy.arange(6)
exp = [numpy.array([0, 1, 2]), numpy.array([3, 4, 5])]
- Y = sess.run(None, {'I0': X.astype(numpy.float32)})
+ Y = sess.run(None, {"I0": X.astype(numpy.float32)})
assert len(Y) == len(exp)
assert_almost_equal(exp[0], Y[0])
assert_almost_equal(exp[1], Y[1])
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(OnnxAbs is None,
- reason="Cannot infer operators with current ONNX")
+ @unittest.skipIf(OnnxAbs is None, reason="Cannot infer operators with current ONNX")
def test_cascade_scaler(self):
-
- def generate_onnx_graph(dim, nbnode, input_name='X1'):
+ def generate_onnx_graph(dim, nbnode, input_name="X1"):
matrices = []
scale = list(numpy.ones((1, dim)).ravel())
i1 = input_name
@@ -188,26 +174,27 @@ def generate_onnx_graph(dim, nbnode, input_name='X1'):
i1 = node
i2 = list(rand(1, dim).ravel())
matrices.append(i2)
- node = OnnxScaler(
- i1, offset=i2, scale=scale, output_names=['Y'])
- onx = node.to_onnx([(input_name, FloatTensorType((None, dim)))],
- outputs=[('Y', FloatTensorType((None, dim)))])
+ node = OnnxScaler(i1, offset=i2, scale=scale, output_names=["Y"])
+ onx = node.to_onnx(
+ [(input_name, FloatTensorType((None, dim)))],
+ outputs=[("Y", FloatTensorType((None, dim)))],
+ )
return onx, matrices
import onnxruntime as ort
+
dim = 5
for nbnode in range(1, 4):
onx = generate_onnx_graph(dim, nbnode)[0]
X = rand(1, dim)
try:
sess = ort.InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
- raise AssertionError(
- "Loading error:\n{}\n{}".format(e, onx)) from e
+ raise AssertionError("Loading error:\n{}\n{}".format(e, onx)) from e
try:
- Y = sess.run(None, {'X1': X.astype(numpy.float32)})[0]
+ Y = sess.run(None, {"X1": X.astype(numpy.float32)})[0]
except RuntimeError as e:
raise RuntimeError("Run error:\n{}\n{}".format(e, onx))
assert X.shape == Y.shape
diff --git a/tests/test_algebra_test_helper.py b/tests/test_algebra_test_helper.py
index 42b208f3f..e01e75e69 100644
--- a/tests/test_algebra_test_helper.py
+++ b/tests/test_algebra_test_helper.py
@@ -5,22 +5,30 @@
from skl2onnx.proto import onnx_proto
from skl2onnx.algebra.type_helper import _guess_type
from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType,
- Int32TensorType, StringTensorType,
- BooleanTensorType, DoubleTensorType,
- Int8TensorType, UInt8TensorType,
- guess_data_type, guess_numpy_type, _guess_numpy_type,
- guess_proto_type, guess_tensor_type, _guess_type_proto)
+ FloatTensorType,
+ Int64TensorType,
+ Int32TensorType,
+ StringTensorType,
+ BooleanTensorType,
+ DoubleTensorType,
+ Int8TensorType,
+ UInt8TensorType,
+ guess_data_type,
+ guess_numpy_type,
+ _guess_numpy_type,
+ guess_proto_type,
+ guess_tensor_type,
+ _guess_type_proto,
+)
+
try:
- from skl2onnx.common.data_types import (
- Complex64TensorType, Complex128TensorType)
+ from skl2onnx.common.data_types import Complex64TensorType, Complex128TensorType
except ImportError:
Complex64TensorType = None
Complex128TensorType = None
class TestAlgebraTestHelper(unittest.TestCase):
-
def test_guess_type(self):
dtypes = [
(np.int32, Int32TensorType),
@@ -29,7 +37,7 @@ def test_guess_type(self):
(np.str_, StringTensorType),
(np.bool_, BooleanTensorType),
(np.int8, Int8TensorType),
- (np.uint8, UInt8TensorType)
+ (np.uint8, UInt8TensorType),
]
if Complex64TensorType is not None:
dtypes.append((np.complex64, Complex64TensorType))
@@ -47,12 +55,14 @@ def test_guess_type(self):
dtypes = [np.float64]
for dtype in dtypes:
mat = np.zeros((3, 3), dtype=dtype)
- _guess_type(mat, )
+ _guess_type(
+ mat,
+ )
def test_guess_data_type(self):
ty = guess_data_type(np.array([3, 5], dtype=np.int32))
self.assertEqual(len(ty), 1)
- self.assertEqual(ty[0][0], 'input')
+ self.assertEqual(ty[0][0], "input")
assert isinstance(ty[0][1], Int32TensorType)
ty = guess_data_type("tensor(int32)", shape=[3, 5])
@@ -93,7 +103,7 @@ def test_guess_numpy_type(self):
(np.str_, StringTensorType),
(np.bool_, BooleanTensorType),
(np.int8, Int8TensorType),
- (np.uint8, UInt8TensorType)
+ (np.uint8, UInt8TensorType),
]
if Complex64TensorType is not None:
dtypes.append((np.complex64, Complex64TensorType))
@@ -116,14 +126,16 @@ def test_proto_type(self):
(np.str_, StringTensorType, onnx_proto.TensorProto.STRING),
(np.bool_, BooleanTensorType, onnx_proto.TensorProto.BOOL),
(np.int8, Int8TensorType, onnx_proto.TensorProto.INT8),
- (np.uint8, UInt8TensorType, onnx_proto.TensorProto.UINT8)
+ (np.uint8, UInt8TensorType, onnx_proto.TensorProto.UINT8),
]
if Complex64TensorType is not None:
- dtypes.append((np.complex64, Complex64TensorType,
- onnx_proto.TensorProto.COMPLEX64))
+ dtypes.append(
+ (np.complex64, Complex64TensorType, onnx_proto.TensorProto.COMPLEX64)
+ )
if Complex128TensorType is not None:
- dtypes.append((np.complex128, Complex128TensorType,
- onnx_proto.TensorProto.COMPLEX128))
+ dtypes.append(
+ (np.complex128, Complex128TensorType, onnx_proto.TensorProto.COMPLEX128)
+ )
for dtype, exp, pt in dtypes:
nt2 = guess_proto_type(exp([None, 1]))
self.assertEqual(nt2, pt)
@@ -137,14 +149,16 @@ def test_tensor_type(self):
(np.float32, FloatTensorType, onnx_proto.TensorProto.FLOAT),
(np.float64, DoubleTensorType, onnx_proto.TensorProto.DOUBLE),
(np.int8, FloatTensorType, onnx_proto.TensorProto.INT8),
- (np.uint8, FloatTensorType, onnx_proto.TensorProto.UINT8)
+ (np.uint8, FloatTensorType, onnx_proto.TensorProto.UINT8),
]
if Complex64TensorType is not None:
- dtypes.append((np.complex64, Complex64TensorType,
- onnx_proto.TensorProto.COMPLEX64))
+ dtypes.append(
+ (np.complex64, Complex64TensorType, onnx_proto.TensorProto.COMPLEX64)
+ )
if Complex128TensorType is not None:
- dtypes.append((np.complex128, Complex128TensorType,
- onnx_proto.TensorProto.COMPLEX128))
+ dtypes.append(
+ (np.complex128, Complex128TensorType, onnx_proto.TensorProto.COMPLEX128)
+ )
for dtype, exp, pt in dtypes:
nt2 = guess_tensor_type(exp([None, 1]))
self.assertEqual(nt2.__class__, exp)
diff --git a/tests/test_algebra_to_onnx.py b/tests/test_algebra_to_onnx.py
index 21f7b6a85..0ea9d7f74 100644
--- a/tests/test_algebra_to_onnx.py
+++ b/tests/test_algebra_to_onnx.py
@@ -3,9 +3,14 @@
import numpy as np
from onnx.defs import onnx_opset_version
from onnxruntime import InferenceSession, __version__ as ort_version
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import (
- InvalidGraph, Fail, InvalidArgument, NotImplemented)
+ InvalidGraph,
+ Fail,
+ InvalidArgument,
+ NotImplemented,
+ )
except ImportError:
InvalidGraph = RuntimeError
InvalidArgument = RuntimeError
@@ -19,80 +24,84 @@
from sklearn.utils.testing import ignore_warnings
from sklearn.linear_model import LinearRegression, LogisticRegression
from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
-from skl2onnx.algebra.onnx_ops import (
- OnnxAdd, OnnxLinearRegressor, OnnxIdentity)
+from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxLinearRegressor, OnnxIdentity
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
from skl2onnx.proto import get_latest_tested_opset_version
from test_utils import TARGET_OPSET
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestOnnxOperatorsToOnnx(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_onnx_ml(self):
def generate_onnx_graph(opv):
- node = OnnxAdd(('X1', FloatTensorType()),
- np.array([0.1], dtype=np.float32),
- op_version=opv)
+ node = OnnxAdd(
+ ("X1", FloatTensorType()),
+ np.array([0.1], dtype=np.float32),
+ op_version=opv,
+ )
out = OnnxLinearRegressor(
- node, coefficients=[0.3, 0.3, 0.4, 0.5, 0.6],
- intercepts=[-50.], op_version=1)
- last = OnnxIdentity(out, output_names=['Y'], op_version=opv)
- onx = last.to_onnx([('X1', FloatTensorType((None, 5)))],
- outputs=[('Y', FloatTensorType())],
- target_opset=opv)
+ node,
+ coefficients=[0.3, 0.3, 0.4, 0.5, 0.6],
+ intercepts=[-50.0],
+ op_version=1,
+ )
+ last = OnnxIdentity(out, output_names=["Y"], op_version=opv)
+ onx = last.to_onnx(
+ [("X1", FloatTensorType((None, 5)))],
+ outputs=[("Y", FloatTensorType())],
+ target_opset=opv,
+ )
return onx, (node, out, last)
- for opv in [{'': 10}] + list(range(9, TARGET_OPSET + 1)):
+ for opv in [{"": 10}] + list(range(9, TARGET_OPSET + 1)):
with self.subTest(opv=opv):
if isinstance(opv, dict):
- if opv[''] > get_latest_tested_opset_version():
+ if opv[""] > get_latest_tested_opset_version():
continue
- elif (opv is not None and
- opv > get_latest_tested_opset_version()):
+ elif opv is not None and opv > get_latest_tested_opset_version():
continue
for i, nbnode in enumerate((1, 2, 3, 100)):
onx, nodes = generate_onnx_graph(opv=opv)
- if opv == {'': 10}:
+ if opv == {"": 10}:
for im in onx.opset_import:
if im.version > 10:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
else:
for im in onx.opset_import:
if im.version > opv:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string,
- providers=["CPUExecutionProvider"])
+ as_string, providers=["CPUExecutionProvider"]
+ )
except (InvalidGraph, InvalidArgument) as e:
- if (isinstance(opv, dict) and
- opv[''] >= onnx_opset_version()):
+ if isinstance(opv, dict) and opv[""] >= onnx_opset_version():
continue
- if (isinstance(opv, int) and
- opv >= onnx_opset_version()):
+ if isinstance(opv, int) and opv >= onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
X = (np.ones((1, 5)) * nbnode).astype(np.float32)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
self.assertEqual(res.shape, (1, 1))
inputs = None
- expected = [[('Ad_C0', FloatTensorType(shape=[]))],
- [('Li_Y0', FloatTensorType(shape=[]))],
- [('Y', FloatTensorType(shape=[]))]]
+ expected = [
+ [("Ad_C0", FloatTensorType(shape=[]))],
+ [("Li_Y0", FloatTensorType(shape=[]))],
+ [("Y", FloatTensorType(shape=[]))],
+ ]
for i, node in enumerate(nodes):
shape = node.get_output_type_inference(inputs)
self.assertEqual(len(shape), 1)
@@ -101,71 +110,70 @@ def generate_onnx_graph(opv):
else:
self.assertEqual(
str(expected[i]),
- str([(shape[0].onnx_name, shape[0].type)]))
+ str([(shape[0].onnx_name, shape[0].type)]),
+ )
inputs = shape
- def common_test_sub_graph(self, first_input, model, options=None,
- cls_type=FloatTensorType, start=9):
+ def common_test_sub_graph(
+ self, first_input, model, options=None, cls_type=FloatTensorType, start=9
+ ):
def generate_onnx_graph(opv):
dtype = np.float32 if cls_type == FloatTensorType else np.float64
- node = OnnxAdd(first_input, np.array([0.1], dtype=dtype),
- op_version=opv)
+ node = OnnxAdd(first_input, np.array([0.1], dtype=dtype), op_version=opv)
lr = model()
lr.fit(np.ones([10, 5]), np.arange(0, 10) % 3)
out = OnnxSubEstimator(lr, node, op_version=1, options=options)
if model == LogisticRegression:
- last = OnnxIdentity(out[1], output_names=['Y'], op_version=opv)
+ last = OnnxIdentity(out[1], output_names=["Y"], op_version=opv)
else:
- last = OnnxIdentity(out, output_names=['Y'], op_version=opv)
- onx = last.to_onnx([('X1', cls_type((None, 5)))],
- outputs=[('Y', cls_type())],
- target_opset=opv)
+ last = OnnxIdentity(out, output_names=["Y"], op_version=opv)
+ onx = last.to_onnx(
+ [("X1", cls_type((None, 5)))],
+ outputs=[("Y", cls_type())],
+ target_opset=opv,
+ )
return onx
dtype = np.float32 if cls_type == FloatTensorType else np.float64
opsets = list(range(start, TARGET_OPSET + 1))
- for opv in [{'': TARGET_OPSET}] + opsets:
+ for opv in [{"": TARGET_OPSET}] + opsets:
with self.subTest(opv=opv):
if isinstance(opv, dict):
- if opv[''] > get_latest_tested_opset_version():
+ if opv[""] > get_latest_tested_opset_version():
continue
- elif (opv is not None and
- opv > get_latest_tested_opset_version()):
+ elif opv is not None and opv > get_latest_tested_opset_version():
continue
for i, nbnode in enumerate((1, 2, 3, 100)):
onx = generate_onnx_graph(opv=opv)
- if opv == {'': TARGET_OPSET}:
+ if opv == {"": TARGET_OPSET}:
for im in onx.opset_import:
if im.version > TARGET_OPSET:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
else:
for im in onx.opset_import:
if im.version > opv:
raise AssertionError(
- "Wrong final opset\nopv={}\n{}".format(
- opv, onx))
- self.assertNotIn('zipmap', str(onx).lower())
+ "Wrong final opset\nopv={}\n{}".format(opv, onx)
+ )
+ self.assertNotIn("zipmap", str(onx).lower())
as_string = onx.SerializeToString()
try:
ort = InferenceSession(
- as_string,
- providers=["CPUExecutionProvider"])
- except (InvalidGraph, InvalidArgument, Fail,
- NotImplemented) as e:
- if (isinstance(opv, dict) and
- opv[''] >= onnx_opset_version()):
+ as_string, providers=["CPUExecutionProvider"]
+ )
+ except (InvalidGraph, InvalidArgument, Fail, NotImplemented) as e:
+ if isinstance(opv, dict) and opv[""] >= onnx_opset_version():
continue
- if (isinstance(opv, int) and
- opv >= onnx_opset_version()):
+ if isinstance(opv, int) and opv >= onnx_opset_version():
continue
raise AssertionError(
- "Unable to load opv={}\n---\n{}\n---".format(
- opv, onx)) from e
+ "Unable to load opv={}\n---\n{}\n---".format(opv, onx)
+ ) from e
X = (np.ones((1, 5)) * nbnode).astype(dtype)
- res_out = ort.run(None, {'X1': X})
+ res_out = ort.run(None, {"X1": X})
assert len(res_out) == 1
res = res_out[0]
if model == LogisticRegression:
@@ -176,72 +184,78 @@ def generate_onnx_graph(opv):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_tuple(self):
- self.common_test_sub_graph(
- ('X1', FloatTensorType()), LinearRegression)
+ self.common_test_sub_graph(("X1", FloatTensorType()), LinearRegression)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.4.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_tuple_double(self):
self.common_test_sub_graph(
- ('X1', DoubleTensorType()), LinearRegression,
- cls_type=DoubleTensorType)
+ ("X1", DoubleTensorType()), LinearRegression, cls_type=DoubleTensorType
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_str(self):
- self.common_test_sub_graph('X1', LinearRegression)
+ self.common_test_sub_graph("X1", LinearRegression)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.4.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_str_double(self):
- self.common_test_sub_graph('X1', LinearRegression,
- cls_type=DoubleTensorType)
+ self.common_test_sub_graph("X1", LinearRegression, cls_type=DoubleTensorType)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_tuple_cls(self):
self.common_test_sub_graph(
- ('X1', FloatTensorType()), LogisticRegression,
- {'zipmap': False})
+ ("X1", FloatTensorType()), LogisticRegression, {"zipmap": False}
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.4.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.10.0"),
- reason="ArgMax not available for double")
+ reason="ArgMax not available for double",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_tuple_cls_double(self):
self.common_test_sub_graph(
- ('X1', DoubleTensorType()), LogisticRegression,
- options={'zipmap': False}, cls_type=DoubleTensorType,
- start=13)
+ ("X1", DoubleTensorType()),
+ LogisticRegression,
+ options={"zipmap": False},
+ cls_type=DoubleTensorType,
+ start=13,
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_str_cls(self):
- self.common_test_sub_graph('X1', LogisticRegression,
- {'zipmap': False})
+ self.common_test_sub_graph("X1", LogisticRegression, {"zipmap": False})
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.4.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("1.4.0"), reason="not available"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.10.0"),
- reason="ArgMax not available for double")
+ reason="ArgMax not available for double",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sub_graph_str_cls_double(self):
self.common_test_sub_graph(
- 'X1', LogisticRegression, options={'zipmap': False},
- cls_type=DoubleTensorType, start=13)
+ "X1",
+ LogisticRegression,
+ options={"zipmap": False},
+ cls_type=DoubleTensorType,
+ start=13,
+ )
if __name__ == "__main__":
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 098be8c8a..f672ebfa5 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -6,6 +6,7 @@
import numpy
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.cluster import KMeans
+
try:
from sklearn.preprocessing import KBinsDiscretizer
except ImportError:
@@ -18,24 +19,21 @@
def get_domain_opset(onx):
domains = onx.opset_import
- res = [{'domain': dom.domain, 'version': dom.version}
- for dom in domains]
- return {d['domain']: d['version'] for d in res}
+ res = [{"domain": dom.domain, "version": dom.version} for dom in domains]
+ return {d["domain"]: d["version"] for d in res}
class TestConvert(unittest.TestCase):
-
def test_target_opset(self):
data = load_iris()
X = data.data
model = KMeans(n_clusters=3)
model.fit(X)
for i in range(1, TARGET_OPSET + 1):
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=i)
+ model_onnx = to_onnx(model, X[:1].astype(numpy.float32), target_opset=i)
dom = get_domain_opset(model_onnx)
self.assertEqual(len(dom), 1)
- assert dom[''] <= i
+ assert dom[""] <= i
def test_target_opset_dict(self):
data = load_iris()
@@ -44,12 +42,13 @@ def test_target_opset_dict(self):
model.fit(X)
for i in range(1, TARGET_OPSET + 1):
for j in (1, 2):
- tops = {'': i, 'ai.onnx.ml': j}
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=tops)
+ tops = {"": i, "ai.onnx.ml": j}
+ model_onnx = to_onnx(
+ model, X[:1].astype(numpy.float32), target_opset=tops
+ )
dom = get_domain_opset(model_onnx)
self.assertEqual(len(dom), 1)
- assert dom[''] <= i
+ assert dom[""] <= i
@unittest.skipIf(KBinsDiscretizer is None, "skl too old")
def test_target_opset_dict_kbins(self):
@@ -59,15 +58,16 @@ def test_target_opset_dict_kbins(self):
model.fit(X)
for i in range(9, TARGET_OPSET + 1):
for j in (1, 2):
- tops = {'': i, 'ai.onnx.ml': j}
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=tops)
+ tops = {"": i, "ai.onnx.ml": j}
+ model_onnx = to_onnx(
+ model, X[:1].astype(numpy.float32), target_opset=tops
+ )
dom = get_domain_opset(model_onnx)
- if dom != {'ai.onnx.ml': 1, '': i}:
- assert dom[''] <= i
- assert dom['ai.onnx.ml'] == 1
+ if dom != {"ai.onnx.ml": 1, "": i}:
+ assert dom[""] <= i
+ assert dom["ai.onnx.ml"] == 1
continue
- self.assertEqual(dom, {'ai.onnx.ml': 1, '': i})
+ self.assertEqual(dom, {"ai.onnx.ml": 1, "": i})
def test_regressor(self):
data = load_iris()
@@ -77,43 +77,42 @@ def test_regressor(self):
model.fit(X, y)
for i in range(9, TARGET_OPSET + 1):
for j in (1, 2):
- tops = {'': i, 'ai.onnx.ml': j}
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=tops)
+ tops = {"": i, "ai.onnx.ml": j}
+ model_onnx = to_onnx(
+ model, X[:1].astype(numpy.float32), target_opset=tops
+ )
dom = get_domain_opset(model_onnx)
self.assertEqual(len(dom), 1)
- self.assertIn(dom[''], (i, i - 1))
+ self.assertIn(dom[""], (i, i - 1))
def test_onehot(self):
try:
- model = OneHotEncoder(categories='auto')
+ model = OneHotEncoder(categories="auto")
except TypeError:
# parameter categories added in 0.20
return
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int64)
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
+ )
model.fit(data)
for i in range(9, TARGET_OPSET + 1):
for j in (1, 2):
- tops = {'': i, 'ai.onnx.ml': j}
- model_onnx = to_onnx(model, data[:1],
- target_opset=tops)
+ tops = {"": i, "ai.onnx.ml": j}
+ model_onnx = to_onnx(model, data[:1], target_opset=tops)
dom = get_domain_opset(model_onnx)
self.assertEqual(len(dom), 2)
- self.assertIn(dom[''], list(range(9, TARGET_OPSET + 1)))
- self.assertEqual(dom['ai.onnx.ml'], 1)
+ self.assertIn(dom[""], list(range(9, TARGET_OPSET + 1)))
+ self.assertEqual(dom["ai.onnx.ml"], 1)
def test_label_encoder(self):
model = LabelEncoder()
- data = numpy.array([1.2, 3.4, 5.4, 1.2],
- dtype=numpy.float32)
+ data = numpy.array([1.2, 3.4, 5.4, 1.2], dtype=numpy.float32)
model.fit(data)
for i in range(9, TARGET_OPSET + 1):
for j in (1, 2):
- tops = {'': i, 'ai.onnx.ml': j}
+ tops = {"": i, "ai.onnx.ml": j}
try:
- model_onnx = to_onnx(model, data[:1],
- target_opset=tops)
+ model_onnx = to_onnx(model, data[:1], target_opset=tops)
except RuntimeError as e:
if j == 1:
# expected
@@ -123,7 +122,7 @@ def test_label_encoder(self):
raise AssertionError("It should fail for opset.ml == 1")
dom = get_domain_opset(model_onnx)
self.assertEqual(len(dom), 2)
- self.assertEqual(dom['ai.onnx.ml'], 2)
+ self.assertEqual(dom["ai.onnx.ml"], 2)
def test_warnings(self):
with warnings.catch_warnings(record=True) as w:
@@ -143,35 +142,46 @@ def test_name(self):
model.fit(X)
with self.assertRaises(TypeError):
- to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET, naming=(2, 3))
-
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET, naming='KBINS')
+ to_onnx(
+ model,
+ X[:1].astype(numpy.float32),
+ target_opset=TARGET_OPSET,
+ naming=(2, 3),
+ )
+
+ model_onnx = to_onnx(
+ model,
+ X[:1].astype(numpy.float32),
+ target_opset=TARGET_OPSET,
+ naming="KBINS",
+ )
inputs = set(i.name for i in model_onnx.graph.input)
outputs = set(o.name for o in model_onnx.graph.output)
for node in model_onnx.graph.node:
for i in node.input:
- if i not in inputs and not i.startswith('KBINS'):
+ if i not in inputs and not i.startswith("KBINS"):
raise AssertionError("Wrong %r." % i)
for o in node.output:
- if o not in outputs and not o.startswith('KBINS'):
+ if o not in outputs and not o.startswith("KBINS"):
raise AssertionError("Wrong %r." % o)
- model_onnx = to_onnx(model, X[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET,
- naming=lambda n, ns: 'FBINS' + n)
+ model_onnx = to_onnx(
+ model,
+ X[:1].astype(numpy.float32),
+ target_opset=TARGET_OPSET,
+ naming=lambda n, ns: "FBINS" + n,
+ )
inputs = set(i.name for i in model_onnx.graph.input)
outputs = set(o.name for o in model_onnx.graph.output)
for node in model_onnx.graph.node:
for i in node.input:
- if i not in inputs and not i.startswith('FBINS'):
+ if i not in inputs and not i.startswith("FBINS"):
raise AssertionError("Wrong %r." % i)
for o in node.output:
- if o not in outputs and not o.startswith('FBINS'):
+ if o not in outputs and not o.startswith("FBINS"):
raise AssertionError("Wrong %r." % o)
- self.assertEqual(inputs, {'X'})
- self.assertEqual(outputs, {'variable'})
+ self.assertEqual(inputs, {"X"})
+ self.assertEqual(outputs, {"variable"})
if __name__ == "__main__":
diff --git a/tests/test_convert_options.py b/tests/test_convert_options.py
index 7015f4408..77c6ca04e 100644
--- a/tests/test_convert_options.py
+++ b/tests/test_convert_options.py
@@ -13,6 +13,7 @@
from sklearn.multioutput import MultiOutputClassifier
from sklearn.tree import DecisionTreeClassifier
from skl2onnx import to_onnx
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
@@ -21,11 +22,10 @@
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
-sklver = '.'.join(sklver.split('.')[:2])
+sklver = ".".join(sklver.split(".")[:2])
class TestConvertOptions(unittest.TestCase):
-
@staticmethod
def get_model_classifiers():
models = [
@@ -67,119 +67,146 @@ def dict_to_array(proba_as_dict):
@staticmethod
def almost_equal(
- expected_label, expected_proba,
- label, probas, zipmap=False, decimal=5):
+ expected_label, expected_proba, label, probas, zipmap=False, decimal=5
+ ):
if expected_label.tolist() != label.tolist():
raise AssertionError(
- "Label mismatch %r (expected) != %r." % (
- expected_label.tolist(),
- label.tolist()))
+ "Label mismatch %r (expected) != %r."
+ % (expected_label.tolist(), label.tolist())
+ )
if zipmap:
- raise AssertionError(
- "zipmap should be False, not %r." % zipmap)
+ raise AssertionError("zipmap should be False, not %r." % zipmap)
assert_almost_equal(expected_proba, probas, decimal=decimal)
@staticmethod
def almost_equal_class_labels(
- expected_label, expected_proba, expected_class_labels,
- label, probas, class_labels,
- zipmap=False, decimal=5):
+ expected_label,
+ expected_proba,
+ expected_class_labels,
+ label,
+ probas,
+ class_labels,
+ zipmap=False,
+ decimal=5,
+ ):
if expected_class_labels.tolist() != class_labels.tolist():
raise AssertionError(
- "Class labels mismatch %r (expected) != %r." % (
- expected_class_labels.tolist(),
- class_labels.tolist()))
+ "Class labels mismatch %r (expected) != %r."
+ % (expected_class_labels.tolist(), class_labels.tolist())
+ )
if expected_label.tolist() != label.tolist():
raise AssertionError(
- "Label mismatch %r (expected) != %r." % (
- expected_label.tolist(),
- label.tolist()))
+ "Label mismatch %r (expected) != %r."
+ % (expected_label.tolist(), label.tolist())
+ )
if zipmap:
- raise AssertionError(
- "zipmap should be False, not %r." % zipmap)
+ raise AssertionError("zipmap should be False, not %r." % zipmap)
assert_almost_equal(expected_proba, probas, decimal=decimal)
def classifier_option_output_class_labels(self, use_string):
data = load_iris()
X, y = data.data, data.target
if use_string:
- y = ['cl%d' % _ for _ in y]
+ y = ["cl%d" % _ for _ in y]
X = X.astype(numpy.float32)
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, random_state=42)
+ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
for zipmap, addcl in [(False, True), (False, False)]:
for cls in TestConvertOptions.get_model_classifiers():
- with self.subTest(cls=cls.__class__.__name__, zipmap=zipmap,
- output_class_labels=addcl):
+ with self.subTest(
+ cls=cls.__class__.__name__, zipmap=zipmap, output_class_labels=addcl
+ ):
cls.fit(X_train, y_train)
expected_label = cls.predict(X_test)
expected_proba = cls.predict_proba(X_test)
onx = to_onnx(
- cls, X[:1], options={
- 'zipmap': zipmap, 'output_class_labels': addcl},
- target_opset=TARGET_OPSET)
+ cls,
+ X[:1],
+ options={"zipmap": zipmap, "output_class_labels": addcl},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
if addcl:
TestConvertOptions.almost_equal_class_labels(
- expected_label, expected_proba, cls.classes_,
- *got, zipmap=zipmap)
+ expected_label,
+ expected_proba,
+ cls.classes_,
+ *got,
+ zipmap=zipmap
+ )
else:
TestConvertOptions.almost_equal(
- expected_label, expected_proba,
- *got, zipmap=zipmap)
+ expected_label, expected_proba, *got, zipmap=zipmap
+ )
onx = to_onnx(
- cls, X[:1],
- options={cls.__class__: {
- 'zipmap': zipmap, 'output_class_labels': addcl}},
- target_opset=TARGET_OPSET)
+ cls,
+ X[:1],
+ options={
+ cls.__class__: {
+ "zipmap": zipmap,
+ "output_class_labels": addcl,
+ }
+ },
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
if addcl:
TestConvertOptions.almost_equal_class_labels(
- expected_label, expected_proba, cls.classes_,
- *got, zipmap=zipmap)
+ expected_label,
+ expected_proba,
+ cls.classes_,
+ *got,
+ zipmap=zipmap
+ )
else:
TestConvertOptions.almost_equal(
- expected_label, expected_proba,
- *got, zipmap=zipmap)
+ expected_label, expected_proba, *got, zipmap=zipmap
+ )
onx = to_onnx(
- cls, X[:1],
- options={id(cls): {
- 'zipmap': zipmap, 'output_class_labels': addcl}},
- target_opset=TARGET_OPSET)
+ cls,
+ X[:1],
+ options={
+ id(cls): {"zipmap": zipmap, "output_class_labels": addcl}
+ },
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
if addcl:
TestConvertOptions.almost_equal_class_labels(
- expected_label, expected_proba, cls.classes_,
- *got, zipmap=zipmap)
+ expected_label,
+ expected_proba,
+ cls.classes_,
+ *got,
+ zipmap=zipmap
+ )
else:
TestConvertOptions.almost_equal(
- expected_label, expected_proba,
- *got, zipmap=zipmap)
+ expected_label, expected_proba, *got, zipmap=zipmap
+ )
- @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"),
- reason="known issue with string")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @unittest.skipIf(
+ pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_classifier_option_output_class_labels_int64(self):
self.classifier_option_output_class_labels(False)
- @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"),
- reason="known issue with string")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @unittest.skipIf(
+ pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_classifier_option_output_class_labels_str(self):
self.classifier_option_output_class_labels(True)
@@ -191,28 +218,29 @@ def get_model_multi_label():
return models
@staticmethod
- def almost_equal_multi(expected_label, expected_proba, label, *probas,
- zipmap=False, decimal=5):
+ def almost_equal_multi(
+ expected_label, expected_proba, label, *probas, zipmap=False, decimal=5
+ ):
assert_almost_equal(expected_label, label)
- if zipmap == 'columns':
+ if zipmap == "columns":
for row, pr in zip(expected_proba.T, probas):
- assert_almost_equal(
- row.ravel(), pr.ravel(), decimal=decimal)
+ assert_almost_equal(row.ravel(), pr.ravel(), decimal=decimal)
elif zipmap:
for expected, proba in zip(expected_proba, probas):
assert_almost_equal(
expected_proba,
TestConvertOptions.dict_to_array(proba),
- decimal=decimal)
+ decimal=decimal,
+ )
else:
proba = probas[0]
assert_almost_equal(expected_proba, proba, decimal=decimal)
- @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"),
- reason="known issue with string")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @unittest.skipIf(
+ pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_multi_label_option_zipmap(self):
data = load_iris()
X, y = data.data, data.target
@@ -221,22 +249,30 @@ def test_multi_label_option_zipmap(self):
y[0, :] = 1
X_train, X_test, y_train, y_test = train_test_split(X, y)
- for zipmap in [False, True, 'columns']:
+ for zipmap in [False, True, "columns"]:
for cls in TestConvertOptions.get_model_multi_label():
with self.subTest(cls=cls.__class__, zipmap=zipmap):
cls.fit(X_train, y_train)
expected_label = cls.predict(X_test)
expected_proba = cls.predict_proba(X_test)
- if zipmap == 'columns':
+ if zipmap == "columns":
# Not implemented.
with self.assertRaises(ValueError):
- to_onnx(cls, X[:1], options={'zipmap': zipmap},
- target_opset=TARGET_OPSET)
+ to_onnx(
+ cls,
+ X[:1],
+ options={"zipmap": zipmap},
+ target_opset=TARGET_OPSET,
+ )
continue
- onx = to_onnx(cls, X[:1], options={'zipmap': zipmap},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ cls,
+ X[:1],
+ options={"zipmap": zipmap},
+ target_opset=TARGET_OPSET,
+ )
if zipmap:
# The converter works but SequenceConstruct
@@ -244,52 +280,58 @@ def test_multi_label_option_zipmap(self):
continue
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
TestConvertOptions.almost_equal_multi(
- expected_label, expected_proba, *got, zipmap=zipmap)
+ expected_label, expected_proba, *got, zipmap=zipmap
+ )
onx = to_onnx(
- cls, X[:1],
- options={cls.__class__: {'zipmap': zipmap}},
- target_opset=TARGET_OPSET)
+ cls,
+ X[:1],
+ options={cls.__class__: {"zipmap": zipmap}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
assert_almost_equal(expected_label, got[0])
onx = to_onnx(
- cls, X[:1],
- options={id(cls): {'zipmap': zipmap}},
- target_opset=TARGET_OPSET)
+ cls,
+ X[:1],
+ options={id(cls): {"zipmap": zipmap}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
assert_almost_equal(expected_label, got[0])
@staticmethod
def almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *probas, decimal=5):
+ expected_label, expected_proba, expected_class_labels, *probas, decimal=5
+ ):
if expected_label.tolist() != probas[0].tolist():
raise AssertionError(
- "Labels mismatched %r != %r." % (
- expected_label.tolist(), probas[0].tolist()))
+ "Labels mismatched %r != %r."
+ % (expected_label.tolist(), probas[0].tolist())
+ )
for pr1, pr2 in zip(expected_proba, probas[1]):
assert_almost_equal(pr1, pr2, decimal=decimal)
for la1, la2 in zip(expected_class_labels, probas[2]):
if la1.tolist() != la2.tolist():
raise AssertionError(
- "Class labels mismatched %r != %r." % (
- la1.tolist(), la2.tolist()))
+ "Class labels mismatched %r != %r." % (la1.tolist(), la2.tolist())
+ )
- @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"),
- reason="known issue with string")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @unittest.skipIf(
+ pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_multi_label_option_zipmap_class_labels(self):
data = load_iris()
X, y = data.data, data.target
@@ -305,48 +347,47 @@ def test_multi_label_option_zipmap_class_labels(self):
expected_label = cls.predict(X_test)
expected_proba = cls.predict_proba(X_test)
expected_class_labels = [c.classes_ for c in cls.estimators_]
- opts = {'zipmap': False, 'output_class_labels': True}
+ opts = {"zipmap": False, "output_class_labels": True}
- onx = to_onnx(cls, X[:1], options=opts,
- target_opset=TARGET_OPSET)
+ onx = to_onnx(cls, X[:1], options=opts, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
onx = to_onnx(
- cls, X[:1], options={cls.__class__: opts},
- target_opset=TARGET_OPSET)
+ cls, X[:1], options={cls.__class__: opts}, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
onx = to_onnx(
- cls, X[:1], options={id(cls): opts},
- target_opset=TARGET_OPSET)
+ cls, X[:1], options={id(cls): opts}, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
- @unittest.skipIf(pv.Version(sklver) < pv.Version("0.24"),
- reason="known issue with string")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @unittest.skipIf(
+ pv.Version(sklver) < pv.Version("0.24"), reason="known issue with string"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_multi_label_option_zipmap_class_labels_string(self):
data = load_iris()
X, y = data.data, data.target
@@ -354,8 +395,7 @@ def test_multi_label_option_zipmap_class_labels_string(self):
y = numpy.vstack([y, 1 - y]).T
y[0, :] = 1
y[:10, 1] = 3
- y = numpy.array(list(map(
- lambda s: "cl%d" % s, y.ravel()))).reshape(y.shape)
+ y = numpy.array(list(map(lambda s: "cl%d" % s, y.ravel()))).reshape(y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y)
for cls in TestConvertOptions.get_model_multi_label():
@@ -364,44 +404,43 @@ def test_multi_label_option_zipmap_class_labels_string(self):
expected_label = cls.predict(X_test)
expected_proba = cls.predict_proba(X_test)
expected_class_labels = [c.classes_ for c in cls.estimators_]
- opts = {'zipmap': False, 'output_class_labels': True}
+ opts = {"zipmap": False, "output_class_labels": True}
- onx = to_onnx(cls, X[:1], options=opts,
- target_opset=TARGET_OPSET)
+ onx = to_onnx(cls, X[:1], options=opts, target_opset=TARGET_OPSET)
# with open("debugmo2.onnx", "wb") as f:
# f.write(onx.SerializeToString())
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
onx = to_onnx(
- cls, X[:1], options={cls.__class__: opts},
- target_opset=TARGET_OPSET)
+ cls, X[:1], options={cls.__class__: opts}, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
onx = to_onnx(
- cls, X[:1], options={id(cls): opts},
- target_opset=TARGET_OPSET)
+ cls, X[:1], options={id(cls): opts}, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X_test})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X_test})
self.assertEqual(len(got), 3)
TestConvertOptions.almost_equal_multi_labels(
- expected_label, expected_proba, expected_class_labels,
- *got)
+ expected_label, expected_proba, expected_class_labels, *got
+ )
if __name__ == "__main__":
diff --git a/tests/test_custom_transformer_ordwoe.py b/tests/test_custom_transformer_ordwoe.py
index 5429ba940..ea98ba79d 100644
--- a/tests/test_custom_transformer_ordwoe.py
+++ b/tests/test_custom_transformer_ordwoe.py
@@ -29,10 +29,10 @@ def fit(self, X, y, sample_weight=None):
self.encoder_ = OrdinalEncoder().fit(X)
tr = self.encoder_.transform(X)
maxi = (tr.max(axis=1) + 1).astype(np.int64)
- intervals = [[(i - 1, i, False, True) for i in range(0, m)]
- for m in maxi]
- weights = [[10 * j + i for i in range(len(inter))]
- for j, inter in enumerate(intervals)]
+ intervals = [[(i - 1, i, False, True) for i in range(0, m)] for m in maxi]
+ weights = [
+ [10 * j + i for i in range(len(inter))] for j, inter in enumerate(intervals)
+ ]
self.woe_ = WOETransformer(intervals, onehot=False, weights=weights)
self.woe_.fit(tr)
return self
@@ -42,25 +42,22 @@ def transform(self, X):
return self.woe_.transform(tr)
-def ordwoe_encoder_parser(
- scope, model, inputs, custom_parsers=None):
+def ordwoe_encoder_parser(scope, model, inputs, custom_parsers=None):
if len(inputs) != 1:
- raise RuntimeError(
- "Unexpected number of inputs: %d != 1." % len(inputs))
+ raise RuntimeError("Unexpected number of inputs: %d != 1." % len(inputs))
if inputs[0].type is None:
- raise RuntimeError(
- "Unexpected type: %r." % (inputs[0], ))
+ raise RuntimeError("Unexpected type: %r." % (inputs[0],))
alias = get_model_alias(type(model))
this_operator = scope.declare_local_operator(alias, model)
this_operator.inputs.append(inputs[0])
this_operator.outputs.append(
- scope.declare_local_variable('catwoe', FloatTensorType()))
+ scope.declare_local_variable("catwoe", FloatTensorType())
+ )
return this_operator.outputs
def ordwoe_encoder_shape_calculator(operator):
- check_input_and_output_numbers(
- operator, input_count_range=1, output_count_range=1)
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
input_dim = operator.inputs[0].get_first_dimension()
shape = operator.inputs[0].type.shape
second_dim = None if len(shape) != 2 else shape[1]
@@ -75,15 +72,14 @@ def ordwoe_encoder_converter(scope, operator, container):
sub = OnnxSubEstimator(op.encoder_, X, op_version=opv)
cast = OnnxCast(sub, op_version=opv, to=np.float32)
- cat = OnnxSubEstimator(op.woe_, cast, op_version=opv,
- input_types=[Int64TensorType()])
- idcat = OnnxIdentity(cat, output_names=operator.outputs[:1],
- op_version=opv)
+ cat = OnnxSubEstimator(
+ op.woe_, cast, op_version=opv, input_types=[Int64TensorType()]
+ )
+ idcat = OnnxIdentity(cat, output_names=operator.outputs[:1], op_version=opv)
idcat.add_to(scope, container)
class TestCustomTransformerOrdWOE(unittest.TestCase):
-
def test_pipeline(self):
data = load_iris()
X = data.data.astype(np.float32)
@@ -92,19 +88,20 @@ def test_pipeline(self):
expected = pipe.transform(X)
onx = to_onnx(pipe, X, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X})[0]
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X})[0]
assert_almost_equal(expected, got)
@unittest.skipIf(TARGET_OPSET < 12, reason="opset>=12 is required")
def test_custom_ordinal_woe(self):
-
update_registered_converter(
- OrdinalWOETransformer, "OrdinalWOETransformer",
+ OrdinalWOETransformer,
+ "OrdinalWOETransformer",
ordwoe_encoder_shape_calculator,
ordwoe_encoder_converter,
- parser=ordwoe_encoder_parser)
+ parser=ordwoe_encoder_parser,
+ )
data = load_iris()
X, y = data.data, data.target
@@ -117,9 +114,9 @@ def test_custom_ordinal_woe(self):
onx = to_onnx(ordwoe, X, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': X})[0]
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": X})[0]
assert_almost_equal(expected, got)
diff --git a/tests/test_custom_transformer_tsne.py b/tests/test_custom_transformer_tsne.py
index d9f41b769..34ea5ade2 100644
--- a/tests/test_custom_transformer_tsne.py
+++ b/tests/test_custom_transformer_tsne.py
@@ -21,16 +21,18 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-ort_version = '.'.join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class PredictableTSNE(BaseEstimator, TransformerMixin):
- def __init__(self,
- transformer=None,
- estimator=None,
- normalize=True,
- keep_tsne_outputs=False,
- **kwargs):
+ def __init__(
+ self,
+ transformer=None,
+ estimator=None,
+ normalize=True,
+ keep_tsne_outputs=False,
+ **kwargs
+ ):
TransformerMixin.__init__(self)
BaseEstimator.__init__(self)
if estimator is None:
@@ -43,10 +45,13 @@ def __init__(self,
if not hasattr(transformer, "fit_transform"):
raise AttributeError(
"transformer {} does not have a 'fit_transform' "
- "method.".format(type(transformer)))
+ "method.".format(type(transformer))
+ )
if not hasattr(estimator, "predict"):
- raise AttributeError("estimator {} does not have a 'predict' "
- "method.".format(type(estimator)))
+ raise AttributeError(
+ "estimator {} does not have a 'predict' "
+ "method.".format(type(estimator))
+ )
self.normalize = normalize
if kwargs:
self.set_params(**kwargs)
@@ -66,7 +71,8 @@ def fit(self, X, y, sample_weight=None):
sig = inspect.signature(self.estimator.fit)
if "sample_weight" in sig.parameters:
self.estimator_ = clone(self.estimator).fit(
- X, target, sample_weight=sample_weight)
+ X, target, sample_weight=sample_weight
+ )
else:
self.estimator_ = clone(self.estimator).fit(X, target)
mean = target.mean(axis=0)
@@ -136,15 +142,17 @@ def predictable_tsne_converter(scope, operator, container):
offset=op.mean_.ravel().astype(numpy.float32),
)
- container.add_node("Scaler", [knn_output.onnx_name], [output.full_name],
- op_domain="ai.onnx.ml",
- **attrs)
+ container.add_node(
+ "Scaler",
+ [knn_output.onnx_name],
+ [output.full_name],
+ op_domain="ai.onnx.ml",
+ **attrs
+ )
class TestCustomTransformerTSNE(unittest.TestCase):
-
def test_custom_pipeline_scaler(self):
-
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:50]
yd = digits.target[:50]
@@ -164,41 +172,45 @@ def test_custom_pipeline_scaler(self):
ptsne_knn,
"predictable_tsne",
[("input", FloatTensorType([None, Xd.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
Xd.astype(numpy.float32)[:7],
ptsne_knn,
model_onnx,
- basename="CustomTransformerTSNEkNN-OneOffArray")
+ basename="CustomTransformerTSNEkNN-OneOffArray",
+ )
trace_line = []
def my_parser(scope, model, inputs, custom_parsers=None):
trace_line.append(model)
- return _parse_sklearn_simple_model(scope, model, inputs,
- custom_parsers)
+ return _parse_sklearn_simple_model(scope, model, inputs, custom_parsers)
model_onnx = convert_sklearn(
ptsne_knn,
"predictable_tsne",
[("input", FloatTensorType([None, Xd.shape[1]]))],
custom_parsers={PredictableTSNE: my_parser},
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(trace_line) == 1
dump_data_and_model(
Xd.astype(numpy.float32)[:7],
ptsne_knn,
model_onnx,
- basename="CustomTransformerTSNEkNNCustomParser-OneOffArray")
+ basename="CustomTransformerTSNEkNNCustomParser-OneOffArray",
+ )
update_registered_parser(PredictableTSNE, my_parser)
model_onnx = convert_sklearn(
ptsne_knn,
"predictable_tsne",
[("input", FloatTensorType([None, Xd.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(trace_line) == 2
diff --git a/tests/test_investigate.py b/tests/test_investigate.py
index 68dd1c694..691153976 100644
--- a/tests/test_investigate.py
+++ b/tests/test_investigate.py
@@ -5,6 +5,7 @@
from contextlib import redirect_stdout
import numpy
from numpy.testing import assert_almost_equal
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -16,12 +17,13 @@
from sklearn.preprocessing import RobustScaler, StandardScaler
from skl2onnx import convert_sklearn
from skl2onnx.helpers import (
- collect_intermediate_steps, compare_objects,
- enumerate_pipeline_models)
+ collect_intermediate_steps,
+ compare_objects,
+ enumerate_pipeline_models,
+)
from skl2onnx.helpers.investigate import _alter_model_for_debugging
from skl2onnx.common import MissingShapeCalculator
-from skl2onnx.common.data_types import (
- FloatTensorType, guess_data_type)
+from skl2onnx.common.data_types import FloatTensorType, guess_data_type
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
@@ -30,61 +32,68 @@ class MyScaler(StandardScaler):
class TestInvestigate(unittest.TestCase):
-
def test_simple_pipeline(self):
for opset in (11, TARGET_OPSET):
if opset > TARGET_OPSET:
continue
- data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
- dtype=numpy.float32)
- model = Pipeline([("scaler1", StandardScaler()),
- ("scaler2", StandardScaler())])
+ data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32)
+ model = Pipeline(
+ [("scaler1", StandardScaler()), ("scaler2", StandardScaler())]
+ )
model.fit(data)
all_models = list(enumerate_pipeline_models(model))
steps = collect_intermediate_steps(
- model, "pipeline", [("input", FloatTensorType([None, 2]))],
- target_opset=opset)
+ model,
+ "pipeline",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=opset,
+ )
self.assertEqual(len(steps), 2)
self.assertEqual(len(all_models), 3)
- expected = 'version:%d}' % opset
- expected1 = 'version:1}'
+ expected = "version:%d}" % opset
+ expected1 = "version:1}"
model.transform(data)
for step in steps:
- onnx_step = step['onnx_step']
- text = str(onnx_step).replace('\n', ' ').replace(' ', '')
+ onnx_step = step["onnx_step"]
+ text = str(onnx_step).replace("\n", " ").replace(" ", "")
if expected not in text and expected1 not in text:
raise AssertionError(
- "Unable to find '{}'\n'{}'\n".format(
- expected, text))
+ "Unable to find '{}'\n'{}'\n".format(expected, text)
+ )
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': data})
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": data})
onnx_output = onnx_outputs[0]
- skl_outputs = step['model']._debug.outputs['transform']
- assert str(step['model']._debug) is not None
- sdt = step['model']._debug.display(data, 5)
- assert 'shape' in sdt
+ skl_outputs = step["model"]._debug.outputs["transform"]
+ assert str(step["model"]._debug) is not None
+ sdt = step["model"]._debug.display(data, 5)
+ assert "shape" in sdt
assert_almost_equal(onnx_output, skl_outputs)
compare_objects(onnx_output, skl_outputs)
def test_missing_converter(self):
- data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
- dtype=numpy.float32)
- model = Pipeline([("scaler1", StandardScaler()),
- ("scaler2", StandardScaler()),
- ("scaler3", MyScaler())])
+ data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32)
+ model = Pipeline(
+ [
+ ("scaler1", StandardScaler()),
+ ("scaler2", StandardScaler()),
+ ("scaler3", MyScaler()),
+ ]
+ )
model.fit(data)
all_models = list(enumerate_pipeline_models(model))
try:
collect_intermediate_steps(
- model, "pipeline",
+ model,
+ "pipeline",
[("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
except MissingShapeCalculator as e:
assert "MyScaler" in str(e)
assert "gallery" in str(e)
@@ -98,159 +107,165 @@ def test_missing_converter(self):
# whole pipeline
continue
step_model = step
- data_in = step_model._debug.inputs['transform']
+ data_in = step_model._debug.inputs["transform"]
t = guess_data_type(data_in)
try:
- onnx_step = convert_sklearn(step_model, initial_types=t,
- target_opset=TARGET_OPSET)
+ onnx_step = convert_sklearn(
+ step_model, initial_types=t, target_opset=TARGET_OPSET
+ )
except MissingShapeCalculator as e:
if "MyScaler" in str(e):
continue
raise
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': data_in})
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": data_in})
onnx_output = onnx_outputs[0]
- skl_outputs = step_model._debug.outputs['transform']
+ skl_outputs = step_model._debug.outputs["transform"]
assert_almost_equal(onnx_output, skl_outputs)
compare_objects(onnx_output, skl_outputs)
def test_simple_column_transformer(self):
if ColumnTransformer is None:
return
- data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
- dtype=numpy.float32)
- model = ColumnTransformer([("scaler1", StandardScaler(), [0]),
- ("scaler2", RobustScaler(), [1])])
+ data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32)
+ model = ColumnTransformer(
+ [("scaler1", StandardScaler(), [0]), ("scaler2", RobustScaler(), [1])]
+ )
model.fit(data)
all_models = list(enumerate_pipeline_models(model))
steps = collect_intermediate_steps(
- model, "coulmn transformer",
+ model,
+ "coulmn transformer",
[("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(steps) == 2
assert len(all_models) == 3
model.transform(data)
for step in steps:
- onnx_step = step['onnx_step']
+ onnx_step = step["onnx_step"]
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': data})
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": data})
onnx_output = onnx_outputs[0]
- skl_outputs = step['model']._debug.outputs['transform']
+ skl_outputs = step["model"]._debug.outputs["transform"]
assert_almost_equal(onnx_output, skl_outputs)
compare_objects(onnx_output.tolist(), skl_outputs.tolist())
def test_simple_feature_union(self):
- data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]],
- dtype=numpy.float32)
- model = FeatureUnion([("scaler1", StandardScaler()),
- ("scaler2", RobustScaler())])
+ data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], dtype=numpy.float32)
+ model = FeatureUnion(
+ [("scaler1", StandardScaler()), ("scaler2", RobustScaler())]
+ )
model.fit(data)
all_models = list(enumerate_pipeline_models(model))
steps = collect_intermediate_steps(
- model, "feature union",
+ model,
+ "feature union",
[("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(steps) == 2
assert len(all_models) == 3
model.transform(data)
for step in steps:
- onnx_step = step['onnx_step']
+ onnx_step = step["onnx_step"]
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': data})
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": data})
onnx_output = onnx_outputs[0]
- skl_outputs = step['model']._debug.outputs['transform']
+ skl_outputs = step["model"]._debug.outputs["transform"]
assert_almost_equal(onnx_output, skl_outputs)
compare_objects(onnx_output, skl_outputs)
def test_simple_pipeline_predict(self):
data = load_iris()
X, y = data.data, data.target
- model = Pipeline([("scaler1", StandardScaler()),
- ("lr", LogisticRegression())])
+ model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())])
model.fit(X, y)
all_models = list(enumerate_pipeline_models(model))
steps = collect_intermediate_steps(
- model, "pipeline",
+ model,
+ "pipeline",
[("input", FloatTensorType((None, X.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(steps) == 2
assert len(all_models) == 3
model.predict(X)
for step in steps:
- onnx_step = step['onnx_step']
+ onnx_step = step["onnx_step"]
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': X.astype(numpy.float32)})
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": X.astype(numpy.float32)})
onnx_output = onnx_outputs[0]
- dbg_outputs = step['model']._debug.outputs
- skl_outputs = (dbg_outputs['transform'] if 'transform' in
- dbg_outputs else dbg_outputs['predict'])
+ dbg_outputs = step["model"]._debug.outputs
+ skl_outputs = (
+ dbg_outputs["transform"]
+ if "transform" in dbg_outputs
+ else dbg_outputs["predict"]
+ )
assert_almost_equal(onnx_output, skl_outputs, decimal=6)
compare_objects(onnx_output, skl_outputs)
def test_simple_pipeline_predict_proba(self):
data = load_iris()
X, y = data.data, data.target
- model = Pipeline([("scaler1", StandardScaler()),
- ("lr", LogisticRegression())])
+ model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())])
model.fit(X, y)
all_models = list(enumerate_pipeline_models(model))
steps = collect_intermediate_steps(
- model, "pipeline",
+ model,
+ "pipeline",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
assert len(steps) == 2
assert len(all_models) == 3
model.predict_proba(X)
for step in steps:
- onnx_step = step['onnx_step']
+ onnx_step = step["onnx_step"]
sess = InferenceSession(
- onnx_step.SerializeToString(),
- providers=["CPUExecutionProvider"])
- onnx_outputs = sess.run(None, {'input': X.astype(numpy.float32)})
- dbg_outputs = step['model']._debug.outputs
- if 'transform' in dbg_outputs:
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ onnx_outputs = sess.run(None, {"input": X.astype(numpy.float32)})
+ dbg_outputs = step["model"]._debug.outputs
+ if "transform" in dbg_outputs:
onnx_output = onnx_outputs[0]
- skl_outputs = dbg_outputs['transform']
+ skl_outputs = dbg_outputs["transform"]
else:
onnx_output = onnx_outputs[1]
- skl_outputs = dbg_outputs['predict_proba']
+ skl_outputs = dbg_outputs["predict_proba"]
assert_almost_equal(onnx_output, skl_outputs, decimal=6)
compare_objects(onnx_output, skl_outputs)
def test_verbose(self):
data = load_iris()
X, y = data.data, data.target
- model = Pipeline([("scaler1", StandardScaler()),
- ("lr", LogisticRegression())])
+ model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())])
model.fit(X, y)
st = io.StringIO()
with redirect_stdout(st):
- convert_sklearn(
- model, initial_types=[('X', FloatTensorType())],
- verbose=1)
+ convert_sklearn(model, initial_types=[("X", FloatTensorType())], verbose=1)
self.assertIn("[convert_sklearn] convert_topology", st.getvalue())
- @unittest.skipIf(TARGET_OPSET < 18,
- reason="ReferenceEvaluator not implemented")
+ @unittest.skipIf(TARGET_OPSET < 18, reason="ReferenceEvaluator not implemented")
def test_replay_run(self):
try:
from .test_utils.utils_backend_onnx import ReferenceEvaluatorEx
@@ -258,16 +273,15 @@ def test_replay_run(self):
from test_utils.utils_backend_onnx import ReferenceEvaluatorEx
data = load_iris()
X, y = data.data, data.target
- model = Pipeline([("scaler1", StandardScaler()),
- ("lr", LogisticRegression())])
+ model = Pipeline([("scaler1", StandardScaler()), ("lr", LogisticRegression())])
model.fit(X, y)
onx = convert_sklearn(
- model, initial_types=[('X', FloatTensorType())],
- options={'zipmap': False})
+ model, initial_types=[("X", FloatTensorType())], options={"zipmap": False}
+ )
sess = ReferenceEvaluatorEx(onx)
sess.run(None, {"X": X})
repl = sess.replay_run()
- self.assertIn('probability_tensor', repl)
+ self.assertIn("probability_tensor", repl)
if __name__ == "__main__":
diff --git a/tests/test_onnx_helper.py b/tests/test_onnx_helper.py
index 5778de85d..243fc7553 100644
--- a/tests/test_onnx_helper.py
+++ b/tests/test_onnx_helper.py
@@ -20,13 +20,14 @@
change_onnx_domain,
add_output_initializer,
get_initializers,
- update_onnx_initializers)
+ update_onnx_initializers,
+)
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
def one_hot_encoder_supports_string():
# pv.Version does not work with development versions
- vers = '.'.join(sklearn_version.split('.')[:2])
+ vers = ".".join(sklearn_version.split(".")[:2])
return pv.Version(vers) >= pv.Version("0.20.0")
@@ -40,17 +41,20 @@ def get_model(self, model):
from onnxruntime import InferenceSession
session = InferenceSession(
- save_onnx_model(model),
- providers=["CPUExecutionProvider"])
+ save_onnx_model(model), providers=["CPUExecutionProvider"]
+ )
return lambda X: session.run(None, {"input": X})[0]
def test_onnx_helper_load_save(self):
model = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
model.fit(X)
- model_onnx = convert_sklearn(model, "binarizer",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "binarizer",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
filename = "temp_onnx_helper_load_save.onnx"
save_onnx_model(model_onnx, filename)
model = load_onnx_model(filename)
@@ -72,13 +76,17 @@ def test_onnx_helper_load_save(self):
def test_onnx_helper_load_save_init(self):
model = make_pipeline(
Binarizer(),
- OneHotEncoder(sparse=False, handle_unknown='ignore'),
- StandardScaler())
+ OneHotEncoder(sparse=False, handle_unknown="ignore"),
+ StandardScaler(),
+ )
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
filename = "temp_onnx_helper_load_save.onnx"
save_onnx_model(model_onnx, filename)
model = load_onnx_model(filename)
@@ -98,14 +106,18 @@ def test_onnx_helper_load_save_init(self):
reason="OneHotEncoder did not have categories_ before 0.20",
)
def test_onnx_helper_load_save_init_meta(self):
- model = make_pipeline(Binarizer(), OneHotEncoder(sparse=False),
- StandardScaler())
+ model = make_pipeline(
+ Binarizer(), OneHotEncoder(sparse=False), StandardScaler()
+ )
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
- meta = {'pA': 'one', 'pB': 'two'}
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
+ meta = {"pA": "one", "pB": "two"}
onnx.helper.set_model_props(model_onnx, meta)
new_model = select_model_inputs_outputs(model_onnx, "variable")
vals = {p.key: p.value for p in new_model.metadata_props}
@@ -115,11 +127,13 @@ def test_change_onnx_domain(self):
model = make_pipeline(StandardScaler())
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
- model_onnx = change_onnx_domain(
- model_onnx, {'Scaler': ('ScalerNew', 'ML2')})
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
+ model_onnx = change_onnx_domain(model_onnx, {"Scaler": ("ScalerNew", "ML2")})
self.assertIn('domain: "ML2"', str(model_onnx))
self.assertIn('op_type: "ScalerNew"', str(model_onnx))
@@ -128,34 +142,37 @@ def test_add_output_initializer(self):
cst = numpy.array([0.5, 0.7, 0.8], dtype=numpy.int32)
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", DoubleTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
- new_model_onnx = add_output_initializer(
- model_onnx, "new_output", cst)
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", DoubleTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
+ new_model_onnx = add_output_initializer(model_onnx, "new_output", cst)
sess = InferenceSession(
- new_model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ new_model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
self.assertEqual(len(res), 2)
assert_almost_equal(cst, res[1])
self.assertEqual(model_onnx.domain, new_model_onnx.domain)
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(['variable', 'new_output'], names)
+ self.assertEqual(["variable", "new_output"], names)
new_model_onnx = add_output_initializer(
- model_onnx, ["new_output1", "new_output2"], [cst, cst + 1])
+ model_onnx, ["new_output1", "new_output2"], [cst, cst + 1]
+ )
sess = InferenceSession(
- new_model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ new_model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
self.assertEqual(len(res), 3)
assert_almost_equal(cst, res[1])
assert_almost_equal(cst + 1, res[2])
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(['variable', 'new_output1', 'new_output2'], names)
+ self.assertEqual(["variable", "new_output1", "new_output2"], names)
with self.assertRaises(ValueError):
add_output_initializer(model_onnx, "input", cst)
@@ -173,30 +190,35 @@ def test_get_initializers(self):
model = make_pipeline(StandardScaler())
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", DoubleTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", DoubleTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
init = get_initializers(model_onnx)
self.assertEqual(len(init), 2)
- assert_almost_equal(init['Di_Divcst'],
- numpy.array([0.10897247, 0.51173724]))
- assert_almost_equal(init['Su_Subcst'], numpy.array([0.225, 1.975]))
+ assert_almost_equal(init["Di_Divcst"], numpy.array([0.10897247, 0.51173724]))
+ assert_almost_equal(init["Su_Subcst"], numpy.array([0.225, 1.975]))
def test_update_onnx_initializers(self):
model = make_pipeline(StandardScaler())
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
- model_onnx = convert_sklearn(model, "pipe3",
- [("input", DoubleTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "pipe3",
+ [("input", DoubleTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
init = get_initializers(model_onnx)
self.assertEqual(len(init), 2)
for v in init.values():
v[:] = 1.5
update_onnx_initializers(model_onnx, init)
init = get_initializers(model_onnx)
- assert_almost_equal(init['Di_Divcst'], numpy.array([1.5, 1.5]))
- assert_almost_equal(init['Su_Subcst'], numpy.array([1.5, 1.5]))
+ assert_almost_equal(init["Di_Divcst"], numpy.array([1.5, 1.5]))
+ assert_almost_equal(init["Su_Subcst"], numpy.array([1.5, 1.5]))
if __name__ == "__main__":
diff --git a/tests/test_onnx_rare_helper.py b/tests/test_onnx_rare_helper.py
index 7e47a0526..6afeeea6b 100644
--- a/tests/test_onnx_rare_helper.py
+++ b/tests/test_onnx_rare_helper.py
@@ -15,20 +15,18 @@
class TestOnnxRareHelper(unittest.TestCase):
-
def test_kmeans_upgrade(self):
data = load_iris()
X = data.data
model = KMeans(n_clusters=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=7)
+ model_onnx = convert_sklearn(
+ model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=7
+ )
model8 = upgrade_opset_number(model_onnx, 8)
assert "version: 8" in str(model8)
- @unittest.skipIf(onnx_opset_version() < 11,
- reason="Needs opset >= 11")
+ @unittest.skipIf(onnx_opset_version() < 11, reason="Needs opset >= 11")
def test_knn_upgrade(self):
iris = load_iris()
X, _ = iris.data, iris.target
@@ -36,9 +34,9 @@ def test_knn_upgrade(self):
clr = NearestNeighbors(n_neighbors=3, radius=None)
clr.fit(X)
- model_onnx = convert_sklearn(clr, "up",
- [("input", FloatTensorType([None, 4]))],
- target_opset=9)
+ model_onnx = convert_sklearn(
+ clr, "up", [("input", FloatTensorType([None, 4]))], target_opset=9
+ )
try:
upgrade_opset_number(model_onnx, 8)
raise AssertionError()
diff --git a/tests/test_onnxruntime.py b/tests/test_onnxruntime.py
index e00f6809a..28ec705ae 100644
--- a/tests/test_onnxruntime.py
+++ b/tests/test_onnxruntime.py
@@ -4,6 +4,7 @@
import unittest
import numpy as np
from numpy.testing import assert_allclose
+
try:
import onnx.reference # noqa
from test_utils import ReferenceEvaluatorEx
@@ -13,21 +14,77 @@
class TestOnnxruntime(unittest.TestCase):
-
- X3_15 = np.array([
- [-0.32256478, 1.7266265, 0.47051477, 1.1111994, 1.9582617,
- -2.1582267, -1.9729482, -1.5662458, 1.8967382, 0.9119621,
- -0.93173814, 2.9724689, -0.7231156, 0.10379718, -1.3578224,
- 0.37283298, -0.38267845, 0.23394746, -1.6884863, 0.6374923],
- [-0.53266096, -0.767421, 1.661441, 0.52790266, 1.6549803,
- 0.5076044, -2.9024098, 0.86126643, -1.3819953, 2.5567708,
- -1.7888857, -0.07472081, 0.24990171, -0.87638474, -0.14730039,
- 1.3493251, -0.7835222, -0.9997528, -0.91080195, -3.6515126],
- [-0.8703916, 0.43145382, 1.0918913, -1.397069, -0.48047885,
- 3.1278436, 3.8035386, -0.22710086, -0.42011356, 1.4203368,
- 0.47596663, -0.44953802, -0.68278235, 0.87819546, -2.4272032,
- 0.08891433, 0.7960927, 1.2197107, 1.7008729, 1.0122501]],
- dtype=np.float32)
+ X3_15 = np.array(
+ [
+ [
+ -0.32256478,
+ 1.7266265,
+ 0.47051477,
+ 1.1111994,
+ 1.9582617,
+ -2.1582267,
+ -1.9729482,
+ -1.5662458,
+ 1.8967382,
+ 0.9119621,
+ -0.93173814,
+ 2.9724689,
+ -0.7231156,
+ 0.10379718,
+ -1.3578224,
+ 0.37283298,
+ -0.38267845,
+ 0.23394746,
+ -1.6884863,
+ 0.6374923,
+ ],
+ [
+ -0.53266096,
+ -0.767421,
+ 1.661441,
+ 0.52790266,
+ 1.6549803,
+ 0.5076044,
+ -2.9024098,
+ 0.86126643,
+ -1.3819953,
+ 2.5567708,
+ -1.7888857,
+ -0.07472081,
+ 0.24990171,
+ -0.87638474,
+ -0.14730039,
+ 1.3493251,
+ -0.7835222,
+ -0.9997528,
+ -0.91080195,
+ -3.6515126,
+ ],
+ [
+ -0.8703916,
+ 0.43145382,
+ 1.0918913,
+ -1.397069,
+ -0.48047885,
+ 3.1278436,
+ 3.8035386,
+ -0.22710086,
+ -0.42011356,
+ 1.4203368,
+ 0.47596663,
+ -0.44953802,
+ -0.68278235,
+ 0.87819546,
+ -2.4272032,
+ 0.08891433,
+ 0.7960927,
+ 1.2197107,
+ 1.7008729,
+ 1.0122501,
+ ],
+ ],
+ dtype=np.float32,
+ )
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
def test_tree_ensemble_classifier(self):
@@ -61,36 +118,33 @@ def test_tree_ensemble_classifier(self):
print(repr(X[:5]))
"""
X = self.X3_15
- name = os.path.join(os.path.dirname(__file__),
- "datasets", "treecl.onnx")
+ name = os.path.join(os.path.dirname(__file__), "datasets", "treecl.onnx")
sess = ReferenceEvaluatorEx(name)
- label, proba = sess.run(None, {'input': X})
+ label, proba = sess.run(None, {"input": X})
sesso = InferenceSession(name, providers=["CPUExecutionProvider"])
- labelo, probao = sesso.run(None, {'input': X})
+ labelo, probao = sesso.run(None, {"input": X})
assert_allclose(probao, proba, atol=1e-8)
assert_allclose(labelo, label)
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
def test_tree_ensemble_classifier_2(self):
X = self.X3_15
- name = os.path.join(os.path.dirname(__file__),
- "datasets", "treecl2.onnx")
+ name = os.path.join(os.path.dirname(__file__), "datasets", "treecl2.onnx")
sess = ReferenceEvaluatorEx(name)
- label, proba = sess.run(None, {'input': X})
+ label, proba = sess.run(None, {"input": X})
sesso = InferenceSession(name, providers=["CPUExecutionProvider"])
- labelo, probao = sesso.run(None, {'input': X})
+ labelo, probao = sesso.run(None, {"input": X})
assert_allclose(probao, proba, atol=1e-6)
assert_allclose(labelo, label)
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
def test_tree_ensemble_classifier_3(self):
X = self.X3_15[:, :10]
- name = os.path.join(os.path.dirname(__file__),
- "datasets", "treecl3.onnx")
+ name = os.path.join(os.path.dirname(__file__), "datasets", "treecl3.onnx")
sess = ReferenceEvaluatorEx(name)
- label, proba = sess.run(None, {'input': X})
+ label, proba = sess.run(None, {"input": X})
sesso = InferenceSession(name, providers=["CPUExecutionProvider"])
- labelo, probao = sesso.run(None, {'input': X})
+ labelo, probao = sesso.run(None, {"input": X})
assert_allclose(probao, proba, atol=1e-6)
assert_allclose(labelo, label)
diff --git a/tests/test_op10.py b/tests/test_op10.py
index 474bcfabf..30029e6cf 100644
--- a/tests/test_op10.py
+++ b/tests/test_op10.py
@@ -14,32 +14,36 @@
class TestOp10(unittest.TestCase):
-
def check_domain(self, model, domain="", target_opset=10):
for op in model.opset_import:
if op.domain == domain:
if op.version > target_opset:
raise RuntimeError(
- "Wrong opset {} > {} expected".format(
- op.domain, target_opset))
+ "Wrong opset {} > {} expected".format(op.domain, target_opset)
+ )
@unittest.skipIf(onnx_opset_version() < 10, reason="out of scope")
def test_logistic_regression(self):
- model, X = fit_classification_model(
- linear_model.LogisticRegression(), 3)
+ model, X = fit_classification_model(linear_model.LogisticRegression(), 3)
target_opset = 10
- model_onnx = convert_sklearn(model, "op10",
- [("input", FloatTensorType([None, 3]))],
- target_opset=target_opset)
+ model_onnx = convert_sklearn(
+ model,
+ "op10",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=target_opset,
+ )
self.check_domain(model_onnx, target_opset=target_opset)
@unittest.skipIf(onnx_opset_version() < 10, reason="out of scope")
def test_kmeans(self):
model, X = fit_classification_model(KMeans(), 3)
target_opset = 10
- model_onnx = convert_sklearn(model, "op10",
- [("input", FloatTensorType([None, 3]))],
- target_opset=target_opset)
+ model_onnx = convert_sklearn(
+ model,
+ "op10",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=target_opset,
+ )
self.check_domain(model_onnx, target_opset=target_opset)
@unittest.skipIf(onnx_opset_version() < 10, reason="out of scope")
@@ -47,18 +51,23 @@ def test_gaussian_mixture(self):
model, X = fit_classification_model(GaussianMixture(), 3)
target_opset = 10
model_onnx = convert_sklearn(
- model, "op10",
+ model,
+ "op10",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=target_opset)
+ target_opset=target_opset,
+ )
self.check_domain(model_onnx, target_opset=target_opset)
@unittest.skipIf(onnx_opset_version() < 10, reason="out of scope")
def test_gaussian_process_regressor(self):
model, X = fit_classification_model(GaussianProcessRegressor(), 3)
target_opset = 10
- model_onnx = convert_sklearn(model, "op10",
- [("input", FloatTensorType([None, 3]))],
- target_opset=target_opset)
+ model_onnx = convert_sklearn(
+ model,
+ "op10",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=target_opset,
+ )
self.check_domain(model_onnx, target_opset=target_opset)
@unittest.skipIf(onnx_opset_version() < 10, reason="out of scope")
@@ -73,9 +82,12 @@ def test_voting_classifier(self):
)
model, X = fit_classification_model(model, 3)
target_opset = 10
- model_onnx = convert_sklearn(model, "op10",
- [("input", FloatTensorType([None, 3]))],
- target_opset=target_opset)
+ model_onnx = convert_sklearn(
+ model,
+ "op10",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=target_opset,
+ )
self.check_domain(model_onnx, target_opset=target_opset)
diff --git a/tests/test_opset13.py b/tests/test_opset13.py
index 8fe3e3025..e0ffd9725 100644
--- a/tests/test_opset13.py
+++ b/tests/test_opset13.py
@@ -9,12 +9,12 @@
OnnxReduceSumApi11,
OnnxSplitApi18,
OnnxSqueezeApi11,
- OnnxUnsqueezeApi11)
+ OnnxUnsqueezeApi11,
+)
from test_utils import TARGET_OPSET
class TestOpset13(unittest.TestCase):
-
def test_reduce_sum(self):
X = numpy.array([[2, 1], [0, 1]], dtype=numpy.float32)
@@ -23,44 +23,53 @@ def test_reduce_sum(self):
continue
with self.subTest(opset=opset):
onx = OnnxReduceSumApi11(
- 'X', output_names=['Y'], keepdims=0, op_version=opset)
+ "X", output_names=["Y"], keepdims=0, op_version=opset
+ )
model_def = onx.to_onnx(
- {'X': X.astype(numpy.float32)}, target_opset=opset)
+ {"X": X.astype(numpy.float32)}, target_opset=opset
+ )
got = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"]).run(
- None, {'X': X})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ ).run(None, {"X": X})
assert_almost_equal(numpy.sum(X), got[0], decimal=6)
onx = OnnxReduceSumApi11(
- 'X', output_names=['Y'], axes=[1], op_version=opset)
+ "X", output_names=["Y"], axes=[1], op_version=opset
+ )
model_def = onx.to_onnx(
- {'X': X.astype(numpy.float32)}, target_opset=opset)
+ {"X": X.astype(numpy.float32)}, target_opset=opset
+ )
got = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"]).run(
- None, {'X': X})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ ).run(None, {"X": X})
assert_almost_equal(
- numpy.sum(X, axis=1, keepdims=True), got[0], decimal=6)
+ numpy.sum(X, axis=1, keepdims=True), got[0], decimal=6
+ )
def test_split(self):
- x = numpy.array([1., 2., 3., 4., 5., 6.]).astype(numpy.float32)
- y = [numpy.array([1., 2.]).astype(numpy.float32),
- numpy.array([3., 4.]).astype(numpy.float32),
- numpy.array([5., 6.]).astype(numpy.float32)]
+ x = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).astype(numpy.float32)
+ y = [
+ numpy.array([1.0, 2.0]).astype(numpy.float32),
+ numpy.array([3.0, 4.0]).astype(numpy.float32),
+ numpy.array([5.0, 6.0]).astype(numpy.float32),
+ ]
for opset in (10, 11, 12, 13, 17, 18):
if opset > TARGET_OPSET:
continue
with self.subTest(opset=opset):
onx = OnnxSplitApi18(
- 'X', axis=0, split=[2, 2, 2],
- output_names=['Y1', 'Y2', 'Y3'], op_version=opset)
+ "X",
+ axis=0,
+ split=[2, 2, 2],
+ output_names=["Y1", "Y2", "Y3"],
+ op_version=opset,
+ )
model_def = onx.to_onnx(
- {'X': x.astype(numpy.float32)}, target_opset=opset)
+ {"X": x.astype(numpy.float32)}, target_opset=opset
+ )
got = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"]).run(
- None, {'X': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ ).run(None, {"X": x})
assert_almost_equal(y[0], got[0])
assert_almost_equal(y[1], got[1])
assert_almost_equal(y[2], got[2])
@@ -73,17 +82,20 @@ def test_squeeze(self):
continue
with self.subTest(opset=opset):
onx = OnnxSqueezeApi11(
- 'X', axes=[1], output_names=['Y'], op_version=opset)
+ "X", axes=[1], output_names=["Y"], op_version=opset
+ )
model_def = onx.to_onnx(
- {'X': x.astype(numpy.float32)}, target_opset=opset)
+ {"X": x.astype(numpy.float32)}, target_opset=opset
+ )
got = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"]).run(
- None, {'X': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ ).run(None, {"X": x})
assert_almost_equal(y, got[0])
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.0.0'),
- reason="onnxruntime too old, onnx too recent")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.0.0"),
+ reason="onnxruntime too old, onnx too recent",
+ )
def test_unsqueeze(self):
x = numpy.random.randn(1, 3, 1, 5).astype(numpy.float32)
y = numpy.expand_dims(x, axis=-2)
@@ -92,13 +104,14 @@ def test_unsqueeze(self):
continue
with self.subTest(opset=opset):
onx = OnnxUnsqueezeApi11(
- 'X', axes=[-2], output_names=['Y'], op_version=opset)
+ "X", axes=[-2], output_names=["Y"], op_version=opset
+ )
model_def = onx.to_onnx(
- {'X': x.astype(numpy.float32)}, target_opset=opset)
+ {"X": x.astype(numpy.float32)}, target_opset=opset
+ )
got = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"]).run(
- None, {'X': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ ).run(None, {"X": x})
assert_almost_equal(y, got[0])
diff --git a/tests/test_optimisation.py b/tests/test_optimisation.py
index 4acee2ec8..b14533207 100644
--- a/tests/test_optimisation.py
+++ b/tests/test_optimisation.py
@@ -6,77 +6,93 @@
from onnx import checker
from onnx import helper
from onnx import TensorProto as tp
-from skl2onnx.common.onnx_optimisation_identity import (
- onnx_remove_node_identity)
-from test_utils import (
- TARGET_OPSET, TARGET_IR,
- InferenceSessionEx as InferenceSession)
+from skl2onnx.common.onnx_optimisation_identity import onnx_remove_node_identity
+from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession
class TestOptimisation(unittest.TestCase):
-
- @unittest.skipIf(TARGET_OPSET <= 14,
- reason="only verified with opset 15+")
+ @unittest.skipIf(TARGET_OPSET <= 14, reason="only verified with opset 15+")
def test_coptimisation_identity_removal(self):
# investigation issue #854
then_branch = helper.make_graph(
- [helper.make_node('Identity', inputs=["identity_one"],
- outputs=["then_result"])],
- 'then_branch',
+ [
+ helper.make_node(
+ "Identity", inputs=["identity_one"], outputs=["then_result"]
+ )
+ ],
+ "then_branch",
[],
- [helper.make_tensor_value_info('then_result', tp.INT64, [1])])
+ [helper.make_tensor_value_info("then_result", tp.INT64, [1])],
+ )
else_branch = helper.make_graph(
- [helper.make_node('Identity', inputs=["identity_zero"],
- outputs=["else_result"])],
- 'else_branch',
+ [
+ helper.make_node(
+ "Identity", inputs=["identity_zero"], outputs=["else_result"]
+ )
+ ],
+ "else_branch",
[],
- [helper.make_tensor_value_info('else_result', tp.INT64, [1])])
+ [helper.make_tensor_value_info("else_result", tp.INT64, [1])],
+ )
nodes = [
- helper.make_node('Constant', inputs=[], outputs=["one"],
- value=helper.make_tensor(
- name='', data_type=tp.INT64, dims=[1],
- vals=[1])),
- helper.make_node('Constant', inputs=[], outputs=["zero"],
- value=helper.make_tensor(
- name='', data_type=tp.INT64, dims=[1],
- vals=[0])),
-
- helper.make_node('Identity', inputs=["one"],
- outputs=["identity_one"]),
- helper.make_node('Identity', inputs=["zero"],
- outputs=["identity_zero"]),
-
- helper.make_node('If', inputs=["X"], outputs=["y"],
- then_branch=then_branch,
- else_branch=else_branch)]
+ helper.make_node(
+ "Constant",
+ inputs=[],
+ outputs=["one"],
+ value=helper.make_tensor(
+ name="", data_type=tp.INT64, dims=[1], vals=[1]
+ ),
+ ),
+ helper.make_node(
+ "Constant",
+ inputs=[],
+ outputs=["zero"],
+ value=helper.make_tensor(
+ name="", data_type=tp.INT64, dims=[1], vals=[0]
+ ),
+ ),
+ helper.make_node("Identity", inputs=["one"], outputs=["identity_one"]),
+ helper.make_node("Identity", inputs=["zero"], outputs=["identity_zero"]),
+ helper.make_node(
+ "If",
+ inputs=["X"],
+ outputs=["y"],
+ then_branch=then_branch,
+ else_branch=else_branch,
+ ),
+ ]
g = helper.make_graph(
- nodes, 'if_test',
- [helper.make_tensor_value_info('X', tp.BOOL, [1])],
- [helper.make_tensor_value_info('y', tp.INT64, [1])])
+ nodes,
+ "if_test",
+ [helper.make_tensor_value_info("X", tp.BOOL, [1])],
+ [helper.make_tensor_value_info("y", tp.INT64, [1])],
+ )
# Create the model and check
m = helper.make_model(
- g, opset_imports=[helper.make_opsetid('', TARGET_OPSET)],
- ir_version=TARGET_IR)
+ g,
+ opset_imports=[helper.make_opsetid("", TARGET_OPSET)],
+ ir_version=TARGET_IR,
+ )
checker.check_model(m)
sess = InferenceSession(
- m.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ m.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
optimized_model = onnx_remove_node_identity(m)
sess_opt = InferenceSession(
- optimized_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ optimized_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
for v in [True, False]:
x = np.array([v])
- expected = sess.run(None, {'X': x})
- got = sess_opt.run(None, {'X': x})
+ expected = sess.run(None, {"X": x})
+ got = sess_opt.run(None, {"X": x})
assert_almost_equal(expected, got)
diff --git a/tests/test_options.py b/tests/test_options.py
index 5b0402d3c..eed853cf7 100644
--- a/tests/test_options.py
+++ b/tests/test_options.py
@@ -39,7 +39,7 @@ def dummy_converter(scope, operator, container):
cst = numpy.array([57777], dtype=numpy.float32)
elif len(options) == 1:
opts = list(options.items())
- if opts[0][0] == 'opt1':
+ if opts[0][0] == "opt1":
if opts[0][1] is None:
cst = numpy.array([57789], dtype=numpy.float32)
elif opts[0][1]:
@@ -48,16 +48,16 @@ def dummy_converter(scope, operator, container):
cst = numpy.array([57779], dtype=numpy.float32)
else:
raise AssertionError("Issue with %r." % options)
- elif opts[0][0] == 'opt3':
+ elif opts[0][0] == "opt3":
if opts[0][1] is None:
cst = numpy.array([51789], dtype=numpy.float32)
- elif opts[0][1] == 'r':
+ elif opts[0][1] == "r":
cst = numpy.array([56779], dtype=numpy.float32)
- elif opts[0][1] == 't':
+ elif opts[0][1] == "t":
cst = numpy.array([58779], dtype=numpy.float32)
else:
raise AssertionError("Issue with %r." % options)
- elif opts[0][0] == 'opt2':
+ elif opts[0][0] == "opt2":
if opts[0][1] is None:
cst = numpy.array([44444], dtype=numpy.float32)
elif isinstance(opts[0][1], int):
@@ -71,25 +71,29 @@ def dummy_converter(scope, operator, container):
id1 = OnnxIdentity(X, op_version=opv)
op = OnnxAdd(id1, cst, op_version=opv)
- id2 = OnnxIdentity(op, output_names=out[:1],
- op_version=opv)
+ id2 = OnnxIdentity(op, output_names=out[:1], op_version=opv)
id2.add_to(scope, container)
class TestOptions(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
update_registered_converter(
- DummyTransformer, "IdentityTransformer",
- dummy_shape_calculator, dummy_converter,
- options={'opt1': [False, True], 'opt2': None,
- 'opt3': ('r', 't'), 'opt4': -1})
+ DummyTransformer,
+ "IdentityTransformer",
+ dummy_shape_calculator,
+ dummy_converter,
+ options={
+ "opt1": [False, True],
+ "opt2": None,
+ "opt3": ("r", "t"),
+ "opt4": -1,
+ },
+ )
def check_in(self, value, onx):
if str(value) not in str(onx):
- raise AssertionError(
- "Unable to find %r in\n%s" % (str(value), str(onx)))
+ raise AssertionError("Unable to find %r in\n%s" % (str(value), str(onx)))
def test_no_options(self):
digits = datasets.load_digits(n_class=6)
@@ -97,34 +101,37 @@ def test_no_options(self):
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET)
- self.check_in('57777', model_onnx)
+ self.check_in("57777", model_onnx)
def test_options_list_true(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt1': True})
- self.check_in('57778', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": True}
+ )
+ self.check_in("57778", model_onnx)
def test_options_list_false(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt1': False})
- self.check_in('57779', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": False}
+ )
+ self.check_in("57779", model_onnx)
def test_options_list_outside_none(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt1': None})
- self.check_in('57789', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": None}
+ )
+ self.check_in("57789", model_onnx)
def test_options_list_outside(self):
digits = datasets.load_digits(n_class=6)
@@ -133,8 +140,7 @@ def test_options_list_outside(self):
idtr = DummyTransformer().fit(Xd, yd)
with self.assertRaises(ValueError):
# value not allowed
- to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt1': 'OUT'})
+ to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt1": "OUT"})
def test_options_integer(self):
digits = datasets.load_digits(n_class=6)
@@ -143,35 +149,33 @@ def test_options_integer(self):
idtr = DummyTransformer().fit(Xd, yd)
with self.assertRaises(TypeError):
# integer not allowed
- to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt4': 44444})
+ to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt4": 44444})
def test_options_tuple1(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt3': 't'})
- self.check_in('58779', model_onnx)
+ model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "t"})
+ self.check_in("58779", model_onnx)
def test_options_tuple2(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt3': 'r'})
- self.check_in('56779', model_onnx)
+ model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "r"})
+ self.check_in("56779", model_onnx)
def test_options_tuple_none(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt3': None})
- self.check_in('51789', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": None}
+ )
+ self.check_in("51789", model_onnx)
def test_options_tuple_out(self):
digits = datasets.load_digits(n_class=6)
@@ -180,26 +184,27 @@ def test_options_tuple_out(self):
idtr = DummyTransformer().fit(Xd, yd)
with self.assertRaises(ValueError):
# value not allowed
- to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt3': 'G'})
+ to_onnx(idtr, Xd, target_opset=TARGET_OPSET, options={"opt3": "G"})
def test_options_none(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt2': None})
- self.check_in('44444', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt2": None}
+ )
+ self.check_in("44444", model_onnx)
def test_options_num(self):
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20].astype(numpy.float32)
yd = digits.target[:20]
idtr = DummyTransformer().fit(Xd, yd)
- model_onnx = to_onnx(idtr, Xd, target_opset=TARGET_OPSET,
- options={'opt2': 33333})
- self.check_in('33333', model_onnx)
+ model_onnx = to_onnx(
+ idtr, Xd, target_opset=TARGET_OPSET, options={"opt2": 33333}
+ )
+ self.check_in("33333", model_onnx)
if __name__ == "__main__":
diff --git a/tests/test_other_converter_library_pipelines.py b/tests/test_other_converter_library_pipelines.py
index 4a5b2c76f..1e5031385 100644
--- a/tests/test_other_converter_library_pipelines.py
+++ b/tests/test_other_converter_library_pipelines.py
@@ -13,9 +13,11 @@
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import (
- calculate_linear_classifier_output_shapes, )
+ calculate_linear_classifier_output_shapes,
+)
from skl2onnx.operator_converters.linear_classifier import (
- convert_sklearn_linear_classifier, )
+ convert_sklearn_linear_classifier,
+)
from test_utils import dump_data_and_model, TARGET_OPSET
@@ -26,8 +28,7 @@ def __init__(self, penalty="l1"):
BaseEstimator.__init__(self)
ClassifierMixin.__init__(self)
self.penalty = penalty
- self.estimator = LogisticRegression(penalty=self.penalty,
- solver="liblinear")
+ self.estimator = LogisticRegression(penalty=self.penalty, solver="liblinear")
def fit(self, X, y, sample_weight=None):
self.estimator_ = self.estimator.fit(X, y, sample_weight=sample_weight)
@@ -69,11 +70,14 @@ def test_custom_pipeline_scaler(self):
try:
model_onnx = convert_sklearn(
- pipe, "pipeline", [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ pipe,
+ "pipeline",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
except RuntimeError as e:
if "No proper shape calculator found for" not in str(
- e
+ e
) and "Unable to find a shape calculator for type" not in str(e):
raise e
@@ -82,42 +86,52 @@ def test_custom_pipeline_scaler(self):
pipe,
"pipeline",
[("input", FloatTensorType([None, 2]))],
- custom_conversion_functions={
- "MyCustomClassifier": my_custom_converter},
+ custom_conversion_functions={"MyCustomClassifier": my_custom_converter},
custom_shape_calculators={
- "MyCustomClassifier": my_custom_shape_extractor},
- target_opset=TARGET_OPSET)
+ "MyCustomClassifier": my_custom_shape_extractor
+ },
+ target_opset=TARGET_OPSET,
+ )
except TypeError as e:
- if "Keys in custom_conversion_functions must be types" not in str(
- e):
+ if "Keys in custom_conversion_functions must be types" not in str(e):
raise e
model_onnx = convert_sklearn(
pipe,
"pipeline",
[("input", FloatTensorType([None, 2]))],
- custom_conversion_functions={
- MyCustomClassifier: my_custom_converter},
- custom_shape_calculators={
- MyCustomClassifier: my_custom_shape_extractor},
- target_opset=TARGET_OPSET)
+ custom_conversion_functions={MyCustomClassifier: my_custom_converter},
+ custom_shape_calculators={MyCustomClassifier: my_custom_shape_extractor},
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(numpy.float32),
- pipe, model_onnx,
- basename="SklearnPipelineScalerCustomClassifier")
+ pipe,
+ model_onnx,
+ basename="SklearnPipelineScalerCustomClassifier",
+ )
update_registered_converter(
- MyCustomClassifier, "MyCustomClassifier",
- my_custom_shape_extractor, my_custom_converter)
+ MyCustomClassifier,
+ "MyCustomClassifier",
+ my_custom_shape_extractor,
+ my_custom_converter,
+ )
- model_onnx = convert_sklearn(pipe, "pipeline",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ pipe,
+ "pipeline",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(numpy.float32), pipe, model_onnx,
- basename="SklearnPipelineScalerCustomClassifier2")
+ X.astype(numpy.float32),
+ pipe,
+ model_onnx,
+ basename="SklearnPipelineScalerCustomClassifier2",
+ )
if __name__ == "__main__":
diff --git a/tests/test_parsing_options.py b/tests/test_parsing_options.py
index f576be1fc..c1fc7c497 100644
--- a/tests/test_parsing_options.py
+++ b/tests/test_parsing_options.py
@@ -7,91 +7,106 @@
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_regression
-from skl2onnx.common.data_types import (
- FloatTensorType, DoubleTensorType)
+from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
from skl2onnx import convert_sklearn
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
class TestParsingOptions(unittest.TestCase):
-
def test_pipeline(self):
- model = Pipeline(
- [('sc1', StandardScaler()), ('sc2', StandardScaler())])
+ model = Pipeline([("sc1", StandardScaler()), ("sc2", StandardScaler())])
X, y = make_regression(n_features=4, random_state=42)
model.fit(X)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
- model_onnx = convert_sklearn(model, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
assert model_onnx is not None
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- final_types=[('output', None)],
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ final_types=[("output", None)],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- assert sess.get_outputs()[0].name == 'output'
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ assert sess.get_outputs()[0].name == "output"
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- final_types=[('output4', None)],
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ final_types=[("output4", None)],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- assert sess.get_outputs()[0].name == 'output4'
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ assert sess.get_outputs()[0].name == "output4"
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- final_types=[('output4', DoubleTensorType())],
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ final_types=[("output4", DoubleTensorType())],
+ target_opset=TARGET_OPSET,
+ )
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except RuntimeError as e:
if "Cast(9)" in str(e):
return
raise e
- assert sess.get_outputs()[0].name == 'output4'
+ assert sess.get_outputs()[0].name == "output4"
assert str(sess.get_outputs()[0].type) == "tensor(double)"
def test_decisiontree_regressor(self):
model = DecisionTreeRegressor(max_depth=2)
X, y = make_regression(n_features=4, random_state=42)
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
- model_onnx = convert_sklearn(model, initial_types=initial_types,
- final_types=[('output4', None)],
- target_opset=TARGET_OPSET)
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
+ model_onnx = convert_sklearn(
+ model,
+ initial_types=initial_types,
+ final_types=[("output4", None)],
+ target_opset=TARGET_OPSET,
+ )
assert model_onnx is not None
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- assert sess.get_outputs()[0].name == 'output4'
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ assert sess.get_outputs()[0].name == "output4"
def test_kmeans(self):
model = KMeans()
X, y = make_regression(n_features=4, random_state=42)
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
with self.assertRaises(RuntimeError):
- convert_sklearn(model, initial_types=initial_types,
- final_types=[('output4', None)],
- target_opset=TARGET_OPSET)
+ convert_sklearn(
+ model,
+ initial_types=initial_types,
+ final_types=[("output4", None)],
+ target_opset=TARGET_OPSET,
+ )
with self.assertRaises(RuntimeError):
- convert_sklearn(model, initial_types=initial_types,
- final_types=[('dup1', None), ('dup1', None)],
- target_opset=TARGET_OPSET)
+ convert_sklearn(
+ model,
+ initial_types=initial_types,
+ final_types=[("dup1", None), ("dup1", None)],
+ target_opset=TARGET_OPSET,
+ )
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- final_types=[('output4', None), ('output5', None)],
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ final_types=[("output4", None), ("output5", None)],
+ target_opset=TARGET_OPSET,
+ )
assert model_onnx is not None
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- assert sess.get_outputs()[0].name == 'output4'
- assert sess.get_outputs()[1].name == 'output5'
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ assert sess.get_outputs()[0].name == "output4"
+ assert sess.get_outputs()[1].name == "output5"
if __name__ == "__main__":
diff --git a/tests/test_raw_name.py b/tests/test_raw_name.py
index 276cf458c..efa1ceed3 100644
--- a/tests/test_raw_name.py
+++ b/tests/test_raw_name.py
@@ -11,7 +11,6 @@
class RawNameTest(unittest.TestCase):
-
_raw_names = (
"float_input",
"float_input--",
@@ -35,8 +34,8 @@ def _get_initial_types(X, raw_name):
@staticmethod
def _predict(clr_onnx, X):
sess = rt.InferenceSession(
- clr_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ clr_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
return sess.run([label_name], {input_name: X.astype(numpy.float32)})[0]
@@ -55,7 +54,8 @@ def test_raw_name(self):
clr_onnx = convert_sklearn(
clr,
initial_types=self._get_initial_types(X, raw_name),
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
pred_onnx = self._predict(clr_onnx, X)
assert_almost_equal(pred, pred_onnx)
diff --git a/tests/test_scikit_pandas.py b/tests/test_scikit_pandas.py
index 6e343f5b6..0abffee8f 100644
--- a/tests/test_scikit_pandas.py
+++ b/tests/test_scikit_pandas.py
@@ -14,6 +14,7 @@
def has_scikit_pandas():
try:
import sklearn_pandas # noqa
+
return True
except ImportError:
return False
@@ -25,20 +26,23 @@ def dataframe_mapper_shape_calculator(operator):
class TestOtherLibrariesInPipelineScikitPandas(unittest.TestCase):
- @unittest.skipIf(not has_scikit_pandas(),
- reason="scikit-pandas not installed")
+ @unittest.skipIf(not has_scikit_pandas(), reason="scikit-pandas not installed")
def test_scikit_pandas(self):
from sklearn_pandas import DataFrameMapper
- df = pandas.DataFrame({
- "feat1": [1, 2, 3, 4, 5, 6],
- "feat2": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0],
- })
-
- mapper = DataFrameMapper([
- (["feat1", "feat2"], StandardScaler()),
- (["feat1", "feat2"], MinMaxScaler()),
- ])
+ df = pandas.DataFrame(
+ {
+ "feat1": [1, 2, 3, 4, 5, 6],
+ "feat2": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0],
+ }
+ )
+
+ mapper = DataFrameMapper(
+ [
+ (["feat1", "feat2"], StandardScaler()),
+ (["feat1", "feat2"], MinMaxScaler()),
+ ]
+ )
try:
model_onnx = convert_sklearn( # noqa
diff --git a/tests/test_shapes.py b/tests/test_shapes.py
index 2879098d3..330cdd41e 100644
--- a/tests/test_shapes.py
+++ b/tests/test_shapes.py
@@ -13,26 +13,27 @@
from test_utils import TARGET_OPSET, InferenceSessionEx as InferenceSession
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestShapes(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.0.0"),
- reason="not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.0.0"), reason="not available"
+ )
def test_onnxruntime_shapes_reg(self):
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestRegressor(max_depth=1)
clr.fit(X_train, y_train)
- initial_type = [('float_input', FloatTensorType([None, 4]))]
- onx = convert_sklearn(clr, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ initial_type = [("float_input", FloatTensorType([None, 4]))]
+ onx = convert_sklearn(
+ clr, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
input_name = sess.get_inputs()[0].name
pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)})
shape1 = sess.get_inputs()[0].shape
@@ -46,21 +47,25 @@ def test_onnxruntime_shapes_reg(self):
self.assertEqual(pred_onx[0].shape[1], shape2[1])
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("1.0.0"),
- reason="not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.0.0"), reason="not available"
+ )
def test_onnxruntime_shapes_clr(self):
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier(max_depth=1)
clr.fit(X_train, y_train)
- initial_type = [('float_input', FloatTensorType([None, 4]))]
- onx = convert_sklearn(clr, initial_types=initial_type,
- options={id(clr): {'zipmap': False}},
- target_opset=TARGET_OPSET)
+ initial_type = [("float_input", FloatTensorType([None, 4]))]
+ onx = convert_sklearn(
+ clr,
+ initial_types=initial_type,
+ options={id(clr): {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
input_name = sess.get_inputs()[0].name
pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)})
shape1 = sess.get_inputs()[0].shape
diff --git a/tests/test_sklearn_adaboost_converter.py b/tests/test_sklearn_adaboost_converter.py
index cfb6120c7..678bcd2c9 100644
--- a/tests/test_sklearn_adaboost_converter.py
+++ b/tests/test_sklearn_adaboost_converter.py
@@ -19,53 +19,71 @@
dump_data_and_model,
fit_classification_model,
fit_regression_model,
- TARGET_OPSET
+ TARGET_OPSET,
)
-ort_version = '.'.join(ort_version.split('.')[:2])
-skl_version = '.'.join(sklearn_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
+skl_version = ".".join(sklearn_version.split(".")[:2])
class TestSklearnAdaBoostModels(unittest.TestCase):
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_samme_r(self):
if pv.Version(skl_version) < pv.Version("1.2"):
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=10, algorithm="SAMME.R", random_state=42,
- base_estimator=DecisionTreeClassifier(
- max_depth=2, random_state=42)), 3)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=10,
+ algorithm="SAMME.R",
+ random_state=42,
+ base_estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
+ ),
+ 3,
+ )
else:
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=10, algorithm="SAMME.R", random_state=42,
- estimator=DecisionTreeClassifier(
- max_depth=2, random_state=42)), 3)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=10,
+ algorithm="SAMME.R",
+ random_state=42,
+ estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
+ ),
+ 3,
+ )
model_onnx = convert_sklearn(
model,
"AdaBoost classification",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=10
+ target_opset=10,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test,
- model,
- model_onnx,
- basename="SklearnAdaBoostClassifierSAMMER")
+ X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMER"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_samme_r_decision_function(self):
if pv.Version(skl_version) < pv.Version("1.2"):
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=10, algorithm="SAMME.R", random_state=42,
- base_estimator=DecisionTreeClassifier(
- max_depth=2, random_state=42)), 4)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=10,
+ algorithm="SAMME.R",
+ random_state=42,
+ base_estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
+ ),
+ 4,
+ )
else:
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=10, algorithm="SAMME.R", random_state=42,
- estimator=DecisionTreeClassifier(
- max_depth=2, random_state=42)), 4)
- options = {id(model): {'raw_scores': True}}
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=10,
+ algorithm="SAMME.R",
+ random_state=42,
+ estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
+ ),
+ 4,
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
model,
"AdaBoost classification",
@@ -79,45 +97,64 @@ def test_ada_boost_classifier_samme_r_decision_function(self):
model,
model_onnx,
basename="SklearnAdaBoostClassifierSAMMERDecisionFunction",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_samme_r_logreg(self):
if pv.Version(skl_version) < pv.Version("1.2"):
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME.R",
- base_estimator=LogisticRegression(
- solver='liblinear')), 4)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME.R",
+ base_estimator=LogisticRegression(solver="liblinear"),
+ ),
+ 4,
+ )
else:
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME.R",
- estimator=LogisticRegression(
- solver='liblinear')), 4)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME.R",
+ estimator=LogisticRegression(solver="liblinear"),
+ ),
+ 4,
+ )
model_onnx = convert_sklearn(
model,
"AdaBoost classification",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=10
+ target_opset=10,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test,
- model,
- model_onnx,
- basename="SklearnAdaBoostClassifierSAMMERLogReg")
+ X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMERLogReg"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_samme(self):
if pv.Version(skl_version) < pv.Version("1.2"):
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME", random_state=42,
- base_estimator=DecisionTreeClassifier(
- max_depth=6, random_state=42)), 2, n_features=7)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME",
+ random_state=42,
+ base_estimator=DecisionTreeClassifier(max_depth=6, random_state=42),
+ ),
+ 2,
+ n_features=7,
+ )
else:
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME", random_state=42,
- estimator=DecisionTreeClassifier(
- max_depth=6, random_state=42)), 2, n_features=7)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME",
+ random_state=42,
+ estimator=DecisionTreeClassifier(max_depth=6, random_state=42),
+ ),
+ 2,
+ n_features=7,
+ )
model_onnx = convert_sklearn(
model,
"AdaBoostClSamme",
@@ -126,24 +163,32 @@ def test_ada_boost_classifier_samme(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test,
- model,
- model_onnx,
- basename="SklearnAdaBoostClassifierSAMMEDT")
+ X_test, model, model_onnx, basename="SklearnAdaBoostClassifierSAMMEDT"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_samme_decision_function(self):
if pv.Version(skl_version) < pv.Version("1.2"):
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME", random_state=42,
- base_estimator=DecisionTreeClassifier(
- max_depth=6, random_state=42)), 2)
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME",
+ random_state=42,
+ base_estimator=DecisionTreeClassifier(max_depth=6, random_state=42),
+ ),
+ 2,
+ )
else:
- model, X_test = fit_classification_model(AdaBoostClassifier(
- n_estimators=5, algorithm="SAMME", random_state=42,
- estimator=DecisionTreeClassifier(
- max_depth=6, random_state=42)), 2)
- options = {id(model): {'raw_scores': True}}
+ model, X_test = fit_classification_model(
+ AdaBoostClassifier(
+ n_estimators=5,
+ algorithm="SAMME",
+ random_state=42,
+ estimator=DecisionTreeClassifier(max_depth=6, random_state=42),
+ ),
+ 2,
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
model,
"AdaBoostClSamme",
@@ -157,32 +202,30 @@ def test_ada_boost_classifier_samme_decision_function(self):
model,
model_onnx,
basename="SklearnAdaBoostClassifierSAMMEDTDecisionFunction",
- methods=['predict', 'decision_function_binary'],
+ methods=["predict", "decision_function_binary"],
)
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_lr(self):
model, X_test = fit_classification_model(
- AdaBoostClassifier(learning_rate=0.3, random_state=42), 3,
- is_int=True)
+ AdaBoostClassifier(learning_rate=0.3, random_state=42), 3, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"AdaBoost classification",
[("input", Int64TensorType((None, X_test.shape[1])))],
- target_opset=10
+ target_opset=10,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test,
- model,
- model_onnx,
- basename="SklearnAdaBoostClassifierLR")
+ X_test, model, model_onnx, basename="SklearnAdaBoostClassifierLR"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_classifier_bool(self):
model, X_test = fit_classification_model(
- AdaBoostClassifier(random_state=42), 3,
- is_bool=True)
+ AdaBoostClassifier(random_state=42), 3, is_bool=True
+ )
model_onnx = convert_sklearn(
model,
"AdaBoost classification",
@@ -191,113 +234,137 @@ def test_ada_boost_classifier_bool(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test,
- model,
- model_onnx,
- basename="SklearnAdaBoostClassifierBool")
+ X_test, model, model_onnx, basename="SklearnAdaBoostClassifierBool"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor(self):
- model, X = fit_regression_model(
- AdaBoostRegressor(n_estimators=5))
+ model, X = fit_regression_model(AdaBoostRegressor(n_estimators=5))
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=10)
+ target_opset=10,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
backend="onnxruntime",
- basename="SklearnAdaBoostRegressor-Dec4")
+ basename="SklearnAdaBoostRegressor-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor_lreg(self):
if pv.Version(skl_version) < pv.Version("1.2"):
model, X = fit_regression_model(
- AdaBoostRegressor(n_estimators=5,
- base_estimator=LinearRegression()))
+ AdaBoostRegressor(n_estimators=5, base_estimator=LinearRegression())
+ )
else:
model, X = fit_regression_model(
- AdaBoostRegressor(n_estimators=5,
- estimator=LinearRegression()))
+ AdaBoostRegressor(n_estimators=5, estimator=LinearRegression())
+ )
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=10)
+ target_opset=10,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
backend="onnxruntime",
- basename="SklearnAdaBoostRegressorLReg-Dec4")
+ basename="SklearnAdaBoostRegressorLReg-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor_int(self):
- model, X = fit_regression_model(
- AdaBoostRegressor(n_estimators=5), is_int=True)
+ model, X = fit_regression_model(AdaBoostRegressor(n_estimators=5), is_int=True)
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=10)
+ target_opset=10,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
backend="onnxruntime",
- basename="SklearnAdaBoostRegressorInt-Dec4")
+ basename="SklearnAdaBoostRegressorInt-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor_lr10(self):
model, X = fit_regression_model(
- AdaBoostRegressor(learning_rate=0.5, random_state=42))
+ AdaBoostRegressor(learning_rate=0.5, random_state=42)
+ )
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=10)
+ target_opset=10,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
backend="onnxruntime",
- basename="SklearnAdaBoostRegressorLR-Dec4")
+ basename="SklearnAdaBoostRegressorLR-Dec4",
+ )
- @unittest.skipIf((pv.Version(ort_version) <
- pv.Version("0.5.9999")),
- reason="not available")
+ @unittest.skipIf(
+ (pv.Version(ort_version) < pv.Version("0.5.9999")), reason="not available"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor_lr11(self):
model, X = fit_regression_model(
- AdaBoostRegressor(learning_rate=0.5, random_state=42))
+ AdaBoostRegressor(learning_rate=0.5, random_state=42)
+ )
if onnx_opset_version() < 11:
try:
convert_sklearn(
- model, "AdaBoost regression",
- [("input", FloatTensorType([None, X.shape[1]]))])
+ model,
+ "AdaBoost regression",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ )
except RuntimeError:
return
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnAdaBoostRegressorLR-Dec4")
+ X, model, model_onnx, basename="SklearnAdaBoostRegressorLR-Dec4"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_ada_boost_regressor_bool(self):
model, X = fit_regression_model(
- AdaBoostRegressor(learning_rate=0.5, random_state=42),
- is_bool=True)
+ AdaBoostRegressor(learning_rate=0.5, random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "AdaBoost regression",
+ model,
+ "AdaBoost regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
target_opset=10,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
backend="onnxruntime",
- basename="SklearnAdaBoostRegressorBool")
+ basename="SklearnAdaBoostRegressorBool",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_array_feature_extractor.py b/tests/test_sklearn_array_feature_extractor.py
index d3c79006b..9c918cb0f 100644
--- a/tests/test_sklearn_array_feature_extractor.py
+++ b/tests/test_sklearn_array_feature_extractor.py
@@ -8,6 +8,7 @@
from onnxruntime import __version__ as ort_version
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import OneHotEncoder
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -16,57 +17,78 @@
from skl2onnx import to_onnx
from skl2onnx.common.data_types import FloatTensorType
from sklearn.pipeline import Pipeline
-from test_utils import (
- dump_data_and_model,
- TARGET_OPSET)
+from test_utils import dump_data_and_model, TARGET_OPSET
class TestSklearnArrayFeatureExtractor(unittest.TestCase):
-
@unittest.skipIf(
- ColumnTransformer is None or
- pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="onnxruntime too old")
+ ColumnTransformer is None or pv.Version(ort_version) <= pv.Version("0.4.0"),
+ reason="onnxruntime too old",
+ )
def test_array_feature_extractor(self):
data_to_cluster = pd.DataFrame(
- [[1, 2, 3.5, 4.5], [1, 2, 1.7, 4.0],
- [2, 4, 2.4, 4.3], [2, 4, 2.5, 4.0]],
- columns=[1, 2, 3, 4])
+ [[1, 2, 3.5, 4.5], [1, 2, 1.7, 4.0], [2, 4, 2.4, 4.3], [2, 4, 2.5, 4.0]],
+ columns=[1, 2, 3, 4],
+ )
cat_attributes_clustering = [1, 2]
num_attributes_clustering = [3, 4] # this is of length 12 in reality
gmm = GaussianMixture(n_components=2, random_state=1)
- ohe_cat = [OneHotEncoder(categories='auto', sparse=False, drop=None)
- for i in cat_attributes_clustering]
- ct_cat = ColumnTransformer([
- ("oneHotEncoder" + str(i), ohe_cat[i], [i])
- for i, item in enumerate(cat_attributes_clustering)
- ], remainder='passthrough')
- onehotencoding_pipeline = Pipeline([("columnTransformer", ct_cat), ])
- clustering_pipeline = Pipeline([
- ('onehotencoder_and_scaler', onehotencoding_pipeline),
- ('clustering', gmm)])
+ ohe_cat = [
+ OneHotEncoder(categories="auto", sparse=False, drop=None)
+ for i in cat_attributes_clustering
+ ]
+ ct_cat = ColumnTransformer(
+ [
+ ("oneHotEncoder" + str(i), ohe_cat[i], [i])
+ for i, item in enumerate(cat_attributes_clustering)
+ ],
+ remainder="passthrough",
+ )
+ onehotencoding_pipeline = Pipeline(
+ [
+ ("columnTransformer", ct_cat),
+ ]
+ )
+ clustering_pipeline = Pipeline(
+ [("onehotencoder_and_scaler", onehotencoding_pipeline), ("clustering", gmm)]
+ )
clustering_pipeline.fit(X=data_to_cluster)
initial_type = [
- ('float_input', FloatTensorType(
- [None, len([*cat_attributes_clustering,
- *num_attributes_clustering])]))]
+ (
+ "float_input",
+ FloatTensorType(
+ [
+ None,
+ len([*cat_attributes_clustering, *num_attributes_clustering]),
+ ]
+ ),
+ )
+ ]
data = data_to_cluster.values.astype(np.float32)
# checks the first step
model_onnx = to_onnx(
- clustering_pipeline.steps[0][1], initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ clustering_pipeline.steps[0][1],
+ initial_types=initial_type,
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- data, clustering_pipeline.steps[0][1], model_onnx,
- basename="SklearnArrayFeatureExtractorStep0")
+ data,
+ clustering_pipeline.steps[0][1],
+ model_onnx,
+ basename="SklearnArrayFeatureExtractorStep0",
+ )
# checks the whole pipeline
model_onnx = to_onnx(
- clustering_pipeline, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ clustering_pipeline, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
dump_data_and_model(
- data, clustering_pipeline, model_onnx,
- basename="SklearnArrayFeatureExtractor")
+ data,
+ clustering_pipeline,
+ model_onnx,
+ basename="SklearnArrayFeatureExtractor",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_bagging_converter.py b/tests/test_sklearn_bagging_converter.py
index 732e984f3..713577987 100644
--- a/tests/test_sklearn_bagging_converter.py
+++ b/tests/test_sklearn_bagging_converter.py
@@ -4,6 +4,7 @@
import unittest
import packaging.version as pv
import onnxruntime
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -14,310 +15,364 @@
BaggingClassifier,
BaggingRegressor,
GradientBoostingClassifier,
- GradientBoostingRegressor)
+ GradientBoostingRegressor,
+)
from sklearn.linear_model import SGDClassifier, SGDRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
BooleanTensorType,
FloatTensorType,
- Int64TensorType)
+ Int64TensorType,
+)
from test_utils import (
dump_data_and_model,
fit_classification_model,
fit_regression_model,
- TARGET_OPSET)
+ TARGET_OPSET,
+)
class TestSklearnBaggingConverter(unittest.TestCase):
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_bagging_classifier_default_binary_int(self):
- model, X = fit_classification_model(
- BaggingClassifier(), 2, is_int=True)
+ model, X = fit_classification_model(BaggingClassifier(), 2, is_int=True)
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBaggingClassifierDefaultBinary")
+ X, model, model_onnx, basename="SklearnBaggingClassifierDefaultBinary"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_bagging_classifier_default_multiclass_int(self):
- model, X = fit_classification_model(
- BaggingClassifier(), 4, is_int=True)
+ model, X = fit_classification_model(BaggingClassifier(), 4, is_int=True)
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBaggingClassifierDefaultMulticlass")
+ X, model, model_onnx, basename="SklearnBaggingClassifierDefaultMulticlass"
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_default_binary(self):
- model, X = fit_classification_model(
- BaggingClassifier(), 2)
+ model, X = fit_classification_model(BaggingClassifier(), 2)
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
- model_onnx, verbose=False,
- basename="SklearnBaggingClassifierDefaultBinary")
+ model_onnx,
+ verbose=False,
+ basename="SklearnBaggingClassifierDefaultBinary",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_max_features(self):
- model, X = fit_classification_model(
- BaggingClassifier(max_features=0.5), 2)
+ model, X = fit_classification_model(BaggingClassifier(max_features=0.5), 2)
model_onnx = convert_sklearn(
- model, "bagging classifier",
+ model,
+ "bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnBaggingClassifierMaxFeatures")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnBaggingClassifierMaxFeatures",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_bootstrap_features(self):
model, X = fit_classification_model(
- BaggingClassifier(bootstrap_features=True), 2)
+ BaggingClassifier(bootstrap_features=True), 2
+ )
model_onnx = convert_sklearn(
- model, "bagging classifier",
+ model,
+ "bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:5], model, model_onnx, verbose=False,
- basename="SklearnBaggingClassifierBootstrapFeatures")
+ X[:5],
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnBaggingClassifierBootstrapFeatures",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_default_binary_nozipmap(self):
- model, X = fit_classification_model(
- BaggingClassifier(), 2)
+ model, X = fit_classification_model(BaggingClassifier(), 2)
model_onnx = convert_sklearn(
- model, "bagging classifier",
+ model,
+ "bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBaggingClassifierDefaultBinaryNoZipMap")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnBaggingClassifierDefaultBinaryNoZipMap",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_default_multiclass(self):
- model, X = fit_classification_model(
- BaggingClassifier(), 3)
+ model, X = fit_classification_model(BaggingClassifier(), 3)
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X[:10],
model,
model_onnx,
- basename="SklearnBaggingClassifierDefaultMulticlass")
+ basename="SklearnBaggingClassifierDefaultMulticlass",
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_bagging_classifier_sgd_binary(self):
model, X = fit_classification_model(
BaggingClassifier(
- SGDClassifier(loss='modified_huber', random_state=42),
- random_state=42), 2)
+ SGDClassifier(loss="modified_huber", random_state=42), random_state=42
+ ),
+ 2,
+ )
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBaggingClassifierSGDBinary")
+ X, model, model_onnx, basename="SklearnBaggingClassifierSGDBinary"
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_sgd_binary_decision_function(self):
model, X = fit_classification_model(
- BaggingClassifier(SGDClassifier(random_state=42),
- random_state=42), 2)
- options = {id(model): {'raw_scores': True}}
+ BaggingClassifier(SGDClassifier(random_state=42), random_state=42), 2
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
options=options,
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:5], model, model_onnx,
+ X[:5],
+ model,
+ model_onnx,
basename="SklearnBaggingClassifierSGDBinaryDecisionFunction-Dec3",
- methods=['predict', 'decision_function_binary'])
+ methods=["predict", "decision_function_binary"],
+ )
- @unittest.skipIf(pv.Version(onnxruntime.__version__)
- <= pv.Version("0.4.0"),
- reason="Not implemented.")
+ @unittest.skipIf(
+ pv.Version(onnxruntime.__version__) <= pv.Version("0.4.0"),
+ reason="Not implemented.",
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_bagging_classifier_sgd_multiclass(self):
model, X = fit_classification_model(
BaggingClassifier(
- SGDClassifier(loss='modified_huber', random_state=42),
- random_state=42), 5)
+ SGDClassifier(loss="modified_huber", random_state=42), random_state=42
+ ),
+ 5,
+ )
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:5], model, model_onnx,
- basename="SklearnBaggingClassifierSGDMulticlass-Dec3")
+ X[:5],
+ model,
+ model_onnx,
+ basename="SklearnBaggingClassifierSGDMulticlass-Dec3",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_sgd_multiclass_decision_function(self):
model, X = fit_classification_model(
BaggingClassifier(
GradientBoostingClassifier(random_state=42, n_estimators=4),
- random_state=42), 4, n_features=10)
- options = {id(model): {'raw_scores': True, "zipmap": False}}
+ random_state=42,
+ ),
+ 4,
+ n_features=10,
+ )
+ options = {id(model): {"raw_scores": True, "zipmap": False}}
model_onnx = convert_sklearn(
- model, "bagging classifier",
+ model,
+ "bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
options=options,
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:15], model, model_onnx,
+ X[:15],
+ model,
+ model_onnx,
basename="SklearnBaggingClassifierSGDMultiDecisionFunction-Dec3",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_gradient_boosting_binary(self):
model, X = fit_classification_model(
- BaggingClassifier(
- GradientBoostingClassifier(n_estimators=10)), 2)
+ BaggingClassifier(GradientBoostingClassifier(n_estimators=10)), 2
+ )
model_onnx = convert_sklearn(
model,
"bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={"zipmap": False})
+ options={"zipmap": False},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
model_onnx,
- basename="SklearnBaggingClassifierGradientBoostingBinary")
+ basename="SklearnBaggingClassifierGradientBoostingBinary",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_classifier_gradient_boosting_multiclass(self):
model, X = fit_classification_model(
- BaggingClassifier(
- GradientBoostingClassifier(n_estimators=10)), 3)
+ BaggingClassifier(GradientBoostingClassifier(n_estimators=10)), 3
+ )
model_onnx = convert_sklearn(
- model, "bagging classifier",
+ model,
+ "bagging classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={"zipmap": False})
+ options={"zipmap": False},
+ )
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBaggingClassifierGradientBoostingMulticlass")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnBaggingClassifierGradientBoostingMulticlass",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_default(self):
- model, X = fit_regression_model(
- BaggingRegressor())
+ model, X = fit_regression_model(BaggingRegressor())
model_onnx = convert_sklearn(
model,
"bagging regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBaggingRegressorDefault-Dec4")
+ X, model, model_onnx, basename="SklearnBaggingRegressorDefault-Dec4"
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_max_features(self):
model, X = fit_regression_model(
- BaggingRegressor(max_features=0.5, n_estimators=3))
+ BaggingRegressor(max_features=0.5, n_estimators=3)
+ )
model_onnx = convert_sklearn(
- model, "bagging regressor",
+ model,
+ "bagging regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnBaggingRegressorMaxFeatures-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnBaggingRegressorMaxFeatures-Dec4",
+ )
def test_bagging_regressor_bootstrap_features(self):
- model, X = fit_regression_model(
- BaggingRegressor(bootstrap_features=False))
+ model, X = fit_regression_model(BaggingRegressor(bootstrap_features=False))
model_onnx = convert_sklearn(
- model, "bagging regressor",
+ model,
+ "bagging regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnBaggingRegressorBootstrapFeatures-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnBaggingRegressorBootstrapFeatures-Dec4",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_sgd(self):
- model, X = fit_regression_model(
- BaggingRegressor(SGDRegressor()))
+ model, X = fit_regression_model(BaggingRegressor(SGDRegressor()))
model_onnx = convert_sklearn(
- model, "bagging regressor",
+ model,
+ "bagging regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBaggingRegressorSGD-Dec4")
+ X, model, model_onnx, basename="SklearnBaggingRegressorSGD-Dec4"
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_gradient_boosting(self):
model, X = fit_regression_model(
- BaggingRegressor(
- GradientBoostingRegressor(n_estimators=10)))
+ BaggingRegressor(GradientBoostingRegressor(n_estimators=10))
+ )
model_onnx = convert_sklearn(
- model, "bagging regressor",
+ model,
+ "bagging regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBaggingRegressorGradientBoosting-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnBaggingRegressorGradientBoosting-Dec4",
+ )
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_bool(self):
- model, X = fit_regression_model(
- BaggingRegressor(), is_bool=True)
+ model, X = fit_regression_model(BaggingRegressor(), is_bool=True)
model_onnx = convert_sklearn(
model,
"bagging regressor",
@@ -326,10 +381,8 @@ def test_bagging_regressor_bool(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBaggingRegressorBool-Dec4")
+ X, model, model_onnx, basename="SklearnBaggingRegressorBool-Dec4"
+ )
if __name__ == "__main__":
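Most of the hunks in this file are mechanical: whenever a call no longer fits within black's 88-column limit, it is rewritten with one argument per line and a trailing comma, and an existing trailing comma (the "magic trailing comma") keeps the exploded form even when the call would otherwise fit. A minimal sketch of that behaviour, assuming a recent black release that exposes the format_str and Mode API:

import black  # assumption: a recent black release providing format_str and Mode

src = (
    "dump_data_and_model(\n"
    "    X, model, model_onnx, verbose=False,\n"
    '    basename="SklearnBaggingClassifierMaxFeatures")\n'
)
# Joined onto one line this call exceeds 88 columns, so black rewrites it
# with one argument per line and a trailing comma before the closing
# parenthesis, which is the exact shape the hunks above show.
print(black.format_str(src, mode=black.Mode(line_length=88)))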
diff --git a/tests/test_sklearn_binarizer_converter.py b/tests/test_sklearn_binarizer_converter.py
index db488d071..e63eaea94 100644
--- a/tests/test_sklearn_binarizer_converter.py
+++ b/tests/test_sklearn_binarizer_converter.py
@@ -14,19 +14,21 @@
class TestSklearnBinarizer(unittest.TestCase):
def test_model_binarizer(self):
- data = np.array([[1., -1., 2.],
- [2., 0., 0.],
- [0., 1., -1.]], dtype=np.float32)
+ data = np.array(
+ [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]], dtype=np.float32
+ )
model = Binarizer(threshold=0.5)
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn binarizer",
+ model,
+ "scikit-learn binarizer",
[("input", FloatTensorType(data.shape))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnBinarizer-SkipDim1")
+ data, model, model_onnx, basename="SklearnBinarizer-SkipDim1"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_calibrated_classifier_cv_converter.py b/tests/test_sklearn_calibrated_classifier_cv_converter.py
index 740379160..1f3555084 100644
--- a/tests/test_sklearn_calibrated_classifier_cv_converter.py
+++ b/tests/test_sklearn_calibrated_classifier_cv_converter.py
@@ -11,8 +11,8 @@
from onnxruntime import __version__ as ort_version
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import load_digits, load_iris
-from sklearn.ensemble import (
- RandomForestClassifier, GradientBoostingClassifier)
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+
try:
from sklearn.ensemble import HistGradientBoostingClassifier
except ImportError:
@@ -23,6 +23,7 @@
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.exceptions import ConvergenceWarning
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -36,49 +37,59 @@
apply_less = None
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType, onnx_built_with_ml)
+ FloatTensorType,
+ Int64TensorType,
+)
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestSklearnCalibratedClassifierCVConverters(unittest.TestCase):
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_float(self):
data = load_iris()
X, y = data.data, data.target
clf = MultinomialNB().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCVMNB",
+ model,
+ "scikit-learn CalibratedClassifierCVMNB",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVFloat")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVFloat",
+ )
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_float_nozipmap(self):
data = load_iris()
X, y = data.data, data.target
clf = MultinomialNB().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCVMNB",
+ model,
+ "scikit-learn CalibratedClassifierCVMNB",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVFloatNoZipMap")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVFloatNoZipMap",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_calibrated_classifier_cv_sigmoid_int(self):
@@ -87,38 +98,45 @@ def test_model_calibrated_classifier_cv_sigmoid_int(self):
clf = MultinomialNB().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCVMNB",
+ model,
+ "scikit-learn CalibratedClassifierCVMNB",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnCalibratedClassifierCVInt-Dec4")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVInt-Dec4",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_isotonic_float(self):
data = load_iris()
X, y = data.data, data.target
clf = KNeighborsClassifier().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCVKNN",
+ model,
+ "scikit-learn CalibratedClassifierCVKNN",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
try:
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVIsotonicFloat")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVIsotonicFloat",
+ )
except Exception as e:
- raise AssertionError("Issue with model\n{}".format(
- model_onnx)) from e
+ raise AssertionError("Issue with model\n{}".format(model_onnx)) from e
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_binary_mnb(self):
data = load_iris()
X, y = data.data, data.target
@@ -126,19 +144,23 @@ def test_model_calibrated_classifier_cv_binary_mnb(self):
clf = MultinomialNB().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCV",
+ model,
+ "scikit-learn CalibratedClassifierCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVBinaryMNB")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVBinaryMNB",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_isotonic_binary_knn(self):
data = load_iris()
X, y = data.data, data.target
@@ -146,184 +168,218 @@ def test_model_calibrated_classifier_cv_isotonic_binary_knn(self):
clf = KNeighborsClassifier().fit(X, y)
model = CalibratedClassifierCV(clf, cv=2, method="isotonic").fit(X, y)
model_onnx = convert_sklearn(
- model, "scikit-learn CalibratedClassifierCV",
+ model,
+ "scikit-learn CalibratedClassifierCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVIsotonicBinaryKNN")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVIsotonicBinaryKNN",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_logistic_regression(self):
data = load_iris()
X, y = data.data, data.target
y[y > 1] = 1
model = CalibratedClassifierCV(
- base_estimator=LogisticRegression(), method='sigmoid').fit(X, y)
+ base_estimator=LogisticRegression(), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierCVBinaryLogReg")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierCVBinaryLogReg",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_rf(self):
data = load_iris()
X, y = data.data, data.target
y[y > 1] = 1
model = CalibratedClassifierCV(
- base_estimator=RandomForestClassifier(n_estimators=2),
- method='sigmoid').fit(X, y)
+ base_estimator=RandomForestClassifier(n_estimators=2), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "clarf",
+ model,
+ "clarf",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierRF")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierRF",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_gbt(self):
data = load_iris()
X, y = data.data, data.target
y[y > 1] = 1
model = CalibratedClassifierCV(
- base_estimator=GradientBoostingClassifier(n_estimators=2),
- method='sigmoid').fit(X, y)
+ base_estimator=GradientBoostingClassifier(n_estimators=2), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "clarf",
+ model,
+ "clarf",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierGBT")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierGBT",
+ )
+ @unittest.skipIf(HistGradientBoostingClassifier is None, reason="not available")
@unittest.skipIf(
- HistGradientBoostingClassifier is None, reason="not available")
- @unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_hgbt(self):
data = load_iris()
X, y = data.data, data.target
y[y > 1] = 1
model = CalibratedClassifierCV(
- base_estimator=HistGradientBoostingClassifier(max_iter=4),
- method='sigmoid').fit(X, y)
+ base_estimator=HistGradientBoostingClassifier(max_iter=4), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "clarf",
+ model,
+ "clarf",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierHGBT")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierHGBT",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_tree(self):
data = load_iris()
X, y = data.data, data.target
y[y > 1] = 1
model = CalibratedClassifierCV(
- base_estimator=DecisionTreeClassifier(),
- method='sigmoid').fit(X, y)
+ base_estimator=DecisionTreeClassifier(), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "clarf",
+ model,
+ "clarf",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierDT")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierDT",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(apply_less is None, reason="onnxconverter-common old")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_svc(self):
data = load_iris()
X, y = data.data, data.target
- model = CalibratedClassifierCV(
- base_estimator=SVC(),
- method='sigmoid').fit(X, y)
+ model = CalibratedClassifierCV(base_estimator=SVC(), method="sigmoid").fit(X, y)
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierSVC")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierSVC",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(apply_less is None, reason="onnxconverter-common old")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_linearsvc(self):
data = load_iris()
X, y = data.data, data.target
model = CalibratedClassifierCV(
- base_estimator=LinearSVC(),
- method='sigmoid').fit(X, y)
+ base_estimator=LinearSVC(), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierLinearSVC")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierLinearSVC",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(apply_less is None, reason="onnxconverter-common old")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_linearsvc2(self):
data = load_iris()
X, y = data.data, data.target
y[y == 2] = 0
self.assertEqual(len(set(y)), 2)
model = CalibratedClassifierCV(
- base_estimator=LinearSVC(),
- method='sigmoid').fit(X, y)
+ base_estimator=LinearSVC(), method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnCalibratedClassifierLinearSVC2")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnCalibratedClassifierLinearSVC2",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(apply_less is None, reason="onnxconverter-common old")
- @ignore_warnings(
- category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_calibrated_classifier_cv_svc2_binary(self):
data = load_iris()
X, y = data.data, data.target
@@ -335,21 +391,22 @@ def test_model_calibrated_classifier_cv_svc2_binary(self):
model_sub.fit(X, y)
with self.subTest(model=model_sub):
model = CalibratedClassifierCV(
- base_estimator=model_sub, cv=2,
- method='sigmoid').fit(X, y)
+ base_estimator=model_sub, cv=2, method="sigmoid"
+ ).fit(X, y)
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
if sess is not None:
try:
- res = sess.run(
- None, {'input': X[:5].astype(np.float32)})
+ res = sess.run(None, {"input": X[:5].astype(np.float32)})
except RuntimeError as e:
raise AssertionError("runtime failed") from e
assert_almost_equal(model.predict_proba(X[:5]), res[1])
@@ -357,8 +414,11 @@ def test_model_calibrated_classifier_cv_svc2_binary(self):
name = model_sub.__class__.__name__
dump_data_and_model(
- X.astype(np.float32)[:10], model, model_onnx,
- basename=f"SklearnCalibratedClassifierBinary{name}SVC2")
+ X.astype(np.float32)[:10],
+ model,
+ model_onnx,
+ basename=f"SklearnCalibratedClassifierBinary{name}SVC2",
+ )
if __name__ == "__main__":
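The binary SVC test above relies on the zipmap option: with zipmap disabled the converted model's second output is a plain probability tensor rather than a list of dictionaries, so it can be compared against predict_proba directly. A minimal, self-contained sketch of that pattern (a LogisticRegression stands in for the calibrated models used in the tests, and the comparison tolerance is an assumption):

import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from onnxruntime import InferenceSession
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

X, y = load_iris(return_X_y=True)
model = LogisticRegression(max_iter=500).fit(X, y)
onx = convert_sklearn(
    model,
    "demo",
    [("input", FloatTensorType([None, X.shape[1]]))],
    options={id(model): {"zipmap": False}},
)
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
# With zipmap=False the outputs are (label, probabilities); the second one is
# a float32 matrix aligned with predict_proba.
probs = sess.run(None, {"input": X[:5].astype(np.float32)})[1]
assert_almost_equal(model.predict_proba(X[:5]), probs, decimal=4)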
diff --git a/tests/test_sklearn_cast_regressor.py b/tests/test_sklearn_cast_regressor.py
index d89a9ff2f..a158f1a3f 100644
--- a/tests/test_sklearn_cast_regressor.py
+++ b/tests/test_sklearn_cast_regressor.py
@@ -13,109 +13,132 @@
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
ColumnTransformer = None
from skl2onnx.sklapi import CastRegressor, CastTransformer
from skl2onnx import convert_sklearn, to_onnx
-from skl2onnx.common.data_types import (
- FloatTensorType, DoubleTensorType)
+from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
class TestSklearnCastRegressorConverter(unittest.TestCase):
-
def common_test_cast_regressor(self, dtype, input_type):
model = CastRegressor(DecisionTreeRegressor(max_depth=2), dtype=dtype)
- data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0],
- [0, 2, 1], [1, 0, 2],
- [0.1, 2.1, 1.1], [1.1, 0.1, 2.2],
- [-0.1, -2.1, -1.1], [-1.1, -0.1, -2.2],
- [0.2, 2.2, 1.2], [1.2, 0.2, 2.2]],
- dtype=numpy.float32)
- y = (numpy.sum(data, axis=1, keepdims=0) +
- numpy.random.randn(data.shape[0]))
+ data = numpy.array(
+ [
+ [0.1, 0.2, 3.1],
+ [1, 1, 0],
+ [0, 2, 1],
+ [1, 0, 2],
+ [0.1, 2.1, 1.1],
+ [1.1, 0.1, 2.2],
+ [-0.1, -2.1, -1.1],
+ [-1.1, -0.1, -2.2],
+ [0.2, 2.2, 1.2],
+ [1.2, 0.2, 2.2],
+ ],
+ dtype=numpy.float32,
+ )
+ y = numpy.sum(data, axis=1, keepdims=0) + numpy.random.randn(data.shape[0])
model.fit(data, y)
pred = model
assert pred.dtype == dtype
model_onnx = convert_sklearn(
- model, "cast", [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "cast",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnCastRegressor{}".format(
- input_type.__class__.__name__))
+ data,
+ model,
+ model_onnx,
+ basename="SklearnCastRegressor{}".format(input_type.__class__.__name__),
+ )
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_cast_regressor_float(self):
- self.common_test_cast_regressor(
- numpy.float32, FloatTensorType)
+ self.common_test_cast_regressor(numpy.float32, FloatTensorType)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_cast_regressor_float64(self):
- self.common_test_cast_regressor(
- numpy.float64, DoubleTensorType)
+ self.common_test_cast_regressor(numpy.float64, DoubleTensorType)
@unittest.skipIf(TARGET_OPSET < 9, reason="not supported")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_pipeline(self):
-
def maxdiff(a1, a2):
d = numpy.abs(a1.ravel() - a2.ravel())
return d.max()
X, y = make_regression(10000, 10, random_state=3)
- X_train, X_test, y_train, _ = train_test_split(
- X, y, random_state=3)
+ X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3)
Xi_train, yi_train = X_train.copy(), y_train.copy()
Xi_test = X_test.copy()
for i in range(X.shape[1]):
- Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype(
- numpy.int64)
- Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype(
- numpy.int64)
+ Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(numpy.int64)
+ Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(numpy.int64)
max_depth = 10
Xi_test = Xi_test.astype(numpy.float32)
# model 1
- model1 = Pipeline([
- ('scaler', StandardScaler()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
- ])
+ model1 = Pipeline(
+ [
+ ("scaler", StandardScaler()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+ )
model1.fit(Xi_train, yi_train)
exp1 = model1.predict(Xi_test)
- onx1 = to_onnx(model1, X_train[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET)
+ onx1 = to_onnx(
+ model1, X_train[:1].astype(numpy.float32), target_opset=TARGET_OPSET
+ )
sess1 = InferenceSession(
- onx1.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got1 = sess1.run(None, {'X': Xi_test})[0]
+ onx1.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got1 = sess1.run(None, {"X": Xi_test})[0]
md1 = maxdiff(exp1, got1)
# model 2
- model2 = Pipeline([
- ('cast64', CastTransformer(dtype=numpy.float64)),
- ('scaler', StandardScaler()),
- ('cast', CastTransformer()),
- ('dt', CastRegressor(DecisionTreeRegressor(max_depth=max_depth),
- dtype=numpy.float32))
- ])
+ model2 = Pipeline(
+ [
+ ("cast64", CastTransformer(dtype=numpy.float64)),
+ ("scaler", StandardScaler()),
+ ("cast", CastTransformer()),
+ (
+ "dt",
+ CastRegressor(
+ DecisionTreeRegressor(max_depth=max_depth), dtype=numpy.float32
+ ),
+ ),
+ ]
+ )
model2.fit(Xi_train, yi_train)
exp2 = model2.predict(Xi_test)
- onx = to_onnx(model2, X_train[:1].astype(numpy.float32),
- options={StandardScaler: {'div': 'div_cast'}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model2,
+ X_train[:1].astype(numpy.float32),
+ options={StandardScaler: {"div": "div_cast"}},
+ target_opset=TARGET_OPSET,
+ )
sess2 = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got2 = sess2.run(None, {'X': Xi_test})[0]
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got2 = sess2.run(None, {"X": Xi_test})[0]
md2 = maxdiff(exp2, got2)
assert md2 <= md1
assert md2 <= 0.0
diff --git a/tests/test_sklearn_cast_transformer.py b/tests/test_sklearn_cast_transformer.py
index 00cdcb013..e90a376c1 100644
--- a/tests/test_sklearn_cast_transformer.py
+++ b/tests/test_sklearn_cast_transformer.py
@@ -14,6 +14,7 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -21,131 +22,156 @@
from skl2onnx.sklapi import CastTransformer
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import (
- Int64TensorType, FloatTensorType, DoubleTensorType)
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+)
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
class TestSklearnCastTransformerConverter(unittest.TestCase):
-
def common_test_cast_transformer(self, dtype, input_type):
- model = Pipeline([
- ('cast', CastTransformer(dtype=dtype)),
- ('invcast', CastTransformer(dtype=numpy.float32)),
- ])
- data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0],
- [0, 2, 1], [1, 0, 2]],
- dtype=numpy.float32)
+ model = Pipeline(
+ [
+ ("cast", CastTransformer(dtype=dtype)),
+ ("invcast", CastTransformer(dtype=numpy.float32)),
+ ]
+ )
+ data = numpy.array(
+ [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32
+ )
model.fit(data)
pred = model.steps[0][1].transform(data)
assert pred.dtype == dtype
model_onnx = convert_sklearn(
- model, "cast", [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "cast",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnCastTransformer{}".format(
- input_type.__class__.__name__))
+ data,
+ model,
+ model_onnx,
+ basename="SklearnCastTransformer{}".format(input_type.__class__.__name__),
+ )
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_cast_transformer_float(self):
- self.common_test_cast_transformer(
- numpy.float32, FloatTensorType)
+ self.common_test_cast_transformer(numpy.float32, FloatTensorType)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_cast_transformer_float64(self):
- self.common_test_cast_transformer(
- numpy.float64, DoubleTensorType)
+ self.common_test_cast_transformer(numpy.float64, DoubleTensorType)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_cast_transformer_int64(self):
- self.common_test_cast_transformer(
- numpy.int64, Int64TensorType)
+ self.common_test_cast_transformer(numpy.int64, Int64TensorType)
@unittest.skipIf(TARGET_OPSET < 9, reason="not supported")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('0.5.0'),
- reason="runtime too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="runtime too old"
+ )
def test_pipeline(self):
-
def maxdiff(a1, a2):
d = numpy.abs(a1.ravel() - a2.ravel())
return d.max()
X, y = make_regression(10000, 10, random_state=3)
- X_train, X_test, y_train, _ = train_test_split(
- X, y, random_state=3)
+ X_train, X_test, y_train, _ = train_test_split(X, y, random_state=3)
Xi_train, yi_train = X_train.copy(), y_train.copy()
Xi_test = X_test.copy()
for i in range(X.shape[1]):
- Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype(
- numpy.int64)
- Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype(
- numpy.int64)
+ Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2**i).astype(numpy.int64)
+ Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2**i).astype(numpy.int64)
max_depth = 10
Xi_test = Xi_test.astype(numpy.float32)
# model 1
- model1 = Pipeline([
- ('scaler', StandardScaler()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
- ])
+ model1 = Pipeline(
+ [
+ ("scaler", StandardScaler()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+ )
model1.fit(Xi_train, yi_train)
exp1 = model1.predict(Xi_test)
- onx1 = to_onnx(model1, X_train[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET)
+ onx1 = to_onnx(
+ model1, X_train[:1].astype(numpy.float32), target_opset=TARGET_OPSET
+ )
sess1 = InferenceSession(
- onx1.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got1 = sess1.run(None, {'X': Xi_test})[0]
+ onx1.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got1 = sess1.run(None, {"X": Xi_test})[0]
md1 = maxdiff(exp1, got1)
# model 2
- model2 = Pipeline([
- ('cast64', CastTransformer(dtype=numpy.float64)),
- ('scaler', StandardScaler()),
- ('cast', CastTransformer()),
- ('dt', DecisionTreeRegressor(max_depth=max_depth))
- ])
+ model2 = Pipeline(
+ [
+ ("cast64", CastTransformer(dtype=numpy.float64)),
+ ("scaler", StandardScaler()),
+ ("cast", CastTransformer()),
+ ("dt", DecisionTreeRegressor(max_depth=max_depth)),
+ ]
+ )
model2.fit(Xi_train, yi_train)
exp2 = model2.predict(Xi_test)
- onx = to_onnx(model2, X_train[:1].astype(numpy.float32),
- options={StandardScaler: {'div': 'div_cast'}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model2,
+ X_train[:1].astype(numpy.float32),
+ options={StandardScaler: {"div": "div_cast"}},
+ target_opset=TARGET_OPSET,
+ )
sess2 = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got2 = sess2.run(None, {'X': Xi_test})[0]
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got2 = sess2.run(None, {"X": Xi_test})[0]
md2 = maxdiff(exp2, got2)
assert md2 <= md1
assert md2 <= 0.01
- @unittest.skipIf(ColumnTransformer is None,
- reason="scikit-learn too old")
+ @unittest.skipIf(ColumnTransformer is None, reason="scikit-learn too old")
def test_cast_transformer_dataframe(self):
- model = Pipeline([
- ('prep', ColumnTransformer([
- ('prep1', CastTransformer(), [0, 1]),
- ('prep2', CastTransformer(), [2]),
- ])),
- ('invcast', CastTransformer(dtype=numpy.float32)),
- ])
- data = numpy.array([[0.1, 0.2, 3.4], [1, 1, 0],
- [0, 2, 1], [1, 0, 2]],
- dtype=numpy.float32)
+ model = Pipeline(
+ [
+ (
+ "prep",
+ ColumnTransformer(
+ [
+ ("prep1", CastTransformer(), [0, 1]),
+ ("prep2", CastTransformer(), [2]),
+ ]
+ ),
+ ),
+ ("invcast", CastTransformer(dtype=numpy.float32)),
+ ]
+ )
+ data = numpy.array(
+ [[0.1, 0.2, 3.4], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32
+ )
data = DataFrame(data)
model.fit(data)
model_onnx = convert_sklearn(
- model, "cast", [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "cast",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data.values, model, model_onnx,
- basename="SklearnCastTransformerCT")
+ data.values, model, model_onnx, basename="SklearnCastTransformerCT"
+ )
if __name__ == "__main__":
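The pipeline test above is the motivating case for CastTransformer: wrapping StandardScaler in explicit casts, together with the div_cast option, keeps the float32 ONNX graph numerically close to the float64 arithmetic scikit-learn uses, which is what the maxdiff comparison measures. A compressed sketch of that setup, assuming the same skl2onnx sklapi helpers and converter option the test exercises (CastTransformer defaulting to float32 is also taken from the test):

import numpy
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from skl2onnx import to_onnx
from skl2onnx.sklapi import CastTransformer

X = numpy.random.randn(200, 3).astype(numpy.float32)
y = X.sum(axis=1)
model = Pipeline(
    [
        ("cast64", CastTransformer(dtype=numpy.float64)),
        ("scaler", StandardScaler()),
        ("cast32", CastTransformer()),  # back to float32 before the tree
        ("dt", DecisionTreeRegressor(max_depth=4)),
    ]
)
model.fit(X, y)
# div_cast performs the scaler's division in double precision inside the
# graph before casting back down, mirroring scikit-learn's own computation.
onx = to_onnx(model, X[:1], options={StandardScaler: {"div": "div_cast"}})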
diff --git a/tests/test_sklearn_concat.py b/tests/test_sklearn_concat.py
index 2e267759e..11214848b 100644
--- a/tests/test_sklearn_concat.py
+++ b/tests/test_sklearn_concat.py
@@ -7,65 +7,69 @@
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
ColumnTransformer = None
from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import (
- StandardScaler,
- OneHotEncoder,
- FunctionTransformer
-)
+from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- BooleanTensorType, FloatTensorType,
- Int64TensorType, StringTensorType)
+ BooleanTensorType,
+ FloatTensorType,
+ Int64TensorType,
+ StringTensorType,
+)
from test_utils import TARGET_OPSET
def _column_tranformer_fitted_from_df(data):
def transformer_for_column(column: pd.Series):
- if column.dtype in ['float64', 'float32', 'int64']:
+ if column.dtype in ["float64", "float32", "int64"]:
return StandardScaler()
- if column.dtype in ['bool']:
- return 'passthrough'
- if column.dtype in ['O']:
+ if column.dtype in ["bool"]:
+ return "passthrough"
+ if column.dtype in ["O"]:
try:
- return OneHotEncoder(drop='first')
+ return OneHotEncoder(drop="first")
except TypeError:
# older version of scikit-learn
return OneHotEncoder()
raise ValueError(
- 'Unexpected column dtype for {column.name}:{column.dtype}'.format(
- column=column))
+ "Unexpected column dtype for {column.name}:{column.dtype}".format(
+ column=column
+ )
+ )
return ColumnTransformer(
- [(col, transformer_for_column(
- data[col]), [col]) for col in data.columns],
- remainder='drop'
+ [(col, transformer_for_column(data[col]), [col]) for col in data.columns],
+ remainder="drop",
).fit(data)
def _convert_dataframe_schema(data):
def type_for_column(column: pd.Series):
- if column.dtype in ['float64', 'float32']:
+ if column.dtype in ["float64", "float32"]:
return FloatTensorType([None, 1])
- if column.dtype in ['int64']:
+ if column.dtype in ["int64"]:
return Int64TensorType([None, 1])
- if column.dtype in ['bool']:
+ if column.dtype in ["bool"]:
return BooleanTensorType([None, 1])
- if column.dtype in ['O']:
+ if column.dtype in ["O"]:
return StringTensorType([None, 1])
raise ValueError(
- 'Unexpected column dtype for {column.name}:{column.dtype}'.format(
- column=column))
+ "Unexpected column dtype for {column.name}:{column.dtype}".format(
+ column=column
+ )
+ )
+
return [(col, type_for_column(data[col])) for col in data.columns]
def _predict(session: rt.InferenceSession, data: pd.DataFrame) -> pd.Series:
def _correctly_typed_column(column: pd.Series) -> pd.Series:
- if column.dtype in ['float64']:
+ if column.dtype in ["float64"]:
return column.astype(np.float32)
return column
@@ -77,78 +81,68 @@ def _correctly_shaped_values(values):
for c in data.columns
}
- return pd.Series(
- session.run(None, inputs)[0].reshape(-1),
- index=data.index
- )
+ return pd.Series(session.run(None, inputs)[0].reshape(-1), index=data.index)
class TestSklearnPipeline(unittest.TestCase):
-
@unittest.skipIf(ColumnTransformer is None, reason="too old scikit-learn")
def test_concat(self):
- data = os.path.join(os.path.dirname(__file__),
- "datasets", "small_titanic.csv")
+ data = os.path.join(os.path.dirname(__file__), "datasets", "small_titanic.csv")
data = pd.read_csv(data)
- data['female'] = data['sex'] == 'female'
- data = data[['age', 'fare', 'female', 'embarked',
- 'pclass', 'survived']]
+ data["female"] = data["sex"] == "female"
+ data = data[["age", "fare", "female", "embarked", "pclass", "survived"]]
for col in data:
dtype = data[col].dtype
- if dtype in ['float64', 'float32']:
- data[col].fillna(0., inplace=True)
- if dtype in ['int64']:
+ if dtype in ["float64", "float32"]:
+ data[col].fillna(0.0, inplace=True)
+ if dtype in ["int64"]:
data[col].fillna(0, inplace=True)
- elif dtype in ['O']:
- data[col].fillna('N/A', inplace=True)
+ elif dtype in ["O"]:
+ data[col].fillna("N/A", inplace=True)
- full_df = data.drop('survived', axis=1)
- full_labels = data['survived']
+ full_df = data.drop("survived", axis=1)
+ full_labels = data["survived"]
train_df, test_df, train_labels, test_labels = train_test_split(
- full_df, full_labels, test_size=.2, random_state=0)
+ full_df, full_labels, test_size=0.2, random_state=0
+ )
col_transformer = _column_tranformer_fitted_from_df(full_df)
regressor = DecisionTreeRegressor(random_state=0)
- regressor.fit(
- col_transformer.transform(train_df),
- train_labels)
+ regressor.fit(col_transformer.transform(train_df), train_labels)
model = Pipeline(
- steps=[('preprocessor', col_transformer),
- ('regressor', regressor)])
+ steps=[("preprocessor", col_transformer), ("regressor", regressor)]
+ )
initial_types = _convert_dataframe_schema(full_df)
itypes = set(_[1].__class__ for _ in initial_types)
self.assertIn(BooleanTensorType, itypes)
self.assertIn(FloatTensorType, itypes)
- onx = convert_sklearn(model, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ onx = convert_sklearn(
+ model, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
session = rt.InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
pred_skl = model.predict(test_df)
pred_onx = _predict(session, test_df)
- diff = np.sort(
- np.abs(np.squeeze(pred_skl) - np.squeeze(pred_onx)))
+ diff = np.sort(np.abs(np.squeeze(pred_skl) - np.squeeze(pred_onx)))
if diff[0] != diff[-1]:
raise AssertionError(
- "Discrepencies\nSKL\n{}\nORT\n{}".format(pred_skl, pred_onx))
+ "Discrepencies\nSKL\n{}\nORT\n{}".format(pred_skl, pred_onx)
+ )
class TestConcatOutputType(unittest.TestCase):
-
@unittest.skipIf(ColumnTransformer is None, reason="too old scikit-learn")
def test_concat_output_type(self):
# create sample dataset
- data_dict = {
- 'a': [1, 2, 3],
- 'b': [1.5, 2.6, 5.2]
- }
+ data_dict = {"a": [1, 2, 3], "b": [1.5, 2.6, 5.2]}
# load to dataframe
data = pd.DataFrame.from_dict(data_dict)
@@ -159,7 +153,7 @@ def test_concat_output_type(self):
col_transformer = ColumnTransformer(
transformers=[
("a", FunctionTransformer(), ["a"]),
- ("b", StandardScaler(), ["b"])
+ ("b", StandardScaler(), ["b"]),
],
)
@@ -170,8 +164,9 @@ def test_concat_output_type(self):
initial_types = _convert_dataframe_schema(data)
# convert to onnx
- onx = convert_sklearn(col_transformer, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ onx = convert_sklearn(
+ col_transformer, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
# make sure that the output of the concat is a float
# we are concatenating an `int` with a `float`, and
diff --git a/tests/test_sklearn_constant_predictor.py b/tests/test_sklearn_constant_predictor.py
index f2846b654..4844a3d4d 100644
--- a/tests/test_sklearn_constant_predictor.py
+++ b/tests/test_sklearn_constant_predictor.py
@@ -10,10 +10,7 @@
from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
-from test_utils import (
- dump_data_and_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, TARGET_OPSET
ort_version = ".".join(ort_version.split(".")[:2])
@@ -27,14 +24,20 @@ def test_constant_predictor_float(self):
test_x = np.array([[1, 0], [2, 8]])
model_onnx = to_onnx(
- model, "scikit-learn ConstantPredictor",
+ model,
+ "scikit-learn ConstantPredictor",
initial_types=[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ options={"zipmap": False},
+ )
self.assertIsNotNone(model_onnx is not None)
- dump_data_and_model(test_x.astype(np.float32), model, model_onnx,
- basename="SklearnConstantPredictorFloat")
+ dump_data_and_model(
+ test_x.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnConstantPredictorFloat",
+ )
def test_constant_predictor_double(self):
model = _ConstantPredictor()
@@ -44,14 +47,20 @@ def test_constant_predictor_double(self):
test_x = np.array([[1, 0], [2, 8]])
model_onnx = to_onnx(
- model, "scikit-learn ConstantPredictor",
+ model,
+ "scikit-learn ConstantPredictor",
initial_types=[("input", DoubleTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ options={"zipmap": False},
+ )
self.assertIsNotNone(model_onnx is not None)
- dump_data_and_model(test_x.astype(np.float64), model, model_onnx,
- basename="SklearnConstantPredictorDouble")
+ dump_data_and_model(
+ test_x.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnConstantPredictorDouble",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py
index 3ef41680e..eb7f1cc66 100644
--- a/tests/test_sklearn_count_vectorizer_converter.py
+++ b/tests/test_sklearn_count_vectorizer_converter.py
@@ -12,106 +12,123 @@
class TestSklearnCountVectorizer(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer11(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = CountVectorizer(ngram_range=(1, 1))
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "CountVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnCountVectorizer11-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnCountVectorizer11-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer22(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = CountVectorizer(ngram_range=(2, 2))
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "CountVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnCountVectorizer22-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnCountVectorizer22-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer12(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = CountVectorizer(ngram_range=(1, 2))
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "CountVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnCountVectorizer12-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnCountVectorizer12-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer13(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = CountVectorizer(ngram_range=(1, 3))
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "CountVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnCountVectorizer13-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnCountVectorizer13-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_binary(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = CountVectorizer(binary=True)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "CountVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnCountVectorizerBinary-OneOff-SklCol")
+ basename="SklearnCountVectorizerBinary-OneOff-SklCol",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_count_vectorizer_converter_bug.py b/tests/test_sklearn_count_vectorizer_converter_bug.py
index 5f35035c9..73b4fea1d 100644
--- a/tests/test_sklearn_count_vectorizer_converter_bug.py
+++ b/tests/test_sklearn_count_vectorizer_converter_bug.py
@@ -12,60 +12,69 @@
class TestSklearnCountVectorizerBug(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_custom_tokenizer(self):
- corpus = numpy.array([
- '9999',
- '999 99',
- '1234',
- '1 2 3 4',
- '1 2 3 4+',
- ]).reshape((5, 1))
- vect = CountVectorizer(ngram_range=(1, 1),
- tokenizer=lambda s: [s])
+ corpus = numpy.array(
+ [
+ "9999",
+ "999 99",
+ "1234",
+ "1 2 3 4",
+ "1 2 3 4+",
+ ]
+ ).reshape((5, 1))
+ vect = CountVectorizer(ngram_range=(1, 1), tokenizer=lambda s: [s])
vect.fit(corpus.ravel())
- extra = {
- CountVectorizer: {
- "separators": ["ZZZZ"]
- }
- }
+ extra = {CountVectorizer: {"separators": ["ZZZZ"]}}
prev = vect.tokenizer
vect.tokenizer = None
- model_onnx = convert_sklearn(vect, 'CountVectorizer',
- [('input', StringTensorType([1]))],
- options=extra,
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
vect.tokenizer = prev
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_wrong_ngram(self):
- corpus = numpy.array([
- 'A AABBB0',
- 'AAABB B1',
- 'AA ABBB2',
- 'AAAB BB3',
- 'AAA BBB4',
- ]).reshape((5, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2),
- token_pattern=r"(?u)\b\w\w+\b")
+ corpus = numpy.array(
+ [
+ "A AABBB0",
+ "AAABB B1",
+ "AA ABBB2",
+ "AAAB BB3",
+ "AAA BBB4",
+ ]
+ ).reshape((5, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"(?u)\b\w\w+\b")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12Wngram-OneOff-SklCol",
+ )
if __name__ == "__main__":
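Both tests in this file work around converter limitations rather than exercise new operators: a Python tokenizer cannot be serialized into the ONNX graph, so the first test drops it for the duration of the conversion and supplies separators through the converter options instead. A condensed restatement of that workaround (the separator value is the same placeholder the test uses):

import numpy
from sklearn.feature_extraction.text import CountVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

corpus = numpy.array(["9999", "999 99", "1234", "1 2 3 4"])
vect = CountVectorizer(ngram_range=(1, 1), tokenizer=lambda s: [s])
vect.fit(corpus)

# The custom tokenizer is removed only while converting; the "separators"
# option tells the converter how to split tokens in its place.
saved_tokenizer = vect.tokenizer
vect.tokenizer = None
onx = convert_sklearn(
    vect,
    "CountVectorizer",
    [("input", StringTensorType([1]))],
    options={CountVectorizer: {"separators": ["ZZZZ"]}},
)
vect.tokenizer = saved_tokenizer
assert onx is not None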
diff --git a/tests/test_sklearn_custom_nmf.py b/tests/test_sklearn_custom_nmf.py
index f9ffabdd7..d24247155 100644
--- a/tests/test_sklearn_custom_nmf.py
+++ b/tests/test_sklearn_custom_nmf.py
@@ -5,8 +5,7 @@
import numpy as np
from sklearn.decomposition import NMF
from skl2onnx.common.data_types import FloatTensorType
-from skl2onnx.algebra.onnx_ops import (
- OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum)
+from skl2onnx.algebra.onnx_ops import OnnxArrayFeatureExtractor, OnnxMul, OnnxReduceSum
from onnxruntime import InferenceSession
from test_utils import TARGET_OPSET
@@ -14,10 +13,11 @@
class TestSklearnCustomNMF(unittest.TestCase):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_custom_nmf(self):
-
- mat = np.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
- [1, 0, 0, 0], [0, 0, 1, 0]], dtype=np.float64)
- mat[:mat.shape[1], :] += np.identity(mat.shape[1])
+ mat = np.array(
+ [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0]],
+ dtype=np.float64,
+ )
+ mat[: mat.shape[1], :] += np.identity(mat.shape[1])
mod = NMF(n_components=2, max_iter=2)
W = mod.fit_transform(mat)
@@ -46,27 +46,24 @@ def nmf_to_onnx(W, H):
and returns the predictions for it. It assumes
these indices applies on the training data.
"""
- col = OnnxArrayFeatureExtractor(H, 'col')
- row = OnnxArrayFeatureExtractor(W.T, 'row')
+ col = OnnxArrayFeatureExtractor(H, "col")
+ row = OnnxArrayFeatureExtractor(W.T, "row")
dot = OnnxMul(col, row, op_version=TARGET_OPSET)
- res = OnnxReduceSum(dot, output_names="rec",
- op_version=TARGET_OPSET)
+ res = OnnxReduceSum(dot, output_names="rec", op_version=TARGET_OPSET)
indices_type = np.array([0], dtype=np.int64)
- onx = res.to_onnx(inputs={'col': indices_type,
- 'row': indices_type},
- outputs=[('rec', FloatTensorType((None, 1)))])
+ onx = res.to_onnx(
+ inputs={"col": indices_type, "row": indices_type},
+ outputs=[("rec", FloatTensorType((None, 1)))],
+ )
return onx
- model_onnx = nmf_to_onnx(W.astype(np.float32),
- H.astype(np.float32))
+ model_onnx = nmf_to_onnx(W.astype(np.float32), H.astype(np.float32))
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
def predict_onnx(sess, row_indices, col_indices):
- res = sess.run(None,
- {'col': col_indices,
- 'row': row_indices})
+ res = sess.run(None, {"col": col_indices, "row": row_indices})
return res
onnx_preds = []
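
Context for the custom NMF test above: it builds its graph directly with the skl2onnx algebra API instead of a registered converter. A minimal sketch of that pattern, assuming skl2onnx and onnxruntime are installed; the input names, shapes, and op_version=15 are illustrative assumptions, not values from this patch.

import numpy as np
from skl2onnx.algebra.onnx_ops import OnnxMul
from skl2onnx.common.data_types import FloatTensorType
from onnxruntime import InferenceSession

# Compose a tiny graph that multiplies two float tensors element-wise.
node = OnnxMul("A", "B", op_version=15, output_names=["prod"])
onx = node.to_onnx(
    inputs={
        "A": np.zeros((1, 2), dtype=np.float32),
        "B": np.zeros((1, 2), dtype=np.float32),
    },
    outputs=[("prod", FloatTensorType((None, 2)))],
)

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
a = np.array([[1.0, 2.0]], dtype=np.float32)
b = np.array([[3.0, 4.0]], dtype=np.float32)
print(sess.run(None, {"A": a, "B": b})[0])  # [[3. 8.]]
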
diff --git a/tests/test_sklearn_decision_tree_converters.py b/tests/test_sklearn_decision_tree_converters.py
index e61d3dc59..4ecc9c582 100644
--- a/tests/test_sklearn_decision_tree_converters.py
+++ b/tests/test_sklearn_decision_tree_converters.py
@@ -7,8 +7,10 @@
from numpy.testing import assert_almost_equal
from pandas import DataFrame
from sklearn.tree import (
- DecisionTreeClassifier, DecisionTreeRegressor,
- ExtraTreeClassifier, ExtraTreeRegressor
+ DecisionTreeClassifier,
+ DecisionTreeRegressor,
+ ExtraTreeClassifier,
+ ExtraTreeRegressor,
)
from sklearn.datasets import make_classification
from skl2onnx.common.data_types import (
@@ -35,26 +37,27 @@
)
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestSklearnDecisionTreeModels(unittest.TestCase):
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.3.0"),
- reason="No suitable kernel definition found "
- "for op Cast(9) (node Cast)")
+ reason="No suitable kernel definition found " "for op Cast(9) (node Cast)",
+ )
def test_decisiontree_classifier1(self):
model = DecisionTreeClassifier(max_depth=2)
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
- model_onnx = convert_sklearn(model, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict_proba(X)
if res[1][0][0] != pred[0, 0]:
raise AssertionError("{}\n--\n{}".format(pred, DataFrame(res[1])))
@@ -64,13 +67,14 @@ def test_decisiontree_regressor0(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
- model_onnx = convert_sklearn(model, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
if res[0][0, 0] != pred[0]:
raise AssertionError("{}\n--\n{}".format(pred, DataFrame(res[1])))
@@ -81,15 +85,17 @@ def test_decisiontree_regressor_decision_path(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
@@ -102,15 +108,17 @@ def test_decisiontree_regressor_decision_leaf(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
@@ -123,16 +131,17 @@ def test_decisiontree_regressor_decision_path_leaf(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True,
- 'decision_path': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True, "decision_path": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
@@ -147,15 +156,17 @@ def test_decisiontree_classifier_decision_path(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
prob = model.predict_proba(X)
@@ -170,15 +181,17 @@ def test_decisiontree_classifier_decision_leaf(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
prob = model.predict_proba(X)
@@ -193,16 +206,23 @@ def test_decisiontree_classifier_decision_path_leaf(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True, 'decision_path': True,
- 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={
+ id(model): {
+ "decision_leaf": True,
+ "decision_path": True,
+ "zipmap": False,
+ }
+ },
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
prob = model.predict_proba(X)
@@ -241,84 +261,109 @@ def test_extra_tree_regressor(self):
def test_decision_tree_regressor_int(self):
model, X = fit_regression_model(
- DecisionTreeRegressor(random_state=42), is_int=True)
+ DecisionTreeRegressor(random_state=42), is_int=True
+ )
model_onnx = convert_sklearn(
- model, "decision tree regression",
+ model,
+ "decision tree regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnDecisionTreeRegressionInt")
+ X, model, model_onnx, basename="SklearnDecisionTreeRegressionInt"
+ )
def test_model_multi_class_nocl(self):
model, X = fit_classification_model(
- DecisionTreeClassifier(),
- 4, label_string=True)
+ DecisionTreeClassifier(), 4, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class nocl",
+ model,
+ "multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X, model, model_onnx, classes=model.classes_,
- basename="SklearnDTMultiNoCl")
+ X, model, model_onnx, classes=model.classes_, basename="SklearnDTMultiNoCl"
+ )
def test_model_decision_tree_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- DecisionTreeClassifier(random_state=42))
- options = {id(model): {'zipmap': False}}
+ DecisionTreeClassifier(random_state=42)
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn DecisionTreeClassifier",
+ model,
+ "scikit-learn DecisionTreeClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnDecisionTreeClassifierMultiLabel-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnDecisionTreeClassifierMultiLabel-Out0",
+ )
def test_model_extra_tree_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- ExtraTreeClassifier(random_state=42))
- options = {id(model): {'zipmap': False}}
+ ExtraTreeClassifier(random_state=42)
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn ExtraTreeClassifier",
+ model,
+ "scikit-learn ExtraTreeClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnExtraTreeClassifierMultiLabel-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnExtraTreeClassifierMultiLabel-Out0",
+ )
def test_decision_tree_regressor_bool(self):
model, X = fit_regression_model(
- DecisionTreeRegressor(random_state=42), is_bool=True)
+ DecisionTreeRegressor(random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "decision tree regressor",
+ model,
+ "decision tree regressor",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnDecisionTreeRegressionBool-Dec4")
+ X, model, model_onnx, basename="SklearnDecisionTreeRegressionBool-Dec4"
+ )
def test_extra_tree_regressor_bool(self):
model, X = fit_regression_model(
- ExtraTreeRegressor(random_state=42), is_bool=True)
+ ExtraTreeRegressor(random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "extra tree regressor",
+ model,
+ "extra tree regressor",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnExtraTreeRegressionBool-Dec4")
+ X, model, model_onnx, basename="SklearnExtraTreeRegressionBool-Dec4"
+ )
if __name__ == "__main__":
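
The decision-tree tests above all follow the same pattern: fit a scikit-learn estimator, convert it with convert_sklearn and an explicit initial type, then score the ONNX model with onnxruntime. A minimal sketch, assuming recent skl2onnx and onnxruntime; target_opset=15 is an illustrative stand-in for the project's TARGET_OPSET helper.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from onnxruntime import InferenceSession

X, y = make_classification(100, n_features=4, random_state=42)
model = DecisionTreeClassifier(max_depth=3).fit(X, y)

# zipmap=False keeps the probability output as a plain tensor
# instead of a list of dictionaries.
model_onnx = convert_sklearn(
    model,
    "decision tree",
    initial_types=[("input", FloatTensorType([None, X.shape[1]]))],
    options={id(model): {"zipmap": False}},
    target_opset=15,  # assumption; the tests use test_utils.TARGET_OPSET
)

sess = InferenceSession(
    model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
)
labels, probabilities = sess.run(None, {"input": X[:5].astype(np.float32)})
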
diff --git a/tests/test_sklearn_dict_vectorizer_converter.py b/tests/test_sklearn_dict_vectorizer_converter.py
index 44ff84d51..da28ab72c 100644
--- a/tests/test_sklearn_dict_vectorizer_converter.py
+++ b/tests/test_sklearn_dict_vectorizer_converter.py
@@ -15,11 +15,14 @@
StringTensorType,
FloatTensorType,
Int64TensorType,
- BooleanTensorType)
-from skl2onnx.common.data_types import onnx_built_with_ml
+ BooleanTensorType,
+)
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph
@@ -34,15 +37,20 @@ def test_model_dict_vectorizer(self):
data = [{"amy": 1.0, "chin": 200.0}, {"nice": 3.0, "amy": 1.0}]
model.fit_transform(data)
model_onnx = convert_sklearn(
- model, "dictionary vectorizer",
- [(
- "input",
- DictionaryType(StringTensorType([1]), FloatTensorType([1])),
- )], target_opset=TARGET_OPSET)
+ model,
+ "dictionary vectorizer",
+ [
+ (
+ "input",
+ DictionaryType(StringTensorType([1]), FloatTensorType([1])),
+ )
+ ],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnDictVectorizer-OneOff-SkipDim1")
+ data, model, model_onnx, basename="SklearnDictVectorizer-OneOff-SkipDim1"
+ )
def test_model_dict_vectorizer_sort_false(self):
model = DictVectorizer(sparse=False, sort=False)
@@ -51,73 +59,91 @@ def test_model_dict_vectorizer_sort_false(self):
model_onnx = convert_sklearn(
model,
"dictionary vectorizer",
- [(
- "input",
- DictionaryType(Int64TensorType([1]), FloatTensorType([1])),
- )], target_opset=TARGET_OPSET)
+ [
+ (
+ "input",
+ DictionaryType(Int64TensorType([1]), FloatTensorType([1])),
+ )
+ ],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1")
+ data,
+ model,
+ model_onnx,
+ basename="SklearnDictVectorizerSortFalse-OneOff-SkipDim1",
+ )
def test_model_dict_vectorizer_issue(self):
- key_value_map = [{1: 'A', 2: 'B'}, {1: 'C', 3: 'D'},
- {1: 'C', 3: 'A'}]
+ key_value_map = [{1: "A", 2: "B"}, {1: "C", 3: "D"}, {1: "C", 3: "A"}]
model = DictVectorizer(sparse=False).fit(key_value_map)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, 'dv',
- [("input", DictionaryType(Int64TensorType([1]),
- StringTensorType([1])))],
- target_opset=TARGET_OPSET)
+ model,
+ "dv",
+ [
+ (
+ "input",
+ DictionaryType(Int64TensorType([1]), StringTensorType([1])),
+ )
+ ],
+ target_opset=TARGET_OPSET,
+ )
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
def test_model_dict_vectorizer_pipeline_float(self):
- data = [{'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}]
+ data = [
+ {"ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ ]
model = make_pipeline(DictVectorizer(sparse=False), StandardScaler())
model.fit(data)
expected = model.transform(data)
model_onnx = convert_sklearn(
- model, 'dv',
- [("input", DictionaryType(StringTensorType([1]),
- FloatTensorType([1])))],
- target_opset=TARGET_OPSET)
+ model,
+ "dv",
+ [("input", DictionaryType(StringTensorType([1]), FloatTensorType([1])))],
+ target_opset=TARGET_OPSET,
+ )
onnx.checker.check_model(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- inp = {'ALL_LOWER': numpy.array([1], dtype=numpy.float32),
- 'NEXT_ALL_LOWER': numpy.array([1], dtype=numpy.float32)}
- res = sess.run(None, {'input': inp})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ inp = {
+ "ALL_LOWER": numpy.array([1], dtype=numpy.float32),
+ "NEXT_ALL_LOWER": numpy.array([1], dtype=numpy.float32),
+ }
+ res = sess.run(None, {"input": inp})
assert_almost_equal(expected[0].ravel(), res[0].ravel())
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
def test_model_dict_vectorizer_pipeline_int(self):
- data = [{'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1},
- {'PREV_ALL_LOWER': 1, 'ALL_LOWER': 1, 'NEXT_ALL_LOWER': 1}]
+ data = [
+ {"ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ {"PREV_ALL_LOWER": 1, "ALL_LOWER": 1, "NEXT_ALL_LOWER": 1},
+ ]
model = make_pipeline(DictVectorizer(sparse=False), StandardScaler())
model.fit(data)
# expected = model.transform(data)
model_onnx = convert_sklearn(
- model, 'dv',
- [("input", DictionaryType(StringTensorType([1]),
- Int64TensorType([1])))],
- target_opset=TARGET_OPSET)
+ model,
+ "dv",
+ [("input", DictionaryType(StringTensorType([1]), Int64TensorType([1])))],
+ target_opset=TARGET_OPSET,
+ )
onnx.checker.check_model(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- inp = {'ALL_LOWER': numpy.array(1, dtype=numpy.int64),
- 'NEXT_ALL_LOWER': numpy.array(1, dtype=numpy.int64)}
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ inp = {
+ "ALL_LOWER": numpy.array(1, dtype=numpy.int64),
+ "NEXT_ALL_LOWER": numpy.array(1, dtype=numpy.int64),
+ }
try:
- got = sess.run(None, {'input': inp})
+ got = sess.run(None, {"input": inp})
except InvalidArgument:
return
self.assertTrue(got is not None)
@@ -125,33 +151,32 @@ def test_model_dict_vectorizer_pipeline_int(self):
expected = model.transform(data)
assert_almost_equal(expected[0], res)
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
def test_model_dict_vectorizer_pipeline_boolean(self):
- data = [{'ALL_LOWER': True, 'NEXT_ALL_LOWER': True},
- {'PREV_ALL_LOWER': True, 'ALL_LOWER': True,
- 'NEXT_ALL_LOWER': True},
- {'PREV_ALL_LOWER': True, 'ALL_LOWER': True,
- 'NEXT_ALL_LOWER': True},
- {'PREV_ALL_LOWER': True, 'ALL_LOWER': True,
- 'NEXT_ALL_LOWER': True}]
+ data = [
+ {"ALL_LOWER": True, "NEXT_ALL_LOWER": True},
+ {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True},
+ {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True},
+ {"PREV_ALL_LOWER": True, "ALL_LOWER": True, "NEXT_ALL_LOWER": True},
+ ]
model = make_pipeline(DictVectorizer(sparse=False), StandardScaler())
model.fit(data)
model_onnx = convert_sklearn(
- model, 'dv',
- [("input", DictionaryType(StringTensorType([1]),
- BooleanTensorType([1])))],
- target_opset=TARGET_OPSET)
+ model,
+ "dv",
+ [("input", DictionaryType(StringTensorType([1]), BooleanTensorType([1])))],
+ target_opset=TARGET_OPSET,
+ )
onnx.checker.check_model(model_onnx)
try:
sess = InferenceSession(
model_onnx.SerializeToString(),
providers=["CPUExecutionProvider"],
- verbose=0)
+ verbose=0,
+ )
except InvalidGraph:
return
- got = sess.run(None, {'input': data})
+ got = sess.run(None, {"input": data})
self.assertTrue(got is not None)
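
The DictVectorizer tests above feed dictionary inputs through a DictionaryType. A minimal sketch of that conversion, assuming skl2onnx and onnxruntime are installed; the opset and the sample input are illustrative.

import numpy as np
from sklearn.feature_extraction import DictVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
    DictionaryType,
    StringTensorType,
    FloatTensorType,
)
from onnxruntime import InferenceSession

data = [{"amy": 1.0, "chin": 200.0}, {"nice": 3.0, "amy": 1.0}]
model = DictVectorizer(sparse=False).fit(data)

model_onnx = convert_sklearn(
    model,
    "dictionary vectorizer",
    [("input", DictionaryType(StringTensorType([1]), FloatTensorType([1])))],
    target_opset=15,  # assumption
)

sess = InferenceSession(
    model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
)
# One mapping per call, values as 1-element float32 arrays, as in the tests above.
inp = {
    "amy": np.array([1.0], dtype=np.float32),
    "chin": np.array([200.0], dtype=np.float32),
}
print(sess.run(None, {"input": inp}))
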
diff --git a/tests/test_sklearn_documentation.py b/tests/test_sklearn_documentation.py
index cf7904bfb..ce9e5922b 100644
--- a/tests/test_sklearn_documentation.py
+++ b/tests/test_sklearn_documentation.py
@@ -11,13 +11,18 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import fetch_20newsgroups
+
try:
from sklearn.datasets._twenty_newsgroups import (
- strip_newsgroup_footer, strip_newsgroup_quoting)
+ strip_newsgroup_footer,
+ strip_newsgroup_quoting,
+ )
except ImportError:
# scikit-learn < 0.24
from sklearn.datasets.twenty_newsgroups import (
- strip_newsgroup_footer, strip_newsgroup_quoting)
+ strip_newsgroup_footer,
+ strip_newsgroup_quoting,
+ )
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
@@ -54,7 +59,7 @@ def transform(self, posts):
sub = ""
for line in headers.split("\n"):
if line.startswith(prefix):
- sub = line[len(prefix):]
+ sub = line[len(prefix) :]
break
features[i, 0] = sub
@@ -64,18 +69,16 @@ def transform(self, posts):
class TestSklearnDocumentation(unittest.TestCase):
"Test example from the documentation of scikit-learn."
+ @unittest.skipIf(sys.platform == "win32", reason="Too long on Windows")
@unittest.skipIf(
- sys.platform == "win32",
- reason="Too long on Windows")
- @unittest.skipIf(
- TARGET_OPSET < 10,
- reason="Encoding issue fixed in a later version")
+ TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
+ )
def test_pipeline_tfidf(self):
categories = ["alt.atheism", "talk.religion.misc"]
try:
- train = fetch_20newsgroups(random_state=1,
- subset="test",
- categories=categories)
+ train = fetch_20newsgroups(
+ random_state=1, subset="test", categories=categories
+ )
except urllib.error.URLError:
warnings.warn("Unit test may fail due to connectivity issue.")
return
@@ -85,49 +88,54 @@ def test_pipeline_tfidf(self):
tfi.fit(tdata.ravel())
extra = {
TfidfVectorizer: {
- "separators": [
- " ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"
- ]
+ "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"]
}
}
model_onnx = convert_sklearn(
- tfi, "tfidf",
+ tfi,
+ "tfidf",
initial_types=[("input", StringTensorType([1]))],
- options=extra, target_opset=TARGET_OPSET
+ options=extra,
+ target_opset=TARGET_OPSET,
)
dump_data_and_model(
tdata[:5],
tfi,
model_onnx,
- basename="SklearnDocumentationTfIdf-OneOff-SklCol")
+ basename="SklearnDocumentationTfIdf-OneOff-SklCol",
+ )
@unittest.skipIf(
ColumnTransformer is None,
reason="ColumnTransformer introduced in 0.20",
)
@unittest.skipIf(
- TARGET_OPSET < 10,
- reason="Encoding issue fixed in a later version")
+ TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
+ )
def test_pipeline_tfidf_pipeline_minmax(self):
categories = ["alt.atheism", "talk.religion.misc"]
try:
- train = fetch_20newsgroups(random_state=1,
- subset="train",
- categories=categories)
+ train = fetch_20newsgroups(
+ random_state=1, subset="train", categories=categories
+ )
except urllib.error.URLError:
warnings.warn("Unit test may fail due to connectivity issue.")
return
train_data = SubjectBodyExtractor().fit_transform(train.data)
- pipeline = Pipeline([(
- "union",
- ColumnTransformer(
- [
- ("subject", TfidfVectorizer(min_df=50), 0),
- ("body", TfidfVectorizer(min_df=40), 1),
- ],
- transformer_weights={"subject": 0.8},
- ),
- )])
+ pipeline = Pipeline(
+ [
+ (
+ "union",
+ ColumnTransformer(
+ [
+ ("subject", TfidfVectorizer(min_df=50), 0),
+ ("body", TfidfVectorizer(min_df=40), 1),
+ ],
+ transformer_weights={"subject": 0.8},
+ ),
+ )
+ ]
+ )
pipeline.fit(train_data[:300])
extra = {
TfidfVectorizer: {
@@ -152,20 +160,25 @@ def test_pipeline_tfidf_pipeline_minmax(self):
}
}
model_onnx = convert_sklearn(
- pipeline, "tfidf",
+ pipeline,
+ "tfidf",
initial_types=[("input", StringTensorType([None, 2]))],
- options=extra, target_opset=TARGET_OPSET
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
+ test_data = np.array(
+ [
+ ["Albert Einstein", "Not relatively."],
+ ["Alan turing", "Not automatically."],
+ ]
)
- test_data = np.array([
- ["Albert Einstein", "Not relatively."],
- ["Alan turing", "Not automatically."],
- ])
dump_data_and_model(
test_data,
pipeline,
model_onnx,
verbose=False,
- basename="SklearnDocumentationTfIdfUnion1")
+ basename="SklearnDocumentationTfIdfUnion1",
+ )
if __name__ == "__main__":
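
The documentation tests above pass converter-specific options keyed by the estimator class; the TfidfVectorizer converter, for instance, accepts a "separators" list used to build its tokenizer. A minimal sketch with an illustrative corpus and opset; in the tests, dump_data_and_model then scores the converted model and compares it with scikit-learn's output.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

corpus = np.array(["first document", "second document", "and a third one"])
tfidf = TfidfVectorizer().fit(corpus)

# Same separator list as the tests above; the converter tokenizes with it.
extra = {
    TfidfVectorizer: {
        "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"]
    }
}
model_onnx = convert_sklearn(
    tfidf,
    "tfidf",
    initial_types=[("input", StringTensorType([1]))],
    options=extra,
    target_opset=15,  # assumption
)
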
diff --git a/tests/test_sklearn_double_tensor_type_cls.py b/tests/test_sklearn_double_tensor_type_cls.py
index 9e91e5d28..ff2d39881 100644
--- a/tests/test_sklearn_double_tensor_type_cls.py
+++ b/tests/test_sklearn_double_tensor_type_cls.py
@@ -7,6 +7,7 @@
from sklearn.calibration import CalibratedClassifierCV
from sklearn.exceptions import ConvergenceWarning
from sklearn.ensemble import BaggingClassifier
+
# Requires PR #488.
# from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
@@ -15,6 +16,7 @@
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
+
try:
from sklearn.ensemble import VotingClassifier
except ImportError:
@@ -37,27 +39,33 @@
from skl2onnx.common.data_types import DoubleTensorType
from onnxruntime import __version__ as ort_version
from onnx import __version__ as onnx_version
-from test_utils import (
- dump_data_and_model, fit_classification_model, TARGET_OPSET)
+from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET
warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning)
-ort_version = ort_version.split('+')[0]
-ORT_VERSION = '1.7.0'
-onnx_version = ".".join(onnx_version.split('.')[:2])
+ort_version = ort_version.split("+")[0]
+ORT_VERSION = "1.7.0"
+onnx_version = ".".join(onnx_version.split(".")[:2])
-LOG_LOSS = ("log_loss" if pv.Version(skl_version) >= pv.Version("1.1")
- else "log")
+LOG_LOSS = "log_loss" if pv.Version(skl_version) >= pv.Version("1.1") else "log"
class TestSklearnDoubleTensorTypeClassifier(unittest.TestCase):
-
def _common_classifier(
- self, model_cls_set, name_root=None, debug=False,
- raw_scores=True, pos_features=False, is_int=False,
- comparable_outputs=None, n_features=4,
- n_repeated=None, n_redundant=None, verbose=False):
+ self,
+ model_cls_set,
+ name_root=None,
+ debug=False,
+ raw_scores=True,
+ pos_features=False,
+ is_int=False,
+ comparable_outputs=None,
+ n_features=4,
+ n_repeated=None,
+ n_redundant=None,
+ verbose=False,
+ ):
for model_cls in model_cls_set:
if name_root is None:
name = model_cls.__name__
@@ -65,309 +73,370 @@ def _common_classifier(
name = name_root
for n_cl in [2, 3]:
model, X = fit_classification_model(
- model_cls(), n_cl, n_features=n_features,
- pos_features=pos_features, is_int=is_int,
- n_repeated=n_repeated, n_redundant=n_redundant)
- pmethod = ('decision_function_binary' if n_cl == 2 else
- 'decision_function')
+ model_cls(),
+ n_cl,
+ n_features=n_features,
+ pos_features=pos_features,
+ is_int=is_int,
+ n_repeated=n_repeated,
+ n_redundant=n_redundant,
+ )
+ pmethod = (
+ "decision_function_binary" if n_cl == 2 else "decision_function"
+ )
bs = [True, False] if raw_scores else [False]
for b in bs:
for z in [False]:
# zipmap does not allow tensor(double) as inputs
- with self.subTest(n_classes=n_cl, raw_scores=b,
- model=name):
+ with self.subTest(n_classes=n_cl, raw_scores=b, model=name):
if raw_scores:
- options = {"raw_scores": b,
- "zipmap": z}
+ options = {"raw_scores": b, "zipmap": z}
else:
options = {"zipmap": z}
model_onnx = convert_sklearn(
- model, "model",
- [("input", DoubleTensorType(
- [None, X.shape[1]]))],
+ model,
+ "model",
+ [("input", DoubleTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): options})
+ options={id(model): options},
+ )
if debug:
print(model_onnx)
self.assertIn("elem_type: 11", str(model_onnx))
- methods = None if not b else ['predict', pmethod]
+ methods = None if not b else ["predict", pmethod]
if not b and n_cl == 2:
# onnxruntime does not support sigmoid for
# DoubleTensorType
continue
dump_data_and_model(
- X.astype(np.float64)[:7], model, model_onnx,
- methods=methods, verbose=verbose,
+ X.astype(np.float64)[:7],
+ model,
+ model_onnx,
+ methods=methods,
+ verbose=verbose,
comparable_outputs=comparable_outputs,
basename="Sklearn{}Double2RAW{}"
- "ZIP{}CL{}".format(
- name,
- 1 if b else 0,
- 1 if z else 0, n_cl))
+ "ZIP{}CL{}".format(
+ name, 1 if b else 0, 1 if z else 0, n_cl
+ ),
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax is missing")
+ pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing"
+ )
@unittest.skipIf(
- pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax is missing")
+ pv.Version(onnx_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_logistic_64(self):
self._common_classifier([LogisticRegression])
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax is missing")
+ pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing"
+ )
@unittest.skipIf(
- pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax is missing")
+ pv.Version(onnx_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_modelsgd_64(self):
self._common_classifier([SGDClassifier])
- self._common_classifier([lambda: SGDClassifier(loss='hinge')],
- "SGDClassifierHinge")
- self._common_classifier([lambda: SGDClassifier(loss='perceptron')],
- "SGDClassifierPerceptron")
+ self._common_classifier(
+ [lambda: SGDClassifier(loss="hinge")], "SGDClassifierHinge"
+ )
+ self._common_classifier(
+ [lambda: SGDClassifier(loss="perceptron")], "SGDClassifierPerceptron"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Reciprocal are missing")
+ reason="ArgMax, Reciprocal are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Reciprocal are missing")
+ reason="ArgMax, Reciprocal are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_modelsgdlog_64(self):
self._common_classifier(
- [lambda: SGDClassifier(loss=LOG_LOSS, random_state=32)],
- "SGDClassifierLog")
+ [lambda: SGDClassifier(loss=LOG_LOSS, random_state=32)], "SGDClassifierLog"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Relu are missing")
+ reason="ArgMax, Relu are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Relu are missing")
+ reason="ArgMax, Relu are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_mlpclassifier_relu_64(self):
self._common_classifier(
- [lambda: MLPClassifier(activation='relu')],
- "MLPClassifierRelu", raw_scores=False)
+ [lambda: MLPClassifier(activation="relu")],
+ "MLPClassifierRelu",
+ raw_scores=False,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_mlpclassifier_tanh_64(self):
self._common_classifier(
- [lambda: MLPClassifier(activation='tanh',
- hidden_layer_sizes=(2,))],
- "MLPClassifierTanh", raw_scores=False)
+ [lambda: MLPClassifier(activation="tanh", hidden_layer_sizes=(2,))],
+ "MLPClassifierTanh",
+ raw_scores=False,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sigmoid are missing")
+ reason="ArgMax, Sigmoid are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_mlpclassifier_logistic_64(self):
self._common_classifier(
- [lambda: MLPClassifier(activation='logistic',
- hidden_layer_sizes=(2,))],
- "MLPClassifierLogistic", raw_scores=False)
+ [lambda: MLPClassifier(activation="logistic", hidden_layer_sizes=(2,))],
+ "MLPClassifierLogistic",
+ raw_scores=False,
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax is missing")
+ pv.Version(ort_version) < pv.Version(ORT_VERSION), reason="ArgMax is missing"
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_mlpclassifier_identity_64(self):
self._common_classifier(
- [lambda: MLPClassifier(activation='identity',
- hidden_layer_sizes=(2,))],
- "MLPClassifierIdentity", raw_scores=False)
+ [lambda: MLPClassifier(activation="identity", hidden_layer_sizes=(2,))],
+ "MLPClassifierIdentity",
+ raw_scores=False,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, TopK are missing")
+ reason="ArgMax, TopK are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_knn_64(self):
self._common_classifier(
- [lambda: KNeighborsClassifier()],
- "KNeighborsClassifier", raw_scores=False)
+ [lambda: KNeighborsClassifier()], "KNeighborsClassifier", raw_scores=False
+ )
- @unittest.skipIf(
- VotingClassifier is None, reason="scikit-learn too old")
+ @unittest.skipIf(VotingClassifier is None, reason="scikit-learn too old")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sum are missing")
+ reason="ArgMax, Sum are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_voting_64(self):
- estimators = [('a', LogisticRegression()),
- ('b', LogisticRegression())]
+ estimators = [("a", LogisticRegression()), ("b", LogisticRegression())]
self._common_classifier(
- [lambda: VotingClassifier(estimators,
- flatten_transform=False)],
- "VotingClassifier", raw_scores=False,
- comparable_outputs=[0])
+ [lambda: VotingClassifier(estimators, flatten_transform=False)],
+ "VotingClassifier",
+ raw_scores=False,
+ comparable_outputs=[0],
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, LpNormalization are missing")
+ reason="ArgMax, LpNormalization are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_64(self):
self._common_classifier(
[lambda: OneVsRestClassifier(LogisticRegression())],
- "VotingClassifier", raw_scores=False)
+ "VotingClassifier",
+ raw_scores=False,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, LpNormalization are missing")
+ reason="ArgMax, LpNormalization are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_svc_linear_64(self):
self._common_classifier(
- [lambda: SVC(kernel='linear')], "SVCLinear",
- raw_scores=False)
+ [lambda: SVC(kernel="linear")], "SVCLinear", raw_scores=False
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sum are missing")
+ reason="ArgMax, Sum are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_svc_poly_64(self):
self._common_classifier(
- [lambda: SVC(kernel='poly')], "SVCpoly",
- raw_scores=False)
+ [lambda: SVC(kernel="poly")], "SVCpoly", raw_scores=False
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sum are missing")
+ reason="ArgMax, Sum are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_svc_rbf_64(self):
- self._common_classifier(
- [lambda: SVC(kernel='rbf')], "SVCrbf",
- raw_scores=False)
+ self._common_classifier([lambda: SVC(kernel="rbf")], "SVCrbf", raw_scores=False)
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sum are missing")
+ reason="ArgMax, Sum are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_svc_sigmoid_64(self):
self._common_classifier(
- [lambda: SVC(kernel='sigmoid')], "SVCsigmoid",
- raw_scores=False)
+ [lambda: SVC(kernel="sigmoid")], "SVCsigmoid", raw_scores=False
+ )
- @unittest.skipIf(
- BernoulliNB is None, reason="new in scikit version 0.20")
+ @unittest.skipIf(BernoulliNB is None, reason="new in scikit version 0.20")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Log are missing")
+ reason="ArgMax, Log are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_bernoullinb_64(self):
self._common_classifier(
- [lambda: BernoulliNB()], "BernoulliNB", raw_scores=False)
+ [lambda: BernoulliNB()], "BernoulliNB", raw_scores=False
+ )
- @unittest.skipIf(
- ComplementNB is None, reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, ReduceLogSumExp are missing")
+ reason="ArgMax, ReduceLogSumExp are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_complementnb_64(self):
self._common_classifier(
- [lambda: ComplementNB()], "ComplementNB",
- raw_scores=False, pos_features=True)
+ [lambda: ComplementNB()],
+ "ComplementNB",
+ raw_scores=False,
+ pos_features=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, ReduceMean are missing")
+ reason="ArgMax, ReduceMean are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
+ reason="ArgMax, Tanh are missing",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_bagging_64(self):
self._common_classifier(
- [lambda: BaggingClassifier(
- LogisticRegression(random_state=42), random_state=42)],
- "BaggingClassifier")
+ [
+ lambda: BaggingClassifier(
+ LogisticRegression(random_state=42), random_state=42
+ )
+ ],
+ "BaggingClassifier",
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sigmoid are missing")
+ reason="ArgMax, Sigmoid are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
- @unittest.skipIf(
- StackingClassifier is None, reason="scikit-learn too old")
+ reason="ArgMax, Tanh are missing",
+ )
+ @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old")
@ignore_warnings(category=warnings_to_skip)
def test_stacking_64(self):
self._common_classifier(
- [lambda: StackingClassifier([
- ('a', LogisticRegression()),
- ('b', LogisticRegression())])],
- "StackingClassifier")
+ [
+ lambda: StackingClassifier(
+ [("a", LogisticRegression()), ("b", LogisticRegression())]
+ )
+ ],
+ "StackingClassifier",
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sigmoid are missing")
+ reason="ArgMax, Sigmoid are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
- @unittest.skipIf(
- StackingClassifier is None, reason="scikit-learn too old")
+ reason="ArgMax, Tanh are missing",
+ )
+ @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old")
@ignore_warnings(category=warnings_to_skip)
def test_calibration_sigmoid_64(self):
self._common_classifier(
- [lambda: CalibratedClassifierCV(
- base_estimator=LogisticRegression(), method='sigmoid')],
+ [
+ lambda: CalibratedClassifierCV(
+ base_estimator=LogisticRegression(), method="sigmoid"
+ )
+ ],
"CalibratedClassifierCV",
- raw_scores=False)
+ raw_scores=False,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Sigmoid are missing")
+ reason="ArgMax, Sigmoid are missing",
+ )
@unittest.skipIf(
pv.Version(onnx_version) < pv.Version(ORT_VERSION),
- reason="ArgMax, Tanh are missing")
- @unittest.skipIf(
- StackingClassifier is None, reason="scikit-learn too old")
- @unittest.skipIf(
- True, reason="Converter does not call IsotonicRegression")
+ reason="ArgMax, Tanh are missing",
+ )
+ @unittest.skipIf(StackingClassifier is None, reason="scikit-learn too old")
+ @unittest.skipIf(True, reason="Converter does not call IsotonicRegression")
@ignore_warnings(category=warnings_to_skip)
def test_calibration_isotonic_64(self):
self._common_classifier(
- [lambda: CalibratedClassifierCV(
- base_estimator=LogisticRegression(), method='isotonic')],
+ [
+ lambda: CalibratedClassifierCV(
+ base_estimator=LogisticRegression(), method="isotonic"
+ )
+ ],
"CalibratedClassifierCV",
- raw_scores=False)
+ raw_scores=False,
+ )
if __name__ == "__main__":
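
The double-precision classifier tests above all convert with a DoubleTensorType input and disable zipmap, since ZipMap does not accept tensor(double) inputs. A minimal sketch, assuming onnxruntime >= 1.7 (which provides the double kernels these tests check for) and an illustrative opset.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import DoubleTensorType
from onnxruntime import InferenceSession

X, y = make_classification(100, n_features=4, random_state=0)
model = LogisticRegression().fit(X, y)

model_onnx = convert_sklearn(
    model,
    "model",
    [("input", DoubleTensorType([None, X.shape[1]]))],
    options={id(model): {"zipmap": False}},
    target_opset=15,  # assumption
)
# elem_type 11 is onnx.TensorProto.DOUBLE, the check used by the tests above.
assert "elem_type: 11" in str(model_onnx)

sess = InferenceSession(
    model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
)
labels, probabilities = sess.run(None, {"input": X[:5].astype(np.float64)})
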
diff --git a/tests/test_sklearn_double_tensor_type_reg.py b/tests/test_sklearn_double_tensor_type_reg.py
index 3d5fca4ce..e4aa67cbd 100644
--- a/tests/test_sklearn_double_tensor_type_reg.py
+++ b/tests/test_sklearn_double_tensor_type_reg.py
@@ -6,6 +6,7 @@
import packaging.version as pv
import numpy as np
from sklearn.exceptions import ConvergenceWarning
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
@@ -15,6 +16,7 @@
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
+
try:
from sklearn.ensemble import VotingRegressor
except ImportError:
@@ -23,8 +25,7 @@
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import DoubleTensorType
from onnxruntime import __version__ as ort_version
-from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET)
+from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET
warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning)
@@ -32,127 +33,162 @@
class TestSklearnDoubleTensorTypeRegressor(unittest.TestCase):
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("1.2.0"),
- reason="onnxruntime misses implementation for double")
+ reason="onnxruntime misses implementation for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_linear_regression_64(self):
model, X = fit_regression_model(LinearRegression())
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("elem_type: 11", str(model_onnx))
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
- basename="SklearnLinearRegressionDouble")
+ X.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnLinearRegressionDouble",
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.7.0"),
reason="onnxruntime misses implementation for "
- "Relu, Tanh, Sigmoid for double")
+ "Relu, Tanh, Sigmoid for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_mlpregressor_64(self):
# Could not find an implementation for the node Relu:Relu(6)
# Could not find an implementation for the node Tanh:Tanh(6)
# Could not find an implementation for the node Sigmoid:Sigmoid(6)
- for activation in ['relu', 'tanh', 'logistic']:
+ for activation in ["relu", "tanh", "logistic"]:
with self.subTest(activation=activation):
- model, X = fit_regression_model(
- MLPRegressor(activation=activation))
+ model, X = fit_regression_model(MLPRegressor(activation=activation))
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("elem_type: 11", str(model_onnx))
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
- basename="SklearnMLPRegressorDouble%s" % activation)
+ X.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnMLPRegressorDouble%s" % activation,
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="onnxruntime misses implementation for "
- "ReduceMean for double")
+ reason="onnxruntime misses implementation for " "ReduceMean for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_bagging_regressor_sgd_64(self):
# Could not find an implementation for
# the node ReduceMean:ReduceMean(11)
- model, X = fit_regression_model(
- BaggingRegressor(SGDRegressor()))
+ model, X = fit_regression_model(BaggingRegressor(SGDRegressor()))
model_onnx = convert_sklearn(
- model, "bagging regressor",
+ model,
+ "bagging regressor",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
- basename="SklearnBaggingRegressorSGDDouble")
+ X.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnBaggingRegressorSGDDouble",
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("1.2.0"),
- reason="onnxruntime misses implementation for double")
+ reason="onnxruntime misses implementation for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_sgd_regressor_64(self):
model, X = fit_regression_model(SGDRegressor())
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("elem_type: 11", str(model_onnx))
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
- basename="SklearnLinearSGDRegressorDouble")
+ X.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnLinearSGDRegressorDouble",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="shape_inference fails")
+ pv.Version(ort_version) < pv.Version("1.7.0"), reason="shape_inference fails"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_gpr_rbf_fitted_true_double(self):
gp = GaussianProcessRegressor(
- alpha=1e-7, n_restarts_optimizer=15, normalize_y=True)
+ alpha=1e-7, n_restarts_optimizer=15, normalize_y=True
+ )
gp, X = fit_regression_model(gp)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float64), gp, model_onnx, verbose=False,
- basename="SklearnGaussianProcessRBFTDouble")
+ X.astype(np.float64),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBFTDouble",
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="onnxruntime misses implementation for "
- "TopK for double")
+ reason="onnxruntime misses implementation for " "TopK for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_knn_regressor_double(self):
# Could not find an implementation for the node To_TopK:TopK(11)
model, X = fit_regression_model(KNeighborsRegressor(n_neighbors=2))
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", DoubleTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'optim': 'cdist'}})
+ options={id(model): {"optim": "cdist"}},
+ )
dump_data_and_model(
X.astype(np.float64)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressorDouble")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressorDouble",
+ )
@unittest.skipIf(VotingRegressor is None, reason="new in 0.21")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="onnxruntime misses implementation for "
- "Sum for double")
+ reason="onnxruntime misses implementation for " "Sum for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_voting_regression(self):
# Could not find an implementation for the node Sum:Sum(8)
- model = VotingRegressor([
- ('lr', LinearRegression()),
- ('dt', SGDRegressor())])
+ model = VotingRegressor([("lr", LinearRegression()), ("dt", SGDRegressor())])
model, X = fit_regression_model(model)
model_onnx = convert_sklearn(
- model, "voting regression",
+ model,
+ "voting regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
+ X.astype(np.float64),
+ model,
+ model_onnx,
basename="SklearnVotingRegressorDouble",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
if __name__ == "__main__":
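
The double-precision regressor tests above use either convert_sklearn with DoubleTensorType or to_onnx; to_onnx can also infer the input type from a sample's dtype. A minimal sketch under that assumption, with an illustrative opset and default input name "X".

import numpy as np
from sklearn.linear_model import LinearRegression
from skl2onnx import to_onnx
from onnxruntime import InferenceSession

X = np.random.randn(100, 4)
y = X @ np.array([1.0, 2.0, -1.0, 0.5])
model = LinearRegression().fit(X, y)

# A float64 sample yields a DoubleTensorType input (elem_type 11).
onx = to_onnx(model, X[:1].astype(np.float64), target_opset=15)  # opset is an assumption
assert "elem_type: 11" in str(onx)

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
pred = sess.run(None, {"X": X[:5].astype(np.float64)})[0]
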
diff --git a/tests/test_sklearn_double_tensor_type_tr.py b/tests/test_sklearn_double_tensor_type_tr.py
index ba3c311c4..78e2e7523 100644
--- a/tests/test_sklearn_double_tensor_type_tr.py
+++ b/tests/test_sklearn_double_tensor_type_tr.py
@@ -5,6 +5,7 @@
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.exceptions import ConvergenceWarning
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
@@ -13,10 +14,12 @@
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.preprocessing import Binarizer
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail
from onnxruntime.capi.onnxruntime_pybind11_state import (
- NotImplemented as OrtNotImplemented)
+ NotImplemented as OrtNotImplemented,
+ )
except ImportError:
OrtFail = RuntimeError
OrtNotImplemented = RuntimeError
@@ -25,22 +28,21 @@
from skl2onnx.common.data_types import DoubleTensorType
from onnxruntime import __version__ as ort_version
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-warnings_to_skip = (
- DeprecationWarning, FutureWarning, ConvergenceWarning, UserWarning)
+warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning, UserWarning)
ORT_VERSION = "1.7.0"
OPSET_VERSION = 11
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnDoubleTensorTypeTransformer(unittest.TestCase):
-
- def _common_transform(
- self, model_cls_set, name_root=None, debug=False):
+ def _common_transform(self, model_cls_set, name_root=None, debug=False):
for model_cls in model_cls_set:
if name_root is None:
name = model_cls.__name__
@@ -51,26 +53,32 @@ def _common_transform(
X = np.random.randn(100, 4).astype(np.float64)
model.fit(X)
X = np.random.randn(100, 4).astype(np.float64)
- pmethod = 'transform'
+ pmethod = "transform"
with self.subTest(model=name):
options = {}
model_onnx = convert_sklearn(
- model, "model",
+ model,
+ "model",
[("input", DoubleTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): options})
+ options={id(model): options},
+ )
if debug:
print(model_onnx)
self.assertIn("elem_type: 11", str(model_onnx))
methods = [pmethod]
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
+ X.astype(np.float64),
+ model,
+ model_onnx,
methods=methods,
- basename="Sklearn{}Double".format(name))
+ basename="Sklearn{}Double".format(name),
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="onnxruntime misses operator for double")
+ reason="onnxruntime misses operator for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_scaler_64(self):
self._common_transform([StandardScaler])
@@ -95,101 +103,110 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None):
exp = model.score_samples(X)
expp = model.predict_proba(X)
onx = to_onnx(
- model, X[:1], target_opset=tg,
- options={id(model): {'score_samples': True}},
- black_op=black_op)
+ model,
+ X[:1],
+ target_opset=tg,
+ options={id(model): {"score_samples": True}},
+ black_op=black_op,
+ )
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except OrtFail as e:
- raise RuntimeError('Issue {}\n{}'.format(e, str(onx)))
- got = sess.run(None, {'X': X})
+ raise RuntimeError("Issue {}\n{}".format(e, str(onx)))
+ got = sess.run(None, {"X": X})
self.assertEqual(len(got), 3)
- np.testing.assert_almost_equal(
- expp.ravel(), got[1].ravel(), decimal=decimal)
- np.testing.assert_almost_equal(
- exp.ravel(), got[2].ravel(), decimal=decimal)
+ np.testing.assert_almost_equal(expp.ravel(), got[1].ravel(), decimal=decimal)
+ np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal)
@unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnxruntime misses Gemm for double")
+ TARGET_OPSET < OPSET_VERSION, reason="onnxruntime misses Gemm for double"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_gaussian_mixture_binary_classification(self):
- model, X = self._fit_model_binary_classification(
- GaussianMixture(), load_iris())
+ model, X = self._fit_model_binary_classification(GaussianMixture(), load_iris())
for tg in range(min(9, TARGET_OPSET), TARGET_OPSET + 1):
with self.subTest(target_opset=tg):
if tg < 11:
with self.assertRaises(RuntimeError):
model_onnx = convert_sklearn(
- model, "gaussian_mixture",
- [("input", DoubleTensorType([
- None, X.shape[1]]))],
- target_opset=tg)
+ model,
+ "gaussian_mixture",
+ [("input", DoubleTensorType([None, X.shape[1]]))],
+ target_opset=tg,
+ )
continue
model_onnx = convert_sklearn(
- model, "gaussian_mixture",
+ model,
+ "gaussian_mixture",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=tg)
+ target_opset=tg,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinGaussianMixtureDouble")
+ X, model, model_onnx, basename="SklearnBinGaussianMixtureDouble"
+ )
self._test_score(model, X, tg)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_bayesian_mixture_binary_classification(self):
for cov in ["full", "tied", "diag", "spherical"]:
with self.subTest(cov=cov):
model, X = self._fit_model_binary_classification(
- BayesianGaussianMixture(), load_iris(),
- covariance_type=cov)
+ BayesianGaussianMixture(), load_iris(), covariance_type=cov
+ )
model_onnx = convert_sklearn(
- model, "gaussian_mixture",
+ model,
+ "gaussian_mixture",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinBayesianGaussianMixtureDouble")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnBinBayesianGaussianMixtureDouble",
+ )
self._test_score(model, X, TARGET_OPSET)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_model_gaussian_mixture_multiclass(self):
model, X = self._fit_model_multiclass_classification(
- GaussianMixture(), load_iris())
+ GaussianMixture(), load_iris()
+ )
model_onnx = convert_sklearn(
- model, "gaussian_mixture",
+ model,
+ "gaussian_mixture",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclGaussianMixtureDouble")
+ X, model, model_onnx, basename="SklearnMclGaussianMixtureDouble"
+ )
self._test_score(model, X, TARGET_OPSET)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_comp2(self):
data = load_iris()
@@ -197,190 +214,243 @@ def test_gaussian_mixture_comp2(self):
model = GaussianMixture(n_components=2)
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float64)[40:60], model, model_onnx,
+ X.astype(np.float64)[40:60],
+ model,
+ model_onnx,
basename="GaussianMixtureC2Double",
- intermediate_steps=False)
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET)
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_full(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float64)[40:60], model, model_onnx,
+ X.astype(np.float64)[40:60],
+ model,
+ model_onnx,
basename="GaussianMixtureC2FullDouble",
- intermediate_steps=False)
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET)
@unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnxruntime misses Gemm for double")
+ TARGET_OPSET < OPSET_VERSION, reason="onnxruntime misses Gemm for double"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_tied(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='tied')
+ model = GaussianMixture(n_components=2, covariance_type="tied")
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float64)[40:60],
- model, model_onnx, basename="GaussianMixtureC2TiedDouble",
- intermediate_steps=False)
+ model,
+ model_onnx,
+ basename="GaussianMixtureC2TiedDouble",
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_diag(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='diag')
+ model = GaussianMixture(n_components=2, covariance_type="diag")
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
- self.assertIn('ReduceLogSumExp', str(model_onnx))
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
+ self.assertIn("ReduceLogSumExp", str(model_onnx))
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float64)[40:60],
- model, model_onnx, basename="GaussianMixtureC2DiagDouble",
- intermediate_steps=False)
+ model,
+ model_onnx,
+ basename="GaussianMixtureC2DiagDouble",
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET, decimal=4)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_spherical(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='spherical')
+ model = GaussianMixture(n_components=2, covariance_type="spherical")
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float64)[40:60],
- model, model_onnx, basename="GaussianMixtureC2SphericalDouble",
- intermediate_steps=False)
+ model,
+ model_onnx,
+ basename="GaussianMixtureC2SphericalDouble",
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET, decimal=4)
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def _test_gaussian_mixture_full_black_op(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'Add'})
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"Add"},
+ )
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'ReduceLogSumExp'})
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"ReduceLogSumExp"},
+ )
self.assertIsNotNone(model_onnx)
- self.assertNotIn('ReduceLogSumExp', str(model_onnx))
+ self.assertNotIn("ReduceLogSumExp", str(model_onnx))
dump_data_and_model(
X.astype(np.float64)[40:60],
- model, model_onnx, basename="GaussianMixtureC2FullBLDouble",
- intermediate_steps=False)
+ model,
+ model_onnx,
+ basename="GaussianMixtureC2FullBLDouble",
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET)
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="OnnxEqual does not support float")
+ reason="onnxruntime misses Gemm for double",
+ )
+ @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float")
@ignore_warnings(category=warnings_to_skip)
def _test_gaussian_mixture_full_black_op_noargmax(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'Add'})
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"Add"},
+ )
model_onnx = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- black_op={'ReduceLogSumExp', 'ArgMax'})
+ black_op={"ReduceLogSumExp", "ArgMax"},
+ )
self.assertIsNotNone(model_onnx)
- self.assertNotIn('ArgMax', str(model_onnx))
+ self.assertNotIn("ArgMax", str(model_onnx))
dump_data_and_model(
X.astype(np.float64)[40:60],
- model, model_onnx,
+ model,
+ model_onnx,
basename="GaussianMixtureC2FullBLNMDouble",
- intermediate_steps=False)
+ intermediate_steps=False,
+ )
self._test_score(model, X, TARGET_OPSET)
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="OnnxEqual does not support float")
+ reason="onnxruntime misses Gemm for double",
+ )
+ @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float")
@ignore_warnings(category=warnings_to_skip)
def test_gaussian_mixture_full_black_op_noargmax_inf(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=10, covariance_type='full')
+ model = GaussianMixture(n_components=10, covariance_type="full")
model.fit(X)
model_onnx1 = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'score_samples': True}})
+ options={id(model): {"score_samples": True}},
+ )
model_onnx2 = convert_sklearn(
- model, "GM", [("input", DoubleTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp', 'ArgMax'})
- self.assertNotIn('ArgMax', str(model_onnx2))
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp", "ArgMax"},
+ )
+ self.assertNotIn("ArgMax", str(model_onnx2))
sess1 = InferenceSession(
- model_onnx1.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res1 = sess1.run(None, {'input': (X[:5] * 1e2).astype(np.float64)})
+ model_onnx1.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res1 = sess1.run(None, {"input": (X[:5] * 1e2).astype(np.float64)})
a1, b1, c1 = res1
sess2 = InferenceSession(
- model_onnx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res2 = sess2.run(None, {'input': (X[:5] * 1e2).astype(np.float64)})
+ model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res2 = sess2.run(None, {"input": (X[:5] * 1e2).astype(np.float64)})
a2, b2, c2 = res2
self.assertEqual(b1.max(), b2.max())
@@ -389,34 +459,36 @@ def test_gaussian_mixture_full_black_op_noargmax_inf(self):
self.assertLess(abs(c1.min() - c2.min()) / c2.min(), 1e-5)
self._test_score(
- model, X, TARGET_OPSET, black_op={'ReduceLogSumExp', 'ArgMax'},
- decimal=2)
+ model, X, TARGET_OPSET, black_op={"ReduceLogSumExp", "ArgMax"}, decimal=2
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Where for double")
+ reason="onnxruntime misses Where for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_binarizer(self):
- data = np.array([[1., -1., 2.],
- [2., 0., 0.],
- [0., 1., -1.]], dtype=np.float64)
+ data = np.array(
+ [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]], dtype=np.float64
+ )
model = Binarizer(threshold=0.5)
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn binarizer",
+ model,
+ "scikit-learn binarizer",
[("input", DoubleTensorType(data.shape))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnBinarizerDouble-SkipDim1")
+ data, model, model_onnx, basename="SklearnBinarizerDouble-SkipDim1"
+ )
- @unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses Gemm for double")
+ @unittest.skipIf(TARGET_OPSET < OPSET_VERSION, reason="onnx misses Gemm for double")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_kmeans_clustering(self):
data = load_iris()
@@ -424,20 +496,23 @@ def test_kmeans_clustering(self):
model = KMeans(n_clusters=3)
model.fit(X)
model_onnx = convert_sklearn(
- model, "kmeans",
+ model,
+ "kmeans",
[("input", DoubleTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[40:60], model, model_onnx,
- basename="SklearnKMeansDoubleGemm-Dec4")
+ X[40:60], model, model_onnx, basename="SklearnKMeansDoubleGemm-Dec4"
+ )
@unittest.skipIf(
- TARGET_OPSET < OPSET_VERSION,
- reason="onnx misses ArgMin for double")
+ TARGET_OPSET < OPSET_VERSION, reason="onnx misses ArgMin for double"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version(ORT_VERSION),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_kmeans_clustering_nogemm(self):
data = load_iris()
@@ -445,36 +520,40 @@ def test_kmeans_clustering_nogemm(self):
model = KMeans(n_clusters=3)
model.fit(X)
model_onnx = convert_sklearn(
- model, "kmeans",
+ model,
+ "kmeans",
[("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'gemm': False}})
+ options={id(model): {"gemm": False}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[40:60], model, model_onnx,
- basename="SklearnKMeansDoubleNoGemm-Dec4")
+ X[40:60], model, model_onnx, basename="SklearnKMeansDoubleNoGemm-Dec4"
+ )
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="onnxruntime misses Gemm for double")
+ reason="onnxruntime misses Gemm for double",
+ )
@ignore_warnings(category=warnings_to_skip)
def test_pca_default(self):
-
def _fit_model_pca(model):
data = load_diabetes()
X_train, X_test, *_ = train_test_split(
- data.data, data.target, test_size=0.2, random_state=42)
+ data.data, data.target, test_size=0.2, random_state=42
+ )
model.fit(X_train)
return model, X_test.astype(np.float64)
model, X_test = _fit_model_pca(PCA(random_state=42, n_components=2))
model_onnx = convert_sklearn(
- model, initial_types=[
- ("input", DoubleTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=[("input", DoubleTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnPCADoubleDefault")
+ X_test, model, model_onnx, basename="SklearnPCADoubleDefault"
+ )
# Untested operators:
# * float parameters only:
diff --git a/tests/test_sklearn_feature_hasher.py b/tests/test_sklearn_feature_hasher.py
index f1fa643d4..50dfc8d4c 100644
--- a/tests/test_sklearn_feature_hasher.py
+++ b/tests/test_sklearn_feature_hasher.py
@@ -9,62 +9,78 @@
from pandas import DataFrame
from onnx import TensorProto
from onnx.helper import (
- make_model, make_node,
- make_graph, make_tensor_value_info, make_opsetid)
+ make_model,
+ make_node,
+ make_graph,
+ make_tensor_value_info,
+ make_opsetid,
+)
from onnx.checker import check_model
from onnxruntime import __version__ as ort_version
from sklearn.feature_extraction import FeatureHasher
from skl2onnx import to_onnx
from skl2onnx.common.data_types import (
- StringTensorType, Int64TensorType, FloatTensorType,
- DoubleTensorType)
-from test_utils import (
- TARGET_OPSET, TARGET_IR,
- InferenceSessionEx as InferenceSession)
+ StringTensorType,
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+)
+from test_utils import TARGET_OPSET, TARGET_IR, InferenceSessionEx as InferenceSession
class TestSklearnFeatureHasher(unittest.TestCase):
-
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.12.0"),
- reason="no murmurhash3 in ort")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.12.0"), reason="no murmurhash3 in ort"
+ )
def test_ort_murmurhash3_int(self):
- X = make_tensor_value_info('X', TensorProto.UINT32, [None])
- Y = make_tensor_value_info('Y', TensorProto.UINT32, [None])
- node = make_node('MurmurHash3', ['X'], ['Y'], domain="com.microsoft",
- positive=1, seed=0)
- graph = make_graph([node], 'hash', [X], [Y])
- onnx_model = make_model(graph, opset_imports=[
- make_opsetid('', TARGET_OPSET),
- make_opsetid('com.microsoft', 1)],
- ir_version=TARGET_IR)
+ X = make_tensor_value_info("X", TensorProto.UINT32, [None])
+ Y = make_tensor_value_info("Y", TensorProto.UINT32, [None])
+ node = make_node(
+ "MurmurHash3", ["X"], ["Y"], domain="com.microsoft", positive=1, seed=0
+ )
+ graph = make_graph([node], "hash", [X], [Y])
+ onnx_model = make_model(
+ graph,
+ opset_imports=[
+ make_opsetid("", TARGET_OPSET),
+ make_opsetid("com.microsoft", 1),
+ ],
+ ir_version=TARGET_IR,
+ )
check_model(onnx_model)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- feeds = {'X': np.array([0, 1, 2, 3, 4, 5], dtype=np.uint32)}
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ feeds = {"X": np.array([0, 1, 2, 3, 4, 5], dtype=np.uint32)}
got = sess.run(None, feeds)
self.assertEqual(got[0].shape, feeds["X"].shape)
self.assertEqual(got[0].dtype, feeds["X"].dtype)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.12.0"),
- reason="no murmurhash3 in ort")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.12.0"), reason="no murmurhash3 in ort"
+ )
def test_ort_murmurhash3_string(self):
- X = make_tensor_value_info('X', TensorProto.STRING, [None])
- Y = make_tensor_value_info('Y', TensorProto.INT32, [None])
- node = make_node('MurmurHash3', ['X'], ['Y'], domain="com.microsoft",
- positive=0, seed=0)
- graph = make_graph([node], 'hash', [X], [Y])
- onnx_model = make_model(graph, opset_imports=[
- make_opsetid('', TARGET_OPSET),
- make_opsetid('com.microsoft', 1)],
- ir_version=TARGET_IR)
+ X = make_tensor_value_info("X", TensorProto.STRING, [None])
+ Y = make_tensor_value_info("Y", TensorProto.INT32, [None])
+ node = make_node(
+ "MurmurHash3", ["X"], ["Y"], domain="com.microsoft", positive=0, seed=0
+ )
+ graph = make_graph([node], "hash", [X], [Y])
+ onnx_model = make_model(
+ graph,
+ opset_imports=[
+ make_opsetid("", TARGET_OPSET),
+ make_opsetid("com.microsoft", 1),
+ ],
+ ir_version=TARGET_IR,
+ )
check_model(onnx_model)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
- input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555']
- feeds = {'X': np.array(input_strings)}
+ input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"]
+ feeds = {"X": np.array(input_strings)}
got = sess.run(None, feeds)
n_features = 4
@@ -81,7 +97,7 @@ def test_ort_murmurhash3_string(self):
for i in range(final.shape[0]):
mat[i, indices[i]] = final[i]
- skl = FeatureHasher(n_features, input_type='string', dtype=np.uint32)
+ skl = FeatureHasher(n_features, input_type="string", dtype=np.uint32)
expected = skl.transform(feeds["X"].reshape((-1, 1)))
dense = expected.todense()
for i, (a, b) in enumerate(zip(dense.tolist(), mat.tolist())):
@@ -90,12 +106,14 @@ def test_ort_murmurhash3_string(self):
def test_feature_hasher(self):
n_features = 5
- input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555']
+ input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"]
data = np.array(input_strings).reshape((-1, 1))
- for alternate_sign, dtype in [(True, np.float32),
- (True, np.float64),
- (True, np.int64),
- (False, np.float32)]:
+ for alternate_sign, dtype in [
+ (True, np.float32),
+ (True, np.float64),
+ (True, np.int64),
+ (False, np.float32),
+ ]:
if dtype == np.float32:
final_type = FloatTensorType
elif dtype == np.float64:
@@ -105,61 +123,64 @@ def test_feature_hasher(self):
else:
final_type = None
with self.subTest(alternate_sign=alternate_sign, dtype=dtype):
- model = FeatureHasher(n_features=n_features,
- alternate_sign=alternate_sign,
- dtype=dtype,
- input_type='string')
+ model = FeatureHasher(
+ n_features=n_features,
+ alternate_sign=alternate_sign,
+ dtype=dtype,
+ input_type="string",
+ )
model.fit(data)
expected = model.transform(data).todense()
model_onnx = to_onnx(
- model, initial_types=[("X", StringTensorType([None, 1]))],
+ model,
+ initial_types=[("X", StringTensorType([None, 1]))],
target_opset=TARGET_OPSET,
- final_types=[('Y', final_type([None, 1]))])
+ final_types=[("Y", final_type([None, 1]))],
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': data})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": data})
self.assertEqual(expected.shape, got[0].shape)
self.assertEqual(expected.dtype, got[0].dtype)
- for i, (a, b) in enumerate(zip(expected.tolist(),
- got[0].tolist())):
+ for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())):
if a != b:
- raise AssertionError(
- f"Discrepancies at line {i}: {a} != {b}")
+ raise AssertionError(f"Discrepancies at line {i}: {a} != {b}")
def test_feature_hasher_two_columns(self):
n_features = 5
- input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555']
+ input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"]
data = np.array(input_strings).reshape((-1, 2))
- model = FeatureHasher(n_features=n_features,
- alternate_sign=True,
- dtype=np.float32,
- input_type='string')
+ model = FeatureHasher(
+ n_features=n_features,
+ alternate_sign=True,
+ dtype=np.float32,
+ input_type="string",
+ )
model.fit(data)
expected = model.transform(data).todense()
model_onnx = to_onnx(
- model, initial_types=[
- ("X", StringTensorType([None, data.shape[1]]))],
+ model,
+ initial_types=[("X", StringTensorType([None, data.shape[1]]))],
target_opset=TARGET_OPSET,
- final_types=[('Y', FloatTensorType([None, n_features]))])
+ final_types=[("Y", FloatTensorType([None, n_features]))],
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': data})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": data})
self.assertEqual(expected.shape, got[0].shape)
self.assertEqual(expected.dtype, got[0].dtype)
- for i, (a, b) in enumerate(zip(expected.tolist(),
- got[0].tolist())):
+ for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())):
if a != b:
- raise AssertionError(
- f"Discrepancies at line {i}: {a} != {b}")
+ raise AssertionError(f"Discrepancies at line {i}: {a} != {b}")
def test_feature_hasher_dataframe(self):
n_features = 5
- input_strings = ['z0', 'o11', 'd222', 'q4444', 't333', 'c5555']
+ input_strings = ["z0", "o11", "d222", "q4444", "t333", "c5555"]
data = np.array(input_strings).reshape((-1, 2))
data = DataFrame(data)
data.columns = ["c1", "c2"]
@@ -174,59 +195,61 @@ def test_feature_hasher_dataframe(self):
if df != ar:
return
- model = FeatureHasher(n_features=n_features,
- alternate_sign=True,
- dtype=np.float32,
- input_type='string')
+ model = FeatureHasher(
+ n_features=n_features,
+ alternate_sign=True,
+ dtype=np.float32,
+ input_type="string",
+ )
model.fit(data)
expected = model.transform(data).todense()
print(expected)
model_onnx = to_onnx(
- model, initial_types=[
- ("X", StringTensorType([None, data.shape[0]]))],
+ model,
+ initial_types=[("X", StringTensorType([None, data.shape[0]]))],
target_opset=TARGET_OPSET,
- final_types=[('Y', FloatTensorType([None, n_features]))])
+ final_types=[("Y", FloatTensorType([None, n_features]))],
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': data_nx})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": data_nx})
self.assertEqual(expected.shape, got[0].shape)
self.assertEqual(expected.dtype, got[0].dtype)
- for i, (a, b) in enumerate(zip(expected.tolist(),
- got[0].tolist())):
+ for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())):
if a != b:
- raise AssertionError(
- f"Discrepancies at line {i}: {a} != {b}")
+ raise AssertionError(f"Discrepancies at line {i}: {a} != {b}")
def test_feature_hasher_two_columns_unicode(self):
n_features = 5
- input_strings = ['z0', 'o11', 'd222', '고리', 'é', 'ô']
+ input_strings = ["z0", "o11", "d222", "고리", "é", "ô"]
data = np.array(input_strings).reshape((-1, 2))
- model = FeatureHasher(n_features=n_features,
- alternate_sign=True,
- dtype=np.float32,
- input_type='string')
+ model = FeatureHasher(
+ n_features=n_features,
+ alternate_sign=True,
+ dtype=np.float32,
+ input_type="string",
+ )
model.fit(data)
expected = model.transform(data).todense()
model_onnx = to_onnx(
- model, initial_types=[
- ("X", StringTensorType([None, data.shape[1]]))],
+ model,
+ initial_types=[("X", StringTensorType([None, data.shape[1]]))],
target_opset=TARGET_OPSET,
- final_types=[('Y', FloatTensorType([None, n_features]))])
+ final_types=[("Y", FloatTensorType([None, n_features]))],
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': data})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": data})
self.assertEqual(expected.shape, got[0].shape)
self.assertEqual(expected.dtype, got[0].dtype)
- for i, (a, b) in enumerate(zip(expected.tolist(),
- got[0].tolist())):
+ for i, (a, b) in enumerate(zip(expected.tolist(), got[0].tolist())):
if a != b:
- raise AssertionError(
- f"Discrepancies at line {i}: {a} != {b}")
+ raise AssertionError(f"Discrepancies at line {i}: {a} != {b}")
if __name__ == "__main__":
diff --git a/tests/test_sklearn_feature_selection_converters.py b/tests/test_sklearn_feature_selection_converters.py
index f8418d508..63f3136d0 100644
--- a/tests/test_sklearn_feature_selection_converters.py
+++ b/tests/test_sklearn_feature_selection_converters.py
@@ -30,122 +30,134 @@ class TestSklearnFeatureSelectionConverters(unittest.TestCase):
def test_generic_univariate_select_int(self):
model = GenericUnivariateSelect()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "generic univariate select",
+ model,
+ "generic univariate select",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGenericUnivariateSelect")
+ X, model, model_onnx, basename="SklearnGenericUnivariateSelect"
+ )
def test_rfe_int(self):
model = RFE(estimator=SVR(kernel="linear"))
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "rfe", [("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "rfe",
+ [("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx, basename="SklearnRFE",
- methods=["transform"])
+ X, model, model_onnx, basename="SklearnRFE", methods=["transform"]
+ )
def test_rfecv_int(self):
model = RFECV(estimator=SVR(kernel="linear"), cv=3)
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "rfecv", [("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "rfecv",
+ [("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx, basename="SklearnRFECV",
- methods=["transform"])
+ X, model, model_onnx, basename="SklearnRFECV", methods=["transform"]
+ )
def test_select_fdr_int(self):
model = SelectFdr()
X, y = load_breast_cancer(return_X_y=True)
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fdr",
+ model,
+ "select fdr",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnSelectFdr")
+ X.astype(np.int64), model, model_onnx, basename="SklearnSelectFdr"
+ )
def test_select_fpr_int(self):
model = SelectFpr()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fpr",
+ model,
+ "select fpr",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectFpr")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFpr")
def test_select_from_model_int(self):
model = SelectFromModel(estimator=SVR(kernel="linear"))
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select from model",
+ model,
+ "select from model",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectFromModel")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFromModel")
def test_select_fwe_int(self):
model = SelectFwe()
X, y = load_breast_cancer(return_X_y=True)
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fwe",
+ model,
+ "select fwe",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnSelectFwe")
+ X.astype(np.int64), model, model_onnx, basename="SklearnSelectFwe"
+ )
def test_select_k_best_int(self):
model = SelectKBest(k="all")
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select k best",
+ model,
+ "select k best",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectKBest")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectKBest")
def test_select_percentile_int(self):
model = SelectPercentile()
@@ -156,195 +168,208 @@ def test_select_percentile_int(self):
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select percentile",
+ model,
+ "select percentile",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectPercentile")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectPercentile")
def test_variance_threshold_int(self):
model = VarianceThreshold()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.int64)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "variance threshold",
+ model,
+ "variance threshold",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnVarianceThreshold")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnVarianceThreshold")
def test_generic_univariate_select_float(self):
model = GenericUnivariateSelect()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "generic univariate select",
+ model,
+ "generic univariate select",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGenericUnivariateSelect")
+ X, model, model_onnx, basename="SklearnGenericUnivariateSelect"
+ )
def test_rfe_float(self):
model = RFE(estimator=SVR(kernel="linear"))
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "rfe", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "rfe",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx, basename="SklearnRFE",
- methods=["transform"])
+ X, model, model_onnx, basename="SklearnRFE", methods=["transform"]
+ )
def test_rfecv_float(self):
model = RFECV(estimator=SVR(kernel="linear"), cv=3)
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "rfecv", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "rfecv",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, model, model_onnx, basename="SklearnRFECV",
- methods=["transform"])
+ X, model, model_onnx, basename="SklearnRFECV", methods=["transform"]
+ )
def test_select_fdr_float(self):
model = SelectFdr()
X, y = load_breast_cancer(return_X_y=True)
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fdr",
+ model,
+ "select fdr",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSelectFdr")
+ X.astype(np.float32), model, model_onnx, basename="SklearnSelectFdr"
+ )
def test_select_fpr_float(self):
model = SelectFpr()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fpr",
+ model,
+ "select fpr",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectFpr")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFpr")
def test_select_from_model_float(self):
model = SelectFromModel(estimator=SVR(kernel="linear"))
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select from model",
+ model,
+ "select from model",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectFromModel")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectFromModel")
def test_select_from_model_float_nomodel(self):
- model = SelectFromModel(
- estimator=SVR(kernel="linear"), threshold=1e5)
+ model = SelectFromModel(estimator=SVR(kernel="linear"), threshold=1e5)
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, "select from model",
+ model,
+ "select from model",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
def test_select_fwe_float(self):
model = SelectFwe()
X, y = load_breast_cancer(return_X_y=True)
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select fwe",
+ model,
+ "select fwe",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32),
- model, model_onnx, basename="SklearnSelectFwe")
+ X.astype(np.float32), model, model_onnx, basename="SklearnSelectFwe"
+ )
def test_select_k_best_float(self):
model = SelectKBest(k="all")
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select k best",
+ model,
+ "select k best",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectKBest")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectKBest")
def test_select_percentile_float(self):
model = SelectPercentile()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "select percentile",
+ model,
+ "select percentile",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSelectPercentile")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSelectPercentile")
def test_variance_threshold_float(self):
model = VarianceThreshold()
X = np.array(
- [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]],
- dtype=np.float32)
+ [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
+ )
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
- model, "variance threshold",
+ model,
+ "variance threshold",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnVarianceThreshold")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnVarianceThreshold")
if __name__ == "__main__":
diff --git a/tests/test_sklearn_feature_union.py b/tests/test_sklearn_feature_union.py
index b40774ad1..104523b1b 100644
--- a/tests/test_sklearn_feature_union.py
+++ b/tests/test_sklearn_feature_union.py
@@ -15,111 +15,134 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestSklearnAdaBoostModels(unittest.TestCase):
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
def test_feature_union_default(self):
data = load_iris()
X, y = data.data, data.target
X = X.astype(np.float32)
- X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
- random_state=42)
- model = FeatureUnion([('standard', StandardScaler()),
- ('minmax', MinMaxScaler())]).fit(X_train)
+ X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
+ model = FeatureUnion(
+ [("standard", StandardScaler()), ("minmax", MinMaxScaler())]
+ ).fit(X_train)
model_onnx = convert_sklearn(
- model, 'feature union',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "feature union",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X_test, model, model_onnx,
- basename="SklearnFeatureUnionDefault")
+ dump_data_and_model(
+ X_test, model, model_onnx, basename="SklearnFeatureUnionDefault"
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
def test_feature_union_nested(self):
data = load_iris()
X, y = data.data, data.target
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=42)
- model = FeatureUnion([
- ('features', FeatureUnion([
- ('standard', StandardScaler()),
- ])
- ),
- ]).fit(X_train)
+ X, y, test_size=0.5, random_state=42
+ )
+ model = FeatureUnion(
+ [
+ (
+ "features",
+ FeatureUnion(
+ [
+ ("standard", StandardScaler()),
+ ]
+ ),
+ ),
+ ]
+ ).fit(X_train)
model_onnx = convert_sklearn(
- model, 'feature union',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "feature union",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X_test, model, model_onnx,
- basename="SklearnFeatureUnionNested")
+ dump_data_and_model(
+ X_test, model, model_onnx, basename="SklearnFeatureUnionNested"
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
def test_feature_union_transformer_weights_0(self):
data = load_iris()
X, y = data.data, data.target
X = X.astype(np.float32)
- X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
- random_state=42)
- model = FeatureUnion([('standard', StandardScaler()),
- ('minmax', MinMaxScaler())],
- transformer_weights={'standard': 2, 'minmax': 4}
- ).fit(X_train)
+ X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
+ model = FeatureUnion(
+ [("standard", StandardScaler()), ("minmax", MinMaxScaler())],
+ transformer_weights={"standard": 2, "minmax": 4},
+ ).fit(X_train)
model_onnx = convert_sklearn(
- model, 'feature union',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "feature union",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X_test, model, model_onnx,
- basename="SklearnFeatureUnionTransformerWeights0")
+ dump_data_and_model(
+ X_test, model, model_onnx, basename="SklearnFeatureUnionTransformerWeights0"
+ )
def test_feature_union_transformer_weights_1(self):
data = load_digits()
X, y = data.data, data.target
X = X.astype(np.int64)
- X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
- random_state=42)
- model = FeatureUnion([('pca', PCA()),
- ('svd', TruncatedSVD())],
- transformer_weights={'pca': 10, 'svd': 3}
- ).fit(X_train)
+ X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
+ model = FeatureUnion(
+ [("pca", PCA()), ("svd", TruncatedSVD())],
+ transformer_weights={"pca": 10, "svd": 3},
+ ).fit(X_train)
model_onnx = convert_sklearn(
- model, 'feature union',
- [('input', Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "feature union",
+ [("input", Int64TensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnFeatureUnionTransformerWeights1-Dec4")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnFeatureUnionTransformerWeights1-Dec4",
+ )
def test_feature_union_transformer_weights_2(self):
data = load_digits()
X, y = data.data, data.target
X = X.astype(np.float32)
- X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
- random_state=42)
- model = FeatureUnion([('pca', PCA()),
- ('svd', TruncatedSVD())],
- transformer_weights={'pca': 10, 'svd': 3}
- ).fit(X_train)
+ X_train, X_test, *_ = train_test_split(X, y, test_size=0.5, random_state=42)
+ model = FeatureUnion(
+ [("pca", PCA()), ("svd", TruncatedSVD())],
+ transformer_weights={"pca": 10, "svd": 3},
+ ).fit(X_train)
model_onnx = convert_sklearn(
- model, 'feature union',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "feature union",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnFeatureUnionTransformerWeights2-Dec4")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnFeatureUnionTransformerWeights2-Dec4",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_function_transformer_converter.py b/tests/test_sklearn_function_transformer_converter.py
index c7a680cb7..41b67e367 100644
--- a/tests/test_sklearn_function_transformer_converter.py
+++ b/tests/test_sklearn_function_transformer_converter.py
@@ -25,8 +25,9 @@
class TestSklearnFunctionTransformerConverter(unittest.TestCase):
- @unittest.skipIf(ColumnTransformer is None,
- reason="ColumnTransformer introduced in 0.20")
+ @unittest.skipIf(
+ ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20"
+ )
def test_function_transformer(self):
def convert_dataframe_schema(df, drop=None):
inputs = []
@@ -51,28 +52,35 @@ def convert_dataframe_schema(df, drop=None):
             # behaviour is different across versions of scikit-learn.
data["X3"] = (y + 1).astype(np.int64)
- pipe = Pipeline(steps=[
- ("select",
- ColumnTransformer(
- [("id", FunctionTransformer(validate=True),
- ["X1", "X2", "X3"])])),
- ("logreg", LogisticRegression(max_iter=1400)),
- ])
+ pipe = Pipeline(
+ steps=[
+ (
+ "select",
+ ColumnTransformer(
+ [("id", FunctionTransformer(validate=True), ["X1", "X2", "X3"])]
+ ),
+ ),
+ ("logreg", LogisticRegression(max_iter=1400)),
+ ]
+ )
pipe.fit(data[["X1", "X2", "X3"]], y)
inputs = convert_dataframe_schema(data)
- model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer",
- inputs, target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ model_onnx = convert_sklearn(
+ pipe,
+ "scikit-learn function_transformer",
+ inputs,
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data[:5],
- pipe,
- model_onnx,
- basename="SklearnFunctionTransformer-DF")
+ data[:5], pipe, model_onnx, basename="SklearnFunctionTransformer-DF"
+ )
- @unittest.skipIf(ColumnTransformer is None,
- reason="ColumnTransformer introduced in 0.20")
+ @unittest.skipIf(
+ ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20"
+ )
def test_passthrough(self):
def convert_dataframe_schema(df, drop=None):
inputs = []
@@ -93,26 +101,34 @@ def convert_dataframe_schema(df, drop=None):
y = data.target
data = pandas.DataFrame(X, columns=["X1", "X2"])
- pipe = Pipeline(steps=[
- ("select",
- ColumnTransformer([("id", FunctionTransformer(), ["X1"]),
- ("id2", "passthrough", ["X2"])])),
- ("logreg", LogisticRegression()),
- ])
+ pipe = Pipeline(
+ steps=[
+ (
+ "select",
+ ColumnTransformer(
+ [
+ ("id", FunctionTransformer(), ["X1"]),
+ ("id2", "passthrough", ["X2"]),
+ ]
+ ),
+ ),
+ ("logreg", LogisticRegression()),
+ ]
+ )
pipe.fit(data[["X1", "X2"]], y)
inputs = convert_dataframe_schema(data)
- model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ pipe, "scikit-learn function_transformer", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data[:5],
- pipe,
- model_onnx,
- basename="SklearnFunctionTransformerPass-DF")
+ data[:5], pipe, model_onnx, basename="SklearnFunctionTransformerPass-DF"
+ )
- @unittest.skipIf(ColumnTransformer is None,
- reason="ColumnTransformer introduced in 0.20")
+ @unittest.skipIf(
+ ColumnTransformer is None, reason="ColumnTransformer introduced in 0.20"
+ )
def test_remainder_passthrough(self):
def convert_dataframe_schema(df, drop=None):
inputs = []
@@ -133,21 +149,27 @@ def convert_dataframe_schema(df, drop=None):
y = data.target
data = pandas.DataFrame(X, columns=["X1", "X2"])
- pipe = Pipeline(steps=[
- ("select",
- ColumnTransformer([("id", FunctionTransformer(), ["X1"])],
- remainder="passthrough")),
- ("logreg", LogisticRegression()),
- ])
+ pipe = Pipeline(
+ steps=[
+ (
+ "select",
+ ColumnTransformer(
+ [("id", FunctionTransformer(), ["X1"])], remainder="passthrough"
+ ),
+ ),
+ ("logreg", LogisticRegression()),
+ ]
+ )
pipe.fit(data[["X1", "X2"]], y)
inputs = convert_dataframe_schema(data)
- model_onnx = convert_sklearn(pipe, "scikit-learn function_transformer",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ pipe, "scikit-learn function_transformer", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data[:5], pipe, model_onnx,
- basename="SklearnFunctionTransformerPassRem-DF")
+ data[:5], pipe, model_onnx, basename="SklearnFunctionTransformerPassRem-DF"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_gamma_regressor.py b/tests/test_sklearn_gamma_regressor.py
index 751537836..e09a3cc48 100644
--- a/tests/test_sklearn_gamma_regressor.py
+++ b/tests/test_sklearn_gamma_regressor.py
@@ -4,6 +4,7 @@
import unittest
import numpy as np
+
try:
from sklearn.linear_model import GammaRegressor
except ImportError:
@@ -12,22 +13,19 @@
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- FloatTensorType, DoubleTensorType, Int64TensorType
+ FloatTensorType,
+ DoubleTensorType,
+ Int64TensorType,
)
-from test_utils import (
- dump_data_and_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, TARGET_OPSET
ort_version = ".".join(ort_version.split(".")[:2])
class TestGammaRegressorConverter(unittest.TestCase):
- @unittest.skipIf(GammaRegressor is None,
- reason="scikit-learn<1.0")
+ @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0")
def test_gamma_regressor_float(self):
-
model = GammaRegressor()
X = np.array([[1, 2], [2, 3], [3, 4], [4, 3]])
y = np.array([19, 26, 33, 30])
@@ -38,16 +36,19 @@ def test_gamma_regressor_float(self):
model,
"scikit-learn Gamma Regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
         self.assertIsNotNone(model_onnx)
- dump_data_and_model(test_x.astype(np.float32), model, model_onnx,
- basename="SklearnGammaRegressor")
+ dump_data_and_model(
+ test_x.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnGammaRegressor",
+ )
- @unittest.skipIf(GammaRegressor is None,
- reason="scikit-learn<1.0")
+ @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0")
def test_gamma_regressor_int(self):
-
model = GammaRegressor()
X = np.array([[10, 20], [20, 30], [30, 40], [40, 30]])
y = np.array([19, 26, 33, 30])
@@ -58,16 +59,16 @@ def test_gamma_regressor_int(self):
model,
"scikit-learn Gamma Regressor",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
         self.assertIsNotNone(model_onnx)
- dump_data_and_model(test_x.astype(np.int64), model, model_onnx,
- basename="SklearnGammaRegressor")
+ dump_data_and_model(
+ test_x.astype(np.int64), model, model_onnx, basename="SklearnGammaRegressor"
+ )
- @unittest.skipIf(GammaRegressor is None,
- reason="scikit-learn<1.0")
+ @unittest.skipIf(GammaRegressor is None, reason="scikit-learn<1.0")
def test_gamma_regressor_double(self):
-
model = GammaRegressor()
X = np.array([[1.1, 2.1], [2.3, 3.2], [3.2, 4.3], [4.2, 3.1]])
y = np.array([19, 26, 33, 30])
@@ -78,11 +79,16 @@ def test_gamma_regressor_double(self):
model,
"scikit-learn Gamma Regressor",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
         self.assertIsNotNone(model_onnx)
- dump_data_and_model(test_x.astype(np.double), model, model_onnx,
- basename="SklearnGammaRegressor")
+ dump_data_and_model(
+ test_x.astype(np.double),
+ model,
+ model_onnx,
+ basename="SklearnGammaRegressor",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_gaussian_mixture_converter.py b/tests/test_sklearn_gaussian_mixture_converter.py
index fa86fd14e..fb1de2731 100644
--- a/tests/test_sklearn_gaussian_mixture_converter.py
+++ b/tests/test_sklearn_gaussian_mixture_converter.py
@@ -4,6 +4,7 @@
import numpy as np
from sklearn.datasets import load_iris
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -17,9 +18,11 @@
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import FloatTensorType
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
+ dump_data_and_model,
+ TARGET_OPSET,
InferenceSessionEx as InferenceSession,
- ReferenceEvaluatorEx)
+ ReferenceEvaluatorEx,
+)
class TestGaussianMixtureConverter(unittest.TestCase):
@@ -43,9 +46,12 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None):
exp = model.score_samples(X)
expp = model.predict_proba(X)
onx = to_onnx(
- model, X[:1], target_opset=tg,
- options={id(model): {'score_samples': True}},
- black_op=black_op)
+ model,
+ X[:1],
+ target_opset=tg,
+ options={id(model): {"score_samples": True}},
+ black_op=black_op,
+ )
if ReferenceEvaluatorEx is None:
sess = None
else:
@@ -54,42 +60,40 @@ def _test_score(self, model, X, tg, decimal=5, black_op=None):
except NotImplementedError:
sess = None
if sess is not None:
- got = sess.run(None, {'X': X})
+ got = sess.run(None, {"X": X})
self.assertEqual(len(got), 3)
np.testing.assert_almost_equal(
- expp.ravel(), got[1].ravel(), decimal=decimal)
- np.testing.assert_almost_equal(
- exp.ravel(), got[2].ravel(), decimal=decimal)
+ expp.ravel(), got[1].ravel(), decimal=decimal
+ )
+ np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal)
try:
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except OrtFail as e:
- raise RuntimeError('Issue {}\n{}'.format(e, str(onx)))
- got = sess.run(None, {'X': X})
+ raise RuntimeError("Issue {}\n{}".format(e, str(onx)))
+ got = sess.run(None, {"X": X})
self.assertEqual(len(got), 3)
- np.testing.assert_almost_equal(
- expp.ravel(), got[1].ravel(), decimal=decimal)
- np.testing.assert_almost_equal(
- exp.ravel(), got[2].ravel(), decimal=decimal)
+ np.testing.assert_almost_equal(expp.ravel(), got[1].ravel(), decimal=decimal)
+ np.testing.assert_almost_equal(exp.ravel(), got[2].ravel(), decimal=decimal)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="Missing Gemm (11)")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="Missing Gemm (11)")
@ignore_warnings(category=UserWarning)
def test_model_gaussian_mixture_binary_classification(self):
- model, X = self._fit_model_binary_classification(
- GaussianMixture(), load_iris())
+ model, X = self._fit_model_binary_classification(GaussianMixture(), load_iris())
for tg in range(min(9, TARGET_OPSET), TARGET_OPSET + 1):
with self.subTest(target_opset=tg):
model_onnx = convert_sklearn(
- model, "gaussian_mixture",
+ model,
+ "gaussian_mixture",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=tg)
+ target_opset=tg,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinGaussianMixture")
+ X, model, model_onnx, basename="SklearnBinGaussianMixture"
+ )
self._test_score(model, X, tg)
@ignore_warnings(category=UserWarning)
@@ -97,38 +101,33 @@ def test_model_bayesian_mixture_binary_classification(self):
for cov in ["full", "tied", "diag", "spherical"]:
with self.subTest(cov=cov):
model, X = self._fit_model_binary_classification(
- BayesianGaussianMixture(), load_iris(),
- covariance_type=cov)
+ BayesianGaussianMixture(), load_iris(), covariance_type=cov
+ )
model_onnx = convert_sklearn(
model,
"gaussian_mixture",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinBayesianGaussianMixture")
+ X, model, model_onnx, basename="SklearnBinBayesianGaussianMixture"
+ )
self._test_score(model, X, TARGET_OPSET)
@ignore_warnings(category=UserWarning)
def test_model_gaussian_mixture_multiclass(self):
model, X = self._fit_model_multiclass_classification(
- GaussianMixture(), load_iris())
+ GaussianMixture(), load_iris()
+ )
model_onnx = convert_sklearn(
model,
"gaussian_mixture",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclGaussianMixture")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclGaussianMixture")
self._test_score(model, X, TARGET_OPSET)
@ignore_warnings(category=UserWarning)
@@ -137,167 +136,210 @@ def test_gaussian_mixture_comp2(self):
X = data.data
model = GaussianMixture(n_components=2)
model.fit(X)
- model_onnx = convert_sklearn(model, "GM",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32)[40:60],
model,
model_onnx,
basename="GaussianMixtureC2",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET)
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_full(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
- model_onnx = convert_sklearn(model, "GM",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32)[40:60],
model,
model_onnx,
basename="GaussianMixtureC2Full",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET)
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_tied(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='tied')
+ model = GaussianMixture(n_components=2, covariance_type="tied")
model.fit(X)
- model_onnx = convert_sklearn(model, "GM",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32)[40:60],
model,
model_onnx,
basename="GaussianMixtureC2Tied",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET)
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_diag(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='diag')
+ model = GaussianMixture(n_components=2, covariance_type="diag")
model.fit(X)
- model_onnx = convert_sklearn(model, "GM",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
- self.assertIn('ReduceLogSumExp', str(model_onnx))
+ model_onnx = convert_sklearn(
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
+ self.assertIn("ReduceLogSumExp", str(model_onnx))
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32)[40:60],
model,
model_onnx,
basename="GaussianMixtureC2Diag",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET, decimal=4)
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_spherical(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='spherical')
+ model = GaussianMixture(n_components=2, covariance_type="spherical")
model.fit(X)
model_onnx = convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32)[40:60],
- model, model_onnx,
+ model,
+ model_onnx,
basename="GaussianMixtureC2Spherical",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET, decimal=4)
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_full_black_op(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'Add'})
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"Add"},
+ )
model_onnx = convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'ReduceLogSumExp'})
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"ReduceLogSumExp"},
+ )
self.assertIsNotNone(model_onnx)
- self.assertNotIn('ReduceLogSumExp', str(model_onnx))
+ self.assertNotIn("ReduceLogSumExp", str(model_onnx))
dump_data_and_model(
X.astype(np.float32)[40:60],
model,
model_onnx,
basename="GaussianMixtureC2FullBL",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="OnnxEqual does not support float")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float")
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_full_black_op_noargmax(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=2, covariance_type='full')
+ model = GaussianMixture(n_components=2, covariance_type="full")
model.fit(X)
with self.assertRaises(RuntimeError):
convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET, black_op={'Add'})
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ black_op={"Add"},
+ )
model_onnx = convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- black_op={'ReduceLogSumExp', 'ArgMax'})
+ black_op={"ReduceLogSumExp", "ArgMax"},
+ )
self.assertIsNotNone(model_onnx)
- self.assertNotIn('ArgMax', str(model_onnx))
+ self.assertNotIn("ArgMax", str(model_onnx))
dump_data_and_model(
X.astype(np.float32)[40:60],
- model, model_onnx,
+ model,
+ model_onnx,
basename="GaussianMixtureC2FullBLNM",
- intermediate_steps=True)
+ intermediate_steps=True,
+ )
self._test_score(model, X, TARGET_OPSET)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="OnnxEqual does not support float")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="OnnxEqual does not support float")
@ignore_warnings(category=UserWarning)
def test_gaussian_mixture_full_black_op_noargmax_inf(self):
data = load_iris()
X = data.data
- model = GaussianMixture(n_components=10, covariance_type='full')
+ model = GaussianMixture(n_components=10, covariance_type="full")
model.fit(X)
model_onnx1 = convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'score_samples': True}})
+ options={id(model): {"score_samples": True}},
+ )
model_onnx2 = convert_sklearn(
- model, "GM", [("input", FloatTensorType([None, 4]))],
+ model,
+ "GM",
+ [("input", FloatTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'score_samples': True}},
- black_op={'ReduceLogSumExp', 'ArgMax'})
- self.assertNotIn('ArgMax', str(model_onnx2))
+ options={id(model): {"score_samples": True}},
+ black_op={"ReduceLogSumExp", "ArgMax"},
+ )
+ self.assertNotIn("ArgMax", str(model_onnx2))
sess1 = InferenceSession(
- model_onnx1.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res1 = sess1.run(None, {'input': (X[:5] * 1e2).astype(np.float32)})
+ model_onnx1.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res1 = sess1.run(None, {"input": (X[:5] * 1e2).astype(np.float32)})
a1, b1, c1 = res1
sess2 = InferenceSession(
- model_onnx2.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res2 = sess2.run(None, {'input': (X[:5] * 1e2).astype(np.float32)})
+ model_onnx2.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res2 = sess2.run(None, {"input": (X[:5] * 1e2).astype(np.float32)})
a2, b2, c2 = res2
self.assertEqual(b1.max(), b2.max())
@@ -306,8 +348,8 @@ def test_gaussian_mixture_full_black_op_noargmax_inf(self):
self.assertLess(abs(c1.min() - c2.min()) / c2.min(), 1e-5)
self._test_score(
- model, X, TARGET_OPSET, black_op={'ReduceLogSumExp', 'ArgMax'},
- decimal=2)
+ model, X, TARGET_OPSET, black_op={"ReduceLogSumExp", "ArgMax"}, decimal=2
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_gaussian_process_classifier.py b/tests/test_sklearn_gaussian_process_classifier.py
index 69c9ea956..df3050248 100644
--- a/tests/test_sklearn_gaussian_process_classifier.py
+++ b/tests/test_sklearn_gaussian_process_classifier.py
@@ -7,6 +7,7 @@
from numpy.testing import assert_almost_equal
import scipy
from onnxruntime import SessionOptions
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail
except ImportError:
@@ -14,6 +15,7 @@
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import __version__ as sklver
+
try:
from sklearn.gaussian_process import GaussianProcessClassifier
except ImportError:
@@ -22,33 +24,37 @@
from skl2onnx import to_onnx
from skl2onnx.helpers.onnx_helper import change_onnx_domain
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-sklver_ = ".".join(sklver.split('.')[:2])
+sklver_ = ".".join(sklver.split(".")[:2])
class TestSklearnGaussianProcessClassifier(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
try:
- from ortcustomops import (
- onnx_op, PyCustomOpDef, get_library_path)
+ from ortcustomops import onnx_op, PyCustomOpDef, get_library_path
except ImportError:
return
- @onnx_op(op_type="SolveFloat",
- inputs=[PyCustomOpDef.dt_float, PyCustomOpDef.dt_float],
- outputs=[PyCustomOpDef.dt_float])
+ @onnx_op(
+ op_type="SolveFloat",
+ inputs=[PyCustomOpDef.dt_float, PyCustomOpDef.dt_float],
+ outputs=[PyCustomOpDef.dt_float],
+ )
def solveopf(a, b):
# The user custom op implementation here.
return scipy.linalg.solve(a, b).astype(np.float32)
- @onnx_op(op_type="SolveDouble",
- inputs=[PyCustomOpDef.dt_double, PyCustomOpDef.dt_double],
- outputs=[PyCustomOpDef.dt_double])
+ @onnx_op(
+ op_type="SolveDouble",
+ inputs=[PyCustomOpDef.dt_double, PyCustomOpDef.dt_double],
+ outputs=[PyCustomOpDef.dt_double],
+ )
def solveopd(a, b):
# The user custom op implementation here.
return scipy.linalg.solve(a, b).astype(np.float64)
@@ -62,13 +68,11 @@ def fit_classification_model(self, gp, n_classes=2):
y = y % 2
elif n_classes != 3:
raise NotImplementedError("n_classes must be 2 or 3")
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, random_state=3)
+ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)
gp.fit(X_train, y_train)
return gp, X_test.astype(np.float32)
def common_test_gpc(self, dtype=np.float32, n_classes=2):
-
gp = GaussianProcessClassifier()
gp, X = self.fit_classification_model(gp, n_classes=n_classes)
@@ -78,53 +82,58 @@ def common_test_gpc(self, dtype=np.float32, n_classes=2):
else:
cls = DoubleTensorType
model_onnx = to_onnx(
- gp, initial_types=[('X', cls([None, None]))],
+ gp,
+ initial_types=[("X", cls([None, None]))],
target_opset=TARGET_OPSET,
- options={GaussianProcessClassifier: {
- 'zipmap': False, 'optim': 'cdist'}})
+ options={GaussianProcessClassifier: {"zipmap": False, "optim": "cdist"}},
+ )
self.assertTrue(model_onnx is not None)
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except OrtFail:
- if not hasattr(self, 'path'):
+ if not hasattr(self, "path"):
return
- suffix = 'Double' if dtype == np.float64 else 'Float'
+ suffix = "Double" if dtype == np.float64 else "Float"
# Operator Solve is missing
model_onnx = change_onnx_domain(
- model_onnx, {'Solve': ('Solve%s' % suffix, 'ai.onnx.contrib')})
+ model_onnx, {"Solve": ("Solve%s" % suffix, "ai.onnx.contrib")}
+ )
so = SessionOptions()
so.register_custom_ops_library(self.path)
sess = InferenceSession(
- model_onnx.SerializeToString(), so,
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), so, providers=["CPUExecutionProvider"]
+ )
- res = sess.run(None, {'X': X.astype(dtype)})
+ res = sess.run(None, {"X": X.astype(dtype)})
assert_almost_equal(res[0].ravel(), gp.predict(X).ravel())
- assert_almost_equal(res[1], gp.predict_proba(X),
- decimal=3)
+ assert_almost_equal(res[1], gp.predict_proba(X), decimal=3)
return
dt = 32 if dtype == np.float32 else 64
dump_data_and_model(
- X.astype(dtype), gp, model_onnx, verbose=False,
- basename="SklearnGaussianProcessRBFT%d%d" % (n_classes, dt))
+ X.astype(dtype),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBFT%d%d" % (n_classes, dt),
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="einsum")
- @unittest.skipIf(GaussianProcessClassifier is None,
- reason="scikit-learn is too old")
- @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"),
- reason="not available")
+ @unittest.skipIf(
+ GaussianProcessClassifier is None, reason="scikit-learn is too old"
+ )
+ @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), reason="not available")
def test_gpc_float_bin(self):
self.common_test_gpc(dtype=np.float32)
@unittest.skipIf(TARGET_OPSET < 12, reason="einsum, reciprocal")
- @unittest.skipIf(GaussianProcessClassifier is None,
- reason="scikit-learn is too old")
- @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"),
- reason="not available")
+ @unittest.skipIf(
+ GaussianProcessClassifier is None, reason="scikit-learn is too old"
+ )
+ @unittest.skipIf(pv.Version(sklver_) < pv.Version("0.22"), reason="not available")
def test_gpc_double_bin(self):
self.common_test_gpc(dtype=np.float64)
diff --git a/tests/test_sklearn_gaussian_process_regressor.py b/tests/test_sklearn_gaussian_process_regressor.py
index 5768b3da5..1c1165d32 100644
--- a/tests/test_sklearn_gaussian_process_regressor.py
+++ b/tests/test_sklearn_gaussian_process_regressor.py
@@ -14,9 +14,17 @@
from sklearn.datasets import load_iris, make_regression, make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
- Sum, DotProduct, ExpSineSquared, RationalQuadratic,
- RBF, ConstantKernel as C, PairwiseKernel, WhiteKernel)
+ Sum,
+ DotProduct,
+ ExpSineSquared,
+ RationalQuadratic,
+ RBF,
+ ConstantKernel as C,
+ PairwiseKernel,
+ WhiteKernel,
+)
from sklearn.model_selection import train_test_split
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -28,9 +36,11 @@
from skl2onnx import to_onnx
from skl2onnx.proto import get_latest_tested_opset_version
from skl2onnx.operator_converters.gaussian_process import (
- convert_kernel, convert_kernel_diag
+ convert_kernel,
+ convert_kernel_diag,
)
from onnxruntime import SessionOptions
+
try:
from onnxruntime import GraphOptimizationLevel
except ImportError:
@@ -41,14 +51,19 @@
NotImplemented = RuntimeError
from onnxruntime import __version__ as ort_version
from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ fit_regression_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
_TARGET_OPSET_ = min(get_latest_tested_opset_version(), TARGET_OPSET)
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
-Xtrain_ = pd.read_csv(StringIO("""
+Xtrain_ = pd.read_csv(
+ StringIO(
+ """
1.000000000000000000e+02,1.158972369426435591e+02,5.667579938823991137e-01,2.264397682069040421e-02,1.182166076334919581e-02,2.600819340784729095e-01
1.000000000000000000e+02,8.493978168996618194e+01,2.775702708579337874e-01,1.887456201351307358e-02,2.912599235354124821e-02,2.327206144705836199e-01
1.000000000000000000e+02,8.395765637241281354e+01,7.760226193410907358e-01,2.139558949508506974e-02,1.944769253403489523e-02,5.462612465817335838e-01
@@ -59,9 +74,16 @@
1.000000000000000000e+02,8.121250906502669409e+01,1.865077048426986073e+00,2.182149790268794742e-02,4.300530595437276893e-02,5.083327963416256479e-01
1.000000000000000000e+02,8.612638714481262525e+01,2.717895097207565502e-01,2.029318789405683970e-02,2.387016690377936207e-02,1.889736980423707968e-01
1.000000000000000000e+02,7.377491009582655579e+01,7.210994150180145557e-01,2.239484250704669444e-02,1.642684033674572316e-02,4.341188586319142395e-01
-""".strip("\n\r ")), header=None).values
-
-Xtest_ = pd.read_csv(StringIO("""
+""".strip(
+ "\n\r "
+ )
+ ),
+ header=None,
+).values
+
+Xtest_ = pd.read_csv(
+ StringIO(
+ """
1.000000000000000000e+02,1.061277971307766705e+02,1.472195004809226493e+00,2.307125069497626552e-02,4.539948095743629591e-02,2.855191098141335870e-01
1.000000000000000000e+02,9.417031896832908444e+01,1.249743892709246573e+00,2.370416174339620707e-02,2.613847280316268853e-02,5.097165413593484073e-01
1.000000000000000000e+02,9.305231488674536422e+01,1.795726729335217264e+00,2.473274733802270642e-02,1.349765645107412620e-02,9.410288840541443378e-02
@@ -72,9 +94,16 @@
1.000000000000000000e+02,1.228982583299257101e+02,1.115599996405831629e+00,1.929354155079938959e-02,3.056996308544096715e-03,1.197052763998271013e-01
1.000000000000000000e+02,1.160303269386108838e+02,1.018627021014927303e+00,2.248784981616459844e-02,2.688111547114307651e-02,3.326105131778724355e-01
1.000000000000000000e+02,1.163414374640396005e+02,6.644299545804077667e-01,1.508088417713602906e-02,4.451836657613789106e-02,3.245643044204808425e-01
-""".strip("\n\r ")), header=None).values
-
-Ytrain_ = pd.read_csv(StringIO("""
+""".strip(
+ "\n\r "
+ )
+ ),
+ header=None,
+).values
+
+Ytrain_ = pd.read_csv(
+ StringIO(
+ """
1.810324564191880370e+01
4.686462914930641377e-01
1.032271142638131778e+01
@@ -85,9 +114,16 @@
1.652864171243088975e+01
2.491797751537555006e-01
3.413210402096089169e+00
-""".strip("\n\r ")), header=None).values
-
-Ytest_ = pd.read_csv(StringIO("""
+""".strip(
+ "\n\r "
+ )
+ ),
+ header=None,
+).values
+
+Ytest_ = pd.read_csv(
+ StringIO(
+ """
1.836586066727948463e+01
1.848708258852349573e+01
1.641115566770171341e+00
@@ -98,7 +134,12 @@
2.289825832992571009e+01
2.353204496952379898e+01
2.237280571788585348e+01
-""".strip("\n\r ")), header=None).values
+""".strip(
+ "\n\r "
+ )
+ ),
+ header=None,
+).values
THRESHOLD = "0.4.0"
@@ -106,16 +147,22 @@
class TestSklearnGaussianProcessRegressor(unittest.TestCase):
-
def remove_dim1(self, arr):
new_shape = tuple(v for v in arr.shape if v != 1)
if new_shape != arr.shape:
arr = arr.reshape(new_shape)
return arr
- def check_outputs(self, model, model_onnx, Xtest,
- predict_attributes, decimal=5,
- skip_if_float32=False, disable_optimisation=True):
+ def check_outputs(
+ self,
+ model,
+ model_onnx,
+ Xtest,
+ predict_attributes,
+ decimal=5,
+ skip_if_float32=False,
+ disable_optimisation=True,
+ ):
if "TransposeScaleMatMul" in str(model_onnx):
raise RuntimeError("This node must not be added.")
if predict_attributes is None:
@@ -123,16 +170,17 @@ def check_outputs(self, model, model_onnx, Xtest,
exp = model.predict(Xtest, **predict_attributes)
if disable_optimisation and GraphOptimizationLevel is not None:
opts = SessionOptions()
- opts.graph_optimization_level = (
- GraphOptimizationLevel.ORT_DISABLE_ALL)
+ opts.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
sess = InferenceSession(
- model_onnx.SerializeToString(), sess_options=opts,
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(),
+ sess_options=opts,
+ providers=["CPUExecutionProvider"],
+ )
else:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': Xtest})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": Xtest})
if isinstance(exp, tuple):
if len(exp) != len(got):
raise AssertionError("Mismatched number of outputs.")
@@ -140,636 +188,750 @@ def check_outputs(self, model, model_onnx, Xtest,
if skip_if_float32 and g.dtype == np.float32:
continue
try:
- assert_almost_equal(self.remove_dim1(e),
- self.remove_dim1(g),
- decimal=decimal)
+ assert_almost_equal(
+ self.remove_dim1(e), self.remove_dim1(g), decimal=decimal
+ )
except AssertionError as e: # noqa
raise AssertionError(
"Mismatch for output {} and attributes {}"
- ".".format(i, predict_attributes)) from e
+ ".".format(i, predict_attributes)
+ ) from e
else:
if skip_if_float32 and Xtest.dtype == np.float32:
return
- assert_almost_equal(np.squeeze(exp),
- np.squeeze(got), decimal=decimal)
+ assert_almost_equal(np.squeeze(exp), np.squeeze(got), decimal=decimal)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_constant1(self):
- ker = C(5.)
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker = C(5.0)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_cosine_float(self):
- ker = PairwiseKernel(metric='cosine')
+ ker = PairwiseKernel(metric="cosine")
# X, X
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
x = np.random.randn(4, 3)
- x[0, 0] = x[1, 1] = x[2, 2] = 10.
- x[3, 2] = 5.
+ x[0, 0] = x[1, 1] = x[2, 2] = 10.0
+ x[3, 2] = 5.0
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': x.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": x.astype(np.float32)})[0]
m1 = res
m2 = ker(x)
assert_almost_equal(m1, m2, decimal=5)
# X, x
- onx = convert_kernel(ker, 'X', x_train=x,
- output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker,
+ "X",
+ x_train=x,
+ output_names=["Y"],
+ dtype=np.float32,
+ op_version=_TARGET_OPSET_,
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': x.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": x.astype(np.float32)})[0]
m1 = res
m2 = ker(x)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_cosine_double(self):
- ker = PairwiseKernel(metric='cosine')
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float64,
- op_version=_TARGET_OPSET_)
+ ker = PairwiseKernel(metric="cosine")
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float64, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", DoubleTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
x = np.random.randn(4, 3)
- x[0, 0] = x[1, 1] = x[2, 2] = 10.
- x[3, 2] = 5.
+ x[0, 0] = x[1, 1] = x[2, 2] = 10.0
+ x[3, 2] = 5.0
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except NotImplemented as e:
if "NOT_IMPLEMENTED" in str(e):
# Failed to find kernel for FusedMatMul(1).
return
raise e
- res = sess.run(None, {'X': x.astype(np.float64)})[0]
+ res = sess.run(None, {"X": x.astype(np.float64)})[0]
m1 = res
m2 = ker(x)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rbf1(self):
ker = RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
- model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))])
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
+ model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))])
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rbf1_anisotropic(self):
- ker = RBF(length_scale=np.array([1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
- dtype=np.float32),
- length_scale_bounds=(1e-3, 1e3))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
- model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))])
+ ker = RBF(
+ length_scale=np.array([1.1, 1.2, 1.3, 1.4, 1.5, 1.6], dtype=np.float32),
+ length_scale_bounds=(1e-3, 1e3),
+ )
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
+ model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))])
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rbf10(self):
ker = RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
- model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))])
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
+ model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))])
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rbf2(self):
ker = RBF(length_scale=1, length_scale_bounds="fixed")
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
- model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))])
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
+ model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))])
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rbf_mul(self):
- ker = (C(1.0, constant_value_bounds="fixed") *
- RBF(1.0, length_scale_bounds="fixed"))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker = C(1.0, constant_value_bounds="fixed") * RBF(
+ 1.0, length_scale_bounds="fixed"
+ )
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_ker1_def(self):
- ker = (C(1.0, (1e-3, 1e3)) *
- RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker = C(1.0, (1e-3, 1e3)) * RBF(
+ length_scale=10, length_scale_bounds=(1e-3, 1e3)
+ )
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_ker12_def(self):
- ker = (Sum(C(0.1, (1e-3, 1e3)), C(0.1, (1e-3, 1e3)) *
- RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker = Sum(
+ C(0.1, (1e-3, 1e3)),
+ C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)),
+ )
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_ker2_def(self):
ker = Sum(
- C(0.1, (1e-3, 1e3)) * RBF(length_scale=10,
- length_scale_bounds=(1e-3, 1e3)),
- C(0.1, (1e-3, 1e3)) * RBF(length_scale=1,
- length_scale_bounds=(1e-3, 1e3)))
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ C(0.1, (1e-3, 1e3)) * RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)),
+ C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)),
+ )
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=0)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_ker2_dotproduct(self):
- ker = DotProduct(sigma_0=2.)
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker = DotProduct(sigma_0=2.0)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType())],
- outputs=[('Y', FloatTensorType())],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType())],
+ outputs=[("Y", FloatTensorType())],
+ target_opset=_TARGET_OPSET_,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
x = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
- res = sess.run(None, {'X': x})
+ res = sess.run(None, {"X": x})
m1 = res[0]
m2 = ker(x)
assert_almost_equal(m1, m2, decimal=5)
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})
m1 = res[0]
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=2)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_ker2_exp_sine_squared(self):
ker = ExpSineSquared()
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=4)
- onx = convert_kernel(ker, 'X', output_names=['Z'],
- x_train=(Xtest_ * 2).astype(np.float32),
- dtype=np.float32, op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker,
+ "X",
+ output_names=["Z"],
+ x_train=(Xtest_ * 2).astype(np.float32),
+ dtype=np.float32,
+ op_version=_TARGET_OPSET_,
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_, Xtest_ * 2)
assert_almost_equal(m1, m2, decimal=4)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_exp_sine_squared_diag(self):
ker = ExpSineSquared()
onx = convert_kernel_diag(
- ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker.diag(Xtest_)
assert_almost_equal(m1, m2, decimal=4)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rational_quadratic_diag(self):
ker = RationalQuadratic()
onx = convert_kernel_diag(
- ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker.diag(Xtest_)
assert_almost_equal(m1, m2, decimal=4)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_dot_product_diag(self):
ker = DotProduct()
onx = convert_kernel_diag(
- ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker.diag(Xtest_)
assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_dot_product(self):
ker = DotProduct()
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5)
- onx = convert_kernel(ker, 'X', output_names=['Z'],
- x_train=(Xtest_ * 2).astype(np.float32),
- dtype=np.float32, op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker,
+ "X",
+ output_names=["Z"],
+ x_train=(Xtest_ * 2).astype(np.float32),
+ dtype=np.float32,
+ op_version=_TARGET_OPSET_,
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_, Xtest_ * 2)
assert_almost_equal(m1 / 1000, m2 / 1000, decimal=5)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_rational_quadratic(self):
ker = RationalQuadratic()
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=_TARGET_OPSET_
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_)
assert_almost_equal(m1, m2, decimal=5)
- onx = convert_kernel(ker, 'X', output_names=['Z'],
- x_train=(Xtest_ * 2).astype(np.float32),
- dtype=np.float32, op_version=_TARGET_OPSET_)
- model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))])
+ onx = convert_kernel(
+ ker,
+ "X",
+ output_names=["Z"],
+ x_train=(Xtest_ * 2).astype(np.float32),
+ dtype=np.float32,
+ op_version=_TARGET_OPSET_,
+ )
+ model_onnx = onx.to_onnx(inputs=[("X", FloatTensorType([None, None]))])
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": Xtest_.astype(np.float32)})[0]
m1 = res
m2 = ker(Xtest_, Xtest_ * 2)
assert_almost_equal(m1, m2, decimal=3)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_unfitted(self):
-        se = (C(1.0, (1e-3, 1e3)) *
-              RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)))
-        kernel = (Sum(se, C(0.1, (1e-3, 1e3)) *
-              RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))
-
-        gp = GaussianProcessRegressor(alpha=1e-5, kernel=kernel,
-                                      n_restarts_optimizer=25,
-                                      normalize_y=True)
+        se = C(1.0, (1e-3, 1e3)) * RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3))
+        kernel = Sum(
+            se,
+            C(0.1, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3)),
+        )
+        gp = GaussianProcessRegressor(
+            alpha=1e-5, kernel=kernel, n_restarts_optimizer=25, normalize_y=True
+        )
# return_cov=False, return_std=False
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([]))],
- target_opset=_TARGET_OPSET_)
+ gp, initial_types=[("X", FloatTensorType([]))], target_opset=_TARGET_OPSET_
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessRBFUnfitted")
+ dump_data_and_model(
+ Xtest_.astype(np.float32),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBFUnfitted",
+ )
# return_cov=True, return_std=True
- options = {GaussianProcessRegressor: {"return_std": True,
- "return_cov": True}}
+ options = {GaussianProcessRegressor: {"return_std": True, "return_cov": True}}
try:
- to_onnx(gp, Xtrain_.astype(np.float32), options=options,
- target_opset=TARGET_OPSET)
+ to_onnx(
+ gp,
+ Xtrain_.astype(np.float32),
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
except RuntimeError as e:
assert "Not returning standard deviation" in str(e)
# return_std=True
options = {GaussianProcessRegressor: {"return_std": True}}
model_onnx = to_onnx(
- gp, options=options,
- initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ options=options,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32),
- predict_attributes=options[
- GaussianProcessRegressor])
+ self.check_outputs(
+ gp,
+ model_onnx,
+ Xtest_.astype(np.float32),
+ predict_attributes=options[GaussianProcessRegressor],
+ )
# return_cov=True
options = {GaussianProcessRegressor: {"return_cov": True}}
# model_onnx = to_onnx(gp, Xtrain_.astype(np.float32), options=options)
model_onnx = to_onnx(
- gp, options=options,
- initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ options=options,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32),
- predict_attributes=options[
- GaussianProcessRegressor])
+ self.check_outputs(
+ gp,
+ model_onnx,
+ Xtest_.astype(np.float32),
+ predict_attributes=options[GaussianProcessRegressor],
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.6.0"),
- reason="shape_inference fails")
+ pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_true(self):
-
- gp = GaussianProcessRegressor(alpha=1e-5,
- n_restarts_optimizer=25,
- normalize_y=True)
+ gp = GaussianProcessRegressor(
+ alpha=1e-5, n_restarts_optimizer=25, normalize_y=True
+ )
gp, X = fit_regression_model(gp)
# return_cov=False, return_std=False
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X.astype(np.float64), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessRBFTDouble")
+ dump_data_and_model(
+ X.astype(np.float64),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBFTDouble",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.6.0"),
- reason="shape_inference fails")
+ pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_cosine_fitted_true_float(self):
- gp = GaussianProcessRegressor(alpha=1e-5,
- n_restarts_optimizer=25,
- normalize_y=False,
- kernel=PairwiseKernel(metric='cosine'))
- gp, X = fit_regression_model(
- gp, n_features=2, n_samples=20, factor=0.01)
+ gp = GaussianProcessRegressor(
+ alpha=1e-5,
+ n_restarts_optimizer=25,
+ normalize_y=False,
+ kernel=PairwiseKernel(metric="cosine"),
+ )
+ gp, X = fit_regression_model(gp, n_features=2, n_samples=20, factor=0.01)
# return_cov=False, return_std=False
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X.astype(np.float32), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessCosineFloat-Dec2")
+ dump_data_and_model(
+ X.astype(np.float32),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessCosineFloat-Dec2",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.6.0"),
- reason="shape_inference fails")
+ pv.Version(ort_version) < pv.Version("1.6.0"), reason="shape_inference fails"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_cosine_fitted_true_double(self):
- gp = GaussianProcessRegressor(alpha=1e-5,
- n_restarts_optimizer=25,
- normalize_y=False,
- kernel=PairwiseKernel(metric='cosine'))
- gp, X = fit_regression_model(
- gp, n_features=2, n_samples=20, factor=0.01)
+ gp = GaussianProcessRegressor(
+ alpha=1e-5,
+ n_restarts_optimizer=25,
+ normalize_y=False,
+ kernel=PairwiseKernel(metric="cosine"),
+ )
+ gp, X = fit_regression_model(gp, n_features=2, n_samples=20, factor=0.01)
# return_cov=False, return_std=False
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X.astype(np.float64), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessCosineDouble")
+ dump_data_and_model(
+ X.astype(np.float64),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessCosineDouble",
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_false(self):
-
- gp = GaussianProcessRegressor(alpha=1e-5,
- n_restarts_optimizer=25,
- normalize_y=False)
+ gp = GaussianProcessRegressor(
+ alpha=1e-5, n_restarts_optimizer=25, normalize_y=False
+ )
gp.fit(Xtrain_, Ytrain_)
# return_cov=False, return_std=False
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessRBF-Dec4")
+ dump_data_and_model(
+ Xtest_.astype(np.float32),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBF-Dec4",
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_true(self):
- gp = GaussianProcessRegressor(alpha=1e-5,
- n_restarts_optimizer=25,
- normalize_y=True)
+ gp = GaussianProcessRegressor(
+ alpha=1e-5, n_restarts_optimizer=25, normalize_y=True
+ )
gp.fit(Xtrain_, Ytrain_)
# return_cov=False, return_std=False
options = {GaussianProcessRegressor: {"return_std": True}}
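+        # Converting with return_std=True before predict(..., return_std=True) has
+        # been called on the fitted model should raise an error (checked below).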
try:
to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
except RuntimeError as e:
assert "The method *predict* must be called" in str(e)
gp.predict(Xtrain_, return_std=True)
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- self.check_outputs(gp, model_onnx, Xtest_.astype(np.float32),
- predict_attributes=options[
- GaussianProcessRegressor],
- decimal=4, disable_optimisation=True)
- dump_data_and_model(Xtest_.astype(np.float32), gp, model_onnx,
- verbose=False,
- basename="SklearnGaussianProcessRBFStd-Out0",
- disable_optimisation=True)
+ self.check_outputs(
+ gp,
+ model_onnx,
+ Xtest_.astype(np.float32),
+ predict_attributes=options[GaussianProcessRegressor],
+ decimal=4,
+ disable_optimisation=True,
+ )
+ dump_data_and_model(
+ Xtest_.astype(np.float32),
+ gp,
+ model_onnx,
+ verbose=False,
+ basename="SklearnGaussianProcessRBFStd-Out0",
+ disable_optimisation=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
- @unittest.skipIf(
- TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
+ reason="onnxruntime %s" % THRESHOLD,
+ )
+ @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):
state = np.random.RandomState(0)
@@ -780,8 +942,11 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):
X_train, X_test, y_train, _ = train_test_split(X, y)
gp = GaussianProcessRegressor(
kernel=ExpSineSquared(periodicity_bounds=(1e-10, 1e10)),
- alpha=1e-7, n_restarts_optimizer=25, normalize_y=True,
- random_state=1)
+ alpha=1e-7,
+ n_restarts_optimizer=25,
+ normalize_y=True,
+ random_state=1,
+ )
try:
gp.fit(X_train, y_train)
except (AttributeError, TypeError):
@@ -792,22 +957,33 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):
options = {GaussianProcessRegressor: {"return_std": True}}
gp.predict(X_train, return_std=True)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test.astype(np.float64), gp, model_onnx,
+ X_test.astype(np.float64),
+ gp,
+ model_onnx,
verbose=False,
basename="SklearnGaussianProcessExpSineSquaredStdT-Out0-Dec2",
- disable_optimisation=True)
- self.check_outputs(gp, model_onnx, X_test.astype(np.float64),
- predict_attributes=options[
- GaussianProcessRegressor],
- decimal=4, disable_optimisation=True)
+ disable_optimisation=True,
+ )
+ self.check_outputs(
+ gp,
+ model_onnx,
+ X_test.astype(np.float64),
+ predict_attributes=options[GaussianProcessRegressor],
+ decimal=4,
+ disable_optimisation=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):
X = 15 * np.random.rand(100, 2)
@@ -816,8 +992,11 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):
X_train, X_test, y_train, _ = train_test_split(X, y)
gp = GaussianProcessRegressor(
kernel=ExpSineSquared(periodicity_bounds=(1e-10, 1e10)),
- alpha=1e-7, n_restarts_optimizer=20, normalize_y=False,
- random_state=0)
+ alpha=1e-7,
+ n_restarts_optimizer=20,
+ normalize_y=False,
+ random_state=0,
+ )
try:
gp.fit(X_train, y_train)
except (AttributeError, TypeError):
@@ -828,70 +1007,93 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):
options = {GaussianProcessRegressor: {"return_std": True}}
gp.predict(X_train, return_std=True)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test.astype(np.float64), gp, model_onnx,
+ X_test.astype(np.float64),
+ gp,
+ model_onnx,
verbose=False,
- basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3")
- self.check_outputs(gp, model_onnx, X_test.astype(np.float64),
- predict_attributes=options[
- GaussianProcessRegressor],
- decimal=3)
+ basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3",
+ )
+ self.check_outputs(
+ gp,
+ model_onnx,
+ X_test.astype(np.float64),
+ predict_attributes=options[GaussianProcessRegressor],
+ decimal=3,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_exp_sine_squared_double_true(self):
-
- gp = GaussianProcessRegressor(kernel=ExpSineSquared(),
- alpha=1e-7,
- n_restarts_optimizer=15,
- normalize_y=True)
+ gp = GaussianProcessRegressor(
+ kernel=ExpSineSquared(),
+ alpha=1e-7,
+ n_restarts_optimizer=15,
+ normalize_y=True,
+ )
try:
gp.fit(Xtrain_, Ytrain_)
except (AttributeError, TypeError) as e:
# unstable issue fixed with scikit-learn>=0.24
warnings.warn(
- "Training did not converge but fails at raising "
- "a warning: %r." % e)
+                "Training did not converge but fails at raising a warning: %r." % e
+ )
return
# return_cov=False, return_std=False
options = {GaussianProcessRegressor: {"return_std": True}}
gp.predict(Xtrain_, return_std=True)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- Xtest_.astype(np.float64), gp, model_onnx,
+ Xtest_.astype(np.float64),
+ gp,
+ model_onnx,
verbose=False,
basename="SklearnGaussianProcessExpSineSquaredStdDouble-Out0-Dec3",
- disable_optimisation=True)
- self.check_outputs(gp, model_onnx, Xtest_.astype(np.float64),
- predict_attributes=options[
- GaussianProcessRegressor],
- decimal=3, disable_optimisation=True)
+ disable_optimisation=True,
+ )
+ self.check_outputs(
+ gp,
+ model_onnx,
+ Xtest_.astype(np.float64),
+ predict_attributes=options[GaussianProcessRegressor],
+ decimal=3,
+ disable_optimisation=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
- @unittest.skipIf(
- TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
+ reason="onnxruntime %s" % THRESHOLD,
+ )
+ @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_dot_product_true(self):
X = 15 * np.random.rand(100, 2)
y = np.sin(X[:, 0] - X[:, 1]).ravel()
y += 0.5 * (0.5 - np.random.rand(X.shape[0]))
X_train, X_test, y_train, _ = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=DotProduct(),
- alpha=1e-2,
- n_restarts_optimizer=25,
- normalize_y=True,
- random_state=0)
+ gp = GaussianProcessRegressor(
+ kernel=DotProduct(),
+ alpha=1e-2,
+ n_restarts_optimizer=25,
+ normalize_y=True,
+ random_state=0,
+ )
try:
gp.fit(X_train, y_train)
except (AttributeError, TypeError):
@@ -903,32 +1105,43 @@ def test_gpr_rbf_fitted_return_std_dot_product_true(self):
# return_cov=False, return_std=False
options = {GaussianProcessRegressor: {"return_std": True}}
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test.astype(np.float64), gp, model_onnx,
+ X_test.astype(np.float64),
+ gp,
+ model_onnx,
basename="SklearnGaussianProcessDotProductStdDouble-Out0-Dec3",
- disable_optimisation=True)
- self.check_outputs(gp, model_onnx, X_test.astype(np.float64),
- predict_attributes=options[
- GaussianProcessRegressor],
- decimal=3, disable_optimisation=True)
+ disable_optimisation=True,
+ )
+ self.check_outputs(
+ gp,
+ model_onnx,
+ X_test.astype(np.float64),
+ predict_attributes=options[GaussianProcessRegressor],
+ decimal=3,
+ disable_optimisation=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
- @unittest.skipIf(
- TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
+ reason="onnxruntime %s" % THRESHOLD,
+ )
+ @unittest.skipIf(TARGET_OPSET >= 12, reason="TARGET_OPSET < 12")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):
-
X, y = make_regression(n_features=2, n_informative=2, random_state=2)
X_train, X_test, y_train, _ = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=RationalQuadratic(),
- alpha=1e-3,
- n_restarts_optimizer=25,
- normalize_y=True)
+ gp = GaussianProcessRegressor(
+ kernel=RationalQuadratic(),
+ alpha=1e-3,
+ n_restarts_optimizer=25,
+ normalize_y=True,
+ )
try:
gp.fit(X_train, y_train)
except (AttributeError, TypeError):
@@ -939,21 +1152,31 @@ def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):
# return_cov=False, return_std=False
options = {GaussianProcessRegressor: {"return_std": True}}
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- options=options, target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test.astype(np.float64), gp, model_onnx,
+ X_test.astype(np.float64),
+ gp,
+ model_onnx,
basename="SklearnGaussianProcessRationalQuadraticStdDouble-Out0",
- disable_optimisation=True)
- self.check_outputs(gp, model_onnx, X_test.astype(np.float64),
- predict_attributes=options[
- GaussianProcessRegressor],
- disable_optimisation=True)
+ disable_optimisation=True,
+ )
+ self.check_outputs(
+ gp,
+ model_onnx,
+ X_test.astype(np.float64),
+ predict_attributes=options[GaussianProcessRegressor],
+ disable_optimisation=True,
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_fitted_shapes(self):
data = load_iris()
@@ -964,26 +1187,31 @@ def test_gpr_fitted_shapes(self):
gp.fit(X_train, y_train)
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.check_outputs(gp, model_onnx, X_test, {}, skip_if_float32=True)
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD),
- reason="onnxruntime %s" % THRESHOLD)
+ reason="onnxruntime %s" % THRESHOLD,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_fitted_partial_float64(self):
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=DotProduct(), alpha=10.)
+ gp = GaussianProcessRegressor(kernel=DotProduct(), alpha=10.0)
gp.fit(X_train, y_train)
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- target_opset=_TARGET_OPSET_)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ target_opset=_TARGET_OPSET_,
+ )
self.assertTrue(model_onnx is not None)
try:
self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
@@ -991,39 +1219,46 @@ def test_gpr_fitted_partial_float64(self):
assert "Max relative difference:" in str(e)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.check_outputs(gp, model_onnx, X_test, {})
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD2),
- reason="onnxruntime %s" % THRESHOLD)
+        reason="onnxruntime %s" % THRESHOLD2,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_fitted_partial_float64_operator_cdist_rbf(self):
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=RBF(), alpha=10.)
+ gp = GaussianProcessRegressor(kernel=RBF(), alpha=10.0)
gp.fit(X_train, y_train)
try:
to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'CDIST'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "CDIST"}},
+ target_opset=TARGET_OPSET,
+ )
raise AssertionError("CDIST is not implemented")
except ValueError:
pass
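+        # Only the lowercase "cdist" value is accepted for the optim option.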
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'cdist'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "cdist"}},
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- name_save = inspect.currentframe().f_code.co_name + '.onnx'
- with open(name_save, 'wb') as f:
+ name_save = inspect.currentframe().f_code.co_name + ".onnx"
+ with open(name_save, "wb") as f:
f.write(model_onnx.SerializeToString())
try:
self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
@@ -1034,39 +1269,46 @@ def test_gpr_fitted_partial_float64_operator_cdist_rbf(self):
assert "Max relative difference:" in str(e)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.check_outputs(gp, model_onnx, X_test, {})
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD2),
- reason="onnxruntime %s" % THRESHOLD)
+        reason="onnxruntime %s" % THRESHOLD2,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_fitted_partial_float64_operator_cdist_sine(self):
data = load_iris()
X = data.data[:, :2]
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=ExpSineSquared(), alpha=100.)
+ gp = GaussianProcessRegressor(kernel=ExpSineSquared(), alpha=100.0)
gp.fit(X_train, y_train)
try:
to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'CDIST'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "CDIST"}},
+ target_opset=TARGET_OPSET,
+ )
raise AssertionError("CDIST is not implemented")
except ValueError:
pass
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'cdist'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "cdist"}},
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- name_save = inspect.currentframe().f_code.co_name + '.onnx'
- with open(name_save, 'wb') as f:
+ name_save = inspect.currentframe().f_code.co_name + ".onnx"
+ with open(name_save, "wb") as f:
f.write(model_onnx.SerializeToString())
try:
self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
@@ -1077,39 +1319,46 @@ def test_gpr_fitted_partial_float64_operator_cdist_sine(self):
assert "Max relative difference:" in str(e)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.check_outputs(gp, model_onnx, X_test, {})
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version(THRESHOLD2),
- reason="onnxruntime %s" % THRESHOLD)
+        reason="onnxruntime %s" % THRESHOLD2,
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_gpr_fitted_partial_float64_operator_cdist_quad(self):
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
- gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.)
+ gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.0)
gp.fit(X_train, y_train)
try:
to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'CDIST'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "CDIST"}},
+ target_opset=TARGET_OPSET,
+ )
raise AssertionError("CDIST is not implemented")
except ValueError:
pass
model_onnx = to_onnx(
- gp, initial_types=[('X', FloatTensorType([None, None]))],
- options={GaussianProcessRegressor: {'optim': 'cdist'}},
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", FloatTensorType([None, None]))],
+ options={GaussianProcessRegressor: {"optim": "cdist"}},
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- name_save = inspect.currentframe().f_code.co_name + '.onnx'
- with open(name_save, 'wb') as f:
+ name_save = inspect.currentframe().f_code.co_name + ".onnx"
+ with open(name_save, "wb") as f:
f.write(model_onnx.SerializeToString())
try:
self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
@@ -1120,8 +1369,10 @@ def test_gpr_fitted_partial_float64_operator_cdist_quad(self):
assert "Max relative difference:" in str(e)
model_onnx = to_onnx(
- gp, initial_types=[('X', DoubleTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ gp,
+ initial_types=[("X", DoubleTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.check_outputs(gp, model_onnx, X_test, {})
@@ -1132,15 +1383,15 @@ def test_x_issue_789(self):
model = GaussianProcessRegressor()
pipe = make_pipeline(MinMaxScaler(feature_range=(-1, 1)), model)
pipe.fit(tx1, ty1)
- initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))]
- onx = to_onnx(pipe, initial_types=initial_type,
- target_opset=_TARGET_OPSET_)
+ initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))]
+ onx = to_onnx(pipe, initial_types=initial_type, target_opset=_TARGET_OPSET_)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- pred = sess.run(None, {'data_in': vx1.astype(np.float64)})
- assert_almost_equal(pipe.predict(vx1.astype(np.float64)).ravel(),
- pred[0].ravel())
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ pred = sess.run(None, {"data_in": vx1.astype(np.float64)})
+ assert_almost_equal(
+ pipe.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel()
+ )
def test_x_issue_789_cdist(self):
n_samples, n_features = 10000, 10
@@ -1149,89 +1400,98 @@ def test_x_issue_789_cdist(self):
model = GaussianProcessRegressor()
pipe = make_pipeline(MinMaxScaler(feature_range=(-1, 1)), model)
pipe.fit(tx1, ty1)
- initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))]
- onx = to_onnx(pipe, initial_types=initial_type,
- target_opset=_TARGET_OPSET_,
- options={GaussianProcessRegressor: {'optim': 'cdist'}})
+ initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))]
+ onx = to_onnx(
+ pipe,
+ initial_types=initial_type,
+ target_opset=_TARGET_OPSET_,
+ options={GaussianProcessRegressor: {"optim": "cdist"}},
+ )
self.assertIn('op_type: "CDist"', str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- pred = sess.run(None, {'data_in': vx1.astype(np.float64)})
- assert_almost_equal(pipe.predict(vx1.astype(np.float64)).ravel(),
- pred[0].ravel())
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ pred = sess.run(None, {"data_in": vx1.astype(np.float64)})
+ assert_almost_equal(
+ pipe.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel()
+ )
def test_white_kernel_float(self):
X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
tx1, vx1, ty1, vy1 = train_test_split(X, y)
kernel = DotProduct() + WhiteKernel(noise_level=0.5)
- gpr = GaussianProcessRegressor(
- kernel=kernel, random_state=0).fit(tx1, ty1)
- initial_type = [('data_in', FloatTensorType([None, X.shape[1]]))]
- onx = to_onnx(gpr, initial_types=initial_type,
- target_opset=_TARGET_OPSET_)
+ gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(tx1, ty1)
+ initial_type = [("data_in", FloatTensorType([None, X.shape[1]]))]
+ onx = to_onnx(gpr, initial_types=initial_type, target_opset=_TARGET_OPSET_)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- pred = sess.run(None, {'data_in': vx1.astype(np.float32)})
- assert_almost_equal(gpr.predict(vx1.astype(np.float32)).shape[0],
- pred[0].shape[0])
- assert_allclose(gpr.predict(vx1.astype(np.float32)).ravel(),
- pred[0].ravel(), rtol=1e-3)
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ pred = sess.run(None, {"data_in": vx1.astype(np.float32)})
+ assert_almost_equal(
+ gpr.predict(vx1.astype(np.float32)).shape[0], pred[0].shape[0]
+ )
+ assert_allclose(
+ gpr.predict(vx1.astype(np.float32)).ravel(), pred[0].ravel(), rtol=1e-3
+ )
def test_white_kernel_double(self):
X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
tx1, vx1, ty1, vy1 = train_test_split(X, y)
kernel = DotProduct() + WhiteKernel(noise_level=0.5)
- gpr = GaussianProcessRegressor(
- kernel=kernel, random_state=0).fit(tx1, ty1)
- initial_type = [('data_in', DoubleTensorType([None, X.shape[1]]))]
- onx = to_onnx(gpr, initial_types=initial_type,
- target_opset=_TARGET_OPSET_)
+ gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(tx1, ty1)
+ initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))]
+ onx = to_onnx(gpr, initial_types=initial_type, target_opset=_TARGET_OPSET_)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- pred = sess.run(None, {'data_in': vx1.astype(np.float64)})
- assert_almost_equal(gpr.predict(vx1.astype(np.float64)).ravel(),
- pred[0].ravel())
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ pred = sess.run(None, {"data_in": vx1.astype(np.float64)})
+ assert_almost_equal(
+ gpr.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel()
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_kernel_white_kernel(self):
ker = WhiteKernel()
# X, X
- onx = convert_kernel(ker, 'X', output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker, "X", output_names=["Y"], dtype=np.float32, op_version=_TARGET_OPSET_
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
with open("debug.onnx", "wb") as f:
f.write(model_onnx.SerializeToString())
x = np.random.randn(4, 3)
- x[0, 0] = x[1, 1] = x[2, 2] = 10.
- x[3, 2] = 5.
+ x[0, 0] = x[1, 1] = x[2, 2] = 10.0
+ x[3, 2] = 5.0
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': x.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": x.astype(np.float32)})[0]
m1 = res
m2 = ker(x)
assert_almost_equal(m2, m1, decimal=5)
# X, x
- onx = convert_kernel(ker, 'X', x_train=x,
- output_names=['Y'], dtype=np.float32,
- op_version=_TARGET_OPSET_)
+ onx = convert_kernel(
+ ker,
+ "X",
+ x_train=x,
+ output_names=["Y"],
+ dtype=np.float32,
+ op_version=_TARGET_OPSET_,
+ )
model_onnx = onx.to_onnx(
- inputs=[('X', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ inputs=[("X", FloatTensorType([None, None]))], target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': x.astype(np.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": x.astype(np.float32)})[0]
m1 = res
m2 = ker(x, x)
assert_almost_equal(m2, m1, decimal=5)
diff --git a/tests/test_sklearn_glm_classifier_converter.py b/tests/test_sklearn_glm_classifier_converter.py
index fc3f79f0c..c34ffd3c6 100644
--- a/tests/test_sklearn_glm_classifier_converter.py
+++ b/tests/test_sklearn_glm_classifier_converter.py
@@ -9,6 +9,7 @@
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.exceptions import ConvergenceWarning
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -27,49 +28,55 @@
fit_classification_model,
fit_multilabel_classification_model,
TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ InferenceSessionEx as InferenceSession,
+)
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
def _sklearn_version():
# Remove development version 0.22.dev0 becomes 0.22.
- v = ".".join(sklearn.__version__.split('.')[:2])
+ v = ".".join(sklearn.__version__.split(".")[:2])
return pv.Version(v)
class TestGLMClassifierConverter(unittest.TestCase):
-
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class_boolean(self):
- X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
- dtype=np.float32)
+ X = np.array(
+ [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32
+ )
y = np.array([True, True, True, False, False, False])
model = linear_model.LogisticRegression(max_iter=100).fit(X, y)
model_onnx = convert_sklearn(
- model, "linear model",
+ model,
+ "linear model",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'zipmap': False}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIn('name: "classlabels_ints"', str(model_onnx))
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBinaryBoolean")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionBinaryBoolean"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=100), 2)
+ linear_model.LogisticRegression(max_iter=100), 2
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBinary")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionBinary"
+ )
if pv.Version(ort_version) >= pv.Version("1.0.0"):
sess = InferenceSession(model_onnx.SerializeToString())
out = sess.get_outputs()
@@ -81,20 +88,23 @@ def test_model_logistic_regression_binary_class(self):
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class_blacklist(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=100), 2)
+ linear_model.LogisticRegression(max_iter=100), 2
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- black_op={'LinearClassifier'})
- self.assertNotIn('LinearClassifier', str(model_onnx))
+ black_op={"LinearClassifier"},
+ )
+ self.assertNotIn("LinearClassifier", str(model_onnx))
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBinaryBlackList")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionBinaryBlackList"
+ )
if pv.Version(ort_version) >= pv.Version("1.0.0"):
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
out = sess.get_outputs()
lb = out[0].type
sh = out[0].shape
@@ -104,16 +114,18 @@ def test_model_logistic_regression_binary_class_blacklist(self):
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class_string(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=100), 2,
- label_string=True)
+ linear_model.LogisticRegression(max_iter=100), 2, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBinary")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionBinary"
+ )
if pv.Version(ort_version) >= pv.Version("1.0.0"):
sess = InferenceSession(model_onnx.SerializeToString())
out = sess.get_outputs()
@@ -125,28 +137,34 @@ def test_model_logistic_regression_binary_class_string(self):
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_int(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=100), 3, is_int=True)
+ linear_model.LogisticRegression(max_iter=100), 3, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionInt")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionInt"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_bool(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=100), 3, is_bool=True)
+ linear_model.LogisticRegression(max_iter=100), 3, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBool")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionBool"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_linear_discriminant_analysis(self):
@@ -155,13 +173,18 @@ def test_model_logistic_linear_discriminant_analysis(self):
X_test = np.array([[-0.8, -1], [-2, -1]], dtype=np.float32)
model = LinearDiscriminantAnalysis(n_components=1).fit(X, y)
model_onnx = convert_sklearn(
- model, "linear model",
+ model,
+ "linear model",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnLinearDiscriminantAnalysisBin-Dec3")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnLinearDiscriminantAnalysisBin-Dec3",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_linear_discriminant_analysis_decfunc(self):
@@ -170,15 +193,20 @@ def test_model_logistic_linear_discriminant_analysis_decfunc(self):
X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32)
model = LinearDiscriminantAnalysis().fit(X, y)
model_onnx = convert_sklearn(
- model, "linear model",
+ model,
+ "linear model",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options={id(model): {'raw_scores': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"raw_scores": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test, model, model_onnx,
+ X_test,
+ model,
+ model_onnx,
basename="SklearnLinearDiscriminantAnalysisBinRawScore-Out0",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_linear_discriminant_analysis_decfunc3(self):
@@ -187,303 +215,366 @@ def test_model_logistic_linear_discriminant_analysis_decfunc3(self):
X_test = np.array([[-0.8, -1], [0, 1]], dtype=np.float32)
model = LinearDiscriminantAnalysis().fit(X, y)
model_onnx = convert_sklearn(
- model, "linear model",
+ model,
+ "linear model",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options={id(model): {'raw_scores': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"raw_scores": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test, model, model_onnx,
+ X_test,
+ model,
+ model_onnx,
basename="SklearnLinearDiscriminantAnalysisBinRawScore3-Out0",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_cv_binary_class(self):
model, X = fit_classification_model(
- linear_model.LogisticRegressionCV(max_iter=100), 2)
+ linear_model.LogisticRegressionCV(max_iter=100), 2
+ )
model_onnx = convert_sklearn(
- model, "logistic regression cv",
+ model,
+ "logistic regression cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticCVRegressionBinary")
+ X, model, model_onnx, basename="SklearnLogitisticCVRegressionBinary"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_cv_int(self):
try:
model, X = fit_classification_model(
- linear_model.LogisticRegressionCV(max_iter=100),
- 7, is_int=True)
+ linear_model.LogisticRegressionCV(max_iter=100), 7, is_int=True
+ )
except AttributeError:
# AttributeError: 'str' object has no attribute 'decode'
# Bug fixed in scikit-learn 0.24 due to a warning using encoding.
return
model_onnx = convert_sklearn(
- model, "logistic regression cv",
+ model,
+ "logistic regression cv",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionCVInt")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionCVInt"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_cv_bool(self):
model, X = fit_classification_model(
- linear_model.LogisticRegressionCV(max_iter=100), 3, is_bool=True)
+ linear_model.LogisticRegressionCV(max_iter=100), 3, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "logistic regression cv",
+ model,
+ "logistic regression cv",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionCVBool")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionCVBool"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class_nointercept(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(
- fit_intercept=False, max_iter=10000), 2)
+ linear_model.LogisticRegression(fit_intercept=False, max_iter=10000), 2
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionBinaryNoIntercept")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnLogitisticRegressionBinaryNoIntercept",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 4)
+ linear_model.LogisticRegression(max_iter=10000), 4
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMulti")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMulti"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_nocl(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 4,
- label_string=True)
+ linear_model.LogisticRegression(max_iter=10000), 4, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X, model, model_onnx, classes=model.classes_,
- basename="SklearnLogitisticRegressionMultiNoCl")
+ X,
+ model,
+ model_onnx,
+ classes=model.classes_,
+ basename="SklearnLogitisticRegressionMultiNoCl",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_ovr(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(
- multi_class='ovr', max_iter=10000), 3)
+ linear_model.LogisticRegression(multi_class="ovr", max_iter=10000), 3
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMulti")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMulti"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_multinomial(self):
model, X = fit_classification_model(
linear_model.LogisticRegression(
- multi_class="multinomial", solver="lbfgs",
- max_iter=10000), 4)
+ multi_class="multinomial", solver="lbfgs", max_iter=10000
+ ),
+ 4,
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMulti")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMulti"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_no_intercept(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(
- fit_intercept=False, max_iter=10000), 3)
+ linear_model.LogisticRegression(fit_intercept=False, max_iter=10000), 3
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMultiNoIntercept")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMultiNoIntercept"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_lbfgs(self):
- penalty = (
- 'l2' if _sklearn_version() < pv.Version('0.21.0')
- else 'none')
+ penalty = "l2" if _sklearn_version() < pv.Version("0.21.0") else "none"
model, X = fit_classification_model(
linear_model.LogisticRegression(
- solver='lbfgs', penalty=penalty, max_iter=10000), 5)
+ solver="lbfgs", penalty=penalty, max_iter=10000
+ ),
+ 5,
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMultiLbfgs")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMultiLbfgs"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_liblinear_l1(self):
model, X = fit_classification_model(
linear_model.LogisticRegression(
- solver='liblinear', penalty='l1', max_iter=10000), 4)
+ solver="liblinear", penalty="l1", max_iter=10000
+ ),
+ 4,
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMultiLiblinearL1")
+ X, model, model_onnx, basename="SklearnLogitisticRegressionMultiLiblinearL1"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_multi_class_saga_elasticnet(self):
- if _sklearn_version() < pv.Version('0.21.0'):
+ if _sklearn_version() < pv.Version("0.21.0"):
model, X = fit_classification_model(
- linear_model.LogisticRegression(
- solver='saga', max_iter=10000), 3)
+ linear_model.LogisticRegression(solver="saga", max_iter=10000), 3
+ )
else:
model, X = fit_classification_model(
linear_model.LogisticRegression(
- solver='saga', penalty='elasticnet', l1_ratio=0.1,
- max_iter=10000), 3)
+ solver="saga", penalty="elasticnet", l1_ratio=0.1, max_iter=10000
+ ),
+ 3,
+ )
model_onnx = convert_sklearn(
- model, "multi-class logistic regression",
+ model,
+ "multi-class logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLogitisticRegressionMultiSagaElasticnet")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnLogitisticRegressionMultiSagaElasticnet",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_linear_svc_binary_class(self):
model, X = fit_classification_model(LinearSVC(max_iter=10000), 2)
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSVCBinary-NoProb")
+ X, model, model_onnx, basename="SklearnLinearSVCBinary-NoProb"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_linear_svc_multi_class(self):
model, X = fit_classification_model(LinearSVC(max_iter=100), 5)
model_onnx = convert_sklearn(
- model, "multi-class linear SVC",
+ model,
+ "multi-class linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSVCMulti")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCMulti")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_linear_svc_int(self):
- model, X = fit_classification_model(
- LinearSVC(max_iter=100), 5, is_int=True)
+ model, X = fit_classification_model(LinearSVC(max_iter=100), 5, is_int=True)
model_onnx = convert_sklearn(
- model, "multi-class linear SVC",
+ model,
+ "multi-class linear SVC",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSVCInt")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCInt")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_linear_svc_bool(self):
- model, X = fit_classification_model(
- LinearSVC(max_iter=100), 5, is_bool=True)
+ model, X = fit_classification_model(LinearSVC(max_iter=100), 5, is_bool=True)
model_onnx = convert_sklearn(
- model, "multi-class linear SVC",
+ model,
+ "multi-class linear SVC",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSVCBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCBool")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary(self):
model, X = fit_classification_model(linear_model.RidgeClassifier(), 2)
model_onnx = convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierBin")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBin")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_nozipmap(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 2)
+ linear_model.LogisticRegression(max_iter=10000), 2
+ )
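+        # Without any option, the converter adds a ZipMap node by default.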
model_onnx = convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
- assert 'zipmap' in str(model_onnx).lower()
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" in str(model_onnx).lower()
- options = {id(model): {'zipmap': True}}
+ options = {id(model): {"zipmap": True}}
model_onnx = convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
- assert 'zipmap' in str(model_onnx).lower()
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" in str(model_onnx).lower()
- options = {id(model): {'zipmap': False}}
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
- assert 'zipmap' not in str(model_onnx).lower()
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" not in str(model_onnx).lower()
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierNZMBin")
+ X, model, model_onnx, basename="SklearnRidgeClassifierNZMBin"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_mispelled_zipmap(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 2)
+ linear_model.LogisticRegression(max_iter=10000), 2
+ )
- options = {id(model): {'zipmap ': True}}
+ options = {id(model): {"zipmap ": True}}
try:
convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
raise AssertionError("Expecting an error.")
except NameError as e:
assert "Option 'zipmap ' not in" in str(e)
@@ -491,14 +582,18 @@ def test_model_ridge_classifier_binary_mispelled_zipmap(self):
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 2)
+ linear_model.LogisticRegression(max_iter=10000), 2
+ )
- options = {id(model): {'zipmap': 'True'}}
+ options = {id(model): {"zipmap": "True"}}
try:
convert_sklearn(
- model, "binary ridge classifier",
+ model,
+ "binary ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
raise AssertionError("Expecting an error.")
except ValueError as e:
assert "Unexpected value ['True'] for option 'zipmap'" in str(e)
@@ -507,142 +602,163 @@ def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self):
def test_model_ridge_classifier_multi_class(self):
model, X = fit_classification_model(linear_model.RidgeClassifier(), 5)
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierMulti")
+ X, model, model_onnx, basename="SklearnRidgeClassifierMulti"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_int(self):
model, X = fit_classification_model(
- linear_model.RidgeClassifier(), 5, is_int=True)
+ linear_model.RidgeClassifier(), 5, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierInt")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierInt")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_bool(self):
model, X = fit_classification_model(
- linear_model.RidgeClassifier(), 4, is_bool=True)
+ linear_model.RidgeClassifier(), 4, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBool")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_cv_binary(self):
- model, X = fit_classification_model(
- linear_model.RidgeClassifierCV(), 2)
+ model, X = fit_classification_model(linear_model.RidgeClassifierCV(), 2)
model_onnx = convert_sklearn(
- model, "binary ridge classifier cv",
+ model,
+ "binary ridge classifier cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierCVBin")
+ X, model, model_onnx, basename="SklearnRidgeClassifierCVBin"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_cv_int(self):
model, X = fit_classification_model(
- linear_model.RidgeClassifierCV(), 2, is_int=True)
+ linear_model.RidgeClassifierCV(), 2, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "binary ridge classifier cv",
+ model,
+ "binary ridge classifier cv",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierCVInt")
+ X, model, model_onnx, basename="SklearnRidgeClassifierCVInt"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_cv_bool(self):
model, X = fit_classification_model(
- linear_model.RidgeClassifierCV(), 2, is_bool=True)
+ linear_model.RidgeClassifierCV(), 2, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "binary ridge classifier cv",
+ model,
+ "binary ridge classifier cv",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierCVBool")
+ X, model, model_onnx, basename="SklearnRidgeClassifierCVBool"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_cv_multi_class(self):
- model, X = fit_classification_model(
- linear_model.RidgeClassifierCV(), 5)
+ model, X = fit_classification_model(linear_model.RidgeClassifierCV(), 5)
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier cv",
+ model,
+ "multi-class ridge classifier cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRidgeClassifierCVMulti")
+ X, model, model_onnx, basename="SklearnRidgeClassifierCVMulti"
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_logistic_regression_binary_class_decision_function(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(max_iter=10000), 2)
+ linear_model.LogisticRegression(max_iter=10000), 2
+ )
model_onnx = convert_sklearn(
- model, "logistic regression",
+ model,
+ "logistic regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={linear_model.LogisticRegression: {'raw_scores': True}},
- target_opset=TARGET_OPSET)
+ options={linear_model.LogisticRegression: {"raw_scores": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:5], model, model_onnx,
+ X[:5],
+ model,
+ model_onnx,
basename="SklearnLogitisticRegressionBinaryRawScore",
- methods=['predict', 'decision_function_binary'])
+ methods=["predict", "decision_function_binary"],
+ )
- @unittest.skip(
- reason="Scikit-learn doesn't return multi-label output.")
+ @unittest.skip(reason="Scikit-learn doesn't return multi-label output.")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_cv_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- linear_model.RidgeClassifierCV(random_state=42))
+ linear_model.RidgeClassifierCV(random_state=42)
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn RidgeClassifierCV",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnRidgeClassifierCVMultiLabel")
+ X_test, model, model_onnx, basename="SklearnRidgeClassifierCVMultiLabel"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_classifier_multi_zipmap_columns(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(), 3,
- n_features=4, label_string=True)
+ linear_model.LogisticRegression(), 3, n_features=4, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={linear_model.LogisticRegression: {'zipmap': 'columns'}},
- target_opset=TARGET_OPSET)
+ options={linear_model.LogisticRegression: {"zipmap": "columns"}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(model_onnx.SerializeToString())
if sess is None:
return
names = [_.name for _ in sess.get_outputs()]
- self.assertEqual(['output_label', 'scl0', 'scl1', 'scl2'], names)
+ self.assertEqual(["output_label", "scl0", "scl1", "scl2"], names)
xt = X[:10].astype(np.float32)
- got = sess.run(None, {'input': xt})
+ got = sess.run(None, {"input": xt})
prob = model.predict_proba(xt)
for i in range(prob.shape[1]):
assert_almost_equal(prob[:, i], got[i + 1])
@@ -651,21 +767,23 @@ def test_model_classifier_multi_zipmap_columns(self):
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_classifier_multi_class_string_zipmap_columns(self):
model, X = fit_classification_model(
- linear_model.LogisticRegression(), 3,
- n_features=4, label_string=False)
+ linear_model.LogisticRegression(), 3, n_features=4, label_string=False
+ )
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={linear_model.LogisticRegression: {'zipmap': 'columns'}},
- target_opset=TARGET_OPSET)
+ options={linear_model.LogisticRegression: {"zipmap": "columns"}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(model_onnx.SerializeToString())
if sess is None:
return
names = [_.name for _ in sess.get_outputs()]
- self.assertEqual(['output_label', 'i0', 'i1', 'i2'], names)
+ self.assertEqual(["output_label", "i0", "i1", "i2"], names)
xt = X[:10].astype(np.float32)
- got = sess.run(None, {'input': xt})
+ got = sess.run(None, {"input": xt})
prob = model.predict_proba(xt)
for i in range(prob.shape[1]):
assert_almost_equal(prob[:, i], got[i + 1])
diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py
index 1065b3acf..c23ffaca5 100644
--- a/tests/test_sklearn_glm_regressor_converter.py
+++ b/tests/test_sklearn_glm_regressor_converter.py
@@ -6,6 +6,7 @@
import packaging.version as pv
import numpy
from numpy.testing import assert_almost_equal
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -18,6 +19,7 @@
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import LinearSVR
+
try:
from sklearn.linear_model import QuantileRegressor
except (ImportError, AttributeError):
@@ -42,11 +44,14 @@
)
from onnxruntime import __version__ as ort_version
from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ fit_regression_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestGLMRegressorConverter(unittest.TestCase):
@@ -54,180 +59,207 @@ class TestGLMRegressorConverter(unittest.TestCase):
def test_model_linear_regression(self):
model, X = fit_regression_model(linear_model.LinearRegression())
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegression-Dec4")
+ X, model, model_onnx, basename="SklearnLinearRegression-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression_blacklist(self):
model, X = fit_regression_model(linear_model.LinearRegression())
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- black_op={'LinearRegressor'})
- self.assertNotIn('LinearRegressor', str(model_onnx))
+ black_op={"LinearRegressor"},
+ )
+ self.assertNotIn("LinearRegressor", str(model_onnx))
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegressionBlackOp-Dec4")
+ X, model, model_onnx, basename="SklearnLinearRegressionBlackOp-Dec4"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.5.0"),
- reason="old onnxruntime does not support double")
+ reason="old onnxruntime does not support double",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression_multi(self):
- model, X = fit_regression_model(linear_model.LinearRegression(),
- n_targets=2)
+ model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegressionMulti-Dec4")
+ X, model, model_onnx, basename="SklearnLinearRegressionMulti-Dec4"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.5.0"),
- reason="old onnxruntime does not support double")
+ reason="old onnxruntime does not support double",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression64(self):
model, X = fit_regression_model(linear_model.LinearRegression())
- model_onnx = convert_sklearn(model, "linear regression",
- [("input", DoubleTensorType(X.shape))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "linear regression",
+ [("input", DoubleTensorType(X.shape))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
self.assertIn("elem_type: 11", str(model_onnx))
dump_data_and_model(
- X.astype(numpy.float64), model, model_onnx,
- basename="SklearnLinearRegression64-Dec4")
+ X.astype(numpy.float64),
+ model,
+ model_onnx,
+ basename="SklearnLinearRegression64-Dec4",
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.5.0"),
- reason="old onnxruntime does not support double")
+ reason="old onnxruntime does not support double",
+ )
def test_model_linear_regression64_multiple(self):
- model, X = fit_regression_model(linear_model.LinearRegression(),
- n_targets=2)
- model_onnx = convert_sklearn(model, "linear regression",
- [("input", DoubleTensorType(X.shape))],
- target_opset=TARGET_OPSET)
+ model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2)
+ model_onnx = convert_sklearn(
+ model,
+ "linear regression",
+ [("input", DoubleTensorType(X.shape))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
self.assertIn("elem_type: 11", str(model_onnx))
dump_data_and_model(
- X.astype(numpy.float64), model, model_onnx,
- basename="SklearnLinearRegression64Multi-Dec4")
+ X.astype(numpy.float64),
+ model,
+ model_onnx,
+ basename="SklearnLinearRegression64Multi-Dec4",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression_int(self):
- model, X = fit_regression_model(
- linear_model.LinearRegression(), is_int=True)
+ model, X = fit_regression_model(linear_model.LinearRegression(), is_int=True)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegressionInt-Dec4")
+ X, model, model_onnx, basename="SklearnLinearRegressionInt-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression_nointercept(self):
model, X = fit_regression_model(
- linear_model.LinearRegression(fit_intercept=False))
+ linear_model.LinearRegression(fit_intercept=False)
+ )
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegressionNoIntercept-Dec4")
+ X, model, model_onnx, basename="SklearnLinearRegressionNoIntercept-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_regression_bool(self):
- model, X = fit_regression_model(
- linear_model.LinearRegression(), is_bool=True)
+ model, X = fit_regression_model(linear_model.LinearRegression(), is_bool=True)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearRegressionBool")
+ X, model, model_onnx, basename="SklearnLinearRegressionBool"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_svr(self):
model, X = fit_regression_model(LinearSVR())
model_onnx = convert_sklearn(
- model, "linear SVR",
+ model,
+ "linear SVR",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSvr-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSvr-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_svr_int(self):
model, X = fit_regression_model(LinearSVR(), is_int=True)
model_onnx = convert_sklearn(
- model, "linear SVR",
+ model,
+ "linear SVR",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSvrInt-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSvrInt-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_linear_svr_bool(self):
model, X = fit_regression_model(LinearSVR(), is_bool=True)
model_onnx = convert_sklearn(
- model, "linear SVR",
+ model,
+ "linear SVR",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLinearSVRBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVRBool")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ridge(self):
model, X = fit_regression_model(linear_model.Ridge())
model_onnx = convert_sklearn(
- model, "ridge regression",
+ model,
+ "ridge regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx, basename="SklearnRidge-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidge-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ridge_int(self):
model, X = fit_regression_model(linear_model.Ridge(), is_int=True)
model_onnx = convert_sklearn(
- model, "ridge regression",
+ model,
+ "ridge regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx, basename="SklearnRidgeInt-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeInt-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ridge_bool(self):
model, X = fit_regression_model(linear_model.Ridge(), is_bool=True)
model_onnx = convert_sklearn(
- model, "ridge regression",
+ model,
+ "ridge regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx, basename="SklearnRidgeBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeBool")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_sgd_regressor(self):
@@ -236,269 +268,285 @@ def test_model_sgd_regressor(self):
model,
"scikit-learn SGD regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx, basename="SklearnSGDRegressor-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSGDRegressor-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_sgd_regressor_int(self):
- model, X = fit_regression_model(
- linear_model.SGDRegressor(), is_int=True)
+ model, X = fit_regression_model(linear_model.SGDRegressor(), is_int=True)
model_onnx = convert_sklearn(
- model, "SGD regression",
+ model,
+ "SGD regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, basename="SklearnSGDRegressorInt-Dec4")
+ X, model, model_onnx, basename="SklearnSGDRegressorInt-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_sgd_regressor_bool(self):
- model, X = fit_regression_model(
- linear_model.SGDRegressor(), is_bool=True)
+ model, X = fit_regression_model(linear_model.SGDRegressor(), is_bool=True)
model_onnx = convert_sklearn(
- model, "SGD regression",
+ model,
+ "SGD regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDRegressorBool-Dec4")
+ X, model, model_onnx, basename="SklearnSGDRegressorBool-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_elastic_net_regressor(self):
model, X = fit_regression_model(linear_model.ElasticNet())
model_onnx = convert_sklearn(
- model, "scikit-learn elastic-net regression",
+ model,
+ "scikit-learn elastic-net regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnElasticNet-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnElasticNet-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_elastic_net_cv_regressor(self):
model, X = fit_regression_model(linear_model.ElasticNetCV())
model_onnx = convert_sklearn(
- model, "scikit-learn elastic-net regression",
+ model,
+ "scikit-learn elastic-net regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnElasticNetCV-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnElasticNetCV-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_elastic_net_regressor_int(self):
model, X = fit_regression_model(linear_model.ElasticNet(), is_int=True)
model_onnx = convert_sklearn(
- model, "elastic net regression",
+ model,
+ "elastic net regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnElasticNetRegressorInt-Dec4")
+ X, model, model_onnx, basename="SklearnElasticNetRegressorInt-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_elastic_net_regressor_bool(self):
- model, X = fit_regression_model(
- linear_model.ElasticNet(), is_bool=True)
+ model, X = fit_regression_model(linear_model.ElasticNet(), is_bool=True)
model_onnx = convert_sklearn(
- model, "elastic net regression",
+ model,
+ "elastic net regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnElasticNetRegressorBool")
+ X, model, model_onnx, basename="SklearnElasticNetRegressorBool"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lars(self):
model, X = fit_regression_model(linear_model.Lars())
model_onnx = convert_sklearn(
- model, "lars",
+ model,
+ "lars",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx, basename="SklearnLars-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLars-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lars_cv(self):
model, X = fit_regression_model(linear_model.LarsCV())
model_onnx = convert_sklearn(
- model, "lars",
+ model,
+ "lars",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLarsCV-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLarsCV-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_lars(self):
model, X = fit_regression_model(linear_model.LassoLars(alpha=0.01))
model_onnx = convert_sklearn(
- model, "lasso lars",
+ model,
+ "lasso lars",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoLars-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLars-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_lars_cv(self):
model, X = fit_regression_model(linear_model.LassoLarsCV())
model_onnx = convert_sklearn(
- model, "lasso lars cv",
+ model,
+ "lasso lars cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoLarsCV-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsCV-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_lars_ic(self):
model, X = fit_regression_model(linear_model.LassoLarsIC())
model_onnx = convert_sklearn(
- model, "lasso lars cv",
+ model,
+ "lasso lars cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoLarsIC-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsIC-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_cv(self):
model, X = fit_regression_model(linear_model.LassoCV())
model_onnx = convert_sklearn(
- model, "lasso cv",
+ model,
+ "lasso cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoCV-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoCV-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_lars_int(self):
model, X = fit_regression_model(linear_model.LassoLars(), is_int=True)
model_onnx = convert_sklearn(
- model, "lasso lars",
+ model,
+ "lasso lars",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoLarsInt-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsInt-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_lasso_lars_bool(self):
- model, X = fit_regression_model(
- linear_model.LassoLars(), is_bool=True)
+ model, X = fit_regression_model(linear_model.LassoLars(), is_bool=True)
model_onnx = convert_sklearn(
- model, "lasso lars",
+ model,
+ "lasso lars",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnLassoLarsBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnLassoLarsBool")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_multi_linear_regression(self):
- model, X = fit_regression_model(linear_model.LinearRegression(),
- n_targets=2)
+ model, X = fit_regression_model(linear_model.LinearRegression(), n_targets=2)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnMultiLinearRegression-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnMultiLinearRegression-Dec4",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ard_regression(self):
- model, X = fit_regression_model(
- linear_model.ARDRegression(), factor=0.001)
+ model, X = fit_regression_model(linear_model.ARDRegression(), factor=0.001)
model_onnx = convert_sklearn(
- model, "ard regression",
+ model,
+ "ard regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnARDRegression-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnARDRegression-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_theilsen(self):
model, X = fit_regression_model(linear_model.TheilSenRegressor())
model_onnx = convert_sklearn(
- model, "thiel-sen regressor",
+ model,
+ "thiel-sen regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnTheilSen-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnTheilSen-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_bayesian_ridge(self):
model, X = fit_regression_model(linear_model.BayesianRidge())
model_onnx = convert_sklearn(
- model, "bayesian ridge",
+ model,
+ "bayesian ridge",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBayesianRidge-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBayesianRidge-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_bayesian_ridge_return_std(self):
- model, X = fit_regression_model(linear_model.BayesianRidge(),
- n_features=2, n_samples=20)
+ model, X = fit_regression_model(
+ linear_model.BayesianRidge(), n_features=2, n_samples=20
+ )
model_onnx = convert_sklearn(
- model, "bayesian ridge",
+ model,
+ "bayesian ridge",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={linear_model.BayesianRidge: {'return_std': True}},
- target_opset=TARGET_OPSET)
+ options={linear_model.BayesianRidge: {"return_std": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- outputs = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ outputs = sess.run(None, {"input": X})
pred, std = model.predict(X, return_std=True)
assert_almost_equal(pred, outputs[0].ravel(), decimal=4)
assert_almost_equal(std, outputs[1].ravel(), decimal=4)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.3.0"),
- reason="output type")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="output type"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_bayesian_ridge_return_std_double(self):
- model, X = fit_regression_model(linear_model.BayesianRidge(),
- n_features=2, n_samples=100,
- n_informative=1)
+ model, X = fit_regression_model(
+ linear_model.BayesianRidge(), n_features=2, n_samples=100, n_informative=1
+ )
model_onnx = convert_sklearn(
- model, "bayesian ridge",
+ model,
+ "bayesian ridge",
[("input", DoubleTensorType([None, X.shape[1]]))],
- options={linear_model.BayesianRidge: {'return_std': True}},
- target_opset=TARGET_OPSET)
+ options={linear_model.BayesianRidge: {"return_std": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
X = X.astype(numpy.float64)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- outputs = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ outputs = sess.run(None, {"input": X})
pred, std = model.predict(X, return_std=True)
assert_almost_equal(pred, outputs[0].ravel())
assert_almost_equal(std, outputs[1].ravel(), decimal=4)
@@ -512,22 +560,25 @@ def test_model_bayesian_ridge_return_std_normalize(self):
return
model, X = fit_regression_model(model, n_features=2, n_samples=50)
model_onnx = convert_sklearn(
- model, "bayesian ridge",
+ model,
+ "bayesian ridge",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={linear_model.BayesianRidge: {'return_std': True}},
- target_opset=TARGET_OPSET)
+ options={linear_model.BayesianRidge: {"return_std": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- outputs = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ outputs = sess.run(None, {"input": X})
pred, std = model.predict(X, return_std=True)
assert_almost_equal(pred, outputs[0].ravel(), decimal=4)
assert_almost_equal(std, outputs[1].ravel(), decimal=4)
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.3.0"),
- reason="output type")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="output type"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_bayesian_ridge_return_std_normalize_double(self):
try:
@@ -537,17 +588,19 @@ def test_model_bayesian_ridge_return_std_normalize_double(self):
return
model, X = fit_regression_model(model, n_features=2, n_samples=50)
model_onnx = convert_sklearn(
- model, "bayesian ridge",
+ model,
+ "bayesian ridge",
[("input", DoubleTensorType([None, X.shape[1]]))],
- options={linear_model.BayesianRidge: {'return_std': True}},
- target_opset=TARGET_OPSET)
+ options={linear_model.BayesianRidge: {"return_std": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
X = X.astype(numpy.float64)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- outputs = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ outputs = sess.run(None, {"input": X})
pred, std = model.predict(X, return_std=True)
assert_almost_equal(pred, outputs[0].ravel())
assert_almost_equal(std, outputs[1].ravel(), decimal=4)
@@ -556,246 +609,311 @@ def test_model_bayesian_ridge_return_std_normalize_double(self):
def test_model_huber_regressor(self):
model, X = fit_regression_model(linear_model.HuberRegressor())
model_onnx = convert_sklearn(
- model, "huber regressor",
+ model,
+ "huber regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnHuberRegressor-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnHuberRegressor-Dec4")
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_multi_task_lasso(self):
- model, X = fit_regression_model(linear_model.MultiTaskLasso(),
- n_targets=2)
+ model, X = fit_regression_model(linear_model.MultiTaskLasso(), n_targets=2)
model_onnx = convert_sklearn(
- model, "multi-task lasso",
+ model,
+ "multi-task lasso",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnMultiTaskLasso-Dec4")
+ X, model, model_onnx, verbose=False, basename="SklearnMultiTaskLasso-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_multi_task_lasso_cv(self):
- model, X = fit_regression_model(linear_model.MultiTaskLassoCV(),
- n_targets=2)
+ model, X = fit_regression_model(linear_model.MultiTaskLassoCV(), n_targets=2)
model_onnx = convert_sklearn(
- model, "mutli-task lasso cv",
+ model,
+ "mutli-task lasso cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnMultiTaskLassoCV-Dec4")
+ X, model, model_onnx, verbose=False, basename="SklearnMultiTaskLassoCV-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_multi_task_elasticnet(self):
- model, X = fit_regression_model(linear_model.MultiTaskElasticNet(),
- n_targets=2)
+ model, X = fit_regression_model(linear_model.MultiTaskElasticNet(), n_targets=2)
model_onnx = convert_sklearn(
- model, "multi-task elasticnet",
+ model,
+ "multi-task elasticnet",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnMultiTaskElasticNet-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnMultiTaskElasticNet-Dec4",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_orthogonal_matching_pursuit(self):
- model, X = fit_regression_model(
- linear_model.OrthogonalMatchingPursuit())
+ model, X = fit_regression_model(linear_model.OrthogonalMatchingPursuit())
model_onnx = convert_sklearn(
- model, "orthogonal matching pursuit",
+ model,
+ "orthogonal matching pursuit",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnOrthogonalMatchingPursuit-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnOrthogonalMatchingPursuit-Dec4",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_passive_aggressive_regressor(self):
- model, X = fit_regression_model(
- linear_model.PassiveAggressiveRegressor())
+ model, X = fit_regression_model(linear_model.PassiveAggressiveRegressor())
model_onnx = convert_sklearn(
- model, "passive aggressive regressor",
+ model,
+ "passive aggressive regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnPassiveAggressiveRegressor-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnPassiveAggressiveRegressor-Dec4",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ransac_regressor_default(self):
- model, X = fit_regression_model(
- linear_model.RANSACRegressor())
+ model, X = fit_regression_model(linear_model.RANSACRegressor())
model_onnx = convert_sklearn(
- model, "ransac regressor",
+ model,
+ "ransac regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnRANSACRegressor-Dec4")
+ X, model, model_onnx, verbose=False, basename="SklearnRANSACRegressor-Dec4"
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ransac_regressor_mlp(self):
model, X = fit_regression_model(
linear_model.RANSACRegressor(
- MLPRegressor(solver='sgd', max_iter=20),
- min_samples=5))
+ MLPRegressor(solver="sgd", max_iter=20), min_samples=5
+ )
+ )
model_onnx = convert_sklearn(
- model, "ransac regressor",
+ model,
+ "ransac regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnRANSACRegressorMLP-Dec3")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnRANSACRegressorMLP-Dec3",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_ransac_regressor_tree(self):
model, X = fit_regression_model(
- linear_model.RANSACRegressor(
- GradientBoostingRegressor(),
- min_samples=5))
+ linear_model.RANSACRegressor(GradientBoostingRegressor(), min_samples=5)
+ )
model_onnx = convert_sklearn(
- model, "ransac regressor",
+ model,
+ "ransac regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnRANSACRegressorTree-Dec3")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnRANSACRegressorTree-Dec3",
+ )
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
def test_model_multi_task_elasticnet_cv(self):
- model, X = fit_regression_model(linear_model.MultiTaskElasticNetCV(),
- n_targets=2)
+ model, X = fit_regression_model(
+ linear_model.MultiTaskElasticNetCV(), n_targets=2
+ )
model_onnx = convert_sklearn(
- model, "multi-task elasticnet cv",
+ model,
+ "multi-task elasticnet cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnMultiTaskElasticNetCV-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnMultiTaskElasticNetCV-Dec4",
+ )
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_orthogonal_matching_pursuit_cv(self):
- model, X = fit_regression_model(
- linear_model.OrthogonalMatchingPursuitCV())
+ model, X = fit_regression_model(linear_model.OrthogonalMatchingPursuitCV())
model_onnx = convert_sklearn(
- model, "orthogonal matching pursuit cv",
+ model,
+ "orthogonal matching pursuit cv",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx, verbose=False,
- basename="SklearnOrthogonalMatchingPursuitCV-Dec4")
+ X,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnOrthogonalMatchingPursuitCV-Dec4",
+ )
- def check_model(self, model, X, name='input'):
+ def check_model(self, model, X, name="input"):
try:
sess = InferenceSession(
- model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except Exception as e:
- raise AssertionError(
- "Unable to load model\n%s" % str(model)) from e
+ raise AssertionError("Unable to load model\n%s" % str(model)) from e
try:
return sess.run(None, {name: X[:7]})
except Exception as e:
raise AssertionError(
- "Unable to run model X.shape=%r X.dtype=%r\n%s" % (
- X[:7].shape, X.dtype, str(model))) from e
+ "Unable to run model X.shape=%r X.dtype=%r\n%s"
+ % (X[:7].shape, X.dtype, str(model))
+ ) from e
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
- @unittest.skipIf(PoissonRegressor is None,
- reason="scikit-learn too old")
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @unittest.skipIf(PoissonRegressor is None, reason="scikit-learn too old")
def test_model_poisson_regressor(self):
X, y = make_regression(
- n_features=5, n_samples=100, n_targets=1, random_state=42,
- n_informative=3)
+ n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3
+ )
y = numpy.abs(y)
y = y / y.max() + 1e-5
model = linear_model.PoissonRegressor().fit(X, y)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.check_model(model_onnx, X.astype(numpy.float32))
dump_data_and_model(
- X.astype(numpy.float32), model, model_onnx,
- basename="SklearnPoissonRegressor-Dec4")
+ X.astype(numpy.float32),
+ model,
+ model_onnx,
+ basename="SklearnPoissonRegressor-Dec4",
+ )
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(numpy.float64), model, model_onnx,
- basename="SklearnPoissonRegressor64")
+ X.astype(numpy.float64),
+ model,
+ model_onnx,
+ basename="SklearnPoissonRegressor64",
+ )
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
- @unittest.skipIf(TweedieRegressor is None,
- reason="scikti-learn too old")
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
+ @unittest.skipIf(TweedieRegressor is None, reason="scikti-learn too old")
def test_model_tweedie_regressor(self):
X, y = make_regression(
- n_features=5, n_samples=100, n_targets=1, random_state=42,
- n_informative=3)
+ n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3
+ )
y = numpy.abs(y)
y = y / y.max() + 1e-5
for power in range(0, 4):
with self.subTest(power=power):
model = linear_model.TweedieRegressor(power=power).fit(X, y)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.check_model(model_onnx, X.astype(numpy.float32))
dump_data_and_model(
- X.astype(numpy.float32), model, model_onnx,
- basename="SklearnTweedieRegressor%d-Dec4" % power)
+ X.astype(numpy.float32),
+ model,
+ model_onnx,
+ basename="SklearnTweedieRegressor%d-Dec4" % power,
+ )
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(numpy.float64), model, model_onnx,
- basename="SklearnTweedieRegressor64%d" % power)
-
- @unittest.skipIf(QuantileRegressor is None,
- reason="scikit-learn<1.0")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ X.astype(numpy.float64),
+ model,
+ model_onnx,
+ basename="SklearnTweedieRegressor64%d" % power,
+ )
+
+ @unittest.skipIf(QuantileRegressor is None, reason="scikit-learn<1.0")
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_model_quantile_regressor(self):
X, y = make_regression(
- n_features=5, n_samples=100, n_targets=1, random_state=42,
- n_informative=3)
+ n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3
+ )
y = numpy.abs(y)
y = y / y.max() + 1e-5
model = linear_model.QuantileRegressor(solver="highs").fit(X, y)
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.check_model(model_onnx, X.astype(numpy.float32))
dump_data_and_model(
- X.astype(numpy.float32), model, model_onnx,
- basename="SklearnQuantileRegressor-Dec4")
+ X.astype(numpy.float32),
+ model,
+ model_onnx,
+ basename="SklearnQuantileRegressor-Dec4",
+ )
model_onnx = convert_sklearn(
- model, "linear regression",
+ model,
+ "linear regression",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X.astype(numpy.float64), model, model_onnx,
- basename="SklearnQuantileRegressor64")
+ X.astype(numpy.float64),
+ model,
+ model_onnx,
+ basename="SklearnQuantileRegressor64",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_gradient_boosting_converters.py b/tests/test_sklearn_gradient_boosting_converters.py
index 5f3873d65..4ff10f1b6 100644
--- a/tests/test_sklearn_gradient_boosting_converters.py
+++ b/tests/test_sklearn_gradient_boosting_converters.py
@@ -7,10 +7,7 @@
from pandas import DataFrame
from sklearn import __version__ as skl_version
from sklearn.datasets import make_classification
-from sklearn.ensemble import (
- GradientBoostingClassifier,
- GradientBoostingRegressor
-)
+from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from onnxruntime import __version__ as ort_version
from skl2onnx import convert_sklearn
@@ -20,20 +17,25 @@
Int64TensorType,
)
from test_utils import (
- dump_binary_classification, dump_multiple_classification,
- fit_classification_model, dump_data_and_model, fit_regression_model,
- TARGET_OPSET, InferenceSessionEx as InferenceSession)
+ dump_binary_classification,
+ dump_multiple_classification,
+ fit_classification_model,
+ dump_data_and_model,
+ fit_regression_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-ort_version = ort_version.split('+')[0]
-skl_version = skl_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
+skl_version = skl_version.split("+")[0]
class TestSklearnGradientBoostingModels(unittest.TestCase):
-
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.5.0"),
- reason="Depends on PR #1015 onnxruntime.")
+ reason="Depends on PR #1015 onnxruntime.",
+ )
def test_gradient_boosting_classifier1Deviance(self):
model = GradientBoostingClassifier(n_estimators=1, max_depth=2)
X, y = make_classification(10, n_features=4, random_state=42)
@@ -43,25 +45,35 @@ def test_gradient_boosting_classifier1Deviance(self):
for cl in [None, 0.231, 1e-6, 0.9]:
if cl is not None:
model.init_.class_prior_ = np.array([cl, cl])
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
- model_onnx = convert_sklearn(model, initial_types=initial_types,
- target_opset=TARGET_OPSET)
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_types, target_opset=TARGET_OPSET
+ )
if "Regressor" in str(model_onnx):
raise AssertionError(str(model_onnx))
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
pred = model.predict_proba(X)
delta = abs(res[1][0][0] - pred[0, 0])
if delta > 1e-5:
- rows = ["diff", str(delta),
- "X", str(X),
- "base_values_", str(model.init_.class_prior_),
- "predicted_label", str(model.predict(X)),
- "expected", str(pred),
- "onnxruntime", str(DataFrame(res[1])),
- "model", str(model_onnx)]
+ rows = [
+ "diff",
+ str(delta),
+ "X",
+ str(X),
+ "base_values_",
+ str(model.init_.class_prior_),
+ "predicted_label",
+ str(model.predict(X)),
+ "expected",
+ str(pred),
+ "onnxruntime",
+ str(DataFrame(res[1])),
+ "model",
+ str(model_onnx),
+ ]
raise AssertionError("\n---\n".join(rows))
dump_binary_classification(model, suffix="1Deviance")
@@ -75,203 +87,267 @@ def test_gradient_boosting_classifier_multi(self):
def test_gradient_boosting_binary_classification(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=3), 2)
+ GradientBoostingClassifier(n_estimators=3), 2
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingBinaryClassifier")
+ X, model, model_onnx, basename="SklearnGradientBoostingBinaryClassifier"
+ )
def test_gradient_boosting_binary_classification_init_zero(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4, init='zero'), 2)
+ GradientBoostingClassifier(n_estimators=4, init="zero"), 2
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingBinaryClassifierInitZero")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGradientBoostingBinaryClassifierInitZero",
+ )
def test_gradient_boosting_multiclass_classification(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4), 5)
+ GradientBoostingClassifier(n_estimators=4), 5
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingMultiClassClassifier")
+ X, model, model_onnx, basename="SklearnGradientBoostingMultiClassClassifier"
+ )
def test_gradient_boosting_int(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4), 5, is_int=True)
+ GradientBoostingClassifier(n_estimators=4), 5, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingInt")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnGradientBoostingInt")
def test_gradient_boosting_bool(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4), 5, is_bool=True)
+ GradientBoostingClassifier(n_estimators=4), 5, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingBool")
+ X, model, model_onnx, basename="SklearnGradientBoostingBool"
+ )
def test_gradient_boosting_multiclass_decision_function(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4), 5)
- options = {id(model): {'raw_scores': True}}
+ GradientBoostingClassifier(n_estimators=4), 5
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnGradientBoostingMultiClassDecisionFunction",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
def test_gradient_boosting_multiclass_classification_init_zero(self):
model, X = fit_classification_model(
- GradientBoostingClassifier(n_estimators=4, init='zero'), 4)
+ GradientBoostingClassifier(n_estimators=4, init="zero"), 4
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting classifier",
+ model,
+ "gradient boosting classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingMultiClassClassifierInitZero")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGradientBoostingMultiClassClassifierInitZero",
+ )
@unittest.skipIf(
- pv.Version(skl_version) <= pv.Version("1.0"),
- reason="Loss name was removed.")
+ pv.Version(skl_version) <= pv.Version("1.0"), reason="Loss name was removed."
+ )
def test_gradient_boosting_regressor_ls_loss(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=3, loss="squared_error"))
+ GradientBoostingRegressor(n_estimators=3, loss="squared_error")
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionLsLoss")
+ X, model, model_onnx, basename="SklearnGradientBoostingRegressionLsLoss"
+ )
@unittest.skipIf(
- pv.Version(skl_version) <= pv.Version("1.0"),
- reason="Loss name was removed.")
+ pv.Version(skl_version) <= pv.Version("1.0"), reason="Loss name was removed."
+ )
def test_gradient_boosting_regressor_lad_loss(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=3, loss="absolute_error"))
+ GradientBoostingRegressor(n_estimators=3, loss="absolute_error")
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionLadLoss")
+ X, model, model_onnx, basename="SklearnGradientBoostingRegressionLadLoss"
+ )
def test_gradient_boosting_regressor_huber_loss(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=3, loss="huber"))
+ GradientBoostingRegressor(n_estimators=3, loss="huber")
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionHuberLoss")
+ X, model, model_onnx, basename="SklearnGradientBoostingRegressionHuberLoss"
+ )
def test_gradient_boosting_regressor_quantile_loss(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=3, loss="quantile"))
+ GradientBoostingRegressor(n_estimators=3, loss="quantile")
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4",
+ )
def test_gradient_boosting_regressor_int(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(random_state=42), is_int=True)
+ GradientBoostingRegressor(random_state=42), is_int=True
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionInt-Dec3")
+ X, model, model_onnx, basename="SklearnGradientBoostingRegressionInt-Dec3"
+ )
def test_gradient_boosting_regressor_zero_init(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=30, init="zero",
- random_state=42))
+ GradientBoostingRegressor(n_estimators=30, init="zero", random_state=42)
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regression",
+ model,
+ "gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressionZeroInit-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGradientBoostingRegressionZeroInit-Dec4",
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.5.0"),
- reason="Depends on PR #1015 onnxruntime.")
+ reason="Depends on PR #1015 onnxruntime.",
+ )
def test_gradient_boosting_regressor_learning_rate(self):
X, y = make_classification(
- n_features=100, n_samples=1000, n_classes=2, n_informative=8)
+ n_features=100, n_samples=1000, n_classes=2, n_informative=8
+ )
X_train, X_test, y_train, _ = train_test_split(
- X, y, test_size=0.5, random_state=42)
+ X, y, test_size=0.5, random_state=42
+ )
model = GradientBoostingClassifier().fit(X_train, y_train)
onnx_model = convert_sklearn(
- model, 'lr2', [('input', FloatTensorType(X_test.shape))],
- target_opset=TARGET_OPSET)
+ model,
+ "lr2",
+ [("input", FloatTensorType(X_test.shape))],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, input_feed={'input': X_test.astype(np.float32)})
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, input_feed={"input": X_test.astype(np.float32)})
r1 = np.mean(
- np.isclose(model.predict_proba(X_test),
- list(map(lambda x: list(map(lambda y: x[y], x)),
- res[1])), atol=1e-4))
+ np.isclose(
+ model.predict_proba(X_test),
+ list(map(lambda x: list(map(lambda y: x[y], x)), res[1])),
+ atol=1e-4,
+ )
+ )
r2 = np.mean(res[0] == model.predict(X_test))
assert r1 == r2
def test_gradient_boosting_regressor_bool(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(random_state=42), is_bool=True)
+ GradientBoostingRegressor(random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "gradient boosting regressor",
+ model,
+ "gradient boosting regressor",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGradientBoostingRegressorBool-Dec4")
+ X, model, model_onnx, basename="SklearnGradientBoostingRegressorBool-Dec4"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_grid_search_cv_converter.py b/tests/test_sklearn_grid_search_cv_converter.py
index 5239b5d68..1e29191d8 100644
--- a/tests/test_sklearn_grid_search_cv_converter.py
+++ b/tests/test_sklearn_grid_search_cv_converter.py
@@ -15,168 +15,216 @@
from sklearn.datasets import load_iris
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import (
- DoubleTensorType, FloatTensorType, Int64TensorType)
+ DoubleTensorType,
+ FloatTensorType,
+ Int64TensorType,
+)
from test_utils import (
- dump_data_and_model, fit_classification_model,
+ dump_data_and_model,
+ fit_classification_model,
fit_clustering_model,
- fit_regression_model, TARGET_OPSET)
+ fit_regression_model,
+ TARGET_OPSET,
+)
class TestSklearnGridSearchCVModels(unittest.TestCase):
def test_grid_search_binary_float(self):
- tuned_parameters = [{'C': np.logspace(-1, 0, 4)}]
+ tuned_parameters = [{"C": np.logspace(-1, 0, 4)}]
clf = GridSearchCV(
- LogisticRegression(random_state=42, max_iter=100, solver='lbfgs',
- multi_class='ovr'),
- tuned_parameters, cv=5)
+ LogisticRegression(
+ random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr"
+ ),
+ tuned_parameters,
+ cv=5,
+ )
model, X = fit_classification_model(clf, n_classes=2)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchBinaryFloat-Dec4")
+ X, model, model_onnx, basename="SklearnGridSearchBinaryFloat-Dec4"
+ )
def test_grid_search_multiclass_float(self):
- tuned_parameters = [{'C': np.logspace(-1, 0, 4)}]
+ tuned_parameters = [{"C": np.logspace(-1, 0, 4)}]
clf = GridSearchCV(
- SVC(random_state=42, probability=True, gamma='auto'),
- tuned_parameters, cv=5)
+ SVC(random_state=42, probability=True, gamma="auto"), tuned_parameters, cv=5
+ )
model, X = fit_classification_model(clf, n_classes=5)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchMulticlassFloat")
+ X, model, model_onnx, basename="SklearnGridSearchMulticlassFloat"
+ )
def test_grid_search_binary_int(self):
- tuned_parameters = [{'C': np.logspace(-1, 0, 4)}]
+ tuned_parameters = [{"C": np.logspace(-1, 0, 4)}]
clf = GridSearchCV(
- LogisticRegression(random_state=42, max_iter=100, solver='lbfgs',
- multi_class='ovr'),
- tuned_parameters, cv=5)
+ LogisticRegression(
+ random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr"
+ ),
+ tuned_parameters,
+ cv=5,
+ )
model, X = fit_classification_model(clf, n_classes=2, is_int=True)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchBinaryInt-Dec4")
+ X, model, model_onnx, basename="SklearnGridSearchBinaryInt-Dec4"
+ )
def test_grid_search_multiclass_int(self):
- tuned_parameters = [{'C': np.logspace(-1, 0, 4)}]
+ tuned_parameters = [{"C": np.logspace(-1, 0, 4)}]
clf = GridSearchCV(
- LogisticRegression(random_state=42, max_iter=100, solver='lbfgs',
- multi_class='multinomial'),
- tuned_parameters, cv=5)
+ LogisticRegression(
+ random_state=42, max_iter=100, solver="lbfgs", multi_class="multinomial"
+ ),
+ tuned_parameters,
+ cv=5,
+ )
model, X = fit_classification_model(clf, n_classes=4, is_int=True)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchMulticlassInt-Dec4")
+ X, model, model_onnx, basename="SklearnGridSearchMulticlassInt-Dec4"
+ )
def test_grid_search_regression_int(self):
- tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}]
- clf = GridSearchCV(Lasso(max_iter=100),
- tuned_parameters, cv=5)
+ tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}]
+ clf = GridSearchCV(Lasso(max_iter=100), tuned_parameters, cv=5)
model, X = fit_regression_model(clf, is_int=True)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSerachRegressionInt-OneOffArray-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGridSerachRegressionInt-OneOffArray-Dec4",
+ )
def test_grid_search_regressor_float(self):
- tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}]
- clf = GridSearchCV(LassoLars(max_iter=100),
- tuned_parameters, cv=5)
+ tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}]
+ clf = GridSearchCV(LassoLars(max_iter=100), tuned_parameters, cv=5)
model, X = fit_regression_model(clf)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchRegressionFloat-OneOffArray-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGridSearchRegressionFloat-OneOffArray-Dec4",
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime %s" % '0.4.0')
+ pv.Version(ort_version) <= pv.Version("0.4.0"),
+ reason="onnxruntime %s" % "0.4.0",
+ )
def test_grid_search_gaussian_regressor_float(self):
- tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}]
- clf = GridSearchCV(GaussianProcessRegressor(),
- tuned_parameters, cv=5)
+ tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}]
+ clf = GridSearchCV(GaussianProcessRegressor(), tuned_parameters, cv=5)
model, X = fit_regression_model(clf)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnGridSearchGaussianRegressionFloat"
- "-OneOffArray-Dec4")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnGridSearchGaussianRegressionFloat" "-OneOffArray-Dec4",
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime %s" % '0.4.0')
+ pv.Version(ort_version) <= pv.Version("0.4.0"),
+ reason="onnxruntime %s" % "0.4.0",
+ )
def test_grid_search_gaussian_regressor_double(self):
- tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 4)}]
- clf = GridSearchCV(GaussianProcessRegressor(),
- tuned_parameters, cv=3)
+ tuned_parameters = [{"alpha": np.logspace(-4, -0.5, 4)}]
+ clf = GridSearchCV(GaussianProcessRegressor(), tuned_parameters, cv=3)
model, X = fit_regression_model(clf)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float64), model, model_onnx,
- basename="SklearnGridSearchGaussianRegressionDouble"
- "-OneOffArray-Dec4")
+ X.astype(np.float64),
+ model,
+ model_onnx,
+ basename="SklearnGridSearchGaussianRegressionDouble" "-OneOffArray-Dec4",
+ )
def test_grid_search_binary_float_nozipmap(self):
- tuned_parameters = [{'C': np.logspace(-1, 0, 30)}]
+ tuned_parameters = [{"C": np.logspace(-1, 0, 30)}]
clf = GridSearchCV(
- LogisticRegression(random_state=42, max_iter=100, solver='lbfgs',
- multi_class='ovr'),
- tuned_parameters, cv=5)
+ LogisticRegression(
+ random_state=42, max_iter=100, solver="lbfgs", multi_class="ovr"
+ ),
+ tuned_parameters,
+ cv=5,
+ )
model, X = fit_classification_model(clf, n_classes=2)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(clf): {'zipmap': False, 'raw_scores': True}},
- target_opset=TARGET_OPSET)
+ options={id(clf): {"zipmap": False, "raw_scores": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
assert "zipmap" not in str(model_onnx).lower()
assert '"LOGISTIC"' not in str(model_onnx).lower()
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnGridSearchBinaryFloat-Out0",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
def test_grid_search_svm(self):
rand_seed = 0
np.random.seed(rand_seed)
def convert_to_onnx(sklearn_model, X, model_savename):
- onnx_model = to_onnx(sklearn_model, X[:1].astype(np.float32),
- target_opset=TARGET_OPSET)
+ onnx_model = to_onnx(
+ sklearn_model, X[:1].astype(np.float32), target_opset=TARGET_OPSET
+ )
onnx.checker.check_model(onnx_model)
return onnx_model
@@ -185,14 +233,19 @@ def load_train_test():
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
- X, y, train_size=0.8, random_state=rand_seed)
+ X, y, train_size=0.8, random_state=rand_seed
+ )
return X_train, X_test, y_train, y_test
def train_svc_gs(X_train, y_train, apply_fix=False):
- param_grid = {'C': [0.1, 1, 1e1], 'gamma': [1e-3, 1e-2, 1e-1]}
- clf_est = SVC(kernel='rbf', coef0=0.0, degree=3,
- decision_function_shape='ovr',
- probability=True)
+ param_grid = {"C": [0.1, 1, 1e1], "gamma": [1e-3, 1e-2, 1e-1]}
+ clf_est = SVC(
+ kernel="rbf",
+ coef0=0.0,
+ degree=3,
+ decision_function_shape="ovr",
+ probability=True,
+ )
clf = GridSearchCV(clf_est, param_grid)
clf.fit(X_train, y_train)
return clf
@@ -206,22 +259,27 @@ def run():
x_test, model, model_onnx = run()
dump_data_and_model(
- x_test.astype(np.float32), model, model_onnx,
- basename="SklearnGridSearchSVC-Out0")
+ x_test.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnGridSearchSVC-Out0",
+ )
def test_grid_search_binary_kmeans(self):
- tuned_parameters = [{'n_clusters': [2, 3]}]
+ tuned_parameters = [{"n_clusters": [2, 3]}]
clf = GridSearchCV(KMeans(), tuned_parameters, cv=5)
model, X = fit_clustering_model(clf, n_classes=2)
X = X.astype(np.float32)
model_onnx = convert_sklearn(
- model, "GridSearchCV",
+ model,
+ "GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model.best_estimator_, model_onnx,
- basename="SklearnGridSearchKMeans")
+ X, model.best_estimator_, model_onnx, basename="SklearnGridSearchKMeans"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_imputer_converter.py b/tests/test_sklearn_imputer_converter.py
index 96eabbe7e..c514651c9 100644
--- a/tests/test_sklearn_imputer_converter.py
+++ b/tests/test_sklearn_imputer_converter.py
@@ -9,6 +9,7 @@
import pandas as pd
from numpy.testing import assert_almost_equal
import sklearn
+
try:
from sklearn.preprocessing import Imputer
except ImportError:
@@ -22,48 +23,51 @@
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType, StringTensorType)
+ FloatTensorType,
+ Int64TensorType,
+ StringTensorType,
+)
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)
-skl_ver = '.'.join(sklearn.__version__.split('.')[:2])
+skl_ver = ".".join(sklearn.__version__.split(".")[:2])
class TestSklearnImputerConverter(unittest.TestCase):
-
def _check_outputs_ints(self, model, model_onnx, data):
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- idata = {'input': np.array(data).astype(np.int64)}
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ idata = {"input": np.array(data).astype(np.int64)}
res = sess.run(None, idata)[0]
exp = model.transform(data)
assert_almost_equal(res, exp)
- def _check_outputs_strings(self, model, model_onnx, data,
- verbose=0):
- idata = {'input': np.array(data).astype(np.str_)}
+ def _check_outputs_strings(self, model, model_onnx, data, verbose=0):
+ idata = {"input": np.array(data).astype(np.str_)}
sess = InferenceSession(
model_onnx.SerializeToString(),
providers=["CPUExecutionProvider"],
- verbose=verbose)
+ verbose=verbose,
+ )
res = sess.run(None, idata)[0]
exp = model.transform(data)
if list(exp.ravel()) != list(res.ravel()):
- raise AssertionError(
- "Unexpected output expected %r != %r." % (exp, res))
+ raise AssertionError("Unexpected output expected %r != %r." % (exp, res))
- @unittest.skipIf(Imputer is None,
- reason="Imputer removed in 0.21")
+ @unittest.skipIf(Imputer is None, reason="Imputer removed in 0.21")
def test_imputer_float_inputs(self):
model = Imputer(missing_values="NaN", strategy="mean", axis=0)
data = [[1, 2], [np.nan, 3], [7, 6]]
model.fit(data)
- model_onnx = convert_sklearn(model, "scikit-learn imputer",
- [("input", FloatTensorType([None, 2]))])
+ model_onnx = convert_sklearn(
+ model, "scikit-learn imputer", [("input", FloatTensorType([None, 2]))]
+ )
self.assertTrue(model_onnx.graph.node is not None)
# should contain only node
@@ -72,8 +76,7 @@ def test_imputer_float_inputs(self):
# last node should contain the Imputer
outputs = model_onnx.graph.output
self.assertEqual(len(outputs), 1)
- self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value,
- 2)
+ self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2)
dump_data_and_model(
np.array(data, dtype=np.float32),
model,
@@ -81,8 +84,7 @@ def test_imputer_float_inputs(self):
basename="SklearnImputerMeanFloat32",
)
- @unittest.skipIf(SimpleImputer is None,
- reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
def test_simple_imputer_float_inputs(self):
model = SimpleImputer(strategy="mean", fill_value="nan")
data = [[1, 2], [np.nan, 3], [7, 6]]
@@ -92,7 +94,8 @@ def test_simple_imputer_float_inputs(self):
model,
"scikit-learn simple imputer",
[("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx.graph.node is not None)
# should contain only node
@@ -101,15 +104,15 @@ def test_simple_imputer_float_inputs(self):
# last node should contain the Imputer
outputs = model_onnx.graph.output
self.assertEqual(len(outputs), 1)
- self.assertEqual(
- outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2)
+ self.assertEqual(outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2)
dump_data_and_model(
np.array(data, dtype=np.float32),
- model, model_onnx,
- basename="SklearnSimpleImputerMeanFloat32")
+ model,
+ model_onnx,
+ basename="SklearnSimpleImputerMeanFloat32",
+ )
- @unittest.skipIf(SimpleImputer is None,
- reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
def test_simple_imputer_float_inputs_int_mostf(self):
model = SimpleImputer(strategy="most_frequent", fill_value="nan")
data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]]
@@ -119,7 +122,8 @@ def test_simple_imputer_float_inputs_int_mostf(self):
model,
"scikit-learn simple imputer",
[("input", Int64TensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx.graph.node is not None)
# should contain only node
@@ -130,8 +134,7 @@ def test_simple_imputer_float_inputs_int_mostf(self):
self.assertEqual(len(outputs), 1)
self._check_outputs_ints(model, model_onnx, data)
- @unittest.skipIf(SimpleImputer is None,
- reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
def test_simple_imputer_float_inputs_int_mean(self):
model = SimpleImputer(strategy="mean", fill_value="nan")
data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]]
@@ -142,43 +145,50 @@ def test_simple_imputer_float_inputs_int_mean(self):
model,
"scikit-learn simple imputer",
[("input", Int64TensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
except RuntimeError as e:
assert "nan values are replaced by float" in str(e)
- @unittest.skipIf(SimpleImputer is None,
- reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
@unittest.skipIf(
- pv.Version(skl_ver) < pv.Version('0.24'),
- reason="SimpleImputer does not support strings")
+ pv.Version(skl_ver) < pv.Version("0.24"),
+ reason="SimpleImputer does not support strings",
+ )
def test_simple_imputer_string_inputs_int_mostf(self):
model = SimpleImputer(
- strategy="most_frequent", fill_value="nan", missing_values="")
+ strategy="most_frequent", fill_value="nan", missing_values=""
+ )
data = [["s1", "s2"], ["", "s3"], ["s7", "s6"], ["s8", ""]]
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn simple imputer",
+ model,
+ "scikit-learn simple imputer",
[("input", StringTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("ai.onnx.ml", str(model_onnx))
self.assertTrue(model_onnx.graph.node is not None)
self.assertEqual(len(model_onnx.graph.output), 1)
self._check_outputs_strings(model, model_onnx, data)
- @unittest.skipIf(SimpleImputer is None,
- reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
@unittest.skipIf(
- pv.Version(skl_ver) < pv.Version('0.24'),
- reason="SimpleImputer does not support strings")
+ pv.Version(skl_ver) < pv.Version("0.24"),
+ reason="SimpleImputer does not support strings",
+ )
def test_simple_imputer_string_inputs_int_mostf_default(self):
- model = SimpleImputer(strategy="most_frequent", missing_values='')
- data = pd.DataFrame([["s1", "s2"], ["s1", "s2"], ["", "s3"],
- ["s7", "s6"], ["s8", ""]])
+ model = SimpleImputer(strategy="most_frequent", missing_values="")
+ data = pd.DataFrame(
+ [["s1", "s2"], ["s1", "s2"], ["", "s3"], ["s7", "s6"], ["s8", ""]]
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn simple imputer",
+ model,
+ "scikit-learn simple imputer",
[("input", StringTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("ai.onnx.ml", str(model_onnx))
self.assertTrue(model_onnx.graph.node is not None)
self.assertEqual(len(model_onnx.graph.output), 1)
diff --git a/tests/test_sklearn_isolation_forest.py b/tests/test_sklearn_isolation_forest.py
index 2108f8cb5..3c1bbf199 100644
--- a/tests/test_sklearn_isolation_forest.py
+++ b/tests/test_sklearn_isolation_forest.py
@@ -9,57 +9,64 @@
from numpy.testing import assert_almost_equal
from onnxruntime import InferenceSession
from sklearn import __version__ as sklv
+
try:
from sklearn.ensemble import IsolationForest
except ImportError:
IsolationForest = None
from skl2onnx import to_onnx
from test_utils import dump_data_and_model, TARGET_OPSET, TARGET_OPSET_ML
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented
except ImportError:
NotImplemented = RuntimeError
-sklv2 = '.'.join(sklv.split('.')[:2])
+sklv2 = ".".join(sklv.split(".")[:2])
class TestSklearnIsolationForest(unittest.TestCase):
-
@unittest.skipIf(IsolationForest is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'),
- reason="tree structure is different.")
+ @unittest.skipIf(
+ pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different."
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_isolation_forest(self):
isol = IsolationForest(n_estimators=3, random_state=0)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = isol.fit(data)
model_onnx = to_onnx(
- model, data,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(data, model, model_onnx,
- basename="IsolationForest")
+ dump_data_and_model(data, model, model_onnx, basename="IsolationForest")
@unittest.skipIf(IsolationForest is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'),
- reason="tree structure is different.")
+ @unittest.skipIf(
+ pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different."
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_isolation_forest_score_samples(self):
isol = IsolationForest(n_estimators=3, random_state=0)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = isol.fit(data)
model_onnx = to_onnx(
- model, data, options={'score_samples': True},
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model,
+ data,
+ options={"score_samples": True},
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores', 'score_samples'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores", "score_samples"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 3)
expected_label = isol.predict(data)
expected_decif = isol.decision_function(data)
@@ -69,36 +76,37 @@ def test_isolation_forest_score_samples(self):
assert_almost_equal(expected_score, got[2].ravel())
@unittest.skipIf(IsolationForest is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'),
- reason="tree structure is different.")
+ @unittest.skipIf(
+ pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different."
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_isolation_forest_op1(self):
isol = IsolationForest(n_estimators=3, random_state=0)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = isol.fit(data)
with self.assertRaises(RuntimeError):
- to_onnx(model, data,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': 1})
+ to_onnx(model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": 1})
@unittest.skipIf(IsolationForest is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(sklv2) < pv.Version('0.22.0'),
- reason="tree structure is different.")
+ @unittest.skipIf(
+ pv.Version(sklv2) < pv.Version("0.22.0"), reason="tree structure is different."
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_isolation_forest_rnd(self):
isol = IsolationForest(n_estimators=2, random_state=0)
rs = np.random.RandomState(0)
data = rs.randn(100, 4).astype(np.float32)
- data[-1, 2:] = 99.
- data[-2, :2] = -99.
+ data[-1, 2:] = 99.0
+ data[-2, :2] = -99.0
model = isol.fit(data)
model_onnx = to_onnx(
- model, data,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model, data, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(data, model, model_onnx,
- basename="IsolationForestRnd")
+ dump_data_and_model(data, model, model_onnx, basename="IsolationForestRnd")
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_sklearn_k_bins_discretiser_converter.py b/tests/test_sklearn_k_bins_discretiser_converter.py
index c3313d9bd..079527eb1 100644
--- a/tests/test_sklearn_k_bins_discretiser_converter.py
+++ b/tests/test_sklearn_k_bins_discretiser_converter.py
@@ -6,6 +6,7 @@
import unittest
import numpy as np
+
try:
from sklearn.preprocessing import KBinsDiscretizer
except ImportError:
@@ -22,144 +23,190 @@ class TestSklearnKBinsDiscretiser(unittest.TestCase):
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_ordinal_uniform(self):
- X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9]])
- model = KBinsDiscretizer(n_bins=3,
- encode="ordinal",
- strategy="uniform").fit(X)
+ X = np.array(
+ [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]]
+ )
+ model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="uniform").fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalUniform")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalUniform",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_ordinal_quantile(self):
- X = np.array([
- [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
- [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
- [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
- [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
- ])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="ordinal",
- strategy="quantile").fit(X)
+ X = np.array(
+ [
+ [1.2, 3.2, 1.3, -5.6],
+ [4.3, -3.2, 5.7, 1.0],
+ [0, 3.2, 4.7, -8.9],
+ [0.2, 1.3, 0.6, -9.4],
+ [0.8, 4.2, -14.7, -28.9],
+ [8.2, 1.9, 2.6, -5.4],
+ [4.8, -9.2, 33.7, 3.9],
+ [81.2, 1.0, 0.6, 12.4],
+ [6.8, 11.2, -1.7, -2.9],
+ [11.2, 12.9, 4.3, -1.4],
+ ]
+ )
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalQuantile")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalQuantile",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_ordinal_kmeans(self):
- X = np.array([
- [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
- [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
- [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
- [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
- ])
- model = KBinsDiscretizer(n_bins=3, encode="ordinal",
- strategy="kmeans").fit(X)
+ X = np.array(
+ [
+ [1.2, 3.2, 1.3, -5.6],
+ [4.3, -3.2, 5.7, 1.0],
+ [0, 3.2, 4.7, -8.9],
+ [0.2, 1.3, 0.6, -9.4],
+ [0.8, 4.2, -14.7, -28.9],
+ [8.2, 1.9, 2.6, -5.4],
+ [4.8, -9.2, 33.7, 3.9],
+ [81.2, 1.0, 0.6, 12.4],
+ [6.8, 11.2, -1.7, -2.9],
+ [11.2, 12.9, 4.3, -1.4],
+ ]
+ )
+ model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="kmeans").fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalKMeans")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalKMeans",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_onehot_dense_uniform(self):
- X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9]])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="onehot-dense",
- strategy="uniform").fit(X)
+ X = np.array(
+ [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]]
+ )
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnKBinsDiscretiserOneHotDenseUniform")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOneHotDenseUniform",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_onehot_dense_quantile(self):
- X = np.array([
- [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
- [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
- [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
- [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
- ])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="onehot-dense",
- strategy="quantile").fit(X)
+ X = np.array(
+ [
+ [1.2, 3.2, 1.3, -5.6],
+ [4.3, -3.2, 5.7, 1.0],
+ [0, 3.2, 4.7, -8.9],
+ [0.2, 1.3, 0.6, -9.4],
+ [0.8, 4.2, -14.7, -28.9],
+ [8.2, 1.9, 2.6, -5.4],
+ [4.8, -9.2, 33.7, 3.9],
+ [81.2, 1.0, 0.6, 12.4],
+ [6.8, 11.2, -1.7, -2.9],
+ [11.2, 12.9, 4.3, -1.4],
+ ]
+ )
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnKBinsDiscretiserOneHotDenseQuantile")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOneHotDenseQuantile",
+ )
@unittest.skipIf(
- KBinsDiscretizer is None,
- reason="KBinsDiscretizer available since 0.20")
+ KBinsDiscretizer is None, reason="KBinsDiscretizer available since 0.20"
+ )
def test_model_k_bins_discretiser_onehot_dense_kmeans(self):
- X = np.array([
- [1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9], [0.2, 1.3, 0.6, -9.4],
- [0.8, 4.2, -14.7, -28.9], [8.2, 1.9, 2.6, -5.4],
- [4.8, -9.2, 33.7, 3.9], [81.2, 1., 0.6, 12.4],
- [6.8, 11.2, -1.7, -2.9], [11.2, 12.9, 4.3, -1.4],
- ])
- model = KBinsDiscretizer(n_bins=3,
- encode="onehot-dense",
- strategy="kmeans").fit(X)
+ X = np.array(
+ [
+ [1.2, 3.2, 1.3, -5.6],
+ [4.3, -3.2, 5.7, 1.0],
+ [0, 3.2, 4.7, -8.9],
+ [0.2, 1.3, 0.6, -9.4],
+ [0.8, 4.2, -14.7, -28.9],
+ [8.2, 1.9, 2.6, -5.4],
+ [4.8, -9.2, 33.7, 3.9],
+ [81.2, 1.0, 0.6, 12.4],
+ [6.8, 11.2, -1.7, -2.9],
+ [11.2, 12.9, 4.3, -1.4],
+ ]
+ )
+ model = KBinsDiscretizer(
+ n_bins=3, encode="onehot-dense", strategy="kmeans"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx, verbose=0,
- basename="SklearnKBinsDiscretiserOneHotDenseKMeans")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ verbose=0,
+ basename="SklearnKBinsDiscretiserOneHotDenseKMeans",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
@@ -167,65 +214,85 @@ def test_model_k_bins_discretiser_onehot_dense_kmeans(self):
)
def test_model_k_bins_discretiser_ordinal_uniform_int(self):
X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
- model = KBinsDiscretizer(n_bins=3,
- encode="ordinal",
- strategy="uniform").fit(X)
+ model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="uniform").fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalUniformInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalUniformInt",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_ordinal_quantile_int(self):
- X = np.array([
- [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9],
- [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19],
- [12, 13, 31, -16], [0, -21, 15, 30], [10, 22, 71, -91]
- ])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="ordinal",
- strategy="quantile").fit(X)
+ X = np.array(
+ [
+ [1, 3, 3, -6],
+ [3, -2, 5, 0],
+ [0, 2, 7, -9],
+ [-1, 0, 1, -16],
+ [31, -5, 15, 10],
+ [12, -2, 8, -19],
+ [12, 13, 31, -16],
+ [0, -21, 15, 30],
+ [10, 22, 71, -91],
+ ]
+ )
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalQuantileInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalQuantileInt",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
- X = np.array([
- [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9],
- [-1, 0, 1, -16], [31, -5, 15, 10], [12, -2, 8, -19]
- ])
- model = KBinsDiscretizer(n_bins=3, encode="ordinal",
- strategy="kmeans").fit(X)
+ X = np.array(
+ [
+ [1, 3, 3, -6],
+ [3, -2, 5, 0],
+ [0, 2, 7, -9],
+ [-1, 0, 1, -16],
+ [31, -5, 15, 10],
+ [12, -2, 8, -19],
+ ]
+ )
+ model = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="kmeans").fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOrdinalKMeansInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOrdinalKMeansInt",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
@@ -233,19 +300,22 @@ def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
)
def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="onehot-dense",
- strategy="uniform").fit(X)
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOneHotDenseUniformInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOneHotDenseUniformInt",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
@@ -253,43 +323,57 @@ def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
)
def test_model_k_bins_discretiser_onehot_dense_quantile_int(self):
X = np.array([[1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9]])
- model = KBinsDiscretizer(n_bins=[3, 2, 3, 4],
- encode="onehot-dense",
- strategy="quantile").fit(X)
+ model = KBinsDiscretizer(
+ n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt",
+ )
@unittest.skipIf(
KBinsDiscretizer is None,
reason="KBinsDiscretizer available since 0.20",
)
def test_model_k_bins_discretiser_onehot_dense_kmeans_int(self):
- X = np.array([
- [1, 3, 3, -6], [3, -2, 5, 0], [0, 2, 7, -9],
- [-1, 12, 32, -16], [31, -20, 51, 7], [10, 23, 73, -90],
- [1, 23, 36, -61], [93, -12, 15, 10], [20, 12, 17, -19]
- ])
- model = KBinsDiscretizer(n_bins=3,
- encode="onehot-dense",
- strategy="kmeans").fit(X)
+ X = np.array(
+ [
+ [1, 3, 3, -6],
+ [3, -2, 5, 0],
+ [0, 2, 7, -9],
+ [-1, 12, 32, -16],
+ [31, -20, 51, 7],
+ [10, 23, 73, -90],
+ [1, 23, 36, -61],
+ [93, -12, 15, 10],
+ [20, 12, 17, -19],
+ ]
+ )
+ model = KBinsDiscretizer(
+ n_bins=3, encode="onehot-dense", strategy="kmeans"
+ ).fit(X)
model_onnx = convert_sklearn(
model,
"scikit-learn KBinsDiscretiser",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnKBinsDiscretiserOneHotDenseKMeansInt")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnKBinsDiscretiserOneHotDenseKMeansInt",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_k_means_converter.py b/tests/test_sklearn_k_means_converter.py
index 6e9466bdd..41651b8db 100644
--- a/tests/test_sklearn_k_means_converter.py
+++ b/tests/test_sklearn_k_means_converter.py
@@ -16,43 +16,54 @@ def test_kmeans_clustering(self):
X = data.data
model = KMeans(n_clusters=3, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "kmeans",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[40:60],
- model, model_onnx,
- basename="SklearnKMeans-Dec4")
+ model,
+ model_onnx,
+ basename="SklearnKMeans-Dec4",
+ )
def test_kmeans_clustering_noshape(self):
data = load_iris()
X = data.data
model = KMeans(n_clusters=3, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, "kmeans", [("input", FloatTensorType([]))], target_opset=TARGET_OPSET
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[40:60],
- model, model_onnx,
- basename="SklearnKMeans-Dec4")
+ model,
+ model_onnx,
+ basename="SklearnKMeans-Dec4",
+ )
def test_batchkmeans_clustering(self):
data = load_iris()
X = data.data
model = MiniBatchKMeans(n_clusters=3, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "kmeans",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[40:60],
model,
model_onnx,
- basename="SklearnKMeans-Dec4")
+ basename="SklearnKMeans-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_batchkmeans_clustering_opset9(self):
@@ -60,15 +71,16 @@ def test_batchkmeans_clustering_opset9(self):
X = data.data
model = MiniBatchKMeans(n_clusters=3, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=9)
+ model_onnx = convert_sklearn(
+ model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=9
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[40:60],
model,
model_onnx,
- basename="SklearnKMeansOp9-Dec4")
+ basename="SklearnKMeansOp9-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
def test_batchkmeans_clustering_opset11(self):
@@ -76,15 +88,16 @@ def test_batchkmeans_clustering_opset11(self):
X = data.data
model = MiniBatchKMeans(n_clusters=3, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=11)
+ model_onnx = convert_sklearn(
+ model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=11
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[40:60],
model,
model_onnx,
- basename="SklearnKMeansOp9-Dec4")
+ basename="SklearnKMeansOp9-Dec4",
+ )
def test_batchkmeans_clustering_opset1(self):
data = load_iris()
@@ -92,9 +105,9 @@ def test_batchkmeans_clustering_opset1(self):
model = MiniBatchKMeans(n_clusters=3, n_init=3)
model.fit(X)
try:
- convert_sklearn(model, "kmeans",
- [("input", FloatTensorType([None, 4]))],
- target_opset=1)
+ convert_sklearn(
+ model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=1
+ )
except RuntimeError as e:
assert "Node 'OnnxAdd' has been changed since version" in str(e)
@@ -103,32 +116,38 @@ def test_kmeans_clustering_int(self):
X = data.data
model = KMeans(n_clusters=4, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", Int64TensorType([None,
- X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "kmeans",
+ [("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.int64)[40:60],
model,
model_onnx,
- basename="SklearnKMeansInt-Dec4")
+ basename="SklearnKMeansInt-Dec4",
+ )
def test_batchkmeans_clustering_int(self):
data = load_digits()
X = data.data
model = MiniBatchKMeans(n_clusters=4, n_init=3)
model.fit(X)
- model_onnx = convert_sklearn(model, "kmeans",
- [("input", Int64TensorType([None,
- X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "kmeans",
+ [("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.int64)[40:60],
model,
model_onnx,
- basename="SklearnBatchKMeansInt-Dec4")
+ basename="SklearnBatchKMeansInt-Dec4",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_kernel_pca_converter.py b/tests/test_sklearn_kernel_pca_converter.py
index ed64b05e2..c9b33df9f 100644
--- a/tests/test_sklearn_kernel_pca_converter.py
+++ b/tests/test_sklearn_kernel_pca_converter.py
@@ -8,6 +8,7 @@
from sklearn.datasets import load_diabetes
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -18,94 +19,91 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnKernelPCAConverter(unittest.TestCase):
-
def _fit_model(self, model, dtype=np.float32):
data = load_diabetes()
X_train, X_test, *_ = train_test_split(
- data.data, data.target, test_size=0.2, random_state=42)
+ data.data, data.target, test_size=0.2, random_state=42
+ )
model.fit(X_train)
return model, X_test.astype(np.float32)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="all needed operators not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'),
- reason="discrepancies")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="all needed operators not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_kernel_pca_default_float(self):
- model, X_test = self._fit_model(
- KernelPCA(random_state=42))
+ model, X_test = self._fit_model(KernelPCA(random_state=42))
model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKernelPCA32")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnKernelPCA32")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="all needed operators not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'),
- reason="discrepancies")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="all needed operators not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_kernel_pca_default_double(self):
model, X_test = self._fit_model(
- KernelPCA(random_state=42, n_components=2), dtype=np.float64)
+ KernelPCA(random_state=42, n_components=2), dtype=np.float64
+ )
model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKernelPCA64")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnKernelPCA64")
- @unittest.skipIf(TARGET_OPSET < 13,
- reason="all needed operators not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'),
- reason="discrepancies")
+ @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_kernel_pca_float(self):
- for kernel in ['rbf', 'cosine', 'sigmoid', 'poly', 'linear']:
+ for kernel in ["rbf", "cosine", "sigmoid", "poly", "linear"]:
with self.subTest(kernel=kernel):
model, X_test = self._fit_model(
- KernelPCA(random_state=42, kernel=kernel,
- n_components=4))
+ KernelPCA(random_state=42, kernel=kernel, n_components=4)
+ )
model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKernelPCA%s32" % kernel)
+ X_test, model, model_onnx, basename="SklearnKernelPCA%s32" % kernel
+ )
- @unittest.skipIf(TARGET_OPSET < 13,
- reason="all needed operators not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'),
- reason="discrepancies")
+ @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_kernel_pca_double(self):
- for kernel in ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']:
+ for kernel in ["linear", "poly", "rbf", "sigmoid", "cosine"]:
with self.subTest(kernel=kernel):
model, X_test = self._fit_model(
- KernelPCA(random_state=42, kernel=kernel,
- n_components=4),
- dtype=np.float64)
+ KernelPCA(random_state=42, kernel=kernel, n_components=4),
+ dtype=np.float64,
+ )
model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKernelPCA%s64" % kernel)
+ X_test, model, model_onnx, basename="SklearnKernelPCA%s64" % kernel
+ )
- @unittest.skipIf(TARGET_OPSET < 13,
- reason="all needed operators not available")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.3.0'),
- reason="discrepancies")
+ @unittest.skipIf(TARGET_OPSET < 13, reason="all needed operators not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="discrepancies"
+ )
@ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_kernel_pca_double_cdist(self):
- for kernel in ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']:
+ for kernel in ["linear", "poly", "rbf", "sigmoid", "cosine"]:
with self.subTest(kernel=kernel):
model, X_test = self._fit_model(
- KernelPCA(random_state=42, kernel=kernel,
- n_components=4),
- dtype=np.float64)
- model_onnx = to_onnx(model, X_test, target_opset=TARGET_OPSET,
- options={'optim': 'cdist'})
+ KernelPCA(random_state=42, kernel=kernel, n_components=4),
+ dtype=np.float64,
+ )
+ model_onnx = to_onnx(
+ model, X_test, target_opset=TARGET_OPSET, options={"optim": "cdist"}
+ )
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKernelPCA%s64" % kernel)
+ X_test, model, model_onnx, basename="SklearnKernelPCA%s64" % kernel
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_label_binariser_converter.py b/tests/test_sklearn_label_binariser_converter.py
index cd329f2d6..e08914199 100644
--- a/tests/test_sklearn_label_binariser_converter.py
+++ b/tests/test_sklearn_label_binariser_converter.py
@@ -22,14 +22,15 @@ def test_model_label_binariser_default(self):
model,
"scikit-learn label binariser",
[("input", Int64TensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnLabelBinariserDefault")
+ basename="SklearnLabelBinariserDefault",
+ )
def test_model_label_binariser_neg_label(self):
X = np.array([1, 2, 6, 4, 2])
@@ -38,14 +39,15 @@ def test_model_label_binariser_neg_label(self):
model,
"scikit-learn label binariser",
[("input", Int64TensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnLabelBinariserNegLabel")
+ basename="SklearnLabelBinariserNegLabel",
+ )
def test_model_label_binariser_pos_label(self):
X = np.array([1, 2, 6, 4, 2])
@@ -54,14 +56,15 @@ def test_model_label_binariser_pos_label(self):
model,
"scikit-learn label binariser",
[("input", Int64TensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnLabelBinariserPosLabel")
+ basename="SklearnLabelBinariserPosLabel",
+ )
def test_model_label_binariser_neg_pos_label(self):
X = np.array([1, 2, 6, 4, 2])
@@ -70,14 +73,15 @@ def test_model_label_binariser_neg_pos_label(self):
model,
"scikit-learn label binariser",
[("input", Int64TensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnLabelBinariserNegPosLabel")
+ basename="SklearnLabelBinariserNegPosLabel",
+ )
def test_model_label_binariser_binary_labels(self):
X = np.array([1, 0, 0, 0, 1])
@@ -86,27 +90,30 @@ def test_model_label_binariser_binary_labels(self):
model,
"scikit-learn label binariser",
[("input", Int64TensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnLabelBinariserBinaryLabels")
+ basename="SklearnLabelBinariserBinaryLabels",
+ )
def test_model_label_binariser_2d(self):
X1 = np.array([[0, 1, 1], [1, 0, 0]], dtype=np.int64)
model = LabelBinarizer().fit(X1)
onnx_fs = convert_sklearn(
- model, 'lb',
- [('float_input', Int64TensorType([None, X1.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "lb",
+ [("float_input", Int64TensorType([None, X1.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onnx_fs.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_fs.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
- res = sess.run(None, input_feed={'float_input': X1})
+ res = sess.run(None, input_feed={"float_input": X1})
exp = model.transform(X1)
got = res[0]
assert_almost_equal(exp, got)
diff --git a/tests/test_sklearn_label_encoder_converter.py b/tests/test_sklearn_label_encoder_converter.py
index ebedef95a..9abe444bd 100644
--- a/tests/test_sklearn_label_encoder_converter.py
+++ b/tests/test_sklearn_label_encoder_converter.py
@@ -16,14 +16,13 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnLabelEncoderConverter(unittest.TestCase):
-
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.3.0"),
- reason="onnxruntime too old")
+ pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old"
+ )
def test_model_label_encoder(self):
model = LabelEncoder()
data = ["str3", "str2", "str0", "str1", "str3"]
@@ -32,21 +31,19 @@ def test_model_label_encoder(self):
model,
"scikit-learn label encoder",
[("input", StringTensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
self.assertTrue(model_onnx.graph.node is not None)
if model_onnx.ir_version >= 7 and TARGET_OPSET < 12:
raise AssertionError("Incompatbilities")
dump_data_and_model(
- np.array(data),
- model,
- model_onnx,
- basename="SklearnLabelEncoder")
+ np.array(data), model, model_onnx, basename="SklearnLabelEncoder"
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.3.0"),
- reason="onnxruntime too old")
+ pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old"
+ )
def test_model_label_encoder_float(self):
model = LabelEncoder()
data = np.array([1.2, 3.4, 5.4, 1.2], dtype=np.float32)
@@ -55,22 +52,20 @@ def test_model_label_encoder_float(self):
model,
"scikit-learn label encoder",
[("input", FloatTensorType([None]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
self.assertTrue(model_onnx.graph.node is not None)
if model_onnx.ir_version >= 7 and TARGET_OPSET < 12:
raise AssertionError("Incompatbilities")
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnLabelEncoderFloat")
+ data, model, model_onnx, basename="SklearnLabelEncoderFloat"
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.3.0"),
- reason="onnxruntime too old")
- @unittest.skipIf(TARGET_OPSET < 12, reason='not available')
+ pv.Version(ort_version) < pv.Version("0.3.0"), reason="onnxruntime too old"
+ )
+ @unittest.skipIf(TARGET_OPSET < 12, reason="not available")
def test_model_label_encoder_int(self):
model = LabelEncoder()
data = np.array([10, 3, 5, -34, 0], dtype=np.int64)
@@ -83,16 +78,15 @@ def test_model_label_encoder_int(self):
model,
"scikit-learn label encoder",
[("input", Int64TensorType([None]))],
- target_opset=op)
+ target_opset=op,
+ )
self.assertTrue(model_onnx is not None)
self.assertTrue(model_onnx.graph.node is not None)
if model_onnx.ir_version >= 7 and TARGET_OPSET < 12:
raise AssertionError("Incompatbilities")
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnLabelEncoderInt")
+ data, model, model_onnx, basename="SklearnLabelEncoderInt"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_local_outlier_factor.py b/tests/test_sklearn_local_outlier_factor.py
index 8ed3edb40..f833a3bb2 100644
--- a/tests/test_sklearn_local_outlier_factor.py
+++ b/tests/test_sklearn_local_outlier_factor.py
@@ -9,6 +9,7 @@
from numpy.testing import assert_almost_equal
from onnxruntime import __version__ as ort_version
from onnxruntime import InferenceSession
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidGraph
except ImportError:
@@ -19,35 +20,36 @@
LocalOutlierFactor = None
from skl2onnx import to_onnx
from test_utils import TARGET_OPSET
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented
except ImportError:
NotImplemented = RuntimeError
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnLocalOutlierForest(unittest.TestCase):
-
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
def test_local_outlier_factor(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
- self.assertNotIn('CDist', str(model_onnx))
+ self.assertNotIn("CDist", str(model_onnx))
data = data.copy()
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -57,21 +59,22 @@ def test_local_outlier_factor(self):
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
def test_local_outlier_factor_n_neighbors_greater_than_observations(self):
lof = LocalOutlierFactor(n_neighbors=25, novelty=True)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
- self.assertNotIn('CDist', str(model_onnx))
+ self.assertNotIn("CDist", str(model_onnx))
data = data.copy()
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -79,26 +82,27 @@ def test_local_outlier_factor_n_neighbors_greater_than_observations(self):
assert_almost_equal(expected_decif, got[1].ravel(), decimal=5)
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"),
- reason="CDist")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist")
def test_local_outlier_factor_cdist(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
- model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET,
- options={'optim': 'cdist'})
- self.assertIn('CDist', str(model_onnx))
+ model_onnx = to_onnx(
+ model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"}
+ )
+ self.assertIn("CDist", str(model_onnx))
data = data.copy()
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -106,25 +110,25 @@ def test_local_outlier_factor_cdist(self):
assert_almost_equal(expected_decif, got[1].ravel())
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"),
- reason="CDist")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist")
def test_local_outlier_factor_p3(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True, p=3)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
- self.assertNotIn('CDist', str(model_onnx))
+ self.assertNotIn("CDist", str(model_onnx))
data = data.copy()
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -132,32 +136,33 @@ def test_local_outlier_factor_p3(self):
assert_almost_equal(expected_decif, got[1].ravel(), decimal=5)
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"),
- reason="CDist")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist")
def test_local_outlier_factor_cdist_p3(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True, p=3)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
- model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET,
- options={'optim': 'cdist'})
- self.assertIn('CDist', str(model_onnx))
+ model_onnx = to_onnx(
+ model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"}
+ )
+ self.assertIn("CDist", str(model_onnx))
data = data.copy()
data[:, 0] += 0.1
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidGraph as e:
if "Unrecognized attribute: p for operator CDist" in str(e):
return
raise e
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -166,12 +171,13 @@ def test_local_outlier_factor_cdist_p3(self):
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
def test_local_outlier_factor_metric(self):
- for metric in ['cityblock', 'euclidean', 'manhattan', 'sqeuclidean']:
+ for metric in ["cityblock", "euclidean", "manhattan", "sqeuclidean"]:
with self.subTest(metric=metric):
- lof = LocalOutlierFactor(n_neighbors=2, novelty=True,
- metric=metric)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ lof = LocalOutlierFactor(n_neighbors=2, novelty=True, metric=metric)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]],
+ dtype=np.float32,
+ )
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
@@ -179,11 +185,11 @@ def test_local_outlier_factor_metric(self):
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -191,28 +197,29 @@ def test_local_outlier_factor_metric(self):
assert_almost_equal(expected_decif, got[1].ravel(), decimal=4)
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"),
- reason="CDist")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.5.0"), reason="CDist")
def test_local_outlier_factor_metric_cdist(self):
- for metric in ['euclidean', 'sqeuclidean']:
+ for metric in ["euclidean", "sqeuclidean"]:
with self.subTest(metric=metric):
- lof = LocalOutlierFactor(n_neighbors=2, novelty=True,
- metric=metric)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ lof = LocalOutlierFactor(n_neighbors=2, novelty=True, metric=metric)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]],
+ dtype=np.float32,
+ )
model = lof.fit(data)
- model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET,
- options={'optim': 'cdist'})
+ model_onnx = to_onnx(
+ model, data, target_opset=TARGET_OPSET, options={"optim": "cdist"}
+ )
data = data.copy()
data[:, 0] += 0.1
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -221,21 +228,21 @@ def test_local_outlier_factor_metric_cdist(self):
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
@unittest.skipIf(TARGET_OPSET < 13, reason="TopK")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="TopK")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.7.0"), reason="TopK")
def test_local_outlier_factor_double(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float64)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float64
+ )
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -245,17 +252,19 @@ def test_local_outlier_factor_double(self):
@unittest.skipIf(LocalOutlierFactor is None, reason="old scikit-learn")
def test_local_outlier_factor_score_samples(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True)
- data = np.array([[-1.1, -1.2], [0.3, 0.2],
- [0.5, 0.4], [100., 99.]], dtype=np.float32)
+ data = np.array(
+ [[-1.1, -1.2], [0.3, 0.2], [0.5, 0.4], [100.0, 99.0]], dtype=np.float32
+ )
model = lof.fit(data)
- model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET,
- options={'score_samples': True})
+ model_onnx = to_onnx(
+ model, data, target_opset=TARGET_OPSET, options={"score_samples": True}
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores', 'score_samples'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores", "score_samples"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 3)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -269,17 +278,17 @@ def test_local_outlier_factor_rnd(self):
lof = LocalOutlierFactor(n_neighbors=2, novelty=True)
rs = np.random.RandomState(0)
data = rs.randn(100, 4).astype(np.float32)
- data[-1, 2:] = 99.
- data[-2, :2] = -99.
+ data[-1, 2:] = 99.0
+ data[-2, :2] = -99.0
model = lof.fit(data)
model_onnx = to_onnx(model, data, target_opset=TARGET_OPSET)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [o.name for o in sess.get_outputs()]
- self.assertEqual(names, ['label', 'scores'])
- got = sess.run(None, {'X': data})
+ self.assertEqual(names, ["label", "scores"])
+ got = sess.run(None, {"X": data})
self.assertEqual(len(got), 2)
expected_label = lof.predict(data)
expected_decif = lof.decision_function(data)
@@ -287,5 +296,5 @@ def test_local_outlier_factor_rnd(self):
assert_almost_equal(expected_decif, got[1].ravel(), decimal=5)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_sklearn_mlp_converter.py b/tests/test_sklearn_mlp_converter.py
index 3555cf065..8aa18ed6d 100644
--- a/tests/test_sklearn_mlp_converter.py
+++ b/tests/test_sklearn_mlp_converter.py
@@ -10,15 +10,18 @@
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_multilabel_classification
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
try:
from sklearn.utils.testing import ignore_warnings
except ImportError:
+
def ignore_warnings(category=Warning):
return lambda x: x
+
from sklearn.exceptions import ConvergenceWarning
from onnxruntime import InferenceSession, __version__ as ort_version
from skl2onnx import convert_sklearn
@@ -32,275 +35,311 @@ def ignore_warnings(category=Warning):
fit_classification_model,
fit_multilabel_classification_model,
fit_regression_model,
- TARGET_OPSET
+ TARGET_OPSET,
)
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnMLPConverters(unittest.TestCase):
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_binary(self):
- model, X_test = fit_classification_model(
- MLPClassifier(random_state=42), 2)
+ model, X_test = fit_classification_model(MLPClassifier(random_state=42), 2)
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierBinary")
+ X_test, model, model_onnx, basename="SklearnMLPClassifierBinary"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_default(self):
- model, X_test = fit_classification_model(
- MLPClassifier(random_state=42), 4)
+ model, X_test = fit_classification_model(MLPClassifier(random_state=42), 4)
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClass")
+ X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClass"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_default_uint8(self):
model, X_test = fit_classification_model(
- MLPClassifier(random_state=42), 4, cls_dtype=np.uint8)
+ MLPClassifier(random_state=42), 4, cls_dtype=np.uint8
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClassU8")
+ X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClassU8"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_default_uint64(self):
model, X_test = fit_classification_model(
- MLPClassifier(random_state=42), 4, cls_dtype=np.uint64)
+ MLPClassifier(random_state=42), 4, cls_dtype=np.uint64
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClassU64")
+ X_test, model, model_onnx, basename="SklearnMLPClassifierMultiClassU64"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multilabel_default(self):
model, X_test = fit_multilabel_classification_model(
- MLPClassifier(random_state=42))
+ MLPClassifier(random_state=42)
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiLabel")
+ X_test, model, model_onnx, basename="SklearnMLPClassifierMultiLabel"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_regressor_default(self):
- model, X_test = fit_regression_model(
- MLPRegressor(random_state=42))
+ model, X_test = fit_regression_model(MLPRegressor(random_state=42))
model_onnx = convert_sklearn(
model,
"scikit-learn MLPRegressor",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPRegressor-Dec4")
+ X_test, model, model_onnx, basename="SklearnMLPRegressor-Dec4"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_identity(self):
model, X_test = fit_classification_model(
- MLPClassifier(random_state=42, activation="identity"), 3,
- is_int=True)
+ MLPClassifier(random_state=42, activation="identity"), 3, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClassIdentityActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiClassIdentityActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multilabel_identity(self):
model, X_test = fit_multilabel_classification_model(
- MLPClassifier(random_state=42, activation="identity"),
- is_int=True)
+ MLPClassifier(random_state=42, activation="identity"), is_int=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiLabelIdentityActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiLabelIdentityActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_regressor_identity(self):
model, X_test = fit_regression_model(
- MLPRegressor(random_state=42, activation="identity"), is_int=True)
+ MLPRegressor(random_state=42, activation="identity"), is_int=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPRegressor",
[("input", Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPRegressorIdentityActivation-Dec4")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPRegressorIdentityActivation-Dec4",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_logistic(self):
model, X_test = fit_classification_model(
- MLPClassifier(random_state=42, activation="logistic"), 5)
+ MLPClassifier(random_state=42, activation="logistic"), 5
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClassLogisticActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiClassLogisticActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multilabel_logistic(self):
model, X_test = fit_multilabel_classification_model(
- MLPClassifier(random_state=42, activation="logistic"), n_classes=4)
+ MLPClassifier(random_state=42, activation="logistic"), n_classes=4
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiLabelLogisticActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiLabelLogisticActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_regressor_logistic(self):
model, X_test = fit_regression_model(
- MLPRegressor(random_state=42, activation="logistic"))
+ MLPRegressor(random_state=42, activation="logistic")
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPRegressor",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPRegressorLogisticActivation-Dec4")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPRegressorLogisticActivation-Dec4",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multiclass_tanh(self):
model, X_test = fit_classification_model(
- MLPClassifier(random_state=42, activation="tanh"), 3)
+ MLPClassifier(random_state=42, activation="tanh"), 3
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiClassTanhActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiClassTanhActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_multilabel_tanh(self):
model, X_test = fit_multilabel_classification_model(
- MLPClassifier(random_state=42, activation="tanh"), n_labels=3)
+ MLPClassifier(random_state=42, activation="tanh"), n_labels=3
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPClassifierMultiLabelTanhActivation")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnMLPClassifierMultiLabelTanhActivation",
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_regressor_tanh(self):
model, X_test = fit_regression_model(
- MLPRegressor(random_state=42, activation="tanh"))
+ MLPRegressor(random_state=42, activation="tanh")
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPRegressor",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPRegressorTanhActivation-Dec4")
+ X_test, model, model_onnx, basename="SklearnMLPRegressorTanhActivation-Dec4"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_regressor_bool(self):
model, X_test = fit_regression_model(
- MLPRegressor(random_state=42), is_bool=True)
+ MLPRegressor(random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn MLPRegressor",
[("input", BooleanTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnMLPRegressorBool")
+ X_test, model, model_onnx, basename="SklearnMLPRegressorBool"
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version('1.0.0'),
- reason="onnxruntime %s" % '1.0.0')
+ pv.Version(ort_version) < pv.Version("1.0.0"), reason="onnxruntime %s" % "1.0.0"
+ )
@ignore_warnings(category=(ConvergenceWarning, FutureWarning))
def test_model_mlp_classifier_nozipmap(self):
X, y = make_multilabel_classification(n_labels=5, n_classes=10)
X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=42)
+ X, y, test_size=0.5, random_state=42
+ )
model = MLPClassifier().fit(X_train, y_train)
- options = {id(model): {'zipmap': False}}
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, 'mlp',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ model,
+ "mlp",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, input_feed={'input': X_test})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, input_feed={"input": X_test})
assert_almost_equal(res[1], model.predict_proba(X_test), decimal=5)
assert_almost_equal(res[0], model.predict(X_test), decimal=5)
diff --git a/tests/test_sklearn_multi_output.py b/tests/test_sklearn_multi_output.py
index 3970d9ae8..8c65c1220 100644
--- a/tests/test_sklearn_multi_output.py
+++ b/tests/test_sklearn_multi_output.py
@@ -9,6 +9,7 @@
from sklearn.datasets import load_linnerud, make_multilabel_classification
from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier
from sklearn.linear_model import Ridge, LogisticRegression
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
@@ -18,14 +19,13 @@
from test_utils import dump_data_and_model, TARGET_OPSET
-skl_ver = ".".join(skl_ver.split('.')[:2])
+skl_ver = ".".join(skl_ver.split(".")[:2])
class TestMultiOutputConverter(unittest.TestCase):
-
def setUp(self):
if __name__ == "__main__":
- log = getLogger('skl2onnx')
+ log = getLogger("skl2onnx")
log.disabled = True
# log.setLevel(logging.DEBUG)
# logging.basicConfig(level=logging.DEBUG)
@@ -34,28 +34,24 @@ def setUp(self):
def test_multi_output_regressor(self):
X, y = load_linnerud(return_X_y=True)
clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
- onx = to_onnx(clf, X[:1].astype(numpy.float32),
- target_opset=TARGET_OPSET)
+ onx = to_onnx(clf, X[:1].astype(numpy.float32), target_opset=TARGET_OPSET)
dump_data_and_model(
- X.astype(numpy.float32), clf, onx,
- basename="SklearnMultiOutputRegressor")
+ X.astype(numpy.float32), clf, onx, basename="SklearnMultiOutputRegressor"
+ )
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available.")
- @ignore_warnings(category=(FutureWarning,
- DeprecationWarning))
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.")
+ @ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_multi_output_classifier(self):
X, y = make_multilabel_classification(n_classes=3, random_state=0)
X = X.astype(numpy.float32)
clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
- onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET, options={"zipmap": False})
self.assertNotIn("ZipMap", str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X})
exp_lab = clf.predict(X)
exp_prb = clf.predict_proba(X)
assert_almost_equal(exp_lab, res[0])
@@ -64,14 +60,18 @@ def test_multi_output_classifier(self):
assert_almost_equal(e, g, decimal=5)
# check option nocl=True
- onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET,
- options={id(clf): {'nocl': True, 'zipmap': False}})
+ onx = to_onnx(
+ clf,
+ X[:1],
+ target_opset=TARGET_OPSET,
+ options={id(clf): {"nocl": True, "zipmap": False}},
+ )
self.assertNotIn("ZipMap", str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X})
exp_lab = clf.predict(X)
exp_prb = clf.predict_proba(X)
assert_almost_equal(exp_lab, res[0])
@@ -80,14 +80,18 @@ def test_multi_output_classifier(self):
assert_almost_equal(e, g, decimal=5)
# check option nocl=False
- onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET,
- options={id(clf): {'nocl': False, 'zipmap': False}})
+ onx = to_onnx(
+ clf,
+ X[:1],
+ target_opset=TARGET_OPSET,
+ options={id(clf): {"nocl": False, "zipmap": False}},
+ )
self.assertNotIn("ZipMap", str(onx))
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X})
exp_lab = clf.predict(X)
exp_prb = clf.predict_proba(X)
assert_almost_equal(exp_lab, res[0])
@@ -95,38 +99,44 @@ def test_multi_output_classifier(self):
for e, g in zip(exp_prb, res[1]):
assert_almost_equal(e, g, decimal=5)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available.")
- @unittest.skipIf(pv.Version(skl_ver) < pv.Version("0.22"),
- reason="classes_ attribute is missing")
- @ignore_warnings(category=(FutureWarning,
- DeprecationWarning))
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.")
+ @unittest.skipIf(
+ pv.Version(skl_ver) < pv.Version("0.22"), reason="classes_ attribute is missing"
+ )
+ @ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_multi_output_classifier_exc(self):
X, y = make_multilabel_classification(n_classes=3, random_state=0)
X = X.astype(numpy.float32)
clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
clf.classes_ = numpy.array(clf.classes_)
with self.assertRaises(RuntimeError):
- to_onnx(clf, X[:1], target_opset=TARGET_OPSET,
- options={'zipmap': False, 'output_class_labels': True})
-
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available.")
- @unittest.skipIf(pv.Version(skl_ver) < pv.Version("0.22"),
- reason="classes_ attribute is missing")
- @ignore_warnings(category=(FutureWarning,
- DeprecationWarning))
+ to_onnx(
+ clf,
+ X[:1],
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False, "output_class_labels": True},
+ )
+
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available.")
+ @unittest.skipIf(
+ pv.Version(skl_ver) < pv.Version("0.22"), reason="classes_ attribute is missing"
+ )
+ @ignore_warnings(category=(FutureWarning, DeprecationWarning))
def test_multi_output_classifier_fallback(self):
X, y = make_multilabel_classification(n_classes=3, random_state=0)
X = X.astype(numpy.float32)
clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
del clf.classes_
- onx = to_onnx(clf, X[:1], target_opset=TARGET_OPSET,
- options={'zipmap': False, 'output_class_labels': True})
+ onx = to_onnx(
+ clf,
+ X[:1],
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False, "output_class_labels": True},
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'X': X})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"X": X})
exp_lab = clf.predict(X)
exp_prb = clf.predict_proba(X)
assert_almost_equal(exp_lab, res[0])
diff --git a/tests/test_sklearn_naive_bayes_converter.py b/tests/test_sklearn_naive_bayes_converter.py
index 1920bd0c1..4b95f26de 100644
--- a/tests/test_sklearn_naive_bayes_converter.py
+++ b/tests/test_sklearn_naive_bayes_converter.py
@@ -7,6 +7,7 @@
GaussianNB,
MultinomialNB,
)
+
try:
from sklearn.naive_bayes import CategoricalNB
except ImportError:
@@ -23,72 +24,60 @@
FloatTensorType,
Int64TensorType,
)
-from test_utils import (
- dump_data_and_model,
- fit_classification_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET
class TestNaiveBayesConverter(unittest.TestCase):
-
def test_model_multinomial_nb_binary_classification(self):
- model, X = fit_classification_model(
- MultinomialNB(), 2, pos_features=True)
+ model, X = fit_classification_model(MultinomialNB(), 2, pos_features=True)
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32),
model,
model_onnx,
- basename="SklearnBinMultinomialNB-Dec4")
+ basename="SklearnBinMultinomialNB-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_binary_classification(self):
- model, X = fit_classification_model(
- BernoulliNB(), 2)
+ model, X = fit_classification_model(BernoulliNB(), 2)
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinBernoulliNB")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNB")
def test_model_multinomial_nb_multiclass(self):
- model, X = fit_classification_model(
- MultinomialNB(), 5, pos_features=True)
+ model, X = fit_classification_model(MultinomialNB(), 5, pos_features=True)
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclMultinomialNB-Dec4")
+ X, model, model_onnx, basename="SklearnMclMultinomialNB-Dec4"
+ )
def test_model_multinomial_nb_multiclass_params(self):
model, X = fit_classification_model(
- MultinomialNB(alpha=0.5, fit_prior=False), 5, pos_features=True)
+ MultinomialNB(alpha=0.5, fit_prior=False), 5, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
pp = model.predict_proba(X)
@@ -97,64 +86,56 @@ def test_model_multinomial_nb_multiclass_params(self):
diff = pps[:, col - 1] - pps[:, col - 2]
ind = diff >= 1e-4
dump_data_and_model(
- X[ind],
- model,
- model_onnx,
- basename="SklearnMclMultinomialNBParams-Dec4")
+ X[ind], model, model_onnx, basename="SklearnMclMultinomialNBParams-Dec4"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_multiclass(self):
- model, X = fit_classification_model(
- BernoulliNB(), 4)
+ model, X = fit_classification_model(BernoulliNB(), 4)
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclBernoulliNB")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclBernoulliNB")
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_multiclass_params(self):
model, X = fit_classification_model(
- BernoulliNB(alpha=0, binarize=1.0, fit_prior=False), 4)
+ BernoulliNB(alpha=0, binarize=1.0, fit_prior=False), 4
+ )
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclBernoulliNBParams")
+ X, model, model_onnx, basename="SklearnMclBernoulliNBParams"
+ )
def test_model_multinomial_nb_binary_classification_int(self):
model, X = fit_classification_model(
- MultinomialNB(), 2, is_int=True, pos_features=True)
+ MultinomialNB(), 2, is_int=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinMultinomialNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnBinMultinomialNBInt-Dec4"
+ )
def test_model_multinomial_nb_binary_classification_bool(self):
model, X = fit_classification_model(
- MultinomialNB(), 2, is_bool=True, pos_features=True)
+ MultinomialNB(), 2, is_bool=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
@@ -163,32 +144,24 @@ def test_model_multinomial_nb_binary_classification_bool(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinMultinomialNBBool-Dec4")
+ X, model, model_onnx, basename="SklearnBinMultinomialNBBool-Dec4"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_binary_classification_int(self):
- model, X = fit_classification_model(
- BernoulliNB(), 2, is_int=True)
+ model, X = fit_classification_model(BernoulliNB(), 2, is_int=True)
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinBernoulliNBInt")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNBInt")
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_binary_classification_bool(self):
- model, X = fit_classification_model(
- BernoulliNB(), 2, is_bool=True)
+ model, X = fit_classification_model(BernoulliNB(), 2, is_bool=True)
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
@@ -196,112 +169,85 @@ def test_model_bernoulli_nb_binary_classification_bool(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinBernoulliNBBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinBernoulliNBBool")
def test_model_multinomial_nb_multiclass_int(self):
model, X = fit_classification_model(
- MultinomialNB(), 5, is_int=True, pos_features=True)
+ MultinomialNB(), 5, is_int=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"multinomial naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclMultinomialNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnMclMultinomialNBInt-Dec4"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_bernoulli_nb_multiclass_int(self):
- model, X = fit_classification_model(
- BernoulliNB(), 4, is_int=True)
+ model, X = fit_classification_model(BernoulliNB(), 4, is_int=True)
model_onnx = convert_sklearn(
model,
"bernoulli naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclBernoulliNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnMclBernoulliNBInt-Dec4"
+ )
def test_model_gaussian_nb_binary_classification(self):
- model, X = fit_classification_model(
- GaussianNB(), 2)
+ model, X = fit_classification_model(GaussianNB(), 2)
model_onnx = convert_sklearn(
model,
"gaussian naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinGaussianNB")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinGaussianNB")
def test_model_gaussian_nb_multiclass(self):
- model, X = fit_classification_model(
- GaussianNB(), 4)
+ model, X = fit_classification_model(GaussianNB(), 4)
model_onnx = convert_sklearn(
model,
"gaussian naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclGaussianNB")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclGaussianNB")
def test_model_gaussian_nb_binary_classification_int(self):
- model, X = fit_classification_model(
- GaussianNB(), 2, is_int=True)
+ model, X = fit_classification_model(GaussianNB(), 2, is_int=True)
model_onnx = convert_sklearn(
model,
"gaussian naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinGaussianNBInt")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinGaussianNBInt")
def test_model_gaussian_nb_multiclass_int(self):
- model, X = fit_classification_model(
- GaussianNB(), 5, is_int=True)
+ model, X = fit_classification_model(GaussianNB(), 5, is_int=True)
model_onnx = convert_sklearn(
model,
"gaussian naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclGaussianNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnMclGaussianNBInt-Dec4"
+ )
def test_model_gaussian_nb_multiclass_bool(self):
- model, X = fit_classification_model(
- GaussianNB(), 5, is_bool=True)
+ model, X = fit_classification_model(GaussianNB(), 5, is_bool=True)
model_onnx = convert_sklearn(
model,
"gaussian naive bayes",
@@ -310,136 +256,121 @@ def test_model_gaussian_nb_multiclass_bool(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclGaussianNBBool-Dec4")
+ X, model, model_onnx, basename="SklearnMclGaussianNBBool-Dec4"
+ )
- @unittest.skipIf(ComplementNB is None,
- reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
def test_model_complement_nb_binary_classification(self):
- model, X = fit_classification_model(
- ComplementNB(), 2, pos_features=True)
+ model, X = fit_classification_model(ComplementNB(), 2, pos_features=True)
model_onnx = convert_sklearn(
model,
"complement naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinComplementNB-Dec4")
+ X, model, model_onnx, basename="SklearnBinComplementNB-Dec4"
+ )
- @unittest.skipIf(ComplementNB is None,
- reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
def test_model_complement_nb_multiclass(self):
- model, X = fit_classification_model(
- ComplementNB(), 4, pos_features=True)
+ model, X = fit_classification_model(ComplementNB(), 4, pos_features=True)
model_onnx = convert_sklearn(
model,
"complement naive bayes",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclComplementNB-Dec4")
+ X, model, model_onnx, basename="SklearnMclComplementNB-Dec4"
+ )
- @unittest.skipIf(ComplementNB is None,
- reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
def test_model_complement_nb_binary_classification_int(self):
model, X = fit_classification_model(
- ComplementNB(), 2, is_int=True, pos_features=True)
+ ComplementNB(), 2, is_int=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"complement naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnBinComplementNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnBinComplementNBInt-Dec4"
+ )
- @unittest.skipIf(ComplementNB is None,
- reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
def test_model_complement_nb_multiclass_int(self):
model, X = fit_classification_model(
- ComplementNB(), 5, is_int=True, pos_features=True)
+ ComplementNB(), 5, is_int=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"complement naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclComplementNBInt-Dec4")
+ X, model, model_onnx, basename="SklearnMclComplementNBInt-Dec4"
+ )
- @unittest.skipIf(ComplementNB is None,
- reason="new in scikit version 0.20")
+ @unittest.skipIf(ComplementNB is None, reason="new in scikit version 0.20")
def test_model_complement_nb_multiclass_bool(self):
model, X = fit_classification_model(
- ComplementNB(), 5, is_bool=True, pos_features=True)
+ ComplementNB(), 5, is_bool=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"complement naive bayes",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnMclComplementNBBool-Dec4")
+ X, model, model_onnx, basename="SklearnMclComplementNBBool-Dec4"
+ )
- @unittest.skipIf(CategoricalNB is None,
- reason="new in scikit version 0.22")
+ @unittest.skipIf(CategoricalNB is None, reason="new in scikit version 0.22")
def test_model_categorical_nb(self):
model, X = fit_classification_model(
- CategoricalNB(), 3, is_int=True, pos_features=True)
+ CategoricalNB(), 3, is_int=True, pos_features=True
+ )
model_onnx = convert_sklearn(
model,
"categorical naive bayes",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[10:13],
- model,
- model_onnx,
- basename="SklearnCategoricalNB")
+ X[10:13], model, model_onnx, basename="SklearnCategoricalNB"
+ )
def test_model_gaussian_nb_multi_class_nocl(self):
- model, X = fit_classification_model(
- GaussianNB(),
- 2, label_string=True)
+ model, X = fit_classification_model(GaussianNB(), 2, label_string=True)
model_onnx = convert_sklearn(
model,
"GaussianNB multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X, model, model_onnx, classes=model.classes_,
- basename="SklearnGaussianNBMultiNoCl")
+ X,
+ model,
+ model_onnx,
+ classes=model.classes_,
+ basename="SklearnGaussianNBMultiNoCl",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py
index 761f55191..d924e5ba5 100644
--- a/tests/test_sklearn_nearest_neighbour_converter.py
+++ b/tests/test_sklearn_nearest_neighbour_converter.py
@@ -12,6 +12,7 @@
from numpy.testing import assert_almost_equal
from onnxruntime import __version__ as ort_version
from pandas import DataFrame
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
@@ -20,14 +21,16 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import (
- KNeighborsRegressor, RadiusNeighborsRegressor,
- KNeighborsClassifier, RadiusNeighborsClassifier,
- NearestNeighbors)
+ KNeighborsRegressor,
+ RadiusNeighborsRegressor,
+ KNeighborsClassifier,
+ RadiusNeighborsClassifier,
+ NearestNeighbors,
+)
+
try:
from sklearn.impute import KNNImputer
- from sklearn.neighbors import (
- KNeighborsTransformer,
- NeighborhoodComponentsAnalysis)
+ from sklearn.neighbors import KNeighborsTransformer, NeighborhoodComponentsAnalysis
except ImportError:
# New in 0.22
KNNImputer = None
@@ -35,9 +38,9 @@
NeighborhoodComponentsAnalysis = None
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
+
try:
- from onnxruntime.capi.onnxruntime_pybind11_state import (
- NotImplemented as OrtImpl)
+ from onnxruntime.capi.onnxruntime_pybind11_state import NotImplemented as OrtImpl
except ImportError:
OrtImpl = RuntimeError
from skl2onnx import convert_sklearn, to_onnx
@@ -46,28 +49,27 @@
FloatTensorType,
Int64TensorType,
)
-from skl2onnx.common.data_types import onnx_built_with_ml
from skl2onnx.helpers.onnx_helper import (
- enumerate_model_node_outputs, select_model_inputs_outputs)
+ enumerate_model_node_outputs,
+ select_model_inputs_outputs,
+)
from test_utils import (
dump_data_and_model,
fit_classification_model,
fit_multilabel_classification_model,
TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ InferenceSessionEx as InferenceSession,
+)
def dont_test_radius():
- return (
- pv.Version(ort_version) <= pv.Version("1.3.0") or
- TARGET_OPSET <= 11)
+ return pv.Version(ort_version) <= pv.Version("1.3.0") or TARGET_OPSET <= 11
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestNearestNeighbourConverter(unittest.TestCase):
-
@functools.lru_cache(maxsize=1)
def _get_iris(self):
iris = datasets.load_iris()
@@ -91,20 +93,22 @@ def _fit_model_multiclass_classification(self, model, use_string=False):
@functools.lru_cache(maxsize=20)
def _get_reg_data(self, n, n_features, n_targets, n_informative=10):
X, y = datasets.make_regression(
- n, n_features=n_features, random_state=0,
- n_targets=n_targets, n_informative=n_informative)
+ n,
+ n_features=n_features,
+ random_state=0,
+ n_targets=n_targets,
+ n_informative=n_informative,
+ )
return X, y
- def _fit_model(self, model, n_targets=1, label_int=False,
- n_informative=10):
+ def _fit_model(self, model, n_targets=1, label_int=False, n_informative=10):
X, y = self._get_reg_data(20, 4, n_targets, n_informative)
if label_int:
y = y.astype(numpy.int64)
model.fit(X, y)
return model, X
- def _fit_model_simple(self, model, n_targets=1, label_int=False,
- n_informative=3):
+ def _fit_model_simple(self, model, n_targets=1, label_int=False, n_informative=3):
X, y = self._get_reg_data(20, 2, n_targets, n_informative)
y /= 100
if label_int:
@@ -113,174 +117,206 @@ def _fit_model_simple(self, model, n_targets=1, label_int=False,
return model, X
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor(self):
model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2))
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor-Dec4")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor-Dec4",
+ )
dump_data_and_model(
(X + 0.1).astype(numpy.float32)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor-Dec4")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor-Dec4",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.8.0"),
- reason="produces nan values")
+ pv.Version(ort_version) < pv.Version("1.8.0"), reason="produces nan values"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_radius(self):
model, X = self._fit_model(RadiusNeighborsRegressor())
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET,
- options={id(model): {'optim': 'cdist'}})
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ options={id(model): {"optim": "cdist"}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X = X[:5]
- got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
+ got = sess.run(None, {"input": X.astype(numpy.float32)})[0]
exp = model.predict(X.astype(numpy.float32))
if any(numpy.isnan(got.ravel())):
# The model is unexpectedly producing nan values
# not on all platforms.
- rows = ['--EXP--', str(exp), '--GOT--', str(got),
- '--EVERY-OUTPUT--']
- for out in enumerate_model_node_outputs(
- model_onnx, add_node=False):
+ rows = ["--EXP--", str(exp), "--GOT--", str(got), "--EVERY-OUTPUT--"]
+ for out in enumerate_model_node_outputs(model_onnx, add_node=False):
onx = select_model_inputs_outputs(model_onnx, out)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(
- None, {'input': X.astype(numpy.float32)})
- rows.append('--{}--'.format(out))
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
+ rows.append("--{}--".format(out))
rows.append(str(res))
- if (pv.Version(ort_version) <
- pv.Version("1.4.0")):
+ if pv.Version(ort_version) < pv.Version("1.4.0"):
return
- raise AssertionError('\n'.join(rows))
+ raise AssertionError("\n".join(rows))
assert_almost_equal(exp.ravel(), got.ravel(), decimal=3)
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_double(self):
model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2))
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'optim': 'cdist'}})
+ options={id(model): {"optim": "cdist"}},
+ )
self.assertIsNotNone(model_onnx)
try:
InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except OrtImpl as e:
- if ("Could not find an implementation for the node "
- "To_TopK:TopK(11)") in str(e):
+ if (
+ "Could not find an implementation for the node " "To_TopK:TopK(11)"
+ ) in str(e):
# onnxruntime does not declare TopK(11) for double
return
raise e
dump_data_and_model(
X.astype(numpy.float64)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor64")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor64",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.7.0"),
- reason="nan may happen during computation")
+ reason="nan may happen during computation",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_double_radius(self):
model, X = self._fit_model(RadiusNeighborsRegressor())
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", DoubleTensorType([None, 4]))],
target_opset=TARGET_OPSET,
- options={id(model): {'optim': 'cdist'}})
+ options={id(model): {"optim": "cdist"}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float64)[:7],
- model, model_onnx,
- basename="SklearnRadiusNeighborsRegressor64")
+ model,
+ model_onnx,
+ basename="SklearnRadiusNeighborsRegressor64",
+ )
dump_data_and_model(
- (X + 10.).astype(numpy.float64)[:7],
- model, model_onnx,
- basename="SklearnRadiusNeighborsRegressor64")
+ (X + 10.0).astype(numpy.float64)[:7],
+ model,
+ model_onnx,
+ basename="SklearnRadiusNeighborsRegressor64",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_yint(self):
- model, X = self._fit_model(
- KNeighborsRegressor(n_neighbors=2), label_int=True)
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2), label_int=True)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressorYInt")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressorYInt",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_yint_radius(self):
- model, X = self._fit_model(
- RadiusNeighborsRegressor(), label_int=True)
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model, X = self._fit_model(RadiusNeighborsRegressor(), label_int=True)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, model_onnx,
- basename="SklearnRadiusNeighborsRegressorYInt")
+ model,
+ model_onnx,
+ basename="SklearnRadiusNeighborsRegressorYInt",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor2_1(self):
- model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1),
- n_targets=2)
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1), n_targets=2)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:3],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor2")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor2",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor2_1_radius(self):
model, X = self._fit_model_simple(
- RadiusNeighborsRegressor(algorithm="brute"),
- n_targets=2)
+ RadiusNeighborsRegressor(algorithm="brute"), n_targets=2
+ )
X = X[:-1]
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X.astype(numpy.float32)})[0]
exp = model.predict(X.astype(numpy.float32))
if any(numpy.isnan(got.ravel())):
# The model is unexpectedly producing nan values
@@ -290,299 +326,344 @@ def test_model_knn_regressor2_1_radius(self):
# and contains only 0 or 1 values.
# The output contains nan values on the first row
# but not on the second one.
- rows = ['--EXP--', str(exp), '--GOT--', str(got),
- '--EVERY-OUTPUT--']
- for out in enumerate_model_node_outputs(
- model_onnx, add_node=False):
+ rows = ["--EXP--", str(exp), "--GOT--", str(got), "--EVERY-OUTPUT--"]
+ for out in enumerate_model_node_outputs(model_onnx, add_node=False):
onx = select_model_inputs_outputs(model_onnx, out)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(
- None, {'input': X.astype(numpy.float32)})
- rows.append('--{}--'.format(out))
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
+ rows.append("--{}--".format(out))
rows.append(str(res))
- if (ort_version.startswith('1.4.') or
- ort_version.startswith('1.5.')):
+ if ort_version.startswith("1.4.") or ort_version.startswith("1.5."):
# TODO: investigate the regression in onnxruntime 1.4
# One broadcasted multiplication unexpectedly produces nan.
- whole = '\n'.join(rows)
+ whole = "\n".join(rows)
if "[ nan" in whole:
warnings.warn(whole)
return
raise AssertionError(whole)
- if (ort_version.startswith('1.3.') and
- sys.platform == 'win32'):
+ if ort_version.startswith("1.3.") and sys.platform == "win32":
# Same error but different line number for further
# investigation.
raise AssertionError(whole)
- raise AssertionError('\n'.join(rows))
+ raise AssertionError("\n".join(rows))
assert_almost_equal(exp, got, decimal=5)
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor2_1_opset(self):
- model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1),
- n_targets=2)
+ model, X = self._fit_model(KNeighborsRegressor(n_neighbors=1), n_targets=2)
for op in [TARGET_OPSET, 12, 11, 10, 9]:
if op > TARGET_OPSET:
continue
with self.subTest(opset=op):
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", FloatTensorType([None, 4]))],
- target_opset=op)
+ target_opset=op,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:3],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor2%d" % op)
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor2%d" % op,
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor2_2(self):
- model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2),
- n_targets=2)
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model, X = self._fit_model(KNeighborsRegressor(n_neighbors=2), n_targets=2)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:2],
- model, model_onnx,
- basename="SklearnKNeighborsRegressor2")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressor2",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @unittest.skipIf(TARGET_OPSET < 9,
- reason="needs higher target_opset")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @unittest.skipIf(TARGET_OPSET < 9, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_weights_distance_11(self):
model, X = self._fit_model(
- KNeighborsRegressor(
- weights="distance", algorithm="brute", n_neighbors=1))
+ KNeighborsRegressor(weights="distance", algorithm="brute", n_neighbors=1)
+ )
for op in sorted(set([9, 10, 11, 12, TARGET_OPSET])):
if op > TARGET_OPSET:
continue
with self.subTest(opset=op):
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", FloatTensorType([None, 4]))],
- target_opset=op)
+ target_opset=op,
+ )
if op < 12 and model_onnx.ir_version > 6:
raise AssertionError(
"ir_version ({}, op={}) must be <= 6.".format(
- model_onnx.ir_version, op))
+ model_onnx.ir_version, op
+ )
+ )
if op < 11 and model_onnx.ir_version > 5:
raise AssertionError(
"ir_version ({}, op={}) must be <= 5.".format(
- model_onnx.ir_version, op))
+ model_onnx.ir_version, op
+ )
+ )
if op < 10 and model_onnx.ir_version > 4:
raise AssertionError(
"ir_version ({}, op={}) must be <= 4.".format(
- model_onnx.ir_version, op))
+ model_onnx.ir_version, op
+ )
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:3],
- model, model_onnx,
- basename="SklearnKNeighborsRegressorWDist%d-Dec3" % op)
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressorWDist%d-Dec3" % op,
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_weights_distance_11_radius(self):
model, X = self._fit_model_simple(
- RadiusNeighborsRegressor(
- weights="distance", algorithm="brute", radius=100))
+ RadiusNeighborsRegressor(weights="distance", algorithm="brute", radius=100)
+ )
for op in sorted(set([TARGET_OPSET, 12, 11])):
if op > TARGET_OPSET:
continue
with self.subTest(opset=op):
model_onnx = convert_sklearn(
- model, "KNN regressor",
+ model,
+ "KNN regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=op)
+ target_opset=op,
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X.astype(numpy.float32)})[0]
exp = model.predict(X.astype(numpy.float32))
assert_almost_equal(exp, got.ravel(), decimal=3)
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_metric_cityblock(self):
model, X = self._fit_model(KNeighborsRegressor(metric="cityblock"))
- model_onnx = convert_sklearn(model, "KNN regressor",
- [("input", FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "KNN regressor",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, model_onnx,
- basename="SklearnKNeighborsRegressorMetricCityblock")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressorMetricCityblock",
+ )
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @unittest.skipIf(TARGET_OPSET < TARGET_OPSET,
- reason="needs higher target_opset")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @unittest.skipIf(TARGET_OPSET < TARGET_OPSET, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_binary_class(self):
- model, X = self._fit_model_binary_classification(
- KNeighborsClassifier())
+ model, X = self._fit_model_binary_classification(KNeighborsClassifier())
model_onnx = convert_sklearn(
model,
"KNN classifier binary",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
- basename="SklearnKNeighborsClassifierBinary")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsClassifierBinary",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
- @unittest.skipIf(TARGET_OPSET < 12,
- reason="needs higher target_opset")
+ @unittest.skipIf(TARGET_OPSET < 12, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_binary_class_radius(self):
- model, X = self._fit_model_binary_classification(
- RadiusNeighborsClassifier())
+ model, X = self._fit_model_binary_classification(RadiusNeighborsClassifier())
model_onnx = convert_sklearn(
- model, "KNN classifier binary",
+ model,
+ "KNN classifier binary",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
- basename="SklearnRadiusNeighborsClassifierBinary")
+ model,
+ model_onnx,
+ basename="SklearnRadiusNeighborsClassifierBinary",
+ )
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_multi_class(self):
- model, X = self._fit_model_multiclass_classification(
- KNeighborsClassifier())
+ model, X = self._fit_model_multiclass_classification(KNeighborsClassifier())
model_onnx = convert_sklearn(
model,
"KNN classifier multi-class",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
- basename="SklearnKNeighborsClassifierMulti")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsClassifierMulti",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
- @unittest.skipIf(TARGET_OPSET < 12,
- reason="needs higher target_opset")
+ @unittest.skipIf(TARGET_OPSET < 12, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_multi_class_radius(self):
model, X = self._fit_model_multiclass_classification(
- RadiusNeighborsClassifier())
+ RadiusNeighborsClassifier()
+ )
model_onnx = convert_sklearn(
- model, "KNN classifier multi-class",
+ model,
+ "KNN classifier multi-class",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'optim': 'cdist'}})
+ options={id(model): {"optim": "cdist"}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:5],
- model, model_onnx,
- basename="SklearnRadiusNeighborsClassifierMulti")
+ model,
+ model_onnx,
+ basename="SklearnRadiusNeighborsClassifierMulti",
+ )
- @unittest.skipIf(not onnx_built_with_ml(),
- reason="Requires ONNX-ML extension.")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_multi_class_string(self):
model, X = self._fit_model_multiclass_classification(
- KNeighborsClassifier(), use_string=True)
+ KNeighborsClassifier(), use_string=True
+ )
model_onnx = convert_sklearn(
model,
"KNN classifier multi-class",
[("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
- basename="SklearnKNeighborsClassifierMulti")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsClassifierMulti",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_weights_distance(self):
model, X = self._fit_model_multiclass_classification(
- KNeighborsClassifier(weights='distance'))
+ KNeighborsClassifier(weights="distance")
+ )
model_onnx = convert_sklearn(
- model, 'KNN classifier', [('input', FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "KNN classifier",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(numpy.float32)[:7], model, model_onnx,
- basename="SklearnKNeighborsClassifierWeightsDistance")
+ X.astype(numpy.float32)[:7],
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsClassifierWeightsDistance",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_metric_cityblock(self):
model, X = self._fit_model_multiclass_classification(
- KNeighborsClassifier(metric='cityblock'))
+ KNeighborsClassifier(metric="cityblock")
+ )
model_onnx = convert_sklearn(
- model, 'KNN classifier', [('input', FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "KNN classifier",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(numpy.float32)[:7], model, model_onnx,
- basename="SklearnKNeighborsClassifierMetricCityblock")
+ X.astype(numpy.float32)[:7],
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsClassifierMetricCityblock",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- KNeighborsClassifier(), n_classes=7, n_labels=3,
- n_samples=100, n_features=10)
- options = {id(model): {'zipmap': False}}
+ KNeighborsClassifier(),
+ n_classes=7,
+ n_labels=3,
+ n_samples=100,
+ n_features=10,
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
model,
"scikit-learn KNN Classifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test[:10], model, model_onnx,
- basename="SklearnKNNClassifierMultiLabel-Out0")
+ X_test[:10],
+ model,
+ model_onnx,
+ basename="SklearnKNNClassifierMultiLabel-Out0",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_int(self):
model, X = self._fit_model(KNeighborsRegressor())
@@ -591,37 +672,38 @@ def test_model_knn_regressor_int(self):
model,
"KNN regressor",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnKNNRegressorInt-Dec4"
+ X, model, model_onnx, basename="SklearnKNNRegressorInt-Dec4"
)
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_equal(self):
- X, y = datasets.make_regression(
- n_samples=1000, n_features=100, random_state=42)
+ X, y = datasets.make_regression(n_samples=1000, n_features=100, random_state=42)
X = X.astype(numpy.int64)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=42)
- model = KNeighborsRegressor(
- algorithm='brute', metric='manhattan').fit(X_train, y_train)
+ X, y, test_size=0.5, random_state=42
+ )
+ model = KNeighborsRegressor(algorithm="brute", metric="manhattan").fit(
+ X_train, y_train
+ )
model_onnx = convert_sklearn(
- model, 'knn',
- [('input', Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "knn",
+ [("input", Int64TensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
exp = model.predict(X_test)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': numpy.array(X_test)})[0].ravel()
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": numpy.array(X_test)})[0].ravel()
        # The conversion has discrepancies when
# neighbours are at the exact same distance.
@@ -637,47 +719,57 @@ def test_model_knn_regressor_equal(self):
# assert_almost_equal(exp, res)
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_multi_class_nocl(self):
model, X = fit_classification_model(
- KNeighborsClassifier(),
- 2, label_string=True)
+ KNeighborsClassifier(), 2, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "KNN multi-class nocl",
+ model,
+ "KNN multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X, model, model_onnx, classes=model.classes_,
- basename="SklearnKNNMultiNoCl", verbose=False)
+ X,
+ model,
+ model_onnx,
+ classes=model.classes_,
+ basename="SklearnKNNMultiNoCl",
+ verbose=False,
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor2_2_pipee(self):
- pipe = make_pipeline(StandardScaler(),
- KNeighborsClassifier())
+ pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())
model, X = self._fit_model_binary_classification(pipe)
model_onnx = convert_sklearn(
- model, "KNN pipe",
+ model,
+ "KNN pipe",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32)[:2],
- model, model_onnx,
- basename="SklearnKNeighborsRegressorPipe2")
+ model,
+ model_onnx,
+ basename="SklearnKNeighborsRegressorPipe2",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_onnx_test_knn_transform(self):
iris = datasets.load_iris()
@@ -690,179 +782,200 @@ def test_onnx_test_knn_transform(self):
for to in (9, 10, 11):
if to > TARGET_OPSET:
break
- model_def = to_onnx(clr, X_train.astype(numpy.float32),
- target_opset=to)
+ model_def = to_onnx(clr, X_train.astype(numpy.float32), target_opset=to)
oinf = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
X_test = X_test[:3]
- y = oinf.run(None, {'X': X_test.astype(numpy.float32)})
+ y = oinf.run(None, {"X": X_test.astype(numpy.float32)})
dist, ind = clr.kneighbors(X_test)
assert_almost_equal(dist, DataFrame(y[1]).values, decimal=5)
assert_almost_equal(ind, y[0])
- @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
- reason="new in 0.22")
+ @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_default(self):
model, X_test = fit_classification_model(
- NeighborhoodComponentsAnalysis(random_state=42), 3)
+ NeighborhoodComponentsAnalysis(random_state=42), 3
+ )
model_onnx = convert_sklearn(
- model, "NCA",
+ model,
+ "NCA",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnNCADefault")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADefault")
- @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
- reason="new in 0.22")
+ @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_identity(self):
model, X_test = fit_classification_model(
NeighborhoodComponentsAnalysis(
- init='identity', max_iter=4, random_state=42), 3)
+ init="identity", max_iter=4, random_state=42
+ ),
+ 3,
+ )
model_onnx = convert_sklearn(
- model, "NCA",
+ model,
+ "NCA",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X_test, model,
- model_onnx, basename="SklearnNCAIdentity")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAIdentity")
- @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
- reason="new in 0.22")
+ @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_double(self):
model, X_test = fit_classification_model(
- NeighborhoodComponentsAnalysis(
- n_components=2, max_iter=4, random_state=42), 3)
+ NeighborhoodComponentsAnalysis(n_components=2, max_iter=4, random_state=42),
+ 3,
+ )
X_test = X_test.astype(numpy.float64)
model_onnx = convert_sklearn(
- model, "NCA",
+ model,
+ "NCA",
[("input", DoubleTensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnNCADouble")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADouble")
- @unittest.skipIf(NeighborhoodComponentsAnalysis is None,
- reason="new in 0.22")
+ @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_int(self):
model, X_test = fit_classification_model(
- NeighborhoodComponentsAnalysis(
- init='pca', max_iter=4, random_state=42), 3, is_int=True)
+ NeighborhoodComponentsAnalysis(init="pca", max_iter=4, random_state=42),
+ 3,
+ is_int=True,
+ )
model_onnx = convert_sklearn(
- model, "NCA",
+ model,
+ "NCA",
[("input", Int64TensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnNCAInt")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAInt")
- @unittest.skipIf(KNeighborsTransformer is None,
- reason="new in 0.22")
+ @unittest.skipIf(KNeighborsTransformer is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_k_neighbours_transformer_distance(self):
model, X_test = fit_classification_model(
- KNeighborsTransformer(
- n_neighbors=4, mode='distance'), 2)
+ KNeighborsTransformer(n_neighbors=4, mode="distance"), 2
+ )
model_onnx = convert_sklearn(
- model, "KNN transformer",
+ model,
+ "KNN transformer",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKNNTransformerDistance")
+ X_test, model, model_onnx, basename="SklearnKNNTransformerDistance"
+ )
- @unittest.skipIf(KNeighborsTransformer is None,
- reason="new in 0.22")
+ @unittest.skipIf(KNeighborsTransformer is None, reason="new in 0.22")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_k_neighbours_transformer_connectivity(self):
model, X_test = fit_classification_model(
- KNeighborsTransformer(
- n_neighbors=3, mode='connectivity'), 3)
+ KNeighborsTransformer(n_neighbors=3, mode="connectivity"), 3
+ )
model_onnx = convert_sklearn(
- model, "KNN transformer",
+ model,
+ "KNN transformer",
[("input", FloatTensorType((None, X_test.shape[1])))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnKNNTransformerConnectivity")
+ X_test, model, model_onnx, basename="SklearnKNNTransformerConnectivity"
+ )
- @unittest.skipIf(KNNImputer is None,
- reason="new in 0.22")
+ @unittest.skipIf(KNNImputer is None, reason="new in 0.22")
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_knn_imputer(self):
x_train = numpy.array(
- [[1, 2, numpy.nan, 12], [3, numpy.nan, 3, 13],
- [1, 4, numpy.nan, 1], [numpy.nan, 4, 3, 12]], dtype=numpy.float32)
+ [
+ [1, 2, numpy.nan, 12],
+ [3, numpy.nan, 3, 13],
+ [1, 4, numpy.nan, 1],
+ [numpy.nan, 4, 3, 12],
+ ],
+ dtype=numpy.float32,
+ )
x_test = numpy.array(
[[1.3, 2.4, numpy.nan, 1], [-1.3, numpy.nan, 3.1, numpy.nan]],
- dtype=numpy.float32)
- model = KNNImputer(n_neighbors=3, metric='nan_euclidean').fit(x_train)
+ dtype=numpy.float32,
+ )
+ model = KNNImputer(n_neighbors=3, metric="nan_euclidean").fit(x_train)
for opset in [TARGET_OPSET, 9, 10, 11, 12]:
if opset > TARGET_OPSET:
continue
model_onnx = convert_sklearn(
- model, "KNN imputer",
+ model,
+ "KNN imputer",
[("input", FloatTensorType((None, x_test.shape[1])))],
- target_opset=opset)
+ target_opset=opset,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- x_test, model, model_onnx,
- basename="SklearnKNNImputer%d" % opset)
+ x_test, model, model_onnx, basename="SklearnKNNImputer%d" % opset
+ )
- @unittest.skipIf(KNNImputer is None,
- reason="new in 0.22")
+ @unittest.skipIf(KNNImputer is None, reason="new in 0.22")
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_knn_imputer_cdist(self):
x_train = numpy.array(
- [[1, 2, numpy.nan, 12], [3, numpy.nan, 3, 13],
- [1, 4, numpy.nan, 1], [numpy.nan, 4, 3, 12]], dtype=numpy.float32)
+ [
+ [1, 2, numpy.nan, 12],
+ [3, numpy.nan, 3, 13],
+ [1, 4, numpy.nan, 1],
+ [numpy.nan, 4, 3, 12],
+ ],
+ dtype=numpy.float32,
+ )
x_test = numpy.array(
[[1.3, 2.4, numpy.nan, 1], [-1.3, numpy.nan, 3.1, numpy.nan]],
- dtype=numpy.float32)
- model = KNNImputer(n_neighbors=3, metric='nan_euclidean').fit(x_train)
+ dtype=numpy.float32,
+ )
+ model = KNNImputer(n_neighbors=3, metric="nan_euclidean").fit(x_train)
with self.assertRaises(NameError):
convert_sklearn(
- model, "KNN imputer",
+ model,
+ "KNN imputer",
[("input", FloatTensorType((None, x_test.shape[1])))],
target_opset=TARGET_OPSET,
- options={id(model): {'optim2': 'cdist'}})
+ options={id(model): {"optim2": "cdist"}},
+ )
for opset in [TARGET_OPSET, 12, 11, 10, 9]:
if opset > TARGET_OPSET:
continue
model_onnx = convert_sklearn(
- model, "KNN imputer",
+ model,
+ "KNN imputer",
[("input", FloatTensorType((None, x_test.shape[1])))],
target_opset=opset,
- options={id(model): {'optim': 'cdist'}})
+ options={id(model): {"optim": "cdist"}},
+ )
self.assertIsNotNone(model_onnx)
self.assertIn('op_type: "cdist"', str(model_onnx).lower())
- self.assertNotIn('scan', str(model_onnx).lower())
+ self.assertNotIn("scan", str(model_onnx).lower())
dump_data_and_model(
- x_test, model, model_onnx,
- basename="SklearnKNNImputer%dcdist" % opset)
+ x_test, model, model_onnx, basename="SklearnKNNImputer%dcdist" % opset
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="needs higher target_opset")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_iris_regressor_multi_reg(self):
iris = datasets.load_iris()
@@ -870,15 +983,21 @@ def test_model_knn_iris_regressor_multi_reg(self):
y = iris.target.astype(numpy.float32)
y = numpy.vstack([y, 1 - y, y + 10]).T
model = KNeighborsRegressor(
- algorithm='brute', weights='distance', n_neighbors=7)
+ algorithm="brute", weights="distance", n_neighbors=7
+ )
model.fit(X[:13], y[:13])
- onx = to_onnx(model, X[:1],
- options={id(model): {'optim': 'cdist'}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model,
+ X[:1],
+ options={id(model): {"optim": "cdist"}},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, onx,
- basename="SklearnKNeighborsRegressorMReg")
+ model,
+ onx,
+ basename="SklearnKNeighborsRegressorMReg",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@ignore_warnings(category=DeprecationWarning)
@@ -887,26 +1006,31 @@ def test_model_knn_iris_regressor_multi_reg_radius(self):
X = iris.data.astype(numpy.float32)
y = iris.target.astype(numpy.float32)
y = numpy.vstack([y, 1 - y, y + 10]).T
- model = KNeighborsRegressor(
- algorithm='brute', weights='distance')
+ model = KNeighborsRegressor(algorithm="brute", weights="distance")
model.fit(X[:13], y[:13])
- onx = to_onnx(model, X[:1],
- options={id(model): {'optim': 'cdist'}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model,
+ X[:1],
+ options={id(model): {"optim": "cdist"}},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
X.astype(numpy.float32)[:7],
- model, onx,
- basename="SklearnRadiusNeighborsRegressorMReg")
+ model,
+ onx,
+ basename="SklearnRadiusNeighborsRegressorMReg",
+ )
dump_data_and_model(
(X + 0.1).astype(numpy.float32)[:7],
- model, onx,
- basename="SklearnRadiusNeighborsRegressorMReg")
+ model,
+ onx,
+ basename="SklearnRadiusNeighborsRegressorMReg",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="needs higher target_opset")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_iris_classifier_multi_reg2_weight(self):
iris = datasets.load_iris()
@@ -914,16 +1038,21 @@ def test_model_knn_iris_classifier_multi_reg2_weight(self):
y = iris.target.astype(numpy.int64)
y = numpy.vstack([(y + 1) % 2, y % 2]).T
model = KNeighborsClassifier(
- algorithm='brute', weights='distance', n_neighbors=7)
+ algorithm="brute", weights="distance", n_neighbors=7
+ )
model.fit(X[:13], y[:13])
- onx = to_onnx(model, X[:1],
- options={id(model): {'optim': 'cdist',
- 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model,
+ X[:1],
+ options={id(model): {"optim": "cdist", "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
X.astype(numpy.float32)[:11],
- model, onx,
- basename="SklearnKNeighborsClassifierMReg2-Out0")
+ model,
+ onx,
+ basename="SklearnKNeighborsClassifierMReg2-Out0",
+ )
@unittest.skipIf(dont_test_radius(), reason="not available")
@ignore_warnings(category=DeprecationWarning)
@@ -932,23 +1061,25 @@ def test_model_knn_iris_classifier_multi_reg2_weight_radius(self):
X = iris.data.astype(numpy.float32)
y = iris.target.astype(numpy.int64)
y = numpy.vstack([(y + 1) % 2, y % 2]).T
- model = RadiusNeighborsClassifier(
- algorithm='brute', weights='distance')
+ model = RadiusNeighborsClassifier(algorithm="brute", weights="distance")
model.fit(X[:13], y[:13])
- onx = to_onnx(model, X[:1],
- options={id(model): {'optim': 'cdist',
- 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model,
+ X[:1],
+ options={id(model): {"optim": "cdist", "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
X.astype(numpy.float32)[:11],
- model, onx,
- basename="SklearnRadiusNeighborsClassifierMReg2-Out0")
+ model,
+ onx,
+ basename="SklearnRadiusNeighborsClassifierMReg2-Out0",
+ )
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("0.5.0"),
- reason="not available")
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="needs higher target_opset")
+ pv.Version(ort_version) < pv.Version("0.5.0"), reason="not available"
+ )
+ @unittest.skipIf(TARGET_OPSET < 11, reason="needs higher target_opset")
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_iris_classifier_multi_reg3_weight(self):
iris = datasets.load_iris()
@@ -956,17 +1087,21 @@ def test_model_knn_iris_classifier_multi_reg3_weight(self):
y = iris.target.astype(numpy.int64)
y = numpy.vstack([y % 2, y % 2, (y + 1) % 2]).T
model = KNeighborsClassifier(
- algorithm='brute', weights='distance',
- n_neighbors=7)
+ algorithm="brute", weights="distance", n_neighbors=7
+ )
model.fit(X[:13], y[:13])
- onx = to_onnx(model, X[:1],
- options={id(model): {'optim': 'cdist',
- 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ model,
+ X[:1],
+ options={id(model): {"optim": "cdist", "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
X.astype(numpy.float32)[:11],
- model, onx,
- basename="SklearnKNeighborsClassifierMReg3-Out0")
+ model,
+ onx,
+ basename="SklearnKNeighborsClassifierMReg3-Out0",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_normalizer_converter.py b/tests/test_sklearn_normalizer_converter.py
index 9559871f3..5975da08b 100644
--- a/tests/test_sklearn_normalizer_converter.py
+++ b/tests/test_sklearn_normalizer_converter.py
@@ -8,7 +8,10 @@
from sklearn.preprocessing import Normalizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- Int64TensorType, FloatTensorType, DoubleTensorType)
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+)
from test_utils import dump_data_and_model, TARGET_OPSET
@@ -18,9 +21,11 @@ def test_model_normalizer(self):
x = numpy.random.randn(10, 1).astype(numpy.int64)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", Int64TensorType([None, 1]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.assertTrue(len(model_onnx.graph.node) == 1)
@@ -29,88 +34,112 @@ def test_model_normalizer_blackop(self):
x = numpy.random.randn(10, 3).astype(numpy.float32)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", FloatTensorType([None, 3]))],
target_opset=TARGET_OPSET,
- black_op={"Normalizer"})
+ black_op={"Normalizer"},
+ )
self.assertNotIn('op_type: "Normalizer', str(model_onnx))
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
- model, model_onnx,
- basename="SklearnNormalizerL1BlackOp-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL1BlackOp-SkipDim1",
+ )
def test_model_normalizer_float_l1(self):
model = Normalizer(norm="l1")
x = numpy.random.randn(10, 3).astype(numpy.float32)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.assertTrue(len(model_onnx.graph.node) == 1)
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
- model, model_onnx,
- basename="SklearnNormalizerL1-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL1-SkipDim1",
+ )
def test_model_normalizer_float_l2(self):
model = Normalizer(norm="l2")
x = numpy.random.randn(10, 3).astype(numpy.float32)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.assertTrue(len(model_onnx.graph.node) == 1)
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
- model, model_onnx,
- basename="SklearnNormalizerL2-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL2-SkipDim1",
+ )
def test_model_normalizer_double_l1(self):
model = Normalizer(norm="l1")
x = numpy.random.randn(10, 3).astype(numpy.float64)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float64),
- model, model_onnx,
- basename="SklearnNormalizerL1Double-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL1Double-SkipDim1",
+ )
def test_model_normalizer_double_l2(self):
model = Normalizer(norm="l2")
x = numpy.random.randn(10, 3).astype(numpy.float64)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float64),
- model, model_onnx,
- basename="SklearnNormalizerL2Double-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL2Double-SkipDim1",
+ )
def test_model_normalizer_float_noshape(self):
model = Normalizer(norm="l2")
x = numpy.random.randn(10, 3).astype(numpy.float32)
model.fit(x)
model_onnx = convert_sklearn(
- model, "scikit-learn normalizer",
+ model,
+ "scikit-learn normalizer",
[("input", FloatTensorType([]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
self.assertTrue(len(model_onnx.graph.node) == 1)
dump_data_and_model(
numpy.array([[1, -1, 3], [3, 1, 2]], dtype=numpy.float32),
- model, model_onnx,
- basename="SklearnNormalizerL2NoShape-SkipDim1")
+ model,
+ model_onnx,
+ basename="SklearnNormalizerL2NoShape-SkipDim1",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_one_hot_encoder_converter.py b/tests/test_sklearn_one_hot_encoder_converter.py
index 36ac8a191..60d5fd36d 100644
--- a/tests/test_sklearn_one_hot_encoder_converter.py
+++ b/tests/test_sklearn_one_hot_encoder_converter.py
@@ -20,84 +20,102 @@
def one_hot_encoder_supports_string():
# pv.Version does not work with development versions
- vers = '.'.join(sklearn_version.split('.')[:2])
+ vers = ".".join(sklearn_version.split(".")[:2])
return pv.Version(vers) >= pv.Version("0.20.0")
def one_hot_encoder_supports_drop():
# pv.Version does not work with development versions
- vers = '.'.join(sklearn_version.split('.')[:2])
+ vers = ".".join(sklearn_version.split(".")[:2])
return pv.Version(vers) >= pv.Version("0.21.0")
class TestSklearnOneHotEncoderConverter(unittest.TestCase):
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder did not have categories_ before 0.20")
+ reason="OneHotEncoder did not have categories_ before 0.20",
+ )
def test_model_one_hot_encoder(self):
- model = OneHotEncoder(categories='auto')
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int64)
+ model = OneHotEncoder(categories="auto")
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn one-hot encoder",
+ model,
+ "scikit-learn one-hot encoder",
[("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderInt64-SkipDim1")
+ data, model, model_onnx, basename="SklearnOneHotEncoderInt64-SkipDim1"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder did not have categories_ before 0.20")
+ reason="OneHotEncoder did not have categories_ before 0.20",
+ )
def test_model_one_hot_encoder_int32(self):
- model = OneHotEncoder(categories='auto')
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int32)
+ model = OneHotEncoder(categories="auto")
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn one-hot encoder",
+ model,
+ "scikit-learn one-hot encoder",
[("input", Int32TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
str_model_onnx = str(model_onnx)
assert "int64_data" in str_model_onnx
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderInt32-SkipDim1")
+ data, model, model_onnx, basename="SklearnOneHotEncoderInt32-SkipDim1"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder did not have categories_ before 0.20")
+ reason="OneHotEncoder did not have categories_ before 0.20",
+ )
def test_model_one_hot_encoder_int32_scaler(self):
- model = make_pipeline(OneHotEncoder(categories='auto', sparse=False),
- RobustScaler())
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int32)
+ model = make_pipeline(
+ OneHotEncoder(categories="auto", sparse=False), RobustScaler()
+ )
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn one-hot encoder",
+ model,
+ "scikit-learn one-hot encoder",
[("input", Int32TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
str_model_onnx = str(model_onnx)
assert "int64_data" in str_model_onnx
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderInt32Scaler-SkipDim1")
+ data, model, model_onnx, basename="SklearnOneHotEncoderInt32Scaler-SkipDim1"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_drop(),
- reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+ reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+ )
def test_one_hot_encoder_mixed_string_int_drop(self):
data = [
["c0.4", "c0.2", 3],
@@ -108,124 +126,147 @@ def test_one_hot_encoder_mixed_string_int_drop(self):
["c0.2", "c2.2", 1],
]
test = [["c0.2", "c2.2", 1]]
- model = OneHotEncoder(categories="auto", drop=['c0.4', 'c0.2', 3])
+ model = OneHotEncoder(categories="auto", drop=["c0.4", "c0.2", 3])
model.fit(data)
inputs = [
("input1", StringTensorType([None, 2])),
("input2", Int64TensorType([None, 1])),
]
model_onnx = convert_sklearn(
- model, "one-hot encoder", inputs, target_opset=TARGET_OPSET)
+ model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- test, model, model_onnx, verbose=False,
- basename="SklearnOneHotEncoderMixedStringIntDrop")
+ test,
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnOneHotEncoderMixedStringIntDrop",
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder does not support strings in 0.19")
+ reason="OneHotEncoder does not support strings in 0.19",
+ )
def test_one_hot_encoder_onecat(self):
data = [["cat"], ["cat"]]
model = OneHotEncoder(categories="auto")
model.fit(data)
inputs = [("input1", StringTensorType([None, 1]))]
- model_onnx = convert_sklearn(model, "one-hot encoder one string cat",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, "one-hot encoder one string cat", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderOneStringCat")
+ data, model, model_onnx, basename="SklearnOneHotEncoderOneStringCat"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder does not support strings in 0.19")
+ reason="OneHotEncoder does not support strings in 0.19",
+ )
def test_one_hot_encoder_twocats(self):
data = [["cat2"], ["cat1"]]
model = OneHotEncoder(categories="auto")
model.fit(data)
inputs = [("input1", StringTensorType([None, 1]))]
- model_onnx = convert_sklearn(model, "one-hot encoder two string cats",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, "one-hot encoder two string cats", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderTwoStringCat")
+ data, model, model_onnx, basename="SklearnOneHotEncoderTwoStringCat"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_drop(),
- reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+ reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+ )
def test_one_hot_encoder_string_drop_first(self):
- data = [['Male', 'First'], ['Female', 'First'], ['Female', 'Second']]
- test_data = [['Male', 'Second']]
- model = OneHotEncoder(drop='first',
- categories='auto')
+ data = [["Male", "First"], ["Female", "First"], ["Female", "Second"]]
+ test_data = [["Male", "Second"]]
+ model = OneHotEncoder(drop="first", categories="auto")
model.fit(data)
inputs = [
("input1", StringTensorType([None, 1])),
("input2", StringTensorType([None, 1])),
]
model_onnx = convert_sklearn(
- model, "one-hot encoder", inputs, target_opset=TARGET_OPSET)
+ model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- test_data, model, model_onnx,
- basename="SklearnOneHotEncoderStringDropFirst")
+ test_data, model, model_onnx, basename="SklearnOneHotEncoderStringDropFirst"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder does not support this in 0.19")
+ reason="OneHotEncoder does not support this in 0.19",
+ )
def test_model_one_hot_encoder_list_sparse(self):
- model = OneHotEncoder(categories=[[0, 1, 4, 5],
- [1, 2, 3, 5],
- [0, 3, 4, 6]],
- sparse=True)
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int64)
+ model = OneHotEncoder(
+ categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=True
+ )
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn one-hot encoder",
+ model,
+ "scikit-learn one-hot encoder",
[("input1", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderCatSparse-SkipDim1")
+ data, model, model_onnx, basename="SklearnOneHotEncoderCatSparse-SkipDim1"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_string(),
- reason="OneHotEncoder does not support this in 0.19")
+ reason="OneHotEncoder does not support this in 0.19",
+ )
def test_model_one_hot_encoder_list_dense(self):
- model = OneHotEncoder(categories=[[0, 1, 4, 5],
- [1, 2, 3, 5],
- [0, 3, 4, 6]],
- sparse=False)
- data = numpy.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=numpy.int64)
+ model = OneHotEncoder(
+ categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=False
+ )
+ data = numpy.array(
+ [[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
+ )
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn one-hot encoder",
+ model,
+ "scikit-learn one-hot encoder",
[("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderCatDense-SkipDim1")
+ data, model, model_onnx, basename="SklearnOneHotEncoderCatDense-SkipDim1"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_drop(),
- reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+ reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+ )
def test_one_hot_encoder_int_drop(self):
data = [
[1, 2, 3],
@@ -236,25 +277,26 @@ def test_one_hot_encoder_int_drop(self):
[0, 3, 3],
]
test = numpy.array([[2, 2, 1], [4, 2, 1]], dtype=numpy.int64)
- model = OneHotEncoder(categories="auto", drop=[0, 1, 3],
- dtype=numpy.float32)
+ model = OneHotEncoder(categories="auto", drop=[0, 1, 3], dtype=numpy.float32)
model.fit(data)
inputs = [
("input1", Int64TensorType([None, 3])),
]
model_onnx = convert_sklearn(
- model, "one-hot encoder", inputs,
- target_opset=TARGET_OPSET)
+ model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- test, model, model_onnx,
- basename="SklearnOneHotEncoderIntDrop")
+ test, model, model_onnx, basename="SklearnOneHotEncoderIntDrop"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_drop(),
- reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+ reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+ )
def test_one_hot_encoder_int_drop_first(self):
data = [
[1, 2, 3],
@@ -265,37 +307,40 @@ def test_one_hot_encoder_int_drop_first(self):
[0, 3, 3],
]
test = numpy.array([[2, 2, 1], [1, 3, 3]], dtype=numpy.int64)
- model = OneHotEncoder(categories="auto", drop='first',
- dtype=numpy.int64)
+ model = OneHotEncoder(categories="auto", drop="first", dtype=numpy.int64)
model.fit(data)
inputs = [
("input1", Int64TensorType([None, 3])),
]
model_onnx = convert_sklearn(
- model, "one-hot encoder", inputs, target_opset=TARGET_OPSET)
+ model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- test, model, model_onnx,
- basename="SklearnOneHotEncoderIntDropFirst")
+ test, model, model_onnx, basename="SklearnOneHotEncoderIntDropFirst"
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@unittest.skipIf(
not one_hot_encoder_supports_drop(),
- reason="OneHotEncoder does not support drop in scikit versions < 0.21")
+ reason="OneHotEncoder does not support drop in scikit versions < 0.21",
+ )
def test_one_hot_encoder_string_drop_first_2(self):
- data = [['Male', 'First'], ['Female', 'First'], ['Female', 'Second']]
- model = OneHotEncoder(drop='first')
+ data = [["Male", "First"], ["Female", "First"], ["Female", "Second"]]
+ model = OneHotEncoder(drop="first")
model.fit(data)
inputs = [
("input", StringTensorType([None, 2])),
]
model_onnx = convert_sklearn(
- model, "one-hot encoder", inputs, target_opset=TARGET_OPSET)
+ model, "one-hot encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOneHotEncoderStringDropFirst2")
+ data, model, model_onnx, basename="SklearnOneHotEncoderStringDropFirst2"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_one_vs_one_classifier_converter.py b/tests/test_sklearn_one_vs_one_classifier_converter.py
index c2c19081b..2cebe87cf 100644
--- a/tests/test_sklearn_one_vs_one_classifier_converter.py
+++ b/tests/test_sklearn_one_vs_one_classifier_converter.py
@@ -12,113 +12,127 @@
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from skl2onnx import convert_sklearn
-from skl2onnx.common.data_types import (
- DoubleTensorType,
- FloatTensorType)
+from skl2onnx.common.data_types import DoubleTensorType, FloatTensorType
from test_utils import TARGET_OPSET
warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning)
-ort_version = '.'.join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestOneVsOneClassifierConverter(unittest.TestCase):
-
def test_one_vs_one_classifier_converter_linearsvc(self):
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.33, shuffle=True, random_state=0)
- model = OneVsOneClassifier(LinearSVC(random_state=0)).fit(
- X_train, y_train)
+ X, y, test_size=0.33, shuffle=True, random_state=0
+ )
+ model = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)
exp_label = model.predict(X_test[:10])
exp_prob = model.decision_function(X_test[:10])
model_onnx = convert_sklearn(
- model, "scikit-learn OneVsOne Classifier",
+ model,
+ "scikit-learn OneVsOne Classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
XI = X_test[:10].astype(np.float32)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': XI})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": XI})
assert_almost_equal(exp_label.ravel(), got[0].ravel())
assert_almost_equal(exp_prob, got[1])
def test_one_vs_one_classifier_converter_logisticregression(self):
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.33, shuffle=True, random_state=0)
+ X, y, test_size=0.33, shuffle=True, random_state=0
+ )
model = OneVsOneClassifier(LogisticRegression(random_state=0)).fit(
- X_train, y_train)
+ X_train, y_train
+ )
exp_label = model.predict(X_test[:10])
exp_prob = model.decision_function(X_test[:10])
model_onnx = convert_sklearn(
- model, "scikit-learn OneVsOne Classifier",
+ model,
+ "scikit-learn OneVsOne Classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
XI = X_test[:10].astype(np.float32)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': XI})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": XI})
assert_almost_equal(exp_label.ravel(), got[0].ravel())
assert_almost_equal(exp_prob, got[1])
def test_one_vs_one_classifier_converter_logisticregression_double(self):
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.33, shuffle=True, random_state=0)
+ X, y, test_size=0.33, shuffle=True, random_state=0
+ )
model = OneVsOneClassifier(LogisticRegression(random_state=0)).fit(
- X_train, y_train)
+ X_train, y_train
+ )
exp_label = model.predict(X_test[:10])
exp_prob = model.decision_function(X_test[:10])
model_onnx = convert_sklearn(
- model, "scikit-learn OneVsOne Classifier",
+ model,
+ "scikit-learn OneVsOne Classifier",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
XI = X_test[:10].astype(np.float64)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': XI})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": XI})
assert_almost_equal(exp_label.ravel(), got[0].ravel())
assert_almost_equal(exp_prob, got[1])
def test_one_vs_one_classifier_converter_decisiontree(self):
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.33, shuffle=True, random_state=0)
+ X, y, test_size=0.33, shuffle=True, random_state=0
+ )
model = OneVsOneClassifier(DecisionTreeClassifier(max_depth=3)).fit(
- X_train, y_train)
+ X_train, y_train
+ )
limit = 10
exp_label = model.predict(X_test[:limit])
exp_prob = model.decision_function(X_test[:limit])
model_onnx = convert_sklearn(
- model, "scikit-learn OneVsOne Classifier",
+ model,
+ "scikit-learn OneVsOne Classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
XI = X_test[:limit].astype(np.float32)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': XI})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": XI})
assert_almost_equal(exp_label.ravel(), got[0].ravel())
assert_almost_equal(exp_prob, got[1])
if __name__ == "__main__":
- # TestOneVsOneClassifierConverter().test_one_vs_one_classifier_converter_logisticregression()
unittest.main()
diff --git a/tests/test_sklearn_one_vs_rest_classifier_converter.py b/tests/test_sklearn_one_vs_rest_classifier_converter.py
index 3032ae8d4..10c6bafbd 100644
--- a/tests/test_sklearn_one_vs_rest_classifier_converter.py
+++ b/tests/test_sklearn_one_vs_rest_classifier_converter.py
@@ -5,8 +5,7 @@
import numpy as np
from numpy.testing import assert_almost_equal
from onnxruntime import InferenceSession, __version__ as ort_version
-from sklearn.ensemble import (
- GradientBoostingClassifier, GradientBoostingRegressor)
+from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neural_network import MLPClassifier, MLPRegressor
@@ -15,25 +14,25 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import ConvergenceWarning
from sklearn.svm import LinearSVC
+
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
from sklearn.utils.testing import ignore_warnings
from skl2onnx import convert_sklearn
-from skl2onnx.common.data_types import (
- FloatTensorType,
- Int64TensorType)
+from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from test_utils import (
dump_data_and_model,
dump_multiple_classification,
fit_classification_model,
fit_multilabel_classification_model,
- TARGET_OPSET)
+ TARGET_OPSET,
+)
warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning)
-ort_version = '.'.join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestOneVsRestClassifierConverter(unittest.TestCase):
@@ -41,35 +40,32 @@ class TestOneVsRestClassifierConverter(unittest.TestCase):
@ignore_warnings(category=warnings_to_skip)
def test_ovr_linear_svc(self):
model = OneVsRestClassifier(LinearSVC())
- dump_multiple_classification(
- model, target_opset=TARGET_OPSET, verbose=False)
+ dump_multiple_classification(model, target_opset=TARGET_OPSET, verbose=False)
@ignore_warnings(category=warnings_to_skip)
def test_ovr_logistic_regression(self):
model = OneVsRestClassifier(LogisticRegression())
- dump_multiple_classification(
- model, target_opset=TARGET_OPSET)
+ dump_multiple_classification(model, target_opset=TARGET_OPSET)
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('1.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("1.4.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_rf(self):
- model = OneVsRestClassifier(
- RandomForestClassifier(n_estimators=2, max_depth=2))
- model, X = fit_classification_model(
- model, 3, is_int=True, n_features=3)
+ model = OneVsRestClassifier(RandomForestClassifier(n_estimators=2, max_depth=2))
+ model, X = fit_classification_model(model, 3, is_int=True, n_features=3)
model_onnx = convert_sklearn(
- model, initial_types=[
- ('input', Int64TensorType([None, X.shape[1]]))],
+ model,
+ initial_types=[("input", Int64TensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
XI = X.astype(np.int64)
- got = sess.run(None, {'input': XI})
+ got = sess.run(None, {"input": XI})
exp_label = model.predict(XI)
exp_proba = model.predict_proba(XI)
assert_almost_equal(exp_proba, got[1], decimal=5)
@@ -85,8 +81,8 @@ def test_ovr_rf(self):
assert_almost_equal(exp_label, got[0])
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('1.3.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_rf_multilabel_float(self):
for opset in [12, TARGET_OPSET]:
@@ -94,20 +90,26 @@ def test_ovr_rf_multilabel_float(self):
continue
with self.subTest(opset=opset):
model = OneVsRestClassifier(
- RandomForestClassifier(n_estimators=2, max_depth=3))
+ RandomForestClassifier(n_estimators=2, max_depth=3)
+ )
model, X = fit_multilabel_classification_model(
- model, 3, is_int=False, n_features=5)
+ model, 3, is_int=False, n_features=5
+ )
model_onnx = convert_sklearn(
- model, initial_types=[
- ('input', FloatTensorType([None, X.shape[1]]))],
- target_opset=opset)
+ model,
+ initial_types=[("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=opset,
+ )
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnOVRRFMultiLabelFloat%d" % opset)
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnOVRRFMultiLabelFloat%d" % opset,
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('1.3.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_rf_multilabel_float_11(self):
for opset in [9, 10, 11]:
@@ -115,21 +117,27 @@ def test_ovr_rf_multilabel_float_11(self):
continue
with self.subTest(opset=opset):
model = OneVsRestClassifier(
- RandomForestClassifier(n_estimators=2, max_depth=3))
+ RandomForestClassifier(n_estimators=2, max_depth=3)
+ )
model, X = fit_multilabel_classification_model(
- model, 3, is_int=False, n_features=5)
+ model, 3, is_int=False, n_features=5
+ )
model_onnx = convert_sklearn(
- model, initial_types=[
- ('input', FloatTensorType([None, X.shape[1]]))],
- target_opset=opset)
+ model,
+ initial_types=[("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=opset,
+ )
self.assertNotIn('"Clip"', str(model_onnx))
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnOVRRFMultiLabelFloat%d" % opset)
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnOVRRFMultiLabelFloat%d" % opset,
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('1.3.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_rf_multilabel_int(self):
for opset in [12, TARGET_OPSET]:
@@ -137,20 +145,26 @@ def test_ovr_rf_multilabel_int(self):
continue
with self.subTest(opset=opset):
model = OneVsRestClassifier(
- RandomForestClassifier(n_estimators=2, max_depth=3))
+ RandomForestClassifier(n_estimators=2, max_depth=3)
+ )
model, X = fit_multilabel_classification_model(
- model, 3, is_int=True, n_features=5)
+ model, 3, is_int=True, n_features=5
+ )
model_onnx = convert_sklearn(
- model, initial_types=[
- ('input', Int64TensorType([None, X.shape[1]]))],
- target_opset=opset)
+ model,
+ initial_types=[("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=opset,
+ )
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnOVRRFMultiLabelInt64%d" % opset)
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnOVRRFMultiLabelInt64%d" % opset,
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('1.3.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("1.3.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_rf_multilabel_int_11(self):
for opset in [9, 10, 11]:
@@ -158,26 +172,29 @@ def test_ovr_rf_multilabel_int_11(self):
continue
with self.subTest(opset=opset):
model = OneVsRestClassifier(
- RandomForestClassifier(n_estimators=2, max_depth=3))
+ RandomForestClassifier(n_estimators=2, max_depth=3)
+ )
model, X = fit_multilabel_classification_model(
- model, 3, is_int=True, n_features=5)
+ model, 3, is_int=True, n_features=5
+ )
model_onnx = convert_sklearn(
- model, initial_types=[
- ('input', Int64TensorType([None, X.shape[1]]))],
- target_opset=opset)
+ model,
+ initial_types=[("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=opset,
+ )
self.assertNotIn('"Clip"', str(model_onnx))
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnOVRRFMultiLabelInt64%d" % opset)
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnOVRRFMultiLabelInt64%d" % opset,
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_02(self):
model = OneVsRestClassifier(LogisticRegression())
dump_multiple_classification(
- model,
- first_class=2,
- suffix="F2",
- target_opset=TARGET_OPSET
+ model, first_class=2, suffix="F2", target_opset=TARGET_OPSET
)
@ignore_warnings(category=warnings_to_skip)
@@ -188,37 +205,37 @@ def test_ovr_string(self):
verbose=False,
label_string=True,
suffix="String",
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_float(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression(solver='liblinear')), 3)
+ OneVsRestClassifier(LogisticRegression(solver="liblinear")), 3
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationFloat")
+ X, model, model_onnx, basename="SklearnOVRClassificationFloat"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_decision_function(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 4)
- options = {id(model): {'raw_scores': True}}
+ OneVsRestClassifier(LogisticRegression()), 4
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
options=options,
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
@@ -226,32 +243,37 @@ def test_ovr_classification_decision_function(self):
model,
model_onnx,
basename="SklearnOVRClassificationDecisionFunction",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
if pv.Version(ort_version) < pv.Version("1.0.0"):
return
- options = {id(model): {'raw_scores': True, 'zipmap': False}}
+ options = {id(model): {"raw_scores": True, "zipmap": False}}
model_onnx = convert_sklearn(
- model, "ovr classification",
+ model,
+ "ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X})[1]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X})[1]
dec = model.decision_function(X)
assert_almost_equal(got, dec, decimal=4)
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_decision_function_binary(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 2)
- options = {id(model): {'raw_scores': True}}
+ OneVsRestClassifier(LogisticRegression()), 2
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
options=options,
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
@@ -259,18 +281,22 @@ def test_ovr_classification_decision_function_binary(self):
model,
model_onnx,
basename="SklearnOVRClassificationDecisionFunctionBinary",
- methods=['predict', 'decision_function_binary'])
+ methods=["predict", "decision_function_binary"],
+ )
if pv.Version(ort_version) < pv.Version("1.0.0"):
return
- options = {id(model): {'raw_scores': True, 'zipmap': False}}
+ options = {id(model): {"raw_scores": True, "zipmap": False}}
model_onnx = convert_sklearn(
- model, "ovr classification",
+ model,
+ "ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X})[1]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X})[1]
dec = model.decision_function(X)
assert_almost_equal(got[:, 1], dec, decimal=4)
assert_almost_equal(-got[:, 0], dec, decimal=4)
@@ -278,135 +304,129 @@ def test_ovr_classification_decision_function_binary(self):
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_int(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 5, is_int=True)
+ OneVsRestClassifier(LogisticRegression()), 5, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationInt")
+ X, model, model_onnx, basename="SklearnOVRClassificationInt"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_float_binary(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 2)
+ OneVsRestClassifier(LogisticRegression()), 2
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationFloatBin")
+ X, model, model_onnx, basename="SklearnOVRClassificationFloatBin"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_float_binary_nozipmap(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 2)
+ OneVsRestClassifier(LogisticRegression()), 2
+ )
model_onnx = convert_sklearn(
- model, "ovr classification",
+ model,
+ "ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnOVRClassificationFloatBinNoZipMap")
+ X, model, model_onnx, basename="SklearnOVRClassificationFloatBinNoZipMap"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_int_binary(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LogisticRegression()), 2, is_int=True)
+ OneVsRestClassifier(LogisticRegression()), 2, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationIntBin")
+ X, model, model_onnx, basename="SklearnOVRClassificationIntBin"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_float_mlp(self):
- model, X = fit_classification_model(
- OneVsRestClassifier(MLPClassifier()), 4)
+ model, X = fit_classification_model(OneVsRestClassifier(MLPClassifier()), 4)
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationFloatMLP")
+ X, model, model_onnx, basename="SklearnOVRClassificationFloatMLP"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_int_ensemble(self):
model, X = fit_classification_model(
- OneVsRestClassifier(GradientBoostingClassifier()), 5, is_int=True)
+ OneVsRestClassifier(GradientBoostingClassifier()), 5, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationIntEnsemble")
+ X, model, model_onnx, basename="SklearnOVRClassificationIntEnsemble"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_float_binary_ensemble(self):
model, X = fit_classification_model(
- OneVsRestClassifier(GradientBoostingClassifier()), 2)
+ OneVsRestClassifier(GradientBoostingClassifier()), 2
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationFloatBinEnsemble")
+ X, model, model_onnx, basename="SklearnOVRClassificationFloatBinEnsemble"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_classification_int_binary_mlp(self):
model, X = fit_classification_model(
- OneVsRestClassifier(MLPClassifier()), 2, is_int=True)
+ OneVsRestClassifier(MLPClassifier()), 2, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr classification",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRClassificationIntBinMLP")
+ X, model, model_onnx, basename="SklearnOVRClassificationIntBinMLP"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_regression_float(self):
@@ -416,94 +436,93 @@ def test_ovr_regression_float(self):
check only probabilities."""
rs = 11
model, X = fit_classification_model(
- OneVsRestClassifier(
- LinearRegression()), 3, random_state=rs)
+ OneVsRestClassifier(LinearRegression()), 3, random_state=rs
+ )
model_onnx = convert_sklearn(
model,
"ovr regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X[:5],
- model,
- model_onnx,
- basename="SklearnOVRRegressionFloat-Out0")
+ X[:5], model, model_onnx, basename="SklearnOVRRegressionFloat-Out0"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_regression_int(self):
model, X = fit_classification_model(
- OneVsRestClassifier(LinearRegression()), 10, is_int=True)
+ OneVsRestClassifier(LinearRegression()), 10, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRRegressionInt-Out0")
+ X, model, model_onnx, basename="SklearnOVRRegressionInt-Out0"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_regression_float_mlp(self):
- model, X = fit_classification_model(
- OneVsRestClassifier(MLPRegressor()), 5)
+ model, X = fit_classification_model(OneVsRestClassifier(MLPRegressor()), 5)
model_onnx = convert_sklearn(
model,
"ovr regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRRegressionFloatMLP-Out0")
+ X, model, model_onnx, basename="SklearnOVRRegressionFloatMLP-Out0"
+ )
@ignore_warnings(category=warnings_to_skip)
def test_ovr_regression_int_ensemble(self):
model, X = fit_classification_model(
- OneVsRestClassifier(GradientBoostingRegressor()), 4, is_int=True)
+ OneVsRestClassifier(GradientBoostingRegressor()), 4, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"ovr regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnOVRRegressionIntEnsemble-Out0")
+ X, model, model_onnx, basename="SklearnOVRRegressionIntEnsemble-Out0"
+ )
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.2.0"),
- reason="fails to load the model")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"), reason="fails to load the model"
+ )
def test_ovr_raw_scores(self):
X, y = make_classification(
- n_classes=2, n_samples=100, random_state=42,
- n_features=100, n_informative=7)
+ n_classes=2, n_samples=100, random_state=42, n_features=100, n_informative=7
+ )
X_train, X_test, y_train, _ = train_test_split(
- X, y, test_size=0.5, random_state=42)
+ X, y, test_size=0.5, random_state=42
+ )
model = OneVsRestClassifier(
- estimator=GradientBoostingClassifier(random_state=42))
+ estimator=GradientBoostingClassifier(random_state=42)
+ )
model.fit(X_train, y_train)
- options = {id(model): {'raw_scores': True, 'zipmap': False}}
+ options = {id(model): {"raw_scores": True, "zipmap": False}}
onnx_model = convert_sklearn(
- model, 'lr',
- [('input', FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ model,
+ "lr",
+ [("input", FloatTensorType([None, X_test.shape[1]]))],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, input_feed={'input': X_test.astype(np.float32)})
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, input_feed={"input": X_test.astype(np.float32)})
exp = model.predict(X_test)
assert_almost_equal(exp, res[0])
exp = model.decision_function(X_test)
diff --git a/tests/test_sklearn_ordinal_encoder.py b/tests/test_sklearn_ordinal_encoder.py
index 145c17cfe..33b818bd4 100644
--- a/tests/test_sklearn_ordinal_encoder.py
+++ b/tests/test_sklearn_ordinal_encoder.py
@@ -6,6 +6,7 @@
import numpy as np
import onnxruntime
from sklearn import __version__ as sklearn_version
+
try:
from sklearn.preprocessing import OrdinalEncoder
except ImportError:
@@ -20,7 +21,7 @@
def ordinal_encoder_support():
# pv.Version does not work with development versions
- vers = '.'.join(sklearn_version.split('.')[:2])
+ vers = ".".join(sklearn_version.split(".")[:2])
if pv.Version(vers) < pv.Version("0.20.0"):
return False
if pv.Version(onnxruntime.__version__) < pv.Version("0.3.0"):
@@ -31,25 +32,27 @@ def ordinal_encoder_support():
class TestSklearnOrdinalEncoderConverter(unittest.TestCase):
@unittest.skipIf(
not ordinal_encoder_support(),
- reason="OrdinalEncoder was not available before 0.20")
+ reason="OrdinalEncoder was not available before 0.20",
+ )
def test_model_ordinal_encoder(self):
model = OrdinalEncoder(dtype=np.int64)
- data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=np.int64)
+ data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.int64)
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn ordinal encoder",
+ model,
+ "scikit-learn ordinal encoder",
[("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOrdinalEncoderInt64-SkipDim1")
+ data, model, model_onnx, basename="SklearnOrdinalEncoderInt64-SkipDim1"
+ )
@unittest.skipIf(
not ordinal_encoder_support(),
- reason="OrdinalEncoder was not available before 0.20")
+ reason="OrdinalEncoder was not available before 0.20",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_ordinal_encoder_mixed_string_int_drop(self):
data = [
@@ -68,61 +71,65 @@ def test_ordinal_encoder_mixed_string_int_drop(self):
("input2", Int64TensorType([None, 1])),
]
model_onnx = convert_sklearn(
- model, "ordinal encoder", inputs, target_opset=TARGET_OPSET)
+ model, "ordinal encoder", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- test, model, model_onnx,
- basename="SklearnOrdinalEncoderMixedStringIntDrop")
+ test, model, model_onnx, basename="SklearnOrdinalEncoderMixedStringIntDrop"
+ )
@unittest.skipIf(
not ordinal_encoder_support(),
- reason="OrdinalEncoder was not available before 0.20")
+ reason="OrdinalEncoder was not available before 0.20",
+ )
def test_ordinal_encoder_onecat(self):
data = [["cat"], ["cat"]]
model = OrdinalEncoder(categories="auto")
model.fit(data)
inputs = [("input1", StringTensorType([None, 1]))]
- model_onnx = convert_sklearn(model, "ordinal encoder one string cat",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, "ordinal encoder one string cat", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOrdinalEncoderOneStringCat")
+ data, model, model_onnx, basename="SklearnOrdinalEncoderOneStringCat"
+ )
@unittest.skipIf(
not ordinal_encoder_support(),
- reason="OrdinalEncoder was not available before 0.20")
+ reason="OrdinalEncoder was not available before 0.20",
+ )
def test_ordinal_encoder_twocats(self):
data = [["cat2"], ["cat1"]]
model = OrdinalEncoder(categories="auto")
model.fit(data)
inputs = [("input1", StringTensorType([None, 1]))]
- model_onnx = convert_sklearn(model, "ordinal encoder two string cats",
- inputs, target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, "ordinal encoder two string cats", inputs, target_opset=TARGET_OPSET
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOrdinalEncoderTwoStringCat")
+ data, model, model_onnx, basename="SklearnOrdinalEncoderTwoStringCat"
+ )
@unittest.skipIf(
not ordinal_encoder_support(),
- reason="OrdinalEncoder was not available before 0.20")
+ reason="OrdinalEncoder was not available before 0.20",
+ )
def test_model_ordinal_encoder_cat_list(self):
- model = OrdinalEncoder(categories=[[0, 1, 4, 5],
- [1, 2, 3, 5],
- [0, 3, 4, 6]])
- data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
- dtype=np.int64)
+ model = OrdinalEncoder(categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]])
+ data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.int64)
model.fit(data)
model_onnx = convert_sklearn(
- model, "scikit-learn ordinal encoder",
+ model,
+ "scikit-learn ordinal encoder",
[("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnOrdinalEncoderCatList")
+ data, model, model_onnx, basename="SklearnOrdinalEncoderCatList"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_passive_aggressive_classifier_converter.py b/tests/test_sklearn_passive_aggressive_classifier_converter.py
index 35d4aedd6..45874a842 100644
--- a/tests/test_sklearn_passive_aggressive_classifier_converter.py
+++ b/tests/test_sklearn_passive_aggressive_classifier_converter.py
@@ -5,78 +5,81 @@
from sklearn.linear_model import PassiveAggressiveClassifier
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
-from test_utils import (
- dump_data_and_model,
- fit_classification_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET
class TestPassiveAggressiveClassifierConverter(unittest.TestCase):
-
def test_model_passive_aggressive_classifier_binary_class(self):
model, X = fit_classification_model(
- PassiveAggressiveClassifier(random_state=42), 2)
+ PassiveAggressiveClassifier(random_state=42), 2
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn PassiveAggressiveClassifier binary",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
model_onnx,
- basename="SklearnPassiveAggressiveClassifierBinary-Out0")
+ basename="SklearnPassiveAggressiveClassifierBinary-Out0",
+ )
def test_model_passive_aggressive_classifier_multi_class(self):
model, X = fit_classification_model(
- PassiveAggressiveClassifier(random_state=42), 5)
+ PassiveAggressiveClassifier(random_state=42), 5
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn PassiveAggressiveClassifier multi-class",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
model_onnx,
- basename="SklearnPassiveAggressiveClassifierMulti-Out0")
+ basename="SklearnPassiveAggressiveClassifierMulti-Out0",
+ )
def test_model_passive_aggressive_classifier_binary_class_int(self):
model, X = fit_classification_model(
- PassiveAggressiveClassifier(random_state=42), 2, is_int=True)
+ PassiveAggressiveClassifier(random_state=42), 2, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn PassiveAggressiveClassifier binary",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
model_onnx,
- basename="SklearnPassiveAggressiveClassifierBinaryInt-Out0")
+ basename="SklearnPassiveAggressiveClassifierBinaryInt-Out0",
+ )
def test_model_passive_aggressive_classifier_multi_class_int(self):
model, X = fit_classification_model(
- PassiveAggressiveClassifier(random_state=42), 5, is_int=True)
+ PassiveAggressiveClassifier(random_state=42), 5, is_int=True
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn PassiveAggressiveClassifier multi-class",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X,
model,
model_onnx,
- basename="SklearnPassiveAggressiveClassifierMultiInt-Out0")
+ basename="SklearnPassiveAggressiveClassifierMultiInt-Out0",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_pca_converter.py b/tests/test_sklearn_pca_converter.py
index 8a1c9d588..4acfebf9f 100644
--- a/tests/test_sklearn_pca_converter.py
+++ b/tests/test_sklearn_pca_converter.py
@@ -14,7 +14,8 @@
def _fit_model_pca(model):
data = load_diabetes()
X_train, X_test, *_ = train_test_split(
- data.data, data.target, test_size=0.2, random_state=42)
+ data.data, data.target, test_size=0.2, random_state=42
+ )
model.fit(X_train)
return model, X_test.astype(np.float32)
@@ -24,81 +25,93 @@ def test_pca_default(self):
model, X_test = _fit_model_pca(PCA(random_state=42))
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnPCADefault")
+ dump_data_and_model(X_test, model, model_onnx, basename="SklearnPCADefault")
def test_incrementalpca_default(self):
model, X_test = _fit_model_pca(IncrementalPCA())
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnIncrementalPCADefault")
+ X_test, model, model_onnx, basename="SklearnIncrementalPCADefault"
+ )
def test_pca_parameters_auto(self):
- model, X_test = _fit_model_pca(PCA(
- random_state=42, copy=False, tol=0.1, whiten=True,
- n_components=0.9005263157894737, svd_solver="auto"))
+ model, X_test = _fit_model_pca(
+ PCA(
+ random_state=42,
+ copy=False,
+ tol=0.1,
+ whiten=True,
+ n_components=0.9005263157894737,
+ svd_solver="auto",
+ )
+ )
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnPCAParametersAuto")
+ X_test, model, model_onnx, basename="SklearnPCAParametersAuto"
+ )
def test_pca_parameters_arpack(self):
- model, X_test = _fit_model_pca(PCA(
- random_state=42, n_components=4, svd_solver='arpack'))
+ model, X_test = _fit_model_pca(
+ PCA(random_state=42, n_components=4, svd_solver="arpack")
+ )
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnPCAParametersArpack")
+ X_test, model, model_onnx, basename="SklearnPCAParametersArpack"
+ )
def test_pca_parameters_full(self):
- model, X_test = _fit_model_pca(PCA(
- random_state=42, n_components=5, svd_solver='full', whiten=True))
+ model, X_test = _fit_model_pca(
+ PCA(random_state=42, n_components=5, svd_solver="full", whiten=True)
+ )
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- FloatTensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", FloatTensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnPCAParametersFull")
+ X_test, model, model_onnx, basename="SklearnPCAParametersFull"
+ )
def test_pca_default_int_randomised(self):
data = load_digits()
X_train, X_test, *_ = train_test_split(
- data.data, data.target, test_size=0.2, random_state=42)
- model = PCA(random_state=42, svd_solver='randomized',
- iterated_power=3).fit(X_train)
+ data.data, data.target, test_size=0.2, random_state=42
+ )
+ model = PCA(random_state=42, svd_solver="randomized", iterated_power=3).fit(
+ X_train
+ )
model_onnx = convert_sklearn(
model,
- initial_types=[("input",
- Int64TensorType([None, X_test.shape[1]]))],
- target_opset=TARGET_OPSET)
+ initial_types=[("input", Int64TensorType([None, X_test.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X_test.astype(np.int64), model, model_onnx,
- basename="SklearnPCADefaultIntRandomised")
+ X_test.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnPCADefaultIntRandomised",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_perceptron_converter.py b/tests/test_sklearn_perceptron_converter.py
index dfd806bde..343e6822b 100644
--- a/tests/test_sklearn_perceptron_converter.py
+++ b/tests/test_sklearn_perceptron_converter.py
@@ -7,79 +7,74 @@
from sklearn.linear_model import Perceptron
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
-from test_utils import (
- dump_data_and_model,
- fit_classification_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET
class TestPerceptronClassifierConverter(unittest.TestCase):
-
def test_model_perceptron_binary_class(self):
- model, X = fit_classification_model(
- Perceptron(random_state=42), 2)
+ model, X = fit_classification_model(Perceptron(random_state=42), 2)
model_onnx = convert_sklearn(
model,
"scikit-learn Perceptron binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32),
model,
model_onnx,
- basename="SklearnPerceptronClassifierBinary-Out0")
+ basename="SklearnPerceptronClassifierBinary-Out0",
+ )
def test_model_perceptron_multi_class(self):
- model, X = fit_classification_model(
- Perceptron(random_state=42), 5)
+ model, X = fit_classification_model(Perceptron(random_state=42), 5)
model_onnx = convert_sklearn(
model,
"scikit-learn Perceptron multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32),
model,
model_onnx,
- basename="SklearnPerceptronClassifierMulti-Out0")
+ basename="SklearnPerceptronClassifierMulti-Out0",
+ )
def test_model_perceptron_binary_class_int(self):
- model, X = fit_classification_model(
- Perceptron(random_state=42), 2, is_int=True)
+ model, X = fit_classification_model(Perceptron(random_state=42), 2, is_int=True)
model_onnx = convert_sklearn(
model,
"scikit-learn Perceptron binary classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.int64),
model,
model_onnx,
- basename="SklearnPerceptronClassifierBinaryInt-Out0")
+ basename="SklearnPerceptronClassifierBinaryInt-Out0",
+ )
def test_model_perceptron_multi_class_int(self):
- model, X = fit_classification_model(
- Perceptron(random_state=42), 5, is_int=True)
+ model, X = fit_classification_model(Perceptron(random_state=42), 5, is_int=True)
model_onnx = convert_sklearn(
model,
"scikit-learn Perceptron multi-class classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={'zipmap': False},
+ options={"zipmap": False},
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.int64)[:10],
model,
model_onnx,
- basename="SklearnPerceptronClassifierMultiInt-Out0")
+ basename="SklearnPerceptronClassifierMultiInt-Out0",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_pipeline.py b/tests/test_sklearn_pipeline.py
index 732f69b3f..6c4747696 100644
--- a/tests/test_sklearn_pipeline.py
+++ b/tests/test_sklearn_pipeline.py
@@ -23,8 +23,7 @@
from sklearn.utils.testing import ignore_warnings
try:
from sklearn.compose import ColumnTransformer
- from sklearn.compose import (
- make_column_transformer, make_column_selector)
+ from sklearn.compose import make_column_transformer, make_column_selector
except ImportError:
# not available in 0.19
ColumnTransformer = None
@@ -40,8 +39,11 @@
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import (
- OneHotEncoder, StandardScaler, MinMaxScaler,
- MaxAbsScaler)
+ OneHotEncoder,
+ StandardScaler,
+ MinMaxScaler,
+ MaxAbsScaler,
+)
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
@@ -54,15 +56,18 @@
)
from sklearn.multioutput import MultiOutputClassifier
from test_utils import (
- dump_data_and_model, fit_classification_model, TARGET_OPSET,
+ dump_data_and_model,
+ fit_classification_model,
+ TARGET_OPSET,
InferenceSessionEx as InferenceSession,
- ReferenceEvaluatorEx)
+ ReferenceEvaluatorEx,
+)
from onnxruntime import __version__ as ort_version
# pv.Version does not work with development versions
-ort_version = ".".join(ort_version.split('.')[:2])
-skl_version = ".".join(skl_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
+skl_version = ".".join(skl_version.split(".")[:2])
def check_scikit_version():
@@ -85,32 +90,31 @@ def transform(self, inp):
res = self.pipe.transform(x2)
return res
else:
- raise TypeError("Unable to predict with type {0}".format(
- type(inp)))
+ raise TypeError("Unable to predict with type {0}".format(type(inp)))
class TestSklearnPipeline(unittest.TestCase):
-
@ignore_warnings(category=FutureWarning)
def test_pipeline(self):
- data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]],
- dtype=numpy.float32)
+ data = numpy.array([[0, 0], [0, 0], [1, 1], [1, 1]], dtype=numpy.float32)
scaler = StandardScaler()
scaler.fit(data)
model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])
- model_onnx = convert_sklearn(model, "pipeline",
- [("input", FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "pipeline",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(data, model, model_onnx,
- basename="SklearnPipelineScaler")
+ dump_data_and_model(data, model, model_onnx, basename="SklearnPipelineScaler")
@ignore_warnings(category=FutureWarning)
def test_combine_inputs(self):
data = numpy.array(
- [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
- dtype=numpy.float32)
+ [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32
+ )
scaler = StandardScaler()
scaler.fit(data)
model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])
@@ -122,7 +126,8 @@ def test_combine_inputs(self):
("input1", FloatTensorType([None, 1])),
("input2", FloatTensorType([None, 1])),
],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(len(model_onnx.graph.node[-1].output) == 1)
self.assertTrue(model_onnx is not None)
data = {
@@ -130,12 +135,15 @@ def test_combine_inputs(self):
"input2": data[:, 1].reshape((-1, 1)),
}
dump_data_and_model(
- data, PipeConcatenateInput(model),
- model_onnx, basename="SklearnPipelineScaler11")
+ data,
+ PipeConcatenateInput(model),
+ model_onnx,
+ basename="SklearnPipelineScaler11",
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=FutureWarning)
def test_combine_inputs_union_in_pipeline(self):
from sklearn.preprocessing import StandardScaler
@@ -145,16 +153,20 @@ def test_combine_inputs_union_in_pipeline(self):
[[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
dtype=numpy.float32,
)
- model = Pipeline([
- ("scaler1", StandardScaler()),
- (
- "union",
- FeatureUnion([
- ("scaler2", StandardScaler()),
- ("scaler3", MinMaxScaler()),
- ]),
- ),
- ])
+ model = Pipeline(
+ [
+ ("scaler1", StandardScaler()),
+ (
+ "union",
+ FeatureUnion(
+ [
+ ("scaler2", StandardScaler()),
+ ("scaler3", MinMaxScaler()),
+ ]
+ ),
+ ),
+ ]
+ )
model.fit(data)
model_onnx = convert_sklearn(
model,
@@ -163,7 +175,8 @@ def test_combine_inputs_union_in_pipeline(self):
("input1", FloatTensorType([None, 1])),
("input2", FloatTensorType([None, 1])),
],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(len(model_onnx.graph.node[-1].output) == 1)
self.assertTrue(model_onnx is not None)
data = {
@@ -171,14 +184,18 @@ def test_combine_inputs_union_in_pipeline(self):
"input2": data[:, 1].reshape((-1, 1)),
}
dump_data_and_model(
- data, PipeConcatenateInput(model),
- model_onnx, basename="SklearnPipelineScaler11Union")
+ data,
+ PipeConcatenateInput(model),
+ model_onnx,
+ basename="SklearnPipelineScaler11Union",
+ )
+
TARGET_OPSET
@unittest.skipIf(TARGET_OPSET < 15, reason="uses CastLike")
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
@ignore_warnings(category=FutureWarning)
def test_combine_inputs_floats_ints(self):
data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
@@ -193,7 +210,8 @@ def test_combine_inputs_floats_ints(self):
("input1", Int64TensorType([None, 1])),
("input2", FloatTensorType([None, 1])),
],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(len(model_onnx.graph.node[-1].output) == 1)
self.assertTrue(model_onnx is not None)
data = numpy.array(data)
@@ -202,25 +220,26 @@ def test_combine_inputs_floats_ints(self):
"input2": data[:, 1].reshape((-1, 1)).astype(numpy.float32),
}
dump_data_and_model(
- data, PipeConcatenateInput(model),
- model_onnx, basename="SklearnPipelineScalerMixed")
+ data,
+ PipeConcatenateInput(model),
+ model_onnx,
+ basename="SklearnPipelineScalerMixed",
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="issues with shapes")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="issues with shapes"
+ )
@ignore_warnings(category=(RuntimeWarning, FutureWarning))
def test_pipeline_column_transformer(self):
-
iris = datasets.load_iris()
X = iris.data[:, :3]
y = iris.target
X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
- X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1"
- if x > 0.5 else "cat2")
- X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3"
- if x > 0.5 else "cat4")
+ X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" if x > 0.5 else "cat2")
+ X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" if x > 0.5 else "cat4")
y_train = y % 2
numeric_features = [0, 1, 2] # ["vA", "vB", "vC"]
categorical_features = [3, 4] # ["vcat", "vcat2"]
@@ -228,31 +247,42 @@ def test_pipeline_column_transformer(self):
classifier = LogisticRegression(
C=0.01,
class_weight=dict(zip([False, True], [0.2, 0.8])),
- n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3)
+ n_jobs=1,
+ max_iter=10,
+ solver="lbfgs",
+ tol=1e-3,
+ )
- numeric_transformer = Pipeline(steps=[
- ("imputer", SimpleImputer(strategy="median")),
- ("scaler", StandardScaler()),
- ])
+ numeric_transformer = Pipeline(
+ steps=[
+ ("imputer", SimpleImputer(strategy="median")),
+ ("scaler", StandardScaler()),
+ ]
+ )
- categorical_transformer = Pipeline(steps=[
- (
- "onehot",
- OneHotEncoder(sparse=True, handle_unknown="ignore"),
- ),
- (
- "tsvd",
- TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4),
- ),
- ])
+ categorical_transformer = Pipeline(
+ steps=[
+ (
+ "onehot",
+ OneHotEncoder(sparse=True, handle_unknown="ignore"),
+ ),
+ (
+ "tsvd",
+ TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4),
+ ),
+ ]
+ )
- preprocessor = ColumnTransformer(transformers=[
- ("num", numeric_transformer, numeric_features),
- ("cat", categorical_transformer, categorical_features),
- ])
+ preprocessor = ColumnTransformer(
+ transformers=[
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+ )
- model = Pipeline(steps=[("precprocessor",
- preprocessor), ("classifier", classifier)])
+ model = Pipeline(
+ steps=[("precprocessor", preprocessor), ("classifier", classifier)]
+ )
model.fit(X_train, y_train)
initial_type = [
@@ -261,17 +291,20 @@ def test_pipeline_column_transformer(self):
]
X_train = X_train[:11]
- model_onnx = convert_sklearn(model, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
dump_data_and_model(
- X_train, model, model_onnx,
- basename="SklearnPipelineColumnTransformerPipeliner")
+ X_train,
+ model,
+ model_onnx,
+ basename="SklearnPipelineColumnTransformerPipeliner",
+ )
if __name__ == "__main__":
try:
- from onnx.tools.net_drawer import (
- GetPydotGraph, GetOpNodeProducer)
+ from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
except ImportError:
return
@@ -279,7 +312,8 @@ def test_pipeline_column_transformer(self):
model_onnx.graph,
name=model_onnx.graph.name,
rankdir="TP",
- node_producer=GetOpNodeProducer("docstring"))
+ node_producer=GetOpNodeProducer("docstring"),
+ )
pydot_graph.write_dot("graph.dot")
import os
@@ -287,19 +321,19 @@ def test_pipeline_column_transformer(self):
os.system("dot -O -G=300 -Tpng graph.dot")
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.20 causes some mismatches")
+ not check_scikit_version(), reason="Scikit 0.20 causes some mismatches"
+ )
@ignore_warnings(category=FutureWarning)
def test_pipeline_column_transformer_titanic(self):
-
# fit
try:
titanic_url = (
"https://raw.githubusercontent.com/amueller/"
- "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv")
+ "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv"
+ )
data = pandas.read_csv(titanic_url)
except url_error.URLError:
# Do not fail the test if the data cannot be fetched.
@@ -314,33 +348,40 @@ def test_pipeline_column_transformer_titanic(self):
for cat in ["embarked", "sex", "pclass"]:
X[cat].fillna("missing", inplace=True)
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.2)
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
numeric_features = ["age", "fare"]
- numeric_transformer = Pipeline(steps=[
- ("imputer", SimpleImputer(strategy="median")),
- ("scaler", StandardScaler()),
- ])
+ numeric_transformer = Pipeline(
+ steps=[
+ ("imputer", SimpleImputer(strategy="median")),
+ ("scaler", StandardScaler()),
+ ]
+ )
categorical_features = ["embarked", "sex", "pclass"]
- categorical_transformer = Pipeline(steps=[
- # --- SimpleImputer on string is not available
- # for string in ONNX-ML specifications.
- # ('imputer',
- # SimpleImputer(strategy='constant', fill_value='missing')),
- ("onehot", OneHotEncoder(handle_unknown="ignore"))
- ])
-
- preprocessor = ColumnTransformer(transformers=[
- ("num", numeric_transformer, numeric_features),
- ("cat", categorical_transformer, categorical_features),
- ])
-
- clf = Pipeline(steps=[
- ("preprocessor", preprocessor),
- # ("classifier", LogisticRegression(solver="lbfgs")),
- ])
+ categorical_transformer = Pipeline(
+ steps=[
+ # --- SimpleImputer on string is not available
+ # for string in ONNX-ML specifications.
+ # ('imputer',
+ # SimpleImputer(strategy='constant', fill_value='missing')),
+ ("onehot", OneHotEncoder(handle_unknown="ignore"))
+ ]
+ )
+
+ preprocessor = ColumnTransformer(
+ transformers=[
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+ )
+
+ clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ # ("classifier", LogisticRegression(solver="lbfgs")),
+ ]
+ )
# inputs
@@ -349,7 +390,7 @@ def convert_dataframe_schema(df, drop=None):
for k, v in zip(df.columns, df.dtypes):
if drop is not None and k in drop:
continue
- if v == 'int64':
+ if v == "int64":
t = Int64TensorType([None, 1])
elif v == "float64":
t = FloatTensorType([None, 1])
@@ -371,135 +412,174 @@ def convert_dataframe_schema(df, drop=None):
X_train = X_train.copy()
X_test = X_test.copy()
- X_train['pclass'] = X_train['pclass'].astype(numpy.int64)
- X_test['pclass'] = X_test['pclass'].astype(numpy.int64)
+ X_train["pclass"] = X_train["pclass"].astype(numpy.int64)
+ X_test["pclass"] = X_test["pclass"].astype(numpy.int64)
X_train = X_train.drop(to_drop, axis=1)
X_test = X_test.drop(to_drop, axis=1)
# Step 1: without classifier
clf.fit(X_train, y_train)
initial_inputs = convert_dataframe_schema(X_train, to_drop)
- model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs,
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ clf, "pipeline_titanic", initial_inputs, target_opset=TARGET_OPSET
+ )
data = X_test
pred = clf.transform(data)
data_types = {
- 'pclass': numpy.int64,
- 'age': numpy.float32,
- 'sex': numpy.str_,
- 'fare': numpy.float32,
- 'embarked': numpy.str_,
+ "pclass": numpy.int64,
+ "age": numpy.float32,
+ "sex": numpy.str_,
+ "fare": numpy.float32,
+ "embarked": numpy.str_,
+ }
+ inputs = {
+ k: data[k].values.astype(data_types[k]).reshape(-1, 1) for k in data.columns
}
- inputs = {k: data[k].values.astype(data_types[k]).reshape(-1, 1)
- for k in data.columns}
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
run = sess.run(None, inputs)
got = run[-1]
assert_almost_equal(pred, got, decimal=5)
# Step 2: with classifier
- clf = Pipeline(steps=[
- ("preprocessor", preprocessor),
- ("classifier", LogisticRegression(solver="lbfgs")),
- ]).fit(X_train, y_train)
+ clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", LogisticRegression(solver="lbfgs")),
+ ]
+ ).fit(X_train, y_train)
pred = clf.predict_proba(data)
- model_onnx = convert_sklearn(clf, "pipeline_titanic", initial_inputs,
- target_opset=TARGET_OPSET,
- options={id(clf): {'zipmap': False}})
+ model_onnx = convert_sklearn(
+ clf,
+ "pipeline_titanic",
+ initial_inputs,
+ target_opset=TARGET_OPSET,
+ options={id(clf): {"zipmap": False}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
run = sess.run(None, inputs)
got = run[-1]
assert_almost_equal(pred, got, decimal=5)
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@ignore_warnings(category=FutureWarning)
def test_column_transformer_weights(self):
model, X = fit_classification_model(
ColumnTransformer(
- [('pca', PCA(n_components=5), slice(0, 10)),
- ('svd', TruncatedSVD(n_components=5), slice(10, 100))],
- transformer_weights={'pca': 2, 'svd': 3}), 3, n_features=100)
+ [
+ ("pca", PCA(n_components=5), slice(0, 10)),
+ ("svd", TruncatedSVD(n_components=5), slice(10, 100)),
+ ],
+ transformer_weights={"pca": 2, "svd": 3},
+ ),
+ 3,
+ n_features=100,
+ )
model_onnx = convert_sklearn(
model,
"column transformer weights",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnColumnTransformerWeights-Dec4")
+ X, model, model_onnx, basename="SklearnColumnTransformerWeights-Dec4"
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@ignore_warnings(category=FutureWarning)
def test_column_transformer_drop(self):
model, X = fit_classification_model(
ColumnTransformer(
- [('pca', PCA(n_components=5), slice(0, 10)),
- ('svd', TruncatedSVD(n_components=5), slice(80, 100))],
- remainder='drop'), 3, n_features=100)
+ [
+ ("pca", PCA(n_components=5), slice(0, 10)),
+ ("svd", TruncatedSVD(n_components=5), slice(80, 100)),
+ ],
+ remainder="drop",
+ ),
+ 3,
+ n_features=100,
+ )
model_onnx = convert_sklearn(
model,
"column transformer drop",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnColumnTransformerDrop")
+ X, model, model_onnx, basename="SklearnColumnTransformerDrop"
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@ignore_warnings(category=FutureWarning)
def test_column_transformer_passthrough(self):
model, X = fit_classification_model(
ColumnTransformer(
- [('pca', PCA(n_components=5), slice(0, 10)),
- ('svd', TruncatedSVD(n_components=5), slice(80, 100))],
- transformer_weights={'pca': 2, 'svd': 3},
- remainder='passthrough'), 3, n_features=100)
+ [
+ ("pca", PCA(n_components=5), slice(0, 10)),
+ ("svd", TruncatedSVD(n_components=5), slice(80, 100)),
+ ],
+ transformer_weights={"pca": 2, "svd": 3},
+ remainder="passthrough",
+ ),
+ 3,
+ n_features=100,
+ )
model_onnx = convert_sklearn(
model,
"column transformer passthrough",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnColumnTransformerPassthrough")
+ X, model, model_onnx, basename="SklearnColumnTransformerPassthrough"
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@ignore_warnings(category=FutureWarning)
def test_column_transformer_passthrough_no_weights(self):
model, X = fit_classification_model(
ColumnTransformer(
- [('pca', PCA(n_components=5), slice(0, 10)),
- ('svd', TruncatedSVD(n_components=5), slice(70, 80))],
- remainder='passthrough'), 3, n_features=100)
+ [
+ ("pca", PCA(n_components=5), slice(0, 10)),
+ ("svd", TruncatedSVD(n_components=5), slice(70, 80)),
+ ],
+ remainder="passthrough",
+ ),
+ 3,
+ n_features=100,
+ )
model_onnx = convert_sklearn(
model,
"column transformer passthrough",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnColumnTransformerPassthroughNoWeights")
+ X,
+ model,
+ model_onnx,
+ basename="SklearnColumnTransformerPassthroughNoWeights",
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@ignore_warnings(category=FutureWarning)
def test_pipeline_dataframe(self):
text = """
@@ -508,46 +588,66 @@ def test_pipeline_dataframe(self):
7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
- """.replace(" ", "")
+ """.replace(
+ " ", ""
+ )
X_train = pandas.read_csv(StringIO(text))
for c in X_train.columns:
- if c != 'color':
+ if c != "color":
X_train[c] = X_train[c].astype(numpy.float32)
- numeric_features = [c for c in X_train if c != 'color']
-
- pipe = Pipeline([
- ("prep", ColumnTransformer([
- ("color", Pipeline([
- ('one', OneHotEncoder()),
- ('select', ColumnTransformer(
- [('sel1', 'passthrough', [0])]))
- ]), ['color']),
- ("others", "passthrough", numeric_features)
- ])),
- ])
+ numeric_features = [c for c in X_train if c != "color"]
+
+ pipe = Pipeline(
+ [
+ (
+ "prep",
+ ColumnTransformer(
+ [
+ (
+ "color",
+ Pipeline(
+ [
+ ("one", OneHotEncoder()),
+ (
+ "select",
+ ColumnTransformer(
+ [("sel1", "passthrough", [0])]
+ ),
+ ),
+ ]
+ ),
+ ["color"],
+ ),
+ ("others", "passthrough", numeric_features),
+ ]
+ ),
+ ),
+ ]
+ )
init_types = [
- ('fixed_acidity', FloatTensorType(shape=[None, 1])),
- ('volatile_acidity', FloatTensorType(shape=[None, 1])),
- ('citric_acid', FloatTensorType(shape=[None, 1])),
- ('residual_sugar', FloatTensorType(shape=[None, 1])),
- ('chlorides', FloatTensorType(shape=[None, 1])),
- ('free_sulfur_dioxide', FloatTensorType(shape=[None, 1])),
- ('total_sulfur_dioxide', FloatTensorType(shape=[None, 1])),
- ('density', FloatTensorType(shape=[None, 1])),
- ('pH', FloatTensorType(shape=[None, 1])),
- ('sulphates', FloatTensorType(shape=[None, 1])),
- ('alcohol', FloatTensorType(shape=[None, 1])),
- ('quality', FloatTensorType(shape=[None, 1])),
- ('color', StringTensorType(shape=[None, 1]))
+ ("fixed_acidity", FloatTensorType(shape=[None, 1])),
+ ("volatile_acidity", FloatTensorType(shape=[None, 1])),
+ ("citric_acid", FloatTensorType(shape=[None, 1])),
+ ("residual_sugar", FloatTensorType(shape=[None, 1])),
+ ("chlorides", FloatTensorType(shape=[None, 1])),
+ ("free_sulfur_dioxide", FloatTensorType(shape=[None, 1])),
+ ("total_sulfur_dioxide", FloatTensorType(shape=[None, 1])),
+ ("density", FloatTensorType(shape=[None, 1])),
+ ("pH", FloatTensorType(shape=[None, 1])),
+ ("sulphates", FloatTensorType(shape=[None, 1])),
+ ("alcohol", FloatTensorType(shape=[None, 1])),
+ ("quality", FloatTensorType(shape=[None, 1])),
+ ("color", StringTensorType(shape=[None, 1])),
]
pipe.fit(X_train)
model_onnx = convert_sklearn(
- pipe, initial_types=init_types, target_opset=TARGET_OPSET)
+ pipe, initial_types=init_types, target_opset=TARGET_OPSET
+ )
oinf = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
pred = pipe.transform(X_train)
inputs = {c: X_train[c].values for c in X_train.columns}
@@ -558,28 +658,39 @@ def test_pipeline_dataframe(self):
@ignore_warnings(category=(FutureWarning, UserWarning))
def test_pipeline_tfidf_svc(self):
- pipe = Pipeline([
- ('tfidf', TfidfVectorizer()),
- ('clf_svc', SVC(probability=True, kernel='linear'))])
- data = numpy.array(["first sentance", "second sentence",
- "many sentances", "dummy sentance",
- "no sentance at all"])
+ pipe = Pipeline(
+ [
+ ("tfidf", TfidfVectorizer()),
+ ("clf_svc", SVC(probability=True, kernel="linear")),
+ ]
+ )
+ data = numpy.array(
+ [
+ "first sentance",
+ "second sentence",
+ "many sentances",
+ "dummy sentance",
+ "no sentance at all",
+ ]
+ )
y = numpy.array([0, 0, 1, 0, 1])
pipe.fit(data, y)
expected_label = pipe.predict(data)
expected_proba = pipe.predict_proba(data)
df = pandas.DataFrame(data)
- df.columns = ['text']
+ df.columns = ["text"]
# first conversion if shape=[None, 1]
model_onnx = convert_sklearn(
- pipe, initial_types=[('text', StringTensorType([None, 1]))],
+ pipe,
+ initial_types=[("text", StringTensorType([None, 1]))],
target_opset=TARGET_OPSET,
- options={id(pipe): {'zipmap': False}})
+ options={id(pipe): {"zipmap": False}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'text': data.reshape((-1, 1))})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"text": data.reshape((-1, 1))})
assert_almost_equal(expected_proba, got[1])
assert_almost_equal(expected_label, got[0])
# sess.run(None, {'text': df}) --> failures
@@ -587,194 +698,286 @@ def test_pipeline_tfidf_svc(self):
# second conversion with shape=[None]
model_onnx = convert_sklearn(
- pipe, initial_types=[('text', StringTensorType([None]))],
+ pipe,
+ initial_types=[("text", StringTensorType([None]))],
target_opset=TARGET_OPSET,
- options={id(pipe): {'zipmap': False}})
+ options={id(pipe): {"zipmap": False}},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'text': data})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"text": data})
assert_almost_equal(expected_proba, got[1])
assert_almost_equal(expected_label, got[0])
# sess.run(None, {'text': df}) failure
# sess.run(None, {'text': df["text"]}) failure
- sess.run(None, {'text': df["text"].values}) # success
+ sess.run(None, {"text": df["text"].values}) # success
@ignore_warnings(category=(FutureWarning, UserWarning))
def test_pipeline_voting_tfidf_svc(self):
- pipe1 = Pipeline([
- ('tfidf1', TfidfVectorizer()),
- ('svc', SVC(probability=True, kernel='linear'))])
- pipe2 = Pipeline([
- ('tfidf2', TfidfVectorizer(norm='l2', use_idf=False)),
- ('sgd', SGDClassifier(alpha=0.0001, penalty='l2',
- loss='modified_huber'))])
- pipe3 = Pipeline([
- ('tfidf3', TfidfVectorizer()),
- ('mnb', MultinomialNB())])
+ pipe1 = Pipeline(
+ [
+ ("tfidf1", TfidfVectorizer()),
+ ("svc", SVC(probability=True, kernel="linear")),
+ ]
+ )
+ pipe2 = Pipeline(
+ [
+ ("tfidf2", TfidfVectorizer(norm="l2", use_idf=False)),
+ (
+ "sgd",
+ SGDClassifier(alpha=0.0001, penalty="l2", loss="modified_huber"),
+ ),
+ ]
+ )
+ pipe3 = Pipeline([("tfidf3", TfidfVectorizer()), ("mnb", MultinomialNB())])
voting = VotingClassifier(
- [('p1', pipe1), ('p2', pipe2), ('p3', pipe3)],
- voting='soft', flatten_transform=False)
- data = numpy.array(["first sentance", "second sentence",
- "many sentances", "dummy sentance",
- "no sentance at all"])
+ [("p1", pipe1), ("p2", pipe2), ("p3", pipe3)],
+ voting="soft",
+ flatten_transform=False,
+ )
+ data = numpy.array(
+ [
+ "first sentance",
+ "second sentence",
+ "many sentances",
+ "dummy sentance",
+ "no sentance at all",
+ ]
+ )
y = numpy.array([0, 0, 1, 0, 1])
voting.fit(data, y)
expected_label = voting.predict(data)
expected_proba = voting.predict_proba(data)
df = pandas.DataFrame(data)
- df.columns = ['text']
+ df.columns = ["text"]
model_onnx = convert_sklearn(
- voting, initial_types=[('text', StringTensorType([None, 1]))],
+ voting,
+ initial_types=[("text", StringTensorType([None, 1]))],
target_opset=TARGET_OPSET,
- options={id(voting): {'zipmap': False}})
+ options={id(voting): {"zipmap": False}},
+ )
# with open("debug.onnx", "wb") as f:
# f.write(model_onnx.SerializeToString())
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'text': data.reshape((-1, 1))})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"text": data.reshape((-1, 1))})
assert_almost_equal(expected_proba, got[1], decimal=5)
assert_almost_equal(expected_label, got[0])
@ignore_warnings(category=(FutureWarning, UserWarning))
def test_pipeline_pipeline_voting_tfidf_svc(self):
- pipe1 = Pipeline([
- ('ntfidf1', Pipeline([
- ('tfidf1', TfidfVectorizer()),
- ('scaler', FeatureUnion([
- ('scaler2', StandardScaler(with_mean=False)),
- ('mm', MaxAbsScaler())]))])),
- ('svc', SVC(probability=True, kernel='linear'))])
- pipe2 = Pipeline([
- ('tfidf2', TfidfVectorizer(norm='l2', use_idf=False)),
- ('sgd', SGDClassifier(alpha=0.0001, penalty='l2',
- loss='modified_huber'))])
- pipe3 = Pipeline([
- ('tfidf3', TfidfVectorizer()),
- ('mnb', MultinomialNB())])
+ pipe1 = Pipeline(
+ [
+ (
+ "ntfidf1",
+ Pipeline(
+ [
+ ("tfidf1", TfidfVectorizer()),
+ (
+ "scaler",
+ FeatureUnion(
+ [
+ ("scaler2", StandardScaler(with_mean=False)),
+ ("mm", MaxAbsScaler()),
+ ]
+ ),
+ ),
+ ]
+ ),
+ ),
+ ("svc", SVC(probability=True, kernel="linear")),
+ ]
+ )
+ pipe2 = Pipeline(
+ [
+ ("tfidf2", TfidfVectorizer(norm="l2", use_idf=False)),
+ (
+ "sgd",
+ SGDClassifier(alpha=0.0001, penalty="l2", loss="modified_huber"),
+ ),
+ ]
+ )
+ pipe3 = Pipeline([("tfidf3", TfidfVectorizer()), ("mnb", MultinomialNB())])
voting = VotingClassifier(
- [('p1', pipe1), ('p2', pipe2), ('p3', pipe3)],
- voting='soft', flatten_transform=False)
- data = numpy.array(["first sentance", "second sentence",
- "many sentances", "dummy sentance",
- "no sentance at all"])
+ [("p1", pipe1), ("p2", pipe2), ("p3", pipe3)],
+ voting="soft",
+ flatten_transform=False,
+ )
+ data = numpy.array(
+ [
+ "first sentance",
+ "second sentence",
+ "many sentances",
+ "dummy sentance",
+ "no sentance at all",
+ ]
+ )
y = numpy.array([0, 0, 1, 0, 1])
voting.fit(data, y)
expected_label = voting.predict(data)
expected_proba = voting.predict_proba(data)
df = pandas.DataFrame(data)
- df.columns = ['text']
+ df.columns = ["text"]
model_onnx = convert_sklearn(
- voting, initial_types=[('text', StringTensorType([None, 1]))],
+ voting,
+ initial_types=[("text", StringTensorType([None, 1]))],
target_opset=TARGET_OPSET,
- options={id(voting): {'zipmap': False}})
+ options={id(voting): {"zipmap": False}},
+ )
# with open("debug.onnx", "wb") as f:
# f.write(model_onnx.SerializeToString())
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'text': data.reshape((-1, 1))})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"text": data.reshape((-1, 1))})
assert_almost_equal(expected_proba, got[1])
assert_almost_equal(expected_label, got[0])
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(FutureWarning, UserWarning))
def test_pipeline_pipeline_rf(self):
- cat_feat = ['A', 'B']
- text_feat = 'TEXT'
-
- pipe = Pipeline(steps=[
- ('preprocessor', ColumnTransformer(
- transformers=[
- ('cat_tr', OneHotEncoder(handle_unknown='ignore'),
- cat_feat),
- ('count_vect', Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))]),
- text_feat)])),
- ('classifier', MultiOutputClassifier(
- estimator=RandomForestClassifier(
- n_estimators=5, max_depth=5)))])
-
- data = numpy.array([
- ["cat1", "cat2", "cat3", "cat1", "cat2"],
- ["C1", "C2", "C3", "C3", "C4"],
- ["first sentance", "second sentence",
- "many sentances", "dummy sentance",
- "no sentance at all"]]).T
+ cat_feat = ["A", "B"]
+ text_feat = "TEXT"
+
+ pipe = Pipeline(
+ steps=[
+ (
+ "preprocessor",
+ ColumnTransformer(
+ transformers=[
+ (
+ "cat_tr",
+ OneHotEncoder(handle_unknown="ignore"),
+ cat_feat,
+ ),
+ (
+ "count_vect",
+ Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(
+ max_df=0.8,
+ min_df=0.05,
+ max_features=1000,
+ ),
+ )
+ ]
+ ),
+ text_feat,
+ ),
+ ]
+ ),
+ ),
+ (
+ "classifier",
+ MultiOutputClassifier(
+ estimator=RandomForestClassifier(n_estimators=5, max_depth=5)
+ ),
+ ),
+ ]
+ )
+
+ data = numpy.array(
+ [
+ ["cat1", "cat2", "cat3", "cat1", "cat2"],
+ ["C1", "C2", "C3", "C3", "C4"],
+ [
+ "first sentance",
+ "second sentence",
+ "many sentances",
+ "dummy sentance",
+ "no sentance at all",
+ ],
+ ]
+ ).T
y = numpy.array([[0, 1], [0, 1], [1, 0], [0, 1], [1, 1]])
- df = pandas.DataFrame(data, columns=['A', 'B', 'TEXT'])
+ df = pandas.DataFrame(data, columns=["A", "B", "TEXT"])
pipe.fit(df, y)
expected_label = pipe.predict(df)
expected_proba = pipe.predict_proba(df)
model_onnx = convert_sklearn(
- pipe, initial_types=[
- ('A', StringTensorType([None, 1])),
- ('B', StringTensorType([None, 1])),
- ('TEXT', StringTensorType([None, 1]))],
+ pipe,
+ initial_types=[
+ ("A", StringTensorType([None, 1])),
+ ("B", StringTensorType([None, 1])),
+ ("TEXT", StringTensorType([None, 1])),
+ ],
target_opset=TARGET_OPSET,
- options={MultiOutputClassifier: {'zipmap': False}})
+ options={MultiOutputClassifier: {"zipmap": False}},
+ )
# with open("debug.onnx", "wb") as f:
# f.write(model_onnx.SerializeToString())
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'A': data[:, :1], 'B': data[:, 1:2],
- 'TEXT': data[:, 2:]})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"A": data[:, :1], "B": data[:, 1:2], "TEXT": data[:, 2:]})
self.assertEqual(len(expected_proba), len(got[1]))
for e, g in zip(expected_proba, got[1]):
assert_almost_equal(e, g, decimal=5)
assert_almost_equal(expected_label, got[0])
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_multio(self):
dfx = pandas.DataFrame(
- {'CAT1': ['985332', '985333', '985334', '985335', '985336'],
- 'CAT2': ['1985332', '1985333', '1985334', '1985335', '1985336'],
- 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"]})
- dfy = pandas.DataFrame(
- {'REAL': [5, 6, 7, 6, 5],
- 'CATY': [0, 1, 0, 1, 0]})
-
- cat_features = ['CAT1', 'CAT2']
- categorical_transformer = OneHotEncoder(handle_unknown='ignore')
- textual_feature = 'TEXT'
- count_vect_transformer = Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))])
+ {
+ "CAT1": ["985332", "985333", "985334", "985335", "985336"],
+ "CAT2": ["1985332", "1985333", "1985334", "1985335", "1985336"],
+ "TEXT": ["abc abc", "abc def", "def ghj", "abcdef", "abc ii"],
+ }
+ )
+ dfy = pandas.DataFrame({"REAL": [5, 6, 7, 6, 5], "CATY": [0, 1, 0, 1, 0]})
+
+ cat_features = ["CAT1", "CAT2"]
+ categorical_transformer = OneHotEncoder(handle_unknown="ignore")
+ textual_feature = "TEXT"
+ count_vect_transformer = Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(max_df=0.8, min_df=0.05, max_features=1000),
+ )
+ ]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', categorical_transformer, cat_features),
- ('count_vector', count_vect_transformer, textual_feature)])
+ ("cat_transform", categorical_transformer, cat_features),
+ ("count_vector", count_vect_transformer, textual_feature),
+ ]
+ )
model_RF = RandomForestClassifier(random_state=42, max_depth=50)
- rf_clf = Pipeline(steps=[
- ('preprocessor', preprocessor),
- ('classifier', MultiOutputClassifier(estimator=model_RF))])
+ rf_clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", MultiOutputClassifier(estimator=model_RF)),
+ ]
+ )
rf_clf.fit(dfx, dfy)
expected_label = rf_clf.predict(dfx)
expected_proba = rf_clf.predict_proba(dfx)
- inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx['TEXT'].values.reshape((-1, 1))}
- onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET,
- options={MultiOutputClassifier: {'zipmap': False}})
+ inputs = {
+ "CAT1": dfx["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx["TEXT"].values.reshape((-1, 1)),
+ }
+ onx = to_onnx(
+ rf_clf,
+ dfx,
+ target_opset=TARGET_OPSET,
+ options={MultiOutputClassifier: {"zipmap": False}},
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
got = sess.run(None, inputs)
assert_almost_equal(expected_label, got[0])
@@ -782,62 +985,107 @@ def test_issue_712_multio(self):
for e, g in zip(expected_proba, got[1]):
assert_almost_equal(e, g, decimal=5)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_svc_multio(self):
for sub_model in [LinearSVC(), SVC()]:
for method in ["sigmoid", "isotonic"]:
with self.subTest(sub_model=sub_model, method=method):
dfx = pandas.DataFrame(
- {'CAT1': ['985332', '985333', '985334', '985335',
- '985336', '985332', '985333', '985334',
- '985335', '985336', '985336'],
- 'CAT2': ['1985332', '1985333', '1985334', '1985335',
- '1985336', '1985332', '1985333', '1985334',
- '1985335', '1985336', '1985336'],
- 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef",
- "abc ii", "abc abc", "abc def", "def ghj",
- "abcdef", "abc ii", "abc abc"]})
+ {
+ "CAT1": [
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985336",
+ ],
+ "CAT2": [
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985336",
+ ],
+ "TEXT": [
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ ],
+ }
+ )
dfy = pandas.DataFrame(
- {'REAL': [5, 6, 7, 6, 5, 5, 6, 7, 5, 6, 7],
- 'CATY': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]})
-
- cat_features = ['CAT1', 'CAT2']
- categorical_transformer = OneHotEncoder(
- handle_unknown='ignore')
- textual_feature = 'TEXT'
- count_vect_transformer = Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))])
+ {
+ "REAL": [5, 6, 7, 6, 5, 5, 6, 7, 5, 6, 7],
+ "CATY": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
+ }
+ )
+
+ cat_features = ["CAT1", "CAT2"]
+ categorical_transformer = OneHotEncoder(handle_unknown="ignore")
+ textual_feature = "TEXT"
+ count_vect_transformer = Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(
+ max_df=0.8, min_df=0.05, max_features=1000
+ ),
+ )
+ ]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', categorical_transformer,
- cat_features),
- ('count_vector', count_vect_transformer,
- textual_feature)])
- model_SVC = CalibratedClassifierCV(
- sub_model, cv=2, method=method)
- rf_clf = Pipeline(steps=[
- ('preprocessor', preprocessor),
- ('classifier', MultiOutputClassifier(
- estimator=model_SVC))])
+ ("cat_transform", categorical_transformer, cat_features),
+ ("count_vector", count_vect_transformer, textual_feature),
+ ]
+ )
+ model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method)
+ rf_clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", MultiOutputClassifier(estimator=model_SVC)),
+ ]
+ )
rf_clf.fit(dfx, dfy)
expected_label = rf_clf.predict(dfx)
expected_proba = rf_clf.predict_proba(dfx)
- inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx['TEXT'].values.reshape((-1, 1))}
+ inputs = {
+ "CAT1": dfx["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx["TEXT"].values.reshape((-1, 1)),
+ }
onx = to_onnx(
- rf_clf, dfx, target_opset=TARGET_OPSET,
- options={MultiOutputClassifier: {'zipmap': False}})
+ rf_clf,
+ dfx,
+ target_opset=TARGET_OPSET,
+ options={MultiOutputClassifier: {"zipmap": False}},
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
got = sess.run(None, inputs)
assert_almost_equal(expected_label, got[0])
self.assertEqual(len(expected_proba), len(got[1]))
@@ -848,154 +1096,239 @@ def test_issue_712_svc_multio(self):
else:
assert_almost_equal(e, g, decimal=5)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_svc_binary0(self):
for sub_model in [LinearSVC(), SVC()]:
for method in ["sigmoid", "isotonic"]:
with self.subTest(sub_model=sub_model, method=method):
dfx = pandas.DataFrame(
- {'CAT1': ['985332', '985333', '985334', '985335',
- '985336', '985332', '985333', '985334',
- '985335', '985336', '985336'],
- 'CAT2': ['1985332', '1985333', '1985334', '1985335',
- '1985336', '1985332', '1985333', '1985334',
- '1985335', '1985336', '1985336'],
- 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef",
- "abc ii", "abc abc", "abc def", "def ghj",
- "abcdef", "abc ii", "abc abc"]})
+ {
+ "CAT1": [
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985336",
+ ],
+ "CAT2": [
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985336",
+ ],
+ "TEXT": [
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ ],
+ }
+ )
dfy = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
- cat_features = ['CAT1', 'CAT2']
- categorical_transformer = OneHotEncoder(
- handle_unknown='ignore')
- textual_feature = 'TEXT'
- count_vect_transformer = Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))])
+ cat_features = ["CAT1", "CAT2"]
+ categorical_transformer = OneHotEncoder(handle_unknown="ignore")
+ textual_feature = "TEXT"
+ count_vect_transformer = Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(
+ max_df=0.8, min_df=0.05, max_features=1000
+ ),
+ )
+ ]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', categorical_transformer,
- cat_features),
- ('count_vector', count_vect_transformer,
- textual_feature)])
- model_SVC = CalibratedClassifierCV(
- sub_model, cv=2, method=method)
- rf_clf = Pipeline(steps=[
- ('preprocessor', preprocessor),
- ('classifier', model_SVC)])
+ ("cat_transform", categorical_transformer, cat_features),
+ ("count_vector", count_vect_transformer, textual_feature),
+ ]
+ )
+ model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method)
+ rf_clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", model_SVC),
+ ]
+ )
rf_clf.fit(dfx, dfy)
expected_label = rf_clf.predict(dfx)
expected_proba = rf_clf.predict_proba(dfx)
- inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx['TEXT'].values.reshape((-1, 1))}
- onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ inputs = {
+ "CAT1": dfx["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx["TEXT"].values.reshape((-1, 1)),
+ }
+ onx = to_onnx(
+ rf_clf,
+ dfx,
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
got = sess.run(None, inputs)
assert_almost_equal(expected_label, got[0])
assert_almost_equal(expected_proba, got[1], decimal=5)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_svc_multi(self):
for sub_model in [SVC(), LinearSVC()]:
for method in ["isotonic", "sigmoid"]:
with self.subTest(sub_model=sub_model, method=method):
dfx = pandas.DataFrame(
- {'CAT1': ['985332', '985333', '985334', '985335',
- '985336', '985332', '985333', '985334',
- '985335', '985336', '985336'],
- 'CAT2': ['1985332', '1985333', '1985334', '1985335',
- '1985336', '1985332', '1985333', '1985334',
- '1985335', '1985336', '1985336'],
- 'TEXT': ["abc abc", "abc def", "def ghj", "abcdef",
- "abc ii", "abc abc", "abc def", "def ghj",
- "abcdef", "abc ii", "abc abc"]})
+ {
+ "CAT1": [
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985332",
+ "985333",
+ "985334",
+ "985335",
+ "985336",
+ "985336",
+ ],
+ "CAT2": [
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985332",
+ "1985333",
+ "1985334",
+ "1985335",
+ "1985336",
+ "1985336",
+ ],
+ "TEXT": [
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ "abc def",
+ "def ghj",
+ "abcdef",
+ "abc ii",
+ "abc abc",
+ ],
+ }
+ )
dfy = numpy.array([5, 6, 7, 6, 5, 5, 8, 7, 5, 6, 8])
- cat_features = ['CAT1', 'CAT2']
- categorical_transformer = OneHotEncoder(
- handle_unknown='ignore')
- textual_feature = 'TEXT'
- count_vect_transformer = Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))])
+ cat_features = ["CAT1", "CAT2"]
+ categorical_transformer = OneHotEncoder(handle_unknown="ignore")
+ textual_feature = "TEXT"
+ count_vect_transformer = Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(
+ max_df=0.8, min_df=0.05, max_features=1000
+ ),
+ )
+ ]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', categorical_transformer,
- cat_features),
- ('count_vector', count_vect_transformer,
- textual_feature)])
- model_SVC = CalibratedClassifierCV(
- sub_model, cv=2, method=method)
- rf_clf = Pipeline(steps=[
- ('preprocessor', preprocessor),
- ('classifier', model_SVC)])
+ ("cat_transform", categorical_transformer, cat_features),
+ ("count_vector", count_vect_transformer, textual_feature),
+ ]
+ )
+ model_SVC = CalibratedClassifierCV(sub_model, cv=2, method=method)
+ rf_clf = Pipeline(
+ steps=[
+ ("preprocessor", preprocessor),
+ ("classifier", model_SVC),
+ ]
+ )
rf_clf.fit(dfx, dfy)
expected_label = rf_clf.predict(dfx)
expected_proba = rf_clf.predict_proba(dfx)
- inputs = {'CAT1': dfx['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx['TEXT'].values.reshape((-1, 1))}
- onx = to_onnx(rf_clf, dfx, target_opset=TARGET_OPSET,
- options={'zipmap': False})
+ inputs = {
+ "CAT1": dfx["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx["TEXT"].values.reshape((-1, 1)),
+ }
+ onx = to_onnx(
+ rf_clf,
+ dfx,
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
got = sess.run(None, inputs)
assert_almost_equal(expected_label, got[0])
if method == "isotonic":
# float/double issues
- assert_almost_equal(
- expected_proba[2:4], got[1][2:4], decimal=3)
+ assert_almost_equal(expected_proba[2:4], got[1][2:4], decimal=3)
else:
assert_almost_equal(expected_proba, got[1], decimal=5)
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
@ignore_warnings(category=(FutureWarning, UserWarning))
def test_pipeline_make_column_selector(self):
- X = pandas.DataFrame({
- 'city': ['London', 'London', 'Paris', 'Sallisaw'],
- 'rating': [5, 3, 4, 5]})
- X['rating'] = X['rating'].astype(numpy.float32)
+ X = pandas.DataFrame(
+ {"city": ["London", "London", "Paris", "Sallisaw"], "rating": [5, 3, 4, 5]}
+ )
+ X["rating"] = X["rating"].astype(numpy.float32)
ct = make_column_transformer(
- (StandardScaler(), make_column_selector(
- dtype_include=numpy.number)),
- (OneHotEncoder(), make_column_selector(
- dtype_include=object)))
+ (StandardScaler(), make_column_selector(dtype_include=numpy.number)),
+ (OneHotEncoder(), make_column_selector(dtype_include=object)),
+ )
expected = ct.fit_transform(X)
onx = to_onnx(ct, X, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [i.name for i in sess.get_inputs()]
- got = sess.run(None, {names[0]: X[names[0]].values.reshape((-1, 1)),
- names[1]: X[names[1]].values.reshape((-1, 1))})
+ got = sess.run(
+ None,
+ {
+ names[0]: X[names[0]].values.reshape((-1, 1)),
+ names[1]: X[names[1]].values.reshape((-1, 1)),
+ },
+ )
assert_almost_equal(expected, got[0])
- @unittest.skipIf(
- not check_scikit_version(),
- reason="Scikit 0.21 too old")
+ @unittest.skipIf(not check_scikit_version(), reason="Scikit 0.21 too old")
def test_feature_selector_no_converter(self):
-
class ColumnSelector(TransformerMixin, BaseEstimator):
def __init__(self, cols):
if not isinstance(cols, list):
@@ -1014,24 +1347,26 @@ def transform(self, X):
# https://github.com/databricks/automl/blob/main/
# runtime/tests/automl_runtime/sklearn/column_selector_test.py
X_in = pandas.DataFrame(
- numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
- dtype=numpy.float32),
- columns=["a", "b", "c"])
- y = pandas.DataFrame(numpy.array([[1], [0], [1]]),
- columns=["label"])
+ numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=numpy.float32),
+ columns=["a", "b", "c"],
+ )
+ y = pandas.DataFrame(numpy.array([[1], [0], [1]]), columns=["label"])
X_out_expected = numpy.array([1, 0, 1])
standardizer = StandardScaler()
selected_cols = ["a", "b"]
col_selector = ColumnSelector(selected_cols)
preprocessor = ColumnTransformer(
- [("standardizer", standardizer, selected_cols)], remainder="drop")
+ [("standardizer", standardizer, selected_cols)], remainder="drop"
+ )
- model = Pipeline([
- ("column_selector", col_selector),
- ("preprocessor", preprocessor),
- ("decision_tree", DecisionTreeClassifier())
- ])
+ model = Pipeline(
+ [
+ ("column_selector", col_selector),
+ ("preprocessor", preprocessor),
+ ("decision_tree", DecisionTreeClassifier()),
+ ]
+ )
model.fit(X=X_in, y=y)
         # Add one column so that the dataframe used for prediction
         # differs from the data used for training.
@@ -1041,7 +1376,7 @@ def transform(self, X):
with self.assertRaises(RuntimeError) as e:
to_onnx(model, X_in)
- self.assertIn('ColumnTransformer', str(e))
+ self.assertIn("ColumnTransformer", str(e))
@unittest.skipIf(TARGET_OPSET < 15, reason="use CastLike")
def test_feature_vectorizer_double(self):
@@ -1050,18 +1385,18 @@ def test_feature_vectorizer_double(self):
X["sexi"] = X["sex"].astype(numpy.int64)
X = X.drop("sex", axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y)
- regr = Pipeline([("std", StandardScaler()),
- ("reg", LinearRegression())])
+ regr = Pipeline([("std", StandardScaler()), ("reg", LinearRegression())])
regr = regr.fit(X_train, y_train)
onnx_model = to_onnx(regr, X=X_train)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
expected = regr.predict(X_test)
names = [i.name for i in sess.get_inputs()]
- feeds = {n: X_test[c].values.reshape((-1, 1))
- for n, c in zip(names, X_test.columns)}
+ feeds = {
+ n: X_test[c].values.reshape((-1, 1)) for n, c in zip(names, X_test.columns)
+ }
got = sess.run(None, feeds)
assert_almost_equal(expected.ravel(), got[0].ravel(), decimal=4)
if ReferenceEvaluatorEx is None:
diff --git a/tests/test_sklearn_pipeline_concat_tfidf.py b/tests/test_sklearn_pipeline_concat_tfidf.py
index 8a20c953e..464a5ea89 100644
--- a/tests/test_sklearn_pipeline_concat_tfidf.py
+++ b/tests/test_sklearn_pipeline_concat_tfidf.py
@@ -7,6 +7,7 @@
from onnxruntime import InferenceSession
from onnxruntime.capi.onnxruntime_pybind11_state import Fail
import pandas
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -27,50 +28,232 @@
class TestSklearnPipelineConcatTfIdf(unittest.TestCase):
-
- words = ['ability', 'able', 'about', 'above', 'abroad',
- 'absence', 'absolute', 'absolutely', 'absorb',
- 'academic', 'accept', 'access', 'accident', 'accompany',
- 'accomplish', 'according', 'account', 'accurate', 'achieve',
- 'achievement', 'acid', 'acknowledge', 'acquire', 'across',
- 'act', 'action', 'active', 'activity', 'actor', 'actress',
- 'actual', 'actually', 'ad', 'adapt', 'add', 'addition',
- 'additional', 'address', 'adequate', 'adjust',
- 'adjustment', 'administration', 'administrator', 'admire',
- 'admission', 'admit', 'adolescent', 'adopt', 'adult',
- 'advance', 'advanced', 'advantage', 'adventure',
- 'advice', 'advise', 'adviser', 'advocate', 'affair',
- 'afford', 'afraid', 'after', 'afternoon', 'again', 'against',
- 'age', 'agency', 'agenda', 'agent', 'aggressive', 'ago',
- 'agree', 'agreement', 'agricultural', 'ah', 'ahead', 'aid',
- 'aide', 'aim', 'air', 'aircraft', 'airline', 'airport',
- 'alive', 'all', 'alliance', 'allow', 'ally', 'almost',
- 'along', 'already', 'also', 'alter', 'alternative',
- 'always', 'AM', 'amazing', 'among', 'amount', 'analysis',
- 'analyze', 'ancient', 'and', 'anger', 'angle', 'angry',
- 'anniversary', 'announce', 'annual', 'another', 'answer',
- 'anticipate', 'anxiety', 'any', 'anybody', 'anymore',
- 'anything', 'anyway', 'anywhere', 'apart', 'apartment',
- 'apparently', 'appeal', 'appear', 'appearance', 'apple',
- 'application', 'apply', 'appoint', 'appointment',
- 'approach', 'appropriate', 'approval', 'approve',
- 'architect', 'area', 'argue', 'argument', 'arise', 'arm',
- 'around', 'arrange', 'arrangement', 'arrest',
- 'arrival', 'arrive', 'art', 'article', 'artist', 'artistic',
- 'as', 'aside', 'ask', 'asleep', 'aspect', 'assert',
- 'assess', 'assessment', 'asset', 'assign', 'assignment',
- 'assist', 'assistance', 'assistant', 'associate',
- 'association', 'assume', 'assumption', 'assure', 'at',
- 'athlete', 'athletic', 'atmosphere', 'attach', 'attack',
- 'attempt', 'attend', 'attention', 'attitude', 'attract',
- 'attractive', 'attribute', 'audience', 'author', 'auto',
- 'available', 'average', 'avoid', 'award', 'aware',
- 'away', 'baby', 'back', 'background', 'bag', 'bake',
- 'balance', 'ball', 'band', 'bank', 'bar', 'barrel',
- 'barrier', 'base', 'baseball', 'basic', 'basically',
- 'a', 'to', 'the', 'an', 'than', 'of', 'off', 'us',
- 'who', 'which', 'what', 'why', 'whom', 'at', 'from',
- 'for', 'to', 'towards']
+ words = [
+ "ability",
+ "able",
+ "about",
+ "above",
+ "abroad",
+ "absence",
+ "absolute",
+ "absolutely",
+ "absorb",
+ "academic",
+ "accept",
+ "access",
+ "accident",
+ "accompany",
+ "accomplish",
+ "according",
+ "account",
+ "accurate",
+ "achieve",
+ "achievement",
+ "acid",
+ "acknowledge",
+ "acquire",
+ "across",
+ "act",
+ "action",
+ "active",
+ "activity",
+ "actor",
+ "actress",
+ "actual",
+ "actually",
+ "ad",
+ "adapt",
+ "add",
+ "addition",
+ "additional",
+ "address",
+ "adequate",
+ "adjust",
+ "adjustment",
+ "administration",
+ "administrator",
+ "admire",
+ "admission",
+ "admit",
+ "adolescent",
+ "adopt",
+ "adult",
+ "advance",
+ "advanced",
+ "advantage",
+ "adventure",
+ "advice",
+ "advise",
+ "adviser",
+ "advocate",
+ "affair",
+ "afford",
+ "afraid",
+ "after",
+ "afternoon",
+ "again",
+ "against",
+ "age",
+ "agency",
+ "agenda",
+ "agent",
+ "aggressive",
+ "ago",
+ "agree",
+ "agreement",
+ "agricultural",
+ "ah",
+ "ahead",
+ "aid",
+ "aide",
+ "aim",
+ "air",
+ "aircraft",
+ "airline",
+ "airport",
+ "alive",
+ "all",
+ "alliance",
+ "allow",
+ "ally",
+ "almost",
+ "along",
+ "already",
+ "also",
+ "alter",
+ "alternative",
+ "always",
+ "AM",
+ "amazing",
+ "among",
+ "amount",
+ "analysis",
+ "analyze",
+ "ancient",
+ "and",
+ "anger",
+ "angle",
+ "angry",
+ "anniversary",
+ "announce",
+ "annual",
+ "another",
+ "answer",
+ "anticipate",
+ "anxiety",
+ "any",
+ "anybody",
+ "anymore",
+ "anything",
+ "anyway",
+ "anywhere",
+ "apart",
+ "apartment",
+ "apparently",
+ "appeal",
+ "appear",
+ "appearance",
+ "apple",
+ "application",
+ "apply",
+ "appoint",
+ "appointment",
+ "approach",
+ "appropriate",
+ "approval",
+ "approve",
+ "architect",
+ "area",
+ "argue",
+ "argument",
+ "arise",
+ "arm",
+ "around",
+ "arrange",
+ "arrangement",
+ "arrest",
+ "arrival",
+ "arrive",
+ "art",
+ "article",
+ "artist",
+ "artistic",
+ "as",
+ "aside",
+ "ask",
+ "asleep",
+ "aspect",
+ "assert",
+ "assess",
+ "assessment",
+ "asset",
+ "assign",
+ "assignment",
+ "assist",
+ "assistance",
+ "assistant",
+ "associate",
+ "association",
+ "assume",
+ "assumption",
+ "assure",
+ "at",
+ "athlete",
+ "athletic",
+ "atmosphere",
+ "attach",
+ "attack",
+ "attempt",
+ "attend",
+ "attention",
+ "attitude",
+ "attract",
+ "attractive",
+ "attribute",
+ "audience",
+ "author",
+ "auto",
+ "available",
+ "average",
+ "avoid",
+ "award",
+ "aware",
+ "away",
+ "baby",
+ "back",
+ "background",
+ "bag",
+ "bake",
+ "balance",
+ "ball",
+ "band",
+ "bank",
+ "bar",
+ "barrel",
+ "barrier",
+ "base",
+ "baseball",
+ "basic",
+ "basically",
+ "a",
+ "to",
+ "the",
+ "an",
+ "than",
+ "of",
+ "off",
+ "us",
+ "who",
+ "which",
+ "what",
+ "why",
+ "whom",
+ "at",
+ "from",
+ "for",
+ "to",
+ "towards",
+ ]
@staticmethod
def random_cats(n=10000, start=1000, end=9000):
@@ -83,60 +266,66 @@ def random_sentance(n=10000, length=7):
ls = numpy.random.randint(1, length, n)
text = []
for size in ls:
- sentance = [words[random.randint(0, len(words) - 1)]
- for i in range(size)]
+ sentance = [words[random.randint(0, len(words) - 1)] for i in range(size)]
text.append(" ".join(sentance))
return numpy.array(text)
@staticmethod
def get_pipeline(N=10000):
dfx = pandas.DataFrame(
- {'CAT1': TestSklearnPipelineConcatTfIdf.random_cats(N, 10, 20),
- 'CAT2': TestSklearnPipelineConcatTfIdf.random_cats(N, 30, 40),
- 'TEXT': TestSklearnPipelineConcatTfIdf.random_sentance(N)})
+ {
+ "CAT1": TestSklearnPipelineConcatTfIdf.random_cats(N, 10, 20),
+ "CAT2": TestSklearnPipelineConcatTfIdf.random_cats(N, 30, 40),
+ "TEXT": TestSklearnPipelineConcatTfIdf.random_sentance(N),
+ }
+ )
dfy = numpy.random.randint(0, 2, N)
dfx_train, dfx_test, dfy_train, dfy_test = train_test_split(dfx, dfy)
- cat_features = ['CAT1', 'CAT2']
- categorical_transformer = OneHotEncoder(
- handle_unknown='ignore', sparse=True)
- textual_feature = 'TEXT'
- count_vect_transformer = Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.02, max_features=1000))])
+ cat_features = ["CAT1", "CAT2"]
+ categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=True)
+ textual_feature = "TEXT"
+ count_vect_transformer = Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(max_df=0.8, min_df=0.02, max_features=1000),
+ )
+ ]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', categorical_transformer,
- cat_features),
- ('count_vector', count_vect_transformer,
- textual_feature)])
- pipe = Pipeline(steps=[('preprocessor', preprocessor)])
+ ("cat_transform", categorical_transformer, cat_features),
+ ("count_vector", count_vect_transformer, textual_feature),
+ ]
+ )
+ pipe = Pipeline(steps=[("preprocessor", preprocessor)])
pipe.fit(dfx_train, dfy_train)
dfx_test = dfx_test.reset_index(drop=True).copy()
- dfx_test.loc[0, 'TEXT'] = 'about'
- dfx_test.loc[1, 'TEXT'] = 'the'
+ dfx_test.loc[0, "TEXT"] = "about"
+ dfx_test.loc[1, "TEXT"] = "the"
return pipe, dfx_test
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_svc_binary(self):
-
pipe, dfx_test = TestSklearnPipelineConcatTfIdf.get_pipeline()
expected = pipe.transform(dfx_test)
- inputs = {'CAT1': dfx_test['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx_test['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx_test['TEXT'].values.reshape((-1, 1))}
+ inputs = {
+ "CAT1": dfx_test["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx_test["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx_test["TEXT"].values.reshape((-1, 1)),
+ }
onx = to_onnx(pipe, dfx_test, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
expected_dense = expected.todense()
for i in range(dfx_test.shape[0]):
- row_inputs = {k: v[i: i + 1] for k, v in inputs.items()}
+ row_inputs = {k: v[i : i + 1] for k, v in inputs.items()}
got = sess.run(None, row_inputs)
assert_almost_equal(expected_dense[i], got[0])
@@ -155,28 +344,32 @@ def test_issue_712_svc_binary(self):
got = sess.run(None, inputs)
# assert_almost_equal(expected.todense(), got[0])
- @unittest.skipIf(TARGET_OPSET < 11,
- reason="SequenceConstruct not available")
+ @unittest.skipIf(TARGET_OPSET < 11, reason="SequenceConstruct not available")
@ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))
def test_issue_712_svc_binary_empty(self):
-
pipe, dfx_test = TestSklearnPipelineConcatTfIdf.get_pipeline()
expected = pipe.transform(dfx_test)
- inputs = {'CAT1': dfx_test['CAT1'].values.reshape((-1, 1)),
- 'CAT2': dfx_test['CAT2'].values.reshape((-1, 1)),
- 'TEXT': dfx_test['TEXT'].values.reshape((-1, 1))}
- onx = to_onnx(pipe, dfx_test, target_opset=TARGET_OPSET,
- options={CountVectorizer: {'keep_empty_string': True}})
+ inputs = {
+ "CAT1": dfx_test["CAT1"].values.reshape((-1, 1)),
+ "CAT2": dfx_test["CAT2"].values.reshape((-1, 1)),
+ "TEXT": dfx_test["TEXT"].values.reshape((-1, 1)),
+ }
+ onx = to_onnx(
+ pipe,
+ dfx_test,
+ target_opset=TARGET_OPSET,
+ options={CountVectorizer: {"keep_empty_string": True}},
+ )
with open("debug.onnx", "wb") as f:
f.write(onx.SerializeToString())
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
expected_dense = expected.todense()
for i in range(dfx_test.shape[0]):
- row_inputs = {k: v[i: i + 1] for k, v in inputs.items()}
+ row_inputs = {k: v[i : i + 1] for k, v in inputs.items()}
got = sess.run(None, row_inputs)
assert_almost_equal(expected_dense[i], got[0])
diff --git a/tests/test_sklearn_pipeline_within_pipeline.py b/tests/test_sklearn_pipeline_within_pipeline.py
index 87cfffbe5..e45de2305 100644
--- a/tests/test_sklearn_pipeline_within_pipeline.py
+++ b/tests/test_sklearn_pipeline_within_pipeline.py
@@ -8,6 +8,7 @@
from io import StringIO
import numpy as np
import pandas
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -22,7 +23,11 @@
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
- MinMaxScaler, RobustScaler, StandardScaler, OneHotEncoder)
+ MinMaxScaler,
+ RobustScaler,
+ StandardScaler,
+ OneHotEncoder,
+)
from sklearn.feature_extraction.text import CountVectorizer
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import FloatTensorType, StringTensorType
@@ -30,7 +35,6 @@
class TestSklearnPipelineWithinPipeline(unittest.TestCase):
-
def test_pipeline_pca_pipeline_minmax(self):
model = Pipeline(
memory=None,
@@ -51,13 +55,15 @@ def test_pipeline_pca_pipeline_minmax(self):
"Pipeline",
Pipeline(
memory=None,
- steps=[(
- "MinMax scaler",
- MinMaxScaler(
- copy=True,
- feature_range=(0, 3.7209871159509307),
- ),
- )],
+ steps=[
+ (
+ "MinMax scaler",
+ MinMaxScaler(
+ copy=True,
+ feature_range=(0, 3.7209871159509307),
+ ),
+ )
+ ],
),
),
],
@@ -70,14 +76,12 @@ def test_pipeline_pca_pipeline_minmax(self):
model,
"pipelinewithinpipeline",
[("input", FloatTensorType(data.shape))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnPipelinePcaPipelineMinMax")
+ data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMax"
+ )
def test_pipeline_pca_pipeline_none_lin(self):
model = Pipeline(
@@ -121,14 +125,12 @@ def test_pipeline_pca_pipeline_none_lin(self):
model,
"pipelinewithinpipeline",
[("input", FloatTensorType(data.shape))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnPipelinePcaPipelineMinMaxLogReg")
+ data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxLogReg"
+ )
def test_pipeline_pca_pipeline_multinomial(self):
model = Pipeline(
@@ -182,14 +184,12 @@ def test_pipeline_pca_pipeline_multinomial(self):
model,
"pipelinewithinpipeline",
[("input", FloatTensorType(data.shape))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnPipelinePcaPipelineMinMaxNB2")
+ data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNB2"
+ )
def test_pipeline_pca_pipeline_multinomial_none(self):
model = Pipeline(
@@ -240,102 +240,130 @@ def test_pipeline_pca_pipeline_multinomial_none(self):
model,
"pipelinewithinpipeline",
[("input", FloatTensorType(data.shape))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data,
- model,
- model_onnx,
- basename="SklearnPipelinePcaPipelineMinMaxNBNone")
+ data, model, model_onnx, basename="SklearnPipelinePcaPipelineMinMaxNBNone"
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
def test_pipeline_column_transformer_pipeline_imputer_scaler_lr(self):
X = np.array([[1, 2], [3, np.nan], [3, 0]], dtype=np.float32)
y = np.array([1, 0, 1])
- model = Pipeline([
- (
- "ct",
- ColumnTransformer([
- (
- "pipeline1",
- Pipeline([
- ("imputer", SimpleImputer()),
- ("scaler", StandardScaler()),
- ]),
- [0],
- ),
- (
- "pipeline2",
- Pipeline([
- ("imputer", SimpleImputer()),
- ("scaler", RobustScaler()),
- ]),
- [1],
+ model = Pipeline(
+ [
+ (
+ "ct",
+ ColumnTransformer(
+ [
+ (
+ "pipeline1",
+ Pipeline(
+ [
+ ("imputer", SimpleImputer()),
+ ("scaler", StandardScaler()),
+ ]
+ ),
+ [0],
+ ),
+ (
+ "pipeline2",
+ Pipeline(
+ [
+ ("imputer", SimpleImputer()),
+ ("scaler", RobustScaler()),
+ ]
+ ),
+ [1],
+ ),
+ ]
),
- ]),
- ),
- ("lr", LogisticRegression(solver="liblinear")),
- ])
+ ),
+ ("lr", LogisticRegression(solver="liblinear")),
+ ]
+ )
model.fit(X, y)
model_onnx = convert_sklearn(
model,
"pipelinewithinpipeline",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X,
- model,
- model_onnx,
- basename="SklearnPipelineCTPipelineImputerScalerLR")
+ X, model, model_onnx, basename="SklearnPipelineCTPipelineImputerScalerLR"
+ )
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
def test_complex_pipeline(self):
-
- df = pandas.read_csv(StringIO(dedent("""
+ df = pandas.read_csv(
+ StringIO(
+ dedent(
+ """
CAT1,CAT2,TEXT
A,M,clean
B,N,text
A,M,cleaning
- B,N,normalizing""")))
+ B,N,normalizing"""
+ )
+ )
+ )
X_train = df
y_train = np.array([[1, 0, 1, 0], [1, 0, 1, 0]]).T
- categorical_features = ['CAT1', 'CAT2']
- textual_feature = 'TEXT'
+ categorical_features = ["CAT1", "CAT2"]
+ textual_feature = "TEXT"
preprocessor = ColumnTransformer(
transformers=[
- ('cat_transform', OneHotEncoder(handle_unknown='ignore'),
- categorical_features),
- ('count_vector', Pipeline(steps=[
- ('count_vect', CountVectorizer(
- max_df=0.8, min_df=0.05, max_features=1000))]),
- textual_feature)])
+ (
+ "cat_transform",
+ OneHotEncoder(handle_unknown="ignore"),
+ categorical_features,
+ ),
+ (
+ "count_vector",
+ Pipeline(
+ steps=[
+ (
+ "count_vect",
+ CountVectorizer(
+ max_df=0.8, min_df=0.05, max_features=1000
+ ),
+ )
+ ]
+ ),
+ textual_feature,
+ ),
+ ]
+ )
preprocessor.fit(X_train, y_train)
- initial_type = [('CAT1', StringTensorType([None, 1])),
- ('CAT2', StringTensorType([None, 1])),
- ('TEXTs', StringTensorType([None, 1]))]
+ initial_type = [
+ ("CAT1", StringTensorType([None, 1])),
+ ("CAT2", StringTensorType([None, 1])),
+ ("TEXTs", StringTensorType([None, 1])),
+ ]
with self.assertRaises(RuntimeError):
- to_onnx(preprocessor, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ to_onnx(preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET)
- initial_type = [('CAT1', StringTensorType([None, 1])),
- ('CAT2', StringTensorType([None, 1])),
- ('TEXT', StringTensorType([None, 1]))]
- onx = to_onnx(preprocessor, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ initial_type = [
+ ("CAT1", StringTensorType([None, 1])),
+ ("CAT2", StringTensorType([None, 1])),
+ ("TEXT", StringTensorType([None, 1])),
+ ]
+ onx = to_onnx(
+ preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
dump_data_and_model(
- X_train, preprocessor, onx,
- basename="SklearnPipelineComplex")
+ X_train, preprocessor, onx, basename="SklearnPipelineComplex"
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_pls_regression.py b/tests/test_sklearn_pls_regression.py
index 981ebd718..1d9200129 100644
--- a/tests/test_sklearn_pls_regression.py
+++ b/tests/test_sklearn_pls_regression.py
@@ -9,65 +9,85 @@
from sklearn.cross_decomposition import PLSRegression
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- FloatTensorType, Int64TensorType, DoubleTensorType
+ FloatTensorType,
+ Int64TensorType,
+ DoubleTensorType,
)
from test_utils import dump_data_and_model, TARGET_OPSET
class TestSklearnPLSRegressionConverters(unittest.TestCase):
def test_model_pls_regression(self):
- X = numpy.array([[0., 0., 1.], [1., 0., 0.],
- [2., 2., 2.], [2., 5., 4.]],
- numpy.float32)
- Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9],
- [11.9, 12.3]],
- numpy.float32)
+ X = numpy.array(
+ [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]],
+ numpy.float32,
+ )
+ Y = numpy.array(
+ [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.float32
+ )
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)
model_onnx = convert_sklearn(
- pls2, "scikit-learn pls",
+ pls2,
+ "scikit-learn pls",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, pls2, model_onnx, methods=['predict'],
- basename="SklearnPLSRegression", verbose=10)
+ X,
+ pls2,
+ model_onnx,
+ methods=["predict"],
+ basename="SklearnPLSRegression",
+ verbose=10,
+ )
def test_model_pls_regression64(self):
- X = numpy.array([[0., 0., 1.], [1., 0., 0.],
- [2., 2., 2.], [2., 5., 4.]],
- numpy.float64)
- Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9],
- [11.9, 12.3]],
- numpy.float64)
+ X = numpy.array(
+ [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]],
+ numpy.float64,
+ )
+ Y = numpy.array(
+ [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.float64
+ )
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)
model_onnx = convert_sklearn(
- pls2, "scikit-learn pls64",
+ pls2,
+ "scikit-learn pls64",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, pls2, model_onnx, methods=['predict'],
- basename="SklearnPLSRegression64")
+ X, pls2, model_onnx, methods=["predict"], basename="SklearnPLSRegression64"
+ )
def test_model_pls_regressionInt64(self):
- X = numpy.array([[0., 0., 1.], [1., 0., 0.],
- [2., 2., 2.], [2., 5., 4.]],
- numpy.int64)
- Y = numpy.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9],
- [11.9, 12.3]],
- numpy.int64)
+ X = numpy.array(
+ [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [2.0, 2.0, 2.0], [2.0, 5.0, 4.0]],
+ numpy.int64,
+ )
+ Y = numpy.array(
+ [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]], numpy.int64
+ )
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)
model_onnx = convert_sklearn(
- pls2, "scikit-learn plsint64",
+ pls2,
+ "scikit-learn plsint64",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X, pls2, model_onnx, methods=['predict'],
- basename="SklearnPLSRegressionInt64")
+ X,
+ pls2,
+ model_onnx,
+ methods=["predict"],
+ basename="SklearnPLSRegressionInt64",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_polynomial_features_converter.py b/tests/test_sklearn_polynomial_features_converter.py
index db8eec54d..af7f39129 100644
--- a/tests/test_sklearn_polynomial_features_converter.py
+++ b/tests/test_sklearn_polynomial_features_converter.py
@@ -5,6 +5,7 @@
"""
import unittest
import numpy as np
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -18,41 +19,53 @@
class TestSklearnPolynomialFeatures(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
def test_model_polynomial_features_float_degree_2(self):
- X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
- [0, 3.2, 4.7, -8.9]])
+ X = np.array(
+ [[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0], [0, 3.2, 4.7, -8.9]]
+ )
model = PolynomialFeatures(degree=2).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnPolynomialFeaturesFloatDegree2")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesFloatDegree2",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
def test_model_polynomial_features_int_degree_2(self):
- X = np.array([
- [1, 3, 4, 0],
- [2, 3, 4, 1],
- [1, -4, 3, 7],
- [3, 10, -9, 5],
- [1, 0, 10, 5],
- ])
+ X = np.array(
+ [
+ [1, 3, 4, 0],
+ [2, 3, 4, 1],
+ [1, -4, 3, 7],
+ [3, 10, -9, 5],
+ [1, 0, 10, 5],
+ ]
+ )
model = PolynomialFeatures(degree=2).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnPolynomialFeaturesIntDegree2")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesIntDegree2",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
@@ -60,49 +73,65 @@ def test_model_polynomial_features_float_degree_3(self):
X = np.array([[1.2, 3.2, 1.2], [4.3, 3.2, 4.5], [3.2, 4.7, 1.1]])
model = PolynomialFeatures(degree=3).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnPolynomialFeaturesFloatDegree3")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesFloatDegree3",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
def test_model_polynomial_features_int_degree_3(self):
- X = np.array([
- [1, 3, 33],
- [4, 1, -11],
- [3, 7, -3],
- [3, 5, 4],
- [1, 0, 3],
- [5, 4, 9],
- ])
+ X = np.array(
+ [
+ [1, 3, 33],
+ [4, 1, -11],
+ [3, 7, -3],
+ [3, 5, 4],
+ [1, 0, 3],
+ [5, 4, 9],
+ ]
+ )
model = PolynomialFeatures(degree=3).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnPolynomialFeaturesIntDegree3")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesIntDegree3",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
def test_model_polynomial_features_float_degree_4(self):
- X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3],
- [3.2, 4.7, 5.4, 7.1]])
+ X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3], [3.2, 4.7, 5.4, 7.1]])
model = PolynomialFeatures(degree=4).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnPolynomialFeaturesFloatDegree4-Dec4")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesFloatDegree4-Dec4",
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
@ignore_warnings(category=FutureWarning)
@@ -110,13 +139,18 @@ def test_model_polynomial_features_int_degree_4(self):
X = np.array([[1, 3, 4, 1], [3, 7, 3, 5], [1, 0, 5, 4]])
model = PolynomialFeatures(degree=4).fit(X)
model_onnx = convert_sklearn(
- model, "scikit-learn polynomial features",
+ model,
+ "scikit-learn polynomial features",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- X.astype(np.int64), model, model_onnx,
- basename="SklearnPolynomialFeaturesIntDegree4")
+ X.astype(np.int64),
+ model,
+ model_onnx,
+ basename="SklearnPolynomialFeaturesIntDegree4",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_power_transformer.py b/tests/test_sklearn_power_transformer.py
index 1f02af244..2f9d3141e 100644
--- a/tests/test_sklearn_power_transformer.py
+++ b/tests/test_sklearn_power_transformer.py
@@ -39,9 +39,7 @@ def test_powertransformer_yeo_johnson_positive_without_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_yeo_johnson_negative_without_scaler(self):
@@ -55,9 +53,7 @@ def test_powertransformer_yeo_johnson_negative_without_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_yeo_johnson_combined_without_scaler(self):
@@ -71,9 +67,7 @@ def test_powertransformer_yeo_johnson_combined_without_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_box_cox_without_scaler(self):
@@ -87,9 +81,7 @@ def test_powertransformer_box_cox_without_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_yeo_johnson_positive_with_scaler(self):
@@ -103,9 +95,7 @@ def test_powertransformer_yeo_johnson_positive_with_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_with_scaler_blacklist(self):
@@ -136,9 +126,7 @@ def test_powertransformer_yeo_johnson_negative_with_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_yeo_johnson_combined_with_scaler(self):
@@ -152,9 +140,7 @@ def test_powertransformer_yeo_johnson_combined_with_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_box_cox_with_scaler(self):
@@ -168,9 +154,7 @@ def test_powertransformer_box_cox_with_scaler(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
def test_powertransformer_zeros(self):
@@ -184,9 +168,7 @@ def test_powertransformer_zeros(self):
target_opset=TARGET_OPSET,
)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- data, model, model_onnx, basename="PowerTransformer"
- )
+ dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
if __name__ == "__main__":
diff --git a/tests/test_sklearn_quadratic_discriminant_analysis_converter.py b/tests/test_sklearn_quadratic_discriminant_analysis_converter.py
index 881dbe6ee..d9b7df667 100644
--- a/tests/test_sklearn_quadratic_discriminant_analysis_converter.py
+++ b/tests/test_sklearn_quadratic_discriminant_analysis_converter.py
@@ -10,25 +10,21 @@
from onnxruntime import __version__ as ort_version
from onnx import __version__ as onnx_version
from skl2onnx import convert_sklearn
-from skl2onnx.common.data_types import (
- FloatTensorType,
- DoubleTensorType
-)
+from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
-from test_utils import (
- dump_data_and_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, TARGET_OPSET
ort_version = ".".join(ort_version.split(".")[:2])
-onnx_version = ".".join(onnx_version.split('.')[:2])
+onnx_version = ".".join(onnx_version.split(".")[:2])
class TestQuadraticDiscriminantAnalysisConverter(unittest.TestCase):
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_2c2f_float(self):
# 2 classes, 2 features
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
@@ -42,23 +38,30 @@ def test_model_qda_2c2f_float(self):
skl_model,
"scikit-learn QDA",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model,
- basename="SklearnQDA_2c2f_Float")
-
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ dump_data_and_model(
+ X_test.astype(np.float32),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_2c2f_Float",
+ )
+
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_2c3f_float(self):
# 2 classes, 3 features
- X = np.array([[-1, -1, 0], [-2, -1, 1], [-3, -2, 0],
- [1, 1, 0], [2, 1, 1], [3, 2, 1]])
+ X = np.array(
+ [[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], [1, 1, 0], [2, 1, 1], [3, 2, 1]]
+ )
y = np.array([1, 1, 1, 2, 2, 2])
- X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0],
- [1, 1.5, 1], [3.1, 2.1, 1]])
+ X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], [1, 1.5, 1], [3.1, 2.1, 1]])
skl_model = QuadraticDiscriminantAnalysis()
skl_model.fit(X, y)
@@ -67,20 +70,38 @@ def test_model_qda_2c3f_float(self):
skl_model,
"scikit-learn QDA",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model,
- basename="SklearnQDA_2c3f_Float")
-
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ dump_data_and_model(
+ X_test.astype(np.float32),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_2c3f_Float",
+ )
+
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_3c2f_float(self):
# 3 classes, 2 features
- X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1],
- [2, 1], [3, 2], [-1, 2], [-2, 3], [-2, 2]])
+ X = np.array(
+ [
+ [-1, -1],
+ [-2, -1],
+ [-3, -2],
+ [1, 1],
+ [2, 1],
+ [3, 2],
+ [-1, 2],
+ [-2, 3],
+ [-2, 2],
+ ]
+ )
y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
X_test = np.array([[-0.8, -1], [0.8, 1], [-0.8, 1]])
@@ -91,20 +112,28 @@ def test_model_qda_3c2f_float(self):
skl_model,
"scikit-learn QDA",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.float32), skl_model, onnx_model,
- basename="SklearnQDA_3c2f_Float")
-
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ dump_data_and_model(
+ X_test.astype(np.float32),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_3c2f_Float",
+ )
+
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_2c2f_double(self):
# 2 classes, 2 features
- X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1],
- [2, 1], [3, 2]]).astype(np.double)
+ X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).astype(
+ np.double
+ )
y = np.array([1, 1, 1, 2, 2, 2])
X_test = np.array([[-0.8, -1], [0.8, 1]])
@@ -115,23 +144,31 @@ def test_model_qda_2c2f_double(self):
skl_model,
"scikit-learn QDA",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model,
- basename="SklearnQDA_2c2f_Double")
-
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ dump_data_and_model(
+ X_test.astype(np.double),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_2c2f_Double",
+ )
+
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_2c3f_double(self):
# 2 classes, 3 features
- X = np.array([[-1, -1, 0], [-2, -1, 1], [-3, -2, 0],
- [1, 1, 0], [2, 1, 1], [3, 2, 1]]).astype(np.double)
+ X = np.array(
+ [[-1, -1, 0], [-2, -1, 1], [-3, -2, 0], [1, 1, 0], [2, 1, 1], [3, 2, 1]]
+ ).astype(np.double)
y = np.array([1, 1, 1, 2, 2, 2])
- X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0],
- [1, 1.5, 1], [3.1, 2.1, 1]])
+ X_test = np.array([[-0.8, -1, 0], [-1, -1.6, 0], [1, 1.5, 1], [3.1, 2.1, 1]])
skl_model = QuadraticDiscriminantAnalysis()
skl_model.fit(X, y)
@@ -140,20 +177,39 @@ def test_model_qda_2c3f_double(self):
skl_model,
"scikit-learn QDA",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model,
- basename="SklearnQDA_2c3f_Double")
-
- @unittest.skipIf(pv.Version(sklearn.__version__) < pv.Version('1.0'),
- reason="scikit-learn<1.0")
- @unittest.skipIf(pv.Version(onnx_version) < pv.Version('1.11'),
- reason="fails with onnx 1.10")
+ dump_data_and_model(
+ X_test.astype(np.double),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_2c3f_Double",
+ )
+
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.0"), reason="scikit-learn<1.0"
+ )
+ @unittest.skipIf(
+ pv.Version(onnx_version) < pv.Version("1.11"), reason="fails with onnx 1.10"
+ )
def test_model_qda_3c2f_double(self):
# 3 classes, 2 features
- X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2],
- [-1, 2], [-2, 3], [-2, 2]]).astype(np.double)
+ X = np.array(
+ [
+ [-1, -1],
+ [-2, -1],
+ [-3, -2],
+ [1, 1],
+ [2, 1],
+ [3, 2],
+ [-1, 2],
+ [-2, 3],
+ [-2, 2],
+ ]
+ ).astype(np.double)
y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
X_test = np.array([[-0.8, -1], [0.8, 1], [-0.8, 1]])
@@ -164,11 +220,17 @@ def test_model_qda_3c2f_double(self):
skl_model,
"scikit-learn QDA",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
self.assertIsNotNone(onnx_model)
- dump_data_and_model(X_test.astype(np.double), skl_model, onnx_model,
- basename="SklearnQDA_3c2f_Double")
+ dump_data_and_model(
+ X_test.astype(np.double),
+ skl_model,
+ onnx_model,
+ basename="SklearnQDA_3c2f_Double",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py
index 7e369a9b6..d0ea0a6f4 100644
--- a/tests/test_sklearn_random_forest_converters.py
+++ b/tests/test_sklearn_random_forest_converters.py
@@ -7,15 +7,17 @@
from numpy.testing import assert_almost_equal
from onnxruntime import InferenceSession, __version__ as ort_version
import sklearn
-from sklearn.datasets import (
- load_iris, make_regression, make_classification)
+from sklearn.datasets import load_iris, make_regression, make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (
- RandomForestClassifier, RandomForestRegressor,
- ExtraTreesClassifier, ExtraTreesRegressor
+ RandomForestClassifier,
+ RandomForestRegressor,
+ ExtraTreesClassifier,
+ ExtraTreesRegressor,
)
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -45,10 +47,11 @@
path_to_leaf,
TARGET_OPSET,
)
+
try:
from sklearn.ensemble import (
HistGradientBoostingClassifier,
- HistGradientBoostingRegressor
+ HistGradientBoostingRegressor,
)
except ImportError:
HistGradientBoostingClassifier = None
@@ -57,11 +60,11 @@
def _sklearn_version():
# Remove development version 0.22.dev0 becomes 0.22.
- v = ".".join(sklearn.__version__.split('.')[:2])
+ v = ".".join(sklearn.__version__.split(".")[:2])
return pv.Version(v)
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnTreeEnsembleModels(unittest.TestCase):
@@ -70,8 +73,7 @@ def test_random_forest_classifier(self):
dump_one_class_classification(model)
dump_binary_classification(model)
dump_binary_classification(model, label_string=False)
- dump_binary_classification(
- model, label_string=False, label_bool=True)
+ dump_binary_classification(model, label_string=False, label_bool=True)
dump_multiple_classification(model)
@ignore_warnings(category=FutureWarning)
@@ -79,38 +81,51 @@ def test_random_forest_classifier_mismatched_estimator_counts(self):
model = RandomForestClassifier(n_estimators=3)
X = [[0, 1], [1, 1], [2, 0]]
X = numpy.array(X, dtype=numpy.float32)
- y = ['A', 'B', 'A']
+ y = ["A", "B", "A"]
model.fit(X, y)
# Training code can manipulate n_estimators causing
# n_estimators != len(estimators_). So simulate that here.
model.n_estimators += 1
- model_onnx, prefix = convert_model(model, 'binary classifier',
- [('input',
- FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
- dump_data_and_model(X, model, model_onnx,
- basename=prefix + "Bin" +
- model.__class__.__name__ +
- '_mismatched_estimator_counts')
+ model_onnx, prefix = convert_model(
+ model,
+ "binary classifier",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
+ dump_data_and_model(
+ X,
+ model,
+ model_onnx,
+ basename=prefix
+ + "Bin"
+ + model.__class__.__name__
+ + "_mismatched_estimator_counts",
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_regressor_mismatches(self):
iris = load_iris()
X, y = iris.data, iris.target
- X_train, X_test, y_train, _ = train_test_split(
- X, y, random_state=13)
+ X_train, X_test, y_train, _ = train_test_split(X, y, random_state=13)
X_test = X_test.astype(numpy.float32)
clr = RandomForestRegressor(n_jobs=1, n_estimators=100)
clr.fit(X_train, y_train)
clr.fit(X, y)
- model_onnx, prefix = convert_model(clr, 'reg',
- [('input',
- FloatTensorType([None, 4]))],
- target_opset=TARGET_OPSET)
- dump_data_and_model(X_test, clr, model_onnx,
- basename=prefix + "RegMis" +
- clr.__class__.__name__ +
- '_mismatched_estimator_counts')
+ model_onnx, prefix = convert_model(
+ clr,
+ "reg",
+ [("input", FloatTensorType([None, 4]))],
+ target_opset=TARGET_OPSET,
+ )
+ dump_data_and_model(
+ X_test,
+ clr,
+ model_onnx,
+ basename=prefix
+ + "RegMis"
+ + clr.__class__.__name__
+ + "_mismatched_estimator_counts",
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_regressor(self):
@@ -128,14 +143,21 @@ def test_random_forest_regressor_mismatched_estimator_counts(self):
# Training code can manipulate n_estimators causing
# n_estimators != len(estimators_). So simulate that here.
model.n_estimators += 1
- model_onnx, prefix = convert_model(model, 'single regressor',
- [('input',
- FloatTensorType([None, 2]))],
- target_opset=TARGET_OPSET)
- dump_data_and_model(X, model, model_onnx,
- basename=prefix + "Reg" +
- model.__class__.__name__ +
- "_mismatched_estimator_counts")
+ model_onnx, prefix = convert_model(
+ model,
+ "single regressor",
+ [("input", FloatTensorType([None, 2]))],
+ target_opset=TARGET_OPSET,
+ )
+ dump_data_and_model(
+ X,
+ model,
+ model_onnx,
+ basename=prefix
+ + "Reg"
+ + model.__class__.__name__
+ + "_mismatched_estimator_counts",
+ )
@ignore_warnings(category=FutureWarning)
def test_extra_trees_classifier(self):
@@ -153,41 +175,49 @@ def test_extra_trees_regressor(self):
@ignore_warnings(category=FutureWarning)
def test_model_multi_class_nocl(self):
model, X = fit_classification_model(
- RandomForestClassifier(random_state=42),
- 2, label_string=True)
+ RandomForestClassifier(random_state=42), 2, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class nocl",
+ model,
+ "multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X[:5], model, model_onnx, classes=model.classes_,
- basename="SklearnRFMultiNoCl")
+ X[:5],
+ model,
+ model_onnx,
+ classes=model.classes_,
+ basename="SklearnRFMultiNoCl",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_multi_class_nocl_all(self):
model, X = fit_classification_model(
- RandomForestClassifier(random_state=42),
- 2, label_string=True)
+ RandomForestClassifier(random_state=42), 2, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class nocl",
+ model,
+ "multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
exp_label = model.predict(X)
exp_proba = model.predict_proba(X)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X.astype(numpy.float32)})
exp_label = numpy.array([int(cl[2:]) for cl in exp_label])
assert_almost_equal(exp_proba, got[1], decimal=5)
diff = numpy.abs(exp_label - got[0]).sum()
@@ -204,96 +234,113 @@ def test_model_multi_class_nocl_all(self):
@ignore_warnings(category=FutureWarning)
def test_random_forest_classifier_int(self):
model, X = fit_classification_model(
- RandomForestClassifier(n_estimators=5, random_state=42),
- 3, is_int=True)
+ RandomForestClassifier(n_estimators=5, random_state=42), 3, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "random forest classifier",
+ model,
+ "random forest classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRandomForestClassifierInt")
+ X, model, model_onnx, basename="SklearnRandomForestClassifierInt"
+ )
@ignore_warnings(category=FutureWarning)
def test_extra_trees_classifier_int(self):
model, X = fit_classification_model(
- ExtraTreesClassifier(n_estimators=5, random_state=42),
- 4, is_int=True)
+ ExtraTreesClassifier(n_estimators=5, random_state=42), 4, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "extra trees classifier",
+ model,
+ "extra trees classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnExtraTreesClassifierInt")
+ X, model, model_onnx, basename="SklearnExtraTreesClassifierInt"
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_classifier_bool(self):
model, X = fit_classification_model(
- RandomForestClassifier(n_estimators=5, random_state=42),
- 3, is_bool=True)
+ RandomForestClassifier(n_estimators=5, random_state=42), 3, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "random forest classifier",
+ model,
+ "random forest classifier",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRandomForestClassifierBool")
+ X, model, model_onnx, basename="SklearnRandomForestClassifierBool"
+ )
@ignore_warnings(category=FutureWarning)
def test_extra_trees_classifier_bool(self):
model, X = fit_classification_model(
- ExtraTreesClassifier(n_estimators=5, random_state=42),
- 2, is_bool=True)
+ ExtraTreesClassifier(n_estimators=5, random_state=42), 2, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "extra trees regression",
+ model,
+ "extra trees regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnExtraTreesClassifierBool")
+ X, model, model_onnx, basename="SklearnExtraTreesClassifierBool"
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_classifier_double(self):
model, X = fit_classification_model(
- RandomForestClassifier(n_estimators=5, random_state=42),
- 3, is_double=True)
+ RandomForestClassifier(n_estimators=5, random_state=42), 3, is_double=True
+ )
for opv in [1, 2, 3]:
model_onnx = convert_sklearn(
- model, "random forest classifier",
+ model,
+ "random forest classifier",
[("input", DoubleTensorType([None, X.shape[1]]))],
- target_opset={'ai.onnx.ml': opv,
- '': TARGET_OPSET})
+ target_opset={"ai.onnx.ml": opv, "": TARGET_OPSET},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRandomForestClassifierDouble")
+ X, model, model_onnx, basename="SklearnRandomForestClassifierDouble"
+ )
@ignore_warnings(category=FutureWarning)
def test_model_random_forest_classifier_multi_output_int(self):
model, X_test = fit_multi_output_classification_model(
- RandomForestClassifier(random_state=42, n_estimators=20))
- options = {id(model): {'zipmap': False}}
+ RandomForestClassifier(random_state=42, n_estimators=20)
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "random forest classifier",
+ model,
+ "random forest classifier",
[("input", Int64TensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test.astype(numpy.int64), model, model_onnx,
- basename="SklearnRandomForestClassifierMultiOutputInt")
+ X_test.astype(numpy.int64),
+ model,
+ model_onnx,
+ basename="SklearnRandomForestClassifierMultiOutputInt",
+ )
@ignore_warnings(category=FutureWarning)
def common_test_model_hgb_regressor(self, add_nan=False):
rng = numpy.random.RandomState(12345)
model = HistGradientBoostingRegressor(max_iter=4, max_depth=2)
- X, y = make_regression(n_features=10, n_samples=1000,
- n_targets=1, random_state=42)
+ X, y = make_regression(
+ n_features=10, n_samples=1000, n_targets=1, random_state=42
+ )
if add_nan:
rows = rng.randint(0, X.shape[0] - 1, X.shape[0] // 3)
cols = rng.randint(0, X.shape[1] - 1, X.shape[0] // 3)
@@ -301,117 +348,171 @@ def common_test_model_hgb_regressor(self, add_nan=False):
X = X.astype(numpy.float32)
y = y.astype(numpy.float32)
- X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
- random_state=42)
+ X_train, X_test, y_train, _ = train_test_split(
+ X, y, test_size=0.5, random_state=42
+ )
model.fit(X_train, y_train)
model_onnx = convert_sklearn(
- model, "unused", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "unused",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:10]
dump_data_and_model(
- X_test, model, model_onnx,
- basename=f"SklearnHGBRegressor{add_nan}", verbose=False)
-
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="missing_go_to_left is missing")
- @unittest.skipIf(HistGradientBoostingRegressor is None,
- reason="scikit-learn 0.22 + manual activation")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'),
- reason="issue with nan for earlier ort")
+ X_test,
+ model,
+ model_onnx,
+ basename=f"SklearnHGBRegressor{add_nan}",
+ verbose=False,
+ )
+
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"),
+ reason="missing_go_to_left is missing",
+ )
+ @unittest.skipIf(
+ HistGradientBoostingRegressor is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"),
+ reason="issue with nan for earlier ort",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_regressor_nonan(self):
self.common_test_model_hgb_regressor(False)
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="NaN not allowed")
- @unittest.skipIf(HistGradientBoostingRegressor is None,
- reason="scikit-learn 0.22 + manual activation")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'),
- reason="issue with nan for earlier ort")
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed"
+ )
+ @unittest.skipIf(
+ HistGradientBoostingRegressor is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"),
+ reason="issue with nan for earlier ort",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_regressor_nan(self):
self.common_test_model_hgb_regressor(True)
def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
model = HistGradientBoostingClassifier(max_iter=5, max_depth=2)
- X, y = make_classification(n_features=10, n_samples=1000,
- n_informative=4, n_classes=n_classes,
- random_state=42)
+ X, y = make_classification(
+ n_features=10,
+ n_samples=1000,
+ n_informative=4,
+ n_classes=n_classes,
+ random_state=42,
+ )
if add_nan:
rows = numpy.random.randint(0, X.shape[0] - 1, X.shape[0] // 3)
cols = numpy.random.randint(0, X.shape[1] - 1, X.shape[0] // 3)
X[rows, cols] = numpy.nan
- X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
- random_state=42)
+ X_train, X_test, y_train, _ = train_test_split(
+ X, y, test_size=0.5, random_state=42
+ )
model.fit(X_train, y_train)
model_onnx = convert_sklearn(
- model, "unused", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "unused",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:5]
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnHGBClassifier%s%d" % (
- "nan" if add_nan else '', n_classes),
- verbose=False)
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnHGBClassifier%s%d" % ("nan" if add_nan else "", n_classes),
+ verbose=False,
+ )
if n_classes == 2:
model_onnx = convert_sklearn(
- model, "unused",
+ model,
+ "unused",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={model.__class__: {'raw_scores': True}},
- target_opset=TARGET_OPSET)
+ options={model.__class__: {"raw_scores": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:5]
# There is a bug in onnxruntime <= 1.1.0.
# Raw scores are always positive.
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnHGBClassifierRaw%s%d" % (
- "nan" if add_nan else '', n_classes),
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnHGBClassifierRaw%s%d"
+ % ("nan" if add_nan else "", n_classes),
verbose=False,
- methods=['predict', 'decision_function_binary'])
+ methods=["predict", "decision_function_binary"],
+ )
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="missing_go_to_left is missing")
- @unittest.skipIf(HistGradientBoostingClassifier is None,
- reason="scikit-learn 0.22 + manual activation")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'),
- reason="issue with nan for earlier ort")
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"),
+ reason="missing_go_to_left is missing",
+ )
+ @unittest.skipIf(
+ HistGradientBoostingClassifier is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"),
+ reason="issue with nan for earlier ort",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_classifier_nonan(self):
self.common_test_model_hgb_classifier(False)
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="NaN not allowed")
- @unittest.skipIf(HistGradientBoostingClassifier is None,
- reason="scikit-learn 0.22 + manual activation")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'),
- reason="issue with nan for earlier ort")
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed"
+ )
+ @unittest.skipIf(
+ HistGradientBoostingClassifier is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"),
+ reason="issue with nan for earlier ort",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_classifier_nan(self):
self.common_test_model_hgb_classifier(True)
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="missing_go_to_left is missing")
- @unittest.skipIf(HistGradientBoostingClassifier is None,
- reason="scikit-learn 0.22 + manual activation")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version('1.2.0'),
- reason="issue with nan for earlier ort")
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"),
+ reason="missing_go_to_left is missing",
+ )
+ @unittest.skipIf(
+ HistGradientBoostingClassifier is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.2.0"),
+ reason="issue with nan for earlier ort",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_classifier_nonan_multi(self):
self.common_test_model_hgb_classifier(False, n_classes=3)
- @unittest.skipIf(_sklearn_version() < pv.Version('0.22.0'),
- reason="NaN not allowed")
- @unittest.skipIf(HistGradientBoostingClassifier is None,
- reason="scikit-learn 0.22 + manual activation")
+ @unittest.skipIf(
+ _sklearn_version() < pv.Version("0.22.0"), reason="NaN not allowed"
+ )
+ @unittest.skipIf(
+ HistGradientBoostingClassifier is None,
+ reason="scikit-learn 0.22 + manual activation",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_hgb_classifier_nan_multi(self):
self.common_test_model_hgb_classifier(True, n_classes=3)
@@ -419,135 +520,178 @@ def test_model_hgb_classifier_nan_multi(self):
@ignore_warnings(category=FutureWarning)
def test_model_random_forest_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- RandomForestClassifier(random_state=42, n_estimators=5))
- options = {id(model): {'zipmap': False}}
+ RandomForestClassifier(random_state=42, n_estimators=5)
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn RandomForestClassifier",
+ model,
+ "scikit-learn RandomForestClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnRandomForestClassifierMultiLabel-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnRandomForestClassifierMultiLabel-Out0",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_random_forest_classifier_multilabel_low_samples(self):
model, X_test = fit_multilabel_classification_model(
- RandomForestClassifier(random_state=42, n_estimators=5),
- n_samples=4)
- options = {id(model): {'zipmap': False}}
+ RandomForestClassifier(random_state=42, n_estimators=5), n_samples=4
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn RandomForestClassifier",
+ model,
+ "scikit-learn RandomForestClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnRandomForestClassifierMultiLabelLowSamples-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnRandomForestClassifierMultiLabelLowSamples-Out0",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_extra_trees_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
- ExtraTreesClassifier(random_state=42, n_estimators=5))
- options = {id(model): {'zipmap': False}}
+ ExtraTreesClassifier(random_state=42, n_estimators=5)
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn ExtraTreesClassifier",
+ model,
+ "scikit-learn ExtraTreesClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnExtraTreesClassifierMultiLabel-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnExtraTreesClassifierMultiLabel-Out0",
+ )
@ignore_warnings(category=FutureWarning)
def test_model_extra_trees_classifier_multilabel_low_samples(self):
model, X_test = fit_multilabel_classification_model(
- ExtraTreesClassifier(random_state=42, n_estimators=5),
- n_samples=10)
- options = {id(model): {'zipmap': False}}
+ ExtraTreesClassifier(random_state=42, n_estimators=5), n_samples=10
+ )
+ options = {id(model): {"zipmap": False}}
model_onnx = convert_sklearn(
- model, "scikit-learn ExtraTreesClassifier",
+ model,
+ "scikit-learn ExtraTreesClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- assert 'zipmap' not in str(model_onnx).lower()
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_test, model, model_onnx,
- basename="SklearnExtraTreesClassifierMultiLabelLowSamples-Out0")
+ X_test,
+ model,
+ model_onnx,
+ basename="SklearnExtraTreesClassifierMultiLabelLowSamples-Out0",
+ )
@ignore_warnings(category=FutureWarning)
def test_boston_pca_rf(self):
X, y = make_regression(100, n_features=10)
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, random_state=0)
- pipe = Pipeline([
- ('acp', PCA(n_components=3)),
- ('rf', RandomForestRegressor(n_estimators=100))])
+ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+ pipe = Pipeline(
+ [
+ ("acp", PCA(n_components=3)),
+ ("rf", RandomForestRegressor(n_estimators=100)),
+ ]
+ )
pipe.fit(X_train, y_train)
X32 = X_test.astype(numpy.float32)
model_onnx = to_onnx(pipe, X32[:1], target_opset=TARGET_OPSET)
dump_data_and_model(
- X32, pipe, model_onnx, methods=['predict'],
- basename="SklearnBostonPCARF-Dec4")
+ X32,
+ pipe,
+ model_onnx,
+ methods=["predict"],
+ basename="SklearnBostonPCARF-Dec4",
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_regressor_int(self):
model, X = fit_regression_model(
- RandomForestRegressor(n_estimators=5, random_state=42),
- is_int=True)
+ RandomForestRegressor(n_estimators=5, random_state=42), is_int=True
+ )
model_onnx = convert_sklearn(
- model, "random forest regression",
+ model,
+ "random forest regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRandomForestRegressorInt-Dec4",)
+ X,
+ model,
+ model_onnx,
+ basename="SklearnRandomForestRegressorInt-Dec4",
+ )
@ignore_warnings(category=FutureWarning)
def test_extra_trees_regressor_int(self):
model, X = fit_regression_model(
- ExtraTreesRegressor(n_estimators=5, random_state=42),
- is_int=True)
+ ExtraTreesRegressor(n_estimators=5, random_state=42), is_int=True
+ )
model_onnx = convert_sklearn(
- model, "extra trees regression",
+ model,
+ "extra trees regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnExtraTreesRegressorInt-Dec4")
+ X, model, model_onnx, basename="SklearnExtraTreesRegressorInt-Dec4"
+ )
@ignore_warnings(category=FutureWarning)
def test_random_forest_regressor_bool(self):
model, X = fit_regression_model(
- RandomForestRegressor(n_estimators=5, random_state=42),
- is_bool=True)
+ RandomForestRegressor(n_estimators=5, random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "random forest regression",
+ model,
+ "random forest regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnRandomForestRegressorBool-Dec4")
+ X, model, model_onnx, basename="SklearnRandomForestRegressorBool-Dec4"
+ )
@ignore_warnings(category=FutureWarning)
def test_extra_trees_regressor_bool(self):
model, X = fit_regression_model(
- ExtraTreesRegressor(n_estimators=5, random_state=42),
- is_bool=True)
+ ExtraTreesRegressor(n_estimators=5, random_state=42), is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "extra trees regression",
+ model,
+ "extra trees regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnExtraTreesRegressorBool-Dec4")
+ X, model, model_onnx, basename="SklearnExtraTreesRegressorBool-Dec4"
+ )
@unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder")
@ignore_warnings(category=FutureWarning)
@@ -556,20 +700,22 @@ def test_randomforestregressor_decision_path(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
exp = binary_array_to_string(dec[0].todense())
- got = numpy.array([''.join(row) for row in res[1]])
+ got = numpy.array(["".join(row) for row in res[1]])
assert exp == got.ravel().tolist()
@unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder")
@@ -579,20 +725,22 @@ def test_extratreesregressor_decision_path(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
exp = binary_array_to_string(dec[0].todense())
- got = numpy.array([''.join(row) for row in res[1]])
+ got = numpy.array(["".join(row) for row in res[1]])
assert exp == got.ravel().tolist()
@unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder")
@@ -602,22 +750,24 @@ def test_randomforestclassifier_decision_path(self):
X, y = make_classification(3, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
prob = model.predict_proba(X)
assert_almost_equal(prob, res[1])
dec = model.decision_path(X)
exp = binary_array_to_string(dec[0].todense())
- got = numpy.array([''.join(row) for row in res[2]])
+ got = numpy.array(["".join(row) for row in res[2]])
assert exp == got.ravel().tolist()
@unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder")
@@ -627,22 +777,24 @@ def test_extratreesclassifier_decision_path(self):
X, y = make_classification(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_path': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_path": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
prob = model.predict_proba(X)
assert_almost_equal(prob, res[1])
dec = model.decision_path(X)
exp = binary_array_to_string(dec[0].todense())
- got = numpy.array([''.join(row) for row in res[2]])
+ got = numpy.array(["".join(row) for row in res[2]])
assert exp == got.ravel().tolist()
@unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder")
@@ -652,16 +804,18 @@ def test_rf_regressor_decision_leaf(self):
X, y = make_regression(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel(), decimal=4)
dec = model.decision_path(X)
@@ -675,22 +829,23 @@ def test_rf_regressor_decision_path_leaf(self):
X, y = make_regression(10, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True,
- 'decision_path': True}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True, "decision_path": True}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel(), decimal=4)
dec = model.decision_path(X)
exp_leaf = path_to_leaf(model.estimators_, dec[0].todense(), dec[1])
exp_path = binary_array_to_string(dec[0].todense())
- got_path = numpy.array([''.join(row) for row in res[1]])
+ got_path = numpy.array(["".join(row) for row in res[1]])
assert exp_path == got_path.ravel().tolist()
assert exp_leaf.tolist() == res[2].tolist()
@@ -701,15 +856,17 @@ def test_rf_classifier_decision_leaf(self):
X, y = make_classification(3, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True, 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={id(model): {"decision_leaf": True, "zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
@@ -723,23 +880,29 @@ def test_rf_classifier_decision_path_leaf(self):
X, y = make_classification(3, n_features=4, random_state=42)
X = X[:, :2]
model.fit(X, y)
- initial_types = [('input', FloatTensorType((None, X.shape[1])))]
+ initial_types = [("input", FloatTensorType((None, X.shape[1])))]
model_onnx = convert_sklearn(
- model, initial_types=initial_types,
- options={id(model): {'decision_leaf': True,
- 'decision_path': True,
- 'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ initial_types=initial_types,
+ options={
+ id(model): {
+ "decision_leaf": True,
+ "decision_path": True,
+ "zipmap": False,
+ }
+ },
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(numpy.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(numpy.float32)})
pred = model.predict(X)
assert_almost_equal(pred, res[0].ravel())
dec = model.decision_path(X)
exp_leaf = path_to_leaf(model.estimators_, dec[0].todense(), dec[1])
exp_path = binary_array_to_string(dec[0].todense())
- got_path = numpy.array([''.join(row) for row in res[2]])
+ got_path = numpy.array(["".join(row) for row in res[2]])
assert exp_path == got_path.ravel().tolist()
assert exp_leaf.tolist() == res[3].tolist()
diff --git a/tests/test_sklearn_random_projection.py b/tests/test_sklearn_random_projection.py
index 67b0de17e..707b6e5a2 100644
--- a/tests/test_sklearn_random_projection.py
+++ b/tests/test_sklearn_random_projection.py
@@ -10,11 +10,10 @@
from skl2onnx.common.data_types import FloatTensorType
from test_utils import dump_data_and_model, TARGET_OPSET
-nort = pv.Version(onnxruntime.__version__) < pv.Version('0.5.0')
+nort = pv.Version(onnxruntime.__version__) < pv.Version("0.5.0")
class TestSklearnRandomProjection(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available")
def test_gaussian_random_projection_float32(self):
rng = np.random.RandomState(42)
@@ -23,12 +22,15 @@ def test_gaussian_random_projection_float32(self):
model = pt.fit(X)
assert model.transform(X).shape[1] == 4
model_onnx = convert_sklearn(
- model, "scikit-learn GaussianRandomProjection",
+ model,
+ "scikit-learn GaussianRandomProjection",
[("inputs", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(X.astype(np.float32), model,
- model_onnx, basename="GaussianRandomProjection")
+ dump_data_and_model(
+ X.astype(np.float32), model, model_onnx, basename="GaussianRandomProjection"
+ )
@unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available")
def test_gaussian_random_projection_float64(self):
@@ -38,8 +40,7 @@ def test_gaussian_random_projection_float64(self):
model = pt.fit(X)
model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET)
self.assertIsNotNone(model_onnx)
- dump_data_and_model(X, model,
- model_onnx, basename="GaussianRandomProjection64")
+ dump_data_and_model(X, model, model_onnx, basename="GaussianRandomProjection64")
if __name__ == "__main__":
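Note: the random-projection hunks above only re-wrap the tests to black's style; the conversion pattern they exercise is unchanged. A minimal, self-contained sketch of that pattern follows (sample data and target_opset=15 are illustrative choices, not part of this patch):

    import numpy as np
    from onnxruntime import InferenceSession
    from sklearn.random_projection import GaussianRandomProjection
    from skl2onnx import to_onnx

    rng = np.random.RandomState(42)
    X = rng.rand(25, 4).astype(np.float32)
    model = GaussianRandomProjection(n_components=3, random_state=42).fit(X)

    # to_onnx takes a sample array and infers the input name ("X") and type from it.
    onx = to_onnx(model, X[:1], target_opset=15)
    sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
    got = sess.run(None, {"X": X})[0]
    assert np.allclose(model.transform(X), got, atol=1e-4)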
diff --git a/tests/test_sklearn_random_trees_embedding.py b/tests/test_sklearn_random_trees_embedding.py
index c1a52db48..7d68c5272 100644
--- a/tests/test_sklearn_random_trees_embedding.py
+++ b/tests/test_sklearn_random_trees_embedding.py
@@ -3,6 +3,7 @@
import unittest
import numpy
from onnxruntime import InferenceSession
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -11,48 +12,48 @@
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.datasets import make_regression
-from sklearn.ensemble import (
- RandomTreesEmbedding)
+from sklearn.ensemble import RandomTreesEmbedding
from skl2onnx import to_onnx
from test_utils import TARGET_OPSET, dump_data_and_model
class TestSklearnRandomTreeEmbeddings(unittest.TestCase):
-
- def check_model(self, model, X, name='X'):
+ def check_model(self, model, X, name="X"):
try:
sess = InferenceSession(
- model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except Exception as e:
- raise AssertionError(
- "Unable to load model\n%s" % str(model)) from e
+ raise AssertionError("Unable to load model\n%s" % str(model)) from e
try:
return sess.run(None, {name: X[:7]})
except Exception as e:
raise AssertionError(
- "Unable to run model X.shape=%r X.dtype=%r\n%s" % (
- X[:7].shape, X.dtype, str(model))) from e
+ "Unable to run model X.shape=%r X.dtype=%r\n%s"
+ % (X[:7].shape, X.dtype, str(model))
+ ) from e
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning,
- DeprecationWarning))
+ @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning))
def test_random_trees_embedding(self):
X, _ = make_regression(
- n_features=5, n_samples=100, n_targets=1, random_state=42,
- n_informative=3)
+ n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3
+ )
X = X.astype(numpy.float32)
model = RandomTreesEmbedding(
- n_estimators=3, max_depth=2, sparse_output=False).fit(X)
+ n_estimators=3, max_depth=2, sparse_output=False
+ ).fit(X)
model.transform(X)
- model_onnx = to_onnx(
- model, X[:1], target_opset=TARGET_OPSET)
+ model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET)
with open("model.onnx", "wb") as f:
f.write(model_onnx.SerializeToString())
self.check_model(model_onnx, X)
dump_data_and_model(
- X.astype(numpy.float32), model, model_onnx,
- basename="SklearnRandomTreesEmbedding")
+ X.astype(numpy.float32),
+ model,
+ model_onnx,
+ basename="SklearnRandomTreesEmbedding",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_replace_transformer.py b/tests/test_sklearn_replace_transformer.py
index 5bc980660..9f8559d5b 100644
--- a/tests/test_sklearn_replace_transformer.py
+++ b/tests/test_sklearn_replace_transformer.py
@@ -6,6 +6,7 @@
import unittest
import numpy
from sklearn.pipeline import Pipeline
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -17,30 +18,35 @@
class TestSklearnCastTransformerConverter(unittest.TestCase):
-
def common_test_replace_transformer(self, dtype, input_type):
- model = Pipeline([
- ('replace', ReplaceTransformer(dtype=numpy.float32)),
- ])
- data = numpy.array([[0.1, 0.2, 3.1], [1, 1, 0],
- [0, 2, 1], [1, 0, 2]],
- dtype=numpy.float32)
+ model = Pipeline(
+ [
+ ("replace", ReplaceTransformer(dtype=numpy.float32)),
+ ]
+ )
+ data = numpy.array(
+ [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32
+ )
model.fit(data)
pred = model.steps[0][1].transform(data)
assert pred.dtype == dtype
model_onnx = convert_sklearn(
- model, "cast", [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model,
+ "cast",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnCastTransformer{}".format(
- input_type.__class__.__name__))
+ data,
+ model,
+ model_onnx,
+ basename="SklearnCastTransformer{}".format(input_type.__class__.__name__),
+ )
@unittest.skipIf(TARGET_OPSET < 11, reason="not supported")
def test_replace_transformer(self):
- self.common_test_replace_transformer(
- numpy.float32, FloatTensorType)
+ self.common_test_replace_transformer(numpy.float32, FloatTensorType)
if __name__ == "__main__":
diff --git a/tests/test_sklearn_scaler_converter.py b/tests/test_sklearn_scaler_converter.py
index 553731f2e..cde0231a5 100644
--- a/tests/test_sklearn_scaler_converter.py
+++ b/tests/test_sklearn_scaler_converter.py
@@ -8,7 +8,12 @@
import numpy
from onnxruntime import __version__ as ort_version
from sklearn.preprocessing import (
- StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler)
+ StandardScaler,
+ RobustScaler,
+ MinMaxScaler,
+ MaxAbsScaler,
+)
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -17,72 +22,93 @@
from sklearn.utils.testing import ignore_warnings
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
- Int64TensorType, FloatTensorType, DoubleTensorType)
+ Int64TensorType,
+ FloatTensorType,
+ DoubleTensorType,
+)
from test_utils import dump_data_and_model, TARGET_OPSET
-ort_version = ".".join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])
class TestSklearnScalerConverter(unittest.TestCase):
-
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_int(self):
model = StandardScaler()
data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", Int64TensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.int64),
- model, model_onnx,
- basename="SklearnStandardScalerInt64")
+ model,
+ model_onnx,
+ basename="SklearnStandardScalerInt64",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_min_max_scaler_int(self):
model = MinMaxScaler()
data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", Int64TensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", Int64TensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.int64),
- model, model_onnx,
- basename="SklearnMinMaxScalerInt64")
+ model,
+ model_onnx,
+ basename="SklearnMinMaxScalerInt64",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_double(self):
model = StandardScaler()
data = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", DoubleTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float64),
- model, model_onnx,
- basename="SklearnStandardScalerDouble")
+ model,
+ model_onnx,
+ basename="SklearnStandardScalerDouble",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_blacklist(self):
model = StandardScaler()
- data = numpy.array([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]],
- dtype=numpy.float32)
+ data = numpy.array(
+ [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32
+ )
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET,
- black_op={'Normalizer', 'Scaler'})
- self.assertNotIn('Normalizer', str(model_onnx))
- self.assertNotIn('Scaler', str(model_onnx))
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ black_op={"Normalizer", "Scaler"},
+ )
+ self.assertNotIn("Normalizer", str(model_onnx))
+ self.assertNotIn("Scaler", str(model_onnx))
dump_data_and_model(
- data, model, model_onnx,
- basename="SklearnStandardScalerBlackList")
+ data, model, model_onnx, basename="SklearnStandardScalerBlackList"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats(self):
@@ -94,13 +120,18 @@ def test_standard_scaler_floats(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32")
+ model,
+ basename="SklearnStandardScalerFloat32",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats_div(self):
@@ -113,13 +144,18 @@ def test_standard_scaler_floats_div(self):
]
model.fit(data)
model_onnx = convert_sklearn(
- model, "scaler", [("input", FloatTensorType([None, 3]))],
- options={id(model): {'div': 'div'}})
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ options={id(model): {"div": "div"}},
+ )
assert 'op_type: "Div"' in str(model_onnx)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32Div")
+ model,
+ basename="SklearnStandardScalerFloat32Div",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats_div_cast(self):
@@ -132,16 +168,21 @@ def test_standard_scaler_floats_div_cast(self):
]
model.fit(data)
model_onnx = convert_sklearn(
- model, "cast", [("input", FloatTensorType([None, 3]))],
- options={id(model): {'div': 'div_cast'}},
- target_opset=TARGET_OPSET)
+ model,
+ "cast",
+ [("input", FloatTensorType([None, 3]))],
+ options={id(model): {"div": "div_cast"}},
+ target_opset=TARGET_OPSET,
+ )
assert 'op_type: "Div"' in str(model_onnx)
assert 'caler"' not in str(model_onnx)
assert "double_data:" in str(model_onnx)
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32DivCast")
+ model,
+ basename="SklearnStandardScalerFloat32DivCast",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats_no_std(self):
@@ -153,13 +194,18 @@ def test_standard_scaler_floats_no_std(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32NoStd")
+ model,
+ basename="SklearnStandardScalerFloat32NoStd",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats_no_mean(self):
@@ -171,13 +217,18 @@ def test_standard_scaler_floats_no_mean(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32NoMean")
+ model,
+ basename="SklearnStandardScalerFloat32NoMean",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_standard_scaler_floats_no_mean_std(self):
@@ -189,13 +240,18 @@ def test_standard_scaler_floats_no_mean_std(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnStandardScalerFloat32NoMeanStd")
+ model,
+ basename="SklearnStandardScalerFloat32NoMeanStd",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_robust_scaler_floats(self):
@@ -207,13 +263,18 @@ def test_robust_scaler_floats(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnRobustScalerFloat32")
+ model,
+ basename="SklearnRobustScalerFloat32",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_robust_scaler_doubles(self):
@@ -225,13 +286,19 @@ def test_robust_scaler_doubles(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", DoubleTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float64),
- model, model_onnx, basename="SklearnRobustScalerFloat64")
+ model,
+ model_onnx,
+ basename="SklearnRobustScalerFloat64",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_robust_scaler_floats_no_bias(self):
@@ -243,14 +310,18 @@ def test_robust_scaler_floats_no_bias(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
model,
- basename="SklearnRobustScalerWithCenteringFloat32")
+ basename="SklearnRobustScalerWithCenteringFloat32",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_robust_scaler_floats_no_scaling(self):
@@ -262,13 +333,18 @@ def test_robust_scaler_floats_no_scaling(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnRobustScalerNoScalingFloat32")
+ model,
+ basename="SklearnRobustScalerNoScalingFloat32",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_robust_scaler_floats_no_centering_scaling(self):
@@ -280,14 +356,18 @@ def test_robust_scaler_floats_no_centering_scaling(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
model,
- basename="SklearnRobustScalerNoCenteringScalingFloat32")
+ basename="SklearnRobustScalerNoCenteringScalingFloat32",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_min_max_scaler(self):
@@ -299,13 +379,18 @@ def test_min_max_scaler(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnMinMaxScaler")
+ model,
+ basename="SklearnMinMaxScaler",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_min_max_scaler_double(self):
@@ -317,18 +402,26 @@ def test_min_max_scaler_double(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", DoubleTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float64),
- model, model_onnx, basename="SklearnMinMaxScalerDouble")
+ model,
+ model_onnx,
+ basename="SklearnMinMaxScalerDouble",
+ )
@ignore_warnings(category=DeprecationWarning)
@unittest.skipIf(TARGET_OPSET < 15, reason="old signature for clip")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.9.0"),
- reason="Operator clip not fully implemented")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.9.0"),
+ reason="Operator clip not fully implemented",
+ )
def test_min_max_scaler_clip(self):
model = MinMaxScaler(clip=True)
data = [
@@ -338,21 +431,29 @@ def test_min_max_scaler_clip(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
data[0][0] = 1e6
data[0][1] = 5
data[0][2] = -1.0
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, model_onnx, basename="SklearnMinMaxScalerClip")
+ model,
+ model_onnx,
+ basename="SklearnMinMaxScalerClip",
+ )
@ignore_warnings(category=DeprecationWarning)
@unittest.skipIf(TARGET_OPSET < 15, reason="old signature for clip")
- @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.9.0"),
- reason="Operator clip not fully implemented")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.9.0"),
+ reason="Operator clip not fully implemented",
+ )
def test_min_max_scaler_double_clip(self):
model = MinMaxScaler(clip=True)
data = [
@@ -362,16 +463,22 @@ def test_min_max_scaler_double_clip(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", DoubleTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
data[0][0] = 1e6
data[0][1] = 5
data[0][2] = -1.0
dump_data_and_model(
numpy.array(data, dtype=numpy.float64),
- model, model_onnx, basename="SklearnMinMaxScalerDouble")
+ model,
+ model_onnx,
+ basename="SklearnMinMaxScalerDouble",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_max_abs_scaler(self):
@@ -383,13 +490,18 @@ def test_max_abs_scaler(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", FloatTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", FloatTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float32),
- model, basename="SklearnMaxAbsScaler")
+ model,
+ basename="SklearnMaxAbsScaler",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_max_abs_scaler_double(self):
@@ -401,13 +513,19 @@ def test_max_abs_scaler_double(self):
[1.0, 0.0, 2.0],
]
model.fit(data)
- model_onnx = convert_sklearn(model, "scaler",
- [("input", DoubleTensorType([None, 3]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "scaler",
+ [("input", DoubleTensorType([None, 3]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
numpy.array(data, dtype=numpy.float64),
- model, model_onnx, basename="SklearnMaxAbsScalerDouble")
+ model,
+ model_onnx,
+ basename="SklearnMaxAbsScalerDouble",
+ )
if __name__ == "__main__":
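Note: every scaler test above follows the same convert-and-compare pattern that black merely re-wraps here: declare initial_types, convert, then run the resulting graph with onnxruntime. A minimal sketch of that pattern (target_opset=15 is an illustrative value, the tests use TARGET_OPSET):

    import numpy as np
    from onnxruntime import InferenceSession
    from sklearn.preprocessing import StandardScaler
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType

    data = np.array(
        [[0.0, 0.0, 3.0], [1.0, 1.0, 0.0], [0.0, 2.0, 1.0], [1.0, 0.0, 2.0]],
        dtype=np.float32,
    )
    model = StandardScaler().fit(data)

    # "input" in the feed dictionary must match the name declared in initial_types.
    onx = convert_sklearn(
        model,
        "scaler",
        [("input", FloatTensorType([None, 3]))],
        target_opset=15,
    )
    sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
    got = sess.run(None, {"input": data})[0]
    assert np.allclose(model.transform(data), got, atol=1e-5)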
diff --git a/tests/test_sklearn_sgd_classifier_converter.py b/tests/test_sklearn_sgd_classifier_converter.py
index 7db1d99d4..a9d291917 100644
--- a/tests/test_sklearn_sgd_classifier_converter.py
+++ b/tests/test_sklearn_sgd_classifier_converter.py
@@ -14,329 +14,465 @@
FloatTensorType,
Int64TensorType,
)
-from test_utils import (
- dump_data_and_model,
- fit_classification_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, fit_classification_model, TARGET_OPSET
ort_version = ".".join(ort_version.split(".")[:2])
-LOG_LOSS = ("log_loss" if pv.Version(skl_version) >= pv.Version("1.1")
- else "log")
+LOG_LOSS = "log_loss" if pv.Version(skl_version) >= pv.Version("1.1") else "log"
class TestSGDClassifierConverter(unittest.TestCase):
-
def test_model_sgd_binary_class_hinge(self):
model, X = fit_classification_model(
- SGDClassifier(loss='hinge', random_state=42), 2)
+ SGDClassifier(loss="hinge", random_state=42), 2
+ )
model_onnx = convert_sklearn(
model,
"scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinaryHinge-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinaryHinge-Out0",
+ )
def test_model_sgd_multi_class_hinge(self):
model, X = fit_classification_model(
- SGDClassifier(loss='hinge', random_state=42), 5)
+ SGDClassifier(loss="hinge", random_state=42), 5
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiHinge-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiHinge-Out0",
+ )
def test_model_sgd_multi_class_hinge_string(self):
model, X = fit_classification_model(
- SGDClassifier(loss='hinge', random_state=42), 5, label_string=True)
+ SGDClassifier(loss="hinge", random_state=42), 5, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiHinge-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiHinge-Out0",
+ )
- @unittest.skipIf(TARGET_OPSET < 13,
- reason="duplicated test")
+ @unittest.skipIf(TARGET_OPSET < 13, reason="duplicated test")
def test_model_sgd_binary_class_log_sigmoid(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 2, n_features=2)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2, n_features=2
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=10, options={'zipmap': False})
+ target_opset=10,
+ options={"zipmap": False},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32)[:5], model, model_onnx,
+ X.astype(np.float32)[:5],
+ model,
+ model_onnx,
basename="SklearnSGDClassifierBinaryLog-Dec4",
- verbose=False)
+ verbose=False,
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32)[:5], model, model_onnx,
+ X.astype(np.float32)[:5],
+ model,
+ model_onnx,
basename="SklearnSGDClassifierBinaryLog13-Dec4",
- verbose=False)
+ verbose=False,
+ )
def test_model_sgd_binary_class_log(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 2)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=min(TARGET_OPSET, 10))
+ target_opset=min(TARGET_OPSET, 10),
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinaryLog-Dec4")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinaryLog-Dec4",
+ )
def test_model_sgd_binary_class_log_decision_function(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 2)
- options = {id(model): {'raw_scores': True}}
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
options=options,
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
+ X.astype(np.float32),
+ model,
+ model_onnx,
basename="SklearnSGDClassifierBinaryLogDecisionFunction-Dec3",
- methods=['predict', 'decision_function_binary'])
+ methods=["predict", "decision_function_binary"],
+ )
def test_model_sgd_multi_class_log(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 5)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 5
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=min(12, TARGET_OPSET))
+ target_opset=min(12, TARGET_OPSET),
+ )
X = np.array([X[1], X[1]])
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiLog")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiLog",
+ )
@unittest.skipIf(TARGET_OPSET < 13, reason="duplicated test")
def test_model_sgd_multi_class_log_sigmoid(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 5)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 5
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET, options={'zipmap': False})
+ target_opset=TARGET_OPSET,
+ options={"zipmap": False},
+ )
X = np.array([X[1], X[1]])
dump_data_and_model(
- X.astype(np.float32), model, model_onnx, verbose=False,
- basename="SklearnSGDClassifierMultiLog13")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnSGDClassifierMultiLog13",
+ )
def test_model_sgd_multi_class_log_decision_function(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 3)
- options = {id(model): {'raw_scores': True}}
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 3
+ )
+ options = {id(model): {"raw_scores": True}}
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options=options, target_opset=TARGET_OPSET)
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
+ X.astype(np.float32),
+ model,
+ model_onnx,
basename="SklearnSGDClassifierMultiLogDecisionFunction-Dec3",
- methods=['predict', 'decision_function'])
+ methods=["predict", "decision_function"],
+ )
def test_model_sgd_binary_class_log_l1_no_intercept(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, penalty='l1', fit_intercept=False,
- random_state=42), 2)
+ SGDClassifier(
+ loss=LOG_LOSS, penalty="l1", fit_intercept=False, random_state=42
+ ),
+ 2,
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinaryLogL1NoIntercept-Dec4")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinaryLogL1NoIntercept-Dec4",
+ )
- @unittest.skipIf(pv.Version(ort_version) <= pv.Version("1.0.0"),
- reason="discrepencies")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.0.0"), reason="discrepancies"
+ )
def test_model_sgd_multi_class_log_l1_no_intercept(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, penalty='l1', fit_intercept=False,
- random_state=43), 3, n_features=7)
+ SGDClassifier(
+ loss=LOG_LOSS, penalty="l1", fit_intercept=False, random_state=43
+ ),
+ 3,
+ n_features=7,
+ )
X = np.array([X[4], X[4]])
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(np.float32),
- model, model_onnx, verbose=False,
- basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4")
+ model,
+ model_onnx,
+ verbose=False,
+ basename="SklearnSGDClassifierMultiLogL1NoIntercept-Dec4",
+ )
def test_model_sgd_binary_class_elasticnet_power_t(self):
model, X = fit_classification_model(
- SGDClassifier(penalty='elasticnet', l1_ratio=0.3,
- power_t=2, random_state=42), 2)
+ SGDClassifier(
+ penalty="elasticnet", l1_ratio=0.3, power_t=2, random_state=42
+ ),
+ 2,
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinaryElasticnetPowerT-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinaryElasticnetPowerT-Out0",
+ )
def test_model_sgd_multi_class_elasticnet_power_t(self):
model, X = fit_classification_model(
- SGDClassifier(penalty='elasticnet', l1_ratio=0.3,
- power_t=2, random_state=42), 5)
+ SGDClassifier(
+ penalty="elasticnet", l1_ratio=0.3, power_t=2, random_state=42
+ ),
+ 5,
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiElasticnetPowerT-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiElasticnetPowerT-Out0",
+ )
def test_model_sgd_binary_class_squared_hinge(self):
model, X = fit_classification_model(
- SGDClassifier(loss='squared_hinge', random_state=42), 2)
+ SGDClassifier(loss="squared_hinge", random_state=42), 2
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinarySquaredHinge-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinarySquaredHinge-Out0",
+ )
def test_model_sgd_multi_class_squared_hinge(self):
model, X = fit_classification_model(
- SGDClassifier(loss='squared_hinge', random_state=42), 5)
+ SGDClassifier(loss="squared_hinge", random_state=42), 5
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiSquaredHinge-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiSquaredHinge-Out0",
+ )
def test_model_sgd_binary_class_perceptron(self):
model, X = fit_classification_model(
- SGDClassifier(loss='perceptron', random_state=42), 2)
+ SGDClassifier(loss="perceptron", random_state=42), 2
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierBinaryPerceptron-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierBinaryPerceptron-Out0",
+ )
def test_model_sgd_multi_class_perceptron(self):
model, X = fit_classification_model(
- SGDClassifier(loss='perceptron', random_state=42), 5)
+ SGDClassifier(loss="perceptron", random_state=42), 5
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X.astype(np.float32), model, model_onnx,
- basename="SklearnSGDClassifierMultiPerceptron-Out0")
+ X.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDClassifierMultiPerceptron-Out0",
+ )
def test_model_sgd_binary_class_hinge_int(self):
model, X = fit_classification_model(
- SGDClassifier(loss='hinge', random_state=42), 2, is_int=True)
+ SGDClassifier(loss="hinge", random_state=42), 2, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDClassifierBinaryHingeInt-Out0")
+ X, model, model_onnx, basename="SklearnSGDClassifierBinaryHingeInt-Out0"
+ )
def test_model_sgd_multi_class_hinge_int(self):
model, X = fit_classification_model(
- SGDClassifier(loss='hinge', random_state=42), 5, is_int=True)
+ SGDClassifier(loss="hinge", random_state=42), 5, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDClassifierMultiHingeInt-Out0")
+ X, model, model_onnx, basename="SklearnSGDClassifierMultiHingeInt-Out0"
+ )
def test_model_sgd_binary_class_log_int(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_int=True)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDClassifierBinaryLogInt")
+ X, model, model_onnx, basename="SklearnSGDClassifierBinaryLogInt"
+ )
def test_model_sgd_binary_class_log_bool(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_bool=True)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2, is_bool=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD binary classifier",
+ model,
+ "scikit-learn SGD binary classifier",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDClassifierBinaryLogBool")
+ X, model, model_onnx, basename="SklearnSGDClassifierBinaryLogBool"
+ )
def test_model_sgd_multi_class_log_int(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42), 5, is_int=True)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 5, is_int=True
+ )
model_onnx = convert_sklearn(
- model, "scikit-learn SGD multi-class classifier",
+ model,
+ "scikit-learn SGD multi-class classifier",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
X = X[6:8]
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSGDClassifierMultiLogInt")
+ X, model, model_onnx, basename="SklearnSGDClassifierMultiLogInt"
+ )
def test_model_multi_class_nocl(self):
model, X = fit_classification_model(
- SGDClassifier(loss=LOG_LOSS, random_state=42),
- 2, label_string=True)
+ SGDClassifier(loss=LOG_LOSS, random_state=42), 2, label_string=True
+ )
model_onnx = convert_sklearn(
- model, "multi-class nocl",
+ model,
+ "multi-class nocl",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'nocl': True}},
- target_opset=TARGET_OPSET)
+ options={id(model): {"nocl": True}},
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
sonx = str(model_onnx)
- assert 'classlabels_strings' not in sonx
- assert 'cl0' not in sonx
+ assert "classlabels_strings" not in sonx
+ assert "cl0" not in sonx
dump_data_and_model(
- X[6:8], model, model_onnx, classes=model.classes_,
- basename="SklearnSGDMultiNoCl", verbose=False)
+ X[6:8],
+ model,
+ model_onnx,
+ classes=model.classes_,
+ basename="SklearnSGDMultiNoCl",
+ verbose=False,
+ )
if __name__ == "__main__":
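Note: the SGD classifier tests above also rely on the per-model options dictionary keyed by id(model) (zipmap, raw_scores, nocl). A minimal sketch of the zipmap=False variant, assuming scikit-learn >= 1.1 for the "log_loss" name (older versions spell it "log", which is what the LOG_LOSS constant above handles):

    import numpy as np
    from onnxruntime import InferenceSession
    from sklearn.datasets import make_classification
    from sklearn.linear_model import SGDClassifier
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType

    X, y = make_classification(100, n_features=4, random_state=42)
    X = X.astype(np.float32)
    model = SGDClassifier(loss="log_loss", random_state=42).fit(X, y)

    # Options are keyed by id(model); zipmap=False returns the probabilities as a
    # plain tensor instead of a list of dictionaries.
    onx = convert_sklearn(
        model,
        "sgd classifier",
        [("input", FloatTensorType([None, X.shape[1]]))],
        options={id(model): {"zipmap": False}},
        target_opset=15,
    )
    sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
    labels, probabilities = sess.run(None, {"input": X[:5]})
    assert labels.shape == (5,) and probabilities.shape == (5, 2)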
diff --git a/tests/test_sklearn_sgd_oneclass_svm_converter.py b/tests/test_sklearn_sgd_oneclass_svm_converter.py
index e4c69a35b..5837ae1da 100644
--- a/tests/test_sklearn_sgd_oneclass_svm_converter.py
+++ b/tests/test_sklearn_sgd_oneclass_svm_converter.py
@@ -4,6 +4,7 @@
import unittest
import numpy as np
+
try:
from sklearn.linear_model import SGDOneClassSVM
except ImportError:
@@ -15,21 +16,15 @@
FloatTensorType,
)
-from test_utils import (
- dump_data_and_model,
- TARGET_OPSET
-)
+from test_utils import dump_data_and_model, TARGET_OPSET
ort_version = ".".join(ort_version.split(".")[:2])
class TestSGDOneClassSVMConverter(unittest.TestCase):
- @unittest.skipIf(SGDOneClassSVM is None,
- reason="scikit-learn<1.0")
+ @unittest.skipIf(SGDOneClassSVM is None, reason="scikit-learn<1.0")
def test_model_sgd_oneclass_svm(self):
- X = np.array([
- [-1, -1], [-2, -1], [1, 1], [2, 1]
- ])
+ X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
model = SGDOneClassSVM(random_state=42)
model.fit(X)
test_x = np.array([[0, 0], [-1, -1], [1, 1]]).astype(np.float32)
@@ -39,11 +34,16 @@ def test_model_sgd_oneclass_svm(self):
model,
"scikit-learn SGD OneClass SVM",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(test_x.astype(np.float32), model, model_onnx,
- basename="SklearnSGDOneClassSVMBinaryHinge")
+ dump_data_and_model(
+ test_x.astype(np.float32),
+ model,
+ model_onnx,
+ basename="SklearnSGDOneClassSVMBinaryHinge",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_stacking.py b/tests/test_sklearn_stacking.py
index 734f856a4..531c1d702 100644
--- a/tests/test_sklearn_stacking.py
+++ b/tests/test_sklearn_stacking.py
@@ -14,8 +14,8 @@
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import OneHotEncoder, Normalizer
from sklearn.neighbors import KNeighborsClassifier
-from sklearn.ensemble import (
- RandomForestClassifier, GradientBoostingClassifier)
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+
try:
from sklearn.ensemble import StackingRegressor, StackingClassifier
except ImportError:
@@ -27,262 +27,359 @@
except ImportError:
from sklearn.utils.testing import ignore_warnings
from skl2onnx import (
- convert_sklearn, to_onnx, update_registered_converter,
- get_model_alias)
+ convert_sklearn,
+ to_onnx,
+ update_registered_converter,
+ get_model_alias,
+)
from skl2onnx.common.data_types import FloatTensorType
from test_utils import (
- dump_data_and_model, fit_regression_model,
- fit_classification_model, TARGET_OPSET)
+ dump_data_and_model,
+ fit_regression_model,
+ fit_classification_model,
+ TARGET_OPSET,
+)
def model_to_test_reg(passthrough=False):
- estimators = [
- ('dt', DecisionTreeRegressor()),
- ('las', LinearRegression())]
+ estimators = [("dt", DecisionTreeRegressor()), ("las", LinearRegression())]
stacking_regressor = StackingRegressor(
- estimators=estimators, final_estimator=LinearRegression(),
- passthrough=passthrough)
+ estimators=estimators,
+ final_estimator=LinearRegression(),
+ passthrough=passthrough,
+ )
return stacking_regressor
def model_to_test_cl(passthrough=False):
- estimators = [
- ('dt', DecisionTreeClassifier()),
- ('las', LogisticRegression())]
+ estimators = [("dt", DecisionTreeClassifier()), ("las", LogisticRegression())]
stacking_regressor = StackingClassifier(
- estimators=estimators, final_estimator=LogisticRegression(),
- passthrough=passthrough)
+ estimators=estimators,
+ final_estimator=LogisticRegression(),
+ passthrough=passthrough,
+ )
return stacking_regressor
class TestStackingConverter(unittest.TestCase):
-
- @unittest.skipIf(StackingRegressor is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingRegressor is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_regression(self):
model, X = fit_regression_model(model_to_test_reg())
model_onnx = convert_sklearn(
- model, "stacking regressor",
+ model,
+ "stacking regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingRegressor-Dec4",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingRegressor is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingRegressor is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_regression_passthrough(self):
- model, X = fit_regression_model(model_to_test_reg(passthrough=True),
- factor=0.1)
+ model, X = fit_regression_model(model_to_test_reg(passthrough=True), factor=0.1)
model_onnx = convert_sklearn(
- model, "stacking regressor",
+ model,
+ "stacking regressor",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingRegressorPassthrough",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier(self):
- model, X = fit_classification_model(
- model_to_test_cl(), n_classes=2)
+ model, X = fit_classification_model(model_to_test_cl(), n_classes=2)
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifier",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier_passthrough(self):
model, X = fit_classification_model(
- model_to_test_cl(passthrough=True), n_classes=2)
+ model_to_test_cl(passthrough=True), n_classes=2
+ )
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifierPassthrough",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier_nozipmap(self):
- model, X = fit_classification_model(
- model_to_test_cl(), n_classes=2)
+ model, X = fit_classification_model(model_to_test_cl(), n_classes=2)
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifierNoZipMap",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier_nozipmap_passthrough(self):
model, X = fit_classification_model(
- model_to_test_cl(passthrough=True), n_classes=2)
+ model_to_test_cl(passthrough=True), n_classes=2
+ )
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
- options={id(model): {'zipmap': False}})
+ options={id(model): {"zipmap": False}},
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifierNoZipMapPassthrough",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_issue_786_exc(self):
pipeline = make_pipeline(
- OneHotEncoder(handle_unknown='ignore', sparse=False),
- StackingClassifier(estimators=[
- ("rf", RandomForestClassifier(n_estimators=10,
- random_state=42)),
- ("gb", GradientBoostingClassifier(n_estimators=10,
- random_state=42)),
- ("knn", KNeighborsClassifier(n_neighbors=2))
- ], final_estimator=LogisticRegression(), cv=2))
+ OneHotEncoder(handle_unknown="ignore", sparse=False),
+ StackingClassifier(
+ estimators=[
+ ("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
+ (
+ "gb",
+ GradientBoostingClassifier(n_estimators=10, random_state=42),
+ ),
+ ("knn", KNeighborsClassifier(n_neighbors=2)),
+ ],
+ final_estimator=LogisticRegression(),
+ cv=2,
+ ),
+ )
X_train = pandas.DataFrame(
- dict(text=['A', 'B', 'A', 'B', 'AA', 'B',
- 'A', 'B', 'A', 'AA', 'B', 'B'],
- val=[0.5, 0.6, 0.7, 0.61, 0.51, 0.67,
- 0.51, 0.61, 0.71, 0.611, 0.511, 0.671]))
- X_train['val'] = X_train.val.astype(numpy.float32)
- y_train = numpy.array([0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1])
+ dict(
+ text=["A", "B", "A", "B", "AA", "B", "A", "B", "A", "AA", "B", "B"],
+ val=[
+ 0.5,
+ 0.6,
+ 0.7,
+ 0.61,
+ 0.51,
+ 0.67,
+ 0.51,
+ 0.61,
+ 0.71,
+ 0.611,
+ 0.511,
+ 0.671,
+ ],
+ )
+ )
+ X_train["val"] = X_train.val.astype(numpy.float32)
+ y_train = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
pipeline.fit(X_train, y_train)
with self.assertRaises(RuntimeError):
to_onnx(pipeline, X=X_train[:1], target_opset=TARGET_OPSET)
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_issue_786(self):
pipeline = make_pipeline(
- OneHotEncoder(handle_unknown='ignore', sparse=False),
- StackingClassifier(estimators=[
- ("rf", RandomForestClassifier(n_estimators=10,
- random_state=42)),
- ("gb", GradientBoostingClassifier(n_estimators=10,
- random_state=42)),
- ("knn", KNeighborsClassifier(n_neighbors=2))
- ], final_estimator=LogisticRegression(), cv=2))
+ OneHotEncoder(handle_unknown="ignore", sparse=False),
+ StackingClassifier(
+ estimators=[
+ ("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
+ (
+ "gb",
+ GradientBoostingClassifier(n_estimators=10, random_state=42),
+ ),
+ ("knn", KNeighborsClassifier(n_neighbors=2)),
+ ],
+ final_estimator=LogisticRegression(),
+ cv=2,
+ ),
+ )
X_train = pandas.DataFrame(
- dict(text=['A', 'B', 'A', 'B', 'AA', 'B',
- 'A', 'B', 'A', 'AA', 'B', 'B'],
- val=[0.5, 0.6, 0.7, 0.61, 0.51, 0.67,
- 0.51, 0.61, 0.71, 0.611, 0.511, 0.671]))
- X_train['val'] = (X_train.val * 1000).astype(numpy.float32)
- y_train = numpy.array([0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1])
+ dict(
+ text=["A", "B", "A", "B", "AA", "B", "A", "B", "A", "AA", "B", "B"],
+ val=[
+ 0.5,
+ 0.6,
+ 0.7,
+ 0.61,
+ 0.51,
+ 0.67,
+ 0.51,
+ 0.61,
+ 0.71,
+ 0.611,
+ 0.511,
+ 0.671,
+ ],
+ )
+ )
+ X_train["val"] = (X_train.val * 1000).astype(numpy.float32)
+ y_train = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
pipeline.fit(X_train, y_train)
- onx = to_onnx(pipeline, X=X_train[:1],
- options={'zipmap': False},
- target_opset=TARGET_OPSET)
+ onx = to_onnx(
+ pipeline,
+ X=X_train[:1],
+ options={"zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
# with open("ohe_debug.onnx", "wb") as f:
# f.write(onx.SerializeToString())
sess = InferenceSession(
- onx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'text': X_train.text.values.reshape((-1, 1)),
- 'val': X_train.val.values.reshape((-1, 1))})
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(
+ None,
+ {
+ "text": X_train.text.values.reshape((-1, 1)),
+ "val": X_train.val.values.reshape((-1, 1)),
+ },
+ )
assert_almost_equal(pipeline.predict(X_train), res[0])
assert_almost_equal(pipeline.predict_proba(X_train), res[1])
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier_column_transformer(self):
classifiers = {
- 'A': RandomForestClassifier(n_estimators=5, random_state=42),
- 'B': GradientBoostingClassifier(n_estimators=5, random_state=42)
+ "A": RandomForestClassifier(n_estimators=5, random_state=42),
+ "B": GradientBoostingClassifier(n_estimators=5, random_state=42),
}
- model_to_test = Pipeline(steps=[
- ('cbe', ColumnTransformer([
- ("norm1", Normalizer(norm='l1'), [0, 1]),
- ("norm2", Normalizer(norm='l2'), [2, 3])])),
- ('sc', StackingClassifier(
- estimators=list(map(tuple, classifiers.items())),
- stack_method='predict_proba',
- passthrough=False
- ))
- ])
- model, X = fit_classification_model(
- model_to_test, n_classes=2)
+ model_to_test = Pipeline(
+ steps=[
+ (
+ "cbe",
+ ColumnTransformer(
+ [
+ ("norm1", Normalizer(norm="l1"), [0, 1]),
+ ("norm2", Normalizer(norm="l2"), [2, 3]),
+ ]
+ ),
+ ),
+ (
+ "sc",
+ StackingClassifier(
+ estimators=list(map(tuple, classifiers.items())),
+ stack_method="predict_proba",
+ passthrough=False,
+ ),
+ ),
+ ]
+ )
+ model, X = fit_classification_model(model_to_test, n_classes=2)
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifierPipe",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_model_stacking_classifier_column_transformer_passthrough(self):
classifiers = {
- 'A': RandomForestClassifier(n_estimators=5, random_state=42),
- 'B': GradientBoostingClassifier(n_estimators=5, random_state=42)
+ "A": RandomForestClassifier(n_estimators=5, random_state=42),
+ "B": GradientBoostingClassifier(n_estimators=5, random_state=42),
}
- model_to_test = Pipeline(steps=[
- ('cbe', ColumnTransformer([
- ("norm1", Normalizer(norm='l1'), [0, 1]),
- ("norm2", Normalizer(norm='l2'), [2, 3])])),
- ('sc', StackingClassifier(
- estimators=list(map(tuple, classifiers.items())),
- stack_method='predict_proba',
- passthrough=True
- ))
- ])
- model, X = fit_classification_model(
- model_to_test, n_classes=2)
+ model_to_test = Pipeline(
+ steps=[
+ (
+ "cbe",
+ ColumnTransformer(
+ [
+ ("norm1", Normalizer(norm="l1"), [0, 1]),
+ ("norm2", Normalizer(norm="l2"), [2, 3]),
+ ]
+ ),
+ ),
+ (
+ "sc",
+ StackingClassifier(
+ estimators=list(map(tuple, classifiers.items())),
+ stack_method="predict_proba",
+ passthrough=True,
+ ),
+ ),
+ ]
+ )
+ model, X = fit_classification_model(model_to_test, n_classes=2)
model_onnx = convert_sklearn(
- model, "stacking classifier",
+ model,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnStackingClassifierPipePassthrough",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_concat_stacking(self):
-
class CustomTransformer:
-
def fit(self, X, y=None):
return self
@@ -296,10 +393,10 @@ def parser(scope, model, inputs, custom_parsers=None):
alias = get_model_alias(type(model))
op = scope.declare_local_operator(alias, model)
op.inputs = inputs
- n_features = sum(
- list(map(lambda x: x.type.shape[1], op.inputs)))
+ n_features = sum(list(map(lambda x: x.type.shape[1], op.inputs)))
variable = scope.declare_local_variable(
- "c_outputs", FloatTensorType([None, n_features]))
+ "c_outputs", FloatTensorType([None, n_features])
+ )
op.outputs.append(variable)
return op.outputs
@@ -308,60 +405,70 @@ def converter(scope, operator, container):
for index in range(operator.inputs[0].type.shape[1]):
index_name = scope.get_unique_variable_name("ind%d" % index)
- container.add_initializer(
- index_name, TensorProto.INT64, [], [index])
- feature_column_name = scope.get_unique_variable_name(
- "fc%d" % index)
+ container.add_initializer(index_name, TensorProto.INT64, [], [index])
+ feature_column_name = scope.get_unique_variable_name("fc%d" % index)
container.add_node(
"ArrayFeatureExtractor",
[operator.inputs[0].full_name, index_name],
- feature_column_name, op_domain="ai.onnx.ml",
- name=scope.get_unique_operator_name("AFE%d" % index))
+ feature_column_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("AFE%d" % index),
+ )
output_cols.append(feature_column_name)
container.add_node(
- "Concat", output_cols,
+ "Concat",
+ output_cols,
operator.outputs[0].full_name,
name=scope.get_unique_operator_name("CUSTOMCONCAT"),
- axis=-1)
+ axis=-1,
+ )
update_registered_converter(
- CustomTransformer, "CustomTransformerUT",
- shape_calculator, converter, parser=parser, overwrite=True)
+ CustomTransformer,
+ "CustomTransformerUT",
+ shape_calculator,
+ converter,
+ parser=parser,
+ overwrite=True,
+ )
clf1 = RandomForestClassifier(n_estimators=5)
clf2 = RandomForestClassifier(n_estimators=5)
- classifiers = {'clf1': clf1, 'clf2': clf2}
+ classifiers = {"clf1": clf1, "clf2": clf2}
stacking_ensemble = StackingClassifier(
estimators=list(map(tuple, classifiers.items())),
- n_jobs=1, stack_method='predict_proba',
- passthrough=False)
+ n_jobs=1,
+ stack_method="predict_proba",
+ passthrough=False,
+ )
- pipe = Pipeline(steps=[
- ('ct', CustomTransformer()), ('sc', stacking_ensemble)])
+ pipe = Pipeline(steps=[("ct", CustomTransformer()), ("sc", stacking_ensemble)])
x = numpy.random.randn(20, 4).astype(numpy.float32)
y = numpy.random.randint(2, size=20).astype(numpy.int64)
pipe.fit(x, y)
input_types = [("X", FloatTensorType([None, x.shape[1]]))]
model_onnx = convert_sklearn(
- pipe, 'bug', input_types, target_opset=TARGET_OPSET,
- verbose=0, options={'zipmap': False})
+ pipe,
+ "bug",
+ input_types,
+ target_opset=TARGET_OPSET,
+ verbose=0,
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
self.assertEqual(got.shape[0], x.shape[0])
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
@ignore_warnings(category=FutureWarning)
def test_concat_stacking_passthrough(self):
-
class CustomTransformer:
-
def fit(self, X, y=None):
return self
@@ -375,10 +482,10 @@ def parser(scope, model, inputs, custom_parsers=None):
alias = get_model_alias(type(model))
op = scope.declare_local_operator(alias, model)
op.inputs = inputs
- n_features = sum(
- list(map(lambda x: x.type.shape[1], op.inputs)))
+ n_features = sum(list(map(lambda x: x.type.shape[1], op.inputs)))
variable = scope.declare_local_variable(
- "c_outputs", FloatTensorType([None, n_features]))
+ "c_outputs", FloatTensorType([None, n_features])
+ )
op.outputs.append(variable)
return op.outputs
@@ -387,51 +494,64 @@ def converter(scope, operator, container):
for index in range(operator.inputs[0].type.shape[1]):
index_name = scope.get_unique_variable_name("ind%d" % index)
- container.add_initializer(
- index_name, TensorProto.INT64, [], [index])
- feature_column_name = scope.get_unique_variable_name(
- "fc%d" % index)
+ container.add_initializer(index_name, TensorProto.INT64, [], [index])
+ feature_column_name = scope.get_unique_variable_name("fc%d" % index)
container.add_node(
"ArrayFeatureExtractor",
[operator.inputs[0].full_name, index_name],
- feature_column_name, op_domain="ai.onnx.ml",
- name=scope.get_unique_operator_name("AFE%d" % index))
+ feature_column_name,
+ op_domain="ai.onnx.ml",
+ name=scope.get_unique_operator_name("AFE%d" % index),
+ )
output_cols.append(feature_column_name)
container.add_node(
- "Concat", output_cols,
+ "Concat",
+ output_cols,
operator.outputs[0].full_name,
name=scope.get_unique_operator_name("CUSTOMCONCAT"),
- axis=-1)
+ axis=-1,
+ )
update_registered_converter(
- CustomTransformer, "CustomTransformerUT",
- shape_calculator, converter, parser=parser, overwrite=True)
+ CustomTransformer,
+ "CustomTransformerUT",
+ shape_calculator,
+ converter,
+ parser=parser,
+ overwrite=True,
+ )
clf1 = RandomForestClassifier(n_estimators=5)
clf2 = RandomForestClassifier(n_estimators=5)
- classifiers = {'clf1': clf1, 'clf2': clf2}
+ classifiers = {"clf1": clf1, "clf2": clf2}
stacking_ensemble = StackingClassifier(
estimators=list(map(tuple, classifiers.items())),
- n_jobs=1, stack_method='predict_proba',
- passthrough=True)
+ n_jobs=1,
+ stack_method="predict_proba",
+ passthrough=True,
+ )
- pipe = Pipeline(steps=[
- ('ct', CustomTransformer()), ('sc', stacking_ensemble)])
+ pipe = Pipeline(steps=[("ct", CustomTransformer()), ("sc", stacking_ensemble)])
x = numpy.random.randn(20, 4).astype(numpy.float32)
y = numpy.random.randint(2, size=20).astype(numpy.int64)
pipe.fit(x, y)
input_types = [("X", FloatTensorType([None, x.shape[1]]))]
model_onnx = convert_sklearn(
- pipe, 'bug', input_types, target_opset=TARGET_OPSET,
- verbose=0, options={'zipmap': False})
+ pipe,
+ "bug",
+ input_types,
+ target_opset=TARGET_OPSET,
+ verbose=0,
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
self.assertEqual(got.shape[0], x.shape[0])
diff --git a/tests/test_sklearn_svm_converters.py b/tests/test_sklearn_svm_converters.py
index 0f0eb3fc8..da6db75eb 100644
--- a/tests/test_sklearn_svm_converters.py
+++ b/tests/test_sklearn_svm_converters.py
@@ -9,6 +9,7 @@
from numpy.testing import assert_almost_equal
from sklearn.datasets import load_iris
from sklearn.svm import SVC, SVR, NuSVC, NuSVR, OneClassSVM, LinearSVC
+
try:
from skl2onnx.common._apply_operation import apply_less
except ImportError:
@@ -23,15 +24,17 @@
from skl2onnx.operator_converters.ada_boost import _scikit_learn_before_022
from onnxruntime import __version__ as ort_version
from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ fit_regression_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)

-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]


class TestSklearnSVM(unittest.TestCase):
-
def _fit_binary_classification(self, model):
iris = load_iris()
X = iris.data[:, :3]
@@ -86,12 +89,15 @@ def _check_attributes(self, node, attribute_test):
def test_convert_svc_binary_linear_pfalse(self):
model, X = self._fit_binary_classification(
- SVC(kernel="linear", probability=False,
- decision_function_shape='ovo'))
+ SVC(kernel="linear", probability=False, decision_function_shape="ovo")
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -108,26 +114,32 @@ def test_convert_svc_binary_linear_pfalse(self):
},
)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinSVCLinearPF-NoProbOpp")
+ X, model, model_onnx, basename="SklearnBinSVCLinearPF-NoProbOpp"
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ options={id(model): {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinSVCLinearPF-NoProbOpp")
+ X, model, model_onnx, basename="SklearnBinSVCLinearPF-NoProbOpp"
+ )
def test_convert_svc_binary_linear_ptrue(self):
model, X = self._fit_binary_classification(
- SVC(kernel="linear", probability=True))
+ SVC(kernel="linear", probability=True)
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input",
- FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -143,72 +155,89 @@ def test_convert_svc_binary_linear_ptrue(self):
"vectors_per_class": None,
},
)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinSVCLinearPT")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinSVCLinearPT")
def test_convert_svc_multi_linear_pfalse(self):
model, X = self._fit_multi_classification(
- SVC(kernel="linear", probability=False,
- decision_function_shape="ovo"))
+ SVC(kernel="linear", probability=False, decision_function_shape="ovo")
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
self._check_attributes(
- svc_node, {
- "coefficients": None, "kernel_params": None,
- "kernel_type": "LINEAR", "post_transform": None,
- "rho": None, "support_vectors": None,
- "vectors_per_class": None})
+ svc_node,
+ {
+ "coefficients": None,
+ "kernel_params": None,
+ "kernel_type": "LINEAR",
+ "post_transform": None,
+ "rho": None,
+ "support_vectors": None,
+ "vectors_per_class": None,
+ },
+ )
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclSVCLinearPF-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCLinearPF-Dec4")
@unittest.skipIf(apply_less is None, reason="onnxconverter-common old")
def test_convert_svc_multi_linear_pfalse_ovr(self):
model, X = self._fit_multi_classification(
- SVC(kernel="linear", probability=False,
- decision_function_shape='ovr'))
+ SVC(kernel="linear", probability=False, decision_function_shape="ovr")
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclSVCOVR-Dec4")
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCOVR-Dec4")
def test_convert_svc_multi_linear_ptrue(self):
model, X = self._fit_multi_classification(
- SVC(kernel="linear", probability=True),
- nbclass=3)
+ SVC(kernel="linear", probability=True), nbclass=3
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
self._check_attributes(
- svc_node, {
- "coefficients": None, "kernel_params": None,
- "kernel_type": "LINEAR", "post_transform": None,
- "rho": None, "support_vectors": None,
- "vectors_per_class": None})
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclSVCLinearPT-Dec2")
+ svc_node,
+ {
+ "coefficients": None,
+ "kernel_params": None,
+ "kernel_type": "LINEAR",
+ "post_transform": None,
+ "rho": None,
+ "support_vectors": None,
+ "vectors_per_class": None,
+ },
+ )
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclSVCLinearPT-Dec2")
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="use of recent Cast operator")
+ reason="use of recent Cast operator",
+ )
def test_convert_svr_linear(self):
model, X = self._fit_binary_classification(SVR(kernel="linear"))
model_onnx = convert_sklearn(
- model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVR",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
self._check_attributes(
@@ -222,15 +251,18 @@ def test_convert_svr_linear(self):
"support_vectors": None,
},
)
- dump_data_and_model(X, model, model_onnx,
- basename="SklearnRegSVRLinear-Dec3")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRegSVRLinear-Dec3")
def test_convert_nusvc_binary_pfalse(self):
model, X = self._fit_binary_classification(
- NuSVC(probability=False, decision_function_shape='ovo'))
+ NuSVC(probability=False, decision_function_shape="ovo")
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -247,17 +279,21 @@ def test_convert_nusvc_binary_pfalse(self):
},
)
dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinNuSVCPF-NoProbOpp")
+ X, model, model_onnx, basename="SklearnBinNuSVCPF-NoProbOpp"
+ )
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="use of recent Cast operator")
+ reason="use of recent Cast operator",
+ )
def test_convert_nusvc_binary_ptrue(self):
model, X = self._fit_binary_classification(NuSVC(probability=True))
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -273,17 +309,18 @@ def test_convert_nusvc_binary_ptrue(self):
"vectors_per_class": None,
},
)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinNuSVCPT")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinNuSVCPT")
def test_convert_nusvc_multi_pfalse(self):
model, X = self._fit_multi_classification(
- NuSVC(probability=False, nu=0.1,
- decision_function_shape='ovo'))
+ NuSVC(probability=False, nu=0.1, decision_function_shape="ovo")
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -299,55 +336,64 @@ def test_convert_nusvc_multi_pfalse(self):
"vectors_per_class": None,
},
)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclNuSVCPF-Dec1")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclNuSVCPF-Dec1")
def test_convert_svc_multi_pfalse_4(self):
model, X = self._fit_multi_classification(
- SVC(probability=False,
- decision_function_shape='ovo'), 4)
+ SVC(probability=False, decision_function_shape="ovo"), 4
+ )
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMcSVCPF")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMcSVCPF")
- @unittest.skipIf(_scikit_learn_before_022(),
- reason="break_ties introduced after 0.22")
+ @unittest.skipIf(
+ _scikit_learn_before_022(), reason="break_ties introduced after 0.22"
+ )
def test_convert_svc_multi_pfalse_4_break_ties(self):
model, X = self._fit_multi_classification(
- SVC(probability=True, break_ties=True), 4)
+ SVC(probability=True, break_ties=True), 4
+ )
model_onnx = convert_sklearn(
- model, "unused", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "unused",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
- basename="SklearnMcSVCPFBTF-Dec4")
+ model,
+ model_onnx,
+ basename="SklearnMcSVCPFBTF-Dec4",
+ )
def test_convert_svc_multi_ptrue_4(self):
model, X = self._fit_multi_classification(SVC(probability=True), 4)
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMcSVCPF4-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMcSVCPF4-Dec4")
def test_convert_nusvc_multi_ptrue(self):
- model, X = self._fit_multi_classification(
- NuSVC(probability=True, nu=0.1))
+ model, X = self._fit_multi_classification(NuSVC(probability=True, nu=0.1))
model_onnx = convert_sklearn(
- model, "SVC", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVC",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
nodes = model_onnx.graph.node
self.assertIsNotNone(nodes)
svc_node = nodes[0]
@@ -363,18 +409,20 @@ def test_convert_nusvc_multi_ptrue(self):
"vectors_per_class": None,
},
)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnMclNuSVCPT-Dec3")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnMclNuSVCPT-Dec3")
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="use of recent Cast operator")
+ reason="use of recent Cast operator",
+ )
def test_convert_nusvr(self):
model, X = self._fit_binary_classification(NuSVR())
model_onnx = convert_sklearn(
- model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVR",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
node = model_onnx.graph.node[0]
self.assertIsNotNone(node)
self._check_attributes(
@@ -388,89 +436,90 @@ def test_convert_nusvr(self):
"support_vectors": None,
},
)
- dump_data_and_model(X, model, model_onnx,
- basename="SklearnRegNuSVR")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnRegNuSVR")
@unittest.skipIf(
pv.Version(ort_version) <= pv.Version("0.4.0"),
- reason="use of recent Cast operator")
+ reason="use of recent Cast operator",
+ )
def test_convert_nusvr_default(self):
model, X = self._fit_binary_classification(NuSVR())
model_onnx = convert_sklearn(
- model, "SVR", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "SVR",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnRegNuSVR2")
def test_convert_svr_int(self):
- model, X = fit_regression_model(
- SVR(), is_int=True)
+ model, X = fit_regression_model(SVR(), is_int=True)
model_onnx = convert_sklearn(
- model, "SVR",
+ model,
+ "SVR",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSVRInt-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSVRInt-Dec4")
def test_convert_nusvr_int(self):
- model, X = fit_regression_model(
- NuSVR(), is_int=True)
+ model, X = fit_regression_model(NuSVR(), is_int=True)
model_onnx = convert_sklearn(
- model, "NuSVR", [("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ model,
+ "NuSVR",
+ [("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnNuSVRInt-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnNuSVRInt-Dec4")
def test_convert_svr_bool(self):
- model, X = fit_regression_model(
- SVR(), is_bool=True)
+ model, X = fit_regression_model(SVR(), is_bool=True)
model_onnx = convert_sklearn(
- model, "SVR",
+ model,
+ "SVR",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnSVRBool-Dec4")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnSVRBool-Dec4")
def test_convert_nusvr_bool(self):
- model, X = fit_regression_model(
- NuSVR(), is_bool=True)
+ model, X = fit_regression_model(NuSVR(), is_bool=True)
model_onnx = convert_sklearn(
- model, "NuSVR",
+ model,
+ "NuSVR",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnNuSVRBool")
+ dump_data_and_model(X, model, model_onnx, basename="SklearnNuSVRBool")
- @unittest.skipIf(
- TARGET_OPSET < 9,
- reason="operator sign available since opset 9")
+ @unittest.skipIf(TARGET_OPSET < 9, reason="operator sign available since opset 9")
def test_convert_oneclasssvm(self):
model, X = self._fit_one_class_svm(OneClassSVM())
model_onnx = convert_sklearn(
- model, "OCSVM", [("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
- dump_data_and_model(
- X, model, model_onnx,
- basename="SklearnBinOneClassSVM")
+ model,
+ "OCSVM",
+ [("input", FloatTensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
+ dump_data_and_model(X, model, model_onnx, basename="SklearnBinOneClassSVM")
def test_model_linear_svc_binary_class(self):
model, X = self._fit_binary_classification(LinearSVC(max_iter=10000))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1].ravel(), decimal=5)
@@ -479,13 +528,15 @@ def test_model_linear_svc_binary_class(self):
def test_model_linear_svc_multi_class(self):
model, X = self._fit_multi_classification(LinearSVC(max_iter=10000))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1], decimal=5)
@@ -494,13 +545,15 @@ def test_model_linear_svc_multi_class(self):
def test_model_svc_binary_class_false(self):
model, X = self._fit_binary_classification(SVC(max_iter=10000))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1][:, 0], decimal=5)
@@ -510,13 +563,15 @@ def test_model_svc_binary_class_false(self):
def test_model_svc_multi_class_false(self):
model, X = self._fit_multi_classification(SVC(max_iter=10000))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1], decimal=5)
@@ -524,31 +579,37 @@ def test_model_svc_multi_class_false(self):
def test_model_svc_binary_class_true(self):
model, X = self._fit_binary_classification(
- SVC(max_iter=10000, probability=True))
+ SVC(max_iter=10000, probability=True)
+ )
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={'zipmap': False}, target_opset=TARGET_OPSET)
+ options={"zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.predict_proba(X)
assert_almost_equal(proba, res[1], decimal=5)
assert_almost_equal(label, res[0])
def test_model_svc_multi_class_true(self):
- model, X = self._fit_multi_classification(
- SVC(max_iter=10000, probability=True))
+ model, X = self._fit_multi_classification(SVC(max_iter=10000, probability=True))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={'zipmap': False}, target_opset=TARGET_OPSET)
+ options={"zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.predict_proba(X)
assert_almost_equal(proba, res[1], decimal=5)
@@ -557,13 +618,15 @@ def test_model_svc_multi_class_true(self):
def test_model_nusvc_binary_class_false(self):
model, X = self._fit_binary_classification(NuSVC(max_iter=10000))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1][:, 0], decimal=5)
@@ -571,16 +634,17 @@ def test_model_nusvc_binary_class_false(self):
@unittest.skipIf(TARGET_OPSET < 12, reason="operator Less")
def test_model_nusvc_multi_class_false(self):
- model, X = self._fit_multi_classification(
- NuSVC(max_iter=10000, nu=0.1))
+ model, X = self._fit_multi_classification(NuSVC(max_iter=10000, nu=0.1))
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.decision_function(X)
assert_almost_equal(proba, res[1], decimal=4)
@@ -588,15 +652,19 @@ def test_model_nusvc_multi_class_false(self):
def test_model_nusvc_binary_class_true(self):
model, X = self._fit_binary_classification(
- NuSVC(max_iter=10000, probability=True))
+ NuSVC(max_iter=10000, probability=True)
+ )
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={'zipmap': False}, target_opset=TARGET_OPSET)
+ options={"zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.predict_proba(X)
assert_almost_equal(proba, res[1], decimal=5)
@@ -604,15 +672,19 @@ def test_model_nusvc_binary_class_true(self):
def test_model_nusvc_multi_class_true(self):
model, X = self._fit_multi_classification(
- NuSVC(max_iter=10000, probability=True, nu=0.1))
+ NuSVC(max_iter=10000, probability=True, nu=0.1)
+ )
model_onnx = convert_sklearn(
- model, "linear SVC",
+ model,
+ "linear SVC",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={'zipmap': False}, target_opset=TARGET_OPSET)
+ options={"zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X})
label = model.predict(X)
proba = model.predict_proba(X)
assert_almost_equal(proba, res[1], decimal=3)
diff --git a/tests/test_sklearn_text.py b/tests/test_sklearn_text.py
index ea1e68d99..ae8f33ea0 100644
--- a/tests/test_sklearn_text.py
+++ b/tests/test_sklearn_text.py
@@ -15,16 +15,16 @@
class TestSklearnText(unittest.TestCase):
-
def test_count_vectorizer(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]:
mod1 = CountVectorizer(ngram_range=ng)
@@ -42,22 +42,22 @@ def test_count_vectorizer(self):
self.assertIsInstance(k, tuple)
def test_count_vectorizer_regex(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for pattern in ["[a-zA-Z ]{1,4}", "[a-zA-Z]{1,4}"]:
for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]:
mod1 = CountVectorizer(ngram_range=ng, token_pattern=pattern)
mod1.fit(corpus)
- mod2 = TraceableCountVectorizer(ngram_range=ng,
- token_pattern=pattern)
+ mod2 = TraceableCountVectorizer(ngram_range=ng, token_pattern=pattern)
mod2.fit(corpus)
pred1 = mod1.transform(corpus)
@@ -72,19 +72,20 @@ def test_count_vectorizer_regex(self):
for k in voc:
self.assertIsInstance(k, tuple)
for i in k:
- if ' ' in i:
+ if " " in i:
spaces += 1
self.assertGreater(spaces, 1)
def test_tfidf_vectorizer(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]:
mod1 = TfidfVectorizer(ngram_range=ng)
@@ -102,26 +103,27 @@ def test_tfidf_vectorizer(self):
self.assertIsInstance(k, tuple)
def test_tfidf_vectorizer_english(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]:
with self.subTest(ngram_range=ng):
mod1 = TfidfVectorizer(ngram_range=ng, stop_words="english")
mod1.fit(corpus)
- mod2 = TraceableTfidfVectorizer(
- ngram_range=ng, stop_words="english")
+ mod2 = TraceableTfidfVectorizer(ngram_range=ng, stop_words="english")
mod2.fit(corpus)
if len(mod1.vocabulary_) != len(mod2.vocabulary_):
raise AssertionError(
- f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}")
+ f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}"
+ )
pred1 = mod1.transform(corpus)
pred2 = mod2.transform(corpus)
@@ -132,14 +134,15 @@ def test_tfidf_vectorizer_english(self):
self.assertIsInstance(k, tuple)
def test_count_vectorizer_english2(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for ng in [(1, 1), (1, 2), (1, 3)]:
with self.subTest(ngram_range=ng):
@@ -149,7 +152,8 @@ def test_count_vectorizer_english2(self):
token_pattern="[\\w_]{2,}",
lowercase=True,
min_df=2,
- max_features=100000)
+ max_features=100000,
+ )
mod1.fit(corpus)
mod2 = TraceableCountVectorizer(
@@ -158,19 +162,21 @@ def test_count_vectorizer_english2(self):
token_pattern="[\\w_]{2,}",
lowercase=True,
min_df=2,
- max_features=100000)
+ max_features=100000,
+ )
mod2.fit(corpus)
if mod1.token_pattern != mod2.token_pattern:
raise AssertionError(
- f"{mod1.token_pattern!r} != {mod2.token_pattern!r}")
+ f"{mod1.token_pattern!r} != {mod2.token_pattern!r}"
+ )
if len(mod1.stop_words_) != len(mod2.stop_words_):
- raise AssertionError(
- f"{mod1.stop_words_} != {mod2.stop_words_}")
+ raise AssertionError(f"{mod1.stop_words_} != {mod2.stop_words_}")
if len(mod1.vocabulary_) != len(mod2.vocabulary_):
raise AssertionError(
f"skl_version={skl_version!r}, "
f"skl_file={skl_file!r},\n"
- f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}")
+ f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}"
+ )
pred1 = mod1.transform(corpus)
pred2 = mod2.transform(corpus)
@@ -181,14 +187,15 @@ def test_count_vectorizer_english2(self):
self.assertIsInstance(k, tuple)
def test_tfidf_vectorizer_english2(self):
-
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for ng in [(1, 1), (1, 2), (1, 3)]:
with self.subTest(ngram_range=ng):
@@ -198,7 +205,8 @@ def test_tfidf_vectorizer_english2(self):
token_pattern="[\\w_]{2,}",
lowercase=True,
min_df=2,
- max_features=100000)
+ max_features=100000,
+ )
mod1.fit(corpus)
mod2 = TraceableTfidfVectorizer(
@@ -207,19 +215,21 @@ def test_tfidf_vectorizer_english2(self):
token_pattern="[\\w_]{2,}",
lowercase=True,
min_df=2,
- max_features=100000)
+ max_features=100000,
+ )
mod2.fit(corpus)
if mod1.token_pattern != mod2.token_pattern:
raise AssertionError(
- f"{mod1.token_pattern!r} != {mod2.token_pattern!r}")
+ f"{mod1.token_pattern!r} != {mod2.token_pattern!r}"
+ )
if len(mod1.stop_words_) != len(mod2.stop_words_):
- raise AssertionError(
- f"{mod1.stop_words_} != {mod2.stop_words_}")
+ raise AssertionError(f"{mod1.stop_words_} != {mod2.stop_words_}")
if len(mod1.vocabulary_) != len(mod2.vocabulary_):
raise AssertionError(
f"skl_version={skl_version!r}, "
f"skl_file={skl_file!r},\n"
- f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}")
+ f"mod1={mod1.vocabulary_}, mod2={mod2.vocabulary_}"
+ )
pred1 = mod1.transform(corpus)
pred2 = mod2.transform(corpus)
@@ -230,33 +240,34 @@ def test_tfidf_vectorizer_english2(self):
self.assertIsInstance(k, tuple)
def test_tfidf_vectorizer_regex(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "",
- ]).reshape((5, ))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "",
+ ]
+ ).reshape((5,))
for pattern in ["[a-zA-Z ]{1,4}", "[a-zA-Z]{1,4}"]:
for ng in [(1, 1), (1, 2), (2, 2), (1, 3)]:
mod1 = TfidfVectorizer(ngram_range=ng, token_pattern=pattern)
mod1.fit(corpus)
- mod2 = TraceableTfidfVectorizer(ngram_range=ng,
- token_pattern=pattern)
+ mod2 = TraceableTfidfVectorizer(ngram_range=ng, token_pattern=pattern)
mod2.fit(corpus)
pred1 = mod1.transform(corpus)
pred2 = mod2.transform(corpus)
- if ' ]' in pattern:
+ if " ]" in pattern:
voc = mod2.vocabulary_
spaces = 0
for k in voc:
self.assertIsInstance(k, tuple)
for i in k:
- if ' ' in i:
+ if " " in i:
spaces += 1
self.assertGreater(spaces, 1)
assert_almost_equal(pred1.todense(), pred2.todense())
@@ -264,21 +275,29 @@ def test_tfidf_vectorizer_regex(self):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer_issue(self):
register()
- corpus = numpy.array([
- 'the-first document.',
- 'this-is the-third-one.',
- 'this-the first-document?',
- ]).reshape((3, 1))
+ corpus = numpy.array(
+ [
+ "the-first document.",
+ "this-is the-third-one.",
+ "this-the first-document?",
+ ]
+ ).reshape((3, 1))
vect = TraceableTfidfVectorizer(
- ngram_range=(1, 2),
- token_pattern=r"\b[a-z ]+\b")
+ ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b"
+ )
vect.fit(corpus.ravel())
- model_onnx = to_onnx(vect, 'TfidfVectorizer',
- initial_types=[('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = to_onnx(
+ vect,
+ "TfidfVectorizer",
+ initial_types=[("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizerIssue-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizerIssue-OneOff-SklCol",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_transformer_converter.py b/tests/test_sklearn_tfidf_transformer_converter.py
index 1dd3ba9c1..f848da48d 100644
--- a/tests/test_sklearn_tfidf_transformer_converter.py
+++ b/tests/test_sklearn_tfidf_transformer_converter.py
@@ -13,17 +13,19 @@
class TestSklearnTfidfTransformerConverter(unittest.TestCase):
-
def test_model_tfidf_transform(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- "Troisième document en français",
- ]).reshape((5, 1))
- data = (CountVectorizer(ngram_range=(1, 1)).fit_transform(
- corpus.ravel()).todense())
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ "Troisième document en français",
+ ]
+ ).reshape((5, 1))
+ data = (
+ CountVectorizer(ngram_range=(1, 1)).fit_transform(corpus.ravel()).todense()
+ )
data = numpy.array(data.astype(numpy.float32))
for sublinear_tf in (False, True):
@@ -44,9 +46,8 @@ def test_model_tfidf_transform(self):
model_onnx = convert_sklearn(
model,
"TfidfTransformer",
- [("input",
- FloatTensorType([None, data.shape[1]]))],
- target_opset=TARGET_OPSET
+ [("input", FloatTensorType([None, data.shape[1]]))],
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
suffix = norm.upper() if norm else ""
@@ -57,7 +58,8 @@ def test_model_tfidf_transform(self):
data,
model,
model_onnx,
- basename="SklearnTfidfTransform" + suffix)
+ basename="SklearnTfidfTransform" + suffix,
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_transformer_converter_sparse.py b/tests/test_sklearn_tfidf_transformer_converter_sparse.py
index 3c0805f05..cb3353a7b 100644
--- a/tests/test_sklearn_tfidf_transformer_converter_sparse.py
+++ b/tests/test_sklearn_tfidf_transformer_converter_sparse.py
@@ -19,9 +19,12 @@ class TestSklearnTfidfVectorizerSparse(unittest.TestCase):
@unittest.skipIf(
TARGET_OPSET < 9,
# issue with encoding
- reason="https://github.com/onnx/onnx/pull/1734")
- @unittest.skipIf(pv.Version(ort.__version__) <= pv.Version("0.2.1"),
- reason="sparse not supported")
+ reason="https://github.com/onnx/onnx/pull/1734",
+ )
+ @unittest.skipIf(
+ pv.Version(ort.__version__) <= pv.Version("0.2.1"),
+ reason="sparse not supported",
+ )
def test_model_tfidf_transform_bug(self):
categories = [
"alt.atheism",
@@ -29,25 +32,26 @@ def test_model_tfidf_transform_bug(self):
"comp.graphics",
"sci.med",
]
- twenty_train = fetch_20newsgroups(subset="train",
- categories=categories,
- shuffle=True,
- random_state=0)
- text_clf = Pipeline([("vect", CountVectorizer()),
- ("tfidf", TfidfTransformer())])
+ twenty_train = fetch_20newsgroups(
+ subset="train", categories=categories, shuffle=True, random_state=0
+ )
+ text_clf = Pipeline(
+ [("vect", CountVectorizer()), ("tfidf", TfidfTransformer())]
+ )
twenty_train.data[0] = "bruît " + twenty_train.data[0]
text_clf.fit(twenty_train.data, twenty_train.target)
model_onnx = convert_sklearn(
text_clf,
name="DocClassifierCV-Tfidf",
initial_types=[("input", StringTensorType([5]))],
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
dump_data_and_model(
twenty_train.data[5:10],
text_clf,
model_onnx,
- basename="SklearnPipelineTfidfTransformer")
+ basename="SklearnPipelineTfidfTransformer",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter.py b/tests/test_sklearn_tfidf_vectorizer_converter.py
index 09ff5ead6..69c2eb445 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter.py
@@ -10,6 +10,7 @@
from numpy.testing import assert_almost_equal
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
+
try:
from sklearn.compose import ColumnTransformer
except ImportError:
@@ -24,314 +25,372 @@
from skl2onnx.common.data_types import StringTensorType, FloatTensorType
from onnxruntime import __version__ as ort_version
from test_utils import (
- dump_data_and_model, TARGET_OPSET,
- InferenceSessionEx as InferenceSession)
+ dump_data_and_model,
+ TARGET_OPSET,
+ InferenceSessionEx as InferenceSession,
+)

-ort_version = '.'.join(ort_version.split('.')[:2])
+ort_version = ".".join(ort_version.split(".")[:2])


class TestSklearnTfidfVectorizer(unittest.TestCase):
-
def get_options(self):
return {TfidfVectorizer: {"tokenexp": None}}
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version("0.3.0"),
- reason="Requires opset 9.")
+ pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9."
+ )
def test_model_tfidf_vectorizer11(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType())],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnTfidfVectorizer11-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnTfidfVectorizer11-OneOff-SklCol"
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': corpus.ravel()})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus.ravel()})[0]
assert res.shape == (4, 9)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version("0.3.0"),
- reason="Requires opset 9.")
+ pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9."
+ )
def test_model_tfidf_vectorizer11_nolowercase(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, lowercase=False)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType())],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol",
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': corpus.ravel()})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus.ravel()})[0]
assert res.shape == (4, 11)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(
- ColumnTransformer is None,
- reason="Requires newer scikit-learn")
+ @unittest.skipIf(ColumnTransformer is None, reason="Requires newer scikit-learn")
def test_model_tfidf_vectorizer11_compose(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
corpus = numpy.hstack([corpus, corpus])
y = numpy.array([0, 1, 0, 1])
- model = ColumnTransformer([
- ('a', TfidfVectorizer(), 0),
- ('b', TfidfVectorizer(), 1),
- ])
+ model = ColumnTransformer(
+ [
+ ("a", TfidfVectorizer(), 0),
+ ("b", TfidfVectorizer(), 1),
+ ]
+ )
model.fit(corpus, y)
- model_onnx = convert_sklearn(model, "TfIdfcomp",
- [("input", StringTensorType([4, 2]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ model,
+ "TfIdfcomp",
+ [("input", StringTensorType([4, 2]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': corpus})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus})[0]
exp = model.transform(corpus)
assert_almost_equal(res, exp)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_empty_string_case1(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- ' ',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ " ",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus[:3].ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
# TfidfVectorizer in onnxruntime fails with empty strings,
# which was fixed in version 0.3.0 afterward
dump_data_and_model(
- corpus[2:], vect, model_onnx,
- basename="SklearnTfidfVectorizer11EmptyStringSepCase1-"
- "OneOff-SklCol")
+ corpus[2:],
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11EmptyStringSepCase1-" "OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_empty_string_case2(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
# onnxruntime fails with empty strings
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_out_vocabulary(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- corpus = numpy.array([
- "AZZ ZZ This is the first document.",
- "BZZ ZZ This document is the second document.",
- "ZZZ ZZ And this is the third one.",
- "WZZ ZZ Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "AZZ ZZ This is the first document.",
+ "BZZ ZZ This document is the second document.",
+ "ZZZ ZZ And this is the third one.",
+ "WZZ ZZ Is this the first document?",
+ ]
+ ).reshape((4, 1))
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer11OutVocab-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer11OutVocab-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer22(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(2, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnTfidfVectorizer22-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer21(self):
corpus = numpy.array(["AA AA", "AA AA BB"]).reshape((2, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnTfidfVectorizer22S-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22S-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnTfidfVectorizer22-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_normL1(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer22L1-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer22L1-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_normL2(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm="l2")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer22L2-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer22L2-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer13(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 3), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus,
- vect,
- model_onnx,
- basename="SklearnTfidfVectorizer13-OneOff-SklCol")
+ corpus, vect, model_onnx, basename="SklearnTfidfVectorizer13-OneOff-SklCol"
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11parenthesis_class(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the (first) document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the (first) document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
extra = {
TfidfVectorizer: {
- "separators": [
- " ", "\\.", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"
- ]
+ "separators": [" ", "\\.", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"]
}
}
model_onnx = convert_sklearn(
@@ -339,7 +398,7 @@ def test_model_tfidf_vectorizer11parenthesis_class(self):
"TfidfVectorizer",
[("input", StringTensorType([1]))],
options=extra,
- target_opset=TARGET_OPSET
+ target_opset=TARGET_OPSET,
)
self.assertTrue(model_onnx is not None)
# This test depends on this issue:
@@ -348,105 +407,122 @@ def test_model_tfidf_vectorizer11parenthesis_class(self):
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer11ParenthesisClass-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer11ParenthesisClass-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_idparenthesis_id(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the (first) document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the (first) document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- extra = {
- id(vect): {"sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"]}
- }
+ extra = {id(vect): {"sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"]}}
try:
convert_sklearn(
vect,
"TfidfVectorizer",
[("input", StringTensorType([None, 1]))],
- options=extra, target_opset=TARGET_OPSET)
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
except (RuntimeError, NameError):
pass
extra = {
id(vect): {
- "separators": [
- " ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"
- ]
+ "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"]
}
}
model_onnx = convert_sklearn(
vect,
"TfidfVectorizer",
[("input", StringTensorType([1]))],
- options=extra, target_opset=TARGET_OPSET)
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11ParenthesisId-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11ParenthesisId-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer_binary(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(binary=True)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizerBinary-OneOff-SklCol")
+ basename="SklearnTfidfVectorizerBinary-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version("0.3.0"),
- reason="Requires opset 9.")
+ pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9."
+ )
def test_model_tfidf_vectorizer11_64(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType())],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer1164-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer1164-OneOff-SklCol",
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': corpus.ravel()})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus.ravel()})[0]
assert res.shape == (4, 9)
- @unittest.skipIf(
- apply_less is None, reason="onnxconverter-common too old")
+ @unittest.skipIf(apply_less is None, reason="onnxconverter-common too old")
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.3.0"),
- reason="Requires opset 9.")
+ pv.Version(ort_version) < pv.Version("1.3.0"), reason="Requires opset 9."
+ )
def test_tfidf_svm(self):
data = [
["schedule a meeting", 0],
@@ -454,7 +530,7 @@ def test_tfidf_svm(self):
["slot in a meeting", 0],
["call ron", 1],
["make a phone call", 1],
- ["call in on the phone", 2]
+ ["call in on the phone", 2],
]
docs = [doc for (doc, _) in data]
labels = [label for (_, label) in data]
@@ -469,63 +545,75 @@ def test_tfidf_svm(self):
embeddings = embeddings.astype(numpy.float32).todense()
exp = clf.predict(embeddings)
- initial_type = [('input', FloatTensorType([None, dim]))]
- model_onnx = convert_sklearn(clf, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ initial_type = [("input", FloatTensorType([None, dim]))]
+ model_onnx = convert_sklearn(
+ clf, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': embeddings})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": embeddings})[0]
assert_almost_equal(exp, res)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version("1.0.0"),
- reason="Requires opset 10.")
+ pv.Version(ort_version) <= pv.Version("1.0.0"), reason="Requires opset 10."
+ )
def test_model_tfidf_vectorizer_nan(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
options = copy.deepcopy(self.get_options())
- options[TfidfVectorizer]['nan'] = True
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType())],
- options=options,
- target_opset=TARGET_OPSET)
+ options[TfidfVectorizer]["nan"] = True
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': corpus.ravel()})[0]
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus.ravel()})[0]
assert res.shape == (4, 9)
assert numpy.isnan(res[0, 0])
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_model_tfidf_vectorizer11_custom_vocabulary(self):
- corpus = numpy.array([
- "This is the first document.",
- "This document is the second document.",
- "And this is the third one.",
- "Is this the first document?",
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vc = ["first", "second", "third", "document", "this"]
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, vocabulary=vc)
vect.fit(corpus.ravel())
self.assertFalse(hasattr(vect, "stop_words_"))
- model_onnx = convert_sklearn(vect, "TfidfVectorizer",
- [("input", StringTensorType())],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
corpus,
vect,
model_onnx,
- basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol")
+ basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_char.py b/tests/test_sklearn_tfidf_vectorizer_converter_char.py
index 6b1149817..f0096b18e 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter_char.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter_char.py
@@ -12,128 +12,176 @@
class TestSklearnTfidfVectorizerRegex(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_short_word(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 1), norm=None,
- analyzer='word', token_pattern=".{1,2}")
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(
+ ngram_range=(1, 1), norm=None, analyzer="word", token_pattern=".{1,2}"
+ )
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer22_short_word(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm=None,
- analyzer='word', token_pattern=".{1,5}")
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(
+ ngram_range=(1, 2), norm=None, analyzer="word", token_pattern=".{1,5}"
+ )
vect.fit(corpus.ravel())
try:
- convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
except RuntimeError as e:
assert ("Unable to split n-grams 'e fir st do'") in str(e)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_char(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 1), norm=None,
- analyzer='char')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, analyzer="char")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11Char-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11Char-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(True, reason="expected failure")
def test_model_tfidf_vectorizer11_char_doublespace(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 1), norm=None,
- analyzer='char')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 1), norm=None, analyzer="char")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11CharSpace-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11CharSpace-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_char(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm=None,
- analyzer='char')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), norm=None, analyzer="char")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12Char-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12Char-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_normL1_char(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm='l1', analyzer='char')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1", analyzer="char")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12L1Char-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12L1Char-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_short_word_spaces(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm=None,
- analyzer='word', token_pattern=".{1,3}")
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(
+ ngram_range=(1, 2), norm=None, analyzer="word", token_pattern=".{1,3}"
+ )
vect.fit(corpus.ravel())
try:
model_onnx = convert_sklearn(
- vect, 'TfidfVectorizer',
- [('input', StringTensorType([None, 1]))],
- target_opset=TARGET_OPSET)
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([None, 1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
except RuntimeError as e:
if "Unable to split n-grams 't i s t'" not in str(e):
@@ -141,21 +189,30 @@ def test_model_tfidf_vectorizer12_short_word_spaces(self):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_short_word_spaces(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- ]).reshape((2, 1))
- vect = TfidfVectorizer(ngram_range=(1, 1), norm=None,
- analyzer='word', token_pattern=".{1,3}")
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ ]
+ ).reshape((2, 1))
+ vect = TfidfVectorizer(
+ ngram_range=(1, 1), norm=None, analyzer="word", token_pattern=".{1,3}"
+ )
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py
index e4504cd20..9ed7306ae 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py
@@ -14,36 +14,46 @@
class TestSklearnTfidfVectorizerDataSet(unittest.TestCase):
-
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_tfidf_20newsgroups(self):
data = fetch_20newsgroups()
X, y = np.array(data.data)[:100], np.array(data.target)[:100]
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=42)
+ X, y, test_size=0.5, random_state=42
+ )
model = TfidfVectorizer().fit(X_train)
onnx_model = convert_sklearn(
- model, 'cv', [('input', StringTensorType(X_test.shape))],
- target_opset=TARGET_OPSET)
+ model,
+ "cv",
+ [("input", StringTensorType(X_test.shape))],
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X_test, model, onnx_model,
- basename="SklearnTfidfVectorizer20newsgroups")
+ X_test, model, onnx_model, basename="SklearnTfidfVectorizer20newsgroups"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_tfidf_20newsgroups_nolowercase(self):
data = fetch_20newsgroups()
X, y = np.array(data.data)[:100], np.array(data.target)[:100]
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=42)
+ X, y, test_size=0.5, random_state=42
+ )
model = TfidfVectorizer(lowercase=False).fit(X_train)
onnx_model = convert_sklearn(
- model, 'cv', [('input', StringTensorType(X_test.shape))],
- target_opset=TARGET_OPSET)
+ model,
+ "cv",
+ [("input", StringTensorType(X_test.shape))],
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- X_test, model, onnx_model,
- basename="SklearnTfidfVectorizer20newsgroupsNOLower")
+ X_test,
+ model,
+ onnx_model,
+ basename="SklearnTfidfVectorizer20newsgroupsNOLower",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py
index 634bc23c7..5872d0f09 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py
@@ -18,77 +18,123 @@
class TestSklearnTfidfVectorizerPipeline(unittest.TestCase):
-
- def common_test_model_tfidf_vectorizer_pipeline_cls(
- self, kind=None, verbose=False):
- if kind == 'stop':
- if pv.Version(ort_version) >= pv.Version('1.4.0'):
+ def common_test_model_tfidf_vectorizer_pipeline_cls(self, kind=None, verbose=False):
+ if kind == "stop":
+ if pv.Version(ort_version) >= pv.Version("1.4.0"):
# regression with stopwords in onnxruntime 1.4+
- stopwords = ['theh']
+ stopwords = ["theh"]
else:
- stopwords = ['the', 'and', 'is']
+ stopwords = ["the", "and", "is"]
else:
stopwords = None
- X_train = numpy.array([
- "This is the first document",
- "This document is the second document.",
- "And this is the third one",
- "Is this the first document?",
- ]).reshape((4, 1))
+ X_train = numpy.array(
+ [
+ "This is the first document",
+ "This document is the second document.",
+ "And this is the third one",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
y_train = numpy.array([0, 1, 0, 1])
if kind is None:
- model_pipeline = Pipeline([
- ('vectorizer', TfidfVectorizer(
- stop_words=stopwords, lowercase=True, use_idf=True,
- ngram_range=(1, 3), max_features=30000)),
- ])
- elif kind == 'cls':
- model_pipeline = Pipeline([
- ('vectorizer', TfidfVectorizer(
- stop_words=stopwords, lowercase=True, use_idf=True,
- ngram_range=(1, 3), max_features=30000)),
- ('feature_selector', SelectKBest(k=10)),
- ('classifier', SVC(
- class_weight='balanced', kernel='rbf', gamma='scale',
- probability=True))
- ])
- elif kind == 'stop':
- model_pipeline = Pipeline([
- ('vectorizer', CountVectorizer(
- stop_words=stopwords, lowercase=True,
- ngram_range=(1, 2), max_features=30000)),
- ])
- elif kind == 'reg':
- model_pipeline = Pipeline([
- ('vectorizer', TfidfVectorizer(
- stop_words=stopwords, lowercase=True, use_idf=True,
- ngram_range=(1, 3), max_features=30000)),
- ('feature_selector', SelectKBest(k=10)),
- ('classifier', SVR(kernel='rbf', gamma='scale'))
- ])
+ model_pipeline = Pipeline(
+ [
+ (
+ "vectorizer",
+ TfidfVectorizer(
+ stop_words=stopwords,
+ lowercase=True,
+ use_idf=True,
+ ngram_range=(1, 3),
+ max_features=30000,
+ ),
+ ),
+ ]
+ )
+ elif kind == "cls":
+ model_pipeline = Pipeline(
+ [
+ (
+ "vectorizer",
+ TfidfVectorizer(
+ stop_words=stopwords,
+ lowercase=True,
+ use_idf=True,
+ ngram_range=(1, 3),
+ max_features=30000,
+ ),
+ ),
+ ("feature_selector", SelectKBest(k=10)),
+ (
+ "classifier",
+ SVC(
+ class_weight="balanced",
+ kernel="rbf",
+ gamma="scale",
+ probability=True,
+ ),
+ ),
+ ]
+ )
+ elif kind == "stop":
+ model_pipeline = Pipeline(
+ [
+ (
+ "vectorizer",
+ CountVectorizer(
+ stop_words=stopwords,
+ lowercase=True,
+ ngram_range=(1, 2),
+ max_features=30000,
+ ),
+ ),
+ ]
+ )
+ elif kind == "reg":
+ model_pipeline = Pipeline(
+ [
+ (
+ "vectorizer",
+ TfidfVectorizer(
+ stop_words=stopwords,
+ lowercase=True,
+ use_idf=True,
+ ngram_range=(1, 3),
+ max_features=30000,
+ ),
+ ),
+ ("feature_selector", SelectKBest(k=10)),
+ ("classifier", SVR(kernel="rbf", gamma="scale")),
+ ]
+ )
else:
raise AssertionError(kind)
model_pipeline.fit(X_train.ravel(), y_train)
- initial_type = [('input', StringTensorType([None, 1]))]
+ initial_type = [("input", StringTensorType([None, 1]))]
model_onnx = convert_sklearn(
- model_pipeline, "cv", initial_types=initial_type,
- options={SVC: {'zipmap': False}},
- target_opset=TARGET_OPSET)
+ model_pipeline,
+ "cv",
+ initial_types=initial_type,
+ options={SVC: {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
- if kind in (None, 'stop'):
+ if kind in (None, "stop"):
exp = [model_pipeline.transform(X_train.ravel()).toarray()]
- elif kind == 'cls':
- exp = [model_pipeline.predict(X_train.ravel()),
- model_pipeline.predict_proba(X_train.ravel())]
- elif kind == 'reg':
+ elif kind == "cls":
+ exp = [
+ model_pipeline.predict(X_train.ravel()),
+ model_pipeline.predict_proba(X_train.ravel()),
+ ]
+ elif kind == "reg":
exp = [model_pipeline.predict(X_train.ravel()).reshape((-1, 1))]
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'input': X_train})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"input": X_train})
if verbose:
voc = model_pipeline.steps[0][-1].vocabulary_
voc = list(sorted([(v, k) for k, v in voc.items()]))
@@ -102,23 +148,23 @@ def common_test_model_tfidf_vectorizer_pipeline_cls(
assert_almost_equal(a, b)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
- @unittest.skipIf(
- pv.Version(ort_version) < pv.Version("1.0.0"),
- reason="Too old")
+ @unittest.skipIf(pv.Version(ort_version) < pv.Version("1.0.0"), reason="Too old")
def test_model_tfidf_vectorizer_pipeline(self):
- for kind in [None, 'cls', 'reg']:
+ for kind in [None, "cls", "reg"]:
with self.subTest(kind=kind):
self.common_test_model_tfidf_vectorizer_pipeline_cls(kind)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.4.0"),
- reason="Wrong handling of stopwods and n-grams")
+ reason="Wrong handling of stopwods and n-grams",
+ )
def test_model_tfidf_vectorizer_pipeline_stop_words(self):
- for kind in ['stop']:
+ for kind in ["stop"]:
with self.subTest(kind=kind):
self.common_test_model_tfidf_vectorizer_pipeline_cls(
- kind, verbose=False)
+ kind, verbose=False
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py
index 4e383d492..df94705f4 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py
@@ -12,45 +12,57 @@
class TestSklearnTfidfVectorizerRegex(unittest.TestCase):
-
def get_options(self):
return {TfidfVectorizer: {"tokenexp": ""}}
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_opset(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
for opset in range(8, TARGET_OPSET + 1):
try:
model_onnx = convert_sklearn(
- vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(), target_opset=opset)
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=opset,
+ )
except RuntimeError as e:
if "only works for opset" in str(e):
continue
@@ -62,284 +74,390 @@ def test_model_tfidf_vectorizer11_opset(self):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_word4(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
- vect = TfidfVectorizer(ngram_range=(
- 1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}")
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = TfidfVectorizer(
+ ngram_range=(1, 1), norm=None, token_pattern="[a-zA-Z]{1,4}"
+ )
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_empty_string(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- '',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
# TfidfVectorizer in onnxruntime fails with empty strings
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_out_vocabulary(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- corpus = numpy.array([
- 'AZZ ZZ This is the first document.',
- 'BZZ ZZ This document is the second document.',
- 'ZZZ ZZ And this is the third one.',
- 'WZZ ZZ Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "AZZ ZZ This is the first document.",
+ "BZZ ZZ This document is the second document.",
+ "ZZZ ZZ And this is the third one.",
+ "WZZ ZZ Is this the first document?",
+ ]
+ ).reshape((4, 1))
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer22(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(2, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer22Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer22Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12(self):
- corpus = numpy.array([
- 'AA AA',
- 'AA AA BB',
- ]).reshape((2, 1))
+ corpus = numpy.array(
+ [
+ "AA AA",
+ "AA AA BB",
+ ]
+ ).reshape((2, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12SRegex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12SRegex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer122(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 2), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_normL1(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm='l1')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), norm="l1")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12L1Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12L1Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer12_normL2(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
- vect = TfidfVectorizer(ngram_range=(1, 2), norm='l2')
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), norm="l2")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer12L2Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer12L2Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer13(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the first document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 3), norm=None)
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer13Regex-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer13Regex-OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11parenthesis_class(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the (first) document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the (first) document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- extra = {TfidfVectorizer: {'separators': [
- ' ', '[.]', '\\?', ',', ';',
- ':', '\\!', '\\(', '\\)'
- ],
- 'tokenexp': None}}
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=extra,
- target_opset=TARGET_OPSET)
+ extra = {
+ TfidfVectorizer: {
+ "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"],
+ "tokenexp": None,
+ }
+ }
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
# This test depends on this issue:
# https://github.com/Microsoft/onnxruntime/issues/957.
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11ParenthesisClassRegex-"
- "OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11ParenthesisClassRegex-" "OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer11_idparenthesis_id(self):
- corpus = numpy.array([
- 'This is the first document.',
- 'This document is the second document.',
- 'And this is the third one.',
- 'Is this the (first) document?',
- ]).reshape((4, 1))
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the (first) document?",
+ ]
+ ).reshape((4, 1))
vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
vect.fit(corpus.ravel())
- extra = {id(vect): {"sep2": [' ', '.', '?', ',', ';', ':',
- '!', '(', ')'],
- 'regex': None}}
+ extra = {
+ id(vect): {
+ "sep2": [" ", ".", "?", ",", ";", ":", "!", "(", ")"],
+ "regex": None,
+ }
+ }
try:
- convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=extra,
- target_opset=TARGET_OPSET)
+ convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
except (RuntimeError, NameError):
pass
- extra = {id(vect): {"separators": [
- ' ', '[.]', '\\?', ',', ';', ':',
- '\\!', '\\(', '\\)'
- ],
- "tokenexp": None}}
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=extra,
- target_opset=TARGET_OPSET)
+ extra = {
+ id(vect): {
+ "separators": [" ", "[.]", "\\?", ",", ";", ":", "\\!", "\\(", "\\)"],
+ "tokenexp": None,
+ }
+ }
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=extra,
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
# This test depends on this issue:
# https://github.com/Microsoft/onnxruntime/issues/957.
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizer11ParenthesisIdRegex-"
- "OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11ParenthesisIdRegex-" "OneOff-SklCol",
+ )
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_tfidf_vectorizer_issue(self):
- corpus = numpy.array([
- 'the-first document.',
- 'this-is the-third-one.',
- 'this-the first-document?',
- ]).reshape((3, 1))
- vect = TfidfVectorizer(
- ngram_range=(1, 2),
- token_pattern=r"\b[a-z ]+\b")
+ corpus = numpy.array(
+ [
+ "the-first document.",
+ "this-is the-third-one.",
+ "this-the first-document?",
+ ]
+ ).reshape((3, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b")
vect.fit(corpus.ravel())
with self.assertRaises(RuntimeError) as e:
- convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertIn("More one decomposition in tokens", str(e))
self.assertIn(
- "Unable to split n-grams 'the first document' into tokens.",
- str(e))
+ "Unable to split n-grams 'the first document' into tokens.", str(e)
+ )
- corpus = numpy.array([
- 'first document.',
- 'this-is the-third-one.',
- 'the first document',
- ]).reshape((3, 1))
- vect = TfidfVectorizer(
- ngram_range=(1, 2),
- token_pattern=r"\b[a-z ]+\b")
+ corpus = numpy.array(
+ [
+ "first document.",
+ "this-is the-third-one.",
+ "the first document",
+ ]
+ ).reshape((3, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 2), token_pattern=r"\b[a-z ]+\b")
vect.fit(corpus.ravel())
- model_onnx = convert_sklearn(vect, 'TfidfVectorizer',
- [('input', StringTensorType([1]))],
- options=self.get_options(),
- target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType([1]))],
+ options=self.get_options(),
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(
- corpus, vect, model_onnx,
- basename="SklearnTfidfVectorizerIssue-OneOff-SklCol")
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizerIssue-OneOff-SklCol",
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_truncated_svd.py b/tests/test_sklearn_truncated_svd.py
index 46baeba9e..b6f46afbf 100644
--- a/tests/test_sklearn_truncated_svd.py
+++ b/tests/test_sklearn_truncated_svd.py
@@ -22,39 +22,37 @@ def test_truncated_svd(self):
svd = TruncatedSVD(n_components=K)
svd.fit(x)
- model_onnx = convert_sklearn(svd,
- initial_types=[
- ("input",
- FloatTensorType(shape=[None, C]))
- ], target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ svd,
+ initial_types=[("input", FloatTensorType(shape=[None, C]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
dump_data_and_model(x, svd, model_onnx, basename="SklearnTruncatedSVD")
def test_truncated_svd_arpack(self):
X = create_tensor(10, 10)
- svd = TruncatedSVD(n_components=5, algorithm='arpack', n_iter=10,
- tol=0.1, random_state=42).fit(X)
- model_onnx = convert_sklearn(svd,
- initial_types=[
- ("input",
- FloatTensorType(shape=X.shape))
- ], target_opset=TARGET_OPSET)
+ svd = TruncatedSVD(
+ n_components=5, algorithm="arpack", n_iter=10, tol=0.1, random_state=42
+ ).fit(X)
+ model_onnx = convert_sklearn(
+ svd,
+ initial_types=[("input", FloatTensorType(shape=X.shape))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(X, svd, model_onnx,
- basename="SklearnTruncatedSVDArpack")
+ dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDArpack")
def test_truncated_svd_int(self):
X = create_tensor(5, 5).astype(np.int64)
svd = TruncatedSVD(n_iter=20, random_state=42).fit(X)
- model_onnx = convert_sklearn(svd,
- initial_types=[
- ("input",
- Int64TensorType([None, X.shape[1]]))
- ], target_opset=TARGET_OPSET)
+ model_onnx = convert_sklearn(
+ svd,
+ initial_types=[("input", Int64TensorType([None, X.shape[1]]))],
+ target_opset=TARGET_OPSET,
+ )
self.assertTrue(model_onnx is not None)
- dump_data_and_model(
- X, svd, model_onnx,
- basename="SklearnTruncatedSVDInt")
+ dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDInt")
if __name__ == "__main__":
diff --git a/tests/test_sklearn_voting_classifier_converter.py b/tests/test_sklearn_voting_classifier_converter.py
index 0faf8c141..cc571f782 100644
--- a/tests/test_sklearn_voting_classifier_converter.py
+++ b/tests/test_sklearn_voting_classifier_converter.py
@@ -14,7 +14,7 @@
dump_multiple_classification,
dump_binary_classification,
dump_data_and_model,
- TARGET_OPSET
+ TARGET_OPSET,
)
@@ -43,28 +43,27 @@ def custom_tranform_converter(scope, operator, container):
weights = [0.5, 0.1, 10]
shape = [len(weights), 1]
container.add_initializer(weights_name, atype, shape, weights)
- apply_mul(scope, [input.full_name, weights_name], output.full_name,
- container)
+ apply_mul(scope, [input.full_name, weights_name], output.full_name, container)
class TestVotingClassifierConverter(unittest.TestCase):
def test_operator_mul(self):
-
model = CustomTransform()
Xd = numpy.array([[1, 2], [3, 4], [4, 5]])
model_onnx = convert_sklearn(
- model, "CustomTransform",
+ model,
+ "CustomTransform",
[("input", FloatTensorType([None, Xd.shape[1]]))],
custom_shape_calculators={
CustomTransform: custom_transform_shape_calculator
},
- custom_conversion_functions={
- CustomTransform: custom_tranform_converter
- }, target_opset=TARGET_OPSET)
+ custom_conversion_functions={CustomTransform: custom_tranform_converter},
+ target_opset=TARGET_OPSET,
+ )
dump_data_and_model(
- Xd.astype(numpy.float32), model, model_onnx,
- basename="CustomTransformerMul")
+ Xd.astype(numpy.float32), model, model_onnx, basename="CustomTransformerMul"
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_voting_hard_binary(self):
@@ -78,8 +77,8 @@ def test_voting_hard_binary(self):
)
# predict_proba is not defined when voting is hard.
dump_binary_classification(
- model, suffix="Hard", comparable_outputs=[0],
- target_opset=TARGET_OPSET)
+ model, suffix="Hard", comparable_outputs=[0], target_opset=TARGET_OPSET
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_voting_hard_binary_weights(self):
@@ -94,8 +93,11 @@ def test_voting_hard_binary_weights(self):
)
# predict_proba is not defined when voting is hard.
dump_binary_classification(
- model, suffix="WeightsHard", comparable_outputs=[0],
- target_opset=TARGET_OPSET)
+ model,
+ suffix="WeightsHard",
+ comparable_outputs=[0],
+ target_opset=TARGET_OPSET,
+ )
def test_voting_soft_binary(self):
model = VotingClassifier(
@@ -107,8 +109,8 @@ def test_voting_soft_binary(self):
],
)
dump_binary_classification(
- model, suffix="Soft", comparable_outputs=[0, 1],
- target_opset=TARGET_OPSET)
+ model, suffix="Soft", comparable_outputs=[0, 1], target_opset=TARGET_OPSET
+ )
def test_voting_soft_binary_weighted(self):
model = VotingClassifier(
@@ -121,8 +123,8 @@ def test_voting_soft_binary_weighted(self):
],
)
dump_binary_classification(
- model, suffix="WeightedSoft",
- target_opset=TARGET_OPSET)
+ model, suffix="WeightedSoft", target_opset=TARGET_OPSET
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_voting_hard_multi(self):
@@ -136,8 +138,8 @@ def test_voting_hard_multi(self):
],
)
dump_multiple_classification(
- model, suffix="Hard", comparable_outputs=[0],
- target_opset=TARGET_OPSET)
+ model, suffix="Hard", comparable_outputs=[0], target_opset=TARGET_OPSET
+ )
@unittest.skipIf(TARGET_OPSET < 9, reason="not available")
def test_voting_hard_multi_weighted(self):
@@ -152,8 +154,11 @@ def test_voting_hard_multi_weighted(self):
],
)
dump_multiple_classification(
- model, suffix="WeightedHard", comparable_outputs=[0],
- target_opset=TARGET_OPSET)
+ model,
+ suffix="WeightedHard",
+ comparable_outputs=[0],
+ target_opset=TARGET_OPSET,
+ )
def test_voting_soft_multi(self):
model = VotingClassifier(
@@ -164,8 +169,7 @@ def test_voting_soft_multi(self):
("lr2", LogisticRegression()),
],
)
- dump_multiple_classification(
- model, suffix="Soft", target_opset=TARGET_OPSET)
+ dump_multiple_classification(model, suffix="Soft", target_opset=TARGET_OPSET)
def test_voting_soft_multi_string(self):
model = VotingClassifier(
@@ -177,8 +181,8 @@ def test_voting_soft_multi_string(self):
],
)
dump_multiple_classification(
- model, label_string=True, suffix="Soft",
- target_opset=TARGET_OPSET)
+ model, label_string=True, suffix="Soft", target_opset=TARGET_OPSET
+ )
def test_voting_soft_multi_weighted(self):
model = VotingClassifier(
@@ -191,8 +195,8 @@ def test_voting_soft_multi_weighted(self):
],
)
dump_multiple_classification(
- model, suffix="WeightedSoft",
- target_opset=TARGET_OPSET)
+ model, suffix="WeightedSoft", target_opset=TARGET_OPSET
+ )
def test_voting_soft_multi_weighted4(self):
model = VotingClassifier(
@@ -207,8 +211,8 @@ def test_voting_soft_multi_weighted4(self):
],
)
dump_multiple_classification(
- model, suffix="Weighted4Soft",
- target_opset=TARGET_OPSET)
+ model, suffix="Weighted4Soft", target_opset=TARGET_OPSET
+ )
def test_voting_soft_multi_weighted42(self):
model = VotingClassifier(
@@ -223,8 +227,8 @@ def test_voting_soft_multi_weighted42(self):
],
)
dump_multiple_classification(
- model, suffix="Weighted42Soft",
- target_opset=TARGET_OPSET)
+ model, suffix="Weighted42Soft", target_opset=TARGET_OPSET
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_voting_regressor_converter.py b/tests/test_sklearn_voting_regressor_converter.py
index e4f6639ac..005a1d336 100644
--- a/tests/test_sklearn_voting_regressor_converter.py
+++ b/tests/test_sklearn_voting_regressor_converter.py
@@ -5,6 +5,7 @@
import unittest
import numpy
from sklearn.linear_model import LinearRegression
+
try:
from sklearn.ensemble import VotingRegressor
except ImportError:
@@ -17,58 +18,72 @@
FloatTensorType,
Int64TensorType,
)
-from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET)
+from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET
def model_to_test():
- return VotingRegressor([
- ('lr', LinearRegression()),
- ('dt', DecisionTreeRegressor()),
- ])
+ return VotingRegressor(
+ [
+ ("lr", LinearRegression()),
+ ("dt", DecisionTreeRegressor()),
+ ]
+ )
class TestVotingRegressorConverter(unittest.TestCase):
-
@unittest.skipIf(VotingRegressor is None, reason="new in 0.21")
def test_model_voting_regression(self):
model, X = fit_regression_model(model_to_test())
model_onnx = convert_sklearn(
- model, "voting regression",
+ model,
+ "voting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X.astype(numpy.float32),
- model, model_onnx,
+ model,
+ model_onnx,
basename="SklearnVotingRegressor-Dec4",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
@unittest.skipIf(VotingRegressor is None, reason="new in 0.21")
def test_model_voting_regression_int(self):
model, X = fit_regression_model(model_to_test(), is_int=True)
model_onnx = convert_sklearn(
- model, "voting regression",
+ model,
+ "voting regression",
[("input", Int64TensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnVotingRegressorInt-Dec4",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
@unittest.skipIf(VotingRegressor is None, reason="new in 0.21")
def test_model_voting_regression_bool(self):
model, X = fit_regression_model(model_to_test(), is_bool=True)
model_onnx = convert_sklearn(
- model, "voting regression",
+ model,
+ "voting regression",
[("input", BooleanTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
self.assertIsNotNone(model_onnx)
dump_data_and_model(
- X, model, model_onnx,
+ X,
+ model,
+ model_onnx,
basename="SklearnVotingRegressorBool",
- comparable_outputs=[0])
+ comparable_outputs=[0],
+ )
if __name__ == "__main__":
diff --git a/tests/test_sklearn_woe_transformer.py b/tests/test_sklearn_woe_transformer.py
index 398d89f47..e54a9c5f6 100644
--- a/tests/test_sklearn_woe_transformer.py
+++ b/tests/test_sklearn_woe_transformer.py
@@ -6,6 +6,7 @@
import unittest
import numpy
from numpy.testing import assert_almost_equal
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
except ImportError:
@@ -22,116 +23,139 @@
class TestSklearnWOETransformerConverter(unittest.TestCase):
-
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer(self):
x = numpy.array(
- [[0.5, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 0.5, 0.92]],
- dtype=numpy.float32)
- woe = WOETransformer(intervals=[
- [(0.5, 0.7, False, False),
- (0.5, 0.7, True, False),
- (0.5, 0.7, False, True),
- (0.5, 0.7, True, True)],
- [(0.9, numpy.inf),
- (-numpy.inf, 0.9)]])
+ [[0.5, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 0.5, 0.92]], dtype=numpy.float32
+ )
+ woe = WOETransformer(
+ intervals=[
+ [
+ (0.5, 0.7, False, False),
+ (0.5, 0.7, True, False),
+ (0.5, 0.7, False, True),
+ (0.5, 0.7, True, True),
+ ],
+ [(0.9, numpy.inf), (-numpy.inf, 0.9)],
+ ]
+ )
woe.fit(x)
self.assertEqual(woe.indices_, [(0, 4), (4, 6), (6, 7)])
self.assertEqual(woe.n_dims_, 7)
- self.assertEqual(woe.intervals_, [
- [(0.5, 0.7, False, False),
- (0.5, 0.7, True, False),
- (0.5, 0.7, False, True),
- (0.5, 0.7, True, True)],
- [(0.9, numpy.inf, False, True),
- (-numpy.inf, 0.9, False, True)],
- None])
- self.assertEqual(woe.weights_, [
- [1, 1, 1, 1], [1, 1], None])
+ self.assertEqual(
+ woe.intervals_,
+ [
+ [
+ (0.5, 0.7, False, False),
+ (0.5, 0.7, True, False),
+ (0.5, 0.7, False, True),
+ (0.5, 0.7, True, True),
+ ],
+ [(0.9, numpy.inf, False, True), (-numpy.inf, 0.9, False, True)],
+ None,
+ ],
+ )
+ self.assertEqual(woe.weights_, [[1, 1, 1, 1], [1, 1], None])
names = woe.get_feature_names()
self.assertEqual(
names,
- [']0.5,0.7[', '[0.5,0.7[', ']0.5,0.7]', '[0.5,0.7]',
- ']0.9,inf]', ']-inf,0.9]', 'X2'])
+ [
+ "]0.5,0.7[",
+ "[0.5,0.7[",
+ "]0.5,0.7]",
+ "[0.5,0.7]",
+ "]0.9,inf]",
+ "]-inf,0.9]",
+ "X2",
+ ],
+ )
x2 = woe.transform(x)
expected = numpy.array(
- [[0, 1, 0, 1, 0, 1, 0.9],
- [1, 1, 1, 1, 0, 1, 0.91],
- [0, 0, 1, 1, 0, 1, 0.92]],
- dtype=numpy.float32)
+ [
+ [0, 1, 0, 1, 0, 1, 0.9],
+ [1, 1, 1, 1, 0, 1, 0.91],
+ [0, 0, 1, 1, 0, 1, 0.92],
+ ],
+ dtype=numpy.float32,
+ )
assert_almost_equal(expected, x2)
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv_ext(self):
x = numpy.array(
- [[0.4, 1.4, 2.4, 3.4],
- [0.5, 1.5, 2.5, 3.5],
- [0.6, 1.6, 2.6, 3.6],
- [0.7, 1.7, 2.7, 3.7]],
- dtype=numpy.float32)
- woe = WOETransformer(intervals=[
- [(0.4, 0.6, False, False)],
- [(1.4, 1.6, False, True)],
- [(2.4, 2.6, True, False)],
- [(3.4, 3.6, True, True)]])
+ [
+ [0.4, 1.4, 2.4, 3.4],
+ [0.5, 1.5, 2.5, 3.5],
+ [0.6, 1.6, 2.6, 3.6],
+ [0.7, 1.7, 2.7, 3.7],
+ ],
+ dtype=numpy.float32,
+ )
+ woe = WOETransformer(
+ intervals=[
+ [(0.4, 0.6, False, False)],
+ [(1.4, 1.6, False, True)],
+ [(2.4, 2.6, True, False)],
+ [(3.4, 3.6, True, True)],
+ ]
+ )
woe.fit(x)
expected = woe.transform(x)
onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv_ext2(self):
- for inca, incb in [(False, False), (True, True),
- (False, True), (True, False)]:
+ for inca, incb in [(False, False), (True, True), (False, True), (True, False)]:
with self.subTest(inca=inca, incb=incb):
x = numpy.array([[0.45], [0.5], [0.55]], dtype=numpy.float32)
- woe = WOETransformer(intervals=[
- [(0.4, 0.5, False, inca), (0.5, 0.6, incb, False)]])
+ woe = WOETransformer(
+ intervals=[[(0.4, 0.5, False, inca), (0.5, 0.6, incb, False)]]
+ )
woe.fit(x)
expected = woe.transform(x)
- onnx_model = to_onnx(
- woe, x, target_opset=TARGET_OPSET, verbose=0)
+ onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET, verbose=0)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv_ext3(self):
x = numpy.array(
- [[0.4, 1.4, 2.4, 3.4],
- [0.5, 1.5, 2.5, 3.5],
- [0.6, 1.6, 2.6, 3.6]],
- dtype=numpy.float32)
- woe = WOETransformer(intervals=[
- [(0.4, 0.5, False, False), (0.5, 0.6, False, False)],
- [(1.4, 1.5, False, True), (1.5, 1.6, False, True)],
- [(2.4, 2.5, True, False), (2.5, 2.6, True, False)],
- [(3.4, 3.5, True, True), (3.5, 3.6, True, True)]])
+ [[0.4, 1.4, 2.4, 3.4], [0.5, 1.5, 2.5, 3.5], [0.6, 1.6, 2.6, 3.6]],
+ dtype=numpy.float32,
+ )
+ woe = WOETransformer(
+ intervals=[
+ [(0.4, 0.5, False, False), (0.5, 0.6, False, False)],
+ [(1.4, 1.5, False, True), (1.5, 1.6, False, True)],
+ [(2.4, 2.5, True, False), (2.5, 2.6, True, False)],
+ [(3.4, 3.5, True, True), (3.5, 3.6, True, True)],
+ ]
+ )
woe.fit(x)
expected = woe.transform(x)
onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv(self):
x = numpy.array(
- [[0.2, 0.7, 0.9],
- [0.51, 0.71, 0.91],
- [0.7, 1.5, 0.92]],
- dtype=numpy.float32)
- woe = WOETransformer(intervals=[
- [(0.4, 0.6, False, True)],
- [(0.9, numpy.inf), (-numpy.inf, 0.9)]])
+ [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32
+ )
+ woe = WOETransformer(
+ intervals=[[(0.4, 0.6, False, True)], [(0.9, numpy.inf), (-numpy.inf, 0.9)]]
+ )
woe.fit(x)
expected = woe.transform(x)
@@ -141,131 +165,138 @@ def test_woe_transformer_conv(self):
f.write(onnx_model.SerializeToString())
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv_weights(self):
x = numpy.array(
- [[0.2, 0.7, 0.9],
- [0.51, 0.71, 0.91],
- [0.7, 1.5, 0.92]],
- dtype=numpy.float32)
+ [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32
+ )
woe = WOETransformer(
- intervals=[[(0.4, 0.6, False, True)],
- [(0.9, numpy.inf), (-numpy.inf, 0.9)]],
- weights=[[2.7], [3.5, 6.7]])
+ intervals=[
+ [(0.4, 0.6, False, True)],
+ [(0.9, numpy.inf), (-numpy.inf, 0.9)],
+ ],
+ weights=[[2.7], [3.5, 6.7]],
+ )
woe.fit(x)
expected = woe.transform(x)
onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(InvalidArgument is None,
- reason='onnxruntime is too old')
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old")
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_conv_weights_onnx(self):
x = numpy.array(
- [[0.2, 0.7, 0.9],
- [0.51, 0.71, 0.91],
- [0.7, 1.5, 0.92]],
- dtype=numpy.float32)
+ [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32
+ )
woe = WOETransformer(
- intervals=[[(0.4, 0.6, False, True)],
- [(0.9, numpy.inf), (-numpy.inf, 0.9)]],
- weights=[[2.7], [3.5, 6.7]])
+ intervals=[
+ [(0.4, 0.6, False, True)],
+ [(0.9, numpy.inf), (-numpy.inf, 0.9)],
+ ],
+ weights=[[2.7], [3.5, 6.7]],
+ )
woe.fit(x)
expected = woe.transform(x)
onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET)
try:
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Cannot load model:\n%s" % str(onnx_model)) from e
- got = sess.run(None, {'X': x})[0]
+ raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(InvalidArgument is None,
- reason='onnxruntime is too old')
+ @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old")
def test_woe_transformer_conv_weights_onnx_noonehot(self):
x = numpy.array(
- [[0.2, 0.7, 0.9],
- [0.51, 0.71, 0.91],
- [0.7, 1.5, 0.92]],
- dtype=numpy.float32)
+ [[0.2, 0.7, 0.9], [0.51, 0.71, 0.91], [0.7, 1.5, 0.92]], dtype=numpy.float32
+ )
woe = WOETransformer(
- intervals=[[(0.4, 0.6, False, True)],
- [(0.9, numpy.inf), (-numpy.inf, 0.9)]],
+ intervals=[
+ [(0.4, 0.6, False, True)],
+ [(0.9, numpy.inf), (-numpy.inf, 0.9)],
+ ],
weights=[[2.7], [3.5, 6.7]],
- onehot=False)
+ onehot=False,
+ )
woe.fit(x)
expected = woe.transform(x)
- manual = numpy.array([[0., 6.7, 0.9],
- [2.7, 6.7, 0.91],
- [0., 3.5, 0.92]], dtype=numpy.float32)
+ manual = numpy.array(
+ [[0.0, 6.7, 0.9], [2.7, 6.7, 0.91], [0.0, 3.5, 0.92]], dtype=numpy.float32
+ )
assert_almost_equal(manual, expected)
- with self.subTest(way='skl2onnx'):
+ with self.subTest(way="skl2onnx"):
onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET, verbose=0)
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'X': x})[0]
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- with self.subTest(way='onnx'):
+ with self.subTest(way="onnx"):
onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET)
try:
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Cannot load model:\n%s" % str(onnx_model)) from e
- got = sess.run(None, {'X': x})[0]
+ raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
- @unittest.skipIf(InvalidArgument is None,
- reason='onnxruntime is too old')
- @unittest.skipIf(TARGET_OPSET < 12, reason='OneHotEncoder')
+ @unittest.skipIf(InvalidArgument is None, reason="onnxruntime is too old")
+ @unittest.skipIf(TARGET_OPSET < 12, reason="OneHotEncoder")
def test_woe_transformer_bigger(self):
x = numpy.array([[0, 1, 2, 3, 4, 5, 6, -1]], dtype=numpy.float32).T
- intervals = [[(0.0, 1.0, False, True), (1.0, 2.0, False, True),
- (2.0, 3.0, False, True), (3.0, 4.0, False, True)]]
- weights = [[-1.4057124469769924, -1.7241661780955269,
- 2.545531271604435, 0.9614111671546247]]
- woe = WOETransformer(intervals=intervals, weights=weights,
- onehot=False)
+ intervals = [
+ [
+ (0.0, 1.0, False, True),
+ (1.0, 2.0, False, True),
+ (2.0, 3.0, False, True),
+ (3.0, 4.0, False, True),
+ ]
+ ]
+ weights = [
+ [
+ -1.4057124469769924,
+ -1.7241661780955269,
+ 2.545531271604435,
+ 0.9614111671546247,
+ ]
+ ]
+ woe = WOETransformer(intervals=intervals, weights=weights, onehot=False)
woe.fit(x)
expected = woe.transform(x)
onnx_model = to_onnx(woe, x, target_opset=TARGET_OPSET)
try:
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Cannot load model:\n%s" % str(onnx_model)) from e
- got = sess.run(None, {'X': x})[0]
+ raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
onnx_model = woe_transformer_to_onnx(woe, TARGET_OPSET)
try:
sess = InferenceSession(
- onnx_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Cannot load model:\n%s" % str(onnx_model)) from e
- got = sess.run(None, {'X': x})[0]
+ raise AssertionError("Cannot load model:\n%s" % str(onnx_model)) from e
+ got = sess.run(None, {"X": x})[0]
assert_almost_equal(expected, got)
diff --git a/tests/test_supported_converters.py b/tests/test_supported_converters.py
index 2dc3bf9bd..a59657f7c 100644
--- a/tests/test_supported_converters.py
+++ b/tests/test_supported_converters.py
@@ -12,12 +12,17 @@
from sklearn.preprocessing import StandardScaler
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import (
- supported_converters, convert_sklearn, to_onnx,
- update_registered_converter)
+ supported_converters,
+ convert_sklearn,
+ to_onnx,
+ update_registered_converter,
+)
from skl2onnx.operator_converters.linear_classifier import (
- convert_sklearn_linear_classifier)
+ convert_sklearn_linear_classifier,
+)
from skl2onnx.shape_calculators.linear_classifier import (
- calculate_linear_classifier_output_shapes)
+ calculate_linear_classifier_output_shapes,
+)
from test_utils import fit_regression_model, TARGET_OPSET
@@ -38,61 +43,76 @@ def test_sklearn_converters(self):
def test_ir_version(self):
model, X = fit_regression_model(
- GradientBoostingRegressor(n_estimators=3, loss="huber"))
+ GradientBoostingRegressor(n_estimators=3, loss="huber")
+ )
model_onnx = convert_sklearn(
model,
"gradient boosting regression",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sub = "ir_version: "
if sub not in str(model_onnx):
raise AssertionError(
"Unable to find '{}' (opset={}) in\n{}".format(
- sub, TARGET_OPSET, str(model_onnx)))
+ sub, TARGET_OPSET, str(model_onnx)
+ )
+ )
def test_register_classifier(self):
update_registered_converter(
- DummyClassifier, 'DummyClassifierAlias',
+ DummyClassifier,
+ "DummyClassifierAlias",
calculate_linear_classifier_output_shapes,
convert_sklearn_linear_classifier,
- options={'nocl': [True, False],
- 'zipmap': [True, False, 'columns'],
- 'output_class_labels': [False, True],
- 'raw_scores': [True, False]})
- pipe = Pipeline([('st', StandardScaler()), ('d', DummyClassifier())])
+ options={
+ "nocl": [True, False],
+ "zipmap": [True, False, "columns"],
+ "output_class_labels": [False, True],
+ "raw_scores": [True, False],
+ },
+ )
+ pipe = Pipeline([("st", StandardScaler()), ("d", DummyClassifier())])
X = np.array([[0, 1], [1, 0], [0.5, 0.5]], dtype=np.float64)
y = np.array([1, 0, 1], dtype=np.int64)
pipe.fit(X, y)
model_onnx = to_onnx(pipe, X.astype(np.float32))
assert "zipmap" in str(model_onnx).lower()
- model_onnx = to_onnx(pipe, X.astype(np.float32),
- options={'d__zipmap': False})
+ model_onnx = to_onnx(pipe, X.astype(np.float32), options={"d__zipmap": False})
assert "zipmap" not in str(model_onnx).lower()
model_onnx = to_onnx(
- pipe, X.astype(np.float32),
- options={DummyClassifier: {'zipmap': False,
- 'output_class_labels': True}})
+ pipe,
+ X.astype(np.float32),
+ options={DummyClassifier: {"zipmap": False, "output_class_labels": True}},
+ )
assert "zipmap" not in str(model_onnx).lower()
self.assertEqual(3, len(model_onnx.graph.output))
model_onnx = to_onnx(
- pipe, X.astype(np.float32),
- options={id(pipe.steps[-1][-1]): {
- 'zipmap': False, 'output_class_labels': True}})
+ pipe,
+ X.astype(np.float32),
+ options={
+ id(pipe.steps[-1][-1]): {"zipmap": False, "output_class_labels": True}
+ },
+ )
assert "zipmap" not in str(model_onnx).lower()
self.assertEqual(3, len(model_onnx.graph.output))
model_onnx = to_onnx(
- pipe, X.astype(np.float32),
- options={'d__zipmap': False, 'd__output_class_labels': True})
+ pipe,
+ X.astype(np.float32),
+ options={"d__zipmap": False, "d__output_class_labels": True},
+ )
assert "zipmap" not in str(model_onnx).lower()
self.assertEqual(3, len(model_onnx.graph.output))
model_onnx = to_onnx(
- pipe, X.astype(np.float32),
- options={'zipmap': False, 'output_class_labels': True})
+ pipe,
+ X.astype(np.float32),
+ options={"zipmap": False, "output_class_labels": True},
+ )
assert "zipmap" not in str(model_onnx).lower()
self.assertEqual(3, len(model_onnx.graph.output))
diff --git a/tests/test_topology_prune.py b/tests/test_topology_prune.py
index b27ab1260..a5372dcfd 100644
--- a/tests/test_topology_prune.py
+++ b/tests/test_topology_prune.py
@@ -9,6 +9,7 @@
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import make_pipeline
from sklearn import datasets
+
try:
# scikit-learn >= 0.22
from sklearn.utils._testing import ignore_warnings
@@ -35,7 +36,6 @@ def transform(self, X):
class identity(IdentityTransformer):
-
def __init__(self):
IdentityTransformer.__init__(self)
@@ -50,16 +50,13 @@ def dummy_converter(scope, operator, container):
out = operator.outputs
id1 = OnnxIdentity(X, op_version=TARGET_OPSET)
- id2 = OnnxIdentity(id1, output_names=out[:1],
- op_version=TARGET_OPSET)
+ id2 = OnnxIdentity(id1, output_names=out[:1], op_version=TARGET_OPSET)
id2.add_to(scope, container)
class TestTopologyPrune(unittest.TestCase):
-
@ignore_warnings(category=DeprecationWarning)
def test_dummy_identity(self):
-
digits = datasets.load_digits(n_class=6)
Xd = digits.data[:20]
yd = digits.target[:20]
@@ -68,67 +65,74 @@ def test_dummy_identity(self):
idtr = make_pipeline(IdentityTransformer(), identity())
idtr.fit(Xd, yd)
- update_registered_converter(IdentityTransformer, "IdentityTransformer",
- dummy_shape_calculator, dummy_converter)
- update_registered_converter(identity, "identity",
- dummy_shape_calculator, dummy_converter)
+ update_registered_converter(
+ IdentityTransformer,
+ "IdentityTransformer",
+ dummy_shape_calculator,
+ dummy_converter,
+ )
+ update_registered_converter(
+ identity, "identity", dummy_shape_calculator, dummy_converter
+ )
model_onnx = convert_sklearn(
- idtr, "idtr",
+ idtr,
+ "idtr",
[("input", FloatTensorType([None, Xd.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
- idnode = [node for node in model_onnx.graph.node
- if node.op_type == "Identity"]
+ idnode = [node for node in model_onnx.graph.node if node.op_type == "Identity"]
self.assertEqual(len(idnode), 1)
@ignore_warnings(category=DeprecationWarning)
def test_onnx_subgraphs1(self):
- x = numpy.array([1, 2, 4, 5, 5, 4]).astype(
- numpy.float32).reshape((3, 2))
+ x = numpy.array([1, 2, 4, 5, 5, 4]).astype(numpy.float32).reshape((3, 2))
cop = OnnxAdd(
- OnnxIdentity('input', op_version=TARGET_OPSET),
- 'input', op_version=TARGET_OPSET)
- cdist = onnx_squareform_pdist(
- cop, dtype=numpy.float32, op_version=TARGET_OPSET)
- cop2 = OnnxIdentity(cdist, output_names=['cdist'],
- op_version=TARGET_OPSET)
+ OnnxIdentity("input", op_version=TARGET_OPSET),
+ "input",
+ op_version=TARGET_OPSET,
+ )
+ cdist = onnx_squareform_pdist(cop, dtype=numpy.float32, op_version=TARGET_OPSET)
+ cop2 = OnnxIdentity(cdist, output_names=["cdist"], op_version=TARGET_OPSET)
model_def = cop2.to_onnx(
- {'input': FloatTensorType([None, None])},
- outputs=[('cdist', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ {"input": FloatTensorType([None, None])},
+ outputs=[("cdist", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
self.assertEqual(len(res), 1)
@ignore_warnings(category=DeprecationWarning)
def test_onnx_subgraphs2(self):
- x = numpy.array([1, 2, 4, 5, 5, 4]).astype(
- numpy.float32).reshape((3, 2))
+ x = numpy.array([1, 2, 4, 5, 5, 4]).astype(numpy.float32).reshape((3, 2))
cop = OnnxAdd(
- OnnxIdentity('input', op_version=TARGET_OPSET),
- 'input', op_version=TARGET_OPSET)
- cdist = onnx_squareform_pdist(
- cop, dtype=numpy.float32, op_version=TARGET_OPSET)
- id1 = [id(a) for a in cdist.onx_op.graph_algebra['body']]
+ OnnxIdentity("input", op_version=TARGET_OPSET),
+ "input",
+ op_version=TARGET_OPSET,
+ )
+ cdist = onnx_squareform_pdist(cop, dtype=numpy.float32, op_version=TARGET_OPSET)
+ id1 = [id(a) for a in cdist.onx_op.graph_algebra["body"]]
cdist2 = onnx_squareform_pdist(
- cop, dtype=numpy.float32, op_version=TARGET_OPSET)
- id2 = [id(a) for a in cdist2.onx_op.graph_algebra['body']]
+ cop, dtype=numpy.float32, op_version=TARGET_OPSET
+ )
+ id2 = [id(a) for a in cdist2.onx_op.graph_algebra["body"]]
self.assertNotEqual(id1, id2)
- cop2 = OnnxAdd(cdist, cdist2, output_names=['cdist'],
- op_version=TARGET_OPSET)
+ cop2 = OnnxAdd(cdist, cdist2, output_names=["cdist"], op_version=TARGET_OPSET)
model_def = cop2.to_onnx(
- {'input': FloatTensorType([None, None])},
- outputs=[('cdist', FloatTensorType([None, None]))],
- target_opset=TARGET_OPSET)
+ {"input": FloatTensorType([None, None])},
+ outputs=[("cdist", FloatTensorType([None, None]))],
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_def.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': x})
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": x})
self.assertEqual(len(res), 1)
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index 736d910e9..a336ebf56 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -26,24 +26,27 @@
binary_array_to_string,
path_to_leaf,
)
+
try:
from .utils_backend_onnx import ReferenceEvaluatorEx
except ImportError:
+
def ReferenceEvaluatorEx(*args, **kwargs):
raise NotImplementedError(
"onnx package does not implement class ReferenceEvaluator. "
- "Update to onnx>=1.13.0.")
+ "Update to onnx>=1.13.0."
+ )
def InferenceSessionEx(onx, *args, verbose=0, **kwargs):
from onnxruntime import InferenceSession
+
if "providers" not in kwargs:
kwargs["providers"] = ["CPUExecutionProvider"]
try:
return InferenceSession(onx, *args, **kwargs)
except Exception as e:
- if (TARGET_OPSET >= 18 and
- "support for domain ai.onnx is till opset" in str(e)):
+ if TARGET_OPSET >= 18 and "support for domain ai.onnx is till opset" in str(e):
return ReferenceEvaluatorEx(onx, verbose=verbose)
raise e
diff --git a/tests/test_utils/reference_implementation_afe.py b/tests/test_utils/reference_implementation_afe.py
index 437f2ec56..2275ea67a 100644
--- a/tests/test_utils/reference_implementation_afe.py
+++ b/tests/test_utils/reference_implementation_afe.py
@@ -28,8 +28,7 @@ def _array_feature_extrator(data, indices):
try:
tem = data[..., index]
except IndexError as e:
- raise RuntimeError(
- f"data.shape={data.shape}, indices={indices}") from e
+ raise RuntimeError(f"data.shape={data.shape}, indices={indices}") from e
res = tem.reshape(new_shape)
return res
@@ -38,7 +37,6 @@ def _array_feature_extrator(data, indices):
from onnx.reference.op_run import OpRun
class ArrayFeatureExtractor(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, data, indices):
diff --git a/tests/test_utils/reference_implementation_helper.py b/tests/test_utils/reference_implementation_helper.py
index e703d3831..676a3ffbe 100644
--- a/tests/test_utils/reference_implementation_helper.py
+++ b/tests/test_utils/reference_implementation_helper.py
@@ -3,19 +3,19 @@
def ErfInv(x):
- sgn = -1. if x < 0 else 1.
- x = (1. - x) * (1 + x)
+ sgn = -1.0 if x < 0 else 1.0
+ x = (1.0 - x) * (1 + x)
log = np.log(x)
- v = 2. / (3.14159 * 0.147) + 0.5 * log
- v2 = 1. / 0.147 * log
+ v = 2.0 / (3.14159 * 0.147) + 0.5 * log
+ v2 = 1.0 / 0.147 * log
v3 = -v + np.sqrt(v * v - v2)
x = sgn * np.sqrt(v3)
return x
def ComputeLogistic(val):
- v = 1. / (1. + np.exp(-np.abs(val)))
- return (1. - v) if val < 0 else v
+ v = 1.0 / (1.0 + np.exp(-np.abs(val)))
+ return (1.0 - v) if val < 0 else v
def ComputeProbit(val):
@@ -55,8 +55,8 @@ def sigmoid_probability(score, proba, probb):
def multiclass_probability(k, R):
max_iter = max(100, k)
Q = np.empty((k, k), dtype=R.dtype)
- Qp = np.empty((k, ), dtype=R.dtype)
- P = np.empty((k, ), dtype=R.dtype)
+ Qp = np.empty((k,), dtype=R.dtype)
+ P = np.empty((k,), dtype=R.dtype)
eps = 0.005 / k
for t in range(0, k):
@@ -89,11 +89,10 @@ def multiclass_probability(k, R):
for t in range(k):
diff = (-Qp[t] + pQp) / Q[t, t]
P[t] += diff
- pQp = ((pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) /
- (1 + diff) ** 2)
+ pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) ** 2
for j in range(k):
Qp[j] = (Qp[j] + diff * Q[t, j]) / (1 + diff)
- P[j] /= (1 + diff)
+ P[j] /= 1 + diff
return P
@@ -123,17 +122,26 @@ def write_scores(n_classes, scores, post_transform, add_second_class):
res = np.array([1 - scores[0], scores[0]], dtype=scores.dtype)
elif add_second_class in (2, 3):
if post_transform == "LOGISTIC":
- return np.array([ComputeLogistic(-scores[0]),
- ComputeLogistic(scores[0])],
- dtype=scores.dtype)
+ return np.array(
+ [ComputeLogistic(-scores[0]), ComputeLogistic(scores[0])],
+ dtype=scores.dtype,
+ )
return np.array([-scores[0], scores[0]], dtype=scores.dtype)
return np.array([scores[0]], dtype=scores.dtype)
raise NotImplementedError(f"n_classes={n_classes} not supported.")
-def set_score_svm(max_weight, maxclass, n, post_transform,
- has_proba, weights_are_all_positive_,
- classlabels, posclass, negclass):
+def set_score_svm(
+ max_weight,
+ maxclass,
+ n,
+ post_transform,
+ has_proba,
+ weights_are_all_positive_,
+ classlabels,
+ posclass,
+ negclass,
+):
write_additional_scores = -1
if len(classlabels) == 2:
write_additional_scores = 2 if post_transform == "NONE" else 0
diff --git a/tests/test_utils/reference_implementation_ml.py b/tests/test_utils/reference_implementation_ml.py
index ea9cc7f2a..d25daec33 100644
--- a/tests/test_utils/reference_implementation_ml.py
+++ b/tests/test_utils/reference_implementation_ml.py
@@ -9,7 +9,6 @@
from onnx.reference.op_run import OpRun
class FusedMatMul(OpRun):
-
@staticmethod
def _fmatmul00(a, b, alpha):
return np.matmul(a, b) * alpha
@@ -38,15 +37,20 @@ def _transpose(x, trans, transBatch):
x = np.transpose(x, perm)
return x
- def _run(self, a, b, alpha=None, transA=None, transB=None,
- transBatchA=None, transBatchB=None):
-
+ def _run(
+ self,
+ a,
+ b,
+ alpha=None,
+ transA=None,
+ transB=None,
+ transBatchA=None,
+ transBatchB=None,
+ ):
if transA:
- _meth = (FusedMatMul._fmatmul11 if transB
- else FusedMatMul._fmatmul10)
+ _meth = FusedMatMul._fmatmul11 if transB else FusedMatMul._fmatmul10
else:
- _meth = (FusedMatMul._fmatmul01 if transB
- else FusedMatMul._fmatmul00)
+ _meth = FusedMatMul._fmatmul01 if transB else FusedMatMul._fmatmul00
_meth = lambda a, b: _meth(a, b, alpha) # noqa
# more recent versions of the operator
if transBatchA is None:
@@ -54,12 +58,11 @@ def _run(self, a, b, alpha=None, transA=None, transB=None,
if transBatchB is None:
transBatchB = 0
- if (transBatchA or transBatchB or
- len(a.shape) != 2 or len(b.shape) != 2):
+ if transBatchA or transBatchB or len(a.shape) != 2 or len(b.shape) != 2:
ta = self._transpose(a, transA, transBatchA)
tb = self._transpose(b, transB, transBatchB)
try:
- return (np.matmul(ta, tb) * alpha, )
+ return (np.matmul(ta, tb) * alpha,)
except ValueError as e:
raise ValueError(
f"Unable to multiply shape {a.shape}x{b.shape} "
@@ -68,9 +71,10 @@ def _run(self, a, b, alpha=None, transA=None, transB=None,
f"transB={transB}, "
f"transBatchA={transBatchA}, "
f"transBatchB={transBatchB}, "
- f"meth={_meth}.") from e
+ f"meth={_meth}."
+ ) from e
try:
- return (_meth(a, b), )
+ return (_meth(a, b),)
except ValueError as e:
raise ValueError(
f"Unable to multiply shape {a.shape}x{b.shape} "
@@ -78,10 +82,10 @@ def _run(self, a, b, alpha=None, transA=None, transB=None,
f"transB={transB}, "
f"transBatchA={transBatchA}, "
f"transBatchB={transBatchB}, "
- f"meth={_meth}.") from e
+ f"meth={_meth}."
+ ) from e
class Scaler(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, x, offset=None, scale=None):
@@ -89,12 +93,10 @@ def _run(self, x, offset=None, scale=None):
return (dx * scale,)
class LinearClassifier(OpRun):
-
op_domain = "ai.onnx.ml"
@staticmethod
- def _post_process_predicted_label(label, scores,
- classlabels_ints_string):
+ def _post_process_predicted_label(label, scores, classlabels_ints_string):
"""
Replaces int64 predicted labels by the corresponding
strings.
@@ -104,21 +106,21 @@ def _post_process_predicted_label(label, scores,
return label, scores
def _run(
- self,
- x,
- classlabels_ints=None,
- classlabels_strings=None,
- coefficients=None,
- intercepts=None,
- multi_class=None,
- post_transform=None):
+ self,
+ x,
+ classlabels_ints=None,
+ classlabels_strings=None,
+ coefficients=None,
+ intercepts=None,
+ multi_class=None,
+ post_transform=None,
+ ):
dtype = x.dtype
if dtype != np.float64:
x = x.astype(np.float32)
coefficients = np.array(coefficients).astype(x.dtype)
intercepts = np.array(intercepts).astype(x.dtype)
- n_class = max(
- len(classlabels_ints or []), len(classlabels_strings or []))
+ n_class = max(len(classlabels_ints or []), len(classlabels_strings or []))
n = coefficients.shape[0] // n_class
coefficients = coefficients.reshape(n_class, n).T
scores = np.dot(x, coefficients)
@@ -130,13 +132,13 @@ def _run(
elif post_transform == "LOGISTIC":
scores = expit(scores)
elif post_transform == "SOFTMAX":
- np.subtract(
- scores, scores.max(axis=1)[:, np.newaxis], out=scores)
+ np.subtract(scores, scores.max(axis=1)[:, np.newaxis], out=scores)
scores = np.exp(scores)
scores = np.divide(scores, scores.sum(axis=1)[:, np.newaxis])
else:
raise NotImplementedError( # pragma: no cover
- f"Unknown post_transform: '{post_transform}'.")
+ f"Unknown post_transform: '{post_transform}'."
+ )
if coefficients.shape[1] == 1:
labels = np.zeros((scores.shape[0],), dtype=x.dtype)
@@ -144,23 +146,17 @@ def _run(
else:
labels = np.argmax(scores, axis=1)
if classlabels_ints is not None:
- labels = np.array(
- [classlabels_ints[i] for i in labels], dtype=np.int64)
+ labels = np.array([classlabels_ints[i] for i in labels], dtype=np.int64)
elif classlabels_strings is not None:
labels = np.array([classlabels_strings[i] for i in labels])
return (labels, scores)
class LinearRegressor(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(
- self,
- x,
- coefficients=None,
- intercepts=None,
- targets=1,
- post_transform=None):
+ self, x, coefficients=None, intercepts=None, targets=1, post_transform=None
+ ):
coefficients = np.array(coefficients).astype(x.dtype)
intercepts = np.array(intercepts).astype(x.dtype)
n = coefficients.shape[0] // targets
@@ -172,11 +168,11 @@ def _run(
pass
else:
raise NotImplementedError(
- f"Unknown post_transform: '{self.post_transform}'.")
+ f"Unknown post_transform: '{self.post_transform}'."
+ )
return (score,)
class Normalizer(OpRun):
-
op_domain = "ai.onnx.ml"
@staticmethod
@@ -213,7 +209,6 @@ def _run(self, x, norm=None):
return (_norm(x),)
class OneHotEncoder(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None):
@@ -240,8 +235,8 @@ def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None):
res[a, i, j] = 1.0
else:
raise RuntimeError(
- f"This operator is not implemented "
- f"for " f"shape {x.shape}.")
+ f"This operator is not implemented " f"for " f"shape {x.shape}."
+ )
if not self.zeros:
red = res.sum(axis=len(res.shape) - 1)
@@ -267,7 +262,6 @@ def _run(self, x, cats_int64s=None, cats_strings=None, zeros=None):
return (res,)
class Binarizer(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, x, threshold=None):
@@ -279,40 +273,39 @@ def _run(self, x, threshold=None):
return (X,)
class FeatureVectorizer(OpRun):
-
op_domain = "ai.onnx.ml"
def _preprocess(self, a, axis):
if axis >= len(a.shape):
- new_shape = a.shape + (1, ) * (axis + 1 - len(a.shape))
+ new_shape = a.shape + (1,) * (axis + 1 - len(a.shape))
return a.reshape(new_shape)
return a
def _run(self, *args, inputdimensions=None):
- args = [self._preprocess(a, axis)
- for a, axis in zip(args, inputdimensions)]
+ args = [self._preprocess(a, axis) for a, axis in zip(args, inputdimensions)]
dimensions = set(inputdimensions)
if len(set(dimensions)) == 1:
res = np.concatenate(args, axis=inputdimensions[0])
- return (res, )
+ return (res,)
raise RuntimeError(
- f"inputdimensions={inputdimensions} is not supported yet.")
+ f"inputdimensions={inputdimensions} is not supported yet."
+ )
class Imputer(OpRun):
-
op_domain = "ai.onnx.ml"
- def _run(self, x,
- imputed_value_floats=None,
- imputed_value_int64s=None,
- replaced_value_float=None,
- replaced_value_int64=None):
- if (imputed_value_floats is not None and
- len(imputed_value_floats) > 0):
+ def _run(
+ self,
+ x,
+ imputed_value_floats=None,
+ imputed_value_int64s=None,
+ replaced_value_float=None,
+ replaced_value_int64=None,
+ ):
+ if imputed_value_floats is not None and len(imputed_value_floats) > 0:
values = imputed_value_floats
replace = replaced_value_float
- elif (imputed_value_int64s is not None and
- len(imputed_value_int64s) > 0):
+ elif imputed_value_int64s is not None and len(imputed_value_int64s) > 0:
values = imputed_value_int64s
replace = replaced_value_int64
else:
@@ -321,11 +314,11 @@ def _run(self, x,
if isinstance(values, list):
values = np.array(values)
if len(x.shape) != 2:
- raise TypeError(
- f"x must be a matrix but shape is {x.shape}")
+ raise TypeError(f"x must be a matrix but shape is {x.shape}")
if values.shape[0] not in (x.shape[1], 1):
raise TypeError( # pragma: no cover
- f"Dimension mismatch {values.shape[0]} != {x.shape[1]}")
+ f"Dimension mismatch {values.shape[0]} != {x.shape[1]}"
+ )
x = x.copy()
if np.isnan(replace):
for i in range(0, x.shape[1]):
@@ -336,22 +329,24 @@ def _run(self, x,
val = values[min(i, values.shape[0] - 1)]
x[x[:, i] == replace, i] = val
- return (x, )
+ return (x,)
class LabelEncoder(OpRun):
-
op_domain = "ai.onnx.ml"
- def _run(self, x,
- default_float=None,
- default_int64=None,
- default_string=None,
- keys_floats=None,
- keys_int64s=None,
- keys_strings=None,
- values_floats=None,
- values_int64s=None,
- values_strings=None):
+ def _run(
+ self,
+ x,
+ default_float=None,
+ default_int64=None,
+ default_string=None,
+ keys_floats=None,
+ keys_int64s=None,
+ keys_strings=None,
+ values_floats=None,
+ values_int64s=None,
+ values_strings=None,
+ ):
keys = keys_floats or keys_int64s or keys_strings
values = values_floats or values_int64s or values_strings
classes = {k: v for k, v in zip(keys, values)}
@@ -370,7 +365,7 @@ def _run(self, x,
else:
defval = default_string
if not isinstance(defval, str):
- defval = ''
+ defval = ""
dtype = np.str_
shape = x.shape
if len(x.shape) > 1:
@@ -379,16 +374,13 @@ def _run(self, x,
for i in range(0, x.shape[0]):
v = classes.get(cast(x[i]), defval)
res.append(v)
- return (np.array(res, dtype=dtype).reshape(shape), )
+ return (np.array(res, dtype=dtype).reshape(shape),)
class DictVectorizer(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, x, int64_vocabulary=None, string_vocabulary=None):
-
if isinstance(x, (np.ndarray, list)):
-
dict_labels = {}
if int64_vocabulary:
for i, v in enumerate(int64_vocabulary):
@@ -399,7 +391,8 @@ def _run(self, x, int64_vocabulary=None, string_vocabulary=None):
if len(dict_labels) == 0:
raise RuntimeError(
"int64_vocabulary and string_vocabulary "
- "cannot be both empty.")
+ "cannot be both empty."
+ )
values = []
rows = []
@@ -412,16 +405,17 @@ def _run(self, x, int64_vocabulary=None, string_vocabulary=None):
values = np.array(values)
rows = np.array(rows)
cols = np.array(cols)
- return (coo_matrix(
- (values, (rows, cols)),
- shape=(len(x), len(dict_labels))).todense(), )
+ return (
+ coo_matrix(
+ (values, (rows, cols)), shape=(len(x), len(dict_labels))
+ ).todense(),
+ )
if isinstance(x, dict):
keys = int64_vocabulary or string_vocabulary
res = []
for k in keys:
res.append(x.get(k, 0))
- return (np.array(res), )
+ return (np.array(res),)
- raise TypeError( # pragma: no cover
- f"x must be iterable not {type(x)}.")
+ raise TypeError(f"x must be iterable not {type(x)}.") # pragma: no cover
diff --git a/tests/test_utils/reference_implementation_svm.py b/tests/test_utils/reference_implementation_svm.py
index 8f6d263d5..77692d8f4 100644
--- a/tests/test_utils/reference_implementation_svm.py
+++ b/tests/test_utils/reference_implementation_svm.py
@@ -28,8 +28,7 @@ def _attribute_value(attr):
return list(attr.ints)
if attr.strings:
return list(map(_to_str, attr.strings))
- raise NotImplementedError(
- "Unable to return a value for attribute %r." % attr)
+ raise NotImplementedError("Unable to return a value for attribute %r." % attr)
class SVMAttributes:
@@ -37,8 +36,8 @@ def __init__(self):
self._names = []
def add(self, name, value):
- if isinstance(value, list) and name not in {'kernel_params'}:
- if name in {'vectors_per_class'}:
+ if isinstance(value, list) and name not in {"kernel_params"}:
+ if name in {"vectors_per_class"}:
value = np.array(value, dtype=np.int64)
else:
value = np.array(value, dtype=np.float32)
@@ -67,13 +66,12 @@ def __init__(self, **kwargs):
self.coef0_ = self.atts.kernel_params[1]
self.degree_ = int(self.atts.kernel_params[2])
else:
- self.gamma_ = 0.
- self.coef0_ = 0.
+ self.gamma_ = 0.0
+ self.coef0_ = 0.0
self.degree_ = 0
def __str__(self):
- rows = ["TreeEnsemble",
- f"root_index={self.root_index}", str(self.atts)]
+ rows = ["TreeEnsemble", f"root_index={self.root_index}", str(self.atts)]
return "\n".join(rows)
def kernel_dot(self, pA, pB, kernel):
@@ -81,7 +79,7 @@ def kernel_dot(self, pA, pB, kernel):
if k == "poly":
s = np.dot(pA, pB)
s = s * self.gamma_ + self.coef0_
- return s ** self.degree_
+ return s**self.degree_
if k == "sigmoid":
s = np.dot(pA, pB)
s = s * self.gamma_ + self.coef0_
@@ -95,7 +93,6 @@ def kernel_dot(self, pA, pB, kernel):
raise ValueError(f"Unexpected kernel={kernel!r}.")
def run(self, X):
-
if self.atts.n_supports > 0:
# length of each support vector
mode_ = "SVM_SVC"
@@ -107,7 +104,7 @@ def run(self, X):
z = np.empty((X.shape[0], 1), dtype=X.dtype)
for n in range(X.shape[0]):
- s = 0.
+ s = 0.0
if mode_ == "SVM_SVC":
for j in range(self.atts.n_supports):
@@ -127,31 +124,38 @@ def run(self, X):
if onnx_opset_version() >= 18:
from onnx.reference.op_run import OpRun
+
try:
from .reference_implementation_helper import (
- write_scores, set_score_svm, multiclass_probability,
- sigmoid_probability)
+ write_scores,
+ set_score_svm,
+ multiclass_probability,
+ sigmoid_probability,
+ )
except ImportError:
from reference_implementation_helper import (
- write_scores, set_score_svm, multiclass_probability,
- sigmoid_probability)
+ write_scores,
+ set_score_svm,
+ multiclass_probability,
+ sigmoid_probability,
+ )
class SVMRegressor(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(
- self,
- X,
- coefficients=None,
- kernel_params=None,
- kernel_type=None,
- n_targets=None,
- n_supports=None,
- one_class=None,
- post_transform=None,
- rho=None,
- support_vectors=None):
+ self,
+ X,
+ coefficients=None,
+ kernel_params=None,
+ kernel_type=None,
+ n_targets=None,
+ n_supports=None,
+ one_class=None,
+ post_transform=None,
+ rho=None,
+ support_vectors=None,
+ ):
svm = SVMCommon(
coefficients=coefficients,
kernel_params=kernel_params,
@@ -161,17 +165,18 @@ def _run(
one_class=one_class,
post_transform=post_transform,
rho=rho,
- support_vectors=support_vectors)
+ support_vectors=support_vectors,
+ )
self._svm = svm
res = svm.run(X)
if post_transform in (None, "NONE"):
return (res,)
raise NotImplementedError(
- f"post_transform={post_transform!r} not implemented.")
+ f"post_transform={post_transform!r} not implemented."
+ )
class SVMClassifier(OpRun):
-
op_domain = "ai.onnx.ml"
def _run_linear(self, X, coefs, class_count_, kernel_type_):
@@ -182,8 +187,16 @@ def _run_linear(self, X, coefs, class_count_, kernel_type_):
scores.append(score)
return np.array(scores, dtype=X.dtype)
- def _run_svm(self, X, sv, vector_count_, kernel_type_,
- class_count_, starting_vector_, coefs):
+ def _run_svm(
+ self,
+ X,
+ sv,
+ vector_count_,
+ kernel_type_,
+ class_count_,
+ starting_vector_,
+ coefs,
+ ):
evals = 0
kernels = []
@@ -201,10 +214,14 @@ def _run_svm(self, X, sv, vector_count_, kernel_type_,
si_j = starting_vector_[j]
class_j_sc = self._svm.atts.vectors_per_class[j]
- s1 = np.dot(coefs[j - 1, si_i: si_i+class_i_sc],
- kernels[si_i: si_i+class_i_sc])
- s2 = np.dot(coefs[i, si_j: si_j+class_j_sc],
- kernels[si_j: si_j+class_j_sc])
+ s1 = np.dot(
+ coefs[j - 1, si_i : si_i + class_i_sc],
+ kernels[si_i : si_i + class_i_sc],
+ )
+ s2 = np.dot(
+ coefs[i, si_j : si_j + class_j_sc],
+ kernels[si_j : si_j + class_j_sc],
+ )
s = self._svm.atts.rho[evals] + s1 + s2
scores.append(s)
@@ -216,15 +233,16 @@ def _run_svm(self, X, sv, vector_count_, kernel_type_,
return votes, np.array(scores, dtype=X.dtype)
def _probabilities(self, scores, class_count_):
- probsp2 = np.zeros((class_count_, class_count_),
- dtype=scores.dtype)
+ probsp2 = np.zeros((class_count_, class_count_), dtype=scores.dtype)
index = 0
for i in range(class_count_):
for j in range(i + 1, class_count_):
- val1 = sigmoid_probability(scores[index],
- self._svm.atts.prob_a[index],
- self._svm.atts.prob_b[index])
+ val1 = sigmoid_probability(
+ scores[index],
+ self._svm.atts.prob_a[index],
+ self._svm.atts.prob_b[index],
+ )
val2 = max(val1, 1.0e-7)
val2 = min(val2, (1 - 1.0e-7))
probsp2[i, j] = val2
@@ -232,10 +250,9 @@ def _probabilities(self, scores, class_count_):
index += 1
return multiclass_probability(class_count_, probsp2)
- def _compute_final_scores(self, votes, scores,
- weights_are_all_positive_,
- has_proba, classlabels_ints):
-
+ def _compute_final_scores(
+ self, votes, scores, weights_are_all_positive_, has_proba, classlabels_ints
+ ):
max_weight = 0
if len(votes):
max_class = np.argmax(votes)
@@ -247,33 +264,44 @@ def _compute_final_scores(self, votes, scores,
write_additional_scores = -1
if self._svm.atts.rho.size == 1:
label, write_additional_scores = set_score_svm(
- max_weight, max_class, 0,
- self._svm.atts.post_transform, has_proba,
- weights_are_all_positive_, classlabels_ints, 1, 0)
+ max_weight,
+ max_class,
+ 0,
+ self._svm.atts.post_transform,
+ has_proba,
+ weights_are_all_positive_,
+ classlabels_ints,
+ 1,
+ 0,
+ )
elif classlabels_ints is not None and len(classlabels_ints) > 0:
label = classlabels_ints[max_class]
else:
label = max_class
- new_scores = write_scores(scores.size, scores,
- self._svm.atts.post_transform,
- write_additional_scores)
+ new_scores = write_scores(
+ scores.size,
+ scores,
+ self._svm.atts.post_transform,
+ write_additional_scores,
+ )
return label, new_scores
def _run(
- self,
- X,
- classlabels_ints=None,
- classlabels_strings=None,
- coefficients=None,
- kernel_params=None,
- kernel_type=None,
- post_transform=None,
- prob_a=None,
- prob_b=None,
- rho=None,
- support_vectors=None,
- vectors_per_class=None):
+ self,
+ X,
+ classlabels_ints=None,
+ classlabels_strings=None,
+ coefficients=None,
+ kernel_params=None,
+ kernel_type=None,
+ post_transform=None,
+ prob_a=None,
+ prob_b=None,
+ rho=None,
+ support_vectors=None,
+ vectors_per_class=None,
+ ):
svm = SVMCommon(
coefficients=coefficients,
kernel_params=kernel_params,
@@ -283,7 +311,8 @@ def _run(
prob_b=prob_b,
rho=rho,
support_vectors=support_vectors,
- vectors_per_class=vectors_per_class)
+ vectors_per_class=vectors_per_class,
+ )
self._svm = svm
vector_count_ = 0
@@ -293,8 +322,7 @@ def _run(
starting_vector_.append(vector_count_)
vector_count_ += vc
- class_count_ = max(len(classlabels_ints or
- classlabels_strings or []), 1)
+ class_count_ = max(len(classlabels_ints or classlabels_strings or []), 1)
if vector_count_ > 0:
# length of each support vector
mode_ = "SVM_SVC"
@@ -313,25 +341,32 @@ def _run(
if vector_count_ == 0 and mode_ == "SVM_LINEAR":
res = np.empty((X.shape[0], class_count_), dtype=X.dtype)
for n in range(X.shape[0]):
- scores = self._run_linear(
- X[n], coefs, class_count_, kernel_type_)
+ scores = self._run_linear(X[n], coefs, class_count_, kernel_type_)
res[n, :] = scores
else:
res = np.empty(
- (X.shape[0], class_count_ * (class_count_ - 1) // 2),
- dtype=X.dtype)
+ (X.shape[0], class_count_ * (class_count_ - 1) // 2), dtype=X.dtype
+ )
votes = np.empty((X.shape[0], class_count_), dtype=X.dtype)
for n in range(X.shape[0]):
vote, scores = self._run_svm(
- X[n], sv, vector_count_, kernel_type_, class_count_,
- starting_vector_, coefs)
+ X[n],
+ sv,
+ vector_count_,
+ kernel_type_,
+ class_count_,
+ starting_vector_,
+ coefs,
+ )
res[n, :] = scores
votes[n, :] = vote
# proba
- if (self._svm.atts.prob_a is not None and
- len(self._svm.atts.prob_a) > 0 and
- mode_ == "SVM_SVC"):
+ if (
+ self._svm.atts.prob_a is not None
+ and len(self._svm.atts.prob_a) > 0
+ and mode_ == "SVM_SVC"
+ ):
scores = np.empty((res.shape[0], class_count_), dtype=X.dtype)
for n in range(scores.shape[0]):
s = self._probabilities(res[n], class_count_)
@@ -346,18 +381,23 @@ def _run(
labels = []
for n in range(scores.shape[0]):
label, new_scores = self._compute_final_scores(
- votes[n], scores[n], weights_are_all_positive_,
- has_proba, classlabels_ints)
+ votes[n],
+ scores[n],
+ weights_are_all_positive_,
+ has_proba,
+ classlabels_ints,
+ )
if final_scores is None:
- final_scores = np.empty((X.shape[0], new_scores.size),
- dtype=X.dtype)
+ final_scores = np.empty(
+ (X.shape[0], new_scores.size), dtype=X.dtype
+ )
final_scores[n, :] = new_scores
labels.append(label)
# labels
- if (classlabels_strings is not None and
- len(classlabels_strings) > 0):
- return (np.array([classlabels_strings[i]
- for i in labels]),
- final_scores)
+ if classlabels_strings is not None and len(classlabels_strings) > 0:
+ return (
+ np.array([classlabels_strings[i] for i in labels]),
+ final_scores,
+ )
return (np.array(labels, dtype=np.int64), final_scores)
diff --git a/tests/test_utils/reference_implementation_text.py b/tests/test_utils/reference_implementation_text.py
index f459dc4a7..0ff6497e0 100644
--- a/tests/test_utils/reference_implementation_text.py
+++ b/tests/test_utils/reference_implementation_text.py
@@ -13,46 +13,49 @@
from onnx.reference.ops.op_tfidf_vectorizer import (
WeightingCriteria,
NgramPart,
- populate_grams)
+ populate_grams,
+ )
class Tokenizer(OpRun):
-
op_domain = "com.microsoft"
def _run(
- self,
- text,
- mark=None,
- mincharnum=None,
- pad_value=None,
- separators=None,
- tokenexp=None,
- tokenexpsplit=None,
- stopwords=None):
- char_tokenization_ = (
- tokenexp == "." or list(separators or []) == [""])
+ self,
+ text,
+ mark=None,
+ mincharnum=None,
+ pad_value=None,
+ separators=None,
+ tokenexp=None,
+ tokenexpsplit=None,
+ stopwords=None,
+ ):
+ char_tokenization_ = tokenexp == "." or list(separators or []) == [""]
stops_ = set(stopwords or [])
try:
str_separators_ = set(_ for _ in (separators or ""))
except AttributeError as e: # pragma: no cover
raise TypeError(
- f"Unable to interpret separators {separators!r}.") from e
+ f"Unable to interpret separators {separators!r}."
+ ) from e
if tokenexp not in (None, ""):
tokenexp_ = re.compile(tokenexp)
if char_tokenization_:
- return self._run_char_tokenization(
- text, stops_, mark, pad_value)
+ return self._run_char_tokenization(text, stops_, mark, pad_value)
if str_separators_ is not None and len(str_separators_) > 0:
str_separators = [re.compile(s) for s in str_separators_]
return self._run_sep_tokenization(
- text, stops_, str_separators, mark, pad_value)
+ text, stops_, str_separators, mark, pad_value
+ )
if tokenexp not in (None, ""):
return self._run_regex_tokenization(
- text, stops_, tokenexp_, tokenexpsplit, mark, pad_value)
+ text, stops_, tokenexp_, tokenexpsplit, mark, pad_value
+ )
raise RuntimeError( # pragma: no cover
"Unable to guess which tokenization to use, sep={}, "
- "tokenexp='{}'.".format(separators, tokenexp))
+ "tokenexp='{}'.".format(separators, tokenexp)
+ )
@staticmethod
def _run_tokenization(text, stops, split, mark, pad_value):
@@ -96,8 +99,8 @@ def _run_tokenization(text, stops, split, mark, pad_value):
res = np.array(res)
else:
raise RuntimeError( # pragma: no cover
- f"Only vector or matrices are supported "
- f"not shape {text.shape}.")
+ f"Only vector or matrices are supported " f"not shape {text.shape}."
+ )
return (res,)
@staticmethod
@@ -110,8 +113,7 @@ def split(t):
for c in t:
yield c
- return Tokenizer._run_tokenization(
- text, stops, split, mark, pad_value)
+ return Tokenizer._run_tokenization(text, stops, split, mark, pad_value)
@staticmethod
def _run_sep_tokenization(text, stops, separators, mark, pad_value):
@@ -126,8 +128,10 @@ def split(t):
while pos < len(t):
for sep in separators:
if isinstance(sep, str):
- if (pos + len(sep) <= len(t) and
- sep == t[pos: pos + len(sep)]):
+ if (
+ pos + len(sep) <= len(t)
+ and sep == t[pos : pos + len(sep)]
+ ):
word = t[begin:pos]
yield word
begin = pos + len(sep)
@@ -145,12 +149,10 @@ def split(t):
word = t[begin:pos]
yield word
- return Tokenizer._run_tokenization(
- text, stops, split, mark, pad_value)
+ return Tokenizer._run_tokenization(text, stops, split, mark, pad_value)
@staticmethod
- def _run_regex_tokenization(text, stops, exp, tokenexpsplit,
- mark, pad_value):
+ def _run_regex_tokenization(text, stops, exp, tokenexpsplit, mark, pad_value):
"""
Tokenizes using a regular expression.
"""
@@ -164,8 +166,7 @@ def split(t):
def split(t):
return filter(lambda x: x, exp.findall(t))
- return Tokenizer._run_tokenization(
- text, stops, split, mark, pad_value)
+ return Tokenizer._run_tokenization(text, stops, split, mark, pad_value)
class TfIdfVectorizer(OpRun):
def __init__(self, onnx_node, run_params): # type: ignore
@@ -178,7 +179,8 @@ def __init__(self, onnx_node, run_params): # type: ignore
if value is None:
raise ValueError(
f"Unexpected mode={mode!r}, "
- f"not found in {dir(WeightingCriteria)}.")
+ f"not found in {dir(WeightingCriteria)}."
+ )
self.weighting_criteria_ = value # type: ignore
self.min_gram_length_ = self.min_gram_length # type: ignore
@@ -215,39 +217,41 @@ def __init__(self, onnx_node, run_params): # type: ignore
# Load into dictionary only required gram sizes
ngram_size = 1
for i in range(len(self.ngram_counts_)):
-
start_idx = self.ngram_counts_[i]
end_idx = (
self.ngram_counts_[i + 1]
if (i + 1) < len(self.ngram_counts_)
- else total_items)
+ else total_items
+ )
items = end_idx - start_idx
if items > 0:
ngrams = items // ngram_size
- if (ngram_size >= self.min_gram_length_ and
- ngram_size <= self.max_gram_length_):
+ if (
+ ngram_size >= self.min_gram_length_
+ and ngram_size <= self.max_gram_length_
+ ):
ngram_id = populate_grams(
self.pool_int64s_,
start_idx,
ngrams,
ngram_size,
ngram_id,
- self.int64_map_)
+ self.int64_map_,
+ )
else:
ngram_id += ngrams
ngram_size += 1
- def increment_count(self, ngram_id: int, row_num: int,
- frequencies: List[int]) -> None:
+ def increment_count(
+ self, ngram_id: int, row_num: int, frequencies: List[int]
+ ) -> None:
ngram_id -= 1
# assert(ngram_id < ngram_indexes_.size());
- output_idx = (
- row_num * self.output_size_ + self.ngram_indexes_[ngram_id])
+ output_idx = row_num * self.output_size_ + self.ngram_indexes_[ngram_id]
# assert(static_cast(output_idx) < frequencies.size());
frequencies[output_idx] += 1
def output_result(self, B: int, frequencies: List[int]) -> np.ndarray:
-
def _getattr(cls, name):
try:
return getattr(cls, name)
@@ -269,14 +273,12 @@ def _getattr(cls, name):
Y = np.empty((total_dims,), dtype=np.float32)
w = self.weights_
- if self.weighting_criteria_ == _getattr(
- WeightingCriteria, "TF"):
+ if self.weighting_criteria_ == _getattr(WeightingCriteria, "TF"):
i = 0
for f in frequencies:
Y[i] = f
i += 1
- elif self.weighting_criteria_ == _getattr(
- WeightingCriteria, "IDF"):
+ elif self.weighting_criteria_ == _getattr(WeightingCriteria, "IDF"):
if len(w) > 0:
p = 0
for _batch in range(B):
@@ -288,8 +290,7 @@ def _getattr(cls, name):
for f in frequencies:
Y[p] = 1 if f > 0 else 0
p += 1
- elif self.weighting_criteria_ == _getattr(
- WeightingCriteria, "TFIDF"):
+ elif self.weighting_criteria_ == _getattr(WeightingCriteria, "TFIDF"):
if len(w) > 0:
p = 0
for _batch in range(B):
@@ -306,21 +307,21 @@ def _getattr(cls, name):
return Y.reshape(output_dims)
def compute_impl( # type: ignore
- self,
- X: np.ndarray,
- row_num: int,
- row_size: int,
- frequencies: List[int],
- max_gram_length=None,
- max_skip_count=None,
- min_gram_length=None,
- mode=None,
- ngram_counts=None,
- ngram_indexes=None,
- pool_int64s=None,
- pool_strings=None,
- weights=None) -> None:
-
+ self,
+ X: np.ndarray,
+ row_num: int,
+ row_size: int,
+ frequencies: List[int],
+ max_gram_length=None,
+ max_skip_count=None,
+ min_gram_length=None,
+ mode=None,
+ ngram_counts=None,
+ ngram_indexes=None,
+ pool_int64s=None,
+ pool_strings=None,
+ weights=None,
+ ) -> None:
if len(X.shape) > 1:
X_flat = X[row_num]
else:
@@ -338,17 +339,18 @@ def compute_impl( # type: ignore
while ngram_start < ngram_row_end:
# We went far enough so no n-grams of any size can be
# gathered
- at_least_this = ngram_start + skip_distance * (
- start_ngram_size - 1)
+ at_least_this = ngram_start + skip_distance * (start_ngram_size - 1)
if at_least_this >= ngram_row_end:
break
ngram_item = ngram_start
int_map = self.int64_map_
ngram_size = 1
- while (int_map.has_leaves() and
- ngram_size <= max_gram_length and
- ngram_item < ngram_row_end):
+ while (
+ int_map.has_leaves()
+ and ngram_size <= max_gram_length
+ and ngram_item < ngram_row_end
+ ):
val = X_flat[ngram_item]
hit = int_map.find(val)
if hit is None:
@@ -370,17 +372,18 @@ def compute_impl( # type: ignore
break
def _run( # type: ignore
- self,
- X,
- max_gram_length=None,
- max_skip_count=None,
- min_gram_length=None,
- mode=None,
- ngram_counts=None,
- ngram_indexes=None,
- pool_int64s=None,
- pool_strings=None,
- weights=None):
+ self,
+ X,
+ max_gram_length=None,
+ max_skip_count=None,
+ min_gram_length=None,
+ mode=None,
+ ngram_counts=None,
+ ngram_indexes=None,
+ pool_int64s=None,
+ pool_strings=None,
+ weights=None,
+ ):
if self.mapping_ is not None:
xi = np.empty(X.shape, dtype=np.int64)
for i in range(0, X.shape[0]):
@@ -406,8 +409,7 @@ def _run( # type: ignore
num_rows = 1
C = 1
if total_items != 1:
- raise ValueError(
- f"Unexpected total of items {total_items}.")
+ raise ValueError(f"Unexpected total of items {total_items}.")
elif len(input_dims) == 1:
num_rows = 1
C = input_dims[0]
@@ -418,22 +420,24 @@ def _run( # type: ignore
if B < 1:
raise ValueError(
f"Input shape must have either [C] or [B,C] "
- f"dimensions with B > 0, B={B}, C={C}.")
+ f"dimensions with B > 0, B={B}, C={C}."
+ )
else:
raise ValueError(
f"Input shape must have either [C] or [B,C] "
- f"dimensions with B > 0, B={B}, C={C}.")
+ f"dimensions with B > 0, B={B}, C={C}."
+ )
if num_rows * C != total_items:
raise ValueError(
f"Unexpected total of items, num_rows * C = "
- f"{num_rows * C} != total_items = {total_items}.")
+ f"{num_rows * C} != total_items = {total_items}."
+ )
# Frequency holder allocate [B..output_size_] and init all to zero
- frequencies = np.zeros(
- (num_rows * self.output_size_,), dtype=np.int64)
+ frequencies = np.zeros((num_rows * self.output_size_,), dtype=np.int64)
if total_items == 0 or self.int64_map_.empty():
- return (self.output_result(B, frequencies), )
+ return (self.output_result(B, frequencies),)
def fn(row_num):
self.compute_impl(
@@ -449,7 +453,8 @@ def fn(row_num):
ngram_indexes=ngram_indexes,
pool_int64s=pool_int64s,
pool_strings=pool_strings,
- weights=weights)
+ weights=weights,
+ )
# can be parallelized.
for i in range(num_rows):
diff --git a/tests/test_utils/reference_implementation_tree.py b/tests/test_utils/reference_implementation_tree.py
index 1793c1c3a..6cf7abbf0 100644
--- a/tests/test_utils/reference_implementation_tree.py
+++ b/tests/test_utils/reference_implementation_tree.py
@@ -28,8 +28,7 @@ def _attribute_value(attr):
return list(attr.ints)
if attr.strings:
return list(map(_to_str, attr.strings))
- raise NotImplementedError(
- "Unable to return a value for attribute %r." % attr)
+ raise NotImplementedError("Unable to return a value for attribute %r." % attr)
class TreeEnsembleAttributes:
@@ -41,10 +40,11 @@ def add(self, name, value):
self._names.append(name)
if isinstance(value, list):
if name in {
- "base_values",
- "class_weights",
- "nodes_values",
- "nodes_hitrates"}:
+ "base_values",
+ "class_weights",
+ "nodes_values",
+ "nodes_hitrates",
+ }:
value = np.array(value, dtype=np.float32)
elif name.endswith("as_tensor"):
value = np.array(value)
@@ -71,18 +71,18 @@ def __init__(self, **kwargs):
self.atts.add(name, value)
self.tree_ids = list(sorted(set(self.atts.nodes_treeids)))
- self.root_index = {tid: len(self.atts.nodes_treeids)
- for tid in self.tree_ids}
+ self.root_index = {tid: len(self.atts.nodes_treeids) for tid in self.tree_ids}
for index, tree_id in enumerate(self.atts.nodes_treeids):
self.root_index[tree_id] = min(self.root_index[tree_id], index)
self.node_index = {
(tid, nid): i
for i, (tid, nid) in enumerate(
- zip(self.atts.nodes_treeids, self.atts.nodes_nodeids))}
+ zip(self.atts.nodes_treeids, self.atts.nodes_nodeids)
+ )
+ }
def __str__(self):
- rows = ["TreeEnsemble",
- f"root_index={self.root_index}", str(self.atts)]
+ rows = ["TreeEnsemble", f"root_index={self.root_index}", str(self.atts)]
return "\n".join(rows)
def leaf_index_tree(self, X, tree_id):
@@ -111,9 +111,13 @@ def leaf_index_tree(self, X, tree_id):
r = x != th
else:
raise ValueError(
- f"Unexpected rule {rule!r} for node index {index}.")
- nid = (self.atts.nodes_truenodeids[index]
- if r else self.atts.nodes_falsenodeids[index])
+ f"Unexpected rule {rule!r} for node index {index}."
+ )
+ nid = (
+ self.atts.nodes_truenodeids[index]
+ if r
+ else self.atts.nodes_falsenodeids[index]
+ )
index = self.node_index[tree_id, nid]
return index
@@ -134,41 +138,40 @@ def leave_index_tree(self, X):
if onnx_opset_version() >= 18:
from onnx.reference.op_run import OpRun
+
try:
- from .reference_implementation_helper import (
- ComputeProbit, write_scores)
+ from .reference_implementation_helper import ComputeProbit, write_scores
except ImportError:
- from reference_implementation_helper import (
- ComputeProbit, write_scores)
+ from reference_implementation_helper import ComputeProbit, write_scores
class TreeEnsembleRegressor(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(
- self,
- X,
- aggregate_function=None,
- base_values=None,
- base_values_as_tensor=None,
- n_targets=None,
- nodes_falsenodeids=None,
- nodes_featureids=None,
- nodes_hitrates=None,
- nodes_hitrates_as_tensor=None,
- nodes_missing_value_tracks_true=None,
- nodes_modes=None,
- nodes_nodeids=None,
- nodes_treeids=None,
- nodes_truenodeids=None,
- nodes_values=None,
- nodes_values_as_tensor=None,
- post_transform=None,
- target_ids=None,
- target_nodeids=None,
- target_treeids=None,
- target_weights=None,
- target_weights_as_tensor=None):
+ self,
+ X,
+ aggregate_function=None,
+ base_values=None,
+ base_values_as_tensor=None,
+ n_targets=None,
+ nodes_falsenodeids=None,
+ nodes_featureids=None,
+ nodes_hitrates=None,
+ nodes_hitrates_as_tensor=None,
+ nodes_missing_value_tracks_true=None,
+ nodes_modes=None,
+ nodes_nodeids=None,
+ nodes_treeids=None,
+ nodes_truenodeids=None,
+ nodes_values=None,
+ nodes_values_as_tensor=None,
+ post_transform=None,
+ target_ids=None,
+ target_nodeids=None,
+ target_treeids=None,
+ target_weights=None,
+ target_weights_as_tensor=None,
+ ):
nmv = nodes_missing_value_tracks_true
tr = TreeEnsemble(
base_values=base_values,
@@ -185,69 +188,70 @@ def _run(
nodes_values=nodes_values,
nodes_values_as_tensor=nodes_values_as_tensor,
target_weights=target_weights,
- target_weights_as_tensor=target_weights_as_tensor)
+ target_weights_as_tensor=target_weights_as_tensor,
+ )
self._tree = tr
leaves_index = tr.leave_index_tree(X)
- res = np.empty(
- (leaves_index.shape[0], n_targets), dtype=X.dtype)
+ res = np.empty((leaves_index.shape[0], n_targets), dtype=X.dtype)
if base_values is None:
res[:, :] = 0
else:
res[:, :] = np.array(base_values).reshape((1, -1))
target_index = {}
- for i, (tid, nid) in enumerate(
- zip(target_treeids, target_nodeids)):
+ for i, (tid, nid) in enumerate(zip(target_treeids, target_nodeids)):
if (tid, nid) not in target_index:
target_index[tid, nid] = []
target_index[tid, nid].append(i)
for i in range(res.shape[0]):
indices = leaves_index[i]
- t_index = [target_index[nodes_treeids[i], nodes_nodeids[i]]
- for i in indices]
+ t_index = [
+ target_index[nodes_treeids[i], nodes_nodeids[i]] for i in indices
+ ]
if aggregate_function == "SUM":
for its in t_index:
for it in its:
- res[i, target_ids[it]] += (
- tr.atts.target_weights[it])
+ res[i, target_ids[it]] += tr.atts.target_weights[it]
else:
raise NotImplementedError(
f"aggregate_transform={aggregate_function!r} "
- f"not supported yet.")
+ f"not supported yet."
+ )
if post_transform in (None, "NONE"):
return (res,)
raise NotImplementedError(
- f"post_transform={post_transform!r} not implemented.")
+ f"post_transform={post_transform!r} not implemented."
+ )
class TreeEnsembleClassifier(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(
- self,
- X,
- base_values=None,
- base_values_as_tensor=None,
- class_ids=None,
- class_nodeids=None,
- class_treeids=None,
- class_weights=None,
- class_weights_as_tensor=None,
- classlabels_int64s=None,
- classlabels_strings=None,
- nodes_falsenodeids=None,
- nodes_featureids=None,
- nodes_hitrates=None,
- nodes_hitrates_as_tensor=None,
- nodes_missing_value_tracks_true=None,
- nodes_modes=None,
- nodes_nodeids=None,
- nodes_treeids=None,
- nodes_truenodeids=None,
- nodes_values=None,
- nodes_values_as_tensor=None,
- post_transform=None):
+ self,
+ X,
+ base_values=None,
+ base_values_as_tensor=None,
+ class_ids=None,
+ class_nodeids=None,
+ class_treeids=None,
+ class_weights=None,
+ class_weights_as_tensor=None,
+ classlabels_int64s=None,
+ classlabels_strings=None,
+ nodes_falsenodeids=None,
+ nodes_featureids=None,
+ nodes_hitrates=None,
+ nodes_hitrates_as_tensor=None,
+ nodes_missing_value_tracks_true=None,
+ nodes_modes=None,
+ nodes_nodeids=None,
+ nodes_treeids=None,
+ nodes_truenodeids=None,
+ nodes_values=None,
+ nodes_values_as_tensor=None,
+ post_transform=None,
+ ):
nmv = nodes_missing_value_tracks_true
tr = TreeEnsemble(
nodes_falsenodeids=nodes_falsenodeids,
@@ -262,15 +266,16 @@ def _run(
nodes_values=nodes_values,
nodes_values_as_tensor=nodes_values_as_tensor,
class_weights=class_weights,
- class_weights_as_tensor=class_weights_as_tensor)
+ class_weights_as_tensor=class_weights_as_tensor,
+ )
self._tree = tr
if X.dtype not in (np.float32, np.float64):
X = X.astype(np.float32)
leaves_index = tr.leave_index_tree(X)
n_classes = max(
- len(classlabels_int64s or []), len(classlabels_strings or []))
- res = np.empty(
- (leaves_index.shape[0], n_classes), dtype=np.float32)
+ len(classlabels_int64s or []), len(classlabels_strings or [])
+ )
+ res = np.empty((leaves_index.shape[0], n_classes), dtype=np.float32)
if base_values is None:
res[:, :] = 0
else:
@@ -283,8 +288,9 @@ def _run(
class_index[tid, nid].append(i)
for i in range(res.shape[0]):
indices = leaves_index[i]
- t_index = [class_index[nodes_treeids[i], nodes_nodeids[i]]
- for i in indices]
+ t_index = [
+ class_index[nodes_treeids[i], nodes_nodeids[i]] for i in indices
+ ]
for its in t_index:
for it in its:
res[i, class_ids[it]] += tr.atts.class_weights[it]
@@ -317,8 +323,8 @@ def _run(
new_scores = np.empty((res.shape[0], nc), dtype=res.dtype)
for i in range(res.shape[0]):
new_scores[i, :] = write_scores(
- res.shape[1], res[i], post_transform,
- add_second_class)
+ res.shape[1], res[i], post_transform, add_second_class
+ )
# labels
labels = np.argmax(new_scores, axis=1).astype(np.int64)
@@ -326,21 +332,21 @@ def _run(
if len(classlabels_int64s) == 1:
if classlabels_int64s[0] == 1:
d = {1: 1}
- labels = np.array(
- [d.get(i, 0) for i in labels], dtype=np.int64)
+ labels = np.array([d.get(i, 0) for i in labels], dtype=np.int64)
else:
raise NotImplementedError(
f"classlabels_int64s={classlabels_int64s}, "
- f"not supported.")
+ f"not supported."
+ )
else:
labels = np.array(
- [classlabels_int64s[i] for i in labels],
- dtype=np.int64)
+ [classlabels_int64s[i] for i in labels], dtype=np.int64
+ )
elif classlabels_strings is not None:
if len(classlabels_strings) == 1:
raise NotImplementedError(
- f"classlabels_strings={classlabels_strings}, "
- f"not supported.")
+ f"classlabels_strings={classlabels_strings}, " f"not supported."
+ )
labels = np.array([classlabels_strings[i] for i in labels])
return labels, new_scores
@@ -352,27 +358,26 @@ def _run(
from sklearn.ensemble import (
RandomForestRegressor,
RandomForestClassifier,
- BaggingClassifier)
+ BaggingClassifier,
+ )
from skl2onnx import to_onnx
from reference_implementation_afe import ArrayFeatureExtractor
class ArgMax(_ArgMax):
- def _run(self, data, axis=None, keepdims=None,
- select_last_index=None):
+ def _run(self, data, axis=None, keepdims=None, select_last_index=None):
if select_last_index == 0: # type: ignore
- return _ArgMax._run(
- self, data, axis=axis, keepdims=keepdims)
+ return _ArgMax._run(self, data, axis=axis, keepdims=keepdims)
raise NotImplementedError("Unused in sklearn-onnx.")
# classification 1
X, y = make_classification(
- 100, n_features=6, n_classes=3, n_informative=3, n_redundant=0)
+ 100, n_features=6, n_classes=3, n_informative=3, n_redundant=0
+ )
model = BaggingClassifier().fit(X, y)
- onx = to_onnx(model, X.astype(np.float32),
- options={"zipmap": False})
+ onx = to_onnx(model, X.astype(np.float32), options={"zipmap": False})
tr = ReferenceEvaluator(
- onx, new_ops=[TreeEnsembleClassifier,
- ArrayFeatureExtractor, ArgMax])
+ onx, new_ops=[TreeEnsembleClassifier, ArrayFeatureExtractor, ArgMax]
+ )
print("-----------------------")
print(tr.run(None, {"X": X[:10].astype(np.float32)}))
print("--")
@@ -382,8 +387,7 @@ def _run(self, data, axis=None, keepdims=None,
# classification 2
model = RandomForestClassifier(max_depth=3, n_estimators=2).fit(X, y)
- onx = to_onnx(model, X.astype(np.float32),
- options={"zipmap": False})
+ onx = to_onnx(model, X.astype(np.float32), options={"zipmap": False})
tr = ReferenceEvaluator(onx, new_ops=[TreeEnsembleClassifier])
print(tr.run(None, {"X": X[:5].astype(np.float32)}))
print(model.predict(X[:5].astype(np.float32)))
diff --git a/tests/test_utils/reference_implementation_zipmap.py b/tests/test_utils/reference_implementation_zipmap.py
index 33aa66981..002328a4d 100644
--- a/tests/test_utils/reference_implementation_zipmap.py
+++ b/tests/test_utils/reference_implementation_zipmap.py
@@ -46,17 +46,13 @@ def __init__(self, rev_keys, values, mat=None):
self._mat = mat
def __eq__(self, o):
- raise NotImplementedError(
- "__eq__ not available for ZipMapDictionary."
- )
+ raise NotImplementedError("__eq__ not available for ZipMapDictionary.")
def __getstate__(self):
"""
For pickle.
"""
- return dict(
- _rev_keys=self._rev_keys, _values=self._values, _mat=self._mat
- )
+ return dict(_rev_keys=self._rev_keys, _values=self._values, _mat=self._mat)
def __setstate__(self, state):
"""
@@ -84,9 +80,7 @@ def __len__(self):
"""
Returns the number of items.
"""
- return (
- len(self._values) if self._mat is None else self._mat.shape[1]
- )
+ return len(self._values) if self._mat is None else self._mat.shape[1]
def __iter__(self):
for k in self._rev_keys:
@@ -153,9 +147,7 @@ def __init__(self, rev_keys, mat):
self._mat = mat
def __eq__(self, o):
- raise NotImplementedError(
- "__eq__ not available for ArrayZipMapDictionary."
- )
+ raise NotImplementedError("__eq__ not available for ArrayZipMapDictionary.")
@property
def dtype(self):
@@ -172,9 +164,7 @@ def __getitem__(self, i):
return ZipMapDictionary(self._rev_keys, i, self._mat)
def __setitem__(self, pos, value):
- raise LookupError(
- f"Changing an element is not supported (pos=[{pos}])."
- )
+ raise LookupError(f"Changing an element is not supported (pos=[{pos}]).")
@property
def values(self):
@@ -217,15 +207,13 @@ def __str__(self):
return f"ZipMaps[{', '.join(map(str, self))}]"
class ZipMap(OpRun):
-
op_domain = "ai.onnx.ml"
def _run(self, x, classlabels_int64s=None, classlabels_strings=None):
if classlabels_int64s:
rev_keys_ = ZipMapDictionary.build_rev_keys(classlabels_int64s)
elif classlabels_strings:
- rev_keys_ = ZipMapDictionary.build_rev_keys(
- classlabels_strings)
+ rev_keys_ = ZipMapDictionary.build_rev_keys(classlabels_strings)
else:
rev_keys_ = {}
res = ArrayZipMapDictionary(rev_keys_, x)
diff --git a/tests/test_utils/tests_helper.py b/tests/test_utils/tests_helper.py
index 2c75b5552..733064f0c 100644
--- a/tests/test_utils/tests_helper.py
+++ b/tests/test_utils/tests_helper.py
@@ -42,9 +42,7 @@ def _has_decision_function(model):
return hasattr(model, "decision_function")
-disable_dump = (
- os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined"
-)
+disable_dump = os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined"
def _has_transform_model(model):
@@ -86,9 +84,7 @@ def fit_classification_model(
X = numpy.abs(X)
if is_bool:
X = X.astype(bool)
- X_train, X_test, y_train, _ = train_test_split(
- X, y, test_size=0.5, random_state=42
- )
+ X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42)
model.fit(X_train, y_train)
return model, X_test
@@ -137,9 +133,7 @@ def fit_multilabel_classification_model(
random_state=42,
)
X = X.astype(numpy.int64) if is_int else X.astype(numpy.float32)
- X_train, X_test, y_train, _ = train_test_split(
- X, y, test_size=0.5, random_state=42
- )
+ X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42)
model.fit(X_train, y_train)
return model, X_test
@@ -153,9 +147,7 @@ def fit_multi_output_classification_model(
n_outputs=2,
):
numpy.random.seed(0)
- X_train = numpy.random.randint(
- 0, n_informative, size=(n_samples, n_features)
- )
+ X_train = numpy.random.randint(0, n_informative, size=(n_samples, n_features))
y_train = numpy.random.randint(0, n_classes, size=(n_samples, n_outputs))
model = RandomForestClassifier()
model.fit(X_train, y_train)
@@ -184,9 +176,7 @@ def fit_regression_model(
X = X.astype(numpy.int64) if is_int or is_bool else X.astype(numpy.float32)
if is_bool:
X = X.astype(bool)
- X_train, X_test, y_train, _ = train_test_split(
- X, y, test_size=0.5, random_state=42
- )
+ X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5, random_state=42)
model.fit(X_train, y_train)
return model, X_test
@@ -332,9 +322,7 @@ def _raw_score_binary_classification(model, X):
scores = scores.reshape(-1, 1)
if len(scores.shape) != 2 or scores.shape[1] != 1:
raise RuntimeError(
- "Unexpected shape {} for a binary classifiation".format(
- scores.shape
- )
+            "Unexpected shape {} for a binary classification".format(scores.shape)
)
return numpy.hstack([-scores, scores])
@@ -351,9 +339,10 @@ def call(X, model=model):
call = getattr(model, method)
except AttributeError as e:
if method == "decision_function_binary":
+
def call(X, model=model):
- return _raw_score_binary_classification(
- model, X)
+ return _raw_score_binary_classification(model, X)
+
else:
raise e
if callable(call):
@@ -364,9 +353,7 @@ def lambda_original():
return call(dataone) # noqa
else:
- raise RuntimeError(
- "Method '{0}' is not callable.".format(method)
- )
+ raise RuntimeError("Method '{0}' is not callable.".format(method))
else:
if hasattr(model, "predict"):
if _has_predict_proba(model):
@@ -383,9 +370,9 @@ def lambda_original():
model.decision_function(data),
]
- def lambda_original(): return model.decision_function(
- dataone
- ) # noqa
+ def lambda_original():
+ return model.decision_function(dataone) # noqa
+
elif _has_transform_model(model):
# clustering
try:
@@ -433,9 +420,7 @@ def lambda_original():
else:
raise TypeError(
- "Model has no predict or transform method: {0}".format(
- type(model)
- )
+ "Model has no predict or transform method: {0}".format(type(model))
)
runtime_test["expected"] = prediction
@@ -547,9 +532,7 @@ def lambda_original():
if output is not None:
if not disable_dump:
- dest = os.path.join(
- folder, basename + ".backend.{0}.pkl".format(b)
- )
+ dest = os.path.join(folder, basename + ".backend.{0}.pkl".format(b))
names.append(dest)
with open(dest, "wb") as f:
pickle.dump(output, f)
@@ -560,8 +543,8 @@ def lambda_original():
):
# run a benchmark
obs = compute_benchmark(
- {"onnxrt": lambda_onnx,
- "original": lambda_original})
+ {"onnxrt": lambda_onnx, "original": lambda_original}
+ )
df = pandas.DataFrame(obs)
df["input_size"] = sys.getsizeof(dataone)
dest = os.path.join(folder, basename + ".bench")
@@ -589,9 +572,7 @@ def convert_model(model, name, input_types, target_opset=None):
"Sklearn",
)
if model is None:
- raise RuntimeError(
- "Unable to convert model of type '{0}'.".format(type(model))
- )
+ raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model)))
return model, prefix
@@ -726,9 +707,7 @@ def dump_multiple_classification(
y = [i + first_class for i in y]
if label_string:
if label_uint8:
- raise AssertionError(
- "label_string and label_uint8 cannot be both True"
- )
+ raise AssertionError("label_string and label_uint8 cannot be both True")
y = ["l%d" % i for i in y]
suffix += "String"
elif label_uint8:
@@ -737,9 +716,7 @@ def dump_multiple_classification(
model.fit(X, y)
if verbose:
print(
- "[dump_multiple_classification] model '{}'".format(
- model.__class__.__name__
- )
+ "[dump_multiple_classification] model '{}'".format(model.__class__.__name__)
)
model_onnx, prefix = convert_model(
model,
@@ -767,9 +744,7 @@ def dump_multiple_classification(
model.fit(X, y)
if verbose:
print(
- "[dump_multiple_classification] model '{}'".format(
- model.__class__.__name__
- )
+ "[dump_multiple_classification] model '{}'".format(model.__class__.__name__)
)
model_onnx, prefix = convert_model(
model,
@@ -862,8 +837,7 @@ def dump_multilabel_classification(
)
)
model_onnx, prefix = convert_model(
- model, "multi-class classifier", [("input",
- FloatTensorType([None, 2]))]
+ model, "multi-class classifier", [("input", FloatTensorType([None, 2]))]
)
if verbose:
print("[make_multilabel_classification] model was converted")
@@ -880,13 +854,14 @@ def dump_multilabel_classification(
def dump_multiple_regression(
- model,
- suffix="",
- folder=None,
- allow_failure=None,
- comparable_outputs=None,
- verbose=False,
- target_opset=None):
+ model,
+ suffix="",
+ folder=None,
+ allow_failure=None,
+ comparable_outputs=None,
+ verbose=False,
+ target_opset=None,
+):
"""
Trains and dumps a model for a multi regression problem.
The function trains a model and calls
@@ -903,7 +878,8 @@ def dump_multiple_regression(
model,
"multi-regressor",
[("input", FloatTensorType([None, 2]))],
- target_opset=target_opset)
+ target_opset=target_opset,
+ )
dump_data_and_model(
X,
model,
@@ -912,16 +888,18 @@ def dump_multiple_regression(
allow_failure=allow_failure,
basename=prefix + "MRg" + model.__class__.__name__ + suffix,
verbose=verbose,
- comparable_outputs=comparable_outputs)
+ comparable_outputs=comparable_outputs,
+ )
def dump_single_regression(
- model,
- suffix="",
- folder=None,
- allow_failure=None,
- comparable_outputs=None,
- target_opset=None):
+ model,
+ suffix="",
+ folder=None,
+ allow_failure=None,
+ comparable_outputs=None,
+ target_opset=None,
+):
"""
Trains and dumps a model for a regression problem.
The function trains a model and calls
@@ -938,7 +916,8 @@ def dump_single_regression(
model,
"single regressor",
[("input", FloatTensorType([None, 2]))],
- target_opset=target_opset)
+ target_opset=target_opset,
+ )
dump_data_and_model(
X,
model,
@@ -946,7 +925,8 @@ def dump_single_regression(
folder=folder,
allow_failure=allow_failure,
basename=prefix + "Reg" + model.__class__.__name__ + suffix,
- comparable_outputs=comparable_outputs)
+ comparable_outputs=comparable_outputs,
+ )
def timeit_repeat(fct, number, repeat):
@@ -994,7 +974,8 @@ def timeexec(fct, number, repeat):
repeat=repeat,
min5=mini,
max5=maxi,
- run=number)
+ run=number,
+ )
def compute_benchmark(fcts, number=10, repeat=100):
@@ -1125,8 +1106,8 @@ def make_report_backend(folder, as_df=False, verbose=0):
if benched == 0:
raise RuntimeError(
- "No benchmark files in '{0}', found:\n{1}".format(
- folder, "\n".join(files)))
+ "No benchmark files in '{0}', found:\n{1}".format(folder, "\n".join(files))
+ )
def dict_update(d, u):
d.update(u)
@@ -1170,8 +1151,7 @@ def dict_update(d, u):
# execution failed
pass
try:
- row["ratio_nodes"] = (
- row["nb_onnx_nodes"] / row["nb_estimators"])
+ row["ratio_nodes"] = row["nb_onnx_nodes"] / row["nb_estimators"]
except KeyError:
# execution failed
pass
@@ -1195,8 +1175,7 @@ def binary_array_to_string(mat):
def path_to_leaf(tree, mat, tree_indices=None):
if tree_indices is None:
# single tree
- leave = set([i for i in range(tree.node_count)
- if tree.children_left[i] <= i])
+ leave = set([i for i in range(tree.node_count) if tree.children_left[i] <= i])
res = []
for row in range(mat.shape[0]):
leaf = None
@@ -1211,7 +1190,7 @@ def path_to_leaf(tree, mat, tree_indices=None):
leaves = []
for i in range(0, len(tree)):
- mm = mat[:, tree_indices[i]: tree_indices[i + 1]]
+ mm = mat[:, tree_indices[i] : tree_indices[i + 1]]
tt = tree[i].tree_ if hasattr(tree[i], "tree_") else tree[i]
res = path_to_leaf(tt, mm)
leaves.append(numpy.array(res, dtype=numpy.int64))
diff --git a/tests/test_utils/utils_backend.py b/tests/test_utils/utils_backend.py
index 841b6f4c0..1e91c38b3 100644
--- a/tests/test_utils/utils_backend.py
+++ b/tests/test_utils/utils_backend.py
@@ -75,9 +75,7 @@ def is_backend_enabled(backend):
return False
if backend == "onnx":
return onnx_opset_version() >= 18
- raise NotImplementedError(
- "Not implemented for backend '{0}'".format(backend)
- )
+ raise NotImplementedError("Not implemented for backend '{0}'".format(backend))
def compare_backend(
@@ -279,17 +277,12 @@ def compare_outputs(expected, output, verbose=False, **kwargs):
kwargs["decimal"] = min(kwargs["decimal"], 2)
if Dec1:
kwargs["decimal"] = min(kwargs["decimal"], 1)
- if isinstance(expected, numpy.ndarray) and isinstance(
- output, numpy.ndarray
- ):
+ if isinstance(expected, numpy.ndarray) and isinstance(output, numpy.ndarray):
if SkipDim1:
# Arrays like (2, 1, 2, 3) becomes (2, 2, 3)
# as one dimension is useless.
- expected = expected.reshape(
- tuple([d for d in expected.shape if d > 1])
- )
- output = output.reshape(
- tuple([d for d in expected.shape if d > 1]))
+ expected = expected.reshape(tuple([d for d in expected.shape if d > 1]))
+ output = output.reshape(tuple([d for d in expected.shape if d > 1]))
if NoProb or NoProbOpp:
# One vector is (N,) with scores, negative for class 0
# positive for class 1
@@ -315,21 +308,11 @@ def compare_outputs(expected, output, verbose=False, **kwargs):
output = -output
elif expected.shape != output.shape:
raise NotImplementedError(
- "Shape mismatch: {0} != {1}".format(
- expected.shape, output.shape
- )
+ "Shape mismatch: {0} != {1}".format(expected.shape, output.shape)
)
- if (
- len(expected.shape) == 1
- and len(output.shape) == 2
- and output.shape[1] == 1
- ):
+ if len(expected.shape) == 1 and len(output.shape) == 2 and output.shape[1] == 1:
output = output.ravel()
- if (
- len(output.shape) == 3
- and output.shape[0] == 1
- and len(expected.shape) == 2
- ):
+ if len(output.shape) == 3 and output.shape[0] == 1 and len(expected.shape) == 2:
output = output.reshape(output.shape[1:])
if expected.dtype in (
numpy.str_,
@@ -346,14 +329,10 @@ def compare_outputs(expected, output, verbose=False, **kwargs):
return OnnxRuntimeAssertionError(str(e))
else:
try:
- assert_array_almost_equal(
- expected, output, verbose=verbose, **kwargs
- )
+ assert_array_almost_equal(expected, output, verbose=verbose, **kwargs)
except Exception as e:
longer = (
- "\n--EXPECTED--\n{0}\n--OUTPUT--\n{1}".format(
- expected, output
- )
+ "\n--EXPECTED--\n{0}\n--OUTPUT--\n{1}".format(expected, output)
if verbose
else ""
)
@@ -364,8 +343,7 @@ def compare_outputs(expected, output, verbose=False, **kwargs):
diff = numpy.abs(expected_ - output_).max()
else:
diff = max(
- (1 if ci != cj else 0)
- for ci, cj in zip(expected_, output_)
+ (1 if ci != cj else 0) for ci, cj in zip(expected_, output_)
)
if diff == 0:
return None
diff --git a/tests/test_utils/utils_backend_onnx.py b/tests/test_utils/utils_backend_onnx.py
index 1df2ae22f..ff9cb5763 100644
--- a/tests/test_utils/utils_backend_onnx.py
+++ b/tests/test_utils/utils_backend_onnx.py
@@ -13,6 +13,7 @@
from onnx import AttributeProto, numpy_helper
import onnx as onnx_package
from onnx.defs import onnx_opset_version
+
try:
from onnx.helper import tensor_dtype_to_string
except ImportError:
@@ -20,7 +21,8 @@
from skl2onnx.helpers.onnx_helper import (
select_model_inputs_outputs,
enumerate_model_node_outputs,
- enumerate_model_initializers)
+ enumerate_model_initializers,
+)
from skl2onnx.algebra.type_helper import _guess_type
from scipy.spatial.distance import cdist
from .utils_backend import (
@@ -29,7 +31,8 @@
ExpectedAssertionError,
OnnxRuntimeAssertionError,
OnnxRuntimeMissingNewOnnxOperatorException,
- compare_outputs)
+ compare_outputs,
+)
if onnx_opset_version() >= 18:
@@ -58,8 +61,7 @@ def _run(self, x, y, metric="euclidean"):
from onnx.reference.op_run import RuntimeContextError
from onnx.reference.ops.op_argmin import _ArgMin, _argmin
from onnx.reference.ops.op_argmax import _ArgMax, _argmax
- from onnx.reference.ops.op_reduce_log_sum_exp import (
- compute_log_sum_exp)
+ from onnx.reference.ops.op_reduce_log_sum_exp import compute_log_sum_exp
from onnx.reference.ops.op_scan import Scan as _Scan
from .reference_implementation_ml import (
Binarizer,
@@ -78,35 +80,30 @@ def _run(self, x, y, metric="euclidean"):
from .reference_implementation_afe import ArrayFeatureExtractor
from .reference_implementation_tree import (
TreeEnsembleClassifier,
- TreeEnsembleRegressor)
- from .reference_implementation_svm import (
- SVMClassifier,
- SVMRegressor)
+ TreeEnsembleRegressor,
+ )
+ from .reference_implementation_svm import SVMClassifier, SVMRegressor
from .reference_implementation_text import TfIdfVectorizer
class ArgMin(_ArgMin):
- def _run(self, data, axis=None, keepdims=None,
- select_last_index=None):
+ def _run(self, data, axis=None, keepdims=None, select_last_index=None):
if select_last_index == 0:
if keepdims == 0:
- return _ArgMin._run(
- self, data, axis=axis, keepdims=keepdims)
+ return _ArgMin._run(self, data, axis=axis, keepdims=keepdims)
return (_argmin(data, axis=axis, keepdims=keepdims),)
raise NotImplementedError("Unused in sklearn-onnx.")
class ArgMax(_ArgMax):
- def _run(self, data, axis=None, keepdims=None,
- select_last_index=None):
+ def _run(self, data, axis=None, keepdims=None, select_last_index=None):
if select_last_index == 0:
if keepdims == 0:
- return _ArgMax._run(
- self, data, axis=axis, keepdims=keepdims)
+ return _ArgMax._run(self, data, axis=axis, keepdims=keepdims)
try:
return (_argmax(data, axis=axis, keepdims=keepdims),)
except Exception as e:
raise RuntimeError(
- f"Issue with shape={data.shape} "
- f"and axis={axis}.") from e
+ f"Issue with shape={data.shape} " f"and axis={axis}."
+ ) from e
raise NotImplementedError("Unused in sklearn-onnx.")
class ReduceLogSumExp_1(OpRunReduceNumpy):
@@ -115,8 +112,7 @@ def _run(self, data, axes=None, keepdims=None, **kwargs):
return compute_log_sum_exp(data, tax, keepdims)
class ReduceLogSumExp_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
tax = tuple(axes) if axes is not None else None
return compute_log_sum_exp(data, tax, keepdims)
@@ -126,91 +122,88 @@ def _run(self, data, axes=None, keepdims=1, **kwargs):
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
return (
- np.sqrt(np.sum(np.square(data), axis=axes,
- keepdims=keepdims)).astype(
- dtype=data.dtype),)
+ np.sqrt(
+ np.sum(np.square(data), axis=axes, keepdims=keepdims)
+ ).astype(dtype=data.dtype),
+ )
class ReduceL2_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
return (
- np.sqrt(np.sum(np.square(data), axis=axes,
- keepdims=keepdims)).astype(
- dtype=data.dtype),)
+ np.sqrt(
+ np.sum(np.square(data), axis=axes, keepdims=keepdims)
+ ).astype(dtype=data.dtype),
+ )
class ReduceMean_1(OpRunReduceNumpy):
def _run(self, data, axes=None, keepdims=None, **kwargs):
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.mean(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.mean(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceMean_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.mean(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.mean(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceMax_1(OpRunReduceNumpy):
def _run(self, data, axes=None, keepdims=None, **kwargs):
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.max(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.max(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceMax_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.max(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.max(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceProd_1(OpRunReduceNumpy):
def _run(self, data, axes=None, keepdims=None, **kwargs):
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.prod(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.prod(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceProd_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.prod(data, axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (np.prod(data, axis=axes, keepdims=keepdims).astype(data.dtype),)
class ReduceSumSquare_1(OpRunReduceNumpy):
def _run(self, data, axes=None, keepdims=None, **kwargs):
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.sum(np.square(data), axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (
+ np.sum(np.square(data), axis=axes, keepdims=keepdims).astype(
+ data.dtype
+ ),
+ )
class ReduceSumSquare_18(OpRunReduceNumpy):
- def _run(self, data, axes=None, keepdims=None,
- noop_with_empty_axes=None):
+ def _run(self, data, axes=None, keepdims=None, noop_with_empty_axes=None):
assert noop_with_empty_axes != 1
axes = tuple(axes) if axes is not None else None
keepdims = keepdims != 0 # type: ignore
- return (np.sum(np.square(data), axis=axes,
- keepdims=keepdims).astype(data.dtype),)
+ return (
+ np.sum(np.square(data), axis=axes, keepdims=keepdims).astype(
+ data.dtype
+ ),
+ )
class ConstantOfShape(OpRun):
def __init__(self, onnx_node, run_params): # type: ignore
OpRun.__init__(self, onnx_node, run_params)
self.cst = (
- self.value[0] if isinstance(self.value, np.ndarray)
- else self.value)
+ self.value[0] if isinstance(self.value, np.ndarray) else self.value
+ )
if isinstance(self.cst, int):
self.cst = np.int64(self.cst)
elif isinstance(self.cst, float):
@@ -218,8 +211,9 @@ def __init__(self, onnx_node, run_params): # type: ignore
elif self.cst is None:
self.cst = np.float32(0)
if not isinstance(
- self.cst, (np.float32, np.float64, np.int64,
- np.int32, np.bool_, np.float16)):
+ self.cst,
+ (np.float32, np.float64, np.int64, np.int32, np.bool_, np.float16),
+ ):
raise TypeError(f"cst must be a real not {type(self.cst)}")
def _run(self, data, value=None):
@@ -229,18 +223,21 @@ def _run(self, data, value=None):
raise RuntimeError(
f"Unable to create a constant of shape {data!r} "
f"with value {self.cst!r} "
- f"(raw value={value!r}).") from e
+ f"(raw value={value!r})."
+ ) from e
return (res,)
class Where(OpRun):
def _run(self, condition, x, y): # type: ignore
- if (x.dtype != y.dtype and
- x.dtype not in (np.object_,) and
- not (x.dtype.type is np.str_ and
- y.dtype.type is np.str_)):
+ if (
+ x.dtype != y.dtype
+ and x.dtype not in (np.object_,)
+ and not (x.dtype.type is np.str_ and y.dtype.type is np.str_)
+ ):
raise RuntimeError(
f"x and y should share the same dtype "
- f"{x.dtype} != {y.dtype}")
+ f"{x.dtype} != {y.dtype}"
+ )
return (np.where(condition, x, y).astype(x.dtype),)
class Scan(_Scan):
@@ -251,42 +248,51 @@ def _extract_attribute_value(self, att, ref_att=None):
att.g,
opsets=self.run_params["opsets"],
verbose=max(0, self.run_params.get("verbose", 0) - 2),
- new_ops=None if new_ops is None else new_ops.values())
+ new_ops=None if new_ops is None else new_ops.values(),
+ )
return super()._extract_attribute_value(att, ref_att)
- additional_implementations.extend([
- # ai.onnx
- ArgMax,
- ArgMin,
- ConstantOfShape,
- ReduceL2_1, ReduceL2_18,
- ReduceLogSumExp_1, ReduceLogSumExp_18,
- ReduceMax_1, ReduceMax_18,
- ReduceMean_1, ReduceMean_18,
- ReduceProd_1, ReduceProd_18,
- ReduceSumSquare_1, ReduceSumSquare_18,
- Where,
- # ai.onnx.ml
- ArrayFeatureExtractor,
- Binarizer,
- DictVectorizer,
- FeatureVectorizer,
- FusedMatMul,
- Imputer,
- LabelEncoder,
- LinearClassifier,
- LinearRegressor,
- Normalizer,
- OneHotEncoder,
- TfIdfVectorizer,
- TreeEnsembleClassifier,
- TreeEnsembleRegressor,
- Scaler,
- Scan,
- SVMClassifier,
- SVMRegressor,
- ZipMap,
- ])
+ additional_implementations.extend(
+ [
+ # ai.onnx
+ ArgMax,
+ ArgMin,
+ ConstantOfShape,
+ ReduceL2_1,
+ ReduceL2_18,
+ ReduceLogSumExp_1,
+ ReduceLogSumExp_18,
+ ReduceMax_1,
+ ReduceMax_18,
+ ReduceMean_1,
+ ReduceMean_18,
+ ReduceProd_1,
+ ReduceProd_18,
+ ReduceSumSquare_1,
+ ReduceSumSquare_18,
+ Where,
+ # ai.onnx.ml
+ ArrayFeatureExtractor,
+ Binarizer,
+ DictVectorizer,
+ FeatureVectorizer,
+ FusedMatMul,
+ Imputer,
+ LabelEncoder,
+ LinearClassifier,
+ LinearRegressor,
+ Normalizer,
+ OneHotEncoder,
+ TfIdfVectorizer,
+ TreeEnsembleClassifier,
+ TreeEnsembleRegressor,
+ Scaler,
+ Scan,
+ SVMClassifier,
+ SVMRegressor,
+ ZipMap,
+ ]
+ )
class ReferenceEvaluatorEx(ReferenceEvaluator):
def __init__(self, *args, new_ops=None, **kwargs):
@@ -303,7 +309,7 @@ def __init__(self, *args, new_ops=None, **kwargs):
raise TypeError(f"Not implemented for {type(args[0])}.")
main_domain = None
for dom in model.opset_import:
- if dom.domain == '':
+ if dom.domain == "":
main_domain = dom.version
if main_domain is None:
main_domain = 1
@@ -316,7 +322,7 @@ def __init__(self, *args, new_ops=None, **kwargs):
new_new_ops = []
many = {}
for op in new_ops:
- if op.op_domain != '':
+ if op.op_domain != "":
new_new_ops.append(op)
continue
name = op.__name__
@@ -401,22 +407,24 @@ def _log_arg(self, a):
elements = a.ravel().tolist()
if len(elements) > 5:
elements = elements[:5]
- return (
- f"{a.dtype}:{a.shape}:"
- f"{','.join(map(str, elements))}...")
+ return f"{a.dtype}:{a.shape}:" f"{','.join(map(str, elements))}..."
return f"{a.dtype}:{a.shape}:{elements}"
if hasattr(a, "append"):
return ", ".join(map(self._log_arg, a))
return a
def get_inputs(self):
- res = [InputDef(n, list(get_shape(t, True)), get_type(t))
- for n, t in zip(self.input_names, self.input_types)]
+ res = [
+ InputDef(n, list(get_shape(t, True)), get_type(t))
+ for n, t in zip(self.input_names, self.input_types)
+ ]
return res
def get_outputs(self):
- res = [InputDef(n, list(get_shape(t, True)), get_type(t))
- for n, t in zip(self.output_names, self.output_types)]
+ res = [
+ InputDef(n, list(get_shape(t, True)), get_type(t))
+ for n, t in zip(self.output_names, self.output_types)
+ ]
return res
def run(self, *args, **kwargs):
@@ -431,20 +439,23 @@ def replay_run(self, verbose=10):
args, kwargs = self.last_inputs
with contextlib.redirect_stdout(st):
self.run(*args, **kwargs)
- classes = [st.getvalue(),
- "--",
- f"main_domain={self._main_domain}",
- "--",
- "\n".join(sorted(map(str, self._new_ops))),
- "--",
- "\n".join(map(str, self._opset_import)),
- "--"]
+ classes = [
+ st.getvalue(),
+ "--",
+ f"main_domain={self._main_domain}",
+ "--",
+ "\n".join(sorted(map(str, self._new_ops))),
+ "--",
+ "\n".join(map(str, self._opset_import)),
+ "--",
+ ]
for rt in self.rt_nodes_:
classes.append(str(type(rt)))
if hasattr(rt, "body"):
for rt2 in rt.body.rt_nodes_:
classes.append(f" {str(type(rt2))}")
return "\n".join(classes)
+
else:
ReferenceEvaluatorEx = None
@@ -491,8 +502,7 @@ def __init__(self, name, shape, dtype):
def get_shape(t, use_none=False):
if t.tensor_type:
- dims = [getattr(d, 'dim_value', None)
- for d in t.tensor_type.shape.dim]
+ dims = [getattr(d, "dim_value", None) for d in t.tensor_type.shape.dim]
if use_none:
return tuple(r if r != 0 else None for r in dims)
return tuple(dims)
@@ -506,33 +516,35 @@ def get_type(t):
else:
res = tensor_dtype_to_string(t.tensor_type.elem_type)
maps = {
- 'TensorProto.STRING': 'tensor(string)',
- 'TensorProto.INT64': 'tensor(int64)',
- 'TensorProto.INT32': 'tensor(int32)',
- 'TensorProto.DOUBLE': 'tensor(double)',
- 'TensorProto.FLOAT': 'tensor(float)',
- 'TensorProto.BOOL': 'tensor(bool)',
+ "TensorProto.STRING": "tensor(string)",
+ "TensorProto.INT64": "tensor(int64)",
+ "TensorProto.INT32": "tensor(int32)",
+ "TensorProto.DOUBLE": "tensor(double)",
+ "TensorProto.FLOAT": "tensor(float)",
+ "TensorProto.BOOL": "tensor(bool)",
}
return maps[res]
return None
def get_inputs(sess):
- return [InputDef(n, get_shape(t), get_type(t))
- for n, t in zip(sess.input_names,
- sess.input_types)]
+ return [
+ InputDef(n, get_shape(t), get_type(t))
+ for n, t in zip(sess.input_names, sess.input_types)
+ ]
def compare_runtime(
- test,
- decimal=5,
- options=None,
- verbose=0,
- context=None,
- comparable_outputs=None,
- intermediate_steps=False,
- classes=None,
- disable_optimisation=False):
+ test,
+ decimal=5,
+ options=None,
+ verbose=0,
+ context=None,
+ comparable_outputs=None,
+ intermediate_steps=False,
+ classes=None,
+ disable_optimisation=False,
+):
"""
The function compares the expected output (computed with
the model before being converted to ONNX) and the ONNX output
@@ -586,24 +598,29 @@ def compare_runtime(
_display_intermediate_steps(onx, None, disable_optimisation)
if verbose:
import onnx
+
model = onnx.load(onx)
smodel = "\nJSON ONNX\n" + str(model)
else:
smodel = ""
- if ("NOT_IMPLEMENTED : Could not find an implementation "
- "for the node" in str(e)):
+ if "NOT_IMPLEMENTED : Could not find an implementation " "for the node" in str(
+ e
+ ):
# onnxruntime does not implement a specific node yet.
raise OnnxRuntimeMissingNewOnnxOperatorException(
"ReferenceEvaluator does not implement a new operator "
- "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel))
+ "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel)
+ )
if "is not a registered function/op" in str(e):
content = onnx_package.load(onx)
raise OnnxRuntimeAssertionError(
"Missing op? '{0}'\nONNX\n{1}\n{2}\n---\n{3}".format(
- onx, smodel, e, content))
+ onx, smodel, e, content
+ )
+ )
raise OnnxRuntimeAssertionError(
- "Unable to load onnx '{0}'\nONNX\n{1}\n{2}"
- ".".format(onx, smodel, e))
+ "Unable to load onnx '{0}'\nONNX\n{1}\n{2}" ".".format(onx, smodel, e)
+ )
input = load["data"]
DF = options.pop("DF", False)
@@ -624,24 +641,26 @@ def compare_runtime(
inputs = {inp[0].name: input}
elif isinstance(input, np.ndarray):
shape = sum(
- i.shape[1] if len(i.shape) == 2
- else i.shape[0] for i in inp)
+ i.shape[1] if len(i.shape) == 2 else i.shape[0] for i in inp
+ )
if shape == input.shape[1]:
inputs = {n.name: input[:, i] for i, n in enumerate(inp)}
else:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
"original shape {1}, onnx='{2}'".format(
- len(inp), input.shape, onx))
+ len(inp), input.shape, onx
+ )
+ )
elif isinstance(input, list):
try:
array_input = np.array(input)
except Exception:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
- "original {1}, onnx='{2}'".format(
- len(inp), len(input), onx))
- if hasattr(inp[0], 'shape'):
+ "original {1}, onnx='{2}'".format(len(inp), len(input), onx)
+ )
+ if hasattr(inp[0], "shape"):
shape = sum(i.shape[1] for i in inp)
if shape == array_input.shape[1]:
inputs = {}
@@ -649,13 +668,16 @@ def compare_runtime(
for i, n in enumerate(inp):
d = c + n.shape[1]
inputs[n.name] = _create_column(
- [row[c:d] for row in input], n.type)
+ [row[c:d] for row in input], n.type
+ )
c = d
else:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
"original shape {1}, onnx='{2}'*".format(
- len(inp), array_input.shape, onx))
+ len(inp), array_input.shape, onx
+ )
+ )
else:
array_input = array_input.reshape((-1, len(inp)))
inputs = {i.name: r for i, r in zip(inp, array_input.T)}
@@ -665,33 +687,35 @@ def compare_runtime(
except Exception:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
- "original {1}, onnx='{2}'".format(
- len(inp), len(input), onx))
- if hasattr(inp[0], 'shape'):
+ "original {1}, onnx='{2}'".format(len(inp), len(input), onx)
+ )
+ if hasattr(inp[0], "shape"):
shape = sum(i.shape[1] for i in inp)
if shape == array_input.shape[1]:
inputs = {}
c = 0
for i, n in enumerate(inp):
d = c + n.shape[1]
- inputs[n.name] = _create_column(
- input.iloc[:, c:d], n.type)
+ inputs[n.name] = _create_column(input.iloc[:, c:d], n.type)
c = d
else:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0}={1} columns != "
"original shape {2}, onnx='{3}'*".format(
- len(inp), shape, array_input.shape, onx))
+ len(inp), shape, array_input.shape, onx
+ )
+ )
else:
array_input = array_input.reshape((-1, len(inp)))
inputs = {i.name: r for i, r in zip(inp, array_input.T)}
else:
raise OnnxRuntimeAssertionError(
- "Wrong type of inputs onnx {0}, onnx='{1}'".format(
- type(input), onx))
+ "Wrong type of inputs onnx {0}, onnx='{1}'".format(type(input), onx)
+ )
else:
raise OnnxRuntimeAssertionError(
- "Dict or list is expected, not {0}".format(type(input)))
+ "Dict or list is expected, not {0}".format(type(input))
+ )
for k in inputs:
if isinstance(inputs[k], list):
@@ -704,8 +728,8 @@ def compare_runtime(
if verbose:
print(
"[compare_runtime] OneOff: type(inputs)={} "
- "len={} OneOffArray={}".format(
- type(input), len(inputs), OneOffArray))
+ "len={} OneOffArray={}".format(type(input), len(inputs), OneOffArray)
+ )
if len(inputs) == 1 and not OneOffArray:
name, values = list(inputs.items())[0]
res = []
@@ -713,28 +737,31 @@ def compare_runtime(
try:
one = sess.run(None, {name: input})
if lambda_onnx is None:
- lambda_onnx = (
- lambda sess=sess, input=input: sess.run( # noqa
- None, {name: input}))
+ lambda_onnx = lambda sess=sess, input=input: sess.run( # noqa
+ None, {name: input}
+ )
if verbose:
import pprint
+
pprint.pprint(one)
except ExpectedAssertionError as expe:
raise expe
except Exception as e:
if intermediate_steps:
_display_intermediate_steps(
- onx, {name: input}, disable_optimisation)
- if hasattr(sess, 'replay_run'):
+ onx, {name: input}, disable_optimisation
+ )
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unable to run model\n---\n{res}\n----\n{e}")
+ f"Unable to run model\n---\n{res}\n----\n{e}"
+ )
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}\n{onx}")
- raise OnnxRuntimeAssertionError(
- f"Unable to run onnx model {e}")
+ f"Unable to run model due to {e}\n{onx}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unable to run onnx model {e}")
res.append(one)
if verbose:
@@ -743,8 +770,7 @@ def compare_runtime(
else:
def to_array(vv):
- if isinstance(
- vv, (np.ndarray, np.int64, np.float32, str)):
+ if isinstance(vv, (np.ndarray, np.int64, np.float32, str)):
return np.array([vv])
return np.array([vv], dtype=np.float32)
@@ -755,18 +781,18 @@ def to_array(vv):
try:
one = sess.run(None, iii)
if lambda_onnx is None:
- lambda_onnx = (
- lambda sess=sess, iii=iii: sess.run( # noqa
- None, iii))
+ lambda_onnx = lambda sess=sess, iii=iii: sess.run( # noqa
+ None, iii
+ )
if verbose:
import pprint
+
pprint.pprint(one)
except ExpectedAssertionError as expe:
raise expe
except Exception as e:
if intermediate_steps:
- _display_intermediate_steps(
- onx, iii, disable_optimisation)
+ _display_intermediate_steps(onx, iii, disable_optimisation)
if verbose:
import onnx
@@ -774,16 +800,17 @@ def to_array(vv):
smodel = "\nJSON ONNX\n" + str(model)
else:
smodel = ""
- if hasattr(sess, 'replay_run'):
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unable to run\n---\n{res}\n----\n{e}")
+ f"Unable to run\n---\n{res}\n----\n{e}"
+ )
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}{smodel}")
- raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}")
+ f"Unable to run model due to {e}{smodel}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}")
res.append(one)
if verbose:
print("[compare_runtime] OneOff: _post_process_output2")
@@ -794,14 +821,17 @@ def to_array(vv):
pass
elif not isinstance(output, np.ndarray):
raise TypeError(
- "output must be an array, not {}".format(type(output)))
+ "output must be an array, not {}".format(type(output))
+ )
else:
output = [output]
else:
if verbose:
print(
"[compare_runtime] type(inputs)={} len={} names={}".format(
- type(input), len(inputs), list(sorted(inputs))))
+ type(input), len(inputs), list(sorted(inputs))
+ )
+ )
try:
output = sess.run(None, inputs)
@@ -810,6 +840,7 @@ def lambda_onnx():
if verbose:
import pprint
+
pprint.pprint(output)
except ExpectedAssertionError as expe:
raise expe
@@ -818,31 +849,33 @@ def lambda_onnx():
_display_intermediate_steps(onx, inputs, disable_optimisation)
if "-Fail" in onx:
raise ExpectedAssertionError(
- "onnxruntime cannot compute the "
- "prediction for '{0}'".format(onx))
+ "onnxruntime cannot compute the " "prediction for '{0}'".format(onx)
+ )
else:
if verbose:
import onnx
+
model = onnx.load(onx)
smodel = "\nJSON ONNX\n" + str(model)
else:
smodel = ""
- ops = "\n".join(map(lambda x: str(x.__class__),
- sess.rt_nodes_))
+ ops = "\n".join(map(lambda x: str(x.__class__), sess.rt_nodes_))
raise OnnxRuntimeAssertionError(
f"ReferenceEvaluator cannot compute the prediction"
- f" for {onx!r} due to {e}\nops={ops}\n{smodel}")
+ f" for {onx!r} due to {e}\nops={ops}\n{smodel}"
+ )
except Exception as e:
- if hasattr(sess, 'replay_run'):
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unable to run model\n---\n{res}\n----\n{e}")
+ f"Unable to run model\n---\n{res}\n----\n{e}"
+ )
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}\n{onx}")
- raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}")
+ f"Unable to run model due to {e}\n{onx}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}")
if verbose:
print("[compare_runtime] done type={}".format(type(output)))
@@ -864,7 +897,8 @@ def lambda_onnx():
decimal=decimal,
verbose=verbose,
classes=classes,
- **options)
+ **options,
+ )
except OnnxRuntimeAssertionError as de:
if isinstance(onx, str):
import onnx
@@ -893,7 +927,8 @@ def lambda_onnx():
smodel = ""
raise OnnxRuntimeAssertionError(
"Model '{0}' has discrepencies with backend="
- "'onnx'.\n{1}: {2}{3}".format(onx, type(e), e, smodel))
+ "'onnx'.\n{1}: {2}{3}".format(onx, type(e), e, smodel)
+ )
return output0, lambda_onnx
@@ -919,7 +954,8 @@ def _post_process_output(res):
if mi != max(ls):
raise NotImplementedError(
"Unable to postprocess various number of "
- "outputs in [{0}, {1}]".format(min(ls), max(ls)))
+ "outputs in [{0}, {1}]".format(min(ls), max(ls))
+ )
if mi > 1:
output = []
for i in range(mi):
@@ -935,7 +971,8 @@ def _post_process_output(res):
return res
if len(res[0]) != 1:
raise NotImplementedError(
- "Not conversion implemented for {0}".format(res))
+            "No conversion implemented for {0}".format(res)
+ )
st = [r[0] for r in res]
return np.vstack(st)
return res
@@ -951,18 +988,13 @@ def _create_column(values, dtype):
if str(dtype) == "tensor(string)":
return np.array(values, dtype=np.str_)
raise OnnxRuntimeAssertionError(
- "Unable to create one column from dtype '{0}'".format(dtype))
+ "Unable to create one column from dtype '{0}'".format(dtype)
+ )
def _compare_expected(
- expected,
- output,
- sess,
- onnx,
- decimal=5,
- verbose=False,
- classes=None,
- **kwargs):
+ expected, output, sess, onnx, decimal=5, verbose=False, classes=None, **kwargs
+):
"""
Compares the expected output against the runtime outputs.
This is specific to *ReferenceEvaluator* due to variable *sess*
@@ -983,12 +1015,13 @@ def _compare_expected(
del kwargs["Reshape"]
output = np.hstack(output).ravel()
output = output.reshape(
- (len(expected), len(output.ravel()) // len(expected)))
+ (len(expected), len(output.ravel()) // len(expected))
+ )
if len(expected) != len(output):
raise OnnxRuntimeAssertionError(
"Unexpected number of outputs '{0}', "
- "expected={1}, got={2}".format(
- onnx, len(expected), len(output)))
+ "expected={1}, got={2}".format(onnx, len(expected), len(output))
+ )
for exp, out in zip(expected, output):
_compare_expected(
exp,
@@ -998,37 +1031,38 @@ def _compare_expected(
decimal=5,
verbose=verbose,
classes=classes,
- **kwargs)
+ **kwargs,
+ )
tested += 1
else:
raise OnnxRuntimeAssertionError(
- "Type mismatch for '{0}', output type is {1}".format(
- onnx, type(output)))
+ "Type mismatch for '{0}', output type is {1}".format(onnx, type(output))
+ )
elif isinstance(expected, dict):
if not isinstance(output, dict):
- raise OnnxRuntimeAssertionError(
- "Type mismatch for '{0}'".format(onnx))
+ raise OnnxRuntimeAssertionError("Type mismatch for '{0}'".format(onnx))
for k, v in output.items():
if k not in expected:
continue
msg = compare_outputs(
- expected[k], v, decimal=decimal, verbose=verbose, **kwargs)
+ expected[k], v, decimal=decimal, verbose=verbose, **kwargs
+ )
if msg:
- if hasattr(sess, 'replay_run'):
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unexpected output '{k}'\n---\n{res}\n----\n{msg}")
+ f"Unexpected output '{k}'\n---\n{res}\n----\n{msg}"
+ )
elif verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output {k!r} in model {onnx}\n{msg}")
- raise OnnxRuntimeAssertionError(
- f"Unexpected output {k!r}\n{msg}")
+ f"Unexpected output {k!r} in model {onnx}\n{msg}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unexpected output {k!r}\n{msg}")
tested += 1
elif isinstance(expected, np.ndarray):
if isinstance(output, list):
- if (expected.shape[0] == len(output) and
- isinstance(output[0], dict)):
+ if expected.shape[0] == len(output) and isinstance(output[0], dict):
import pandas
output = pandas.DataFrame(output)
@@ -1041,36 +1075,43 @@ def _compare_expected(
ex = ex[:170] + "..."
raise OnnxRuntimeAssertionError(
"More than one output when 1 is expected "
- "for onnx '{0}'\n{1}".format(onnx, ex))
+ "for onnx '{0}'\n{1}".format(onnx, ex)
+ )
output = output[-1]
if not isinstance(output, np.ndarray):
raise OnnxRuntimeAssertionError(
"output must be an array for onnx '{0}' not {1}".format(
- onnx, type(output)))
- if (classes is not None and (
- expected.dtype == np.str_ or
- expected.dtype.char == "U")):
+ onnx, type(output)
+ )
+ )
+ if classes is not None and (
+ expected.dtype == np.str_ or expected.dtype.char == "U"
+ ):
try:
output = np.array([classes[cl] for cl in output])
except IndexError as e:
raise RuntimeError(
- "Unable to handle\n{}\n{}\n{}".format(
- expected, output, classes)) from e
+ "Unable to handle\n{}\n{}\n{}".format(expected, output, classes)
+ ) from e
msg = compare_outputs(
- expected, output, decimal=decimal, verbose=verbose, **kwargs)
+ expected, output, decimal=decimal, verbose=verbose, **kwargs
+ )
if isinstance(msg, ExpectedAssertionError):
raise msg
if msg:
- if hasattr(sess, 'replay_run'):
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unexpected output\n---\n{res}\n----\n{msg}")
+ f"Unexpected output\n---\n{res}\n----\n{msg}"
+ )
elif verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output in model {onnx}\n{msg}")
+ f"Unexpected output in model {onnx}\n{msg}"
+ )
raise OnnxRuntimeAssertionError(
- f"Unexpected output ({type(sess)} - {dir(sess)})\n{msg}")
+ f"Unexpected output ({type(sess)} - {dir(sess)})\n{msg}"
+ )
tested += 1
else:
from scipy.sparse import csr_matrix
@@ -1080,21 +1121,25 @@ def _compare_expected(
one_array = np.array(output)
dense = np.asarray(expected.todense())
msg = compare_outputs(
- dense, one_array, decimal=decimal, verbose=verbose, **kwargs)
+ dense, one_array, decimal=decimal, verbose=verbose, **kwargs
+ )
if msg:
- if hasattr(sess, 'replay_run'):
+ if hasattr(sess, "replay_run"):
# ReferenceEvaluator
res = sess.replay_run()
raise OnnxRuntimeAssertionError(
- f"Unexpected output\n---\n{res}\n----\n{msg}")
+ f"Unexpected output\n---\n{res}\n----\n{msg}"
+ )
elif verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output in model '{onnx}'\n{msg}")
+ f"Unexpected output in model '{onnx}'\n{msg}"
+ )
raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}")
tested += 1
else:
raise OnnxRuntimeAssertionError(
"Unexpected type for expected output ({1}) "
- "and onnx '{0}'".format(onnx, type(expected)))
+ "and onnx '{0}'".format(onnx, type(expected))
+ )
if tested == 0:
raise OnnxRuntimeAssertionError("No test for onnx '{0}'".format(onnx))
diff --git a/tests/test_utils/utils_backend_onnxruntime.py b/tests/test_utils/utils_backend_onnxruntime.py
index a191ed7b1..38150a6f3 100644
--- a/tests/test_utils/utils_backend_onnxruntime.py
+++ b/tests/test_utils/utils_backend_onnxruntime.py
@@ -40,9 +40,7 @@ def _display_intermediate_steps(model_onnx, inputs, disable_optimisation):
print("-")
print("OUTPUT: {} from {}".format(out, node.name))
step = select_model_inputs_outputs(model_onnx, out)
- if disable_optimisation and hasattr(
- onnxruntime, "GraphOptimizationLevel"
- ):
+ if disable_optimisation and hasattr(onnxruntime, "GraphOptimizationLevel"):
opts = onnxruntime.SessionOptions()
opts.graph_optimization_level = (
onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
@@ -53,13 +51,15 @@ def _display_intermediate_steps(model_onnx, inputs, disable_optimisation):
step_sess = onnxruntime.InferenceSession(
step.SerializeToString(),
sess_options=opts,
- providers=["CPUExecutionProvider"])
+ providers=["CPUExecutionProvider"],
+ )
except Exception as e:
if "support for domain ai.onnx is till opset 17" in str(e):
return
raise RuntimeError(
"Unable to load ONNX model with onnxruntime. "
- "Last added node is:\n{}".format(node)) from e
+ "Last added node is:\n{}".format(node)
+ ) from e
for o in step_sess.get_inputs():
print("IN :", o)
for o in step_sess.get_outputs():
@@ -145,13 +145,15 @@ def compare_runtime(
onx = onx.SerializeToString()
try:
sess = onnxruntime.InferenceSession(
- onx, sess_options=opts, providers=["CPUExecutionProvider"])
+ onx, sess_options=opts, providers=["CPUExecutionProvider"]
+ )
except ExpectedAssertionError as expe:
raise expe
except Exception as e:
if "CannotLoad" in options:
raise ExpectedAssertionError(
- "Unable to load onnx '{0}' due to\n{1}".format(onx, e))
+ "Unable to load onnx '{0}' due to\n{1}".format(onx, e)
+ )
else:
if intermediate_steps:
_display_intermediate_steps(onx, None, disable_optimisation)
@@ -162,24 +164,29 @@ def compare_runtime(
smodel = "\nJSON ONNX\n" + str(model)
else:
smodel = ""
- if ("NOT_IMPLEMENTED : Could not find an implementation "
- "for the node" in str(e)):
+ if (
+ "NOT_IMPLEMENTED : Could not find an implementation "
+ "for the node" in str(e)
+ ):
# onnxruntime does not implement a specific node yet.
raise OnnxRuntimeMissingNewOnnxOperatorException(
"onnxruntime does not implement a new operator "
- "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel))
+ "'{0}'\n{1}\nONNX\n{2}".format(onx, e, smodel)
+ )
if "is not a registered function/op" in str(e):
content = onnx_package.load(onx)
raise OnnxRuntimeAssertionError(
"Missing op? '{0}'\nONNX\n{1}\n{2}\n---\n{3}".format(
- onx, smodel, e, content))
+ onx, smodel, e, content
+ )
+ )
msg = "Current official support for domain ai.onnx is till opset"
if msg in str(e):
# ReferenceEvaluator must work on this one.
return None, None
raise OnnxRuntimeAssertionError(
- "Unable to load onnx '{0}'\nONNX\n{1}\n{2}".format(
- onx, smodel, e))
+ "Unable to load onnx '{0}'\nONNX\n{1}\n{2}".format(onx, smodel, e)
+ )
input = load["data"]
DF = options.pop("DF", False)
@@ -200,8 +207,8 @@ def compare_runtime(
inputs = {inp[0].name: input}
elif isinstance(input, numpy.ndarray):
shape = sum(
- i.shape[1] if len(i.shape) == 2
- else i.shape[0] for i in inp)
+ i.shape[1] if len(i.shape) == 2 else i.shape[0] for i in inp
+ )
if shape == input.shape[1]:
inputs = {n.name: input[:, i] for i, n in enumerate(inp)}
else:
@@ -217,8 +224,8 @@ def compare_runtime(
except Exception:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
- "original {1}, onnx='{2}'".format(
- len(inp), len(input), onx))
+ "original {1}, onnx='{2}'".format(len(inp), len(input), onx)
+ )
shape = sum(i.shape[1] for i in inp)
if shape == array_input.shape[1]:
inputs = {}
@@ -226,43 +233,47 @@ def compare_runtime(
for i, n in enumerate(inp):
d = c + n.shape[1]
inputs[n.name] = _create_column(
- [row[c:d] for row in input], n.type)
+ [row[c:d] for row in input], n.type
+ )
c = d
else:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
"original shape {1}, onnx='{2}'*".format(
- len(inp), array_input.shape, onx))
+ len(inp), array_input.shape, onx
+ )
+ )
elif isinstance(input, pandas.DataFrame):
try:
array_input = numpy.array(input)
except Exception:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0} != "
- "original {1}, onnx='{2}'".format(
- len(inp), len(input), onx))
+ "original {1}, onnx='{2}'".format(len(inp), len(input), onx)
+ )
shape = sum(i.shape[1] for i in inp)
if shape == array_input.shape[1]:
inputs = {}
c = 0
for i, n in enumerate(inp):
d = c + n.shape[1]
- inputs[n.name] = _create_column(
- input.iloc[:, c:d], n.type
- )
+ inputs[n.name] = _create_column(input.iloc[:, c:d], n.type)
c = d
else:
raise OnnxRuntimeAssertionError(
"Wrong number of inputs onnx {0}={1} columns != "
"original shape {2}, onnx='{3}'*".format(
- len(inp), shape, array_input.shape, onx))
+ len(inp), shape, array_input.shape, onx
+ )
+ )
else:
raise OnnxRuntimeAssertionError(
- "Wrong type of inputs onnx {0}, onnx='{1}'".format(
- type(input), onx))
+ "Wrong type of inputs onnx {0}, onnx='{1}'".format(type(input), onx)
+ )
else:
raise OnnxRuntimeAssertionError(
- "Dict or list is expected, not {0}".format(type(input)))
+ "Dict or list is expected, not {0}".format(type(input))
+ )
for k in inputs:
if isinstance(inputs[k], list):
@@ -275,8 +286,8 @@ def compare_runtime(
if verbose:
print(
"[compare_runtime] OneOff: type(inputs)={} "
- "len={} OneOffArray={}".format(
- type(input), len(inputs), OneOffArray))
+ "len={} OneOffArray={}".format(type(input), len(inputs), OneOffArray)
+ )
if len(inputs) == 1 and not OneOffArray:
name, values = list(inputs.items())[0]
res = []
@@ -297,12 +308,13 @@ def lambda_onnx():
except Exception as e:
if intermediate_steps:
_display_intermediate_steps(
- onx, {name: input}, disable_optimisation)
+ onx, {name: input}, disable_optimisation
+ )
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}\n{onx}")
- raise OnnxRuntimeAssertionError(
- f"Unable to run model due to {e}")
+ f"Unable to run model due to {e}\n{onx}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unable to run model due to {e}")
res.append(one)
if verbose:
print("[compare_runtime] OneOff: _post_process_output1")
@@ -310,8 +322,7 @@ def lambda_onnx():
else:
def to_array(vv):
- if isinstance(
- vv, (numpy.ndarray, numpy.int64, numpy.float32, str)):
+ if isinstance(vv, (numpy.ndarray, numpy.int64, numpy.float32, str)):
return numpy.array([vv])
else:
return numpy.array([vv], dtype=numpy.float32)
@@ -335,9 +346,7 @@ def lambda_onnx():
raise expe
except Exception as e:
if intermediate_steps:
- _display_intermediate_steps(
- onx, iii, disable_optimisation
- )
+ _display_intermediate_steps(onx, iii, disable_optimisation)
if verbose:
import onnx
@@ -347,9 +356,11 @@ def lambda_onnx():
smodel = ""
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unable to run onnx due to {e}{smodel}\n{onx}")
+ f"Unable to run onnx due to {e}{smodel}\n{onx}"
+ )
raise OnnxRuntimeAssertionError(
- f"Unable to run onnx due to {e}{smodel}")
+ f"Unable to run onnx due to {e}{smodel}"
+ )
res.append(one)
if verbose:
print("[compare_runtime] OneOff: _post_process_output2")
@@ -360,14 +371,17 @@ def lambda_onnx():
pass
elif not isinstance(output, numpy.ndarray):
raise TypeError(
- "output must be an array, not {}".format(type(output)))
+ "output must be an array, not {}".format(type(output))
+ )
else:
output = [output]
else:
if verbose:
print(
"[compare_runtime] type(inputs)={} len={} names={}".format(
- type(input), len(inputs), list(sorted(inputs))))
+ type(input), len(inputs), list(sorted(inputs))
+ )
+ )
if verbose:
run_options = onnxruntime.RunOptions()
if hasattr(run_options, "run_log_verbosity_level"):
@@ -393,8 +407,8 @@ def lambda_onnx():
_display_intermediate_steps(onx, inputs, disable_optimisation)
if "-Fail" in onx:
raise ExpectedAssertionError(
- "onnxruntime cannot compute the "
- "prediction for '{0}'".format(onx))
+ "onnxruntime cannot compute the " "prediction for '{0}'".format(onx)
+ )
else:
if verbose:
import onnx
@@ -405,13 +419,12 @@ def lambda_onnx():
smodel = ""
raise OnnxRuntimeAssertionError(
"onnxruntime cannot compute the prediction"
- " for '{0}' due to {1}{2}".format(onx, e, smodel))
+ " for '{0}' due to {1}{2}".format(onx, e, smodel)
+ )
except Exception as e:
if verbose:
- raise OnnxRuntimeAssertionError(
- f"Unable to run onnx due to {e}\n{onx}")
- raise OnnxRuntimeAssertionError(
- f"Unable to run onnx due to {e}")
+ raise OnnxRuntimeAssertionError(f"Unable to run onnx due to {e}\n{onx}")
+ raise OnnxRuntimeAssertionError(f"Unable to run onnx due to {e}")
if verbose:
print("[compare_runtime] done type={}".format(type(output)))
@@ -433,7 +446,8 @@ def lambda_onnx():
decimal=decimal,
verbose=verbose,
classes=classes,
- **options)
+ **options,
+ )
except ExpectedAssertionError as expe:
raise expe
except Exception as e:
@@ -446,7 +460,9 @@ def lambda_onnx():
smodel = ""
raise OnnxRuntimeAssertionError(
"Model '{0}' has discrepencies.\n{1}: {2}{3}".format(
- onx, type(e), e, smodel))
+ onx, type(e), e, smodel
+ )
+ )
return output0, lambda_onnx
@@ -473,7 +489,8 @@ def _post_process_output(res):
if mi != max(ls):
raise NotImplementedError(
"Unable to postprocess various number of "
- "outputs in [{0}, {1}]".format(min(ls), max(ls)))
+ "outputs in [{0}, {1}]".format(min(ls), max(ls))
+ )
if mi > 1:
output = []
for i in range(mi):
@@ -490,7 +507,8 @@ def _post_process_output(res):
else:
if len(res[0]) != 1:
raise NotImplementedError(
- "Not conversion implemented for {0}".format(res))
+ "Not conversion implemented for {0}".format(res)
+ )
st = [r[0] for r in res]
return numpy.vstack(st)
else:
@@ -508,18 +526,13 @@ def _create_column(values, dtype):
if str(dtype) == "tensor(string)":
return numpy.array(values, dtype=numpy.str_)
raise OnnxRuntimeAssertionError(
- "Unable to create one column from dtype '{0}'".format(dtype))
+ "Unable to create one column from dtype '{0}'".format(dtype)
+ )
def _compare_expected(
- expected,
- output,
- sess,
- onnx,
- decimal=5,
- verbose=False,
- classes=None,
- **kwargs):
+ expected, output, sess, onnx, decimal=5, verbose=False, classes=None, **kwargs
+):
"""
Compares the expected output against the runtime outputs.
This is specific to *onnxruntime* due to variable *sess*
@@ -540,12 +553,13 @@ def _compare_expected(
del kwargs["Reshape"]
output = numpy.hstack(output).ravel()
output = output.reshape(
- (len(expected), len(output.ravel()) // len(expected)))
+ (len(expected), len(output.ravel()) // len(expected))
+ )
if len(expected) != len(output):
raise OnnxRuntimeAssertionError(
"Unexpected number of outputs '{0}', "
- "expected={1}, got={2}".format(
- onnx, len(expected), len(output)))
+ "expected={1}, got={2}".format(onnx, len(expected), len(output))
+ )
for exp, out in zip(expected, output):
_compare_expected(
exp,
@@ -555,32 +569,32 @@ def _compare_expected(
decimal=5,
verbose=verbose,
classes=classes,
- **kwargs)
+ **kwargs,
+ )
tested += 1
else:
raise OnnxRuntimeAssertionError(
- "Type mismatch for '{0}', output type is {1}".format(
- onnx, type(output)))
+ "Type mismatch for '{0}', output type is {1}".format(onnx, type(output))
+ )
elif isinstance(expected, dict):
if not isinstance(output, dict):
- raise OnnxRuntimeAssertionError(
- "Type mismatch for '{0}'".format(onnx))
+ raise OnnxRuntimeAssertionError("Type mismatch for '{0}'".format(onnx))
for k, v in output.items():
if k not in expected:
continue
msg = compare_outputs(
- expected[k], v, decimal=decimal, verbose=verbose, **kwargs)
+ expected[k], v, decimal=decimal, verbose=verbose, **kwargs
+ )
if msg:
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output {k!r} in model {onnx}\n{msg}")
- raise OnnxRuntimeAssertionError(
- f"Unexpected output {k!r}\n{msg}")
+ f"Unexpected output {k!r} in model {onnx}\n{msg}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unexpected output {k!r}\n{msg}")
tested += 1
elif isinstance(expected, numpy.ndarray):
if isinstance(output, list):
- if (expected.shape[0] == len(output) and
- isinstance(output[0], dict)):
+ if expected.shape[0] == len(output) and isinstance(output[0], dict):
import pandas
output = pandas.DataFrame(output)
@@ -593,31 +607,35 @@ def _compare_expected(
ex = ex[:170] + "..."
raise OnnxRuntimeAssertionError(
"More than one output when 1 is expected "
- "for onnx '{0}'\n{1}".format(onnx, ex))
+ "for onnx '{0}'\n{1}".format(onnx, ex)
+ )
output = output[-1]
if not isinstance(output, numpy.ndarray):
raise OnnxRuntimeAssertionError(
"output must be an array for onnx '{0}' not {1}".format(
- onnx, type(output)))
- if (classes is not None and (
- expected.dtype == numpy.str_ or
- expected.dtype.char == "U")):
+ onnx, type(output)
+ )
+ )
+ if classes is not None and (
+ expected.dtype == numpy.str_ or expected.dtype.char == "U"
+ ):
try:
output = numpy.array([classes[cl] for cl in output])
except IndexError as e:
raise RuntimeError(
- "Unable to handle\n{}\n{}\n{}".format(
- expected, output, classes)) from e
+ "Unable to handle\n{}\n{}\n{}".format(expected, output, classes)
+ ) from e
msg = compare_outputs(
- expected, output, decimal=decimal, verbose=verbose, **kwargs)
+ expected, output, decimal=decimal, verbose=verbose, **kwargs
+ )
if isinstance(msg, ExpectedAssertionError):
raise msg
if msg:
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output in model {onnx}\n{msg}")
- raise OnnxRuntimeAssertionError(
- f"Unexpected output\n{msg}")
+ f"Unexpected output in model {onnx}\n{msg}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}")
tested += 1
else:
from scipy.sparse import csr_matrix
@@ -627,17 +645,19 @@ def _compare_expected(
one_array = numpy.array(output)
dense = numpy.asarray(expected.todense())
msg = compare_outputs(
- dense, one_array, decimal=decimal, verbose=verbose, **kwargs)
+ dense, one_array, decimal=decimal, verbose=verbose, **kwargs
+ )
if msg:
if verbose:
raise OnnxRuntimeAssertionError(
- f"Unexpected output in model {onnx}\n{msg}")
- raise OnnxRuntimeAssertionError(
- f"Unexpected output\n{msg}")
+ f"Unexpected output in model {onnx}\n{msg}"
+ )
+ raise OnnxRuntimeAssertionError(f"Unexpected output\n{msg}")
tested += 1
else:
raise OnnxRuntimeAssertionError(
"Unexpected type for expected output ({1}) "
- "and onnx '{0}'".format(onnx, type(expected)))
+ "and onnx '{0}'".format(onnx, type(expected))
+ )
if tested == 0:
raise OnnxRuntimeAssertionError("No test for onnx '{0}'".format(onnx))
diff --git a/tests/test_utils_sklearn.py b/tests/test_utils_sklearn.py
index 5390c6096..be9209334 100644
--- a/tests/test_utils_sklearn.py
+++ b/tests/test_utils_sklearn.py
@@ -16,6 +16,7 @@
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
+
try:
from sklearn.ensemble import VotingRegressor
except ImportError:
@@ -32,29 +33,24 @@
from sklearn.preprocessing import Imputer as SimpleImputer
from skl2onnx.common.utils_sklearn import enumerate_model_names
from skl2onnx import convert_sklearn, to_onnx
-from skl2onnx.common.data_types import (
- FloatTensorType, StringTensorType)
-from skl2onnx.common.utils_sklearn import (
- _process_options, _process_pipeline_options)
-from test_utils import (
- dump_data_and_model, fit_regression_model, TARGET_OPSET)
+from skl2onnx.common.data_types import FloatTensorType, StringTensorType
+from skl2onnx.common.utils_sklearn import _process_options, _process_pipeline_options
+from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET
-ort_version = ort_version.split('+')[0]
+ort_version = ort_version.split("+")[0]
class TestUtilsSklearn(unittest.TestCase):
-
- @unittest.skipIf(VotingRegressor is None,
- reason="new in 0.21")
+ @unittest.skipIf(VotingRegressor is None, reason="new in 0.21")
def test_voting_regression(self):
- model = VotingRegressor([
- ('lr', LinearRegression()),
- ('dt', DecisionTreeRegressor())])
+ model = VotingRegressor(
+ [("lr", LinearRegression()), ("dt", DecisionTreeRegressor())]
+ )
model, _ = fit_regression_model(model)
names = list(enumerate_model_names(model))
assert len(names) == 3
- assert [_[0] for _ in names] == ['', 'lr', 'dt']
+ assert [_[0] for _ in names] == ["", "lr", "dt"]
assert all(map(lambda x: isinstance(x, tuple), names))
assert all(map(lambda x: len(x) == 2, names))
@@ -70,62 +66,67 @@ def test_pipeline(self):
[[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
dtype=numpy.float32,
)
- model = Pipeline([
- ("scaler1", StandardScaler()),
- (
- "union",
- FeatureUnion([
- ("scaler2", StandardScaler()),
- ("scaler3", MinMaxScaler()),
- ]),
- ),
- ])
+ model = Pipeline(
+ [
+ ("scaler1", StandardScaler()),
+ (
+ "union",
+ FeatureUnion(
+ [
+ ("scaler2", StandardScaler()),
+ ("scaler3", MinMaxScaler()),
+ ]
+ ),
+ ),
+ ]
+ )
model.fit(data)
names = list(enumerate_model_names(model))
- assert [_[0] for _ in names] == ['', 'scaler1', 'union',
- 'union__scaler2', 'union__scaler3']
+ assert [_[0] for _ in names] == [
+ "",
+ "scaler1",
+ "union",
+ "union__scaler2",
+ "union__scaler3",
+ ]
def test_pipeline_lr(self):
data = numpy.array(
- [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
- dtype=numpy.float32)
+ [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32
+ )
yd = numpy.array([0, 1, 0, 2], dtype=numpy.float32)
- pipe = Pipeline([
- ('norm', MinMaxScaler()),
- ('clr', LogisticRegression())
- ])
+ pipe = Pipeline([("norm", MinMaxScaler()), ("clr", LogisticRegression())])
pipe.fit(data, yd)
- options = {'clr__raw_scores': True, 'clr__zipmap': False}
+ options = {"clr__raw_scores": True, "clr__zipmap": False}
new_options = _process_options(pipe, options)
- exp = {'raw_scores': True, 'zipmap': False}
+ exp = {"raw_scores": True, "zipmap": False}
op = pipe.steps[1][1]
self.assertIn(id(op), new_options)
self.assertEqual(new_options[id(op)], exp)
model_def = to_onnx(
- pipe, data,
- options={'clr__raw_scores': True, 'clr__zipmap': False},
- target_opset=TARGET_OPSET)
+ pipe,
+ data,
+ options={"clr__raw_scores": True, "clr__zipmap": False},
+ target_opset=TARGET_OPSET,
+ )
sonx = str(model_def)
assert "SOFTMAX" not in sonx
@unittest.skipIf(
- ColumnTransformer is None,
- reason="ColumnTransformer not available in 0.19")
+ ColumnTransformer is None, reason="ColumnTransformer not available in 0.19"
+ )
@unittest.skipIf(
- pv.Version(ort_version) <= pv.Version('0.4.0'),
- reason="onnxruntime too old")
+ pv.Version(ort_version) <= pv.Version("0.4.0"), reason="onnxruntime too old"
+ )
def test_pipeline_column_transformer(self):
-
iris = load_iris()
X = iris.data[:, :3]
y = iris.target
X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
- X_train["vcat"] = X_train["vA"].apply(
- lambda x: "cat1" if x > 0.5 else "cat2")
- X_train["vcat2"] = X_train["vB"].apply(
- lambda x: "cat3" if x > 0.5 else "cat4")
+ X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" if x > 0.5 else "cat2")
+ X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" if x > 0.5 else "cat4")
y_train = y % 2
numeric_features = [0, 1, 2] # ["vA", "vB", "vC"]
categorical_features = [3, 4] # ["vcat", "vcat2"]
@@ -133,26 +134,36 @@ def test_pipeline_column_transformer(self):
classifier = LogisticRegression(
C=0.01,
class_weight=dict(zip([False, True], [0.2, 0.8])),
- n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3)
+ n_jobs=1,
+ max_iter=10,
+ solver="lbfgs",
+ tol=1e-3,
+ )
- numeric_transformer = Pipeline(steps=[
- ("imputer", SimpleImputer(strategy="median")),
- ("scaler", StandardScaler())])
+ numeric_transformer = Pipeline(
+ steps=[
+ ("imputer", SimpleImputer(strategy="median")),
+ ("scaler", StandardScaler()),
+ ]
+ )
- categorical_transformer = Pipeline(steps=[
- (
- "onehot",
- OneHotEncoder(sparse=True, handle_unknown="ignore")),
- (
- "tsvd",
- TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4))])
+ categorical_transformer = Pipeline(
+ steps=[
+ ("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore")),
+ ("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)),
+ ]
+ )
- preprocessor = ColumnTransformer(transformers=[
- ("num", numeric_transformer, numeric_features),
- ("cat", categorical_transformer, categorical_features)])
+ preprocessor = ColumnTransformer(
+ transformers=[
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+ )
- model = Pipeline(steps=[("precprocessor",
- preprocessor), ("classifier", classifier)])
+ model = Pipeline(
+ steps=[("precprocessor", preprocessor), ("classifier", classifier)]
+ )
model.fit(X_train, y_train)
names = list(enumerate_model_names(model, short=False))
@@ -162,59 +173,86 @@ def test_pipeline_column_transformer(self):
simple2 = [_[0] for _ in names]
assert len(simple2) == len(simple)
exp = [
- '', 'precprocessor', 'precprocessor__num',
- 'precprocessor__num__imputer', 'precprocessor__num__scaler',
- 'precprocessor__cat', 'precprocessor__cat__onehot',
- 'precprocessor__cat__onehot__categories___0',
- 'precprocessor__cat__onehot__categories___1',
- 'precprocessor__cat__tsvd', 'classifier']
- self.assertEqual(simple2[:len(exp) - 2], exp[:-2])
+ "",
+ "precprocessor",
+ "precprocessor__num",
+ "precprocessor__num__imputer",
+ "precprocessor__num__scaler",
+ "precprocessor__cat",
+ "precprocessor__cat__onehot",
+ "precprocessor__cat__onehot__categories___0",
+ "precprocessor__cat__onehot__categories___1",
+ "precprocessor__cat__tsvd",
+ "classifier",
+ ]
+ self.assertEqual(simple2[: len(exp) - 2], exp[:-2])
initial_type = [
("numfeat", FloatTensorType([None, 3])),
- ("strfeat", StringTensorType([None, 2]))]
- model_onnx = convert_sklearn(model, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ ("strfeat", StringTensorType([None, 2])),
+ ]
+ model_onnx = convert_sklearn(
+ model, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
dump_data_and_model(
- X_train, model, model_onnx,
- basename="SklearnPipelineColumnTransformerPipelinerOptions1")
+ X_train,
+ model,
+ model_onnx,
+ basename="SklearnPipelineColumnTransformerPipelinerOptions1",
+ )
- options = {'classifier': {'zipmap': False}}
+ options = {"classifier": {"zipmap": False}}
new_options = _process_options(model, options)
assert len(new_options) == 2
model_onnx = convert_sklearn(
- model, initial_types=initial_type,
- options={'classifier': {'zipmap': False}},
- target_opset=TARGET_OPSET)
- assert 'zipmap' not in str(model_onnx).lower()
+ model,
+ initial_types=initial_type,
+ options={"classifier": {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_train, model, model_onnx,
- basename="SklearnPipelineColumnTransformerPipelinerOptions2")
+ X_train,
+ model,
+ model_onnx,
+ basename="SklearnPipelineColumnTransformerPipelinerOptions2",
+ )
- options = {'classifier__zipmap': False}
+ options = {"classifier__zipmap": False}
new_options = _process_options(model, options)
assert len(new_options) == 2
model_onnx = convert_sklearn(
- model, initial_types=initial_type,
- options=options, target_opset=TARGET_OPSET)
- assert 'zipmap' not in str(model_onnx).lower()
+ model,
+ initial_types=initial_type,
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_train, model, model_onnx,
- basename="SklearnPipelineColumnTransformerPipelinerOptions2")
+ X_train,
+ model,
+ model_onnx,
+ basename="SklearnPipelineColumnTransformerPipelinerOptions2",
+ )
- options = {id(model): {'zipmap': False}}
+ options = {id(model): {"zipmap": False}}
new_options = _process_pipeline_options(model, options)
model_onnx = convert_sklearn(
- model, initial_types=initial_type,
- options={id(model): {'zipmap': False}},
- target_opset=TARGET_OPSET)
- assert 'zipmap' not in str(model_onnx).lower()
+ model,
+ initial_types=initial_type,
+ options={id(model): {"zipmap": False}},
+ target_opset=TARGET_OPSET,
+ )
+ assert "zipmap" not in str(model_onnx).lower()
dump_data_and_model(
- X_train, model, model_onnx,
- basename="SklearnPipelineColumnTransformerPipelinerOptions2")
+ X_train,
+ model,
+ model_onnx,
+ basename="SklearnPipelineColumnTransformerPipelinerOptions2",
+ )
if __name__ == "__main__":
diff --git a/tests/test_variable_names.py b/tests/test_variable_names.py
index e8e32b976..ab4630be8 100644
--- a/tests/test_variable_names.py
+++ b/tests/test_variable_names.py
@@ -14,6 +14,7 @@
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from onnxruntime import InferenceSession
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
except ImportError:
@@ -22,12 +23,14 @@
from skl2onnx.algebra.onnx_ops import OnnxIdentity
from skl2onnx import convert_sklearn, to_onnx
from onnxconverter_common.data_types import (
- FloatTensorType, Int64TensorType, StringTensorType)
+ FloatTensorType,
+ Int64TensorType,
+ StringTensorType,
+)
from test_utils import fit_regression_model, TARGET_OPSET
class Passthrough:
-
def fit(self, X, y=None):
return self
@@ -41,21 +44,20 @@ def parser(scope, model, inputs, custom_parsers=None):
operator.inputs = inputs
for op_input in inputs:
op_output = scope.declare_local_variable(
- op_input.raw_name, copy.deepcopy(op_input.type))
+ op_input.raw_name, copy.deepcopy(op_input.type)
+ )
operator.outputs.append(op_output)
return operator.outputs
def shape_calculator(operator):
- op_input_map = {op_input.raw_name: op_input
- for op_input in operator.inputs}
+ op_input_map = {op_input.raw_name: op_input for op_input in operator.inputs}
for op_output in operator.outputs:
op_output.type.shape = op_input_map[op_output.raw_name].type.shape
def converter(scope, operator, container):
- op_input_map = {op_input.raw_name: op_input
- for op_input in operator.inputs}
+ op_input_map = {op_input.raw_name: op_input for op_input in operator.inputs}
for op_output in operator.outputs:
op_input = op_input_map[op_output.raw_name]
OnnxIdentity(
@@ -66,25 +68,23 @@ def converter(scope, operator, container):
class TestVariableNames(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
update_registered_converter(
- Passthrough, "Passthrough",
- shape_calculator, converter,
- parser=parser)
+ Passthrough, "Passthrough", shape_calculator, converter, parser=parser
+ )
def test_variable_names(self):
pipeline = Pipeline([("passthrough", Passthrough())])
initial_types = [("input", FloatTensorType([None, 2]))]
- model_onnx = convert_sklearn(pipeline, initial_types=initial_types,
- target_opset=TARGET_OPSET,
- verbose=0)
- self.assertIn('Identity', str(model_onnx))
+ model_onnx = convert_sklearn(
+ pipeline, initial_types=initial_types, target_opset=TARGET_OPSET, verbose=0
+ )
+ self.assertIn("Identity", str(model_onnx))
x = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
name = sess.get_inputs()[0].name
got = sess.run(None, {name: x})
assert_almost_equal(x, got[0])
@@ -93,15 +93,18 @@ def test_variable_names_distinct(self):
pipeline = Pipeline([("passthrough", Passthrough())])
initial_types = [("INPUTA", FloatTensorType([None, 2]))]
final_types = [("OUTPUTA", FloatTensorType([None, 2]))]
- model_onnx = convert_sklearn(pipeline, initial_types=initial_types,
- target_opset=TARGET_OPSET,
- final_types=final_types,
- verbose=0)
+ model_onnx = convert_sklearn(
+ pipeline,
+ initial_types=initial_types,
+ target_opset=TARGET_OPSET,
+ final_types=final_types,
+ verbose=0,
+ )
x = np.array([0, 1, 1, 0], dtype=np.float32).reshape((-1, 2))
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- got = sess.run(None, {'INPUTA': x})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ got = sess.run(None, {"INPUTA": x})
assert_almost_equal(x, got[0])
def test_variable_names_output(self):
@@ -109,26 +112,31 @@ def test_variable_names_output(self):
initial_types = [("input", FloatTensorType([None, 2]))]
final_types = initial_types
with self.assertRaises(RuntimeError):
- convert_sklearn(pipeline, initial_types=initial_types,
- target_opset=TARGET_OPSET,
- final_types=final_types)
+ convert_sklearn(
+ pipeline,
+ initial_types=initial_types,
+ target_opset=TARGET_OPSET,
+ final_types=final_types,
+ )
def _test_non_ascii_variable_name(self):
model, X = fit_regression_model(LinearRegression())
model_onnx = to_onnx(
- model, name="linear regression",
+ model,
+ name="linear regression",
initial_types=[("年齢", FloatTensorType([None, X.shape[1]]))],
- target_opset=TARGET_OPSET)
+ target_opset=TARGET_OPSET,
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
# Invalid Feed Input Name:\u5e74\u9f62
# sess.run(None, {'年齢': X})
self.assertTrue(sess is not None)
def test_non_ascii_variable_name_pipeline(self):
-
- data = dedent("""
+ data = dedent(
+ """
pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
1,1,"A",female,29.0,0,0,24160,211.3375,B5,S,2,,"MO"
1,1,"B",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Can"
@@ -150,39 +158,48 @@ def test_non_ascii_variable_name_pipeline(self):
1,1,"Q",female,50.0,0,1,PC 17558,247.5208,B58 B60,C,6,,"PQ"
1,1,"R",female,32.0,0,0,11813,76.2917,D15,C,8,,
1,0,"S",male,36.0,0,0,13050,75.2417,C6,C,A,,"MN"
- """).strip(" \n")
+ """
+ ).strip(" \n")
data = pd.read_csv(StringIO(data))
data.rename(columns={"age": "年齢"}, inplace=True)
- X = data.drop('survived', axis=1)
+ X = data.drop("survived", axis=1)
# y = data['survived']
- cols = ['embarked', 'sex', 'pclass', '年齢', 'fare']
+ cols = ["embarked", "sex", "pclass", "年齢", "fare"]
X = X[cols]
- for cat in ['embarked', 'sex', 'pclass']:
- X[cat].fillna('missing', inplace=True)
- numeric_features = ['年齢', 'fare']
- numeric_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='median')),
- ('scaler', StandardScaler())])
- categorical_features = ['embarked', 'sex', 'pclass']
- categorical_transformer = Pipeline(steps=[
- ('onehot', OneHotEncoder(handle_unknown='ignore'))])
+ for cat in ["embarked", "sex", "pclass"]:
+ X[cat].fillna("missing", inplace=True)
+ numeric_features = ["年齢", "fare"]
+ numeric_transformer = Pipeline(
+ steps=[
+ ("imputer", SimpleImputer(strategy="median")),
+ ("scaler", StandardScaler()),
+ ]
+ )
+ categorical_features = ["embarked", "sex", "pclass"]
+ categorical_transformer = Pipeline(
+ steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
+ )
preprocessor = ColumnTransformer(
transformers=[
- ('num', numeric_transformer, numeric_features),
- ('cat', categorical_transformer, categorical_features)])
+ ("num", numeric_transformer, numeric_features),
+ ("cat", categorical_transformer, categorical_features),
+ ]
+ )
preprocessor.fit_transform(X)
- initial_type = [('pclass', Int64TensorType(shape=[None, 1])),
- ('sex', StringTensorType(shape=[None, 1])),
- ('年齢', FloatTensorType(shape=[None, 1])),
- ('fare', FloatTensorType(shape=[None, 1])),
- ('embarked', StringTensorType(shape=[None, 1]))]
+ initial_type = [
+ ("pclass", Int64TensorType(shape=[None, 1])),
+ ("sex", StringTensorType(shape=[None, 1])),
+ ("年齢", FloatTensorType(shape=[None, 1])),
+ ("fare", FloatTensorType(shape=[None, 1])),
+ ("embarked", StringTensorType(shape=[None, 1])),
+ ]
onnx_object = convert_sklearn(
- preprocessor, initial_types=initial_type,
- target_opset=TARGET_OPSET)
+ preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET
+ )
sess = InferenceSession(
- onnx_object.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ onnx_object.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
self.assertTrue(sess is not None)
# Invalid Feed Input Name:\u5e74\u9f62
# onx_data = {}
diff --git a/tests_onnxmltools/test_columns.py b/tests_onnxmltools/test_columns.py
index 48db6a082..6c7f28815 100644
--- a/tests_onnxmltools/test_columns.py
+++ b/tests_onnxmltools/test_columns.py
@@ -11,69 +11,77 @@
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import update_registered_converter, convert_sklearn
from skl2onnx.common.shape_calculator import (
- calculate_linear_classifier_output_shapes) # noqa
+ calculate_linear_classifier_output_shapes,
+) # noqa
from skl2onnx._parse import _parse_sklearn_classifier
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
- convert_lightgbm) # noqa
+ convert_lightgbm,
+) # noqa
from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
- convert_xgboost) # noqa
+ convert_xgboost,
+) # noqa
try:
from test_utils import fit_classification_model
except ImportError:
import os
import sys
- sys.path.append(
- os.path.join(
- os.path.dirname(__file__), "..", "tests"))
+
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests"))
from test_utils import fit_classification_model
from test_utils import TARGET_OPSET, TARGET_OPSET_ML
class TestOptionColumns(unittest.TestCase):
-
@classmethod
def setUpClass(self):
-
update_registered_converter(
- LGBMClassifier, 'LightGbmLGBMClassifier',
+ LGBMClassifier,
+ "LightGbmLGBMClassifier",
calculate_linear_classifier_output_shapes,
- convert_lightgbm, options={
- 'zipmap': [True, False, 'columns'], 'nocl': [True, False]})
+ convert_lightgbm,
+ options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
+ )
def custom_parser(scope, model, inputs, custom_parsers=None):
if custom_parsers is not None and model in custom_parsers:
return custom_parsers[model](
- scope, model, inputs, custom_parsers=custom_parsers)
- if not all(isinstance(i, (numbers.Real, bool, np.bool_))
- for i in model.classes_):
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
+ if not all(
+ isinstance(i, (numbers.Real, bool, np.bool_)) for i in model.classes_
+ ):
raise NotImplementedError(
- "Current converter does not support string labels.")
+ "Current converter does not support string labels."
+ )
return _parse_sklearn_classifier(scope, model, inputs)
update_registered_converter(
- XGBClassifier, 'XGBClassifier',
+ XGBClassifier,
+ "XGBClassifier",
calculate_linear_classifier_output_shapes,
- convert_xgboost, parser=custom_parser,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False]})
+ convert_xgboost,
+ parser=custom_parser,
+ options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
+ )
def c_test_model(self, model):
- model, X = fit_classification_model(
- model, 3, n_features=4, label_string=False)
+ model, X = fit_classification_model(model, 3, n_features=4, label_string=False)
model_onnx = convert_sklearn(
- model, "multi-class ridge classifier",
+ model,
+ "multi-class ridge classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- options={id(model): {'zipmap': 'columns'}},
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ options={id(model): {"zipmap": "columns"}},
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
self.assertIsNotNone(model_onnx)
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [_.name for _ in sess.get_outputs()]
- self.assertEqual(['output_label', 'i0', 'i1', 'i2'], names)
+ self.assertEqual(["output_label", "i0", "i1", "i2"], names)
xt = X[:10].astype(np.float32)
- got = sess.run(None, {'input': xt})
+ got = sess.run(None, {"input": xt})
prob = model.predict_proba(xt)
for i in range(prob.shape[1]):
assert_almost_equal(prob[:, i], got[i + 1])
diff --git a/tests_onnxmltools/test_lightgbm.py b/tests_onnxmltools/test_lightgbm.py
index ad53aa339..dc9e7a94e 100644
--- a/tests_onnxmltools/test_lightgbm.py
+++ b/tests_onnxmltools/test_lightgbm.py
@@ -5,6 +5,7 @@
import numpy
from numpy.testing import assert_almost_equal
from onnxruntime import InferenceSession
+
try:
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument
except ImportError:
@@ -19,79 +20,85 @@
calculate_linear_regressor_output_shapes,
)
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
- convert_lightgbm # noqa
+ convert_lightgbm, # noqa
)
import onnxmltools
from onnxmltools.convert.lightgbm._parse import WrappedBooster # noqa
from skl2onnx import to_onnx
-from skl2onnx._parse import (
- _parse_sklearn_classifier, _parse_sklearn_simple_model)
+from skl2onnx._parse import _parse_sklearn_classifier, _parse_sklearn_simple_model
try:
from test_utils import dump_single_regression
except ImportError:
import os
import sys
- sys.path.append(
- os.path.join(
- os.path.dirname(__file__), "..", "tests"))
+
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests"))
from test_utils import dump_single_regression
from test_utils import (
- dump_binary_classification, dump_multiple_classification,
- TARGET_OPSET, TARGET_OPSET_ML)
+ dump_binary_classification,
+ dump_multiple_classification,
+ TARGET_OPSET,
+ TARGET_OPSET_ML,
+)
def calculate_lightgbm_output_shapes(operator):
op = operator.raw_operator
if hasattr(op, "_model_dict"):
- objective = op._model_dict['objective']
- elif hasattr(op, 'objective_'):
+ objective = op._model_dict["objective"]
+ elif hasattr(op, "objective_"):
objective = op.objective_
else:
raise RuntimeError( # pragma: no cover
"Unable to find attributes '_model_dict' or 'objective_' in "
- "instance of type %r (list of attributes=%r)." % (
- type(op), dir(op)))
- if objective.startswith('binary') or objective.startswith('multiclass'):
+ "instance of type %r (list of attributes=%r)." % (type(op), dir(op))
+ )
+ if objective.startswith("binary") or objective.startswith("multiclass"):
return calculate_linear_classifier_output_shapes(operator)
- if objective.startswith('regression'): # pragma: no cover
+ if objective.startswith("regression"): # pragma: no cover
return calculate_linear_regressor_output_shapes(operator)
raise NotImplementedError( # pragma: no cover
- "Objective '{}' is not implemented yet.".format(objective))
+ "Objective '{}' is not implemented yet.".format(objective)
+ )
def lightgbm_parser(scope, model, inputs, custom_parsers=None):
if hasattr(model, "fit"):
raise TypeError( # pragma: no cover
- "This converter does not apply on type '{}'."
- "".format(type(model)))
+ "This converter does not apply on type '{}'." "".format(type(model))
+ )
if len(inputs) == 1:
wrapped = WrappedBooster(model)
objective = wrapped.get_objective()
- if objective.startswith('binary'):
+ if objective.startswith("binary"):
wrapped = WrappedLightGbmBoosterClassifier(wrapped)
return _parse_sklearn_classifier(
- scope, wrapped, inputs, custom_parsers=custom_parsers)
- if objective.startswith('multiclass'):
+ scope, wrapped, inputs, custom_parsers=custom_parsers
+ )
+ if objective.startswith("multiclass"):
wrapped = WrappedLightGbmBoosterClassifier(wrapped)
return _parse_sklearn_classifier(
- scope, wrapped, inputs, custom_parsers=custom_parsers)
- if objective.startswith('regression'): # pragma: no cover
+ scope, wrapped, inputs, custom_parsers=custom_parsers
+ )
+ if objective.startswith("regression"): # pragma: no cover
return _parse_sklearn_simple_model(
- scope, wrapped, inputs, custom_parsers=custom_parsers)
+ scope, wrapped, inputs, custom_parsers=custom_parsers
+ )
raise NotImplementedError( # pragma: no cover
- "Objective '{}' is not implemented yet.".format(objective))
+ "Objective '{}' is not implemented yet.".format(objective)
+ )
# Multiple columns
- this_operator = scope.declare_local_operator('LightGBMConcat')
+ this_operator = scope.declare_local_operator("LightGBMConcat")
this_operator.raw_operator = model
this_operator.inputs = inputs
- var = scope.declare_local_variable(
- 'Xlgbm', inputs[0].type.__class__([None, None]))
+ var = scope.declare_local_variable("Xlgbm", inputs[0].type.__class__([None, None]))
this_operator.outputs.append(var)
- return lightgbm_parser(scope, model, this_operator.outputs,
- custom_parsers=custom_parsers)
+ return lightgbm_parser(
+ scope, model, this_operator.outputs, custom_parsers=custom_parsers
+ )
class WrappedLightGbmBoosterClassifier(ClassifierMixin):
@@ -100,111 +107,147 @@ class WrappedLightGbmBoosterClassifier(ClassifierMixin):
"""
def __init__(self, wrapped): # pylint: disable=W0231
- for k in {'boosting_type', '_model_dict', '_model_dict_info',
- 'operator_name', 'classes_', 'booster_', 'n_features_',
- 'objective_', 'boosting_type', 'n_features_in_',
- 'n_features_out_'}:
+ for k in {
+ "boosting_type",
+ "_model_dict",
+ "_model_dict_info",
+ "operator_name",
+ "classes_",
+ "booster_",
+ "n_features_",
+ "objective_",
+ "boosting_type",
+ "n_features_in_",
+ "n_features_out_",
+ }:
if hasattr(wrapped, k):
setattr(self, k, getattr(wrapped, k))
class TestLightGbmTreeEnsembleModels(unittest.TestCase):
-
@classmethod
def setUpClass(self):
-
update_registered_converter(
- LGBMClassifier, 'LightGbmLGBMClassifier',
+ LGBMClassifier,
+ "LightGbmLGBMClassifier",
calculate_linear_classifier_output_shapes,
- convert_lightgbm, options={
- 'zipmap': [True, False, 'columns'], 'nocl': [True, False]})
+ convert_lightgbm,
+ options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
+ )
update_registered_converter(
- LGBMRegressor, 'LgbmRegressor',
+ LGBMRegressor,
+ "LgbmRegressor",
calculate_linear_regressor_output_shapes,
- convert_lightgbm)
+ convert_lightgbm,
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for lightgbm is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for lightgbm is too old",
+ )
def test_lightgbm_classifier(self):
model = LGBMClassifier(n_estimators=3, min_child_samples=1)
dump_binary_classification(
- model,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
dump_multiple_classification(
- model,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for lightgbm is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for lightgbm is too old",
+ )
def test_lightgbm_regressor(self):
model = LGBMRegressor(n_estimators=3, min_child_samples=1)
dump_single_regression(
- model,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for lightgbm is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for lightgbm is too old",
+ )
def test_lightgbm_regressor1(self):
model = LGBMRegressor(n_estimators=1, min_child_samples=1)
dump_single_regression(
- model, suffix="1",
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model,
+ suffix="1",
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for lightgbm is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for lightgbm is too old",
+ )
def test_lightgbm_regressor2(self):
model = LGBMRegressor(n_estimators=2, max_depth=1, min_child_samples=1)
dump_single_regression(
- model, suffix="2",
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model,
+ suffix="2",
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for lightgbm is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for lightgbm is too old",
+ )
def test_lightgbm_booster_multi_classifier(self):
X = [[0, 1], [1, 1], [2, 0], [1, 2], [-1, 2], [1, -2]]
X = numpy.array(X, dtype=numpy.float32)
y = [0, 1, 0, 1, 2, 2]
data = Dataset(X, label=y)
model = train(
- {'boosting_type': 'gbdt', 'objective': 'multiclass',
- 'n_estimators': 3, 'min_child_samples': 1, 'num_class': 3},
- data)
+ {
+ "boosting_type": "gbdt",
+ "objective": "multiclass",
+ "n_estimators": 3,
+ "min_child_samples": 1,
+ "num_class": 3,
+ },
+ data,
+ )
update_registered_converter(
WrappedLightGbmBoosterClassifier,
- 'WrappedLightGbmBoosterClassifier',
+ "WrappedLightGbmBoosterClassifier",
calculate_lightgbm_output_shapes,
- convert_lightgbm, parser=lightgbm_parser,
- options={'zipmap': [False, True], 'nocl': [False, True]})
+ convert_lightgbm,
+ parser=lightgbm_parser,
+ options={"zipmap": [False, True], "nocl": [False, True]},
+ )
update_registered_converter(
- WrappedBooster, 'WrappedBooster',
+ WrappedBooster,
+ "WrappedBooster",
calculate_lightgbm_output_shapes,
- convert_lightgbm, parser=lightgbm_parser,
- options={'zipmap': [False, True], 'nocl': [False, True]})
+ convert_lightgbm,
+ parser=lightgbm_parser,
+ options={"zipmap": [False, True], "nocl": [False, True]},
+ )
update_registered_converter(
- Booster, 'LightGbmBooster', calculate_lightgbm_output_shapes,
- convert_lightgbm, parser=lightgbm_parser)
+ Booster,
+ "LightGbmBooster",
+ calculate_lightgbm_output_shapes,
+ convert_lightgbm,
+ parser=lightgbm_parser,
+ )
model_onnx = to_onnx(
- model, initial_types=[('X', FloatTensorType([None, 2]))],
- options={WrappedLightGbmBoosterClassifier: {'zipmap': False}},
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ model,
+ initial_types=[("X", FloatTensorType([None, 2]))],
+ options={WrappedLightGbmBoosterClassifier: {"zipmap": False}},
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
try:
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
except InvalidArgument as e:
- raise AssertionError(
- "Cannot load model\n%r" % str(model_onnx)) from e
+ raise AssertionError("Cannot load model\n%r" % str(model_onnx)) from e
expected = model.predict(X)
- res = sess.run(None, {'X': X})
+ res = sess.run(None, {"X": X})
assert_almost_equal(expected, res[1])
diff --git a/tests_onnxmltools/test_xgboost_converters.py b/tests_onnxmltools/test_xgboost_converters.py
index 4a8201f0a..404796a52 100644
--- a/tests_onnxmltools/test_xgboost_converters.py
+++ b/tests_onnxmltools/test_xgboost_converters.py
@@ -12,6 +12,7 @@
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
+
try:
from sklearn.ensemble import StackingClassifier
except ImportError:
@@ -21,12 +22,13 @@
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.common.shape_calculator import (
calculate_linear_classifier_output_shapes, # noqa
- calculate_linear_regressor_output_shapes)
+ calculate_linear_regressor_output_shapes,
+)
from skl2onnx._parse import _parse_sklearn_classifier
from xgboost import XGBRegressor, XGBClassifier
import onnxmltools
from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
- convert_xgboost # noqa
+ convert_xgboost, # noqa
)
try:
@@ -34,43 +36,47 @@
except ImportError:
import os
import sys
- sys.path.append(
- os.path.join(
- os.path.dirname(__file__), "..", "tests"))
+
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "tests"))
from test_utils import dump_single_regression
-from test_utils import (
- dump_multiple_classification, TARGET_OPSET, TARGET_OPSET_ML)
+from test_utils import dump_multiple_classification, TARGET_OPSET, TARGET_OPSET_ML
class TestXGBoostModels(unittest.TestCase):
-
@classmethod
def setUpClass(self):
-
def custom_parser(scope, model, inputs, custom_parsers=None):
if custom_parsers is not None and model in custom_parsers:
return custom_parsers[model](
- scope, model, inputs, custom_parsers=custom_parsers)
- if not all(isinstance(i, (numbers.Real, bool, np.bool_))
- for i in model.classes_):
+ scope, model, inputs, custom_parsers=custom_parsers
+ )
+ if not all(
+ isinstance(i, (numbers.Real, bool, np.bool_)) for i in model.classes_
+ ):
raise NotImplementedError(
- "Current converter does not support string labels.")
+ "Current converter does not support string labels."
+ )
return _parse_sklearn_classifier(scope, model, inputs)
update_registered_converter(
- XGBClassifier, 'XGBClassifier',
+ XGBClassifier,
+ "XGBClassifier",
calculate_linear_classifier_output_shapes,
- convert_xgboost, parser=custom_parser,
- options={'zipmap': [True, False, 'columns'],
- 'nocl': [True, False]})
+ convert_xgboost,
+ parser=custom_parser,
+ options={"zipmap": [True, False, "columns"], "nocl": [True, False]},
+ )
update_registered_converter(
- XGBRegressor, 'XGBRegressor',
+ XGBRegressor,
+ "XGBRegressor",
calculate_linear_regressor_output_shapes,
- convert_xgboost)
+ convert_xgboost,
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for xgboost is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for xgboost is too old",
+ )
def test_xgb_regressor(self):
iris = load_iris()
X = iris.data[:, :2]
@@ -80,13 +86,15 @@ def test_xgb_regressor(self):
xgb.fit(X, y)
conv_model = convert_sklearn(
xgb,
- initial_types=[
- ('input', FloatTensorType(shape=[None, X.shape[1]]))],
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))],
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
self.assertTrue(conv_model is not None)
dump_single_regression(
- xgb, suffix="-Dec4",
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb,
+ suffix="-Dec4",
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
def test_xgb_classifier(self):
xgb = XGBClassifier(n_estimators=2, max_depth=2)
@@ -96,20 +104,22 @@ def test_xgb_classifier(self):
y[y == 2] = 0
xgb.fit(X, y)
conv_model = convert_sklearn(
- xgb, initial_types=[
- ('input', FloatTensorType(shape=[None, X.shape[1]]))],
- options={id(xgb): {'zipmap': False}},
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb,
+ initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))],
+ options={id(xgb): {"zipmap": False}},
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
sess = InferenceSession(
- conv_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ conv_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
assert_almost_equal(xgb.predict_proba(X), res[1])
assert_almost_equal(xgb.predict(X), res[0])
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for xgboost is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for xgboost is too old",
+ )
def test_xgb_classifier_multi(self):
iris = load_iris()
X = iris.data[:, :2]
@@ -119,123 +129,151 @@ def test_xgb_classifier_multi(self):
xgb.fit(X, y)
conv_model = convert_sklearn(
xgb,
- initial_types=[
- ('input', FloatTensorType(shape=[None, X.shape[1]]))],
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))],
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
self.assertTrue(conv_model is not None)
dump_multiple_classification(
- xgb,
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb, target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML}
+ )
@unittest.skipIf(
- pv.Version(onnxmltools.__version__) < pv.Version('1.11'),
- reason="converter for xgboost is too old")
+ pv.Version(onnxmltools.__version__) < pv.Version("1.11"),
+ reason="converter for xgboost is too old",
+ )
def test_xgb_classifier_multi_reglog(self):
iris = load_iris()
X = iris.data[:, :2]
y = iris.target
- xgb = XGBClassifier(objective='reg:logistic')
+ xgb = XGBClassifier(objective="reg:logistic")
xgb.fit(X, y)
conv_model = convert_sklearn(
- xgb, initial_types=[
- ('input', FloatTensorType(shape=[None, X.shape[1]]))],
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb,
+ initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))],
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
self.assertTrue(conv_model is not None)
dump_multiple_classification(
- xgb, suffix="RegLog",
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb,
+ suffix="RegLog",
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
def test_xgb_classifier_reglog(self):
iris = load_iris()
X = iris.data[:, :2]
y = iris.target
y[y == 2] = 0
- xgb = XGBClassifier(objective='binary:logistic')
+ xgb = XGBClassifier(objective="binary:logistic")
xgb.fit(X, y)
conv_model = convert_sklearn(
- xgb, initial_types=[
- ('input', FloatTensorType(shape=[None, X.shape[1]]))],
- options={id(xgb): {'zipmap': False}},
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML})
+ xgb,
+ initial_types=[("input", FloatTensorType(shape=[None, X.shape[1]]))],
+ options={id(xgb): {"zipmap": False}},
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ )
self.assertTrue(conv_model is not None)
sess = InferenceSession(
- conv_model.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ conv_model.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
assert_almost_equal(xgb.predict_proba(X), res[1])
assert_almost_equal(xgb.predict(X), res[0])
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
def test_model_stacking_classifier_column_transformer(self):
classifiers = {
- 'A': XGBClassifier(n_estimators=5, random_state=42),
- 'B': XGBClassifier(n_estimators=5, random_state=42)
+ "A": XGBClassifier(n_estimators=5, random_state=42),
+ "B": XGBClassifier(n_estimators=5, random_state=42),
}
- model_to_test = Pipeline(steps=[
- ('cbe', ColumnTransformer([
- ("norm1", Normalizer(norm='l1'), [0, 1]),
- ("norm2", Normalizer(norm='l2'), [2, 3])])),
- ('sc', StackingClassifier(
- estimators=list(map(tuple, classifiers.items())),
- stack_method='predict_proba',
- passthrough=False
- ))
- ])
+ model_to_test = Pipeline(
+ steps=[
+ (
+ "cbe",
+ ColumnTransformer(
+ [
+ ("norm1", Normalizer(norm="l1"), [0, 1]),
+ ("norm2", Normalizer(norm="l2"), [2, 3]),
+ ]
+ ),
+ ),
+ (
+ "sc",
+ StackingClassifier(
+ estimators=list(map(tuple, classifiers.items())),
+ stack_method="predict_proba",
+ passthrough=False,
+ ),
+ ),
+ ]
+ )
iris = load_iris()
X = iris.data.astype(np.float32)
y = (iris.target == 0).astype(np.int32)
model_to_test.fit(X, y)
model_onnx = convert_sklearn(
- model_to_test, "stacking classifier",
+ model_to_test,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML},
- options={'zipmap': False})
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
assert_almost_equal(model_to_test.predict_proba(X), res[1])
assert_almost_equal(model_to_test.predict(X), res[0])
- @unittest.skipIf(StackingClassifier is None,
- reason="new in 0.22")
+ @unittest.skipIf(StackingClassifier is None, reason="new in 0.22")
def test_model_stacking_classifier_column_transformer_custom(self):
-
classifiers = {
- 'A': XGBClassifier(n_estimators=5, random_state=42),
- 'B': XGBClassifier(n_estimators=5, random_state=42)
+ "A": XGBClassifier(n_estimators=5, random_state=42),
+ "B": XGBClassifier(n_estimators=5, random_state=42),
}
- model_to_test = Pipeline(steps=[
- ('cbe', ColumnTransformer([
- ("norm1", Normalizer(norm='l1'), [0, 1]),
- ("norm2", Normalizer(norm='l2'), [2, 3])])),
- ('sc', StackingClassifier(
- estimators=list(map(tuple, classifiers.items())),
- stack_method='predict_proba',
- passthrough=False
- ))
- ])
+ model_to_test = Pipeline(
+ steps=[
+ (
+ "cbe",
+ ColumnTransformer(
+ [
+ ("norm1", Normalizer(norm="l1"), [0, 1]),
+ ("norm2", Normalizer(norm="l2"), [2, 3]),
+ ]
+ ),
+ ),
+ (
+ "sc",
+ StackingClassifier(
+ estimators=list(map(tuple, classifiers.items())),
+ stack_method="predict_proba",
+ passthrough=False,
+ ),
+ ),
+ ]
+ )
iris = load_iris()
X = iris.data.astype(np.float32)
df = pandas.DataFrame(X)
- df.columns = ['A', 'B', 'C', 'D']
+ df.columns = ["A", "B", "C", "D"]
X[:, 0] = X[:, 0].astype(np.int64).astype(X.dtype)
- df['A'] = df.A.astype(np.int64)
- df['B'] = df.B.astype(np.float32)
- df['C'] = df.C.astype(np.str_)
+ df["A"] = df.A.astype(np.int64)
+ df["B"] = df.B.astype(np.float32)
+ df["C"] = df.C.astype(np.str_)
y = (iris.target == 0).astype(np.int32)
model_to_test.fit(df, y)
model_onnx = convert_sklearn(
- model_to_test, "stacking classifier",
+ model_to_test,
+ "stacking classifier",
[("input", FloatTensorType([None, X.shape[1]]))],
- target_opset={'': TARGET_OPSET, 'ai.onnx.ml': TARGET_OPSET_ML},
- options={'zipmap': False})
+ target_opset={"": TARGET_OPSET, "ai.onnx.ml": TARGET_OPSET_ML},
+ options={"zipmap": False},
+ )
sess = InferenceSession(
- model_onnx.SerializeToString(),
- providers=["CPUExecutionProvider"])
- res = sess.run(None, {'input': X.astype(np.float32)})
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": X.astype(np.float32)})
assert_almost_equal(model_to_test.predict_proba(df), res[1])
assert_almost_equal(model_to_test.predict(df), res[0])