scikit-learn#
This page answers common “how do I…” questions for converting
scikit-learn estimators and pipelines to ONNX with
yobx.sklearn.to_onnx().
How to convert a single estimator#
Train a scikit-learn estimator, then pass it to
yobx.sklearn.to_onnx() together with a tuple holding a
representative dummy input (one row is enough):
<<<
import numpy as np
from sklearn.preprocessing import StandardScaler
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
# 20 samples with 4 features, cast to float32.
X = rng.standard_normal((20, 4)).astype(np.float32)
scaler = StandardScaler().fit(X)
# The first row of X is the dummy input; it is passed wrapped in a tuple.
onx = to_onnx(scaler, (X[:1],))
# Print a text rendering of the converted model.
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s4_' type=float32 shape=(4,) -- array([-0.119, 0.054, 0.25 , 0.296], dtype=float32)-- Opset.make_node.0
init: name='init1_s4_2' type=float32 shape=(4,) -- array([0.864, 0.913, 1.04 , 0.971], dtype=float32)-- Opset.make_node.0
Sub(X, init1_s4_) -> _onx_sub_X
Div(_onx_sub_X, init1_s4_2) -> x
output: name='x' type='NOTENSOR' shape=None
The dummy input controls the dtype and the number of features of the generated ONNX graph; its batch dimension is replaced by a symbolic dynamic axis automatically.
How to convert a Pipeline#
yobx.sklearn.to_onnx() handles
Pipeline natively — each step is converted
in sequence and the resulting ONNX nodes are chained together:
<<<
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
# 80 samples with 4 features, cast to float32.
X = rng.standard_normal((80, 4)).astype(np.float32)
# Binary target: 1 where the sum of the first two features is positive.
y = (X[:, 0] + X[:, 1] > 0).astype(int)
# Two-step pipeline: scaling followed by logistic regression.
pipe = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())]).fit(X, y)
# The whole fitted pipeline is converted in one call, with one row as dummy input.
onx = to_onnx(pipe, (X[:1],))
# Version of the first entry in the model's opset imports.
print(f"ONNX opset : {onx.opset_import[0].version}")
print(pretty_onnx(onx))
>>>
ONNX opset : 21
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='main__scaler__init1_s4_' type=float32 shape=(4,) -- array([-0.041, 0.007, -0.24 , 0.112], dtype=float32)-- Opset.make_node.0
init: name='main__scaler__init1_s4_2' type=float32 shape=(4,) -- array([0.954, 0.858, 1.03 , 1.183], dtype=float32)-- Opset.make_node.0
init: name='main__clf__init1_s1x4_' type=float32 shape=(1, 4) -- array([ 2.229, 2.285, 0.46 , -0.414], dtype=float32)-- Opset.make_node.0
init: name='main__clf__init1_s1_' type=float32 shape=(1,) -- array([0.212], dtype=float32)-- Opset.make_node.1/Small
init: name='main__clf__init1_s1_2' type=float32 shape=(1,) -- array([1.], dtype=float32)-- Opset.make_node.1/Small
init: name='main__clf__init7_s2_0_1' type=int64 shape=(2,) -- array([0, 1])-- Opset.make_node.1/Shape
Sub(X, main__scaler__init1_s4_) -> main__scaler___onx_sub_X
Div(main__scaler___onx_sub_X, main__scaler__init1_s4_2) -> x
Gemm(x, main__clf__init1_s1x4_, main__clf__init1_s1_, transB=1) -> main__clf___onx_gemm_x
Sigmoid(main__clf___onx_gemm_x) -> main__clf___onx_sigmoid_main__clf___onx_gemm_x
Sub(main__clf__init1_s1_2, main__clf___onx_sigmoid_main__clf___onx_gemm_x) -> main__clf___onx_sub_main__clf__init1_s1_2
Concat(main__clf___onx_sub_main__clf__init1_s1_2, main__clf___onx_sigmoid_main__clf___onx_gemm_x, axis=-1) -> probabilities
ArgMax(probabilities, axis=1, keepdims=0) -> main__clf___onx_argmax_probabilities
Gather(main__clf__init7_s2_0_1, main__clf___onx_argmax_probabilities, axis=0) -> label
output: name='label' type='NOTENSOR' shape=None
output: name='probabilities' type='NOTENSOR' shape=None
See also
Converting a scikit-learn Pipeline to ONNX — a full runnable gallery example with output verification.
How to run the exported ONNX model#
Use onnxruntime to run the converted model and compare its outputs with scikit-learn’s own predictions:
<<<
import numpy as np
import onnxruntime
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from yobx.sklearn import to_onnx
# Seeded generator shared by the training and the test data below.
rng = np.random.default_rng(0)
X_train = rng.standard_normal((80, 4)).astype(np.float32)
# Binary target: 1 where the sum of the first two features is positive.
y_train = (X_train[:, 0] + X_train[:, 1] > 0).astype(int)
pipe = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())]).fit(
X_train, y_train
)
# One training row is used as the dummy input for the conversion.
onx = to_onnx(pipe, (X_train[:1],))
# Run with onnxruntime
# The test batch is drawn from the same generator, after the training data.
X_test = rng.standard_normal((20, 4)).astype(np.float32)
# The session is built from the serialized model bytes and runs on CPU.
sess = onnxruntime.InferenceSession(
onx.SerializeToString(), providers=["CPUExecutionProvider"]
)
# Look the input name up from the session instead of hard-coding it.
input_name = sess.get_inputs()[0].name
# Passing None as the first argument requests all outputs; two are unpacked here.
label_onnx, proba_onnx = sess.run(None, {input_name: X_test})
# Compare with scikit-learn
# Only the labels are checked; proba_onnx is not compared in this snippet.
label_sk = pipe.predict(X_test)
assert (label_sk == label_onnx).all(), "Label mismatch!"
print("Labels match ✓")
print(f"First 5 labels (sklearn): {label_sk[:5]}")
print(f"First 5 labels (ONNX) : {label_onnx[:5]}")
>>>
Labels match ✓
First 5 labels (sklearn): [0 1 1 1 0]
First 5 labels (ONNX) : [0 1 1 1 0]
How to control dynamic shapes#
By default the batch dimension (axis 0) of every input is made dynamic.
Pass dynamic_shapes to name that axis explicitly or to mark additional
axes as symbolic:
import numpy as np
from sklearn.preprocessing import StandardScaler
from yobx.sklearn import to_onnx
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
scaler = StandardScaler().fit(X)
# axis 0 is dynamic and named "batch"
# dynamic_shapes is a one-element tuple, like the tuple of inputs
# (presumably one entry per input -- confirm against the to_onnx reference).
onx = to_onnx(scaler, (X[:1],), dynamic_shapes=({0: "batch"},))
Pass an empty tuple (dynamic_shapes=()) to produce a fully static
graph where every dimension is fixed at conversion time:
# Reuses `scaler` and `X` defined in the previous snippet.
onx_static = to_onnx(scaler, (X[:1],), dynamic_shapes=())
How to inspect the ONNX graph#
Print a compact text representation of the model with
pretty_onnx():
<<<
import numpy as np
from sklearn.preprocessing import StandardScaler
from yobx.sklearn import to_onnx
from yobx.helpers.onnx_helper import pretty_onnx
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
# 10 samples with 4 features, cast to float32.
X = rng.standard_normal((10, 4)).astype(np.float32)
scaler = StandardScaler().fit(X)
onx = to_onnx(scaler, (X[:1],))
# pretty_onnx returns the text that is printed here.
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s4_' type=float32 shape=(4,) -- array([-0.448, 0.052, -0.093, 0.247], dtype=float32)-- Opset.make_node.0
init: name='init1_s4_2' type=float32 shape=(4,) -- array([0.774, 0.641, 0.825, 0.728], dtype=float32)-- Opset.make_node.0
Sub(X, init1_s4_) -> _onx_sub_X
Div(_onx_sub_X, init1_s4_2) -> x
output: name='x' type='NOTENSOR' shape=None
How to save and reload the ONNX model#
The ExportArtifact returned by
yobx.sklearn.to_onnx() can be serialised directly to disk with
onnx.save() and loaded again later with onnx.load():
import numpy as np
import onnx
from sklearn.preprocessing import StandardScaler
from yobx.sklearn import to_onnx
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
scaler = StandardScaler().fit(X)
onx = to_onnx(scaler, (X[:1],))
# Save
# The object returned by to_onnx is handed to onnx.save as-is.
onnx.save(onx, "scaler.onnx")
# Reload
# Reads back the file written just above.
onx_loaded = onnx.load("scaler.onnx")
See also
scikit-learn Export to ONNX — full reference for the scikit-learn converter, including the converter registry and how to add support for custom estimators.
Converting a scikit-learn Pipeline to ONNX — runnable gallery example.
Exporting sklearn estimators as ONNX local functions — exporting each pipeline step as a separate ONNX local function.
How to export a custom estimator#
There are two ways to make to_onnx() work with an
estimator that has no built-in converter.
Option 1 — TraceableMixin (numpy-based transformers)
If the transform method uses only standard numpy operations,
inherit from TraceableMixin together with the usual
sklearn base classes. The framework traces the method automatically — no
converter function is needed:
<<<
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx, TraceableMixin
class LogNormTransformer(BaseEstimator, TransformerMixin, TraceableMixin):
    """Transformer computing ``log(|X| / scale_ + 1)`` element-wise.

    ``scale_`` is learned in :meth:`fit`. Both methods use only numpy
    operations.
    """

    def fit(self, X, y=None):
        """Store the per-column mean absolute value of ``X`` as ``scale_``.

        ``keepdims=True`` keeps the reduced axis, so ``scale_`` has one row
        and broadcasts against ``X`` in :meth:`transform`. ``y`` is ignored.
        Returns ``self``.
        """
        self.scale_ = np.abs(X).mean(axis=0, keepdims=True).astype(np.float32)
        return self

    def transform(self, X):
        """Return ``log(|X| / scale_ + 1)`` for every element of ``X``."""
        # np.float32(1) is used for the constant rather than a Python int.
        return np.log(np.abs(X) / self.scale_ + np.float32(1))
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
est = LogNormTransformer().fit(X)
# No extra_converters argument is passed for this estimator.
onx = to_onnx(est, (X[:1],))
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s1x4_' type=float32 shape=(1, 4) -- array([0.668, 0.716, 0.926, 0.851], dtype=float32)-- Opset.make_node.0
init: name='init1_s_' type=float32 shape=() -- array([1.], dtype=float32)-- Opset.make_node.1/Small
Abs(X) -> _onx_abs_X
Div(_onx_abs_X, init1_s1x4_) -> _onx_div_abs_X
Add(_onx_div_abs_X, init1_s_) -> _onx_add_div_abs_X
Log(_onx_add_div_abs_X) -> _onx_log_add_div_abs_X
output: name='_onx_log_add_div_abs_X' type='NOTENSOR' shape=None
Option 2 — extra_converters (full control)
For estimators whose logic cannot be expressed as plain numpy ops — or when
you need fine-grained control over the ONNX graph — write a converter
function and pass it via extra_converters:
<<<
import numpy as np
import onnxruntime
from sklearn.base import BaseEstimator, TransformerMixin
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
from yobx.helpers.onnx_helper import tensor_dtype_to_np_dtype
class ClipTransformer(BaseEstimator, TransformerMixin):
    """Transformer that clips every value of ``X`` to ``[clip_min, clip_max]``."""

    def __init__(self, clip_min=0.0, clip_max=1.0):
        """Store the lower (``clip_min``) and upper (``clip_max``) bounds."""
        self.clip_min = clip_min
        self.clip_max = clip_max

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are not used."""
        return self

    def transform(self, X):
        """Return ``np.clip(X, clip_min, clip_max)``."""
        return np.clip(X, self.clip_min, self.clip_max)
def convert_clip(g, sts, outputs, estimator, X, name="clip"):
    """Converter for ``ClipTransformer``: emit a single ``Clip`` node.

    Parameters
    ----------
    g : graph builder exposing ``get_type``, ``op.Clip`` and
        ``set_type_shape_unary_op``.
    sts : not used by this converter.
    outputs : forwarded to ``g.op.Clip`` as its ``outputs`` argument.
    estimator : object providing ``clip_min`` and ``clip_max``.
    X : the input passed to ``Clip`` and to ``g.get_type``.
    name : forwarded to ``g.op.Clip`` as its ``name`` argument.

    Returns
    -------
    Whatever ``g.op.Clip`` returns.
    """
    # The bounds are created with the numpy dtype derived from the type of X.
    dtype = tensor_dtype_to_np_dtype(g.get_type(X))
    low = np.array(estimator.clip_min, dtype=dtype)
    high = np.array(estimator.clip_max, dtype=dtype)
    res = g.op.Clip(X, low, high, outputs=outputs, name=name)
    # Presumably copies the type and shape of X onto the result -- confirm
    # against the graph-builder documentation.
    g.set_type_shape_unary_op(res, X)
    return res
# Seeded generator shared by the fitting data and the test data below.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
transformer = ClipTransformer(clip_min=-0.5, clip_max=0.5).fit(X)
# extra_converters maps the estimator class to its converter function.
onx = to_onnx(
transformer,
(X[:1],),
extra_converters={ClipTransformer: convert_clip},
)
print(pretty_onnx(onx))
# The session is built from the serialized model bytes and runs on CPU.
sess = onnxruntime.InferenceSession(
onx.SerializeToString(), providers=["CPUExecutionProvider"]
)
# The test batch is drawn from the same generator, after X.
X_test = rng.standard_normal((5, 4)).astype(np.float32)
# The model input is addressed by the name "X"; a single output is unpacked.
(clipped,) = sess.run(None, {"X": X_test})
# Reference result computed by the estimator's own transform method.
expected = transformer.transform(X_test)
assert np.allclose(clipped, expected, atol=1e-6)
print("Results match ✓")
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s_' type=float32 shape=() -- array([-0.5], dtype=float32)-- Opset.make_node.1/Small
init: name='init1_s_2' type=float32 shape=() -- array([0.5], dtype=float32)-- Opset.make_node.1/Small
Clip(X, init1_s_, init1_s_2) -> Y
output: name='Y' type='NOTENSOR' shape=None
Results match ✓
See also
Custom converter with convert options — a full gallery example showing a custom converter with optional extra outputs.
scikit-learn Export to ONNX — converter registry and how to write a converter for any estimator.
How to export with FunctionTransformer#
FunctionTransformer wraps any numpy function
as a scikit-learn transformer. Because its func is a plain numpy
function, to_onnx() converts it via numpy tracing — no
custom converter is required.
Basic usage
<<<
import numpy as np
import onnxruntime
from sklearn.preprocessing import FunctionTransformer
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
def log1p_abs(X):
    """Return ``log(1 + |X|)`` element-wise, using only numpy functions."""
    return np.log1p(np.abs(X))
# Seeded generator shared by the fitting data and the test data below.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
transformer = FunctionTransformer(func=log1p_abs).fit(X)
# No extra_converters argument is passed here.
onx = to_onnx(transformer, (X[:1],))
print(pretty_onnx(onx))
# The session is built from the serialized model bytes and runs on CPU.
sess = onnxruntime.InferenceSession(
onx.SerializeToString(), providers=["CPUExecutionProvider"]
)
# The test batch is drawn from the same generator, after X.
X_test = rng.standard_normal((5, 4)).astype(np.float32)
# The model input is addressed by the name "X"; a single output is unpacked.
(onnx_out,) = sess.run(None, {"X": X_test})
# The scikit-learn result is cast to float32 before the comparison.
expected = transformer.transform(X_test).astype(np.float32)
assert np.allclose(onnx_out, expected, atol=1e-5)
print("Results match ✓")
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s_' type=float32 shape=() -- array([1.], dtype=float32)-- Opset.make_node.1/Small
Abs(X) -> _onx_abs_X
Add(_onx_abs_X, init1_s_) -> _onx_add_abs_X
Log(_onx_add_abs_X) -> Y
output: name='Y' type='NOTENSOR' shape=None
Results match ✓
Passing keyword arguments with kw_args
Constants can be forwarded to the function via kw_args; the converter
folds them into the ONNX graph as initializers:
<<<
import numpy as np
from sklearn.preprocessing import FunctionTransformer
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
def scale_shift(X, scale=np.float32(1), shift=np.float32(0)):
    """Return ``X * scale + shift``.

    The defaults (``scale=1``, ``shift=0``) are float32 scalars and leave
    the values of ``X`` unchanged.
    """
    return X * scale + shift
# Seeded generator, so the random example data is the same on every run.
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 4)).astype(np.float32)
# kw_args supplies scale=2 and shift=1 in place of scale_shift's defaults.
transformer = FunctionTransformer(
func=scale_shift,
kw_args={"scale": np.float32(2.0), "shift": np.float32(1.0)},
).fit(X)
onx = to_onnx(transformer, (X[:1],))
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 4]
init: name='init1_s_' type=float32 shape=() -- array([2.], dtype=float32)-- Opset.make_node.1/Small
init: name='init1_s_2' type=float32 shape=() -- array([1.], dtype=float32)-- Opset.make_node.1/Small
Mul(X, init1_s_) -> _onx_mul_X
Add(_onx_mul_X, init1_s_2) -> Y
output: name='Y' type='NOTENSOR' shape=None
Identity transformer (func=None)
When func=None the transformer is a pass-through — the input is forwarded
to the output unchanged. The optimizer removes redundant intermediate nodes,
so the resulting graph is minimal:
<<<
import numpy as np
from sklearn.preprocessing import FunctionTransformer
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sklearn import to_onnx
# Constant 5x3 float32 input; no random data is needed for this example.
X = np.ones((5, 3), dtype=np.float32)
# func=None: no function is supplied to the transformer.
identity_tf = FunctionTransformer(func=None).fit(X)
onx = to_onnx(identity_tf, (X[:1],))
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
opset: domain='ai.onnx.ml' version=5
input: name='X' type=dtype('float32') shape=['batch', 3]
Identity(X) -> Y
output: name='Y' type='NOTENSOR' shape=None
See also
Exporting FunctionTransformer with numpy tracing — a full gallery example that also shows standalone numpy tracing and pipeline embedding.
Numpy-Tracing and FunctionTransformer — design doc explaining the numpy tracing mechanism.