Coverage for mlprodict/sklapi/onnx_speed_up.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# coding: utf-8
2"""
3@file
4@brief Speeding up :epkg:`scikit-learn` with :epkg:`onnx`.
6.. versionadded:: 0.7
7"""
8import collections
9import inspect
10import io
11from contextlib import redirect_stdout, redirect_stderr
12import numpy
13from numpy.testing import assert_almost_equal
14import scipy.special as scipy_special
15import scipy.spatial.distance as scipy_distance
16from onnx import helper, load
17from sklearn.base import (
18 BaseEstimator, clone,
19 TransformerMixin, RegressorMixin, ClassifierMixin,
20 ClusterMixin)
21from sklearn.preprocessing import FunctionTransformer
22from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
23from ..tools.code_helper import print_code
24from .. import __max_supported_opset__
25from ..onnx_tools.onnx_export import export2numpy
26from ..onnx_tools.onnx2py_helper import (
27 onnx_model_opsets, _var_as_dict, to_skl2onnx_type)
28from ..onnx_tools.exports.numpy_helper import (
29 array_feature_extrator,
30 argmax_use_numpy_select_last_index,
31 argmin_use_numpy_select_last_index,
32 make_slice)
33from ..onnx_tools.exports.skl2onnx_helper import add_onnx_graph
34from ..onnx_conv import to_onnx
35from .onnx_transformer import OnnxTransformer
class _OnnxPipelineStepSpeedup(BaseEstimator, OnnxOperatorMixin):
    """
    Speeds up inference by replacing methods *transform* or
    *predict* by a runtime for :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference.
    :param target_opset: targeted ONNX opset
    :param conv_options: options for conversions, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnx_`: serialized ONNX graph (bytes)
    * `onnxrt_`: object of type @see cl OnnxTransformer or
        :epkg:`sklearn:preprocessing:FunctionTransformer`
        (runtimes `'numpy'` and `'numba'`)
    * `numpy_code_`: python code equivalent to the inference
        method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
        if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        BaseEstimator.__init__(self)
        self.estimator = estimator
        self.runtime = runtime
        self.enforce_float32 = enforce_float32
        self.target_opset = target_opset
        self.conv_options = conv_options
        self.nopython = nopython

    def _check_fitted_(self):
        """
        Raises an exception if attribute `onnxrt_` is missing,
        which means method *fit* was not called.

        :raises AttributeError: if the object was not fitted
        """
        if not hasattr(self, 'onnxrt_'):
            raise AttributeError(  # pragma: no cover
                "Object must be fit.")

    def _to_onnx(self, fitted_estimator, inputs):
        """
        Converts an estimator inference into :epkg:`ONNX`.

        :param fitted_estimator: any fitted estimator following
            :epkg:`scikit-learn` API
        :param inputs: example of inputs
        :return: ONNX
        """
        # The estimator to convert is the one received as an argument,
        # not necessarily the one stored on the instance.
        return to_onnx(
            fitted_estimator, inputs, target_opset=self.target_opset,
            options=self.conv_options)

    def _build_onnx_runtime(self, onx):
        """
        Returns an object which executes the ONNX graph.
        The runtime is selected by attribute *runtime*.

        :param onx: ONNX graph (serialized)
        :return: instance of @see cl OnnxTransformer, or
            the object returned by @see me _build_onnx_runtime_numpy
            if the runtime is `'numpy'` or `'numba'`
        """
        if self.runtime in ('numpy', 'numba'):
            return self._build_onnx_runtime_numpy(onx)
        tr = OnnxTransformer(
            onx, runtime=self.runtime,
            enforce_float32=self.enforce_float32)
        tr.fit()
        return tr

    def _build_onnx_runtime_numpy(self, onx):
        """
        Builds a runtime based on numpy.
        Exports the ONNX graph into python code
        based on numpy and then dynamically compiles
        it with method @see me _build_onnx_runtime_numpy_compile.

        The method stores the names and types of inputs and outputs
        in attribute `onnx_io_names_` and the generated code
        in attribute `numpy_code_`.

        :param onx: ONNX graph (serialized)
        :return: see @see me _build_onnx_runtime_numpy_compile
        """
        model_onnx = load(io.BytesIO(onx))
        self.onnx_io_names_ = {'inputs': [], 'outputs': []}
        for inp in model_onnx.graph.input:  # pylint: disable=E1101
            d = _var_as_dict(inp)
            self.onnx_io_names_['inputs'].append((d['name'], d['type']))
        for inp in model_onnx.graph.output:  # pylint: disable=E1101
            d = _var_as_dict(inp)
            self.onnx_io_names_['outputs'].append((d['name'], d['type']))
        self.onnx_io_names_['skl2onnx_inputs'] = [
            to_skl2onnx_type(d[0], d[1]['elem'], d[1]['shape'])
            for d in self.onnx_io_names_['inputs']]
        self.onnx_io_names_['skl2onnx_outputs'] = [
            to_skl2onnx_type(d[0], d[1]['elem'], d[1]['shape'])
            for d in self.onnx_io_names_['outputs']]
        self.numpy_code_ = export2numpy(model_onnx, rename=True)
        opsets = onnx_model_opsets(model_onnx)
        return self._build_onnx_runtime_numpy_compile(opsets)

    def _build_onnx_runtime_numpy_compile(self, opsets):
        """
        Second part of @see me _build_onnx_runtime_numpy.
        Compiles and executes the code stored in `numpy_code_`,
        retrieves the only function whose name starts with
        `numpy_` and wraps it into a
        :epkg:`sklearn:preprocessing:FunctionTransformer`.

        :param opsets: dictionary of opsets, key `''` gives the
            value of attribute `op_version` of the returned object
        :return: :epkg:`sklearn:preprocessing:FunctionTransformer`
        :raises AssertionError: if the code cannot be compiled
            or executed
        :raises RuntimeError: if the executed code does not define
            exactly one function whose name starts with `numpy_`
        """
        try:
            compiled_code = compile(
                self.numpy_code_, '<string>', 'exec')
        except SyntaxError as e:  # pragma: no cover
            raise AssertionError(
                "Unable to compile a script due to %r. "
                "\n--CODE--\n%s"
                "" % (e, print_code(self.numpy_code_))) from e

        glo = globals().copy()
        loc = {
            'numpy': numpy, 'dict': dict, 'list': list,
            'print': print, 'sorted': sorted,
            'collections': collections, 'inspect': inspect,
            'helper': helper, 'scipy_special': scipy_special,
            'scipy_distance': scipy_distance,
            'array_feature_extrator': array_feature_extrator,
            'argmin_use_numpy_select_last_index':
                argmin_use_numpy_select_last_index,
            'argmax_use_numpy_select_last_index':
                argmax_use_numpy_select_last_index,
            'make_slice': make_slice}
        out = io.StringIO()
        err = io.StringIO()
        # Anything printed by the generated code is captured
        # and only shown if the execution fails.
        with redirect_stdout(out), redirect_stderr(err):
            try:
                exec(compiled_code, glo, loc)  # pylint: disable=W0122
            except Exception as e:  # pragma: no cover
                raise AssertionError(
                    "Unable to execute a script due to %r. "
                    "\n--OUT--\n%s\n--ERR--\n%s\n--CODE--\n%s"
                    "" % (e, out.getvalue(), err.getvalue(),
                          print_code(self.numpy_code_))) from e
        names = [k for k in loc if k.startswith('numpy_')]
        if len(names) != 1:
            raise RuntimeError(  # pragma: no cover
                "Unable to guess which function is the one, names=%r."
                "" % sorted(names))
        fct = loc[names[0]]
        if self.runtime == 'numba':
            # numba is only imported if this runtime is requested.
            from numba import jit
            jitter = jit(nopython=self.nopython)
            fct = jitter(fct)
        cl = FunctionTransformer(fct, accept_sparse=True)
        cl.op_version = opsets.get('', __max_supported_opset__)
        return cl

    def __getstate__(self):
        """
        :epkg:`pickle` does not support functions.
        This method removes any link to function
        when the runtime is `'numpy'`.
        """
        # A shallow copy guarantees the live instance keeps `onnxrt_`
        # even if the parent class returns the instance dictionary itself.
        state = dict(BaseEstimator.__getstate__(self))
        if 'numpy_code_' in state:
            state.pop('onnxrt_', None)
        return state

    def __setstate__(self, state):
        """
        :epkg:`pickle` does not support functions.
        This method restores the function created when
        the runtime is `'numpy'`.
        """
        BaseEstimator.__setstate__(self, state)
        if 'numpy_code_' in state:
            # The function is rebuilt from the code saved in `numpy_code_`,
            # the opsets come from the serialized ONNX graph.
            model_onnx = load(io.BytesIO(state['onnx_']))
            opsets = onnx_model_opsets(model_onnx)
            self.onnxrt_ = self._build_onnx_runtime_numpy_compile(opsets)

    def fit(self, X, y=None, sample_weight=None, **kwargs):
        """
        Fits the estimator, converts to ONNX.

        :param X: features
        :param y: targets, not given to the estimator if None
        :param sample_weight: sample weights, not given
            to the estimator if None
        :param kwargs: fitting options
        :return: self
        """
        if not hasattr(self, 'estimator_'):
            self.estimator_ = clone(self.estimator)
        # Optional arguments are only forwarded when they are specified.
        fit_args = (X, ) if y is None else (X, y)
        if sample_weight is not None:
            kwargs = dict(kwargs, sample_weight=sample_weight)
        self.estimator_.fit(*fit_args, **kwargs)

        if self.enforce_float32:
            X = X.astype(numpy.float32)
        self.onnx_ = self._to_onnx(self.estimator_, X).SerializeToString()
        self.onnxrt_ = self._build_onnx_runtime(self.onnx_)
        return self

    @property
    def op_version(self):
        """
        Returns the opset version.
        """
        self._check_fitted_()
        return self.onnxrt_.op_version

    def onnx_parser(self):
        """
        Returns a parser for this model.
        """
        self._check_fitted_()
        if isinstance(self.onnxrt_, FunctionTransformer):
            def parser():
                # Types should be included as well.
                return [r[0] for r in self.onnx_io_names_['skl2onnx_outputs']]
            return parser
        return self.onnxrt_.onnx_parser()

    def onnx_shape_calculator(self):
        """
        Returns a shape calculator for this transform.
        """
        self._check_fitted_()

        if isinstance(self.onnxrt_, FunctionTransformer):
            def fct_shape_calculator(operator):
                # Types should be included as well.
                outputs = self.onnx_io_names_['skl2onnx_outputs']
                if len(operator.outputs) != len(outputs):
                    raise RuntimeError(  # pragma: no cover
                        "Mismatch between parser and shape calculator, "
                        "%r != %r." % (outputs, operator.outputs))
                for a, b in zip(operator.outputs, outputs):
                    a.type = b[1]
            return fct_shape_calculator

        calc = self.onnxrt_.onnx_shape_calculator()

        def shape_calculator(operator):
            return calc(operator)

        return shape_calculator

    def onnx_converter(self):
        """
        Returns a converter for this transform.
        """
        self._check_fitted_()

        if isinstance(self.onnxrt_, FunctionTransformer):

            def fct_converter(scope, operator, container):
                # The serialized graph is inserted into the container.
                op = operator.raw_operator
                onnx_model = load(io.BytesIO(op.onnx_))
                add_onnx_graph(scope, operator, container, onnx_model)

            return fct_converter

        conv = self.onnxrt_.onnx_converter()

        def converter(scope, operator, container):
            op = operator.raw_operator
            onnx_model = op.onnxrt_.onnxrt_.obj
            conv(scope, operator, container, onnx_model=onnx_model)

        return converter
class OnnxSpeedupTransformer(TransformerMixin,
                             _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference.
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object executing the ONNX graph, it implements
        method *transform*
    * `numpy_code_`: python code equivalent to the inference
        method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
        if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator,
            runtime=runtime,
            enforce_float32=enforce_float32,
            target_opset=target_opset,
            conv_options=conv_options,
            nopython=nopython)

    def fit(self, X, y=None, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.

        :param X: features
        :param y: targets (optional)
        :param sample_weight: sample weights (optional)
        :return: self
        """
        # The parent already ignores sample_weight when it is None.
        _OnnxPipelineStepSpeedup.fit(
            self, X, y, sample_weight=sample_weight)
        return self

    def transform(self, X):
        """
        Transforms with *ONNX*.

        :param X: features
        :return: transformed features
        """
        runtime = self.onnxrt_
        return runtime.transform(X)

    def raw_transform(self, X):
        """
        Transforms with *scikit-learn*.

        :param X: features
        :return: transformed features
        """
        trained = self.estimator_
        return trained.transform(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produces the same
        outputs.

        :param X: features
        :param kwargs: sent to function *assert_almost_equal*
            from *numpy.testing*
        """
        with_sklearn = self.raw_transform(X)
        with_onnx = self.transform(X)
        assert_almost_equal(with_sklearn, with_onnx, **kwargs)
class OnnxSpeedupRegressor(RegressorMixin,
                           _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference.
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object executing the ONNX graph, it implements
        method *transform*
    * `numpy_code_`: python code equivalent to the inference
        method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
        if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator,
            runtime=runtime,
            enforce_float32=enforce_float32,
            target_opset=target_opset,
            conv_options=conv_options,
            nopython=nopython)

    def fit(self, X, y, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.

        :param X: features
        :param y: targets
        :param sample_weight: sample weights (optional)
        :return: self
        """
        # The parent already ignores sample_weight when it is None.
        _OnnxPipelineStepSpeedup.fit(
            self, X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """
        Predicts with *ONNX*.

        :param X: features
        :return: output of the ONNX runtime
        """
        runtime = self.onnxrt_
        return runtime.transform(X)

    def raw_predict(self, X):
        """
        Predicts with *scikit-learn*.

        :param X: features
        :return: predictions of the trained estimator
        """
        trained = self.estimator_
        return trained.predict(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produces the same
        outputs. Dimensions of size one are removed
        before the comparison.

        :param X: features
        :param kwargs: sent to function *assert_almost_equal*
            from *numpy.testing*
        """
        with_sklearn = numpy.squeeze(self.raw_predict(X))
        with_onnx = numpy.squeeze(self.predict(X))
        assert_almost_equal(with_sklearn, with_onnx, **kwargs)
class OnnxSpeedupClassifier(ClassifierMixin,
                            _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference.
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx,
        replaced by `{'zipmap': False}` if None
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object executing the ONNX graph, it implements
        method *transform*
    * `numpy_code_`: python code equivalent to the inference
        method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
        if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        # Default conversion options for a classifier.
        options = {'zipmap': False} if conv_options is None else conv_options
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator,
            runtime=runtime,
            enforce_float32=enforce_float32,
            target_opset=target_opset,
            conv_options=options,
            nopython=nopython)

    def fit(self, X, y, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.

        :param X: features
        :param y: targets
        :param sample_weight: sample weights (optional)
        :return: self
        """
        # The parent already ignores sample_weight when it is None.
        _OnnxPipelineStepSpeedup.fit(
            self, X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """
        Predicts with *ONNX*.

        :param X: features
        :return: first output of the ONNX runtime
        """
        outputs = self.onnxrt_.transform(X)
        if not isinstance(outputs, tuple):
            # A single table: the first column is returned.
            return outputs.iloc[:, 0].values
        return outputs[0]

    def predict_proba(self, X):
        """
        Predicts with *ONNX*.

        :param X: features
        :return: second output of the ONNX runtime
        """
        outputs = self.onnxrt_.transform(X)
        if not isinstance(outputs, tuple):
            # A single table: every column but the first one is returned.
            return outputs.iloc[:, 1:].values
        return outputs[1]

    def raw_predict(self, X):
        """
        Predicts with *scikit-learn*.

        :param X: features
        :return: output of method *predict* of the trained estimator
        """
        trained = self.estimator_
        return trained.predict(X)

    def raw_predict_proba(self, X):
        """
        Predicts with *scikit-learn*.

        :param X: features
        :return: output of method *predict_proba* of the trained estimator
        """
        trained = self.estimator_
        return trained.predict_proba(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produces the same
        outputs, *predict_proba* is compared first, then *predict*.
        Dimensions of size one are removed before the comparison.

        :param X: features
        :param kwargs: sent to function *assert_almost_equal*
            from *numpy.testing*
        """
        proba_sklearn = numpy.squeeze(self.raw_predict_proba(X))
        proba_onnx = numpy.squeeze(self.predict_proba(X))
        assert_almost_equal(proba_sklearn, proba_onnx, **kwargs)
        label_sklearn = numpy.squeeze(self.raw_predict(X))
        label_onnx = numpy.squeeze(self.predict(X))
        assert_almost_equal(label_sklearn, label_onnx, **kwargs)
class OnnxSpeedupCluster(ClusterMixin,
                         _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts and transforms
    with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference.
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object executing the ONNX graph, it implements
        method *transform*
    * `numpy_code_`: python code equivalent to the inference
        method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
        if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator, runtime=runtime, enforce_float32=enforce_float32,
            target_opset=target_opset, conv_options=conv_options,
            nopython=nopython)

    def fit(self, X, y=None, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.

        :param X: features
        :param y: targets, optional so that the model can be
            trained with `fit(X)`
        :param sample_weight: sample weights (optional)
        :return: self
        """
        # The parent ignores y and sample_weight when they are None.
        _OnnxPipelineStepSpeedup.fit(
            self, X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """
        Predicts with *ONNX*.

        :param X: features
        :return: first output of the ONNX runtime
        """
        pred = self.onnxrt_.transform(X)
        if isinstance(pred, tuple):
            return pred[0]
        # A single table: the first column is returned.
        return pred.iloc[:, 0].values

    def transform(self, X):
        """
        Transforms with *ONNX*.

        :param X: features
        :return: second output of the ONNX runtime
        """
        pred = self.onnxrt_.transform(X)
        if isinstance(pred, tuple):
            return pred[1]
        # A single table: every column but the first one is returned.
        return pred.iloc[:, 1:].values

    def raw_predict(self, X):
        """
        Predicts with *scikit-learn*.

        :param X: features
        :return: output of method *predict* of the trained estimator
        """
        return self.estimator_.predict(X)

    def raw_transform(self, X):
        """
        Transforms with *scikit-learn*.

        :param X: features
        :return: output of method *transform* of the trained estimator
        """
        return self.estimator_.transform(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produces the same
        outputs, *transform* is compared first, then *predict*.
        Dimensions of size one are removed before the comparison.

        :param X: features
        :param kwargs: sent to function *assert_almost_equal*
            from *numpy.testing*
        """
        expected = numpy.squeeze(self.raw_transform(X))
        got = numpy.squeeze(self.transform(X))
        assert_almost_equal(expected, got, **kwargs)
        expected = numpy.squeeze(self.raw_predict(X))
        got = numpy.squeeze(self.predict(X))
        assert_almost_equal(expected, got, **kwargs)