Coverage for mlprodict/asv_benchmark/common_asv_skl.py: 95%
1"""
2Common class for all benchmarks testing
3converted models from :epkg:`scikit-learn`
4with :epkg:`asv`. The benchmark can be run through
5file :epkg:`run_asv.sh` on Linux or :epkg:`run_asv.bat` on
6Windows.
8.. warning::
9 On Windows, you should avoid cloning the repository
10 on a folder with a long full name. Visual Studio tends to
11 abide by the rule of the maximum path length even though
12 the system is told otherwise.
13"""
import os
from datetime import datetime
import pickle
from logging import getLogger
import numpy
from sklearn import set_config
from sklearn.datasets import load_iris
from sklearn.metrics import (
    accuracy_score, mean_absolute_error, silhouette_score)
from sklearn.model_selection import train_test_split
from mlprodict import get_ir_version, __max_supported_opset__
from mlprodict.onnxrt import OnnxInference
from mlprodict.onnx_conv import (
    to_onnx, register_rewritten_operators, register_converters)
from mlprodict.onnxrt.validate.validate_benchmark import make_n_rows
from mlprodict.onnxrt.validate.validate_problems import _modify_dimension
from mlprodict.onnx_tools.optim import onnx_statistics
from mlprodict.tools.asv_options_helper import (
    expand_onnx_options, version2number)
from mlprodict.tools.model_info import set_random_state


class _CommonAsvSklBenchmark:
    """
    Common tests to all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000],  # values for N
        [4, 20],  # values for nf
        [__max_supported_opset__],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    chk_method_name = None
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    par_ydtype = numpy.int64
    par_dofit = True
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        return onx

    def _get_xdtype(self, dtype):
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', '64', 64, numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            "Unknown dtype '{}'.".format(dtype))

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        if X_train.shape[0] < X_train.shape[1]:
            raise RuntimeError(  # pragma: no cover
                "Unable to train a model with less observations than features "
                "shape=%r." % (X_train.shape, ))
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                "Conflict between par_convopts={} and optim={}".format(
                    self.par_convopts, optim))
        else:
            # Expand common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
        if 'onnxruntime' in runtime:
            old = onx.ir_version
            onx.ir_version = get_ir_version(__max_supported_opset__)
        else:
            old = None

        try:
            res = OnnxInference(
                onx, runtime=runtime,
                runtime_options=dict(log_severity_level=3))
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                return RuntimeError("onnxruntime fails due to {}".format(str(e)))
            raise e
        if old is not None:
            onx.ir_version = old
        return res
    # Part which does not change.

    def _check_rt(self, rt, meth):
        """
        Checks that runtime has the appropriate method.
        """
        if rt is None:
            raise ValueError("rt cannot be empty.")  # pragma: no cover
        if not hasattr(rt, meth):
            raise TypeError(  # pragma: no cover
                "rt of type %r has no method %r." % (type(rt), meth))

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'  # pragma: no cover
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                "Unknown runtime '{}'.".format(runtime))
        return name

    def _name(self, nf, opset, dtype):
        last = 'cache-{}-nf{}-op{}-dt{}.pickle'.format(
            self.__class__.__name__, nf, opset, dtype)
        return last

    def setup_cache(self):
        "asv API"
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            "Unable to dump model %r into %r." % (
                                model, filename))
    def setup(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        logger = getLogger('skl2onnx')
        logger.disabled = True
        register_converters()
        register_rewritten_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        set_config(assume_finite=True)
    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from onnxruntime import __version__ as onnxrt_version
        return version2number(onnxrt_version)

    def check_method_name(self, method_name):
        "Does some verifications. Fails if inconsistencies are found."
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                "Method name must be '{}'.".format(method_name))
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(
                    method_name))
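

# Illustrative sketch (not part of the original module): a concrete asv
# benchmark derives from one of the specialized classes below and mainly
# overrides ``_create_model``. The class and model choice here are
# hypothetical, assuming the regressor variant defined further down:
#
#     from sklearn.linear_model import LinearRegression
#
#     class LinearRegressionBench(_CommonAsvSklBenchmarkRegressor):
#
#         def _create_model(self):
#             return LinearRegression()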


class _CommonAsvSklBenchmarkClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a classifier.
    """
    chk_method_name = 'predict_proba'

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict_proba')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict_proba(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda pX: rt_.run({'X': pX})
            rt_fct_track_ = lambda pX: rt_fct_(pX)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkClassifierRawScore(_CommonAsvSklBenchmark):
    """
    Common class for a classifier evaluated on raw scores
    (*decision_function*).
    """
    chk_method_name = 'decision_function'

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('decision_function')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.decision_function(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkClustering(_CommonAsvSklBenchmark):
    """
    Common class for a clustering algorithm.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        if X.shape[0] == 1:
            return 0.  # pragma: no cover
        elif len(set(y_pred)) == 1:
            # silhouette_score requires at least two distinct clusters.
            return 0.  # pragma: no cover
        return silhouette_score(X, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X.astype(numpy.float64))
            rt_fct_track_ = lambda X: model.predict(X.astype(numpy.float64))
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['label']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkMultiClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a multi-classifier.
    """
    chk_method_name = 'predict_proba'

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        nbclass = len(set(y))
        y_ = numpy.zeros((y.shape[0], nbclass), dtype=y.dtype)
        for i, vy in enumerate(y):
            y_[i, vy] = 1
        y = y_
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        X = X_test.astype(xdtype)
        y = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (X, y)

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp.ravel(), y_pred.ravel())

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict_proba')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict_proba(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkOutlier(_CommonAsvSklBenchmark):
    """
    Common class for outlier detection.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['scores']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkRegressor(_CommonAsvSklBenchmark):
    """
    Common class for a regressor.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        return mean_absolute_error(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx = self._to_onnx(model, X, opset, dtype, optim)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkTrainableTransform(_CommonAsvSklBenchmark):
    """
    Common class for a trainable transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('transform')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.transform(X)
            rt_fct_track_ = lambda X: model.transform(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkTransform(_CommonAsvSklBenchmark):
    """
    Common class for a transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('transform')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.transform(X)
            rt_fct_track_ = lambda X: model.transform(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_


class _CommonAsvSklBenchmarkTransformPositive(_CommonAsvSklBenchmarkTransform):
    """
    Common class for a transformer for positive features.
    """
    chk_method_name = 'transform'

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X = numpy.abs(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        X = X_test.astype(xdtype)
        y = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (X, y)