Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Inspired from sklearn-onnx, handles two backends.
4"""
5import pickle
6import os
7import warnings
8import traceback
9import time
10import sys
11import numpy
12import pandas
13from sklearn.datasets import (
14 make_classification, make_multilabel_classification,
15 make_regression)
16from sklearn.model_selection import train_test_split
17from sklearn.preprocessing import MultiLabelBinarizer
18from .utils_backend import compare_backend
19from .utils_backend_common import (
20 extract_options, evaluate_condition, is_backend_enabled,
21 OnnxBackendMissingNewOnnxOperatorException)
24def _has_predict_proba(model):
25 if hasattr(model, "voting") and model.voting == "hard":
26 return False
27 return hasattr(model, "predict_proba")
30def _has_decision_function(model):
31 if hasattr(model, "voting"):
32 return False
33 return hasattr(model, "decision_function")
36def _has_transform_model(model):
37 if hasattr(model, "voting"):
38 return False
39 return hasattr(model, "fit_transform") and hasattr(model, "score")
def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Trains *model* on a synthetic classification problem
    (500 samples, 7 informative features), half of the samples
    are used for training.

    :param model: model to fit, it must define ``fit(X, y)``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``
        instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :param label_string: uses string labels ``'cl<i>'``
        instead of integers
    :param random_state: seed used to generate the dataset,
        the train/test split always uses seed 42
    :param is_bool: casts the features into ``int64`` and then
        into booleans
    :param n_features: number of features
    :return: tuple *(fitted model, test features)*
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % cl for cl in labels])
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[:3]
    model.fit(X_train, y_train)
    return model, X_test
def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Trains *model* on a synthetic multi-label classification problem,
    half of the samples are used for training.

    :param model: model to fit, it must define ``fit(X, y)``
    :param n_classes: number of classes
    :param n_labels: parameter *n_labels* given to
        ``make_multilabel_classification``
    :param n_samples: number of samples
    :param n_features: number of features
    :param is_int: casts the features into ``int64``
        instead of ``float32``
    :return: tuple *(fitted model, test features)*
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[:3]
    model.fit(X_train, y_train)
    return model, X_test
def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Trains *model* on a synthetic regression problem,
    half of the samples are used for training.

    :param model: model to fit, it must define ``fit(X, y)``
    :param is_int: casts the features into ``int64``
        instead of ``float32``
    :param n_targets: number of targets
    :param is_bool: casts the features into ``int64`` and then
        into booleans
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of samples
    :param n_informative: number of informative features
    :return: tuple *(fitted model, test features)*
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    features, targets = generated[:2]
    targets *= factor
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, targets, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[:3]
    model.fit(X_train, y_train)
    return model, X_test
def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Trains *model* on a synthetic classification problem with
    10 features (9 informative, no redundant or repeated feature)
    and 500 samples, half of the samples are used for training.

    :param model: model to fit, it must define ``fit(X, y)``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``
        instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :return: tuple *(fitted model, test features)*
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[:3]
    model.fit(X_train, y_train)
    return model, X_test
119def _raw_score_binary_classification(model, X):
120 scores = model.decision_function(X)
121 if len(scores.shape) == 1:
122 scores = scores.reshape(-1, 1)
123 if len(scores.shape) != 2 or scores.shape[1] != 1:
124 raise RuntimeError( # pragma: no cover
125 "Unexpected shape {} for a binary classifiation".format(
126 scores.shape))
127 return numpy.hstack([-scores, scores])
130def _save_model_dump(model, folder, basename, names):
131 if hasattr(model, "save"): # pragma: no cover
132 dest = os.path.join(folder, basename + ".model.keras")
133 names.append(dest)
134 model.save(dest)
135 else:
136 dest = os.path.join(folder, basename + ".model.pkl")
137 names.append(dest)
138 with open(dest, "wb") as f:
139 try:
140 pickle.dump(model, f)
141 except AttributeError as e: # pragma no cover
142 print("[dump_data_and_model] cannot pickle model '{}'"
143 " due to {}.".format(dest, e))
def _compute_expected_outputs(model, data, dataone, methods, basename):
    """
    Computes the outputs of the original model, the outputs
    the backends are compared to.

    :param model: fitted model
    :param data: data the expected outputs are computed on
    :param dataone: data given to the callable used for the benchmark
    :param methods: None to guess the methods to call from the
        attributes of *model*, otherwise a list of method names
        or callables ``f(model, X)``
    :param basename: test name, it is only parsed to look for
        option ``SklCol`` when the model is a transformer
    :return: tuple *(prediction, lambda_original)*,
        *lambda_original* takes no argument and calls the last
        method on *dataone*, it is None if *methods* is empty
    :raises RuntimeError: if one of the requested methods
        is not callable
    :raises TypeError: if the model has no method ``predict``
        or ``transform``
    """
    lambda_original = None

    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                # method is bound as a default value to avoid
                # any late-binding issue
                call = (
                    lambda X, model=model, method=method:
                    method(model, X))  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method != 'decision_function_binary':
                        raise
                    call = (
                        lambda X, model=model:
                        _raw_score_binary_classification(model, X))
            if not callable(call):
                raise RuntimeError(  # pragma: no cover
                    "Method '{0}' is not callable.".format(method))
            prediction.append(call(data))
            # we only take the last one for benchmark
            lambda_original = lambda call=call: call(dataone)
        return prediction, lambda_original

    if hasattr(model, "predict"):
        if _has_predict_proba(model):
            # Classifier
            prediction = [model.predict(data), model.predict_proba(data)]
            lambda_original = lambda: model.predict_proba(dataone)
        elif _has_decision_function(model):
            # Classifier without probabilities
            prediction = [model.predict(data),
                          model.decision_function(data)]
            lambda_original = lambda: model.decision_function(dataone)
        elif _has_transform_model(model):
            # clustering
            try:
                prediction = [model.predict(data), model.transform(data)]
                lambda_original = lambda: model.transform(dataone)
            except ValueError:
                # 0.23 enforced type checking.
                d64 = data.astype(numpy.float64)
                prediction = [model.predict(d64), model.transform(d64)]
                dataone64 = dataone.astype(numpy.float64)
                lambda_original = lambda: model.transform(dataone64)
        else:
            # Regressor or VotingClassifier
            prediction = [model.predict(data)]
            lambda_original = lambda: model.predict(dataone)
    elif hasattr(model, "transform"):
        options = extract_options(basename)
        if options.get("SklCol", False):
            prediction = model.transform(data.ravel())  # pragma: no cover
            lambda_original = lambda: model.transform(
                dataone.ravel())  # pragma: no cover
        else:
            prediction = model.transform(data)
            lambda_original = lambda: model.transform(dataone)
    else:
        raise TypeError(  # pragma: no cover
            "Model has no predict or transform method: {0}".format(
                type(model)))
    return prediction, lambda_original


def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None,
        disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified,
        only used if parameter *onnx_model* is None
    :param backend: backend used to compare expected output and
        runtime output. Two options are currently supported:
        None for no test, ``'onnxruntime'`` to use module
        :epkg:`onnxruntime`, ``'python'`` to use the python runtime.
        It can be a single name, a list or a tuple.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a boolean or a string which is
        then evaluated to check whether or not the test can fail,
        example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods is equivalent to ONNX
        outputs. If not specified, it falls back into a default
        behaviour implemented for classifiers, regressors, clustering.
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the
        environment variable ``ONNXTESTBENCHMARK``
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the
        implementation of a new operator defined in ONNX.
    :param verbose: additional information
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two
      classes *size=(N, 2)* but the runtime produces a vector of size
      *N*, the test will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column
      and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import (  # delayed
        FloatTensorType, DoubleTensorType)

    runtime_test = dict(model=model, data=data)

    enabled_values = ("1", 1, "True", "true", True)
    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get(
            "ONNXTESTDUMPERROR", "0") in enabled_values
    if benchmark is None:
        benchmark = os.environ.get(
            "ONNXTESTBENCHMARK", "0") in enabled_values
    # exist_ok=True: no failure if the folder is created by somebody
    # else between a check and the creation
    os.makedirs(folder, exist_ok=True)

    # the benchmark only runs on the first row of arrays and dataframes
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    prediction, lambda_original = _compute_expected_outputs(
        model, data, dataone, methods, basename)
    runtime_test["expected"] = prediction

    names = []
    for extension, content in ((".expected.pkl", prediction),
                               (".data.pkl", data)):
        dest = os.path.join(folder, basename + extension)
        names.append(dest)
        with open(dest, "wb") as f:
            pickle.dump(content, f)

    _save_model_dump(model, folder, basename, names)

    # only written if dump_error_log is True
    error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    # backend
    if backend is None:
        return names
    if isinstance(backend, tuple):
        backend = list(backend)
    if not isinstance(backend, list):
        backend = [backend]

    for b in backend:
        if not is_backend_enabled(b):
            continue  # pragma: no cover
        if isinstance(allow_failure, str):
            allow = evaluate_condition(
                b, allow_failure)  # pragma: no cover
        else:
            allow = allow_failure

        # the same arguments are used whatever the way errors
        # are handled, disable_optimisation included
        compare_kwargs = dict(
            options=extract_options(basename),
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

        if allow is None and not check_error:
            # any failure is raised
            output, lambda_onnx = compare_backend(
                b, runtime_test, **compare_kwargs)
        elif check_error:
            # failures whose message contains check_error become warnings
            try:
                output, lambda_onnx = compare_backend(
                    b, runtime_test, **compare_kwargs)
            except Exception as e:  # pragma: no cover
                if check_error in str(e):
                    warnings.warn(str(e))
                    continue
                raise
        else:
            try:
                output, lambda_onnx = compare_backend(
                    b, runtime_test, **compare_kwargs)
            except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                if fail_evenif_notimplemented:
                    raise
                warnings.warn(str(e))
                continue
            except AssertionError as e:  # pragma no cover
                if dump_error_log:
                    with open(error_dump, "w", encoding="utf-8") as f:
                        f.write(str(e) + "\n--------------\n")
                        traceback.print_exc(file=f)
                if isinstance(allow, bool) and allow:
                    warnings.warn("Issue with '{0}' due to {1}".format(
                        basename,
                        str(e).replace("\n", " -- ")))
                    continue
                raise

        if output is None:
            continue
        dest = os.path.join(folder,
                            basename + ".backend.{0}.pkl".format(b))
        names.append(dest)
        with open(dest, "wb") as f:
            pickle.dump(output, f)
        if (benchmark and lambda_onnx is not None and
                lambda_original is not None):
            # run a benchmark
            obs = compute_benchmark({
                "onnxrt": lambda_onnx,
                "original": lambda_original
            })
            df = pandas.DataFrame(obs)
            df["input_size"] = sys.getsizeof(dataone)
            dest = os.path.join(folder, basename + ".bench")
            df.to_csv(dest, index=False)

    return names
def convert_model(model, name, input_types):
    """
    Converts a model into *onnx* with ``skl2onnx.convert_sklearn``.

    :param model: model to convert
    :param name: model name
    :param input_types: input types
    :return: tuple *(onnx model, prefix)*, the prefix is always
        ``"Sklearn"``
    :raises RuntimeError: if the converter returns None
    """
    from skl2onnx import convert_sklearn  # delayed

    # The result is kept in its own variable so that the error
    # message reports the type of the model which failed and
    # not the type of None.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, "Sklearn"
def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Fits *model* on a three-row dataset where every observation
    belongs to the same class, converts it and gives everything to
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                           dtype=numpy.float32)
    labels = [1, 1, 1]
    model.fit(features, labels)

    input_types = [("input", FloatTensorType([None, 2]))]
    onnx_model, prefix = convert_model(model, "one_class", input_types)
    basename = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, onnx_model, basename=basename, folder=folder,
        allow_failure=allow_failure, methods=methods, benchmark=benchmark,
        comparable_outputs=comparable_outputs, verbose=verbose)
def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Fits and dumps *model* on two binary classification problems,
    a three-row dataset and a random one. Both are given to
    :func:`dump_data_and_model`. If *nrows* is 2, the first dataset
    is dumped by windows of two consecutive rows. If *nrows* is
    not None, only the first *nrows* rows of the random dataset
    are dumped. The benchmark only applies to the random dataset.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    common = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)

    # first problem: three rows
    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    if label_string:
        labels = ["A", "B", "A"]
    else:
        labels = numpy.array([0, 1, 0], numpy.int64)
    model.fit(features, labels)
    onnx_model, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    if nrows == 2:
        chunks = [features[start: start + 2]
                  for start in range(features.shape[0] - 1)]
    else:
        chunks = [features]
    for chunk in chunks:
        dump_data_and_model(
            chunk, model, onnx_model,
            basename=prefix + "Bin" + model.__class__.__name__ + suffix,
            **common)

    # second problem: random data
    features, labels = make_classification(
        10, n_features=4, random_state=42)
    features = features[:, :2]
    model.fit(features, labels)
    onnx_model, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    sample = features.astype(numpy.float32)
    if nrows is not None:
        sample = sample[:nrows]
    dump_data_and_model(
        sample, model, onnx_model,
        basename=prefix + "RndBin" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **common)
def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Fits and dumps *model* on two multi-class classification
    problems, a six-row dataset and a random one. Both are given to
    :func:`dump_data_and_model`. The benchmark only applies
    to the random dataset.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    common = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)

    # first problem: six rows, labels are shifted by first_class
    features = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    labels = [i + first_class for i in [0, 1, 2, 1, 1, 2]]
    if label_string:
        labels = ["l%d" % i for i in labels]
    model.fit(features, labels)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    onnx_model, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        features.astype(numpy.float32), model, onnx_model,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        **common)

    # second problem: random data, only ten rows are dumped
    features, labels = make_classification(
        40, n_features=4, random_state=42,
        n_classes=3, n_clusters_per_class=1)
    features = features[:, :2]
    model.fit(features, labels)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    onnx_model, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        features[:10].astype(numpy.float32), model, onnx_model,
        basename=prefix + "RndMcl" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **common)
def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Fits and dumps *model* on two multi-label classification
    problems, a six-row dataset and a random one. Both are given to
    :func:`dump_data_and_model`. The benchmark only applies
    to the random dataset.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    common = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  backend=backend)

    # first problem: six rows, labels are binarized
    features = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        raw_labels = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        raw_labels = [[0 + first_class], [1 + first_class],
                      [2 + first_class],
                      [0 + first_class, 1 + first_class],
                      [1 + first_class], [2 + first_class]]
    labels = MultiLabelBinarizer().fit_transform(raw_labels)
    model.fit(features, labels)
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    onnx_model, prefix = convert_model(
        model, "multi-label-classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    dump_data_and_model(
        features.astype(numpy.float32), model, onnx_model,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        **common)

    # second problem: random data, only ten rows are dumped
    features, labels = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    features = features[:, :2]
    model.fit(features, labels)
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    onnx_model, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    dump_data_and_model(
        features[:10].astype(numpy.float32), model, onnx_model,
        basename=prefix + "RndMla" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **common)
def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Fits *model* on a three-row regression problem with two targets,
    converts it and gives everything to :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array([[100, 50], [100, 49], [100, 99]],
                          dtype=numpy.float32)
    model.fit(features, targets)

    input_types = [("input", FloatTensorType([None, 2]))]
    onnx_model, prefix = convert_model(model, "multi-regressor", input_types)
    basename = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, onnx_model, basename=basename, folder=folder,
        allow_failure=allow_failure, benchmark=benchmark,
        comparable_outputs=comparable_outputs, verbose=verbose)
def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model on three rows and calls
    :func:`dump_data_and_model`.

    :param model: model to fit, convert and dump
    :param suffix: added at the end of the base name of the files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # benchmark is forwarded as the other dump_* functions do,
    # the parameter was ignored otherwise
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs, benchmark=benchmark)
def timeit_repeat(fct, number, repeat):
    """
    Measures *repeat* times the duration of *number* consecutive
    calls to function *fct* (called without any argument).

    :param fct: function to measure
    :param number: number of calls per measure
    :param repeat: number of measures
    :return: list of *repeat* durations, each of them is the total
        time of *number* calls
    """
    durations = []
    for _ in range(repeat):
        begin = time.perf_counter()
        for __ in range(number):
            fct()
        durations.append(time.perf_counter() - begin)
    return durations
def timeexec(fct, number, repeat):
    """
    Measures the execution time of a function.

    :param fct: function to measure, it is called without any argument
    :param number: number of times to run the function for one measure
        (the measure is then divided by this number to get an average)
    :param repeat: number of measures
    :return: dictionary with the following keys:
        ``average``, ``deviation`` (over all measures),
        ``first`` (first measure), ``first3`` and ``last3``
        (average of the first and last three measures, or fewer
        if *repeat* is below 3), ``min5`` and ``max5`` (sorted
        measures at positions ``len // 20`` and ``-len // 20``),
        ``repeat``, ``run`` (parameter *number*),
        every duration is given for one call
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # Fewer than three measures may be available: the average is
    # computed over the number of measures which were really summed.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    # the chronological order is not needed anymore
    rep.sort()
    mini = rep[len(rep) // 20] / number
    maxi = rep[-len(rep) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)
def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions,
    each of them is measured with :func:`timeexec`.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of times to run a function for one measure
        (the measure is then divided by this number to get an average)
    :param repeat: number of measures
    :return: list of dictionaries, one per function, the results
        of :func:`timeexec` completed with key ``'name'``
    """
    return [
        dict(timeexec(fct, number=number, repeat=repeat), name=label)
        for label, fct in fcts.items()
    ]
def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string made of the
    concatenation of its coefficients.
    Used to compare decision path.

    :param mat: two-dimensional numpy array
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not a numpy array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]