Coverage for mlprodict/testing/test_utils/tests

"""
@file
@brief Inspired from sklearn-onnx, handles two backends.
"""

import os
import pickle
import sys
import time
import traceback
import warnings

import numpy
import pandas
from sklearn.datasets import (
    make_classification, make_multilabel_classification,
    make_regression)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

from .utils_backend import compare_backend
from .utils_backend_common import (
    extract_options, evaluate_condition, is_backend_enabled,
    OnnxBackendMissingNewOnnxOperatorException)

24def _has_predict_proba(model):

25 if hasattr(model, "voting") and model.voting == "hard":

26 return False

27 return hasattr(model, "predict_proba")

30def _has_decision_function(model):

31 if hasattr(model, "voting"):

32 return False

33 return hasattr(model, "decision_function")

36def _has_transform_model(model):

37 if hasattr(model, "voting"):

38 return False

39 return hasattr(model, "fit_transform") and hasattr(model, "score")

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Fits a classification model on a synthetic dataset of 500 samples
    built with ``make_classification`` (7 informative features).

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64`` instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :param label_string: uses string labels ``'cl<i>'`` instead of integers
    :param random_state: seed given to ``make_classification``
        (the train/test split always uses seed 42)
    :param is_bool: casts the features into ``int64`` then into ``bool``
    :param n_features: number of features
    :return: tuple *(fitted model, test features)*, the model is trained
        on one half of the data, the other half is returned
    """
    X, y = make_classification(n_classes=n_classes, n_features=n_features,
                               n_samples=500,
                               random_state=random_state,
                               n_informative=7)
    if label_string:
        y = numpy.array(['cl%d' % cl for cl in y])
    # Booleans go through int64 first, as the original code did.
    if is_int or is_bool:
        X = X.astype(numpy.int64)
    else:
        X = X.astype(numpy.float32)
    if pos_features:
        X = numpy.abs(X)
    if is_bool:
        X = X.astype(bool)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
                                                   random_state=42)
    model.fit(X_train, y_train)
    return model, X_test

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Fits a multi-label classification model on a synthetic dataset
    built with ``make_multilabel_classification`` (seed 42).

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes
    :param n_labels: value given to parameter *n_labels* of
        ``make_multilabel_classification``
    :param n_samples: number of samples
    :param n_features: number of features
    :param is_int: casts the features into ``int64`` instead of ``float32``
    :return: tuple *(fitted model, test features)*, the model is trained
        on one half of the data, the other half is returned
    """
    X, y = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)[:2]
    X = X.astype(numpy.int64 if is_int else numpy.float32)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
                                                   random_state=42)
    model.fit(X_train, y_train)
    return model, X_test

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Fits a regression model on a synthetic dataset
    built with ``make_regression`` (seed 42).

    :param model: object exposing ``fit(X, y)``
    :param is_int: casts the features into ``int64`` instead of ``float32``
    :param n_targets: number of targets
    :param is_bool: casts the features into ``int64`` then into ``bool``
    :param factor: the targets are multiplied by this factor
    :param n_features: number of features
    :param n_samples: number of samples
    :param n_informative: number of informative features
    :return: tuple *(fitted model, test features)*, the model is trained
        on one half of the data, the other half is returned
    """
    X, y = make_regression(n_features=n_features, n_samples=n_samples,
                           n_targets=n_targets, random_state=42,
                           n_informative=n_informative)[:2]
    y *= factor
    # Booleans go through int64 first, as the original code did.
    if is_int or is_bool:
        X = X.astype(numpy.int64)
    else:
        X = X.astype(numpy.float32)
    if is_bool:
        X = X.astype(bool)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
                                                   random_state=42)
    model.fit(X_train, y_train)
    return model, X_test

100

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Fits a classification model on a synthetic dataset of 500 samples
    and 10 features (9 informative, none redundant, none repeated)
    built with ``make_classification`` (seed 42).

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64`` instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :return: tuple *(fitted model, test features)*, the model is trained
        on one half of the data, the other half is returned
    """
    X, y = make_classification(n_classes=n_classes, n_features=10,
                               n_samples=500, n_redundant=0,
                               n_repeated=0,
                               random_state=42, n_informative=9)
    X = X.astype(numpy.int64 if is_int else numpy.float32)
    if pos_features:
        X = numpy.abs(X)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
                                                   random_state=42)
    model.fit(X_train, y_train)
    return model, X_test

117

118

119def _raw_score_binary_classification(model, X):

120 scores = model.decision_function(X)

121 if len(scores.shape) == 1:

122 scores = scores.reshape(-1, 1)

123 if len(scores.shape) != 2 or scores.shape[1] != 1:

124 raise RuntimeError( # pragma: no cover

125 "Unexpected shape {} for a binary classifiation".format(

126 scores.shape))

127 return numpy.hstack([-scores, scores])

128

129

130def _save_model_dump(model, folder, basename, names):

131 if hasattr(model, "save"): # pragma: no cover

132 dest = os.path.join(folder, basename + ".model.keras")

133 names.append(dest)

134 model.save(dest)

135 else:

136 dest = os.path.join(folder, basename + ".model.pkl")

137 names.append(dest)

138 with open(dest, "wb") as f:

139 try:

140 pickle.dump(model, f)

141 except AttributeError as e: # pragma no cover

142 print("[dump_data_and_model] cannot pickle model '{}'"

143 " due to {}.".format(dest, e))

144

145

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used
        if parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime
        output. Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to
        check whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the
        environment variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which method is equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour
        implemented for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two
      classes *size=(N, 2)* but the runtime produces a vector of size *N*,
      the test will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not
      a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import (  # delayed
        FloatTensorType, DoubleTensorType)

    runtime_test = dict(model=model, data=data)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok avoids the race between an existence test and the creation.
    os.makedirs(folder, exist_ok=True)

    # The benchmark runs on the first observation only.
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    # Expected outputs, computed with the original model.
    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method != 'decision_function_binary':
                        raise
                    call = (
                        lambda X, model=model:
                        _raw_score_binary_classification(model, X))
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    "Method '{0}' is not callable.".format(method))
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [model.predict(data),
                              model.decision_function(data)]
                lambda_original = (
                    lambda: model.decision_function(dataone))
            elif _has_transform_model(model):
                # clustering
                try:
                    prediction = [model.predict(data), model.transform(data)]
                    lambda_original = lambda: model.transform(dataone)
                except ValueError:
                    # 0.23 enforced type checking.
                    d64 = data.astype(numpy.float64)
                    prediction = [model.predict(d64), model.transform(d64)]
                    dataone64 = dataone.astype(numpy.float64)
                    lambda_original = lambda: model.transform(dataone64)
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())  # pragma: no cover
                lambda_original = lambda: model.transform(
                    dataone.ravel())  # pragma: no cover
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)
        else:
            raise TypeError(  # pragma: no cover
                "Model has no predict or transform method: {0}".format(
                    type(model)))

    runtime_test["expected"] = prediction

    # Dumps the expected outputs, the data and the model.
    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    error_dump = None
    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure

            # The same arguments are used whatever the way failures are
            # handled; *disable_optimisation* used to be dropped when
            # *allow_failure* was specified.
            compare_kwargs = dict(
                options=extract_options(basename),
                context=context, verbose=verbose,
                comparable_outputs=comparable_outputs,
                intermediate_steps=intermediate_steps,
                disable_optimisation=disable_optimisation,
                classes=classes)

            if allow is None and not check_error:
                # any failure is propagated to the caller
                output, lambda_onnx = compare_backend(
                    b, runtime_test, **compare_kwargs)
            elif check_error:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test, **compare_kwargs)
                except Exception as e:  # pragma: no cover
                    if check_error in str(e):
                        # expected error, the backend is skipped
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test, **compare_kwargs)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + ".backend.{0}.pkl".format(b))
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

438

439

def convert_model(model, name, input_types):
    """
    Converts a model into *onnx* with ``skl2onnx.convert_sklearn``.

    :param model: model to convert
    :param name: model name
    :param input_types: input types
    :return: tuple *(onnx model, prefix)*, the prefix is always
        ``"Sklearn"``
    :raises RuntimeError: if the conversion returns None
    """
    from skl2onnx import convert_sklearn  # delayed

    # The result gets its own name so that the error message
    # reports the type of the original model and not NoneType.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, "Sklearn"

457

458

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Trains and dumps a model for a One Class outlier problem.
    The function trains a model on three observations with
    a single label, converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param verbose: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`
    :param methods: see :func:`dump_data_and_model`
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                    dtype=numpy.float32)
    y = [1, 1, 1]
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "one_class",
                                       [("input", FloatTensorType([None, 2]))])
    dump_data_and_model(
        X, model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "One" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

484

485

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Trains and dumps a model for a binary classification problem.
    The function trains the model twice, on three handwritten
    observations then on a random dataset, and calls
    :func:`dump_data_and_model` each time.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param verbose: see :func:`dump_data_and_model`
    :param label_string: uses string labels for the first dataset
    :param benchmark: see :func:`dump_data_and_model`,
        only used for the random dataset
    :param methods: see :func:`dump_data_and_model`
    :param nrows: if equal to 2, the first dataset is tested by
        consecutive pairs of rows (every pair writes the same files);
        if not None, only the first *nrows* rows of the random
        dataset are tested
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    input_types = [("input", FloatTensorType([None, 2]))]

    # First dataset: three handwritten observations.
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    if label_string:
        y = ["A", "B", "A"]
    else:
        y = numpy.array([0, 1, 0], numpy.int64)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "binary classifier",
                                       input_types)
    if nrows == 2:
        batches = [X[nr: nr + 2] for nr in range(X.shape[0] - 1)]
    else:
        batches = [X]
    for batch in batches:
        dump_data_and_model(
            batch, model, model_onnx, folder=folder,
            allow_failure=allow_failure,
            basename=prefix + "Bin" + model.__class__.__name__ + suffix,
            verbose=verbose, comparable_outputs=comparable_outputs,
            methods=methods)

    # Second dataset: random observations, two features are kept.
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "binary classifier",
                                       input_types)
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx,
        allow_failure=allow_failure, folder=folder,
        basename=prefix + "RndBin" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

534

535

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Trains and dumps a model for a multi-class classification problem.
    The function trains the model twice, on six handwritten
    observations then on a random dataset, and calls
    :func:`dump_data_and_model` each time.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param verbose: see :func:`dump_data_and_model`
    :param label_string: uses string labels ``'l<i>'``
        for the first dataset
    :param first_class: value added to every label of the first dataset
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`,
        only used for the random dataset
    :param methods: see :func:`dump_data_and_model`
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    input_types = [("input", FloatTensorType([None, 2]))]

    # First dataset: six handwritten observations, three classes.
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [i + first_class for i in [0, 1, 2, 1, 1, 2]]
    if label_string:
        y = ["l%d" % i for i in y]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-class classifier",
                                       input_types)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        methods=methods)

    # Second dataset: random observations, two features are kept.
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-class classifier",
                                       input_types)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "RndMcl" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

587

588

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem.
    The function trains the model twice, on six handwritten
    observations then on a random dataset, and calls
    :func:`dump_data_and_model` each time.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param verbose: see :func:`dump_data_and_model`
    :param label_string: uses string labels for the first dataset
        (*first_class* is ignored in that case)
    :param first_class: value added to every integer label
        of the first dataset
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`,
        only used for the random dataset
    :param backend: see :func:`dump_data_and_model`
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    input_types = [("input", FloatTensorType([None, 2]))]

    # First dataset: six handwritten observations.
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        y = [[0 + first_class], [1 + first_class], [2 + first_class],
             [0 + first_class, 1 + first_class],
             [1 + first_class], [2 + first_class]]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-label-classifier",
                                       input_types)
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model was converted")
    # NOTE(review): tag "Mcl" is also the one used by
    # dump_multiple_classification, both functions write the same files
    # for the same model class and suffix -- confirm this is intended.
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        backend=backend)

    # Second dataset: random observations, two features are kept.
    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-class classifier",
                                       input_types)
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "RndMla" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, backend=backend)

643

644

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Trains and dumps a model for a multi regression problem.
    The function trains a model on three observations and two targets,
    converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param verbose: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = numpy.array([[100, 50], [100, 49], [100, 99]], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "multi-regressor",
                                       [("input", FloatTensorType([None, 2]))])
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "MRg" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark)

668

669

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model on three observations and one target,
    converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.

    :param model: model to train, it is fitted in place
    :param suffix: added to the basename of the created files
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: if True, runs a benchmark, otherwise
        :func:`dump_data_and_model` checks the environment
        variable ``ONNXTESTBENCHMARK``
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # The parameter used to be ignored. A false value is sent as None
    # to keep the previous default behaviour (environment variable).
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs,
        benchmark=benchmark or None)

691

692

def timeit_repeat(fct, number, repeat):
    """
    Returns a series of *repeat* time measures, each of them
    is the time taken by *number* consecutive calls to *fct*.

    :param fct: function to measure, called with no argument
    :param number: number of calls in one measure
    :param repeat: number of measures
    :return: list of *repeat* durations measured with
        ``time.perf_counter``
    """
    res = []
    for _ in range(repeat):
        begin = time.perf_counter()
        for __ in range(number):
            fct()
        res.append(time.perf_counter() - begin)
    return res

707

708

def timeexec(fct, number, repeat):
    """
    Measures the time spent in a function.

    :param fct: function to measure, called with no argument
    :param number: number of times the function is called in one measure
        (the measure is divided by this number to get an average)
    :param repeat: number of measures
    :return: dictionary with keys *average*, *deviation*, *first*
        (first measure), *first3*, *last3* (average of the first and last
        three measures), *min5*, *max5* (measures obtained after removing
        the ``repeat // 20`` lowest or highest ones), *repeat*, *run*,
        every duration is the time of one call
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # Divides by the number of available measures, which is lower
    # than 3 when *repeat* is lower than 3.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    # Same number of measures skipped on both sides:
    # ``-len(rep) // 20`` rounds towards minus infinity and
    # was not the mirror of ``len(rep) // 20``.
    skipped = len(rep) // 20
    mini = rep[skipped] / number
    maxi = rep[-1 - skipped] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)

731

732

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of times a function is called in one measure
        (the measure is divided by this number to get an average)
    :param repeat: number of measures
    :return: list of dictionaries, one per function in the same order
        as *fcts*, each of them is the output of :func:`timeexec`
        with an additional key ``'name'``
    """
    obs = []
    for name, fct in fcts.items():
        res = timeexec(fct, number=number, repeat=repeat)
        res["name"] = name
        obs.append(res)
    return obs

750

751

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string obtained by
    concatenating the string representation of its coefficients.
    Used to compare decision path.

    :param mat: numpy array with two dimensions
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not a numpy array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(str(i) for i in row) for row in mat.tolist()]

Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%

277 statements