Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

277 statements  

1""" 

2@file 

3@brief Inspired from sklearn-onnx, handles two backends. 

4""" 

5import pickle 

6import os 

7import warnings 

8import traceback 

9import time 

10import sys 

11import numpy 

12import pandas 

13from sklearn.datasets import ( 

14 make_classification, make_multilabel_classification, 

15 make_regression) 

16from sklearn.model_selection import train_test_split 

17from sklearn.preprocessing import MultiLabelBinarizer 

18from .utils_backend import compare_backend 

19from .utils_backend_common import ( 

20 extract_options, evaluate_condition, is_backend_enabled, 

21 OnnxBackendMissingNewOnnxOperatorException) 

22 

23 

24def _has_predict_proba(model): 

25 if hasattr(model, "voting") and model.voting == "hard": 

26 return False 

27 return hasattr(model, "predict_proba") 

28 

29 

30def _has_decision_function(model): 

31 if hasattr(model, "voting"): 

32 return False 

33 return hasattr(model, "decision_function") 

34 

35 

36def _has_transform_model(model): 

37 if hasattr(model, "voting"): 

38 return False 

39 return hasattr(model, "fit_transform") and hasattr(model, "score") 

40 

41 

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Fits *model* on a synthetic classification dataset
    (500 samples, 7 informative features).

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param is_int: casts the features to ``int64`` instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :param label_string: converts labels into strings ``'cl<label>'``
    :param random_state: seed used to generate the dataset only,
        the train/test split always uses seed 42
    :param is_bool: casts the features to ``int64`` then to ``bool``
    :param n_features: number of features
    :return: tuple ``(fitted model, X_test)``
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % cl for cl in labels])
    feature_type = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(feature_type)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    # half of the rows are used for training, the other half is returned
    parts = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = parts[0], parts[1], parts[2]
    model.fit(train_features, train_labels)
    return model, test_features

64 

65 

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Fits *model* on a synthetic multi-label classification dataset.

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param n_labels: forwarded to ``make_multilabel_classification``
    :param n_samples: number of generated samples
    :param n_features: number of features
    :param is_int: casts the features to ``int64`` instead of ``float32``
    :return: tuple ``(fitted model, X_test)``
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    feature_type = numpy.int64 if is_int else numpy.float32
    features = features.astype(feature_type)
    # half of the rows are used for training, the other half is returned
    parts = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = parts[0], parts[1], parts[2]
    model.fit(train_features, train_labels)
    return model, test_features

80 

81 

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Fits *model* on a synthetic regression dataset.

    :param model: object exposing ``fit(X, y)``
    :param is_int: casts the features to ``int64`` instead of ``float32``
    :param n_targets: number of regression targets
    :param is_bool: casts the features to ``int64`` then to ``bool``
    :param factor: the generated targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of generated samples
    :param n_informative: number of informative features
    :return: tuple ``(fitted model, X_test)``
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    X, y = generated[:2]
    y *= factor
    feature_type = numpy.int64 if (is_int or is_bool) else numpy.float32
    X = X.astype(feature_type)
    if is_bool:
        X = X.astype(bool)
    # half of the rows are used for training, the other half is returned
    parts = train_test_split(X, y, test_size=0.5, random_state=42)
    train_features, test_features, train_targets = parts[0], parts[1], parts[2]
    model.fit(train_features, train_targets)
    return model, test_features

99 

100 

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Fits *model* on a synthetic classification dataset with 10 features
    (9 informative, none redundant or repeated) and 500 samples.

    :param model: object exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param is_int: casts the features to ``int64`` instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :return: tuple ``(fitted model, X_test)``
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    feature_type = numpy.int64 if is_int else numpy.float32
    features = features.astype(feature_type)
    if pos_features:
        features = numpy.abs(features)
    # half of the rows are used for training, the other half is returned
    parts = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = parts[0], parts[1], parts[2]
    model.fit(train_features, train_labels)
    return model, test_features

117 

118 

119def _raw_score_binary_classification(model, X): 

120 scores = model.decision_function(X) 

121 if len(scores.shape) == 1: 

122 scores = scores.reshape(-1, 1) 

123 if len(scores.shape) != 2 or scores.shape[1] != 1: 

124 raise RuntimeError( # pragma: no cover 

125 "Unexpected shape {} for a binary classifiation".format( 

126 scores.shape)) 

127 return numpy.hstack([-scores, scores]) 

128 

129 

130def _save_model_dump(model, folder, basename, names): 

131 if hasattr(model, "save"): # pragma: no cover 

132 dest = os.path.join(folder, basename + ".model.keras") 

133 names.append(dest) 

134 model.save(dest) 

135 else: 

136 dest = os.path.join(folder, basename + ".model.pkl") 

137 names.append(dest) 

138 with open(dest, "wb") as f: 

139 try: 

140 pickle.dump(model, f) 

141 except AttributeError as e: # pragma no cover 

142 print("[dump_data_and_model] cannot pickle model '{}'" 

143 " due to {}.".format(dest, e)) 

144 

145 

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converters to convert it
        only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used if
        parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods is equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType  # delayed

    runtime_test = dict(model=model, data=data)

    # Unspecified options fall back on environment variables.
    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok avoids a failure when another process creates the folder
    # between an existence check and the creation.
    os.makedirs(folder, exist_ok=True)

    # The benchmark runs on the first row only when data can be sliced.
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    # Expected outputs computed with the original model.
    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                            _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    "Method '{0}' is not callable.".format(method))
    elif hasattr(model, "predict"):
        if _has_predict_proba(model):
            # Classifier
            prediction = [model.predict(data), model.predict_proba(data)]
            lambda_original = lambda: model.predict_proba(dataone)
        elif _has_decision_function(model):
            # Classifier without probabilities
            prediction = [model.predict(data),
                          model.decision_function(data)]
            lambda_original = (
                lambda: model.decision_function(dataone))
        elif _has_transform_model(model):
            # clustering
            try:
                prediction = [model.predict(data), model.transform(data)]
                lambda_original = lambda: model.transform(dataone)
            except ValueError:
                # 0.23 enforced type checking.
                d64 = data.astype(numpy.float64)
                prediction = [model.predict(d64), model.transform(d64)]
                dataone64 = dataone.astype(numpy.float64)
                lambda_original = lambda: model.transform(dataone64)
        else:
            # Regressor or VotingClassifier
            prediction = [model.predict(data)]
            lambda_original = lambda: model.predict(dataone)
    elif hasattr(model, "transform"):
        options = extract_options(basename)
        SklCol = options.get("SklCol", False)
        if SklCol:
            prediction = model.transform(data.ravel())  # pragma: no cover
            lambda_original = lambda: model.transform(
                dataone.ravel())  # pragma: no cover
        else:
            prediction = model.transform(data)
            lambda_original = lambda: model.transform(dataone)
    else:
        raise TypeError(  # pragma: no cover
            "Model has no predict or transform method: {0}".format(
                type(model)))

    runtime_test["expected"] = prediction

    # Dumps the expected outputs, the data and the model.
    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    # Converts the model if no ONNX graph was given.
    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure

            # Same arguments for every comparison, including
            # disable_optimisation which the allow-failure branch
            # previously did not forward.
            compare_kwargs = dict(
                options=extract_options(basename),
                context=context, verbose=verbose,
                comparable_outputs=comparable_outputs,
                intermediate_steps=intermediate_steps,
                disable_optimisation=disable_optimisation,
                classes=classes)

            if allow is None and not check_error:
                # any failure is propagated
                output, lambda_onnx = compare_backend(
                    b, runtime_test, **compare_kwargs)
            elif check_error:
                # a failure whose message contains check_error is a warning
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test, **compare_kwargs)
                except Exception as e:  # pragma: no cover
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test, **compare_kwargs)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + ".backend.{0}.pkl".format(b))
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

438 

439 

def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model, *scikit-learn*, *keras*,
        or *coremltools* object
    :param name: model name
    :param input_types: input types
    :return: tuple ``(onnx model, prefix)``, the prefix is ``"Sklearn"``
    :raises RuntimeError: if the converter returns None
    """
    from skl2onnx import convert_sklearn  # delayed

    # The converted graph is kept in its own variable so that the error
    # message reports the type of the original model, not NoneType.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, "Sklearn"

457 

458 

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Fits *model* on a three-row dataset with a single class,
    converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    samples = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                          dtype=numpy.float32)
    labels = [1, 1, 1]
    model.fit(samples, labels)
    input_spec = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_spec)
    dump_name = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        samples, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=dump_name,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

484 

485 

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Fits *model* on two binary classification datasets
    (a fixed three-row one, then a random one), converts it each time
    and calls :func:`dump_data_and_model`.

    If *nrows* is 2, the fixed dataset is dumped by consecutive
    windows of two rows. If *nrows* is not None, only the first
    *nrows* rows of the random dataset are dumped.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # options shared by every call to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)

    # first dataset: three fixed rows
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = ["A", "B", "A"] if label_string else numpy.array(
        [0, 1, 0], numpy.int64)
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    fixed_name = prefix + "Bin" + model.__class__.__name__ + suffix
    if nrows == 2:
        batches = [X[start: start + 2] for start in range(X.shape[0] - 1)]
    else:
        batches = [X]
    for batch in batches:
        dump_data_and_model(
            batch, model, model_onnx, basename=fixed_name, **shared)

    # second dataset: random rows, two first features only
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx,
        basename=prefix + "RndBin" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **shared)

534 

535 

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Fits *model* on two multi-class classification datasets
    (a fixed six-row one, then a random one with three classes),
    converts it each time and calls :func:`dump_data_and_model`.

    For the fixed dataset, labels are shifted by *first_class*
    and converted into strings ``'l<label>'`` if *label_string* is True.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    class_name = model.__class__.__name__
    # options shared by both calls to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)

    # first dataset: six fixed rows
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [label + first_class for label in [0, 1, 2, 1, 1, 2]]
    if label_string:
        y = ["l%d" % i for i in y]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx,
        basename=prefix + "Mcl" + class_name + suffix, **shared)

    # second dataset: random rows, two first features, ten rows dumped
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx,
        basename=prefix + "RndMcl" + class_name + suffix,
        benchmark=benchmark, **shared)

587 

588 

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem.
    The function trains a model on two datasets (a fixed six-row one,
    then a random one) and calls :func:`dump_data_and_model` each time.

    For the fixed dataset, labels are strings if *label_string* is True,
    otherwise integers shifted by *first_class*.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]]
    X = numpy.array(X, dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        y = [[0 + first_class], [1 + first_class], [2 + first_class],
             [0 + first_class, 1 + first_class],
             [1 + first_class], [2 + first_class]]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    if verbose:  # pragma: no cover
        # messages are tagged with the name of this function
        print("[dump_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-label-classifier",
                                       [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model was converted")
    # NOTE(review): this basename uses "Mcl" whereas the second dump
    # uses "RndMla", kept unchanged to preserve the created filenames.
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        backend=backend)

    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model '{}'".format(
            model.__class__.__name__))
    model_onnx, prefix = convert_model(model, "multi-class classifier",
                                       [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multilabel_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "RndMla" + model.__class__.__name__ + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, backend=backend)

643 

644 

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Fits *model* on a three-row dataset with two regression targets,
    converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array([[100, 50], [100, 49], [100, 99]],
                          dtype=numpy.float32)
    model.fit(features, targets)
    input_spec = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor", input_spec)
    dump_name = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=dump_name,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark)

668 

669 

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model on a three-row dataset and calls
    :func:`dump_data_and_model`.

    :param model: model to train, it must expose ``fit``
    :param suffix: added at the end of the basename
    :param folder: forwarded to :func:`dump_data_and_model`
    :param allow_failure: forwarded to :func:`dump_data_and_model`
    :param comparable_outputs: forwarded to :func:`dump_data_and_model`
    :param benchmark: forwarded to :func:`dump_data_and_model`

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # benchmark is forwarded like the other dump_* helpers do,
    # it was previously accepted but ignored.
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs,
        benchmark=benchmark)

691 

692 

def timeit_repeat(fct, number, repeat):
    """
    Measures *repeat* times the duration of *number*
    consecutive calls to function *fct*.

    :param fct: function called with no argument
    :param number: number of calls per measure
    :param repeat: number of measures
    :return: list of *repeat* durations measured
        with ``time.perf_counter``
    """
    durations = []
    for _ in range(repeat):
        begin = time.perf_counter()
        for _unused in range(number):
            fct()
        end = time.perf_counter()
        durations.append(end - begin)
    return durations

707 

708 

def timeexec(fct, number, repeat):
    """
    Measures the time for a given function.

    :param fct: function to measure, called with no argument
    :param number: number of time to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # Averages over the first and last measures: the divisor is the number
    # of measures really available, which is below 3 when repeat < 3.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    mini = rep[len(rep) // 20] / number
    # unary minus applies before the floor division: index -5 for 100 measures
    maxi = rep[-len(rep) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)

731 

732 

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Measures the processing time of several functions
    with :func:`timeexec`.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of time to run each function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, holding the
        results of :func:`timeexec` and the key ``'name'``
    """
    return [
        dict(timeexec(fct, number=number, repeat=repeat), name=name)
        for name, fct in fcts.items()]

750 

751 

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string obtained by
    concatenating the string representation of its values.
    Used to compare decision path.

    :param mat: two-dimensional :epkg:`numpy` array
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not an array
        or has not exactly two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]