Coverage for mlprodict/asv_benchmark/common_asv_skl.py: 95%


1""" 

2Common class for all benchmarks testing 

3converted models from :epkg:`scikit-learn` 

4with :epkg:`asv`. The benchmark can be run through 

5file :epkg:`run_asv.sh` on Linux or :epkg:`run_asv.bat` on 

6Windows. 

7 

8.. warning:: 

9 On Windows, you should avoid cloning the repository 

10 on a folder with a long full name. Visual Studio tends to 

11 abide by the rule of the maximum path length even though 

12 the system is told otherwise. 

13""" 

import os
from datetime import datetime
import pickle
from logging import getLogger
import numpy
from sklearn import set_config
from sklearn.datasets import load_iris
from sklearn.metrics import (
    accuracy_score, mean_absolute_error, silhouette_score)
from sklearn.model_selection import train_test_split
from mlprodict import get_ir_version, __max_supported_opset__
from mlprodict.onnxrt import OnnxInference
from mlprodict.onnx_conv import (
    to_onnx, register_rewritten_operators, register_converters)
from mlprodict.onnxrt.validate.validate_benchmark import make_n_rows
from mlprodict.onnxrt.validate.validate_problems import _modify_dimension
from mlprodict.onnx_tools.optim import onnx_statistics
from mlprodict.tools.asv_options_helper import (
    expand_onnx_options, version2number)
from mlprodict.tools.model_info import set_random_state

class _CommonAsvSklBenchmark:
    """
    Common tests to all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000],  # values for N
        [4, 20],  # values for nf
        [__max_supported_opset__],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    chk_method_name = None
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    par_ydtype = numpy.int64
    par_dofit = True
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        return onx

    def _get_xdtype(self, dtype):
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', '64', 64, numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            "Unknown dtype '{}'.".format(dtype))

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        # Perturbs the features with Gaussian noise so that the problem
        # is not trivially separable.
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        if X_train.shape[0] < X_train.shape[1]:
            raise RuntimeError(  # pragma: no cover
                "Unable to train a model with fewer observations than "
                "features, shape=%r." % (X_train.shape, ))
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                "Conflict between par_convopts={} and optim={}".format(
                    self.par_convopts, optim))
        else:
            # Expands common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
        if 'onnxruntime' in runtime:
            # Aligns the IR version with what onnxruntime supports;
            # the original value is restored once the runtime is created.
            old = onx.ir_version
            onx.ir_version = get_ir_version(__max_supported_opset__)
        else:
            old = None

        try:
            res = OnnxInference(
                onx, runtime=runtime,
                runtime_options=dict(log_severity_level=3))
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                raise RuntimeError(
                    "onnxruntime fails due to {}".format(str(e))) from e
            raise e
        if old is not None:
            onx.ir_version = old
        return res

    # Part which does not change.

    def _check_rt(self, rt, meth):
        """
        Checks that the runtime has the appropriate method.
        """
        if rt is None:
            raise ValueError("rt cannot be empty.")  # pragma: no cover
        if not hasattr(rt, meth):
            raise TypeError(  # pragma: no cover
                "rt of type %r has no method %r." % (type(rt), meth))

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'  # pragma: no cover
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                "Unknown runtime '{}'.".format(runtime))
        return name

    def _name(self, nf, opset, dtype):
        last = 'cache-{}-nf{}-op{}-dt{}.pickle'.format(
            self.__class__.__name__, nf, opset, dtype)
        return last

    def setup_cache(self):
        "asv API"
        # Fits and pickles one model per (nf, opset, dtype) combination
        # so that every benchmark run starts from the same trained model.
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            "Unable to dump model %r into %r." % (
                                model, filename))

    def setup(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        # Disables skl2onnx logging and registers additional converters.
        logger = getLogger('skl2onnx')
        logger.disabled = True
        register_converters()
        register_rewritten_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        # Adjusts the number of rows to exactly N.
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        set_config(assume_finite=True)

    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from onnxruntime import __version__ as onnxrt_version
        return version2number(onnxrt_version)

    def check_method_name(self, method_name):
        "Checks that the benchmark and the scoring method are consistent."
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                "Method name must be '{}'.".format(method_name))
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(method_name))

class _CommonAsvSklBenchmarkClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a classifier.
    """
    chk_method_name = 'predict_proba'

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict_proba')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict_proba(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda pX: rt_.run({'X': pX})
            rt_fct_track_ = lambda pX: rt_fct_(pX)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_
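
# A minimal sketch of a concrete benchmark derived from the classes above,
# for illustration only: the class name and the choice of
# LogisticRegression are assumptions, not part of this module. A subclass
# only has to provide the model; dataset creation, conversion, runtimes
# and metrics are all inherited.
#
#   from sklearn.linear_model import LogisticRegression
#
#   class BenchLogisticRegression(_CommonAsvSklBenchmarkClassifier):
#       "Benchmarks LogisticRegression against its ONNX conversion."
#
#       def _create_model(self):
#           return LogisticRegression(solver='liblinear')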

class _CommonAsvSklBenchmarkClassifierRawScore(_CommonAsvSklBenchmark):
    """
    Common class for a classifier scored on raw outputs
    (decision function).
    """
    chk_method_name = 'decision_function'

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('decision_function')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.decision_function(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkClustering(_CommonAsvSklBenchmark):
    """
    Common class for a clustering algorithm.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        # silhouette_score requires at least two samples
        # and at least two distinct clusters.
        if X.shape[0] == 1:
            return 0.  # pragma: no cover
        elif len(set(y_pred)) == 1:
            return 0.  # pragma: no cover
        return silhouette_score(X, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X.astype(numpy.float64))
            rt_fct_track_ = lambda X: model.predict(X.astype(numpy.float64))
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['label']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkMultiClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a multi-classifier.
    """
    chk_method_name = 'predict_proba'

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        # Converts the labels into a one-hot indicator matrix.
        nbclass = len(set(y))
        y_ = numpy.zeros((y.shape[0], nbclass), dtype=y.dtype)
        for i, vy in enumerate(y):
            y_[i, vy] = 1
        y = y_
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        X = X_test.astype(xdtype)
        y = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (X, y)

    def _score_metric(self, X, y_exp, y_pred):
        return accuracy_score(y_exp.ravel(), y_pred.ravel())

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict_proba')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict_proba(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['output_label']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkOutlier(_CommonAsvSklBenchmark):
    """
    Common class for outlier detection.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        # Average prediction, scikit-learn outlier detectors return
        # +1 for inliers and -1 for outliers.
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['scores']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkRegressor(_CommonAsvSklBenchmark):
    """
    Common class for a regressor.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        return mean_absolute_error(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkTrainableTransform(_CommonAsvSklBenchmark):
    """
    Common class for a trainable transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('transform')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.transform(X)
            rt_fct_track_ = lambda X: model.transform(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkTransform(_CommonAsvSklBenchmark):
    """
    Common class for a transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        self.check_method_name('transform')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.transform(X)
            rt_fct_track_ = lambda X: model.transform(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_

class _CommonAsvSklBenchmarkTransformPositive(_CommonAsvSklBenchmarkTransform):
    """
    Common class for a transformer requiring positive features.
    """
    chk_method_name = 'transform'

    def _get_dataset(self, nf, dtype):
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        # Takes absolute values so that every feature is positive.
        X = numpy.abs(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        X = X_test.astype(xdtype)
        y = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (X, y)
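
# A minimal sketch of how asv drives one benchmark case, shown for
# illustration only: asv normally calls these hooks itself.
# BenchLogisticRegression is the hypothetical subclass sketched earlier;
# the argument tuple follows param_names ('rt', 'N', 'nf', 'opset',
# 'dtype', 'optim') with values taken from the default ``params``.
#
#   bench = BenchLogisticRegression()
#   bench.setup_cache()                  # fits and pickles the models once
#   args = ('pyrtc', 100, 4, __max_supported_opset__, 'float', None)
#   bench.setup(*args)                   # loads the model, converts to ONNX
#   bench.time_predict(*args)            # timed by asv
#   print(bench.track_score(*args))      # accuracy of the ONNX runtime
#   print(bench.track_onnxsize(*args))   # serialized model size in bytes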