Coverage for mlprodict/onnxrt/validate/validate_scenarios.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
@file
@brief Scenarios for validation.
"""
5from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version
6from sklearn.cluster import KMeans
7from sklearn.calibration import CalibratedClassifierCV
8from sklearn.decomposition import SparseCoder, LatentDirichletAllocation
9from sklearn.ensemble import (
10 VotingClassifier, AdaBoostRegressor, VotingRegressor,
11 ExtraTreesRegressor, ExtraTreesClassifier,
12 RandomForestRegressor, RandomForestClassifier,
13 HistGradientBoostingRegressor, HistGradientBoostingClassifier,
14 AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor,
15 IsolationForest)
16from sklearn.feature_extraction import DictVectorizer, FeatureHasher
17from sklearn.feature_selection import (
18 SelectFromModel, SelectPercentile, RFE, RFECV,
19 SelectKBest, SelectFwe)
20from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
21from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF
22from sklearn.linear_model import (
23 LogisticRegression, LogisticRegressionCV, SGDClassifier,
24 LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV,
25 PassiveAggressiveClassifier)
26from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
27from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
28from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain
29from sklearn.neighbors import (
30 LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier,
31 RadiusNeighborsRegressor, RadiusNeighborsClassifier)
32from sklearn.neural_network import MLPClassifier
33from sklearn.preprocessing import Normalizer, PowerTransformer
34from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
35from sklearn.svm import SVC, NuSVC, SVR
36from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier
38try:
39 from sklearn.ensemble import StackingClassifier, StackingRegressor
40except ImportError: # pragma: no cover
41 # new in 0.22
42 StackingClassifier, StackingRegressor = None, None
def build_custom_scenarios():
    """
    Defines parameters values for some operators.

    The returned dictionary is keyed by estimator class. A value is
    either ``None`` (the entries listed under *skips*) or a list of
    scenarios. Each scenario is a tuple
    ``(name, constructor parameters[, extra])`` where the optional
    third element is, in this function, either a dictionary
    (keys used here: ``'conv_options'``, ``'optim'``,
    ``'subset_problems'``) or a plain list of problem names.
    How these extras are consumed is decided by the callers,
    not by this function.

    @return         dictionary ``{estimator class: None or list of scenarios}``

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(), 'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            # NOTE(review): this scenario reuses the name 'reg' although
            # it wraps KMeans and targets the 'cluster' problem; the name
            # is left unchanged because it may appear in existing
            # reports -- confirm before renaming.
            ('reg', {
                'estimator': KMeans(), 'n_jobs': 1,
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        # The two scenarios below used to key their conversion options by
        # OneVsOneClassifier (copy-paste); every other classifier scenario
        # in this table keys the options by the class it configures.
        OneVsRestClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsRestClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OutputCodeClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 100.0,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }
    # Both names are set to None by the guarded import at the top of the
    # file when the installed scikit-learn does not provide them.
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })
    return options
def interpret_options_from_string(st):
    """
    Converts a string into a dictionary.

    @param      st      string holding a Python expression,
                        or an already built dictionary
    @return             evaluated object, a dictionary received
                        as input is returned unchanged

    The string is evaluated with :func:`eval`, it must only
    come from a trusted source.
    """
    if isinstance(st, dict):
        return st  # pragma: no cover
    # SECURITY NOTE(review): eval runs arbitrary code. Names in the
    # expression are resolved in this module's namespace, presumably so
    # that option strings can reference the imported estimator classes
    # (to be confirmed against callers); that is why ast.literal_eval
    # is not a drop-in replacement here. Never feed untrusted input.
    return eval(st)  # pylint: disable=W0123
# The scenarios are built once, when the module is imported.
_extra_parameters = build_custom_scenarios()