Coverage for mlprodict/cli/convert_validate.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Command line about validation of prediction runtime.
4"""
5import os
6import pickle
7from logging import getLogger
8import warnings
9from pandas import read_csv
10from ..onnx_conv import to_onnx
11from ..onnxrt import OnnxInference
12from ..onnx_tools.optim import onnx_optimisations
13from ..onnxrt.validate.validate_difference import measure_relative_difference
14from ..onnx_conv import guess_schema_from_data, guess_schema_from_model
def convert_validate(pkl, data=None, schema=None,
                     method="predict", name='Y',
                     target_opset=None,
                     outonnx="model.onnx",
                     runtime='python', metric="l1med",
                     use_double=None, noshape=False,
                     optim='onnx', rewrite_ops=True,
                     options=None, fLOG=print, verbose=1,
                     register=True):
    """
    Converts a model stored in *pkl* file and measures the differences
    between the model and the ONNX predictions.

    Arguments which can be checked without touching the disk
    (*use_double*, *metric*, *method* / *name*) are validated first,
    so that an invalid call fails before the model is converted
    and before *outonnx* is overwritten.

    :param pkl: pickle file
    :param data: data file, loaded with pandas,
        converted to a single array, the data is used to guess
        the schema if *schema* not specified
    :param schema: initial type of the model
    :param method: method to call, several methods can be given
        separated by a comma
    :param name: output name, as many comma separated names as
        there are methods
    :param target_opset: target opset
    :param outonnx: produced ONNX model
    :param runtime: runtime to use to compute predictions,
        'python', 'python_compiled',
        'onnxruntime1' or 'onnxruntime2'
    :param metric: the metric 'l1med' is given by function
        :func:`measure_relative_difference
        <mlprodict.onnxrt.validate.validate_difference.measure_relative_difference>`
    :param noshape: run the conversion with no shape information
    :param use_double: use double for the runtime if possible,
        two possible options, ``"float64"`` or ``'switch'``,
        the first option produces an ONNX file with doubles,
        the second option loads an ONNX file (float or double)
        and replaces matrices in ONNX with the matrices coming from
        the model, this second way is just for testing purposes
    :param optim: applies optimisations on the first ONNX graph,
        use 'onnx' to reduce the number of node Identity and
        redundant subgraphs
    :param rewrite_ops: rewrites some converters from :epkg:`sklearn-onnx`
    :param options: additional options for conversion,
        dictionary as a string
    :param verbose: verbose level
    :param register: registers additional converters implemented by this package
    :param fLOG: logging function, if None, *verbose* is forced to 0
    :return: a dictionary with the results, key ``'onnx'`` holds the
        serialized ONNX model; when *data* is available, it also
        contains ``'skl_pred'`` (predictions of the original model,
        one per method), ``'ort_pred'`` (predictions of the runtime,
        one per name) and ``'metrics'`` (one difference per method)
    :raises ValueError: if *use_double* or *metric* is unknown or if
        the number of methods and names differ
    :raises FileNotFoundError: if *pkl* does not exist
    :raises KeyError: if one of the names is not an output of the ONNX graph

    .. cmdref::
        :title: Converts and compares an ONNX file
        :cmd: -m mlprodict convert_validate --help
        :lid: l-cmd-convert_validate

        The command converts and validates a :epkg:`scikit-learn` model.
        An example to check the prediction of a logistic regression.

        ::

            import os
            import pickle
            import pandas
            from sklearn.datasets import load_iris
            from sklearn.model_selection import train_test_split
            from sklearn.linear_model import LogisticRegression
            from mlprodict.__main__ import main
            from mlprodict.cli import convert_validate

            iris = load_iris()
            X, y = iris.data, iris.target
            X_train, X_test, y_train, _ = train_test_split(X, y, random_state=11)
            clr = LogisticRegression()
            clr.fit(X_train, y_train)

            pandas.DataFrame(X_test).to_csv("data.csv", index=False)
            with open("model.pkl", "wb") as f:
                pickle.dump(clr, f)

        And the command line to check the predictions
        using a command line.

        ::

            convert_validate --pkl model.pkl --data data.csv
                             --method predict,predict_proba
                             --name output_label,output_probability
                             --verbose 1
    """
    if fLOG is None:
        verbose = 0  # pragma: no cover

    # Cheap argument validation comes first: nothing is imported,
    # loaded or written if the call cannot succeed anyway.
    if use_double not in (None, 'float64', 'switch'):
        raise ValueError(  # pragma: no cover
            "use_double must be either None, 'float64' or 'switch'")
    if metric != 'l1med':
        raise ValueError(  # pragma: no cover
            "Unknown metric '{}'".format(metric))
    # str.split returns a single element list when there is no comma.
    methods = method.split(',')
    names = name.split(',')
    if len(names) != len(methods):
        raise ValueError(
            "Number of methods and outputs do not match: {}, {}".format(
                names, methods))

    # Empty strings (typically coming from the command line) mean None.
    if optim == '':
        optim = None  # pragma: no cover
    if target_opset == '':
        target_opset = None  # pragma: no cover
    if verbose == 0:
        logger = getLogger('skl2onnx')
        logger.disabled = True
    if not os.path.exists(pkl):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find model '{}'.".format(pkl))
    if os.path.exists(outonnx):
        warnings.warn("File '{}' will be overwritten.".format(outonnx))

    from skl2onnx.common.data_types import (  # delayed
        FloatTensorType, DoubleTensorType)

    if verbose > 0:
        fLOG("[convert_validate] load model '{}'".format(pkl))
    # SECURITY: unpickling executes arbitrary code, *pkl* must come
    # from a trusted source.
    with open(pkl, "rb") as f:
        model = pickle.load(f)

    if use_double == 'float64':
        tensor_type = DoubleTensorType
    else:
        tensor_type = FloatTensorType
    if options in (None, ''):
        options = None
    else:
        from ..onnxrt.validate.validate_scenarios import (
            interpret_options_from_string)
        options = interpret_options_from_string(options)
        if verbose > 0:
            fLOG("[convert_validate] options={}".format(repr(options)))

    if register:
        from ..onnx_conv import (
            register_converters, register_rewritten_operators)
        register_converters()
        register_rewritten_operators()

    # data and schema
    if data is None or not os.path.exists(data):
        # No data: the schema can only come from the model itself
        # and no prediction is compared.
        if schema is None:
            schema = guess_schema_from_model(model, tensor_type)
        if verbose > 0:
            fLOG("[convert_validate] model schema={}".format(schema))
        df = None
    else:
        if verbose > 0:
            fLOG("[convert_validate] load data '{}'".format(data))
        df = read_csv(data)
        if verbose > 0:
            fLOG("[convert_validate] convert data into matrix")
        if schema is None:
            schema = guess_schema_from_data(df, tensor_type)
        if schema is None:
            schema = [  # pragma: no cover
                ('X', tensor_type([None, df.shape[1]]))]
        if len(schema) == 1:
            # A single input: the whole dataframe is one matrix.
            df = df.values  # pylint: disable=E1101
        if verbose > 0:
            fLOG("[convert_validate] data schema={}".format(schema))

    if noshape:
        if verbose > 0:
            fLOG(  # pragma: no cover
                "[convert_validate] convert the model with no shape information")
        # Same input names and tensor classes, dimensions left unknown.
        schema = [(input_name, col.__class__([None, None]))
                  for input_name, col in schema]
    elif verbose > 0:
        fLOG("[convert_validate] convert the model with shapes")
    onx = to_onnx(
        model, initial_types=schema, rewrite_ops=rewrite_ops,
        target_opset=target_opset, options=options)

    if optim is not None:
        if verbose > 0:
            fLOG("[convert_validate] run optimisations '{}'".format(optim))
        onx = onnx_optimisations(onx, optim=optim)
    if verbose > 0:
        fLOG("[convert_validate] saves to '{}'".format(outonnx))
    memory = onx.SerializeToString()
    with open(outonnx, 'wb') as f:
        f.write(memory)

    if verbose > 0:
        fLOG("[convert_validate] creates OnnxInference session")
    sess = OnnxInference(
        onx, runtime=runtime, runtime_options=dict(
            log_severity_level=3))
    if use_double == "switch":
        if verbose > 0:
            fLOG("[convert_validate] switch to double")
        sess.switch_initializers_dtype(model)

    if verbose > 0:
        fLOG("[convert_validate] compute prediction from model")

    if df is None:
        # no test on data
        return dict(onnx=memory)

    if verbose > 0:
        fLOG("[convert_validate] compute predictions from ONNX with name '{}'"
             "".format(name))

    # NOTE(review): the input name is hard-coded to 'X', this presumably
    # only matches schemas with a single input called 'X' - confirm
    # against the schemas produced by guess_schema_from_data.
    ort_preds = sess.run(
        {'X': df}, verbose=max(verbose - 1, 0), fLOG=fLOG)

    metrics = []
    out_skl_preds = []
    out_ort_preds = []
    for method_, name_ in zip(methods, names):
        if verbose > 0:
            fLOG("[convert_validate] compute predictions with method '{}'".format(
                method_))
        meth = getattr(model, method_)
        skl_pred = meth(df)
        # Stores the predictions of the model, not the input data.
        out_skl_preds.append(skl_pred)

        if name_ not in ort_preds:
            raise KeyError(
                "Unable to find output name '{}' in {}".format(
                    name_, list(sorted(ort_preds))))

        ort_pred = ort_preds[name_]
        out_ort_preds.append(ort_pred)
        diff = measure_relative_difference(skl_pred, ort_pred)
        if verbose > 0:
            fLOG("[convert_validate] {}={}".format(metric, diff))
        metrics.append(diff)

    return dict(skl_pred=out_skl_preds, ort_pred=out_ort_preds,
                metrics=metrics, onnx=memory)