Coverage for mlprodict/testing/model_verification.py: 100%
1"""
2@file
3@brief Complex but recurring testing functions.
4"""
5import random
6import pandas
7import numpy
8from numpy.testing import assert_allclose
9from ..grammar.cc import compile_c_function
10from ..grammar.cc.c_compilation import CompilationError


def iris_data():
    """
    Returns ``(X, y)`` for iris data.
    """
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data[:, :2]
    state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
    rnd = state.randn(*X.shape) / 3
    X += rnd
    y = iris.target
    return X, y
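

# A minimal usage sketch, kept as a comment so the module behaviour is
# unchanged; the shapes follow from load_iris (150 rows, first 2 columns kept):
#
#     X, y = iris_data()
#     # X.shape == (150, 2), y.shape == (150,)
#     # X is perturbed with fixed-seed gaussian noise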


def check_is_almost_equal(xv, exp, precision=1e-5, message=None):
    """
    Checks that two floats or two arrays are almost equal.

    @param      xv          float or vector
    @param      exp         expected value
    @param      precision   absolute tolerance
    @param      message     additional message to include in the error
    """
    if isinstance(exp, float) or len(exp.ravel()) == 1:
        if not (isinstance(xv, float) or len(xv.ravel()) == 1):
            raise TypeError(  # pragma: no cover
                "Type mismatch between {0} and {1} (expected).".format(
                    type(xv), type(exp)))
        diff = abs(xv - exp)
        if diff > precision:
            raise ValueError(  # pragma: no cover
                "Predictions are different expected={0}, computed={1}".format(
                    exp, xv))
    else:
        if not isinstance(xv, numpy.ndarray):
            raise TypeError(
                "Type mismatch between {0} and {1} (expected).".format(
                    type(xv), type(exp)))
        xv = xv.ravel()
        exp = exp.ravel()
        try:
            assert_allclose(xv, exp, atol=precision)
        except AssertionError as e:
            if message is None:
                raise e
            raise AssertionError(message) from e  # pragma: no cover
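

# A short usage sketch for check_is_almost_equal, written as a comment; the
# values are purely illustrative:
#
#     check_is_almost_equal(3.14159, 3.14159)                  # scalar branch
#     check_is_almost_equal(numpy.array([1., 2.]), numpy.array([1., 2.]),
#                           precision=1e-6, message="vectors differ")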


def check_model_representation(model, X, y=None, convs=None,
                               output_names=None, only_float=True,
                               verbose=False, suffix="", fLOG=None):
    """
    Checks that a trained model can be exported to a specific list
    of formats and that the exported representation produces the
    same predictions.

    @param      model           model (a class or an instance of a model, not trained)
    @param      X               features
    @param      y               targets
    @param      convs           list of formats to check, all possible by default ``['json', 'c']``
    @param      output_names    list of output columns
                                (can be None, a default value is then inferred
                                from the scikit-learn output)
    @param      only_float      only float features are supported, must be True
    @param      verbose         print some information
    @param      suffix          suffix added to disambiguate the compiled module
    @param      fLOG            logging function
    @return                     function to call to run the prediction
    """
    if not only_float:
        raise NotImplementedError(  # pragma: no cover
            "Only floats are allowed.")
    if isinstance(X, list):
        X = pandas.DataFrame(X)
        if len(X.shape) != 2:
            raise ValueError(  # pragma: no cover
                "X cannot be converted into a proper DataFrame. It has shape {0}."
                "".format(X.shape))
        if only_float:
            X = X.values
    if isinstance(y, list):
        y = numpy.array(y)
    if convs is None:
        convs = ['json', 'c']

    # sklearn
    if not hasattr(model.__class__, "fit"):
        # It is a class object and not an instance.
        # We use the default values.
        model = model()

    model.fit(X, y)
    h = random.randint(0, X.shape[0] - 1)
    if isinstance(X, pandas.DataFrame):
        oneX = X.iloc[h, :].astype(numpy.float32)
    else:
        oneX = X[h, :].ravel().astype(numpy.float32)

    # model or transform
    moneX = numpy.resize(oneX, (1, len(oneX)))
    if hasattr(model, "predict"):
        ske = model.predict(moneX)
    else:
        ske = model.transform(moneX)

    if verbose and fLOG:
        fLOG("---------------------")
        fLOG(type(oneX), oneX.dtype)
        fLOG(model)
        for k, v in sorted(model.__dict__.items()):
            if k[-1] == '_':
                fLOG(" {0}={1}".format(k, v))
        fLOG("---------------------")

    # grammar
    from ..grammar.grammar_sklearn import sklearn2graph
    gr = sklearn2graph(model, output_names=output_names)
    lot = gr.execute(Features=oneX)
    if verbose and fLOG:
        fLOG(gr.graph_execution())

    # verification
    check_is_almost_equal(lot, ske)

    # default for output_names
    if output_names is None:
        if len(ske.shape) == 1:
            output_names = ["Prediction"]
        elif len(ske.shape) == 2:
            output_names = ["p%d" % i for i in range(ske.shape[1])]
        else:
            raise ValueError(  # pragma: no cover
                "Cannot guess default values for output_names.")

    for lang in convs:
        if lang in ('c', ):
            code_c = gr.export(lang=lang)['code']
            if code_c is None:
                raise ValueError("cannot be None")  # pragma: no cover

            compile_fct = compile_c_function

            from contextlib import redirect_stdout, redirect_stderr
            from io import StringIO
            fout = StringIO()
            ferr = StringIO()
            with redirect_stdout(fout):
                with redirect_stderr(ferr):
                    try:
                        fct = compile_fct(
                            code_c, len(output_names), suffix=suffix,
                            fLOG=lambda s: fout.write(s + "\n"))
                    except Exception as e:  # pragma: no cover
                        raise CompilationError(
                            "Unable to compile a code\n-OUT-\n{0}\n-ERR-\n{1}"
                            "\n-CODE-\n{2}\n-----------\n{3}".format(
                                fout.getvalue(), ferr.getvalue(),
                                code_c, e)) from e

            if verbose and fLOG:
                fLOG("-----------------")
                fLOG(output_names)
                fLOG("-----------------")
                fLOG(code_c)
                fLOG("-----------------")
                fLOG("h=", h, "oneX=", oneX)
                fLOG("-----------------")
            lotc = fct(oneX)
            check_is_almost_equal(
                lotc, ske, message="Issue with lang='{0}'".format(lang))
            lotc_exp = lotc.copy()
            lotc2 = fct(oneX, lotc)
            if not numpy.array_equal(lotc_exp, lotc2):
                raise ValueError(  # pragma: no cover
                    "Second call returns different results.\n{0}\n{1}".format(
                        lotc_exp, lotc2))
        else:
            ser = gr.export(lang="json", hook={'array': lambda v: v.tolist()})
            if ser is None:
                raise ValueError(  # pragma: no cover
                    "No output for lang='{0}'".format(lang))