Coverage for mlprodict/onnxrt/validate/validate_difference.py: 95%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Validates runtime for many :epkg:`scikit-learn` operators.
4The submodule relies on :epkg:`onnxconverter_common`,
5:epkg:`sklearn-onnx`.
6"""
7import numpy
8import pandas
def measure_relative_difference(skl_pred, ort_pred, batch=True, abs_diff=False):
    """
    Measures the relative difference between predictions
    computed in two different ways.

    @param      skl_pred        prediction from :epkg:`scikit-learn`
                                or any other way
    @param      ort_pred        prediction from an :epkg:`ONNX` runtime
                                or any other way
    @param      batch           predictions are processed in a batch,
                                *skl_pred* and *ort_pred* should be arrays
                                or tuple or list of arrays
    @param      abs_diff        return the maximum absolute difference
                                instead of the relative one (also applied
                                to every element of a tuple or a list)
    @return                     relative max difference, or one of the
                                sentinel values below when the comparison
                                does not make any sense

    Sentinel values: ``1e10`` if both sequences do not have the same
    length (batch mode), ``1e9`` if an output cannot be indexed
    (non batch mode), ``1e11`` if the shapes differ and cannot be
    reconciled by flattening both arrays.

    Because approximations get bigger when the values are high,
    the function computes an adjusted relative difference.
    Let's assume *X* and *Y* are two vectors, let's denote
    :math:`med(|X|)` the median of the absolute values of *X*.
    The function returns the following metric:
    :math:`\\max_i(|X_i - Y_i| / \\max(|X_i|, med(|X|)))`.
    If the median is null, it is replaced by the maximum of *|X|*,
    and by 1 if that maximum is null as well.

    When there are more than five values, the function takes the
    fourth highest difference, not the three first
    which may happen after a conversion into float32.

    A ``RuntimeError`` is raised if the metric is nan whereas
    *ort_pred* is not entirely made of nan, or if a list of lists
    of dictionaries cannot be converted into an array.
    """
    if hasattr(ort_pred, "is_zip_map") and ort_pred.is_zip_map:
        ort_pred = ort_pred.values
    if (isinstance(skl_pred, list) and
            all(isinstance(t, numpy.ndarray) for t in skl_pred)):
        # multi label classification
        skl_pred = numpy.array(skl_pred)
        skl_pred = skl_pred.reshape((skl_pred.shape[1], -1))

    if isinstance(skl_pred, tuple) or (batch and isinstance(skl_pred, list)):
        # Several outputs: compare them one by one and keep the worst case.
        diffs = []
        if batch:
            if len(skl_pred) != len(ort_pred):
                return 1e10  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                # abs_diff is forwarded so that the requested metric
                # is the one computed for every output.
                diffs.append(measure_relative_difference(
                    skl_item, ort_pred[i], abs_diff=abs_diff))
        else:  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                try:
                    diff = measure_relative_difference(
                        skl_item, [_[i] for _ in ort_pred],
                        abs_diff=abs_diff)
                except IndexError:  # pragma: no cover
                    return 1e9
                except RuntimeError as e:  # pragma: no cover
                    raise RuntimeError("Unable to compute differences between"
                                       "\n{}--------\n{}".format(
                                           skl_pred, ort_pred)) from e
                diffs.append(diff)
        return max(diffs)

    # Single output: converts ort_pred into an array.
    if isinstance(ort_pred, list):
        if isinstance(ort_pred[0], dict):
            ort_pred = pandas.DataFrame(list(ort_pred)).values
        elif (isinstance(ort_pred[0], list) and
                isinstance(ort_pred[0][0], dict)):
            if len(ort_pred) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(list(ort_pred[0])).values
            elif len(ort_pred[0]) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(
                    [o[0] for o in ort_pred]).values
            else:
                raise RuntimeError(  # pragma: no cover
                    "Unable to compute differences between"
                    "\n{}--------\n{}".format(skl_pred, ort_pred))
        else:
            try:
                ort_pred = numpy.array(ort_pred)
            except ValueError as e:  # pragma: no cover
                raise ValueError(
                    "Unable to interpret (batch={}, type(skl_pred): {})\n{}\n-----\n{}".format(
                        batch, type(skl_pred), skl_pred, ort_pred)) from e

    # Sparse containers are made dense. numpy.asarray works whether
    # todense() returns a numpy.matrix or a plain ndarray.
    skl_sparse = hasattr(skl_pred, 'todense')
    if skl_sparse:
        skl_pred = numpy.asarray(skl_pred.todense())
    ort_sparse = hasattr(ort_pred, 'todense')
    if ort_sparse:
        ort_pred = numpy.asarray(ort_pred.todense())

    # If only one side contains nan values, they are replaced
    # (numpy.nan_to_num) before comparing.
    try:
        skl_has_nan = numpy.isnan(skl_pred.reshape((-1, ))).any()
        ort_has_nan = numpy.isnan(ort_pred.reshape((-1, ))).any()
        if skl_has_nan and not ort_has_nan:
            skl_pred = numpy.nan_to_num(skl_pred)
        elif ort_has_nan and not skl_has_nan:
            ort_pred = numpy.nan_to_num(ort_pred)
    except ValueError as e:  # pragma: no cover
        raise RuntimeError(
            "Unable to compute differences between {}{} - {}{}\n{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                e, skl_pred, ort_pred)) from e

    # Same number of elements but different shapes: compare flattened.
    if skl_pred.shape != ort_pred.shape and skl_pred.size == ort_pred.size:
        ort_pred = ort_pred.ravel()
        skl_pred = skl_pred.ravel()

    if skl_pred.shape != ort_pred.shape:
        return 1e11

    if hasattr(skl_pred, 'A'):
        # ravel() on matrix still returns a matrix
        skl_pred = skl_pred.A  # pragma: no cover
    if hasattr(ort_pred, 'A'):
        # ravel() on matrix still returns a matrix
        ort_pred = ort_pred.A  # pragma: no cover
    r_skl_pred = skl_pred.ravel()
    r_ort_pred = ort_pred.ravel()

    if abs_diff:
        return numpy.abs(r_skl_pred - r_ort_pred).max()

    ab = numpy.abs(r_skl_pred)
    median = numpy.median(ab)
    if median == 0:
        median = numpy.max(ab)
    if median == 0:
        median = 1
    # Small expected values are compared to the median to avoid
    # dividing by a value close to zero.
    denominator = numpy.maximum(ab, median)
    rel_sort = numpy.sort(numpy.abs((r_ort_pred - r_skl_pred) / denominator))
    rel_diff = rel_sort[-4] if len(rel_sort) > 5 else rel_sort[-1]

    if numpy.isnan(rel_diff) and not numpy.isnan(r_ort_pred).all():
        raise RuntimeError(  # pragma: no cover
            "Unable to compute differences between {}{} - {}{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                skl_pred, ort_pred))
    return rel_diff