Coverage for mlprodict/onnxrt/validate/validate_difference.py

1"""

2@file

3@brief Validates runtime for many :epkg:`scikit-learn` operators.

4The submodule relies on :epkg:`onnxconverter_common`,

5:epkg:`sklearn-onnx`.

6"""

7import numpy

8import pandas

def measure_relative_difference(skl_pred, ort_pred, batch=True, abs_diff=False):
    """
    Measures the relative difference between predictions
    computed in two different ways.

    @param      skl_pred        prediction from :epkg:`scikit-learn`
                                or any other way
    @param      ort_pred        prediction from an :epkg:`ONNX` runtime
                                or any other way
    @param      batch           predictions are processed in a batch,
                                *skl_pred* and *ort_pred* should be arrays
                                or tuple or list of arrays
    @param      abs_diff        return the absolute difference instead of
                                the relative one, the flag is also applied
                                to every pair of a tuple or list of predictions
    @return                     relative max difference (or the maximum
                                absolute difference if *abs_diff* is True)

    The function does not raise when the predictions cannot be
    aligned, it returns a large sentinel value instead:

    * ``1e10``: *skl_pred* and *ort_pred* are sequences
      of different lengths (*batch* is True),
    * ``1e9``: an :epkg:`IndexError` occurred while pairing
      the predictions (*batch* is False),
    * ``1e11``: both arrays have different shapes
      and different sizes.

    Because approximations get bigger when the vector is high,
    the function computes an adjusted relative differences.
    Let's assume *X* and *Y* are two vectors, let's denote
    :math:`med(X)` the median of *X*. The function returns the
    following metric: :math:`\\max_i(|X_i - Y_i| / \\max(X_i, med(|X|))`.

    The function takes the fourth highest difference, not the three first
    which may happen after a conversion into float32. This only applies
    when there are more than five values, otherwise the highest
    difference is returned.

    A :epkg:`RuntimeError` is raised if the result is nan
    whereas *ort_pred* is not entirely made of nan.
    """
    if hasattr(ort_pred, "is_zip_map") and ort_pred.is_zip_map:
        ort_pred = ort_pred.values
    if (isinstance(skl_pred, list) and
            all(isinstance(t, numpy.ndarray) for t in skl_pred)):
        # multi label classification
        skl_pred = numpy.array(skl_pred)
        skl_pred = skl_pred.reshape((skl_pred.shape[1], -1))

    if isinstance(skl_pred, tuple) or (batch and isinstance(skl_pred, list)):
        # Several outputs: every pair is compared separately
        # and the worst difference is returned.
        diffs = []
        if batch:
            if len(skl_pred) != len(ort_pred):
                return 1e10  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                # abs_diff must be forwarded, otherwise the nested call
                # silently falls back to the relative difference.
                diff = measure_relative_difference(
                    skl_item, ort_pred[i], abs_diff=abs_diff)
                diffs.append(diff)
        else:  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                try:
                    diff = measure_relative_difference(
                        skl_item, [_[i] for _ in ort_pred],
                        abs_diff=abs_diff)
                except IndexError:  # pragma: no cover
                    return 1e9
                except RuntimeError as e:  # pragma: no cover
                    raise RuntimeError("Unable to compute differences between"
                                       "\n{}--------\n{}".format(
                                           skl_pred, ort_pred)) from e
                diffs.append(diff)
        return max(diffs)

    # Single output: ort_pred is converted into an array.
    ort_pred_ = ort_pred  # kept untouched for the error message below
    if isinstance(ort_pred, list):
        if isinstance(ort_pred[0], dict):
            # list of dictionaries, one column per key
            ort_pred = pandas.DataFrame(list(ort_pred)).values
        elif (isinstance(ort_pred[0], list) and
                isinstance(ort_pred[0][0], dict)):
            if len(ort_pred) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(list(ort_pred[0])).values
            elif len(ort_pred[0]) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(
                    [o[0] for o in ort_pred]).values
            else:
                raise RuntimeError(  # pragma: no cover
                    "Unable to compute differences between"
                    "\n{}--------\n{}".format(skl_pred, ort_pred))
        else:
            try:
                ort_pred = numpy.array(ort_pred)
            except ValueError as e:  # pragma: no cover
                raise ValueError(
                    "Unable to interpret (batch={}, type(skl_pred): {})\n{}\n-----\n{}".format(
                        batch, type(skl_pred), skl_pred, ort_pred)) from e

    # Objects exposing todense are converted into dense arrays.
    skl_sparse = hasattr(skl_pred, 'todense')
    if skl_sparse:
        skl_pred = skl_pred.todense().getA()
    ort_sparse = hasattr(ort_pred, 'todense')
    if ort_sparse:
        ort_pred = ort_pred.todense().getA()

    # If only one side contains nan values, they are replaced
    # by numpy.nan_to_num so that a difference can still be computed.
    try:
        if (any(numpy.isnan(skl_pred.reshape((-1, )))) and
                all(~numpy.isnan(ort_pred.reshape((-1, ))))):
            skl_pred = numpy.nan_to_num(skl_pred)
        if (any(numpy.isnan(ort_pred.reshape((-1, )))) and
                all(~numpy.isnan(skl_pred.reshape((-1, ))))):
            ort_pred = numpy.nan_to_num(ort_pred)
    except ValueError as e:  # pragma: no cover
        raise RuntimeError(
            "Unable to compute differences between {}{} - {}{}\n{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                e, skl_pred, ort_pred)) from e

    if isinstance(ort_pred, list):
        raise RuntimeError(  # pragma: no cover
            "Issue with {}\n{}".format(ort_pred, ort_pred_))

    # Same number of elements but different shapes: compare flattened.
    if skl_pred.shape != ort_pred.shape and skl_pred.size == ort_pred.size:
        ort_pred = ort_pred.ravel()
        skl_pred = skl_pred.ravel()

    if skl_pred.shape != ort_pred.shape:
        return 1e11

    if hasattr(skl_pred, 'A'):
        # ravel() on matrix still returns a matrix
        skl_pred = skl_pred.A  # pragma: no cover
    if hasattr(ort_pred, 'A'):
        # ravel() on matrix still returns a matrix
        ort_pred = ort_pred.A  # pragma: no cover
    r_skl_pred = skl_pred.ravel()
    r_ort_pred = ort_pred.ravel()

    if abs_diff:
        return numpy.abs(r_skl_pred - r_ort_pred).max()

    # The denominator is max(|X_i|, med(|X|)); the median falls back
    # to the maximum, then to 1, to avoid a division by zero.
    ab = numpy.abs(r_skl_pred)
    median = numpy.median(ab.ravel())
    mx = numpy.max(ab)
    if median == 0:
        median = mx
    if median == 0:
        median = 1
    mx = numpy.maximum(ab, median)
    di = r_ort_pred - r_skl_pred
    d = di / mx
    rel_sort = numpy.sort(numpy.abs(d))
    rel_diff = rel_sort[-4] if len(rel_sort) > 5 else rel_sort[-1]

    if numpy.isnan(rel_diff) and not all(numpy.isnan(r_ort_pred)):
        # ort_sparse (a boolean) must be used here: testing the array
        # ort_pred itself raises a ValueError which hides this error.
        raise RuntimeError(  # pragma: no cover
            "Unable to compute differences between {}{} - {}{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                skl_pred, ort_pred))
    return rel_diff

Coverage for mlprodict/onnxrt/validate/validate_difference.py: 95%

62 statements