Coverage for mlprodict/onnxrt/doc/doc_write_helper.py

1"""

2@file

3@brief Documentation helper.

4"""

5from logging import getLogger

6from textwrap import indent, dedent

7import numpy

8from pandas import DataFrame, notnull

9from sklearn.linear_model import LinearRegression

10from pyquickhelper.loghelper import noLOG

11from pyquickhelper.pandashelper.tblformat import df2rst

12from sklearn import __all__ as sklearn__all__

13from ... import __max_supported_opset__

14from ...tools.model_info import analyze_model

15from ..validate.validate import enumerate_validated_operator_opsets, sklearn_operators

16from ...onnx_tools.optim.sklearn_helper import inspect_sklearn_model

17from ...onnx_tools.optim.onnx_helper import onnx_statistics

18from ..onnx_inference import OnnxInference

19from ..validate.validate_summary import _clean_values_optim

20from .doc_helper import visual_rst_template

23def _make_opset(row):

24 opsets = []

25 if hasattr(row, 'to_dict'):

26 row = row.to_dict() # pragma: no cover

27 for k, v in row.items():

28 if k.startswith('opset'):

29 if isinstance(v, int):

30 opsets.append(v)

31 elif isinstance(v, float): # pragma: no cover

32 if numpy.isnan(v):

33 opsets.append(0)

34 else:

35 opsets.append(int(v))

36 else: # pragma: no cover

37 vv = list(_ for _ in v if 'OK' in str(v))

38 if len(vv) > 0:

39 opsets.append(int(k.replace("opset", "")))

40 if len(opsets) == 0:

41 return "o%d" % __max_supported_opset__ # pragma: no cover

42 val = max(opsets)

43 return "o%d" % val

def enumerate_visual_onnx_representation_into_rst(sub, fLOG=noLOG):
    """
    Returns content for pages such as
    :ref:`l-skl2onnx-linear_model`.

    @param      sub     given to ``sklearn_operators`` to get the list
                        of models to process
    @param      fLOG    logging function, given to
                        ``enumerate_validated_operator_opsets``
    @return             iterator on strings, one per distinct title
                        *name - problem - scenario - optim*

    The function is a generator. Logger ``'skl2onnx'`` is disabled
    as soon as the iteration starts and it is not enabled again.
    """
    from jinja2 import Template  # delayed import

    getLogger('skl2onnx').disabled = True

    template_text = visual_rst_template()
    renderer = Template(template_text)
    model_names = sorted(op['name'] for op in sklearn_operators(sub))
    seen_titles = set()

    validated = enumerate_validated_operator_opsets(
        verbose=0, debug=None, fLOG=fLOG,
        opset_min=__max_supported_opset__,
        opset_max=__max_supported_opset__,
        store_models=True, models=model_names)

    for row in validated:
        # Rows without a converted model cannot be drawn.
        if 'ONNX' not in row:
            continue

        name = row['name']
        scenario = row['scenario']
        problem = row['problem']
        model = row['MODEL']
        method = row['method_name']
        optim = row.get('optim', '')
        opset = _make_opset(row)

        # Statistics from the three sources are merged into one dictionary,
        # every key is prefixed by its origin.
        stats_skl = inspect_sklearn_model(model)
        stats_onx = onnx_statistics(row['ONNX'])
        stats_model = analyze_model(model)
        stats = {}
        for prefix, values in (('skl_', stats_skl), ('onx_', stats_onx),
                               ('fit_', stats_model)):
            for key, value in values.items():
                stats[prefix + key] = value

        # One line per statistic in the final table.
        table = df2rst(DataFrame([stats]).T.reset_index(drop=False))

        clean_optim = _clean_values_optim(optim)
        title = " - ".join([name, problem, scenario, clean_optim])
        if title in seen_titles:
            continue  # pragma: no cover
        seen_titles.add(title)

        link = "-".join([name, problem, scenario, clean_optim, opset])
        for char in " {}'":
            link = link.replace(char, "")

        if optim:
            optim_param = (
                "Model was converted with additional parameter: "
                "``{}``.".format(optim))
        else:
            optim_param = ""

        oinf = OnnxInference(row['ONNX'], skip_run=True)
        dot = oinf.to_dot(recursive=True)
        try:
            res = templ_output = renderer.render(
                dot=dot, model=repr(model), method=method,
                kind=problem, title=title,
                indent=indent, len=len,
                link=link, table=table,
                optim_param=optim_param)
        except KeyError as e:  # pragma: no cover
            # The rendering failed: the issue and the template are
            # returned as a literal block instead.
            details = ['', str(e), '']
            details.extend(
                "{}='{}'".format(label, value)
                for label, value in (('title', title), ('method', method),
                                     ('problem', problem)))
            details.extend(
                [model.__class__.__name__, "", "---------", template_text])
            # NOTE(review): the prefix is a single space in the extract this
            # was reviewed from, whitespace may have been collapsed - confirm.
            res = (".. index:: docissue:\n\n::\n\n" +
                   indent("\n".join(details), " "))
        yield res

116

117

def compose_page_onnxrt_ops(level="^"):
    """
    Writes page :ref:`l-onnx-runtime-operators`.

    The page starts with a fixed introduction followed by one section
    per object listed in ``_op_list``, sorted by name. Every section
    contains a label ``lpyort-<name>``, a title and a directive
    ``autosignature``.

    @param      level       title level, character used to underline
                            the title of every section
    @return                 content of the page as a string
    """
    begin = dedent("""
    .. _l-onnx-runtime-operators:

    Python Runtime for ONNX operators
    =================================

    The main function instantiates a runtime class which
    computes the outputs of a specific node.

    .. autosignature:: mlprodict.onnxrt.ops.load_op

    Other sections documents available operators.
    This project was mostly started to show a way to
    implement a custom runtime, do some benchmarks,
    test, exepriment...

    .. contents::
        :local:

    Python
    ++++++

    """)
    from ..ops_cpu._op_list import _op_list

    # The sort only looks at the name. Sorting tuples (name, op) compares
    # the operators themselves when two names are equal, which fails
    # if these objects do not define an order.
    ops = sorted(_op_list, key=lambda op: op.__name__)

    rows = [begin]
    for op in ops:
        name = op.__name__
        # Only the last part of the module name is used in the signature.
        mod = op.__module__.split('.')[-1]
        rows.extend([
            "",
            ".. _lpyort-{}:".format(name),
            "",
            name,
            level * len(name),
            "",
            ".. autosignature:: mlprodict.onnxrt.ops_cpu.{}.{}".format(
                mod, name),
            "",
        ])
    return "\n".join(rows)

167

168

def split_columns_subsets(df):
    """
    Functions used in the documentation to split
    a dataframe by columns into multiple dataframe to
    reduce the scrolling.

    @param      df      object with an attribute ``columns``
    @return             tuple *(common, subsets)*, *common* is the list
                        of columns among ``name``, ``problem``,
                        ``scenario``, ``optim`` found in *df*,
                        *subsets* is a list of non empty lists of columns

    Columns whose name contains ``'opset'`` belong to every subset.
    """
    common = [c for c in ('name', 'problem', 'scenario', 'optim')
              if c in df.columns]

    # One rule per subset, the order of the rules is the order
    # of the subsets in the result.
    rules = (
        lambda c: 'opset' in c or c == 'onx_nnodes',
        lambda c: 'ERROR' in c or 'opset' in c,
        lambda c: c.startswith(('skl_', 'onx_')) or 'opset' in c,
        lambda c: 'N=' in c or 'opset' in c,
    )
    groups = [[c for c in df.columns if keep(c)] for keep in rules]
    return common, [group for group in groups if group]

186

187

def build_key_split(key, index):
    """
    Used for documentation. Converts a model label into the name
    of the group it belongs to.

    @param      key     label, its text is used as is unless it contains
                        backquotes, in that case only the text following
                        the first backquote is kept, cut at the next
                        backquote and at the first ``<``
    @param      index   not used
    @return             name of the group
    """
    text = str(key)
    pieces = text.split('`')
    label = pieces[1].split('<')[0].strip() if len(pieces) > 1 else text

    def mentions(*tokens):
        # True if one of the tokens appears anywhere in the label.
        return any(token in label for token in tokens)

    if mentions('SVC', 'SVR', 'SVM'):
        return 'SVM'  # pragma: no cover
    if mentions('Neighbors'):
        return 'Neighbors'  # pragma: no cover
    if mentions('Scaler', 'Normalizer'):
        return 'Scaler'  # pragma: no cover
    # NOTE(review): labels ending with RBM fall into the group "...NB",
    # confirm this is intended.
    if label.endswith(('NB', 'RBM')):
        return "...NB"  # pragma: no cover
    if mentions('KMeans'):
        return "KMeans"  # pragma: no cover
    if mentions('XGB', 'LGBM', 'Tree', 'Forest'):
        return 'Trees'  # pragma: no cover
    if mentions('ARDRegression', 'ElasticNet', 'HuberRegressor', 'Lars',
                'Lasso', 'LinearRegression', 'LogisticRegression',
                'Ridge', 'SGD', 'TheilSen'):
        return 'Linear'  # pragma: no cover

    # First prefix of the list the label starts with.
    prefixes = ("Lasso", "Select", "Label", 'Tfidf', 'Feature',
                'Bernoulli', 'MultiTask', 'OneVs', 'PLS',
                'Sparse', 'Spectral', 'MiniBatch',
                'Bayesian')
    matched = next((p for p in prefixes if label.startswith(p)), None)
    if matched is not None:
        return matched + '...'

    # Suffixes are removed one after the other, in this order.
    for suffix in ('CV', 'Regressor', 'Classifier'):  # pragma: no cover
        if label.endswith(suffix):
            label = label[:-len(suffix)]
    return label  # pragma: no cover

230

231

def filter_rows(df):
    """
    Used for documentation. Keeps the rows with a value in the first
    column found among ``ERROR-msg`` and ``RT/SKL-N=1``.

    @param      df      dataframe
    @return             filtered dataframe, *df* itself if it has
                        none of the two columns
    """
    def has_value(cell):
        # A cell is empty if it is null, None, '' or the string 'nan'.
        return notnull(cell) and cell not in (None, '', 'nan')

    column = next(
        (c for c in ('ERROR-msg', 'RT/SKL-N=1') if c in df.columns), None)
    if column is None:
        return df
    mask = df[column].apply(has_value)
    return df[mask]

Coverage for mlprodict/onnxrt/doc/doc_write_helper.py: 100%

115 statements