Coverage for mlprodict/asv_benchmark/asv_exports.py: 92%
1"""
2@file
3@brief Functions to help exporting json format into text.
4"""
5import pprint
6import copy
7import os
8import json
9from json.decoder import JSONDecodeError
def fix_missing_imports():
    """
    The execution of a file through function :epkg:`exec`
    does not import new modules. They must already be
    available in the globals when the execution happens.
    This function fills the gap for some of them.

    @return     added names
    """
    from sklearn.linear_model import LogisticRegression
    return {'LogisticRegression': LogisticRegression}
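

# Hedged usage sketch (added for illustration, not part of the original
# module): the mapping returned by fix_missing_imports can be passed as
# globals to exec so that a benchmark file referencing LogisticRegression
# runs even though exec performs no import. The source string below is
# hypothetical.
def _example_fix_missing_imports():
    src = "model = LogisticRegression(max_iter=10)"
    glob = fix_missing_imports()
    loc = {}
    exec(compile(src, '<example>', 'exec'), glob, loc)  # pylint: disable=W0122
    return loc['model']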


def _dict2str(d):
    # Flattens a dictionary (possibly nested) into a compact string,
    # concatenating each key with its value and joining items with dashes.
    vals = []
    for k, v in d.items():
        if isinstance(v, dict):
            vals.append("{}{}".format(k, _dict2str(v)))
        else:
            vals.append("{}{}".format(k, v))
    return "-".join(vals)


def _coor_to_str(cc):
    # Converts a list of coordinates (parameter values stored by asv as
    # repr strings, possibly dictionaries) into a readable key 'M-...'.
    ccs = []
    for c in cc:
        if c in ('{}', {}):
            c = "o"
        elif len(c) > 1 and (c[0], c[-1]) == ('{', '}'):
            c = c.replace("<class ", "")
            c = c.replace(">:", ":")
            c = c.replace("'", '"').replace("True", "1").replace("False", "0")
            try:
                d = json.loads(c)
            except JSONDecodeError as e:  # pragma: no cover
                raise RuntimeError(
                    "Unable to interpret '{}'.".format(c)) from e

            if len(d) == 1:
                its = list(d.items())[0]
                if '.' in its[0]:
                    c = _dict2str(its[1])
                else:
                    c = _dict2str(d)
            else:
                c = _dict2str(d)
        c = str(c).strip("'")
        ccs.append(c)
    return 'M-' + "-".join(map(str, ccs)).replace("'", "")


def _figures2dict(metrics, coor, baseline=None):
    """
    Converts the data from list to dictionaries.

    @param      metrics     single array of values
    @param      coor        list of lists of coordinate names
    @param      baseline    name of the coordinate used as the baseline,
                            ratios against it are added to the output
    @return                 dictionary of metrics
    """
    if baseline is None:
        base_j = None
    else:
        quoted_base = "'{}'".format(baseline)
        base_j = None
        for i, base in enumerate(coor):
            if baseline in base:
                base_j = i, base.index(baseline)
                break
            if quoted_base in base:
                base_j = i, base.index(quoted_base)
                break
        if base_j is None:
            raise ValueError(  # pragma: no cover
                "Unable to find value baseline '{}' or [{}] in {}".format(
                    baseline, quoted_base, pprint.pformat(coor)))
    m_bases = {}
    ind = [0 for c in coor]
    res = {}
    pos = 0
    while ind[0] < len(coor[0]):
        # current combination of coordinates
        cc = [coor[i][ind[i]] for i in range(len(ind))]
        if baseline is not None:
            if cc[base_j[0]] != base_j[1]:
                cc2 = cc.copy()
                cc2[base_j[0]] = coor[base_j[0]][base_j[1]]
                key = tuple(cc2)
                skey = _coor_to_str(key)
                if skey not in m_bases:
                    m_bases[skey] = []
                m_bases[skey].append(_coor_to_str(cc))

        name = _coor_to_str(cc)
        res[name] = metrics[pos]
        pos += 1
        # moves to the next combination, last axis varying fastest
        ind[-1] += 1
        last = len(ind) - 1
        while last > 0 and ind[last] >= len(coor[last]):
            ind[last] = 0
            last -= 1
            ind[last] += 1

    for k, v in m_bases.items():
        for ks in v:
            if (k in res and res[k] != 0 and ks in res and
                    res[ks] is not None and res[k] is not None):
                res['R-' + ks[2:]] = float(res[ks]) / res[k]
    return res
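

# Hedged sketch (added for illustration, not part of the original module):
# with two parameter axes, the function walks the coordinates with the last
# axis varying fastest, so four metric values map to the four combinations.
# The parameter values below are made up; the output contains raw values
# under 'M-...' keys and ratios against the baseline under 'R-...' keys.
def _example_figures2dict():
    metrics = [0.1, 0.2, 0.3, 0.4]
    coor = [["'skl'", "'ort'"], ["'float'", "'double'"]]
    return _figures2dict(metrics, coor, baseline='skl')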


def enumerate_export_asv_json(folder, as_df=False, last_one=False,
                              baseline=None, conf=None):
    """
    Looks into :epkg:`asv` results and yields all of them
    as flat dictionaries.

    @param      folder      location of the results
    @param      last_one    to return only the most recent result
    @param      baseline    defines a baseline and computes ratios against it
    @param      conf        configuration file, may be used to
                            add additional data
    @return                 iterator on dictionaries
    """
    meta_class = None
    if conf is not None:
        if not os.path.exists(conf):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find '{}'.".format(conf))
        with open(conf, "r", encoding='utf-8') as f:
            meta = json.load(f)
        bdir = os.path.join(os.path.dirname(conf), meta['benchmark_dir'])
        if os.path.exists(bdir):
            meta_class = _retrieve_class_parameters(bdir)

    bench = os.path.join(folder, 'benchmarks.json')
    if not os.path.exists(bench):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}'.".format(bench))
    with open(bench, 'r', encoding='utf-8') as f:
        content = json.load(f)

    # content contains the list of tests
    content = {k: v for k, v in content.items() if isinstance(v, dict)}

    # looking into metadata
    machines = os.listdir(folder)
    for machine in machines:
        if 'benchmarks.json' in machine:
            continue
        filename = os.path.join(folder, machine, 'machine.json')
        with open(filename, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        # looking into all tests or the last one
        subs = os.listdir(os.path.join(folder, machine))
        subs = [m for m in subs if m != 'machine.json']
        if last_one:
            dates = [(os.stat(os.path.join(folder, machine, m)).st_ctime, m)
                     for m in subs
                     if ('-env' in m or 'virtualenv-' in m) and '.json' in m]
            dates.sort()
            subs = [dates[-1][-1]]

        # look into tests
        for sub in subs:
            data = os.path.join(folder, machine, sub)
            with open(data, 'r', encoding='utf-8') as f:
                test_content = json.load(f)
            meta_res = copy.deepcopy(meta)
            for k, v in test_content.items():
                if k != 'results':
                    meta_res[k] = v
                    continue
                results = test_content['results']
                for kk, vv in results.items():
                    if vv is None:
                        raise RuntimeError(  # pragma: no cover
                            'Unexpected empty value for vv')
                    try:
                        metrics, coord, hash_ = vv[:3]
                    except ValueError as e:  # pragma: no cover
                        raise ValueError(
                            "Test '{}', unable to interpret: {}.".format(
                                kk, vv)) from e

                    obs = {}
                    for mk, mv in meta_res.items():
                        if mk in {'result_columns'}:
                            continue
                        if isinstance(mv, dict):
                            for mk2, mv2 in mv.items():
                                obs['{}_{}'.format(mk, mk2)] = mv2
                        else:
                            obs[mk] = mv
                    spl = kk.split('.')
                    obs['test_hash'] = hash_
                    obs['test_fullname'] = kk
                    if len(spl) >= 4:
                        obs['test_model_set'] = spl[0]
                        obs['test_model_kind'] = spl[1]
                        obs['test_model'] = ".".join(spl[2:-1])
                        obs['test_name'] = spl[-1]
                    elif len(spl) >= 3:
                        obs['test_model_set'] = spl[0]
                        obs['test_model'] = ".".join(spl[1:-1])
                        obs['test_name'] = spl[-1]
                    else:
                        obs['test_model'] = ".".join(spl[:-1])
                        obs['test_name'] = spl[-1]
                    if metrics is not None:
                        obs.update(
                            _figures2dict(metrics, coord, baseline=baseline))
                    if meta_class is not None:
                        _update_test_metadata(obs, meta_class)
                    yield obs
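

# Hedged usage sketch (added for illustration, not part of the original
# module): iterating over an asv results folder and printing one line per
# recorded test; the 'results' folder name is an assumption.
def _example_enumerate_export_asv_json():
    for obs in enumerate_export_asv_json('results', last_one=True):
        print(obs['test_fullname'], obs['test_hash'])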


def export_asv_json(folder, as_df=False, last_one=False, baseline=None,
                    conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    @param      folder      location of the results
    @param      as_df       returns a dataframe or
                            a list of dictionaries
    @param      last_one    to return only the most recent result
    @param      baseline    computes ratios against this baseline
    @param      conf        configuration file, may be used to
                            add additional data
    @return                 :epkg:`dataframe` or flat data
    """
    rows = list(enumerate_export_asv_json(
        folder, last_one=last_one, baseline=baseline, conf=conf))
    if as_df:
        import pandas
        return pandas.DataFrame(rows)
    return rows
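

# Hedged usage sketch (added for illustration, not part of the original
# module): flattening the same results into a pandas dataframe; the
# 'results' folder, the 'asv.conf.json' file and the 'skl' baseline value
# are assumptions.
def _example_export_asv_json():
    df = export_asv_json('results', as_df=True, last_one=True,
                         baseline='skl', conf='asv.conf.json')
    return df.sort_values('test_fullname')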


def _retrieve_class_parameters(bdir):
    """
    Imports the files found in *bdir*, compiles them and extracts
    metadata from the classes they define.
    """
    found = {}
    for path, _, files in os.walk(os.path.abspath(bdir)):
        fulls = [os.path.join(path, f) for f in files]
        for full in fulls:
            if (os.path.splitext(full)[-1] == '.py' and
                    os.path.split(full)[-1] != '__init__.py'):
                cls = list(_enumerate_classes(full))
                for cl in cls:
                    name = cl.__name__
                    found[name] = cl
    return found
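

# Hedged sketch (added for illustration, not part of the original module):
# mapping benchmark class names to classes for a benchmark folder; the
# 'benches' folder name is an assumption.
def _example_retrieve_class_parameters():
    classes = _retrieve_class_parameters('benches')
    return sorted(classes)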


def _update_test_metadata(row, class_meta):
    # Copies the 'par_*' attributes of the benchmark class matching
    # row['test_model'] into the row.
    name = row.get('test_model', None)
    if name is None:
        return
    sub = name.split('.')[-1]
    if sub in class_meta:
        for k, v in class_meta[sub].__dict__.items():
            if k.startswith('par_'):
                row[k] = v


def _enumerate_classes(filename):
    """
    Extracts the classes of a file.
    """
    with open(filename, "r", encoding="utf-8") as f:
        content = f.read()
    gl = fix_missing_imports()
    loc = {}
    cp = compile(content, filename, mode='exec')

    try:
        exec(cp, gl, loc)  # pylint: disable=W0122
    except NameError as e:  # pragma: no cover
        raise NameError(
            "An import is probably missing from function 'fix_missing_imports'"
            ".") from e

    for k, v in loc.items():
        # keeps only classes whose name starts with an uppercase letter
        # and which define 'setup_cache' (asv benchmark classes)
        if k[0] < 'A' or k[0] > 'Z':
            continue
        if not hasattr(v, 'setup_cache'):
            continue
        yield v
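

# Hedged sketch (added for illustration, not part of the original module):
# listing the benchmark classes found in one file; the path below is an
# assumption.
def _example_enumerate_classes():
    for cl in _enumerate_classes('benches/bench_LogisticRegression.py'):
        print(cl.__name__)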