Coverage for mlprodict/asv_benchmark/asv_exports.py: 92%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

185 statements  

1""" 

2@file 

3@brief Functions to help exporting json format into text. 

4""" 

5import pprint 

6import copy 

7import os 

8import json 

9from json.decoder import JSONDecodeError 

10 

11 

def fix_missing_imports():
    """
    The execution of a file through function :epkg:`exec`
    does not import new modules. They must be there when
    it is done. This function fills the gap for some of
    them.

    @return added names
    """
    from sklearn.linear_model import LogisticRegression
    added_names = {'LogisticRegression': LogisticRegression}
    return added_names

23 

24 

25def _dict2str(d): 

26 vals = [] 

27 for k, v in d.items(): 

28 if isinstance(v, dict): 

29 vals.append("{}{}".format(k, _dict2str(v))) 

30 else: 

31 vals.append("{}{}".format(k, v)) 

32 return "-".join(vals) 

33 

34 

35def _coor_to_str(cc): 

36 ccs = [] 

37 for c in cc: 

38 if c in ('{}', {}): 

39 c = "o" 

40 elif len(c) > 1 and (c[0], c[-1]) == ('{', '}'): 

41 c = c.replace("<class ", "") 

42 c = c.replace(">:", ":") 

43 c = c.replace("'", '"').replace("True", "1").replace("False", "0") 

44 try: 

45 d = json.loads(c) 

46 except JSONDecodeError as e: # pragma: no cover 

47 raise RuntimeError( 

48 "Unable to interpret '{}'.".format(c)) from e 

49 

50 if len(d) == 1: 

51 its = list(d.items())[0] 

52 if '.' in its[0]: 

53 c = _dict2str(its[1]) 

54 else: 

55 c = _dict2str(d) 

56 else: 

57 c = _dict2str(d) 

58 c = str(c).strip("'") 

59 ccs.append(c) 

60 return 'M-' + "-".join(map(str, ccs)).replace("'", "") 

61 

62 

def _figures2dict(metrics, coor, baseline=None):
    """
    Converts the data from list to dictionaries.

    @param metrics single array of values
    @param coor list of list of coordinates names
    @param baseline one coordinates is the baseline
    @return dictionary of metrics

    Metric values are stored under keys built by :func:`_coor_to_str`
    (prefix ``'M-'``). When *baseline* is given, an additional
    ``'R-...'`` entry is added for every non-baseline coordinate,
    holding the ratio metric / baseline metric.
    """
    if baseline is None:
        base_j = None
    else:
        quoted_base = "'{}'".format(baseline)
        base_j = None
        # locate the axis holding the baseline and its position on it
        for i, base in enumerate(coor):
            if baseline in base:
                base_j = i, base.index(baseline)
                break
            if quoted_base in base:
                base_j = i, base.index(quoted_base)
                break
        if base_j is None:
            raise ValueError(  # pragma: no cover
                "Unable to find value baseline '{}' or [{}] in {}".format(
                    baseline, quoted_base, pprint.pformat(coor)))
    m_bases = {}
    ind = [0 for c in coor]
    res = {}
    pos = 0
    while ind[0] < len(coor[0]):
        cc = [coor[i][ind[i]] for i in range(len(ind))]
        if base_j is not None:
            # Skip the baseline coordinate itself: its ratio against
            # itself is always 1. Bug fix: the previous test compared a
            # coordinate value with an integer index (always unequal).
            if cc[base_j[0]] != coor[base_j[0]][base_j[1]]:
                cc2 = cc.copy()
                cc2[base_j[0]] = coor[base_j[0]][base_j[1]]
                skey = _coor_to_str(tuple(cc2))
                # Bug fix: membership was tested with the tuple key while
                # the dictionary is indexed by its string form, so the
                # list was re-created at every iteration and only the
                # last metric of each group kept its ratio.
                if skey not in m_bases:
                    m_bases[skey] = []
                m_bases[skey].append(_coor_to_str(cc))

        name = _coor_to_str(cc)
        res[name] = metrics[pos]
        pos += 1
        # odometer-style increment of the multi-dimensional index
        ind[-1] += 1
        last = len(ind) - 1
        while last > 0 and ind[last] >= len(coor[last]):
            ind[last] = 0
            last -= 1
            ind[last] += 1

    # compute ratios against the baseline metric of each group
    for k, v in m_bases.items():
        for ks in v:
            if (k in res and res[k] != 0 and ks in res and
                    res[ks] is not None and res[k] is not None):
                res['R-' + ks[2:]] = float(res[ks]) / res[k]
    return res

120 

121 

def enumerate_export_asv_json(folder, as_df=False, last_one=False,
                              baseline=None, conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    @param folder location of the results
    @param as_df unused here; kept for signature symmetry with
        :func:`export_asv_json`, which performs the conversion
    @param last_one to return only the last one
    @param baseline defines a baseline and computes ratios
    @param conf configuration file, may be used to
        add additional data
    @return iterator on dictionaries, one per test result
    """
    # optional: load the asv configuration to retrieve the benchmark
    # classes and enrich each row with their par_* attributes
    meta_class = None
    if conf is not None:
        if not os.path.exists(conf):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find '{}'.".format(conf))
        with open(conf, "r", encoding='utf-8') as f:
            meta = json.load(f)
        bdir = os.path.join(os.path.dirname(conf), meta['benchmark_dir'])
        if os.path.exists(bdir):
            meta_class = _retrieve_class_parameters(bdir)

    bench = os.path.join(folder, 'benchmarks.json')
    if not os.path.exists(bench):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}'.".format(bench))
    with open(bench, 'r', encoding='utf-8') as f:
        content = json.load(f)

    # content contains the list of tests
    content = {k: v for k, v in content.items() if isinstance(v, dict)}

    # looking into metadata: every sub-folder of *folder* is a machine
    machines = os.listdir(folder)
    for machine in machines:
        if 'benchmarks.json' in machine:
            continue
        filemine = os.path.join(folder, machine, 'machine.json')
        with open(filemine, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        # looking into all tests or the last one
        subs = os.listdir(os.path.join(folder, machine))
        subs = [m for m in subs if m != 'machine.json']
        if last_one:
            # pick the most recently created result file
            # NOTE(review): raises IndexError if no env result file
            # matches — presumably folders always contain one; confirm
            dates = [(os.stat(os.path.join(folder, machine, m)).st_ctime, m)
                     for m in subs if ('-env' in m or 'virtualenv-' in m) and '.json' in m]
            dates.sort()
            subs = [dates[-1][-1]]

        # look into tests
        for sub in subs:
            data = os.path.join(folder, machine, sub)
            with open(data, 'r', encoding='utf-8') as f:
                test_content = json.load(f)
            meta_res = copy.deepcopy(meta)
            # NOTE(review): keys located after 'results' in iteration
            # order are NOT merged into meta_res before the rows are
            # yielded — the merge and the emission share one loop
            for k, v in test_content.items():
                if k != 'results':
                    meta_res[k] = v
                    continue
                results = test_content['results']
                for kk, vv in results.items():
                    if vv is None:
                        raise RuntimeError(  # pragma: no cover
                            'Unexpected empty value for vv')
                    try:
                        # vv is expected to be [metrics, coordinates, hash, ...]
                        metrics, coord, hash = vv[:3]
                    except ValueError as e:  # pragma: no cover
                        raise ValueError(
                            "Test '{}', unable to interpret: {}.".format(
                                kk, vv)) from e

                    # flatten machine/test metadata into the row
                    obs = {}
                    for mk, mv in meta_res.items():
                        if mk in {'result_columns'}:
                            continue
                        if isinstance(mv, dict):
                            for mk2, mv2 in mv.items():
                                obs['{}_{}'.format(mk, mk2)] = mv2
                        else:
                            obs[mk] = mv
                    # split the dotted test name into set/kind/model/name
                    spl = kk.split('.')
                    obs['test_hash'] = hash
                    obs['test_fullname'] = kk
                    if len(spl) >= 4:
                        obs['test_model_set'] = spl[0]
                        obs['test_model_kind'] = spl[1]
                        obs['test_model'] = ".".join(spl[2:-1])
                        obs['test_name'] = spl[-1]
                    elif len(spl) >= 3:
                        obs['test_model_set'] = spl[0]
                        obs['test_model'] = ".".join(spl[1:-1])
                        obs['test_name'] = spl[-1]
                    else:
                        obs['test_model'] = ".".join(spl[:-1])
                        obs['test_name'] = spl[-1]
                    # add the metric values (and ratios if a baseline is set)
                    if metrics is not None:
                        obs.update(
                            _figures2dict(metrics, coord, baseline=baseline))
                    if meta_class is not None:
                        _update_test_metadata(obs, meta_class)
                    yield obs

226 

227 

def export_asv_json(folder, as_df=False, last_one=False, baseline=None,
                    conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    @param folder location of the results
    @param as_df returns a dataframe or
        a list of dictionaries
    @param last_one to return only the last one
    @param baseline computes ratio against the baseline
    @param conf configuration file, may be used to
        add additional data
    @return :epkg:`dataframe` or flat data
    """
    flat = list(enumerate_export_asv_json(
        folder, last_one=last_one, baseline=baseline, conf=conf))
    if not as_df:
        return flat
    import pandas
    return pandas.DataFrame(flat)

249 

250 

def _retrieve_class_parameters(bdir):
    """
    Imports files in bdir, compile files and extra metadata from them.
    Walks *bdir* recursively, executes every python file (except
    ``__init__.py``) and returns the benchmark classes found,
    indexed by class name.
    """
    discovered = {}
    for path, _, files in os.walk(os.path.abspath(bdir)):
        for fname in files:
            full = os.path.join(path, fname)
            if os.path.splitext(full)[-1] != '.py':
                continue
            if os.path.split(full)[-1] == '__init__.py':
                continue
            for cl in _enumerate_classes(full):
                discovered[cl.__name__] = cl
    return discovered

266 

267 

268def _update_test_metadata(row, class_meta): 

269 name = row.get('test_model', None) 

270 if name is None: 

271 return 

272 sub = name.split('.')[-1] 

273 if sub in class_meta: 

274 for k, v in class_meta[sub].__dict__.items(): 

275 if k.startswith('par_'): 

276 row[k] = v 

277 

278 

def _enumerate_classes(filename):
    """
    Extracts the classes of a file.

    The file is compiled and executed; only names starting with an
    upper-case letter and exposing a ``setup_cache`` attribute are
    yielded.
    """
    with open(filename, "r", encoding="utf-8") as f:
        source = f.read()
    globs = fix_missing_imports()
    locs = {}
    code = compile(source, filename, mode='exec')

    try:
        # SECURITY: executes arbitrary code read from *filename*;
        # only use on trusted benchmark files.
        exec(code, globs, locs)  # pylint: disable=W0122
    except NameError as e:  # pragma: no cover
        raise NameError(
            "An import is probably missing from function 'fix_missing_imports'"
            ".") from e

    for name, value in locs.items():
        if not ('A' <= name[0] <= 'Z'):
            continue
        if hasattr(value, 'setup_cache'):
            yield value