Coverage for mlprodict/asv_benchmark/asv_exports.py: 92%
1"""
2@file
3@brief Functions to help exporting json format into text.
4"""
5import pprint
6import copy
7import os
8import json
9from json.decoder import JSONDecodeError
def fix_missing_imports():
    """
    The execution of a file through function :epkg:`exec`
    does not import new modules. They must already be
    available in the globals when the execution happens.
    This function fills the gap for some of them.

    @return     added names
    """
    from sklearn.linear_model import LogisticRegression
    return {'LogisticRegression': LogisticRegression}
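

# Hedged usage sketch (added for illustration, not part of the original
# module): the mapping returned by fix_missing_imports can be passed as
# globals to exec so that a benchmark file referencing LogisticRegression
# runs even though exec performs no import. The source string below is
# hypothetical.
def _example_fix_missing_imports():
    src = "model = LogisticRegression(max_iter=10)"
    glob = fix_missing_imports()
    loc = {}
    exec(compile(src, '<example>', 'exec'), glob, loc)  # pylint: disable=W0122
    return loc['model']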


def _dict2str(d):
    # Flattens a dictionary (possibly nested) into a compact string,
    # concatenating each key with its value and joining items with dashes.
    vals = []
    for k, v in d.items():
        if isinstance(v, dict):
            vals.append("{}{}".format(k, _dict2str(v)))
        else:
            vals.append("{}{}".format(k, v))
    return "-".join(vals)


def _coor_to_str(cc):
    # Converts a list of coordinates (parameter values stored by asv as
    # repr strings, possibly dictionaries) into a readable key 'M-...'.
    ccs = []
    for c in cc:
        if c in ('{}', {}):
            c = "o"
        elif len(c) > 1 and (c[0], c[-1]) == ('{', '}'):
            c = c.replace("<class ", "")
            c = c.replace(">:", ":")
            c = c.replace("'", '"').replace("True", "1").replace("False", "0")
            try:
                d = json.loads(c)
            except JSONDecodeError as e:  # pragma: no cover
                raise RuntimeError(
                    "Unable to interpret '{}'.".format(c)) from e

            if len(d) == 1:
                its = list(d.items())[0]
                if '.' in its[0]:
                    c = _dict2str(its[1])
                else:
                    c = _dict2str(d)
            else:
                c = _dict2str(d)
        c = str(c).strip("'")
        ccs.append(c)
    return 'M-' + "-".join(map(str, ccs)).replace("'", "")


def _figures2dict(metrics, coor, baseline=None):
    """
    Converts the data from list to dictionaries.

    @param      metrics     single array of values
    @param      coor        list of lists of coordinate names
    @param      baseline    name of the coordinate used as the baseline,
                            ratios against it are added to the output
    @return                 dictionary of metrics
    """
    if baseline is None:
        base_j = None
    else:
        quoted_base = "'{}'".format(baseline)
        base_j = None
        for i, base in enumerate(coor):
            if baseline in base:
                base_j = i, base.index(baseline)
                break
            if quoted_base in base:
                base_j = i, base.index(quoted_base)
                break
        if base_j is None:
            raise ValueError(  # pragma: no cover
                "Unable to find value baseline '{}' or [{}] in {}".format(
                    baseline, quoted_base, pprint.pformat(coor)))
    m_bases = {}
    ind = [0 for c in coor]
    res = {}
    pos = 0
    while ind[0] < len(coor[0]):
        # current combination of coordinates
        cc = [coor[i][ind[i]] for i in range(len(ind))]
        if baseline is not None:
            if cc[base_j[0]] != base_j[1]:
                cc2 = cc.copy()
                cc2[base_j[0]] = coor[base_j[0]][base_j[1]]
                key = tuple(cc2)
                skey = _coor_to_str(key)
                if skey not in m_bases:
                    m_bases[skey] = []
                m_bases[skey].append(_coor_to_str(cc))

        name = _coor_to_str(cc)
        res[name] = metrics[pos]
        pos += 1
        # moves to the next combination, last axis varying fastest
        ind[-1] += 1
        last = len(ind) - 1
        while last > 0 and ind[last] >= len(coor[last]):
            ind[last] = 0
            last -= 1
            ind[last] += 1

    for k, v in m_bases.items():
        for ks in v:
            if (k in res and res[k] != 0 and ks in res and
                    res[ks] is not None and res[k] is not None):
                res['R-' + ks[2:]] = float(res[ks]) / res[k]
    return res
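

# Hedged sketch (added for illustration, not part of the original module):
# with two parameter axes, the function walks the coordinates with the last
# axis varying fastest, so four metric values map to the four combinations.
# The parameter values below are made up; the output contains raw values
# under 'M-...' keys and ratios against the baseline under 'R-...' keys.
def _example_figures2dict():
    metrics = [0.1, 0.2, 0.3, 0.4]
    coor = [["'skl'", "'ort'"], ["'float'", "'double'"]]
    return _figures2dict(metrics, coor, baseline='skl')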


def enumerate_export_asv_json(folder, as_df=False, last_one=False,
                              baseline=None, conf=None):
    """
    Looks into :epkg:`asv` results and yields all of them
    as flat dictionaries.

    @param      folder      location of the results
    @param      last_one    to return only the most recent result
    @param      baseline    defines a baseline and computes ratios against it
    @param      conf        configuration file, may be used to
                            add additional data
    @return                 iterator on dictionaries
    """
    meta_class = None
    if conf is not None:
        if not os.path.exists(conf):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find '{}'.".format(conf))
        with open(conf, "r", encoding='utf-8') as f:
            meta = json.load(f)
        bdir = os.path.join(os.path.dirname(conf), meta['benchmark_dir'])
        if os.path.exists(bdir):
            meta_class = _retrieve_class_parameters(bdir)

    bench = os.path.join(folder, 'benchmarks.json')
    if not os.path.exists(bench):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}'.".format(bench))
    with open(bench, 'r', encoding='utf-8') as f:
        content = json.load(f)

    # content contains the list of tests
    content = {k: v for k, v in content.items() if isinstance(v, dict)}

    # looking into metadata
    machines = os.listdir(folder)
    for machine in machines:
        if 'benchmarks.json' in machine:
            continue
        filename = os.path.join(folder, machine, 'machine.json')
        with open(filename, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        # looking into all tests or the last one
        subs = os.listdir(os.path.join(folder, machine))
        subs = [m for m in subs if m != 'machine.json']
        if last_one:
            dates = [(os.stat(os.path.join(folder, machine, m)).st_ctime, m)
                     for m in subs
                     if ('-env' in m or 'virtualenv-' in m) and '.json' in m]
            dates.sort()
            subs = [dates[-1][-1]]

        # look into tests
        for sub in subs:
            data = os.path.join(folder, machine, sub)
            with open(data, 'r', encoding='utf-8') as f:
                test_content = json.load(f)
            meta_res = copy.deepcopy(meta)
            for k, v in test_content.items():
                if k != 'results':
                    meta_res[k] = v
                    continue
                results = test_content['results']
                for kk, vv in results.items():
                    if vv is None:
                        raise RuntimeError(  # pragma: no cover
                            'Unexpected empty value for vv')
                    try:
                        metrics, coord, hash_ = vv[:3]
                    except ValueError as e:  # pragma: no cover
                        raise ValueError(
                            "Test '{}', unable to interpret: {}.".format(
                                kk, vv)) from e

                    obs = {}
                    for mk, mv in meta_res.items():
                        if mk in {'result_columns'}:
                            continue
                        if isinstance(mv, dict):
                            for mk2, mv2 in mv.items():
                                obs['{}_{}'.format(mk, mk2)] = mv2
                        else:
                            obs[mk] = mv
                    spl = kk.split('.')
                    obs['test_hash'] = hash_
                    obs['test_fullname'] = kk
                    if len(spl) >= 4:
                        obs['test_model_set'] = spl[0]
                        obs['test_model_kind'] = spl[1]
                        obs['test_model'] = ".".join(spl[2:-1])
                        obs['test_name'] = spl[-1]
                    elif len(spl) >= 3:
                        obs['test_model_set'] = spl[0]
                        obs['test_model'] = ".".join(spl[1:-1])
                        obs['test_name'] = spl[-1]
                    else:
                        obs['test_model'] = ".".join(spl[:-1])
                        obs['test_name'] = spl[-1]
                    if metrics is not None:
                        obs.update(
                            _figures2dict(metrics, coord, baseline=baseline))
                    if meta_class is not None:
                        _update_test_metadata(obs, meta_class)
                    yield obs
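

# Hedged usage sketch (added for illustration, not part of the original
# module): iterating over an asv results folder and printing one line per
# recorded test; the 'results' folder name is an assumption.
def _example_enumerate_export_asv_json():
    for obs in enumerate_export_asv_json('results', last_one=True):
        print(obs['test_fullname'], obs['test_hash'])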


def export_asv_json(folder, as_df=False, last_one=False, baseline=None,
                    conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    @param      folder      location of the results
    @param      as_df       returns a dataframe or
                            a list of dictionaries
    @param      last_one    to return only the most recent result
    @param      baseline    computes ratios against this baseline
    @param      conf        configuration file, may be used to
                            add additional data
    @return                 :epkg:`dataframe` or flat data
    """
    rows = list(enumerate_export_asv_json(
        folder, last_one=last_one, baseline=baseline, conf=conf))
    if as_df:
        import pandas
        return pandas.DataFrame(rows)
    return rows
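

# Hedged usage sketch (added for illustration, not part of the original
# module): flattening the same results into a pandas dataframe; the
# 'results' folder, the 'asv.conf.json' file and the 'skl' baseline value
# are assumptions.
def _example_export_asv_json():
    df = export_asv_json('results', as_df=True, last_one=True,
                         baseline='skl', conf='asv.conf.json')
    return df.sort_values('test_fullname')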


def _retrieve_class_parameters(bdir):
    """
    Imports the files found in *bdir*, compiles them and extracts
    metadata from the classes they define.
    """
    found = {}
    for path, _, files in os.walk(os.path.abspath(bdir)):
        fulls = [os.path.join(path, f) for f in files]
        for full in fulls:
            if (os.path.splitext(full)[-1] == '.py' and
                    os.path.split(full)[-1] != '__init__.py'):
                cls = list(_enumerate_classes(full))
                for cl in cls:
                    name = cl.__name__
                    found[name] = cl
    return found
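

# Hedged sketch (added for illustration, not part of the original module):
# mapping benchmark class names to classes for a benchmark folder; the
# 'benches' folder name is an assumption.
def _example_retrieve_class_parameters():
    classes = _retrieve_class_parameters('benches')
    return sorted(classes)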


def _update_test_metadata(row, class_meta):
    # Copies the 'par_*' attributes of the benchmark class matching
    # row['test_model'] into the row.
    name = row.get('test_model', None)
    if name is None:
        return
    sub = name.split('.')[-1]
    if sub in class_meta:
        for k, v in class_meta[sub].__dict__.items():
            if k.startswith('par_'):
                row[k] = v


def _enumerate_classes(filename):
    """
    Extracts the classes of a file.
    """
    with open(filename, "r", encoding="utf-8") as f:
        content = f.read()
    gl = fix_missing_imports()
    loc = {}
    cp = compile(content, filename, mode='exec')

    try:
        exec(cp, gl, loc)  # pylint: disable=W0122
    except NameError as e:  # pragma: no cover
        raise NameError(
            "An import is probably missing from function 'fix_missing_imports'"
            ".") from e

    for k, v in loc.items():
        # keeps only classes whose name starts with an uppercase letter
        # and which define 'setup_cache' (asv benchmark classes)
        if k[0] < 'A' or k[0] > 'Z':
            continue
        if not hasattr(v, 'setup_cache'):
            continue
        yield v
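

# Hedged sketch (added for illustration, not part of the original module):
# listing the benchmark classes found in one file; the path below is an
# assumption.
def _example_enumerate_classes():
    for cl in _enumerate_classes('benches/bench_LogisticRegression.py'):
        print(cl.__name__)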