Coverage for mlprodict/npy/xop_convert.py: 98%


130 statements  

1""" 

2@file 

3@brief Easier API to build onnx graphs. Inspired from :epkg:`skl2onnx`. 

4 

5.. versionadded:: 0.9 

6""" 

7import logging 

8import numpy 

9from .xop import OnnxOperator, OnnxOperatorFunction 

10from .xop_variable import NodeResultName, Variable 

11 

12 

13logger = logging.getLogger('xop') 

14 

15 

class OnnxSubOnnx(OnnxOperator):
    """
    This operator is used to insert an existing ONNX graph into
    the ONNX graph being built.
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = 'SubOnnx'
    domain = 'mlprodict.xop'


    def __init__(self, model, *inputs, output_names=None):
        logger.debug("SubOnnx(ONNX, %d in, output_names=%r)",
                     len(inputs), output_names)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover
        if len(inputs) > len(model.graph.input):
            raise RuntimeError(  # pragma: no cover
                "Unexpected number of inputs %r > expected %r." % (
                    len(inputs), len(model.graph.input)))
        if (output_names is not None and
                len(output_names) != len(model.graph.output)):
            raise RuntimeError(  # pragma: no cover
                "Unexpected number of outputs %r != expected %r." % (
                    len(output_names), len(model.graph.output)))
        if len(inputs) == 0:
            if hasattr(model, 'graph'):
                inputs = [Variable(i.name, i.type.tensor_type)
                          for i in model.graph.input]
            else:
                inputs = [Variable(n) for n in model.input]
        OnnxOperator.__init__(self, *inputs, output_names=output_names)
        if self.output_names is None and self.expected_outputs is None:
            if hasattr(model, 'graph'):
                self.expected_outputs = [
                    (i.name, i.type.tensor_type)
                    for i in model.graph.output]
            else:
                self.expected_outputs = [(n, None) for n in model.output]
        self.model = model


    @property
    def input_names(self):
        "Returns the input names."
        return ([i.name for i in self.model.graph.input]
                if hasattr(self.model, 'graph') else list(self.model.input))

    def __repr__(self):
        "usual"
        atts = {}
        for att in ['output_names']:
            value = getattr(self, att, None)
            if value is not None:
                atts[att] = value
        atts.update(self.kwargs)
        msg = ", ".join("%s=%r" % (k, v) for k, v in atts.items())
        if len(atts) > 0:
            msg = ", " + msg
        return "%s(...%s)" % (
            self.__class__.__name__, msg)


    def add_to(self, builder):
        """
        Adds to graph builder.

        :param builder: instance of @see cl _GraphBuilder,
            it must have a method `add_node`
        """
        logger.debug("SubOnnx.add_to(builder)")
        inputs = builder.get_input_names(self, self.inputs)
        n_outputs = len(self.model.graph.output)
        outputs = [builder.get_unique_output_name(NodeResultName(self, i))
                   for i in range(n_outputs)]

        mapped_names = {}

        # adding initializers
        for init in self.model.graph.initializer:
            new_name = builder.get_unique_name(init.name, reserved=False)
            mapped_names[init.name] = new_name
            builder.add_initializer(new_name, init)

        # linking inputs
        for inp, name in zip(self.model.graph.input, inputs):
            new_name = builder.get_unique_name(inp.name, reserved=False)
            mapped_names[inp.name] = new_name
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + name, reserved=False),
                [name], [new_name])

        # adding nodes
        for node in self.model.graph.node:
            new_inputs = []
            for i in node.input:
                if i not in mapped_names:
                    raise RuntimeError(  # pragma: no cover
                        "Unable to find input %r in %r." % (i, mapped_names))
                new_inputs.append(mapped_names[i])
            new_outputs = []
            for o in node.output:
                new_name = builder.get_unique_name(o, reserved=False)
                mapped_names[o] = new_name
                new_outputs.append(new_name)

            atts = {}
            for att in node.attribute:
                atts[att.name] = OnnxOperatorFunction.attribute_to_value(att)

            builder.add_node(
                node.op_type,
                builder.get_unique_name('_sub_' + node.name, reserved=False),
                new_inputs, new_outputs, domain=node.domain, **atts)

        # linking outputs
        for out, name in zip(self.model.graph.output, outputs):
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + out.name, reserved=False),
                [mapped_names[out.name]], [name])


    def to_onnx_this(self, evaluated_inputs):
        """
        Returns the ONNX graph.

        :param evaluated_inputs: unused
        :return: ONNX graph
        """
        return self.model

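# Illustrative sketch, not part of the original module: OnnxSubOnnx wraps an
# already converted ONNX model so that it composes with other xop operators.
# 'existing_onnx' is a hypothetical ModelProto with one float input 'X' and
# a single output; the call pattern is an assumption based on the xop API.
def _example_sub_onnx(existing_onnx):
    sub = OnnxSubOnnx(existing_onnx, 'X', output_names=['Y'])
    # returns a new ModelProto embedding the wrapped graph
    return sub.to_onnx(numpy.float32, numpy.float32)
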

class OnnxSubEstimator(OnnxSubOnnx):
    """
    This operator is used to call the converter of a model
    to insert the node coming from the conversion into a
    bigger ONNX graph. It supports models from :epkg:`scikit-learn`
    converted with :epkg:`sklearn-onnx`.

    :param model: model to convert
    :param inputs: inputs
    :param op_version: targeted opset
    :param options: to rewrite the options used to convert the model
    :param initial_types: the implementation may be wrong in guessing
        the input types of the model, this parameter can be used
        to overwrite them, usually a dictionary
        `{ input_name: numpy array as an example }`
    :param kwargs: any other parameters such as black listed or
        white listed operators
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = "SubEstimator"
    domain = 'mlprodict.xop'


    def __init__(self, model, *inputs, op_version=None,
                 output_names=None, options=None,
                 initial_types=None, **kwargs):
        logger.debug("OnnxSubEstimator(%r, %r, op_version=%r, "
                     "output_names=%r, initial_types=%r, options=%r, "
                     "kwargs=%r)", type(model), inputs, op_version,
                     output_names, initial_types, options, kwargs)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover
        onx = OnnxSubEstimator._to_onnx(
            model, inputs, op_version=op_version, options=options,
            initial_types=initial_types, **kwargs)
        OnnxSubOnnx.__init__(
            self, onx, *inputs, output_names=output_names)
        self.ml_model = model
        self.options = options
        self.initial_types = initial_types
        self.op_version = op_version


    def __repr__(self):
        "usual"
        atts = {}
        for att in ['op_version', 'output_names', 'options',
                    'initial_types']:
            value = getattr(self, att, None)
            if value is not None:
                atts[att] = value
        atts.update(self.kwargs)
        msg = ", ".join("%s=%r" % (k, v) for k, v in atts.items())
        if len(atts) > 0:
            msg = ", " + msg
        return "%s(%r%s)" % (
            self.__class__.__name__, self.ml_model, msg)


    @staticmethod
    def _to_onnx(model, inputs, op_version=None, options=None,
                 initial_types=None, **kwargs):
        """
        Converts a model into ONNX and inserts it into an ONNX graph.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset version or None to use the latest one
        :param options: options to change the behaviour of the converter
        :param initial_types: overwrites the guessed input types of the model
        :param kwargs: additional parameters such as black listed or
            white listed operators
        :return: ONNX model

        The method currently supports models trained with
        :epkg:`scikit-learn`, :epkg:`xgboost`, :epkg:`lightgbm`.
        """
        from sklearn.base import BaseEstimator

        if isinstance(model, BaseEstimator):
            logger.debug("OnnxSubEstimator._to_onnx(%r, %r, op_version=%r "
                         "options=%r, initial_types=%r, kwargs=%r)",
                         type(model), inputs, op_version, options,
                         initial_types, kwargs)
            return OnnxSubEstimator._to_onnx_sklearn(
                model, inputs, op_version=op_version, options=options,
                initial_types=initial_types, **kwargs)
        raise RuntimeError(  # pragma: no cover
            "Unable to convert model type %r into ONNX." % type(model))


    @staticmethod
    def _to_onnx_sklearn(model, inputs, op_version=None, options=None,
                         initial_types=None, **kwargs):
        """
        Converts a :epkg:`scikit-learn` model into ONNX
        and inserts it into an ONNX graph. The library relies on
        function @see fn to_onnx and library :epkg:`sklearn-onnx`.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset version or None to use the latest one
        :param initial_types: if None, the input types are guessed from the
            inputs. The function converts the previous node of the graph
            into ONNX and tries to infer the initial_types
            with the little information it has. It may not work.
            It is recommended to specify this parameter.
        :param options: options to change the behaviour of the converter
        :param kwargs: additional parameters such as black listed or
            white listed operators
        :return: ONNX model

        Default options is `{'zipmap': False}` for a classifier.
        """

        from ..onnx_conv.convert import to_onnx
        if options is None:
            from sklearn.base import ClassifierMixin
            if isinstance(model, ClassifierMixin):
                options = {'zipmap': False}
        if initial_types is None:
            # adding more information
            from skl2onnx.common.data_types import _guess_numpy_type  # delayed
            for i, n in enumerate(inputs):
                if not isinstance(n, Variable):
                    raise NotImplementedError(
                        "Input %d is not a variable but %r." % (i, type(n)))
            initial_types = [(n.name, _guess_numpy_type(n.dtype, n.shape))
                             for n in inputs]

        logger.debug("OnnxSubEstimator._to_onnx_sklearn(%r, %r, "
                     "op_version=%r, options=%r, initial_types=%r, "
                     "kwargs=%r)",
                     type(model), inputs, op_version, options,
                     initial_types, kwargs)

        if isinstance(initial_types, numpy.ndarray):
            if len(inputs) != 1:
                raise RuntimeError(  # pragma: no cover
                    "The model has %s inputs but only %d input is "
                    "described in 'initial_types'." % (
                        len(inputs), 1))
            X = initial_types
            initial_types = None
        elif len(inputs) != len(initial_types):
            raise RuntimeError(  # pragma: no cover
                "The model has %s inputs but only %d inputs are "
                "described in 'initial_types'." % (
                    len(inputs), len(initial_types)))
        else:
            X = None

        onx = to_onnx(model, X, initial_types=initial_types, options=options,
                      rewrite_ops=True, target_opset=op_version, **kwargs)
        return onx
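
A minimal usage sketch, assuming scikit-learn and sklearn-onnx are installed: it wraps a trained LinearRegression with OnnxSubEstimator and feeds its output into another xop node. The training data, the 'X'/'Y' names and the final Identity node are illustrative choices, not part of the module above.

import numpy
from sklearn.linear_model import LinearRegression
from mlprodict.npy.xop import loadop
from mlprodict.npy.xop_convert import OnnxSubEstimator

# hypothetical training data
X_train = numpy.random.randn(20, 3).astype(numpy.float32)
y_train = X_train.sum(axis=1)
model = LinearRegression().fit(X_train, y_train)

# passing an example array as initial_types avoids guessing the input type
sub = OnnxSubEstimator(model, 'X', initial_types=X_train)
OnnxIdentity = loadop('Identity')
final = OnnxIdentity(sub, output_names=['Y'])
onx = final.to_onnx(numpy.float32, numpy.float32)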