Coverage for onnxcustom/utils/orttraining_helper.py: 99%


242 statements  

# pylint: disable=C0415,E1101
"""
@file
@brief ONNX manipulations to help build ONNX gradient graphs.
"""
from collections import OrderedDict
import numpy
from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
from onnx.numpy_helper import to_array, from_array
from onnx.helper import (
    make_node, make_graph, make_model, make_tensor_value_info,
    set_model_props)
from onnx import TensorProto

def _unique_name(existing_names, name):
    """
    Returns a name different from any name in *existing_names*.

    :param existing_names: set of names
    :param name: current name
    :return: unique name
    """
    if name not in existing_names:
        existing_names.add(name)
        return name
    name0 = name
    i = 2
    while name in existing_names:
        name = "%s_%d" % (name0, i)
        i += 1
    existing_names.add(name)
    return name
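# Illustrative sketch (not part of the original module): how ``_unique_name``
# resolves collisions by appending an increasing suffix. The values in the
# trailing comments follow from the implementation above.
#
#     existing = {"loss_diff"}
#     _unique_name(existing, "loss_diff")   # returns "loss_diff_2"
#     _unique_name(existing, "loss_grad")   # returns "loss_grad"
#     sorted(existing)   # ["loss_diff", "loss_diff_2", "loss_grad"]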

def _loss_l1(existing_names, elem, shape,
             output_name, label_name,
             weight_name, loss_name):
    """
    Implements the L1 (absolute error) loss.
    """
    diff_name = _unique_name(existing_names, "loss_diff")
    diff2_name = _unique_name(existing_names, "loss_diff")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Abs', [diff_name], [diff2_name])]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [diff2_name, weight_name], [res_name]))
    else:
        res_name = diff2_name
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        [], inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])

def _loss_l2(existing_names, elem, shape,
             output_name, label_name,
             weight_name, loss_name):
    """
    Implements the L2 (squared error) loss.
    """
    diff_name = _unique_name(existing_names, "loss_diff")
    diff2_name = _unique_name(existing_names, "loss_diff")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Mul', [diff_name, diff_name], [diff2_name])]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [diff2_name, weight_name], [res_name]))
    else:
        res_name = diff2_name
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        [], inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])
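# Illustrative sketch (not part of the original module): what the subgraphs
# built by _loss_l1 and _loss_l2 compute, expressed with numpy. The arrays
# `yp` (predictions), `yt` (labels) and `w` (per-sample weights) are
# hypothetical placeholders; shapes are simplified to 1-D.
#
#     import numpy
#     yp = numpy.array([0.9, 0.2, 0.4], dtype=numpy.float32)
#     yt = numpy.array([1.0, 0.0, 0.5], dtype=numpy.float32)
#     w = numpy.array([1.0, 2.0, 1.0], dtype=numpy.float32)
#     diff = yp - yt                          # Sub
#     loss_l1 = (numpy.abs(diff) * w).sum()   # Abs, Mul, ReduceSum
#     loss_l2 = (diff * diff * w).sum()       # Mul, Mul, ReduceSum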

def _loss_elastic(existing_names, elem, shape,
                  output_name, label_name,
                  weight_name, loss_name,
                  l1_weight=0.5, l2_weight=0.5):
    """
    Implements the elastic loss, a weighted mixture of the L1 and L2 losses.
    """
    l1_name = _unique_name(existing_names, "l1_name")
    l2_name = _unique_name(existing_names, "l2_name")
    dtype = TENSOR_TYPE_TO_NP_TYPE[elem]
    onx_l1_weight = from_array(
        numpy.array([l1_weight], dtype=dtype), name=l1_name)
    onx_l2_weight = from_array(
        numpy.array([l2_weight], dtype=dtype), name=l2_name)
    inits = [onx_l1_weight, onx_l2_weight]

    diff_name = _unique_name(existing_names, "loss_diff")
    diff1_name = _unique_name(existing_names, "loss_l1")
    diff2_name = _unique_name(existing_names, "loss_l2")
    wl1_name = _unique_name(existing_names, "loss_l1")
    wl2_name = _unique_name(existing_names, "loss_l2")
    final_loss = _unique_name(existing_names, "final_loss")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Mul', [diff_name, diff_name], [diff2_name]),
             make_node('Abs', [diff_name], [diff1_name]),
             make_node('Mul', [diff1_name, l1_name], [wl1_name]),
             make_node('Mul', [diff2_name, l2_name], [wl2_name]),
             make_node('Add', [wl1_name, wl2_name], [final_loss]),
             ]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [final_loss, weight_name], [res_name]))
    else:
        res_name = final_loss
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        inits, inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])
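# Illustrative sketch (not part of the original module): reusing `yp`, `yt`
# and `w` from the sketch above, the elastic subgraph evaluates to
#
#     diff = yp - yt                                                  # Sub
#     loss_elastic = ((0.5 * numpy.abs(diff) + 0.5 * diff * diff) * w).sum()
#     # Abs/Mul/Mul/Add, optional Mul by the weights, then ReduceSum;
#     # 0.5 and 0.5 are the default l1_weight and l2_weight.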

def _loss_log(existing_names, elem, shape,
              output_name, label_name,
              weight_name, loss_name,
              eps=1e-6):
    """
    Implements the log (cross-entropy) loss
    :math:`log(yt, yp) = -(1-yt)\\log(1-yp) - yt\\log(yp)`.
    This only works for a binary classification where *yp* is the
    predicted probability and *yt* is the expected probability.
    *yt* is expected to be binary, *yp* is a matrix with two
    columns whose sum on every line is 1.
    Parameter *eps* is used to avoid computing *log(0)*.
    """
    if output_name == 'output_label':
        raise RuntimeError(  # pragma: no cover
            "output_name=%r, log loss does not work on labels."
            "" % output_name)
    dtype = TENSOR_TYPE_TO_NP_TYPE[elem]
    one_name = _unique_name(existing_names, "one_name")
    eps_name = _unique_name(existing_names, "eps_name")
    eps1_name = _unique_name(existing_names, "eps1_name")
    axes_name = _unique_name(existing_names, "axes_name")

    eps_init = from_array(numpy.array([eps], dtype=dtype), name=eps_name)
    one_init = from_array(numpy.array([1], dtype=dtype), name=one_name)
    eps1_init = from_array(
        numpy.array([1 - eps], dtype=dtype), name=eps1_name)
    axes_init = from_array(
        numpy.array([1], dtype=numpy.int64), name=axes_name)

    clip_name = _unique_name(existing_names, "clip_name")
    clip_red_name = _unique_name(existing_names, "clip_red_name")
    new_output_name = _unique_name(existing_names, "new_output_name")
    cast_name = _unique_name(existing_names, "cast_name")
    log_name = _unique_name(existing_names, "log_name")
    subl_name = _unique_name(existing_names, "subl_name")
    conc_name = _unique_name(existing_names, "conc_name")
    mul_name = _unique_name(existing_names, "mul_name")
    like_name = _unique_name(existing_names, "like_name")

    nodes = [
        make_node(
            'Clip', [output_name, eps_name, eps1_name], [clip_name]),
        make_node(
            'ReduceSum', [clip_name, axes_name], [clip_red_name], keepdims=1),
        make_node('Div', [clip_name, clip_red_name], [new_output_name]),
        make_node('Log', [new_output_name], [log_name]),
        make_node('Cast', [label_name], [cast_name], to=elem),
        make_node('Sub', [one_name, cast_name], [subl_name]),
        make_node('Concat', [subl_name, cast_name], [conc_name], axis=1),
        make_node('Mul', [log_name, conc_name], [mul_name]),
        make_node(
            'ReduceSum', [mul_name, axes_name], [like_name], keepdims=1)]

    inputs = [make_tensor_value_info(label_name, TensorProto.INT64, shape)]

    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
        likew_name = _unique_name(existing_names, "likew_name")
        nodes.append(
            make_node('Mul', [like_name, weight_name], [likew_name]))
        like_name = likew_name

    shape_name = _unique_name(existing_names, "shape_name")
    onx_shape = from_array(
        numpy.array([1, 1], dtype=numpy.int64), name=shape_name)
    reduced_loss = _unique_name(existing_names, "reduced_loss")
    neg_reduced_loss = _unique_name(existing_names, "neg_reduced_loss")
    nodes.extend([
        make_node('ReduceMean', [like_name], [reduced_loss]),
        make_node('Neg', [reduced_loss], [neg_reduced_loss]),
        make_node('Reshape', [neg_reduced_loss, shape_name], [loss_name])])

    return (
        [eps_init, eps1_init, one_init, axes_init, onx_shape],
        inputs, nodes, [make_tensor_value_info(loss_name, elem, [1, 1])])
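# Illustrative sketch (not part of the original module): the computation the
# _loss_log subgraph expresses, written with numpy. `yp` is a hypothetical
# probability matrix with two columns and `yt` a column of binary labels.
#
#     import numpy
#     yp = numpy.array([[0.7, 0.3], [0.2, 0.8]], dtype=numpy.float32)
#     yt = numpy.array([[0], [1]], dtype=numpy.int64)
#     eps = 1e-6
#     p = numpy.clip(yp, eps, 1 - eps)                  # Clip
#     p = p / p.sum(axis=1, keepdims=True)              # ReduceSum, Div
#     mask = numpy.hstack([1 - yt, yt])                 # Cast, Sub, Concat
#     like = (numpy.log(p) * mask).sum(axis=1, keepdims=True)  # Log, Mul, ReduceSum
#     loss = -like.mean()                               # ReduceMean, Neg, Reshape to (1, 1)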

def penalty_loss_onnx(name, dtype, l1=None, l2=None, existing_names=None):
    """
    Returns onnx nodes to compute
    :math:`\\alpha \\sum_i |w_i| + \\beta \\sum_i w_i^2`
    where :math:`\\alpha=l1` and :math:`\\beta=l2`.

    :param name: name of the weight tensor (input or initializer)
    :param dtype: numpy dtype
    :param l1: coefficient for the L1 penalty
    :param l2: coefficient for the L2 penalty
    :param existing_names: names already taken in the ONNX graph
    :return: initializer, nodes
    """
    suffix = name
    cst_shape = _unique_name(existing_names, "shape_%s" % suffix)
    new_name = _unique_name(existing_names, "reshaped_%s" % suffix)
    inits = [from_array(
        numpy.array([-1], dtype=numpy.int64), name=cst_shape)]
    nodes = [make_node('Reshape', [name, cst_shape], [new_name])]
    name = new_name

    if l1 is None or l1 == 0:
        if l2 is None or l2 == 0:
            raise ValueError(  # pragma: no cover
                "l1 and l2 cannot be null or None at the same time, "
                "name=%r." % name)
        l2_name = _unique_name(existing_names, "l2_weight_%s" % suffix)
        inits.extend([from_array(
            numpy.array([l2], dtype=dtype), name=l2_name)])
        mul_name = _unique_name(existing_names, "reduced0_%s" % suffix)
        red_name = _unique_name(existing_names, "reduced_%s" % suffix)
        pen_name = _unique_name(existing_names, "penalty_%s" % suffix)
        nodes.extend([
            make_node('Mul', [name, name], [mul_name]),
            make_node('ReduceSum', [mul_name], [red_name]),
            make_node('Mul', [red_name, l2_name], [pen_name])])
        return inits, nodes

    if l2 is None or l2 == 0:
        l1_name = _unique_name(existing_names, "l1_weight_%s" % suffix)
        inits.extend([from_array(
            numpy.array([l1], dtype=dtype), name=l1_name)])
        red_name = _unique_name(existing_names, "reduced_%s" % suffix)
        abs_name = _unique_name(existing_names, "absolute_%s" % suffix)
        pen_name = _unique_name(existing_names, "penalty_%s" % suffix)
        nodes.extend([
            make_node('Abs', [name], [abs_name]),
            make_node('ReduceSum', [abs_name], [red_name]),
            make_node('Mul', [red_name, l1_name], [pen_name])])
        return inits, nodes

    l1_name = _unique_name(existing_names, "l1_weight_%s" % suffix)
    l2_name = _unique_name(existing_names, "l2_weight_%s" % suffix)
    inits.extend([
        from_array(numpy.array([l1], dtype=dtype), name=l1_name),
        from_array(numpy.array([l2], dtype=dtype), name=l2_name)])

    red_name1 = _unique_name(existing_names, "reduced1_%s" % suffix)
    mul_name = _unique_name(existing_names, "reducedm_%s" % suffix)
    red_name2 = _unique_name(existing_names, "reduced2_%s" % suffix)
    abs_name = _unique_name(existing_names, "absolute_%s" % suffix)
    pen_name1 = _unique_name(existing_names, "penalty1_%s" % suffix)
    pen_name2 = _unique_name(existing_names, "penalty2_%s" % suffix)
    pen_name = _unique_name(existing_names, "penalty_%s" % suffix)
    nodes.extend([
        make_node('Mul', [name, name], [mul_name]),
        make_node('ReduceSum', [mul_name], [red_name2]),
        make_node('Mul', [red_name2, l2_name], [pen_name2]),
        make_node('Abs', [name], [abs_name]),
        make_node('ReduceSum', [abs_name], [red_name1]),
        make_node('Mul', [red_name1, l1_name], [pen_name1]),
        make_node('Add', [pen_name1, pen_name2], [pen_name])])

    return inits, nodes
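# Illustrative sketch (not part of the original module): the value the nodes
# returned by penalty_loss_onnx compute for a weight tensor `w` and
# hypothetical coefficients l1=0.1, l2=0.9.
#
#     import numpy
#     w = numpy.array([0.5, -1.0, 2.0], dtype=numpy.float32)
#     penalty = 0.1 * numpy.abs(w).sum() + 0.9 * (w * w).sum()
#     # = 0.1 * 3.5 + 0.9 * 5.25 = 5.075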

def get_train_initializer(onx):
    """
    Returns the initializers to train.

    :param onx: ONNX model
    :return: dictionary `{name: (value, tensor)}`

    The function walks through the initializers and
    returns all tensors with float16, float or double elements.
    """
    res = OrderedDict()
    for init in onx.graph.initializer:
        if init.data_type in (
                TensorProto.FLOAT16,  # pylint: disable=E1101
                TensorProto.FLOAT,  # pylint: disable=E1101
                TensorProto.DOUBLE):  # pylint: disable=E1101
            res[init.name] = (to_array(init), init)
    return res
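# Illustrative sketch (not part of the original module): listing the trainable
# initializers of a model; "model.onnx" is a hypothetical path.
#
#     import onnx
#     onx = onnx.load("model.onnx")
#     for name, (value, proto) in get_train_initializer(onx).items():
#         print(name, value.shape, value.dtype)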

def _rewrite_op_no_grad(onx):
    """
    Rewrites operators with no registered gradient,
    e.g. *Reciprocal* is replaced by a *Div* by one.
    """
    set_types = set(n.op_type for n in onx.graph.node)
    if "Reciprocal" in set_types:
        from skl2onnx.algebra.onnx_ops import OnnxDiv  # pylint: disable=E0611
        from skl2onnx.common.data_types import FloatTensorType
        from .onnx_rewriter import onnx_rewrite_operator

        opset = None
        for op in onx.opset_import:
            if op.domain in ('', 'ai.onnx'):
                opset = op.version
        if opset is None:  # pragma: no cover
            from .. import get_max_opset
            opset = get_max_opset()

        node = OnnxDiv(numpy.array([1], dtype=numpy.float32),
                       'X', output_names=['Y'],
                       op_version=opset)
        rewrite_onx = node.to_onnx(
            inputs={'X': FloatTensorType()},
            outputs={'Y': FloatTensorType()},
            target_opset=opset)
        onx = onnx_rewrite_operator(onx, 'Reciprocal', rewrite_onx)

    return onx

def add_loss_output(onx, score_name='squared_error',
                    loss_name='loss', label_name='label',
                    weight_name=None, penalty=None,
                    output_index=None, **kwargs):
    """
    Modifies an ONNX graph to add operators to score and allow training.

    :param onx: ONNX graph
    :param score_name: name of the score
    :param loss_name: name of the output loss
    :param label_name: name of the label input
    :param weight_name: None or the name of a per-sample weight input
        used while computing the loss
    :param penalty: dictionary similar to the
        following one `{ weight_name: {'l1': alpha, 'l2': beta} }`
        or `{ weight_name: beta}`,
        it adds a L1 and/or L2 penalty to one input or initializer,
        penalty = :math:`\\alpha \\sum_i |w_i| + \\beta \\sum_i w_i^2`
    :param output_index: the output used to compute the loss,
        if None, the function assumes there is only one output,
        it must be specified if there is more than one,
        it can be an integer or a string (output name)
    :param kwargs: additional arguments for losses (see below)
    :return: modified graph

    Possible values for *score_name*:

    * `'squared_error'` or `'l2'`: :math:`\\sum_i{(f(x_i)-y_i)^2}` or
      :math:`\\sum_i{w_i (f(x_i)-y_i)^2}` if *weight_name*
      is not None
    * `'absolute_error'` or `'l1'`: :math:`\\sum_i{|f(x_i)-y_i|}` or
      :math:`\\sum_i{w_i |f(x_i)-y_i|}` if *weight_name*
      is not None
    * `'elastic'`: mixture of losses, kwargs may define
      *l1_weight* and *l2_weight*; if undefined, both default to 0.5
    * `'log'`: log loss :math:`-(1-yt)\\log(1-yp) - yt\\log(yp)`,
      this only works for a binary classification where *yp* is the
      predicted probability, *yt* is the expected probability.
      *yt* is expected to be binary, *yp* is a matrix with two
      columns, the sum on every line is 1.

    See example :ref:`l-orttraining-nn-gpu`.
    The next example shows an elastic loss mixing the L1 and L2 losses.

    .. gdot::
        :script: DOT-SECTION

        import numpy
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LinearRegression
        from mlprodict.onnx_conv import to_onnx
        from mlprodict.onnxrt import OnnxInference
        from onnxcustom import __max_supported_opset__ as opset
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = LinearRegression()
        reg.fit(X_train, y_train, sample_weight=w_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

        onx_loss = add_loss_output(
            onx, weight_name='weight', score_name='elastic',
            l1_weight=0.1, l2_weight=0.9)

        print("DOT-SECTION", OnnxInference(onx_loss).to_dot())

    The next example shows how to add a loss with L1 and L2 penalties
    on the coefficients.

    .. gdot::
        :script: DOT-SECTION

        import numpy
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LinearRegression
        from mlprodict.onnx_conv import to_onnx
        from mlprodict.onnxrt import OnnxInference
        from onnxcustom import __max_supported_opset__ as opset
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = LinearRegression()
        reg.fit(X_train, y_train, sample_weight=w_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

        onx_loss = add_loss_output(
            onx, weight_name='weight', score_name='elastic',
            penalty={'coef': {'l1': 0.5, 'l2': 0.5},
                     'intercept': {'l1': 0.5, 'l2': 0.5}})

        print("DOT-SECTION", OnnxInference(onx_loss).to_dot())
    """

    from mlprodict.onnx_tools.optim import onnx_remove_node_unused

    # rename every intermediate output called 'label'
    def _replace(ens):
        for i in range(len(ens)):  # pylint: disable=C0200
            if ens[i] == 'label':
                ens[i] = '_label_'

    for node in onx.graph.node:
        if "_label_" in node.input or "_label_" in node.output:
            raise RuntimeError(  # pragma: no cover
                "One intermediate result contains '_label_'. "
                "It should be removed manually.\n%r" % node)
        _replace(node.input)
        _replace(node.output)

    if output_index is None:
        if len(onx.graph.output) != 1:
            raise ValueError(  # pragma: no cover
                "Unable to guess the output to compare to the "
                "expected labels among %r." % (
                    [o.name for o in onx.graph.output]))
        outputs = onx.graph.output
        output_index = 0
    elif isinstance(output_index, int):
        outputs = [onx.graph.output[output_index]]
    elif isinstance(output_index, str):
        outputs = [(i, o) for i, o in enumerate(onx.graph.output)
                   if o.name == output_index]
        if len(outputs) != 1:
            raise ValueError(  # pragma: no cover
                "Unable to find output %r in %r." % (
                    output_index, [o.name for o in onx.graph.output]))
        output_index = outputs[0][0]
        outputs = [outputs[0][1]]
    else:
        raise TypeError(  # pragma: no cover
            "output_index must be an integer or a str not %r."
            "" % type(output_index))

    existing_names = []
    for node in onx.graph.node:
        existing_names.extend(node.output)
        existing_names.extend(node.input)
    existing_names = set(existing_names)

    output_onx = onx.graph.output[output_index]
    output_name = output_onx.name
    elem = output_onx.type.tensor_type.elem_type
    if elem == 0:
        raise TypeError(  # pragma: no cover
            "Unable to guess input tensor type from %r."
            "" % output_onx)
    shape = []
    for d in output_onx.type.tensor_type.shape.dim:
        shape.append(d.dim_value if d.dim_value > 0 else None)

    if score_name in ('squared_error', 'l2'):
        inits, inputs, nodes, outputs = _loss_l2(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name)
    elif score_name in ('absolute_error', 'l1'):
        inits, inputs, nodes, outputs = _loss_l1(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name)
    elif score_name == 'elastic':
        inits, inputs, nodes, outputs = _loss_elastic(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name, **kwargs)
    elif score_name == 'log':
        shape = (None, 1)
        inits, inputs, nodes, outputs = _loss_log(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name, **kwargs)
    else:
        raise NotImplementedError(  # pragma: no cover
            "Unexpected %r value for score_name." % score_name)

    if penalty is not None:
        final_name = nodes[-1].output[0]
        loss_name = _unique_name(existing_names, "loss_diff")
        nodes[-1].output[0] = loss_name
        names = []
        for k, v in penalty.items():
            if isinstance(v, float):
                v = {'l2': v}
            inits_to_add, nodes_to_add = penalty_loss_onnx(
                k, dtype=TENSOR_TYPE_TO_NP_TYPE[elem],
                existing_names=existing_names, **v)
            names.append(nodes_to_add[-1].output[0])
            nodes.extend(nodes_to_add)
            inits.extend(inits_to_add)
        # Operator Sum does not have a gradient.
        if len(names) == 1:
            pen_name = names[0]
        else:
            current = names[0]
            for i in range(1, len(names)):
                new_name = _unique_name(existing_names, "sumop")
                nodes.append(
                    make_node('Add', [current, names[i]], [new_name]))
                current = new_name
            pen_name = current

        cst_shape = _unique_name(existing_names, "shapevect")
        inits.append(from_array(
            numpy.array([-1, 1], dtype=numpy.int64), name=cst_shape))
        loss_reshape = _unique_name(existing_names, "loss_reshape")
        pen_reshape = _unique_name(existing_names, "penalty_reshape")
        nodes.extend([
            make_node("Reshape", [pen_name, cst_shape], [pen_reshape]),
            make_node("Reshape", [loss_name, cst_shape], [loss_reshape])])

        nodes.append(
            make_node('Add', [pen_reshape, loss_reshape], [final_name]))

    inits = list(onx.graph.initializer) + inits
    graph = make_graph(
        list(onx.graph.node) + nodes,
        onx.graph.name,
        list(onx.graph.input) + inputs,
        outputs + [onx.graph.output[output_index]],
        inits)
    onnx_model = make_model(graph)
    onnx_model.ir_version = onx.ir_version
    onnx_model.producer_name = onx.producer_name
    onnx_model.producer_version = onx.producer_version
    onnx_model.domain = onx.domain
    onnx_model.model_version = onx.model_version
    onnx_model.doc_string = onx.doc_string
    if len(onx.metadata_props) > 0:
        values = {p.key: p.value for p in onx.metadata_props}
        set_model_props(onnx_model, values)

    # fix opset import
    del onnx_model.opset_import[:]  # pylint: disable=E1101
    for oimp in onx.opset_import:
        op_set = onnx_model.opset_import.add()  # pylint: disable=E1101
        op_set.domain = oimp.domain
        op_set.version = oimp.version
    return _rewrite_op_no_grad(onnx_remove_node_unused(onnx_model))
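# Illustrative sketch (not part of the original module): a minimal call to
# add_loss_output, mirroring the docstring example without the plotting part.
# "model.onnx" is a hypothetical regression model with a single float output;
# the names 'weight', 'label' and 'loss' follow the defaults above.
#
#     import onnx
#     onx = onnx.load("model.onnx")
#     onx_loss = add_loss_output(
#         onx, score_name='squared_error', weight_name='weight')
#     print([o.name for o in onx_loss.graph.output])   # ['loss', <original output>]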