Coverage for mlprodict/onnxrt/validate/validate_latency.py: 88%

95 statements  

1""" 

2@file 

3@brief Command line about validation of prediction runtime. 

4""" 

5import os 

6from collections import OrderedDict 

7import json 

8import numpy 

9from onnx import TensorProto 

10from pandas import DataFrame 

11from .. import OnnxInference 

12from ..ops_whole.session import OnnxWholeSession 

13 

14 

def _random_input(typ, shape, batch):
    if typ in ('tensor(double)', TensorProto.DOUBLE):  # pylint: disable=E1101
        dtype = numpy.float64
    elif typ in ('tensor(float)', TensorProto.FLOAT):  # pylint: disable=E1101
        dtype = numpy.float32
    else:
        raise NotImplementedError(
            "Unable to guess dtype from %r." % typ)

    if len(shape) <= 1:
        new_shape = shape
    elif shape[0] in (None, 0):
        new_shape = tuple([batch] + list(shape[1:]))
    else:
        new_shape = shape
    return numpy.random.randn(*new_shape).astype(dtype)
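A minimal sketch of what this helper returns, assuming only the branches above: an unknown first dimension is replaced by the batch size, a fully known shape is kept as it is::

    # hypothetical illustration of _random_input
    x = _random_input('tensor(float)', (None, 3), batch=10)
    # x.shape == (10, 3), x.dtype == numpy.float32
    y = _random_input('tensor(double)', (5, 2), batch=10)
    # y.shape == (5, 2), a known first dimension is preserved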

def random_feed(inputs, batch=10, empty_dimension=1):
    """
    Creates a dictionary of random inputs.

    :param inputs: inputs of an ONNX graph or of an onnxruntime session
    :param batch: dimension to use as batch dimension if unknown
    :param empty_dimension: if a dimension is null, replaces it by this value
    :return: dictionary
    """
    res = OrderedDict()
    for inp in inputs:
        name = inp.name
        if hasattr(inp.type, 'tensor_type'):
            typ = inp.type.tensor_type.elem_type
            shape = tuple(getattr(d, 'dim_value', batch)
                          for d in inp.type.tensor_type.shape.dim)
            shape = (shape[0], ) + tuple(
                b if b > 0 else empty_dimension for b in shape[1:])
        else:
            typ = inp.type
            shape = inp.shape
        res[name] = _random_input(typ, shape, batch)
    return res
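A short sketch of how the function can be used, assuming a hypothetical model file `'model.onnx'`; the two branches above accept both onnx graph inputs (protobuf messages) and onnxruntime session inputs::

    import onnx

    model = onnx.load('model.onnx')  # hypothetical model on disk
    feeds = random_feed(model.graph.input, batch=10)
    for name, value in feeds.items():
        print(name, value.shape, value.dtype)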

def latency(model, law='normal', size=1, number=10, repeat=10, max_time=0,
            runtime="onnxruntime", device='cpu', profiling=None):
    """
    Measures the latency of a model (python API).

    :param model: ONNX graph or filename
    :param law: random law used to generate fake inputs
    :param size: batch size; it replaces the first dimension
        of every input if that dimension is left unknown
    :param number: number of calls to measure
    :param repeat: number of times to repeat the experiment
    :param max_time: if greater than 0, the measure is repeated
        as many times as possible within that period of time
    :param runtime: runtime used to run the model
        (`'onnxruntime'` by default)
    :param device: device, `cpu`, `cuda:0`, or a comma-separated
        list of execution providers
    :param profiling: if set, profiles the execution of every node;
        the results can be aggregated by node name or by operator type,
        so the value must be in `(None, 'name', 'type')`
    :return: dictionary, or a tuple (dictionary, dataframe)
        if profiling is enabled

    .. cmdref::
        :title: Measures model latency
        :cmd: -m mlprodict latency --help
        :lid: l-cmd-latency

        The command generates random inputs and calls the model
        many times on these inputs. It returns the processing time
        for one iteration.

        Example::

            python -m mlprodict latency --model "model.onnx"
    """

    from cpyquickhelper.numbers import measure_time  # delayed import

    if isinstance(model, str) and not os.path.exists(model):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find model %r." % model)
    if profiling not in (None, '', 'name', 'type'):
        raise ValueError(
            "Unexpected value for profiling: %r." % profiling)
    size = int(size)
    number = int(number)
    repeat = int(repeat)
    if max_time in (None, 0, ""):
        max_time = None
    else:
        max_time = float(max_time)
        if max_time <= 0:
            max_time = None

    if law != "normal":
        raise ValueError(
            "Only law='normal' is supported, not %r." % law)

    if device in ('cpu', 'CPUExecutionProvider'):
        providers = ['CPUExecutionProvider']
    elif device in ('cuda:0', 'CUDAExecutionProvider'):
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = ['CUDAExecutionProvider']
    elif ',' in device:
        from onnxruntime import get_all_providers  # delayed import
        if runtime != 'onnxruntime':
            raise NotImplementedError(  # pragma: no cover
                "Only runtime 'onnxruntime' supports this device or provider "
                "%r." % device)
        providers = device.split(',')
        allp = set(get_all_providers())
        for p in providers:
            if p not in allp:
                raise ValueError(
                    "One device or provider %r is not supported among "
                    "%r." % (p, allp))
    else:
        raise ValueError(  # pragma: no cover
            "Device %r not supported." % device)
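    # At this point `providers` holds execution-provider names; the
    # comma-separated device form was validated against
    # onnxruntime.get_all_providers(), which lists every provider the
    # installed onnxruntime build supports, e.g.
    # ['CPUExecutionProvider', 'CUDAExecutionProvider', ...].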

    if runtime in ("onnxruntime", "onnxruntime-cuda"):
        from onnxruntime import InferenceSession, SessionOptions  # delayed import
        providers = ['CPUExecutionProvider']
        if runtime == "onnxruntime-cuda":
            providers = ['CUDAExecutionProvider'] + providers
        if profiling in ('name', 'type'):
            so = SessionOptions()
            so.enable_profiling = True
            sess = InferenceSession(
                model, sess_options=so, providers=providers)
        else:
            sess = InferenceSession(model, providers=providers)
        fct = lambda feeds: sess.run(None, feeds)
        inputs = sess.get_inputs()
    else:
        if profiling in ('name', 'type'):
            runtime_options = {"enable_profiling": True}
            if runtime != 'onnxruntime1':
                raise NotImplementedError(  # pragma: no cover
                    "Profiling is not implemented for runtime=%r." % runtime)
        else:
            runtime_options = None
        oinf = OnnxInference(model, runtime=runtime,
                             runtime_options=runtime_options)
        fct = lambda feeds: oinf.run(feeds)
        inputs = oinf.obj.graph.input

    feeds = random_feed(inputs, size)
    res = measure_time(
        lambda: fct(feeds), number=number, repeat=repeat, context={},
        max_time=max_time, div_by_number=True)
    for k, v in feeds.items():
        res["shape(%s)" % k] = "x".join(map(str, v.shape))
    if profiling in ('name', 'type'):
        if runtime == 'onnxruntime':
            profile_name = sess.end_profiling()
            with open(profile_name, 'r', encoding='utf-8') as f:
                js = json.load(f)
            js = OnnxWholeSession.process_profiling(js)
            df = DataFrame(js)
        else:
            df = oinf.get_profiling(as_df=True)
        if profiling == 'name':
            gr = df[['dur', "args_op_name", "name"]].groupby(
                ["args_op_name", "name"]).sum().sort_values('dur')
        else:
            gr = df[['dur', "args_op_name"]].groupby(
                "args_op_name").sum().sort_values('dur')
        return res, gr

    return res
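A sketch of the python API under the same assumption as above (a hypothetical `'model.onnx'` on disk); the measure is the dictionary returned by `cpyquickhelper.numbers.measure_time`, assumed here to include the keys `'average'` and `'deviation'`::

    res = latency('model.onnx', number=10, repeat=10)
    print(res['average'], res['deviation'])

    # with profiling, a (dictionary, DataFrame) pair comes back,
    # node durations aggregated by operator type
    res, gr = latency('model.onnx', profiling='type')
    print(gr)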