Coverage for mlprodict/onnx_conv/onnx_ops/onnx_tokenizer.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

18 statements  

1""" 

2@file 

3@brief Custom operator Tokenizer. 

4""" 

5from skl2onnx.algebra.onnx_operator import OnnxOperator 

6 

7 

class OnnxTokenizer_1(OnnxOperator):
    """
    Custom operator *Tokenizer*: it does not belong to the ONNX
    specifications but is available in onnxruntime.
    """

    # Static description of the operator, stored as class attributes.
    since_version = 1
    expected_inputs = [('text', 'T')]
    expected_outputs = [('tokens', 'T')]
    input_range = [1, 1]
    output_range = [1, 1]
    is_deprecated = False
    domain = 'mlprodict'
    operator_name = 'Tokenizer'
    past_version = {}

    def __init__(self, text, mark=0, mincharnum=1,
                 pad_value='#', separators=None,
                 tokenexp='[a-zA-Z0-9_]+', stopwords=None,
                 op_version=None, **kwargs):
        """
        :param text: array or OnnxOperatorMixin
        :param mark: see :epkg:`Tokenizer`
        :param mincharnum: see :epkg:`Tokenizer`
        :param pad_value: see :epkg:`Tokenizer`
        :param separators: see :epkg:`Tokenizer`,
            *None* is replaced by an empty list
        :param tokenexp: see :epkg:`Tokenizer`
        :param stopwords: list of stopwords, addition to :epkg:`Tokenizer`,
            *None* is replaced by an empty list
        :param op_version: opset version
        :param kwargs: additional parameter
        """
        # Keyword order is kept identical to the signature so the parent
        # constructor receives the attributes in the same order as before.
        attributes = {
            'mark': mark,
            'mincharnum': mincharnum,
            'pad_value': pad_value,
            'separators': [] if separators is None else separators,
            'tokenexp': tokenexp,
            'stopwords': [] if stopwords is None else stopwords,
        }
        OnnxOperator.__init__(
            self, text, **attributes, op_version=op_version, **kwargs)

46 

47 

# Unversioned alias: exposes OnnxTokenizer_1 (since_version = 1) under
# the name OnnxTokenizer.
OnnxTokenizer = OnnxTokenizer_1