Coverage for mlprodict/onnx_conv/onnx_ops/onnx_tokenizer.py: 100%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
@file
@brief Custom operator Tokenizer.
"""
5from skl2onnx.algebra.onnx_operator import OnnxOperator
class OnnxTokenizer_1(OnnxOperator):
    """
    Version 1 of the custom *Tokenizer* operator. The operator is
    not part of the ONNX specifications but is defined
    in onnxruntime.
    """

    # Operator metadata: one input named 'text' and one output
    # named 'tokens', both declared with the type label 'T'.
    since_version = 1
    expected_inputs = [('text', 'T')]
    expected_outputs = [('tokens', 'T')]
    input_range = [1, 1]
    output_range = [1, 1]
    is_deprecated = False
    domain = 'mlprodict'
    operator_name = 'Tokenizer'
    past_version = {}

    def __init__(self, text, mark=0, mincharnum=1,
                 pad_value='#', separators=None,
                 tokenexp='[a-zA-Z0-9_]+', stopwords=None,
                 op_version=None, **kwargs):
        """
        Forwards every argument to the constructor of *OnnxOperator*.

        :param text: array or OnnxOperatorMixin
        :param mark: see :epkg:`Tokenizer`
        :param mincharnum: passed to *OnnxOperator* unchanged,
            presumably see :epkg:`Tokenizer` as well (to be confirmed)
        :param pad_value: see :epkg:`Tokenizer`
        :param separators: see :epkg:`Tokenizer`,
            None is replaced by an empty list
        :param tokenexp: see :epkg:`Tokenizer`
        :param stopwords: list of stopwords, addition to :epkg:`Tokenizer`,
            None is replaced by an empty list
        :param op_version: opset version
        :param kwargs: additional parameter
        """
        # None means "no entry": a new list is created on every call
        # so that no mutable default is shared between instances.
        separators = [] if separators is None else separators
        stopwords = [] if stopwords is None else stopwords
        OnnxOperator.__init__(
            self, text,
            mark=mark,
            mincharnum=mincharnum,
            pad_value=pad_value,
            separators=separators,
            tokenexp=tokenexp,
            stopwords=stopwords,
            op_version=op_version,
            **kwargs)


# Unversioned name bound to the only version defined here.
OnnxTokenizer = OnnxTokenizer_1