StringNormalizer#
StringNormalizer - 10#
Version
domain: main
since_version: 10
function:
support_level: SupportType.COMMON
shape inference: True
This version of the operator has been available since version 10.
Summary
Attributes
case_change_action - STRING : string enum that causes output to be lowercased/uppercased/unchanged. Valid values are “LOWER”, “UPPER”, “NONE”. Default is “NONE”.
is_case_sensitive - INT : Boolean. Whether the identification of stop words in X is case-sensitive. Default is false.
locale - STRING : Environment-dependent string that denotes the locale according to which output strings need to be upper/lowercased. Default is en_US or a platform-specific equivalent as decided by the implementation.
stopwords - STRINGS : List of stop words. If not set, no word would be removed from X.
Inputs
X (heterogeneous) - tensor(string):
Outputs
Y (heterogeneous) - tensor(string):
Examples
_nostopwords_nochangecase
import numpy as np
import onnx
# No stop words and no case_change_action are given, so this example expects
# the operator to behave as a no-op: the expected output is the input itself.
x = np.array(["monday", "tuesday"], dtype=object)
y = x
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    is_case_sensitive=1,
)
expect(
    node,
    inputs=[x],
    outputs=[y],
    name="test_strnormalizer_nostopwords_nochangecase",
)
_monday_casesensintive_nochangecase
import numpy as np
import onnx
# "monday" is the only stop word and matching is case-sensitive, so the
# expected output is the input without "monday". No case_change_action is
# set, so the remaining strings are expected unchanged.
x = np.array(["monday", "tuesday", "wednesday", "thursday"], dtype=object)
expected = np.array(["tuesday", "wednesday", "thursday"], dtype=object)
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    is_case_sensitive=1,
    stopwords=["monday"],
)
expect(
    node,
    inputs=[x],
    outputs=[expected],
    name="test_strnormalizer_export_monday_casesensintive_nochangecase",
)
_monday_casesensintive_lower
import numpy as np
import onnx
# Case-sensitive removal of the stop word "monday" combined with
# case_change_action="LOWER". The input is already lowercase, so the expected
# output is simply the input without "monday".
x = np.array(["monday", "tuesday", "wednesday", "thursday"], dtype=object)
expected = np.array(["tuesday", "wednesday", "thursday"], dtype=object)
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    case_change_action="LOWER",
    is_case_sensitive=1,
    stopwords=["monday"],
)
expect(
    node,
    inputs=[x],
    outputs=[expected],
    name="test_strnormalizer_export_monday_casesensintive_lower",
)
_monday_casesensintive_upper
import numpy as np
import onnx
# Case-sensitive removal of the stop word "monday" combined with
# case_change_action="UPPER": the surviving strings are expected in uppercase.
x = np.array(["monday", "tuesday", "wednesday", "thursday"], dtype=object)
expected = np.array(["TUESDAY", "WEDNESDAY", "THURSDAY"], dtype=object)
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    case_change_action="UPPER",
    is_case_sensitive=1,
    stopwords=["monday"],
)
expect(
    node,
    inputs=[x],
    outputs=[expected],
    name="test_strnormalizer_export_monday_casesensintive_upper",
)
_monday_empty_output
import numpy as np
import onnx
# Every input element equals the stop word "monday". The expected output for
# this case is a one-element tensor holding a single empty string.
x = np.array(["monday", "monday"], dtype=object)
expected = np.array([""], dtype=object)
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    case_change_action="UPPER",
    is_case_sensitive=1,
    stopwords=["monday"],
)
expect(
    node,
    inputs=[x],
    outputs=[expected],
    name="test_strnormalizer_export_monday_empty_output",
)
_monday_insensintive_upper_twodim
import numpy as np
import onnx
# Two-dimensional ([1, 6]) input. is_case_sensitive is not passed, so the
# attribute keeps its default (documented above as false): the stop word
# "monday" is expected to remove both "Monday" entries. The four remaining
# strings are expected uppercased, in a [1, 4] tensor.
words_in = ["Monday", "tuesday", "wednesday", "Monday", "tuesday", "wednesday"]
x = np.array(words_in, dtype=object).reshape([1, 6])
words_out = ["TUESDAY", "WEDNESDAY", "TUESDAY", "WEDNESDAY"]
expected = np.array(words_out, dtype=object).reshape([1, 4])
node = onnx.helper.make_node(
    "StringNormalizer",
    inputs=["x"],
    outputs=["y"],
    case_change_action="UPPER",
    stopwords=["monday"],
)
expect(
    node,
    inputs=[x],
    outputs=[expected],
    name="test_strnormalizer_export_monday_insensintive_upper_twodim",
)