Source code for onnx.numpy_helper

# SPDX-License-Identifier: Apache-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import sys

import numpy as np  # type: ignore
from onnx import TensorProto, MapProto, SequenceProto, OptionalProto
from onnx import mapping, helper
from onnx.external_data_helper import load_external_data_for_tensor, uses_external_data
from typing import Sequence, Any, Optional, Text, List, Dict


[docs]def combine_pairs_to_complex(fa: Sequence[int]) -> Sequence[np.complex64]: return [complex(fa[i * 2], fa[i * 2 + 1]) for i in range(len(fa) // 2)]
[docs]def to_array(tensor: TensorProto, base_dir: Text = "") -> np.ndarray: """Converts a tensor def object to a numpy array. Inputs: tensor: a TensorProto object. base_dir: if external tensor exists, base_dir can help to find the path to it Returns: arr: the converted array. """ if tensor.HasField("segment"): raise ValueError( "Currently not supporting loading segments.") if tensor.data_type == TensorProto.UNDEFINED: raise TypeError("The element type in the input tensor is not defined.") tensor_dtype = tensor.data_type np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_dtype] storage_type = mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[tensor_dtype] storage_np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[storage_type] storage_field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[storage_type] dims = tensor.dims if tensor.data_type == TensorProto.STRING: utf8_strings = getattr(tensor, storage_field) ss = list(s.decode('utf-8') for s in utf8_strings) return np.asarray(ss).astype(np_dtype).reshape(dims) # Load raw data from external tensor if it exists if uses_external_data(tensor): load_external_data_for_tensor(tensor, base_dir) if tensor.HasField("raw_data"): # Raw_bytes support: using frombuffer. if sys.byteorder == 'big': # Convert endian from little to big convert_endian(tensor) return np.frombuffer( tensor.raw_data, dtype=np_dtype).reshape(dims) else: # float16/bfloat16 is stored as int32 (uint16 type); Need view to get the original value if (tensor_dtype == TensorProto.FLOAT16 or tensor_dtype == TensorProto.BFLOAT16): return ( np.asarray( tensor.int32_data, dtype=np.uint16) .reshape(dims) .view(np.float16)) data = getattr(tensor, storage_field) if (tensor_dtype == TensorProto.COMPLEX64 or tensor_dtype == TensorProto.COMPLEX128): data = combine_pairs_to_complex(data) return ( np.asarray( data, dtype=storage_np_dtype) .astype(np_dtype) .reshape(dims) )
[docs]def from_array(arr: np.ndarray, name: Optional[Text] = None) -> TensorProto: """Converts a numpy array to a tensor def. Inputs: arr: a numpy array. name: (optional) the name of the tensor. Returns: tensor_def: the converted tensor def. """ tensor = TensorProto() tensor.dims.extend(arr.shape) if name: tensor.name = name if arr.dtype == object: # Special care for strings. tensor.data_type = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype] # TODO: Introduce full string support. # We flatten the array in case there are 2-D arrays are specified # We throw the error below if we have a 3-D array or some kind of other # object. If you want more complex shapes then follow the below instructions. # Unlike other types where the shape is automatically inferred from # nested arrays of values, the only reliable way now to feed strings # is to put them into a flat array then specify type astype(object) # (otherwise all strings may have different types depending on their length) # and then specify shape .reshape([x, y, z]) flat_array = arr.flatten() for e in flat_array: if isinstance(e, str): tensor.string_data.append(e.encode('utf-8')) elif isinstance(e, np.ndarray): for s in e: if isinstance(s, str): tensor.string_data.append(s.encode('utf-8')) elif isinstance(s, bytes): tensor.string_data.append(s) elif isinstance(e, bytes): tensor.string_data.append(e) else: raise NotImplementedError( "Unrecognized object in the object array, expect a string, or array of bytes: ", str(type(e))) return tensor # For numerical types, directly use numpy raw bytes. try: dtype = mapping.NP_TYPE_TO_TENSOR_TYPE[arr.dtype] except KeyError: raise RuntimeError( "Numpy data type not understood yet: {}".format(str(arr.dtype))) tensor.data_type = dtype tensor.raw_data = arr.tobytes() # note: tobytes() is only after 1.9. if sys.byteorder == 'big': # Convert endian from big to little convert_endian(tensor) return tensor
[docs]def to_list(sequence: SequenceProto) -> List[Any]: """Converts a sequence def to a Python list. Inputs: sequence: a SequenceProto object. Returns: lst: the converted list. """ lst: List[Any] = [] elem_type = sequence.elem_type value_field = mapping.STORAGE_ELEMENT_TYPE_TO_FIELD[elem_type] values = getattr(sequence, value_field) for value in values: if elem_type == SequenceProto.TENSOR or elem_type == SequenceProto.SPARSE_TENSOR: lst.append(to_array(value)) elif elem_type == SequenceProto.SEQUENCE: lst.append(to_list(value)) elif elem_type == SequenceProto.MAP: lst.append(to_dict(value)) else: raise TypeError("The element type in the input sequence is not supported.") return lst
[docs]def from_list(lst: List[Any], name: Optional[Text] = None, dtype: Optional[int] = None) -> SequenceProto: """Converts a list into a sequence def. Inputs: lst: a Python list name: (optional) the name of the sequence. dtype: (optional) type of element in the input list, used for specifying sequence values when converting an empty list. Returns: sequence: the converted sequence def. """ sequence = SequenceProto() if name: sequence.name = name if dtype: elem_type = dtype elif len(lst) > 0: first_elem = lst[0] if isinstance(first_elem, dict): elem_type = SequenceProto.MAP elif isinstance(first_elem, list): elem_type = SequenceProto.SEQUENCE else: elem_type = SequenceProto.TENSOR else: # if empty input list and no dtype specified # choose sequence of tensors on default elem_type = SequenceProto.TENSOR sequence.elem_type = elem_type if (len(lst) > 0) and not all(isinstance(elem, type(lst[0])) for elem in lst): raise TypeError("The element type in the input list is not the same " "for all elements and therefore is not supported as a sequence.") if elem_type == SequenceProto.TENSOR: for tensor in lst: sequence.tensor_values.extend([from_array(tensor)]) elif elem_type == SequenceProto.SEQUENCE: for seq in lst: sequence.sequence_values.extend([from_list(seq)]) elif elem_type == SequenceProto.MAP: for map in lst: sequence.map_values.extend([from_dict(map)]) else: raise TypeError("The element type in the input list is not a tensor, " "sequence, or map and is not supported.") return sequence
[docs]def to_dict(map: MapProto) -> np.ndarray: """Converts a map def to a Python dictionary. Inputs: map: a MapProto object. Returns: dict: the converted dictionary. """ key_list: List[Any] = [] if map.key_type == TensorProto.STRING: key_list = list(map.string_keys) else: key_list = list(map.keys) value_list = to_list(map.values) if len(key_list) != len(value_list): raise IndexError("Length of keys and values for MapProto (map name: ", map.name, ") are not the same.") dictionary = dict(zip(key_list, value_list)) return dictionary
[docs]def from_dict(dict: Dict[Any, Any], name: Optional[Text] = None) -> MapProto: """Converts a Python dictionary into a map def. Inputs: dict: Python dictionary name: (optional) the name of the map. Returns: map: the converted map def. """ map = MapProto() if name: map.name = name keys = list(dict.keys()) raw_key_type = np.array(keys[0]).dtype key_type = mapping.NP_TYPE_TO_TENSOR_TYPE[raw_key_type] valid_key_int_types = [TensorProto.INT8, TensorProto.INT16, TensorProto.INT32, TensorProto.INT64, TensorProto.UINT8, TensorProto.UINT16, TensorProto.UINT32, TensorProto.UINT64] if not all(isinstance(key, raw_key_type) for key in keys): raise TypeError("The key type in the input dictionary is not the same " "for all keys and therefore is not valid as a map.") values = list(dict.values()) raw_value_type = type(values[0]) if not all(isinstance(val, raw_value_type) for val in values): raise TypeError("The value type in the input dictionary is not the same " "for all values and therefore is not valid as a map.") value_seq = from_list(values) map.key_type = key_type if key_type == TensorProto.STRING: map.string_keys.extend(keys) elif key_type in valid_key_int_types: map.keys.extend(keys) map.values.CopyFrom(value_seq) return map
[docs]def to_optional(optional: OptionalProto) -> Optional[Any]: """Converts an optional def to a Python optional. Inputs: optional: an OptionalProto object. Returns: opt: the converted optional. """ opt: Optional[Any] = None elem_type = optional.elem_type if elem_type == OptionalProto.UNDEFINED: return opt value_field = mapping.OPTIONAL_ELEMENT_TYPE_TO_FIELD[elem_type] value = getattr(optional, value_field) # TODO: create a map and replace conditional branches if elem_type == OptionalProto.TENSOR or elem_type == OptionalProto.SPARSE_TENSOR: opt = to_array(value) elif elem_type == OptionalProto.SEQUENCE: opt = to_list(value) elif elem_type == OptionalProto.MAP: opt = to_dict(value) elif elem_type == OptionalProto.OPTIONAL: return to_optional(value) else: raise TypeError("The element type in the input optional is not supported.") return opt
[docs]def from_optional( opt: Optional[Any], name: Optional[Text] = None, dtype: Optional[int] = None ) -> OptionalProto: """Converts an optional value into a Optional def. Inputs: opt: a Python optional name: (optional) the name of the optional. dtype: (optional) type of element in the input, used for specifying optional values when converting empty none. dtype must be a valid OptionalProto.DataType value Returns: optional: the converted optional def. """ # TODO: create a map and replace conditional branches optional = OptionalProto() if name: optional.name = name if dtype: # dtype must be a valid OptionalProto.DataType valid_dtypes = [v for v in OptionalProto.DataType.values()] assert dtype in valid_dtypes elem_type = dtype elif isinstance(opt, dict): elem_type = OptionalProto.MAP elif isinstance(opt, list): elem_type = OptionalProto.SEQUENCE elif opt is None: elem_type = OptionalProto.UNDEFINED else: elem_type = OptionalProto.TENSOR optional.elem_type = elem_type if opt is not None: if elem_type == OptionalProto.TENSOR: optional.tensor_value.CopyFrom(from_array(opt)) elif elem_type == OptionalProto.SEQUENCE: optional.sequence_value.CopyFrom(from_list(opt)) elif elem_type == OptionalProto.MAP: optional.map_value.CopyFrom(from_dict(opt)) else: raise TypeError("The element type in the input is not a tensor, " "sequence, or map and is not supported.") return optional
[docs]def convert_endian(tensor: TensorProto) -> None: """ call to convert endianess of raw data in tensor. @params TensorProto: TensorProto to be converted. """ tensor_dtype = tensor.data_type np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor_dtype] tensor.raw_data = np.frombuffer(tensor.raw_data, dtype=np_dtype).byteswap().tobytes()