SQL conversion
This page answers common “how do I…” questions about converting SQL queries
and polars lazy plans to ONNX with yobx.sql.to_onnx() (and
yobx.sql.sql_to_onnx() for JOIN queries).
How to convert a SQL query string
Pass the SQL query string and a dictionary mapping each input column name to
its NumPy dtype to yobx.sql.to_onnx().
<<<
import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx
dtypes = {"a": np.float32, "b": np.float32}
onx = to_onnx("SELECT a + b AS total FROM t WHERE a > 0", dtypes)
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
input: name='a' type=dtype('float32') shape=['N']
input: name='b' type=dtype('float32') shape=['N']
init: name='filter_mask_r_lit' type=int64 shape=(1,) -- array([0])
CastLike(filter_mask_r_lit, a) -> _onx_castlike_filter_mask_r_lit
Greater(a, _onx_castlike_filter_mask_r_lit) -> _onx_greater_a
Compress(a, _onx_greater_a, axis=0) -> _onx_compress_a
Compress(b, _onx_greater_a, axis=0) -> _onx_compress_b
Add(_onx_compress_a, _onx_compress_b) -> total
output: name='total' type='NOTENSOR' shape=None
How to convert a SQL WHERE query
Write the WHERE clause directly in the SQL query passed to
yobx.sql.to_onnx(); the resulting filter is applied to every selected column.
<<<
import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx
dtypes = {"a": np.float32, "b": np.float32}
query = "SELECT a, b FROM t WHERE b >= 5"
onx = to_onnx(query, dtypes)
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
input: name='b' type=dtype('float32') shape=['N']
input: name='a' type=dtype('float32') shape=['N']
init: name='filter_mask_r_lit' type=int64 shape=(1,) -- array([5])
CastLike(filter_mask_r_lit, b) -> _onx_castlike_filter_mask_r_lit
GreaterOrEqual(b, _onx_castlike_filter_mask_r_lit) -> _onx_greaterorequal_b
Compress(b, _onx_greaterorequal_b, axis=0) -> output_1
Compress(a, _onx_greaterorequal_b, axis=0) -> output_0
output: name='output_0' type='NOTENSOR' shape=None
output: name='output_1' type='NOTENSOR' shape=None
How to convert a SQL JOIN query
Use yobx.sql.sql_to_onnx(): pass the left-table dtypes as the second
argument and the right-table dtypes through the right_input_dtypes keyword
argument.
<<<
import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import sql_to_onnx
query = "SELECT a.x, b.y FROM a JOIN b ON a.id = b.rid"
left_dtypes = {"id": np.int64, "x": np.float32}
right_dtypes = {"rid": np.int64, "y": np.float32}
onx = sql_to_onnx(query, left_dtypes, right_input_dtypes=right_dtypes)
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
input: name='id' type=dtype('int64') shape=['N']
input: name='x' type=dtype('float32') shape=['N']
input: name='rid' type=dtype('int64') shape=['N']
input: name='y' type=dtype('float32') shape=['N']
init: name='init7_s1_1' type=int64 shape=(1,) -- array([1]) -- Opset.make_node.1/Shape##Opset.make_node.1/Shape##ReduceArgTopKPattern.K##ReduceArgTopKPattern.K
init: name='init7_s1_0' type=int64 shape=(1,) -- array([0]) -- Opset.make_node.1/Shape
Unsqueeze(id, init7_s1_1) -> id::UnSq1
Unsqueeze(rid, init7_s1_0) -> rid::UnSq0
Equal(id::UnSq1, rid::UnSq0) -> _onx_equal_id::UnSq1
Cast(_onx_equal_id::UnSq1, to=6) -> _onx_equal_id::UnSq1::C6
TopK(_onx_equal_id::UnSq1::C6, init7_s1_1, axis=1, largest=1) -> ReduceArgTopKPattern__onx_reducemax_equal_id::UnSq1::C6, ReduceArgTopKPattern__onx_argmax_equal_id::UnSq1::C6
Squeeze(ReduceArgTopKPattern__onx_reducemax_equal_id::UnSq1::C6, init7_s1_1) -> _onx_reducemax_equal_id::UnSq1::C6
Cast(_onx_reducemax_equal_id::UnSq1::C6, to=9) -> _onx_reducemax_equal_id::UnSq1::C6::C9
Compress(x, _onx_reducemax_equal_id::UnSq1::C6::C9, axis=0) -> output_0
Squeeze(ReduceArgTopKPattern__onx_argmax_equal_id::UnSq1::C6, init7_s1_1) -> _onx_argmax_equal_id::UnSq1::C6
Compress(_onx_argmax_equal_id::UnSq1::C6, _onx_reducemax_equal_id::UnSq1::C6::C9, axis=0) -> _onx_compress_argmax_equal_id::UnSq1::C6
Gather(y, _onx_compress_argmax_equal_id::UnSq1::C6, axis=0) -> output_1
output: name='output_0' type='NOTENSOR' shape=None
output: name='output_1' type='NOTENSOR' shape=None
How to convert a SQL GROUP BY query
Use an aggregate function such as SUM together with GROUP BY in the SQL query.
<<<
import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx
dtypes = {"city": np.str_, "sales": np.float32}
query = "SELECT city, SUM(sales) AS total_sales FROM t GROUP BY city"
onx = to_onnx(query, dtypes)
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
input: name='city' type=dtype('O') shape=['N']
input: name='sales' type=dtype('float32') shape=['N']
Unique(city, sorted=1) -> output_0, _onx_unique_city_1, _onx_unique_city_2, _onx_unique_city_3
Shape(output_0, end=1, start=0) -> GatherShapePattern__onx_gather_unique_city_0::Shape:_1d
ConstantOfShape(GatherShapePattern__onx_gather_unique_city_0::Shape:_1d) -> _onx_constantofshape_gather_unique_city_0::Shape:::UnSq0
CastLike(_onx_constantofshape_gather_unique_city_0::Shape:::UnSq0, sales) -> _onx_castlike_constantofshape_gather_unique_city_0::Shape:::UnSq0
ScatterElements(_onx_castlike_constantofshape_gather_unique_city_0::Shape:::UnSq0, _onx_unique_city_2, sales, axis=0, reduction=b'add') -> total_sales
output: name='output_0' type='NOTENSOR' shape=None
output: name='total_sales' type='NOTENSOR' shape=None
How to convert a polars LazyFrame
The same yobx.sql.to_onnx() entry point also accepts a
polars.LazyFrame in place of the SQL query string.
<<<
import numpy as np
import polars as pl
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx
lf = pl.LazyFrame({"a": [1.0, -2.0, 3.0], "b": [4.0, 5.0, 6.0]}).select(
[(pl.col("a") + pl.col("b")).alias("total")]
)
dtypes = {"a": np.float64, "b": np.float64}
onx = to_onnx(lf, dtypes)
print(pretty_onnx(onx))
>>>
opset: domain='' version=21
input: name='a' type=dtype('float64') shape=['N']
input: name='b' type=dtype('float64') shape=['N']
Add(a, b) -> total
output: name='total' type='NOTENSOR' shape=None
See also
SQL-to-ONNX Converter — SQL converter design details.
Polars LazyFrame to ONNX — polars LazyFrame conversion design.