SQL conversion

This page answers common “how do I…” questions about converting SQL queries and polars lazy plans to ONNX with yobx.sql.to_onnx().


How to convert a SQL query string

Pass the SQL query string and the input dtypes (a dictionary mapping each column name to its numpy dtype) to yobx.sql.to_onnx().

<<<

import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx

dtypes = {"a": np.float32, "b": np.float32}
onx = to_onnx("SELECT a + b AS total FROM t WHERE a > 0", dtypes)
print(pretty_onnx(onx))

>>>

    opset: domain='' version=21
    input: name='a' type=dtype('float32') shape=['N']
    input: name='b' type=dtype('float32') shape=['N']
    init: name='filter_mask_r_lit' type=int64 shape=(1,) -- array([0])
    CastLike(filter_mask_r_lit, a) -> _onx_castlike_filter_mask_r_lit
      Greater(a, _onx_castlike_filter_mask_r_lit) -> _onx_greater_a
        Compress(a, _onx_greater_a, axis=0) -> _onx_compress_a
    Compress(b, _onx_greater_a, axis=0) -> _onx_compress_b
      Add(_onx_compress_a, _onx_compress_b) -> total
    output: name='total' type='NOTENSOR' shape=None

How to convert a SQL WHERE query

Write the filter condition as a WHERE clause directly in the SQL query passed to yobx.sql.to_onnx().

<<<

import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx

dtypes = {"a": np.float32, "b": np.float32}
query = "SELECT a, b FROM t WHERE b >= 5"
onx = to_onnx(query, dtypes)
print(pretty_onnx(onx))

>>>

    opset: domain='' version=21
    input: name='b' type=dtype('float32') shape=['N']
    input: name='a' type=dtype('float32') shape=['N']
    init: name='filter_mask_r_lit' type=int64 shape=(1,) -- array([5])
    CastLike(filter_mask_r_lit, b) -> _onx_castlike_filter_mask_r_lit
      GreaterOrEqual(b, _onx_castlike_filter_mask_r_lit) -> _onx_greaterorequal_b
        Compress(b, _onx_greaterorequal_b, axis=0) -> output_1
    Compress(a, _onx_greaterorequal_b, axis=0) -> output_0
    output: name='output_0' type='NOTENSOR' shape=None
    output: name='output_1' type='NOTENSOR' shape=None

How to convert a SQL JOIN query

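A JOIN reads from two tables, so the converter needs the dtypes of both. Use yobx.sql.sql_to_onnx(): pass the left-table dtypes as the input dtypes and the right-table dtypes through right_input_dtypes.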

<<<

import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import sql_to_onnx

query = "SELECT a.x, b.y FROM a JOIN b ON a.id = b.rid"
left_dtypes = {"id": np.int64, "x": np.float32}
right_dtypes = {"rid": np.int64, "y": np.float32}
onx = sql_to_onnx(query, left_dtypes, right_input_dtypes=right_dtypes)
print(pretty_onnx(onx))

>>>

    opset: domain='' version=21
    input: name='id' type=dtype('int64') shape=['N']
    input: name='x' type=dtype('float32') shape=['N']
    input: name='rid' type=dtype('int64') shape=['N']
    input: name='y' type=dtype('float32') shape=['N']
    init: name='init7_s1_1' type=int64 shape=(1,) -- array([1])           -- Opset.make_node.1/Shape##Opset.make_node.1/Shape##ReduceArgTopKPattern.K##ReduceArgTopKPattern.K
    init: name='init7_s1_0' type=int64 shape=(1,) -- array([0])           -- Opset.make_node.1/Shape
    Unsqueeze(id, init7_s1_1) -> id::UnSq1
    Unsqueeze(rid, init7_s1_0) -> rid::UnSq0
      Equal(id::UnSq1, rid::UnSq0) -> _onx_equal_id::UnSq1
        Cast(_onx_equal_id::UnSq1, to=6) -> _onx_equal_id::UnSq1::C6
          TopK(_onx_equal_id::UnSq1::C6, init7_s1_1, axis=1, largest=1) -> ReduceArgTopKPattern__onx_reducemax_equal_id::UnSq1::C6, ReduceArgTopKPattern__onx_argmax_equal_id::UnSq1::C6
            Squeeze(ReduceArgTopKPattern__onx_reducemax_equal_id::UnSq1::C6, init7_s1_1) -> _onx_reducemax_equal_id::UnSq1::C6
              Cast(_onx_reducemax_equal_id::UnSq1::C6, to=9) -> _onx_reducemax_equal_id::UnSq1::C6::C9
                Compress(x, _onx_reducemax_equal_id::UnSq1::C6::C9, axis=0) -> output_0
            Squeeze(ReduceArgTopKPattern__onx_argmax_equal_id::UnSq1::C6, init7_s1_1) -> _onx_argmax_equal_id::UnSq1::C6
              Compress(_onx_argmax_equal_id::UnSq1::C6, _onx_reducemax_equal_id::UnSq1::C6::C9, axis=0) -> _onx_compress_argmax_equal_id::UnSq1::C6
                Gather(y, _onx_compress_argmax_equal_id::UnSq1::C6, axis=0) -> output_1
    output: name='output_0' type='NOTENSOR' shape=None
    output: name='output_1' type='NOTENSOR' shape=None

How to convert a SQL GROUP BY query

Use an aggregate function such as SUM together with GROUP BY in the SQL query passed to yobx.sql.to_onnx().

<<<

import numpy as np
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx

dtypes = {"city": np.str_, "sales": np.float32}
query = "SELECT city, SUM(sales) AS total_sales FROM t GROUP BY city"
onx = to_onnx(query, dtypes)
print(pretty_onnx(onx))

>>>

    opset: domain='' version=21
    input: name='city' type=dtype('O') shape=['N']
    input: name='sales' type=dtype('float32') shape=['N']
    Unique(city, sorted=1) -> output_0, _onx_unique_city_1, _onx_unique_city_2, _onx_unique_city_3
      Shape(output_0, end=1, start=0) -> GatherShapePattern__onx_gather_unique_city_0::Shape:_1d
        ConstantOfShape(GatherShapePattern__onx_gather_unique_city_0::Shape:_1d) -> _onx_constantofshape_gather_unique_city_0::Shape:::UnSq0
          CastLike(_onx_constantofshape_gather_unique_city_0::Shape:::UnSq0, sales) -> _onx_castlike_constantofshape_gather_unique_city_0::Shape:::UnSq0
      ScatterElements(_onx_castlike_constantofshape_gather_unique_city_0::Shape:::UnSq0, _onx_unique_city_2, sales, axis=0, reduction=b'add') -> total_sales
    output: name='output_0' type='NOTENSOR' shape=None
    output: name='total_sales' type='NOTENSOR' shape=None

How to convert a polars LazyFrame

The same yobx.sql.to_onnx() entry point also accepts a polars.LazyFrame: pass the lazy frame in place of the SQL query string, together with the input dtypes.

<<<

import numpy as np
import polars as pl
from yobx.helpers.onnx_helper import pretty_onnx
from yobx.sql import to_onnx

lf = pl.LazyFrame({"a": [1.0, -2.0, 3.0], "b": [4.0, 5.0, 6.0]}).select(
    [(pl.col("a") + pl.col("b")).alias("total")]
)
dtypes = {"a": np.float64, "b": np.float64}
onx = to_onnx(lf, dtypes)
print(pretty_onnx(onx))

>>>

    opset: domain='' version=21
    input: name='a' type=dtype('float64') shape=['N']
    input: name='b' type=dtype('float64') shape=['N']
    Add(a, b) -> total
    output: name='total' type='NOTENSOR' shape=None

See also

SQL-to-ONNX Converter — SQL converter design details.

Polars LazyFrame to ONNX — polars LazyFrame conversion design.