In [None]:
%matplotlib inline



# Benchmark, comparison scikit-learn - onnxruntime-training

The benchmark compares the training time of scikit-learn
and onnxruntime-training on a linear regression and a neural network.
It uses the model trained in `l-orttraining-nn-gpu`.

## First comparison: neural network


In [None]:
import warnings
from pprint import pprint
import time
import numpy
import matplotlib.pyplot as plt
from pandas import DataFrame
from onnxruntime import get_device
from pyquickhelper.pycode.profiling import profile, profile2graph
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from mlprodict.onnx_conv import to_onnx
from onnxcustom.utils.orttraining_helper import (
    add_loss_output, get_train_initializer)
from onnxcustom.training.optimizers import OrtGradientOptimizer


# Fixed seed so that the generated data and the train/test split are the
# same on every Restart & Run All; change it to draw another dataset.
seed = 0
X, y = make_regression(n_samples=1000, n_features=100, bias=2,
                       random_state=seed)
# Cast to float32, the dtype later passed to the ONNX converter.
X = X.astype(numpy.float32)
y = y.astype(numpy.float32)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=seed)

Benchmark function.



In [None]:
def benchmark(skl_model, train_session, name, verbose=True):
    """
    Fits both models on the notebook-level data ``X``, ``y`` and measures
    the wall-clock duration of each ``fit`` call.

    :param skl_model: model from scikit-learn, its attribute
        ``loss_curve_`` is read after ``fit``
    :param train_session: instance of OrtGradientOptimizer, its attribute
        ``train_losses_`` is read after ``fit``
    :param name: experiment name
    :param verbose: if True (default), prints the experiment name, the
        number of iterations and the duration of each training,
        nothing is printed otherwise
    :return: dictionary with keys ``skl``, ``ort`` (durations in seconds),
        ``name``, ``iter_skl``, ``iter_ort`` (length of the loss curves),
        ``losses_skl``, ``losses_ort`` (the loss curves themselves)
    """
    if verbose:
        print("[benchmark] %s" % name)

    # scikit-learn: only the call to fit is timed.
    begin = time.perf_counter()
    skl_model.fit(X, y)
    duration_skl = time.perf_counter() - begin
    length_skl = len(skl_model.loss_curve_)
    if verbose:
        print("[benchmark] skl=%r iterations - %r seconds" % (
            length_skl, duration_skl))

    # onnxruntime-training: same data, same measure.
    begin = time.perf_counter()
    train_session.fit(X, y)
    duration_ort = time.perf_counter() - begin
    length_ort = len(train_session.train_losses_)
    if verbose:
        print("[benchmark] ort=%r iterations - %r seconds" % (
            length_ort, duration_ort))

    return dict(skl=duration_skl, ort=duration_ort, name=name,
                iter_skl=length_skl, iter_ort=length_ort,
                losses_skl=skl_model.loss_curve_,
                losses_ort=train_session.train_losses_)

Common parameters and model



In [None]:
# Settings shared by the scikit-learn models and the onnxruntime trainers.
batch_size = 15
max_iter = 100

# Plain SGD (no momentum, no L2 penalty). n_iter_no_change is set above
# max_iter, presumably so that training is not stopped early.
sgd_params = dict(
    solver='sgd', alpha=0, momentum=0, nesterovs_momentum=False,
    learning_rate='invscaling', learning_rate_init=5e-4,
    batch_size=batch_size, max_iter=max_iter,
    n_iter_no_change=max_iter * 3)
nn = MLPRegressor(hidden_layer_sizes=(50, 10), **sgd_params)

# First training on the train split, warnings are silenced.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    nn.fit(X_train, y_train)

Conversion to ONNX and trainer initialization



In [None]:
# Convert the fitted network to ONNX, then add a loss output to get the
# graph given to the trainer.
onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
onx_train = add_loss_output(onx)

# Initializers to train, displayed as (name, shape).
weights = get_train_initializer(onx)
pprint([(key, value[0].shape) for key, value in weights.items()])

cpu_params = dict(
    device='cpu', learning_rate=1e-5, warm_start=False,
    max_iter=max_iter, batch_size=batch_size)
train_session = OrtGradientOptimizer(
    onx_train, list(weights), **cpu_params)

# First entry of the list of results used by the graphs.
benches = [benchmark(nn, train_session, name='NN-CPU')]

## Profiling



In [None]:
def clean_name(text):
    """
    Shortens a name by dropping everything before a known marker.

    The markers ``'onnxruntime'``, ``'onnxcustom'``, ``'site-packages'``
    are searched in this order; for the first one found, the text
    starting at its first occurrence is returned.

    :param text: string to shorten
    :return: shortened string, *text* unchanged if no marker is found
    """
    for marker in ('onnxruntime', 'onnxcustom', 'site-packages'):
        start = text.find(marker)
        if start != -1:
            return text[start:]
    return text


# Profile one complete run of the NN-CPU benchmark; only the first element
# returned by profile is kept (presumably the statistics - TODO confirm).
ps = profile(lambda: benchmark(nn, train_session, name='NN-CPU'))[0]
# Build the call graph, names are shortened with clean_name.
root, nodes = profile2graph(ps, clean_text=clean_name)
# Text rendering of the graph starting from the root.
text = root.to_text()
print(text)

## If a GPU is available



In [None]:
# Same benchmark on CUDA, only run when get_device() returns 'GPU'.
if get_device().upper() == 'GPU':
    gpu_params = dict(
        device='cuda', learning_rate=5e-4, warm_start=False,
        max_iter=200, batch_size=batch_size)
    train_session = OrtGradientOptimizer(
        onx_train, list(weights), **gpu_params)
    benches.append(benchmark(nn, train_session, name='NN-GPU'))

## Linear Regression



In [None]:
# A linear regression written as an MLPRegressor without any hidden layer,
# trained with the same SGD settings as the neural network except for the
# initial learning rate.
lr = MLPRegressor(hidden_layer_sizes=tuple(), max_iter=max_iter,
                  solver='sgd', learning_rate_init=5e-2, alpha=0,
                  n_iter_no_change=max_iter * 3, batch_size=batch_size,
                  nesterovs_momentum=False, momentum=0,
                  learning_rate='invscaling')

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    lr.fit(X, y)


# Convert the linear model itself: converting `nn` here would make the
# "LR" experiments train the neural network graph with onnxruntime.
onx = to_onnx(lr, X_train[:1].astype(numpy.float32), target_opset=15)
onx_train = add_loss_output(onx)

inits = get_train_initializer(onx)
# "shape_tensor" is excluded from the trained weights (presumably a
# constant shape and not a coefficient - TODO confirm).
weights = {k: v for k, v in inits.items() if k != "shape_tensor"}
pprint(list((k, v[0].shape) for k, v in weights.items()))

train_session = OrtGradientOptimizer(
    onx_train, list(weights), device='cpu', learning_rate=1e-4,
    warm_start=False, max_iter=max_iter, batch_size=batch_size)

benches.append(benchmark(lr, train_session, name='LR-CPU'))

if get_device().upper() == 'GPU':

    train_session = OrtGradientOptimizer(
        onx_train, list(weights), device='cuda', learning_rate=1e-4,
        warm_start=False, max_iter=200, batch_size=batch_size)

    # The scikit-learn side of the LR-GPU experiment is `lr`, not `nn`.
    benches.append(benchmark(lr, train_session, name='LR-GPU'))

## GPU profiling



In [None]:
# Profile the LR-GPU experiment, only when get_device() returns 'GPU'.
if get_device().upper() == 'GPU':
    # The experiment is labelled LR-GPU: the scikit-learn model must be
    # the linear regression `lr`, not the neural network `nn`.
    ps = profile(lambda: benchmark(lr, train_session, name='LR-GPU'))[0]
    root, nodes = profile2graph(ps, clean_text=clean_name)
    text = root.to_text()
    print(text)

## Graphs

Dataframe first.



In [None]:
# One row per experiment, indexed by the experiment name.
df = DataFrame(benches)
df = df.set_index('name')
df

Text output.



In [None]:
# Plain-text rendering of the dataframe built above.
print(df)

Graphs.



In [None]:
# Left: duration of each fit per experiment. Right: loss curves, log scale.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
ax_time, ax_loss = ax
df[['skl', 'ort']].plot.bar(title="Processing time", ax=ax_time)
ax_time.tick_params(axis='x', rotation=30)
for bench in benches:
    for lib in ('skl', 'ort'):
        # The first value of every loss curve is left out of the plot.
        ax_loss.plot(bench['losses_' + lib][1:],
                     label=lib + '-' + bench['name'])
ax_loss.set_title("Losses")
ax_loss.set_yscale('log')
ax_loss.legend()

The gradient updates are not exactly the same.
This should be improved for a fair comparison.



In [None]:
# plt.show()