diff --git a/.github/workflows/azureml_pipelines.yml b/.github/workflows/azureml_pipelines.yml
index 2efa112e..364b4cc0 100644
--- a/.github/workflows/azureml_pipelines.yml
+++ b/.github/workflows/azureml_pipelines.yml
@@ -34,7 +34,7 @@ jobs:
- name: Install dependencies
run: |
- python -m pip install --upgrade pip==21.3.1
+ python -m pip install --upgrade pip==22.2.2
pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11
sudo apt-get install libopenmpi-dev
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
diff --git a/.github/workflows/benchmark_scripts.yml b/.github/workflows/benchmark_scripts.yml
index 35b88423..75f2e1f6 100644
--- a/.github/workflows/benchmark_scripts.yml
+++ b/.github/workflows/benchmark_scripts.yml
@@ -42,7 +42,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get install libopenmpi-dev
- python -m pip install --upgrade pip==21.3.1
+ python -m pip install --upgrade pip==22.2.2
pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# hotfix for azurecli issue
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 2ce8b02d..9ed1a320 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -32,7 +32,7 @@ jobs:
- name: pip install
run: |
- python -m pip install --upgrade pip==21.3.1
+ python -m pip install --upgrade pip==22.2.2
python -m pip install markdown-include==0.7.0 mkdocstrings==0.19.0 mkdocstrings-python==0.7.1 mkdocs-material==8.4.2 livereload==2.6.3
diff --git a/Exploration.ipynb b/Exploration.ipynb
new file mode 100644
index 00000000..48b06e64
--- /dev/null
+++ b/Exploration.ipynb
@@ -0,0 +1,551 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import lightgbm\n",
+ "import timeit"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 4854 | \n",
+ " 4855 | \n",
+ " 4856 | \n",
+ " 4857 | \n",
+ " 4858 | \n",
+ " 4859 | \n",
+ " 4860 | \n",
+ " 4861 | \n",
+ " 4862 | \n",
+ " 4863 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 129 | \n",
+ " 612.0 | \n",
+ " 153000.0 | \n",
+ " 6000.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 118470648.0 | \n",
+ " 0.0 | \n",
+ " 628.0 | \n",
+ " 6000.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 132.0 | \n",
+ " 140.0 | \n",
+ " 137.0 | \n",
+ " 135.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 120 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 120 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 76 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 120 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 4864 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 5 6 7 8 \\\n",
+ "0 129 612.0 153000.0 6000.0 0.0 0.0 118470648.0 0.0 628.0 \n",
+ "1 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "2 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "3 76 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "4 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " 9 ... 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 \n",
+ "0 6000.0 ... 0.0 132.0 140.0 137.0 135.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "2 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ "[5 rows x 4864 columns]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_path = './NiR4_OFE_LGBM_model.txt'\n",
+ "data_path = './File_0-csv.txt'\n",
+ "data = pd.read_csv(data_path, header=None)\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Info] Construct bin mappers from text data time 2.14 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "booster = lightgbm.Booster(model_file=model_path)\n",
+ "\n",
+ "inference_data = lightgbm.Dataset(data_path, free_raw_data=False).construct()\n",
+ "inference_raw_data = inference_data.get_data()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1.41436096, -0.27520206, -0.32896408, ..., 0.27021392,\n",
+ " 0.06719871, 2.11317219])"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "predictions_array_lgbmpython = booster.predict(\n",
+ " data=inference_raw_data,\n",
+ " num_threads=1,\n",
+ " predict_disable_shape_check=True,\n",
+ ")\n",
+ "predictions_array_lgbmpython"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast_native.cc:711: Using ASTNativeCompiler\n",
+ "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast\\split.cc:29: Parallel compilation enabled; member trees will be divided into 16 translation units.\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:92: Code generation finished. Writing code to files...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu2.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file main.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file header.h...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu5.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu0.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu1.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu3.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu4.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu6.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu7.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu8.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu9.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu10.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu11.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu12.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu13.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu14.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu15.c...\n",
+ "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file recipe.json...\n",
+ "[13:57:55] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:105: Compiling sources files in directory .\\tmpa3c08ggs into object files (*.obj)...\n",
+ "[13:58:21] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:135: Generating dynamic shared library .\\tmpa3c08ggs\\predictor.dll...\n",
+ "[13:58:23] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\__init__.py:282: Generated shared library in 28.49 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "import treelite, treelite_runtime\n",
+ "\n",
+ "model = treelite.Model.load(\n",
+ " model_path,\n",
+ " model_format=\"lightgbm\"\n",
+ ")\n",
+ "model.export_lib(\n",
+ " toolchain=\"msvc\",\n",
+ " libpath=model_path + \".so\",\n",
+ " verbose=True,\n",
+ " params={'parallel_comp':16}\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[13:59:32] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite_runtime\\predictor.py:159: Dynamic shared library c:\\Projects\\lightgbm-benchmark\\NiR4_OFE_LGBM_model.txt.so has been successfully loaded into memory\n"
+ ]
+ }
+ ],
+ "source": [
+ "predictor = treelite_runtime.Predictor(\n",
+ " model_path + '.so',\n",
+ " verbose=True,\n",
+ " nthread=1\n",
+ ")\n",
+ "dmat = treelite_runtime.DMatrix(data.to_numpy())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "predictions_array_treelite =predictor.predict(dmat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The maximum opset needed by this model is only 8.\n",
+ "The maximum opset needed by this model is only 8.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Info] Construct bin mappers from text data time 2.43 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "from onnxconverter_common.data_types import FloatTensorType\n",
+ "from onnxmltools.convert import convert_lightgbm\n",
+ "import onnxruntime as ort\n",
+ "import numpy as np\n",
+ "\n",
+ "with open(model_path, \"r\") as mf:\n",
+ " model_str = mf.read()\n",
+ " model_str = model_str.replace(\n",
+ " \"objective=lambdarank\", \"objective=regression\"\n",
+ " )\n",
+ "booster_ort = lightgbm.Booster(model_str=model_str)\n",
+ "\n",
+ "onnx_input_types = [\n",
+ " (\n",
+ " \"input\",\n",
+ " FloatTensorType(\n",
+ " [1, inference_data.num_feature()]\n",
+ " ),\n",
+ " )\n",
+ "]\n",
+ "onnx_input_batch_types = [\n",
+ " (\n",
+ " \"input\",\n",
+ " FloatTensorType(\n",
+ " [inference_data.num_data(), inference_data.num_feature()]\n",
+ " ),\n",
+ " )\n",
+ "]\n",
+ "onnx_ml_model = convert_lightgbm(booster_ort, initial_types=onnx_input_types)\n",
+ "onnx_ml_batch_model = convert_lightgbm(booster_ort, initial_types=onnx_input_batch_types)\n",
+ "\n",
+ "sess_options = ort.SessionOptions()\n",
+ "sess_options.intra_op_num_threads = 0\n",
+ "sess_options.inter_op_num_threads = 0\n",
+ "\n",
+ "sess_options.execution_mode = (\n",
+ " ort.ExecutionMode.ORT_SEQUENTIAL\n",
+ ")\n",
+ "sess_options.graph_optimization_level = (\n",
+ " ort.GraphOptimizationLevel.ORT_ENABLE_ALL\n",
+ ")\n",
+ "sessionml = ort.InferenceSession(\n",
+ " onnx_ml_model.SerializeToString(), sess_options\n",
+ ")\n",
+ "sessionml_batch = ort.InferenceSession(\n",
+ " onnx_ml_batch_model.SerializeToString(), sess_options\n",
+ ")\n",
+ "inference_data = lightgbm.Dataset(\n",
+ " data_path, free_raw_data=False\n",
+ ").construct()\n",
+ "inference_raw_data = inference_data.get_data()\n",
+ "if type(inference_raw_data) == str:\n",
+ " inference_raw_data = np.loadtxt(\n",
+ " inference_raw_data, delimiter=\",\"\n",
+ " ).astype(np.float32)[:, : inference_data.num_feature()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "predictions_array_ort = sessionml_batch.run(\n",
+ " [sessionml.get_outputs()[0].name],\n",
+ " {sessionml.get_inputs()[0].name: inference_raw_data},\n",
+ ")[0][:, 0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7666397998491448"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time_inferencing_per_query = []\n",
+ "for i in range(len(inference_raw_data)):\n",
+ " prediction_time = timeit.timeit(\n",
+ " lambda: sessionml.run(\n",
+ " [sessionml.get_outputs()[0].name],\n",
+ " {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]},\n",
+ " ),\n",
+ " number=1,\n",
+ " )\n",
+ " time_inferencing_per_query.append(prediction_time/1)\n",
+ "sum(time_inferencing_per_query)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.400464499998634"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "timeit.timeit(lambda: sessionml_batch.run(\n",
+ " [sessionml.get_outputs()[0].name],\n",
+ " {sessionml.get_inputs()[0].name: inference_raw_data},\n",
+ "), number=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 1.41436096 -0.27520206 -0.32896408 -0.08315643 -0.26660063 -0.30202675\n",
+ " -0.22120572 -0.35424621 -0.25634644 -0.06725079]\n",
+ "[ 1.7355299 0.2582493 0.28444618 0.51784474 0.49668223 -0.04218447\n",
+ " 0.12811233 0.20044815 -0.10399695 0.61548153]\n",
+ "[ 1.7355288 0.25824943 0.28444648 0.5178444 0.49668252 -0.04218467\n",
+ " 0.12811226 0.20044814 -0.10399713 0.6154818 ]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(predictions_array_lgbmpython[:10])\n",
+ "print(predictions_array_treelite[:10])\n",
+ "print(predictions_array_ort[:10])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.8.13 ('lightgbmbenchmark')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.13"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "218deddc5dc66f2d9cab81f1bf3043b58bb8ede28fae2157142347a8a27e0fa5"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml
index 188b7b0c..066b1974 100644
--- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml
+++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -80,6 +80,10 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
+ - framework: lightgbm_ort # ONNX RT implementation
+ - framework: lightgbm_ort_batch # ONNX RT single batch implementation
+ - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation
+ - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation
- framework: treelite_python # v1.3.0
# to use custom_win_cli, you need to compile your own binaries
diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml
index 2e68c4d3..940d3ac2 100644
--- a/conf/experiments/lightgbm-inferencing.yaml
+++ b/conf/experiments/lightgbm-inferencing.yaml
@@ -36,6 +36,10 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
+ - framework: lightgbm_ort # ONNX RT implementation
+ - framework: lightgbm_ort_batch # ONNX RT single batch implementation
+ - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation
+ - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation
- framework: treelite_python # v1.3.0
# to use custom_win_cli, you need to compile your own binaries
diff --git a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
index d8b46623..7f3748d3 100644
--- a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+++ b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1-patch/20211109.1"
# Those arguments will NOT be used by AzureML
@@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
index 7041380f..a7233019 100644
--- a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+++ b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0-patch/20211109.1"
# Those arguments will NOT be used by AzureML
@@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
index 67eb7d88..6c05ad28 100644
--- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
+++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1/20211109.1"
# Those arguments will NOT be used by AzureML
@@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile
index 7f415f76..b42984f0 100644
--- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile
+++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.2.1/20211108.1"
# Those arguments will NOT be used by AzureML
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile
index 1f85db8b..58104eb0 100644
--- a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile
+++ b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cuda.build.version="3.2.1/20211108.1"
# Those arguments will NOT be used by AzureML
@@ -73,7 +73,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile
index e3d5cc01..047f985a 100644
--- a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile
+++ b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.gpu.build.version="3.2.1/20211108.1"
# Those arguments will NOT be used by AzureML
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile
index a8e6dea7..4cba3571 100644
--- a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile
+++ b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.gpu.pip.version="3.2.1/20211108.1"
# Those arguments will NOT be used by AzureML
# they are here just to allow for lightgbm-benchmark build to actually check
@@ -9,7 +9,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile
index cf9f2a6b..9e984465 100644
--- a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile
+++ b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
#ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile
index 4f66f5a0..09b75851 100644
--- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile
+++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1"
# Those arguments will NOT be used by AzureML
@@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile
index 59ca3f32..e0d9056c 100644
--- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile
+++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile
index f40508e0..4d974d65 100644
--- a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile
+++ b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
#ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/docs/results/inferencing.md b/docs/results/inferencing.md
index 8395ab49..b9f6242d 100644
--- a/docs/results/inferencing.md
+++ b/docs/results/inferencing.md
@@ -8,31 +8,35 @@
## Variants
-| variant_id | index | framework | version | build | cpu count | num threads | machine | system |
-|:------------------|--------:|:----------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------|
-| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux |
-| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | x86_64 | Linux |
-| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
-| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
-| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
-| treelite_python#5 | 5 | treelite_python | 1.3.0 | default | 16 | 1 | x86_64 | Linux |
+| variant_id | index | framework | version | build | cpu count | num threads | machine | system |
+|:---------------------------|--------:|:-------------------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------|
+| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux |
+| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | x86_64 | Linux |
+| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
+| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
+| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux |
+| onnx#5 | 5 | onnx | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux |
+| onnx_batch#6 | 6 | onnx_batch | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux |
+| onnx_multithreaded#7 | 7 | onnx_multithreaded | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux |
+| onnx_multithreaded_batch#8 | 8 | onnx_multithreaded_batch | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux |
+| treelite_python#9 | 9 | treelite_python | 2.1.0 | default | 16 | 1 | x86_64 | Linux |
## Metric time_inferencing per prediction (usecs)
-| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | treelite_python#5 |
-|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|--------------------:|
-| 10 trees<br/>31 leaves<br/>10 cols | 6.71442 | 1.27191 | 1.88084 | 1.97014 | 1.50457 | 0.299835 |
-| 10 trees<br/>31 leaves<br/>100 cols | 10.0109 | 1.87281 | 1.89273 | 1.51227 | 1.93901 | 0.465536 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 37.308 | 4.32708 | 4.70362 | 7.06888 | 4.72284 | 2.08173 |
-| 100 trees<br/>31 leaves<br/>10 cols | 18.8272 | 12.7087 | 14.9646 | 10.8278 | 16.6011 | 5.27241 |
-| 100 trees<br/>31 leaves<br/>100 cols | 23.524 | 9.6317 | 11.2825 | 15.0675 | 13.3228 | 7.3904 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 45.8476 | 14.3042 | 18.5159 | 15.6538 | 14.9914 | 7.93605 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 113.854 | 95.4644 | 104.575 | 93.1975 | 107.137 | 28.5369 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 173.506 | 136.601 | 137.953 | 137.349 | 165.446 | 96.1941 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 178.49 | 143.14 | 143.734 | 146.814 | 149.186 | 98.9669 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 395.046 | 394.296 | 425.493 | 326.193 | 443.607 | 251.199 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 467.79 | 459.998 | 535.714 | 537.431 | 450.346 | 295.183 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 645.185 | 580.791 | 574.005 | 643.234 | 591.006 | 442.544 |
+| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | onnx#5 | onnx_batch#6 | onnx_multithreaded#7 | onnx_multithreaded_batch#8 | treelite_python#9 |
+|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|----------:|---------------:|-----------------------:|-----------------------------:|--------------------:|
+| 10 trees<br/>31 leaves<br/>10 cols | 6.95305 | 1.11553 | 1.19408 | 1.15504 | 1.12653 | 7.62398 | 0.0969134 | 21.4563 | 0.198045 | 0.303221 |
+| 10 trees<br/>31 leaves<br/>100 cols | 9.9608 | 1.57071 | 1.81644 | 1.55628 | 1.73756 | 7.67336 | 0.149622 | 22.5913 | 0.303975 | 0.449347 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 36.8206 | 3.97296 | 4.00286 | 4.35525 | 4.56862 | 7.65319 | 1.23701 | 21.9663 | 1.03079 | 1.90513 |
+| 100 trees<br/>31 leaves<br/>10 cols | 16.081 | 10.3246 | 11.2351 | 10.4623 | 10.411 | 12.9457 | 0.489068 | 27.8963 | 0.518232 | 5.12872 |
+| 100 trees<br/>31 leaves<br/>100 cols | 18.419 | 10.2733 | 9.27452 | 10.6115 | 10.4095 | 13.1084 | 0.691856 | 26.6879 | 0.637577 | 5.73254 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 45.0129 | 12.6701 | 11.4707 | 12.7013 | 12.794 | 11.9506 | 2.29946 | 28.9509 | 1.98307 | 7.35011 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.3209 | 97.622 | 103.892 | 95.7561 | 97.6808 | 18.3931 | 3.95854 | 40.0455 | 4.24206 | 33.3337 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.284 | 146.32 | 154.788 | 149.401 | 149.942 | 20.4271 | 5.15573 | 40.3441 | 4.93979 | 96.6871 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 165.235 | 140.012 | 150.223 | 143.748 | 141.769 | 20.1743 | 11.7819 | 36.897 | 12.1277 | 101.73 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 376.015 | 407.244 | 373.407 | 366.11 | 383.453 | 43.7589 | 10.8586 | 85.8648 | 10.1721 | 219.653 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 421.179 | 465.234 | 482.583 | 468.308 | 473.928 | 104.56 | 24.15 | 156.015 | 24.2661 | 300.779 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 644.905 | 587.578 | 581.033 | 625.28 | 598.814 | 94.8404 | 58.758 | 127.584 | 58.3206 | 416.228 |
## Percentile metrics for each variant
@@ -43,66 +47,100 @@ Some variants above report percentile metrics. Those are reported by computing i
| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
|:---------------------------------------|------------:|------------:|------------:|
-| 10 trees<br/>31 leaves<br/>10 cols | 1.3 | 1.5 | 1.6 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2 | 3.1 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.201 | 4.5 | 5.6 |
-| 100 trees<br/>31 leaves<br/>10 cols | 12.6 | 13.8 | 19.1 |
-| 100 trees<br/>31 leaves<br/>100 cols | 9.501 | 10 | 12.802 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.301 | 15.601 | 25.001 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 95.1015 | 98.801 | 108.803 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 131.001 | 145.6 | 215.101 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 142.601 | 145.202 | 157.302 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 383.404 | 430.905 | 584.61 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 448.404 | 504.305 | 633.407 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 557.003 | 640.203 | 836.145 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.2 | 1.399 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.6 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.5 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.3 | 11 | 11.601 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.2 | 10.7 | 11.1 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13.001 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 96 | 102.001 | 114.201 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 145.899 | 150.599 | 161.099 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 139.124 | 142.024 | 154.528 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 405.801 | 424.302 | 444.202 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 464.302 | 476.601 | 490.101 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 585.368 | 600.169 | 611.8 |
### lightgbm#2
| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
|:---------------------------------------|------------:|------------:|------------:|
-| 10 trees<br/>31 leaves<br/>10 cols | 1.8 | 2.1 | 2.601 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.9 | 2 | 2.10001 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.901 | 5.4 |
-| 100 trees<br/>31 leaves<br/>10 cols | 13.7 | 15.4 | 37.204 |
-| 100 trees<br/>31 leaves<br/>100 cols | 10.8 | 12.901 | 17.301 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 17.7 | 19.001 | 31.4 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 104.003 | 108.703 | 122.603 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.501 | 149.701 | 221.015 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 138.702 | 160.802 | 219.107 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 425.024 | 463.626 | 496.927 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 508.705 | 588.917 | 946.39 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 550.905 | 624.606 | 810.269 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.2 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 1.9 | 2.1 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.8 |
+| 100 trees<br/>31 leaves<br/>10 cols | 11.1 | 12 | 13.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 9.3 | 9.601 | 10 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.399 | 11.799 | 13.401 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 103.501 | 108.1 | 116.9 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.296 | 159.296 | 170.495 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 149.602 | 152.301 | 164.802 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 372.405 | 389.205 | 405.207 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 481.504 | 496.705 | 510.607 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 578.888 | 596.699 | 618.387 |
### lightgbm#3
| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
|:---------------------------------------|------------:|------------:|------------:|
-| 10 trees<br/>31 leaves<br/>10 cols | 1.8 | 2.3 | 3.1 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.6 | 1.9 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 6.3 | 7.2 | 23.901 |
-| 100 trees<br/>31 leaves<br/>10 cols | 10.8 | 11.6 | 12.6 |
-| 100 trees<br/>31 leaves<br/>100 cols | 14.3 | 15.7 | 29.903 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 15.1 | 16.2 | 27.201 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 85.301 | 109.901 | 168.301 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.401 | 149.601 | 201.402 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 146.202 | 148.903 | 161.503 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 312.703 | 354.715 | 505.311 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 537.638 | 582.651 | 608.343 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 641.307 | 654.907 | 667.409 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.3 | 4.5 | 4.9 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.401 | 11.2 | 11.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.601 | 11.001 | 11.401 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 95.5 | 99 | 108.8 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149 | 153.8 | 164.202 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 142.699 | 145.799 | 158.899 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 363.53 | 384.032 | 427.939 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 466.461 | 479.863 | 501.27 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 622.902 | 637.601 | 650.101 |
### lightgbm#4
| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
|:---------------------------------------|------------:|------------:|------------:|
-| 10 trees<br/>31 leaves<br/>10 cols | 1.3 | 1.7 | 2.7 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2.2 | 2.6 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.9 | 5.3 |
-| 100 trees<br/>31 leaves<br/>10 cols | 15.7 | 17.2 | 34.9 |
-| 100 trees<br/>31 leaves<br/>100 cols | 12.201 | 13.501 | 48.706 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.901 | 16.101 | 24.701 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 97.301 | 136.401 | 201.902 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 164.901 | 170.101 | 182.801 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 148.403 | 151.003 | 166.205 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 439.327 | 492.54 | 602.444 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 439.432 | 490.245 | 605.846 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 571.902 | 640.112 | 827.614 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.4 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.7 | 1.9 | 2 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.5 | 4.8 | 5.2 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.4 | 11.1 | 11.9 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.4 | 10.8 | 11.3 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.798 | 13.099 | 13.598 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.302 | 101.201 | 111.002 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149.489 | 154.29 | 165.188 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 141.2 | 143.601 | 156.5 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 382.303 | 398.402 | 413.602 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 472.51 | 485.21 | 499.01 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 596.097 | 611.307 | 625.896 |
+
+### onnx#5
+
+| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
+|:---------------------------------------|------------:|------------:|------------:|
+| 10 trees<br/>31 leaves<br/>10 cols | 7.51 | 7.6 | 8.88 |
+| 10 trees<br/>31 leaves<br/>100 cols | 7.5998 | 7.6798 | 8.8698 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 7.59 | 7.6901 | 8.91 |
+| 100 trees<br/>31 leaves<br/>10 cols | 12.85 | 13.09 | 14.6201 |
+| 100 trees<br/>31 leaves<br/>100 cols | 12.9402 | 13.6202 | 14.7802 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.8401 | 12.09 | 13.4901 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 18.0601 | 19.2001 | 21.3902 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 20.1093 | 21.0993 | 23.7093 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 19.6325 | 21.1828 | 23.7534 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 43.3894 | 45.2993 | 47.5894 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 104.27 | 111.281 | 118.342 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 94.5217 | 97.9918 | 101.332 |
+
+### onnx_multithreaded#7
+
+| inferencing task config | p50_usecs | p90_usecs | p99_usecs |
+|:---------------------------------------|------------:|------------:|------------:|
+| 10 trees<br/>31 leaves<br/>10 cols | 21.8309 | 22.6609 | 24.3009 |
+| 10 trees<br/>31 leaves<br/>100 cols | 21.985 | 23.721 | 45.6108 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 22.2599 | 23.7498 | 25.5398 |
+| 100 trees<br/>31 leaves<br/>10 cols | 26.3017 | 29.4928 | 42.4632 |
+| 100 trees<br/>31 leaves<br/>100 cols | 25.7001 | 28.0201 | 42.1234 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 27.34 | 29.691 | 38.3805 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 36.7701 | 40.1901 | 105.035 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 36.8403 | 39.6914 | 109.843 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 33.7296 | 36.1596 | 68.4439 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 72.6305 | 91.5047 | 400.135 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 122.421 | 173.977 | 828.446 |
+| 5000 trees<br/>31 leaves<br/>1000 cols | 101.62 | 130.733 | 732.035 |
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 9b4b581f..79c0ac6d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,28 +1,35 @@
# benchmark common code
-mlflow==1.23.1
-omegaconf~=2.1
-mpi4py==3.1.1
-matplotlib==3.4.3
-psutil==5.8.0
+mlflow==1.29.0
+omegaconf==2.1.2
+mpi4py==3.1.3
+matplotlib==3.6.1
+psutil==5.9.3
# frameworks
-ray==1.9.2
-lightgbm-ray==0.1.2
-lightgbm==3.3.1
-treelite==2.1.0
-treelite_runtime==2.1.0
-flaml==0.9.6
+ray==2.0.0
+lightgbm-ray==0.1.7
+onnxruntime==1.12.1
+onnxmltools==1.11.1
+onnxconverter-common==1.12.2
+lightgbm==3.3.3
+treelite==3.0.0
+treelite-runtime==3.0.0
+FLAML==1.0.13
hpbandster==0.7.4
-ConfigSpace==0.5.0
-optuna==2.8.0
+ConfigSpace==0.6.0
+optuna==3.0.3
# pipelines
-shrike[pipeline]==1.14.7
-azure-ml-component==0.9.4.post1 # for component dsl
-azureml-train-core==1.36.0 # for azureml.train.hyperdrive
-azureml-dataset-runtime==1.36.0 # to register dataset
-hydra-core~=1.0.3
-typing_extensions==4.0.1 # for hydra
+shrike[pipeline]==1.31.10
+azure-core==1.20.1
+azure-storage-blob==12.11.0
+azure-ml-component==0.9.13.post1
+azureml-train-core==1.37.0
+azureml-dataset-runtime==1.46.0
+hydra-core==1.0.4
+typing_extensions==4.4.0
+azureml-mlflow==1.46.0
+mlflow-skinny==1.29.0
# unit testing
pytest==6.2.4
diff --git a/src/common/tasks.py b/src/common/tasks.py
index 95504774..df3d1b25 100644
--- a/src/common/tasks.py
+++ b/src/common/tasks.py
@@ -26,6 +26,9 @@ class inferencing_task:
class inferencing_variants:
framework: str = MISSING
build: Optional[str] = None
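+ # execution knobs consumed by the lightgbm_ort variants (threading, batch and parallel execution)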
+ threads: Optional[int] = 1
+ batch_exec: Optional[bool] = False
+ parallel_exec: Optional[bool] = False
@dataclass
class data_generation_task:
diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py
index b82a9690..21bbef06 100644
--- a/src/pipelines/azureml/lightgbm_inferencing.py
+++ b/src/pipelines/azureml/lightgbm_inferencing.py
@@ -8,7 +8,7 @@
> python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml
"""
# pylint: disable=no-member
-# NOTE: because it raises 'dict' has no 'outputs' member in dsl.pipeline construction
+# NOTE: because it raises "dict" has no "outputs" member in dsl.pipeline construction
import os
import sys
import json
@@ -25,8 +25,8 @@
from azure.ml.component.environment import Docker
# when running this script directly, needed to import common
-LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
-SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, 'src')
+LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, "src")
if SCRIPTS_SOURCES_ROOT not in sys.path:
logging.info(f"Adding {SCRIPTS_SOURCES_ROOT} to path")
@@ -67,6 +67,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name
lightgbm_python_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_python", "spec.yaml"))
lightgbm_c_api_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_c_api", "spec.yaml"))
lightgbm_ray_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ray", "spec.yaml"))
+lightgbm_ort_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ort", "spec.yaml"))
custom_win_cli_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "custom_win_cli", "spec.yaml"))
treelite_compile_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "model_transformation", "treelite_compile", "spec.yaml"))
treelite_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "treelite_python", "spec.yaml"))
@@ -81,7 +82,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name
@dsl.pipeline(name=f"lightgbm_inferencing", # pythonic name
description=f"LightGBM inferencing on user defined dataset/model",
- non_pipeline_parameters=['benchmark_custom_properties', 'config'])
+ non_pipeline_parameters=["benchmark_custom_properties", "config"])
def inferencing_task_pipeline_function(benchmark_custom_properties,
config,
data,
@@ -106,9 +107,9 @@ def inferencing_task_pipeline_function(benchmark_custom_properties,
custom_properties = benchmark_custom_properties.copy()
custom_properties.update({
# adding build settings (docker)
- 'framework_build' : variant.build or "default",
+ "framework_build" : variant.build or "default",
# adding variant_index to spot which variant is the reference
- 'variant_index' : variant_index
+ "variant_index" : variant_index
})
# passing as json string that each module parses to digest as tags/properties
custom_properties = json.dumps(custom_properties)
@@ -151,7 +152,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties,
data = data,
model = model,
verbose = False,
- custom_properties = custom_properties.replace("\"","\\\"")
+ custom_properties = custom_properties.replace("\"", "\\\"")
)
inferencing_step.runsettings.configure(target=config.compute.windows_cpu)
@@ -172,10 +173,30 @@ def inferencing_task_pipeline_function(benchmark_custom_properties,
data = data,
model = model,
verbose = False,
- custom_properties = custom_properties
+ custom_properties = custom_properties,
+ predict_disable_shape_check = predict_disable_shape_check,
+ )
+ inferencing_step.runsettings.configure(target=config.compute.linux_cpu)
+
+ elif variant.framework == "lightgbm_ort":
+ # call module with all the right arguments
+ inferencing_step = lightgbm_ort_score_module(
+ data = data,
+ model = model,
+ verbose = False,
+ run_parallel = variant.parallel_exec,
+ run_batch = variant.batch_exec,
+ n_threads = variant.threads,
+ custom_properties = custom_properties.replace("\"", "\\\"")
)
inferencing_step.runsettings.configure(target=config.compute.linux_cpu)
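+ # record the ORT execution mode in the variant comment so reports can tell the variants apart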
+ if variant.parallel_exec:
+ variant_comment.append(f"parallel execution")
+ if variant.batch_exec:
+ variant_comment.append(f"batch execution")
+ variant_comment.append(f"num threads {variant.threads}")
+
else:
raise NotImplementedError(f"framework {variant.framework} not implemented (yet)")
@@ -196,7 +217,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties,
# provide step readable display name
inferencing_step.node_name = format_run_name(f"inferencing_{variant.framework}_{variant_index}")
- # return {key: output}'
+ # return {key: output}
return pipeline_outputs
@@ -205,7 +226,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties,
non_pipeline_parameters=["workspace", "config"] # required to use config object
)
def inferencing_all_tasks(workspace, config):
- """Pipeline's main building function.
+ """Pipeline"s main building function.
Args:
workspace (azureml.core.Workspace): the AzureML workspace
@@ -222,9 +243,9 @@ def inferencing_all_tasks(workspace, config):
# create custom properties for this task
benchmark_custom_properties = {
- 'benchmark_name' : config.lightgbm_inferencing_config.benchmark_name,
- 'benchmark_dataset' : inferencing_task.data.name,
- 'benchmark_model' : inferencing_task.model.name,
+ "benchmark_name" : config.lightgbm_inferencing_config.benchmark_name,
+ "benchmark_dataset" : inferencing_task.data.name,
+ "benchmark_model" : inferencing_task.model.name,
}
inferencing_task_subgraph_step = inferencing_task_pipeline_function(
diff --git a/src/scripts/analysis/analyze.py b/src/scripts/analysis/analyze.py
index 3d9e0ffe..fbf2ac1c 100644
--- a/src/scripts/analysis/analyze.py
+++ b/src/scripts/analysis/analyze.py
@@ -1,5 +1,5 @@
# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT license.
+# Licensed under the MIT license.
"""
TreeLite/Python inferencing script
@@ -131,9 +131,10 @@ def fetch_benchmark_data(self, experiment_id, filter_string):
self.logger.info("Fetching Benchmark Runs")
# NOTE: returns a pandas dataframe
- self.benchmark_data = mlflow.search_runs(
- filter_string=filter_string
- )
+ self.benchmark_data = mlflow.search_runs(filter_string=filter_string)
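+ # keep only runs that completed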
+ self.benchmark_data = self.benchmark_data[
+ self.benchmark_data.status == "FINISHED"
+ ]
# extract all model information if present
if 'tags.benchmark_model' in self.benchmark_data.columns:
@@ -160,6 +161,15 @@ def fetch_benchmark_data(self, experiment_id, filter_string):
def report_inferencing(self, output_path):
""" Uses fetched or load data to produce a reporting for inferencing tasks. """
+
+ # Drop rows missing the inferencing time or the sample count
+ self.benchmark_data = self.benchmark_data.dropna(
+ subset=[
+ "metrics.time_inferencing",
+ "dataset_samples",
+ ]
+ )
+
# create variant readable id
self.benchmark_data['variant_id'] = self.benchmark_data['tags.framework'] + "#" + self.benchmark_data['tags.variant_index']
@@ -190,7 +200,6 @@ def report_inferencing(self, output_path):
variant_indices_sorted = [ variant_indices[k] for k in variant_indices_sorted_keys ]
variants.columns = ['index', 'framework', 'version', 'build', 'cpu count', 'num threads', 'machine', 'system']
- #variants = variants.transpose()
# reduce time_inferencing to predict time per request, in micro seconds
self.benchmark_data['avg_predict_time_usecs'] = self.benchmark_data['metrics.time_inferencing'].astype(float) / self.benchmark_data['dataset_samples'].astype(int) * 1000000
@@ -202,6 +211,13 @@ def report_inferencing(self, output_path):
+ self.benchmark_data['model_columns'] + " cols"
)
+ # Keep only the latest measurement per (inferencing task config, variant)
+ self.benchmark_data = (
+ self.benchmark_data.sort_values("start_time")
+ .groupby(["inferencing task config", "variant_id"])
+ .last()
+ ).reset_index()
+
# pivot metrics table
metrics = self.benchmark_data.pivot(
index=['inferencing task config'],
@@ -216,32 +232,38 @@ def report_inferencing(self, output_path):
for variant_id in variant_indices_sorted:
percentile_metrics_values = (
- self.benchmark_data.loc[self.benchmark_data['variant_id'] == variant_id][[
- 'inferencing task config',
- 'variant_id',
- 'metrics.batch_time_inferencing_p50_usecs',
- 'metrics.batch_time_inferencing_p90_usecs',
- 'metrics.batch_time_inferencing_p99_usecs'
- ]]
+ self.benchmark_data.loc[
+ self.benchmark_data["variant_id"] == variant_id
+ ][
+ [
+ "inferencing task config",
+ "variant_id",
+ "metrics.batch_latency_p50_usecs",
+ "metrics.batch_latency_p90_usecs",
+ "metrics.batch_latency_p99_usecs",
+ ]
+ ]
).dropna()
-
+
if len(percentile_metrics_values) == 0:
continue
- percentile_metrics = (
- percentile_metrics_values.pivot(
- index=['inferencing task config'],
- columns=['variant_id'],
- values=['metrics.batch_time_inferencing_p50_usecs', 'metrics.batch_time_inferencing_p90_usecs', 'metrics.batch_time_inferencing_p99_usecs']
- )
+ percentile_metrics = percentile_metrics_values.pivot(
+ index=["inferencing task config"],
+ columns=["variant_id"],
+ values=[
+ "metrics.batch_latency_p50_usecs",
+ "metrics.batch_latency_p90_usecs",
+ "metrics.batch_latency_p99_usecs",
+ ],
)
- percentile_metrics.columns = [ col[0].lstrip("metrics.batch_time_inferencing_") for col in percentile_metrics.columns ]
+ # use str.replace rather than lstrip: lstrip strips a character set, not a prefix
+ percentile_metrics.columns = [
+ col[0].replace("metrics.batch_latency_", "")
+ for col in percentile_metrics.columns
+ ]
percentile_metrics_reports.append(
- {
- 'variant_id' : variant_id,
- 'report' : percentile_metrics.to_markdown()
- }
+ {"variant_id": variant_id, "report": percentile_metrics.to_markdown()}
)
# load the jinja template from local files
@@ -297,10 +319,10 @@ def run(args, unknown_args=[]):
experiment_id=args.experiment_id,
filter_string=f"tags.task = 'score' and tags.benchmark_name = '{args.benchmark_id}'"
)
-
+
if args.data_save:
analysis_engine.save_benchmark_data(args.data_save)
-
+
analysis_engine.report_inferencing(args.output)
else:
diff --git a/src/scripts/data_processing/generate_data/conda_env.yaml b/src/scripts/data_processing/generate_data/conda_env.yaml
index 223c34f7..d454ecdb 100644
--- a/src/scripts/data_processing/generate_data/conda_env.yaml
+++ b/src/scripts/data_processing/generate_data/conda_env.yaml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- numpy==1.21.2
- scikit-learn==0.24.2
diff --git a/src/scripts/data_processing/generate_data/generate.py b/src/scripts/data_processing/generate_data/generate.py
index ba5445d7..0f1a9163 100644
--- a/src/scripts/data_processing/generate_data/generate.py
+++ b/src/scripts/data_processing/generate_data/generate.py
@@ -252,7 +252,8 @@ def run(self, args, logger, metrics_logger, unknown_args):
os.makedirs(args.output_train, exist_ok=True)
os.makedirs(args.output_test, exist_ok=True)
os.makedirs(args.output_inference, exist_ok=True)
- os.makedirs(args.external_header, exist_ok=True)
+ if args.external_header:
+ os.makedirs(args.external_header, exist_ok=True)
# transform delimiter
diff --git a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
index 2c403f37..b7f65aba 100644
--- a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
+++ b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- numpy==1.21.2
- scikit-learn==0.24.2
diff --git a/src/scripts/data_processing/partition_data/conda_env.yml b/src/scripts/data_processing/partition_data/conda_env.yml
index 39dabefc..395ab493 100644
--- a/src/scripts/data_processing/partition_data/conda_env.yml
+++ b/src/scripts/data_processing/partition_data/conda_env.yml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- numpy==1.21.2
- scikit-learn==0.24.2
diff --git a/src/scripts/inferencing/custom_win_cli/conda_env.yaml b/src/scripts/inferencing/custom_win_cli/conda_env.yaml
index 78eed94f..cd181b8e 100644
--- a/src/scripts/inferencing/custom_win_cli/conda_env.yaml
+++ b/src/scripts/inferencing/custom_win_cli/conda_env.yaml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- azureml-defaults==1.35.0
- azureml-mlflow==1.35.0
diff --git a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile
index f50022c5..4f22ca78 100644
--- a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1"
# Those arguments will NOT be used by AzureML
@@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/src/scripts/inferencing/lightgbm_ort/.amlignore b/src/scripts/inferencing/lightgbm_ort/.amlignore
new file mode 100644
index 00000000..749ccdaf
--- /dev/null
+++ b/src/scripts/inferencing/lightgbm_ort/.amlignore
@@ -0,0 +1,4 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
new file mode 100644
index 00000000..e161bfbb
--- /dev/null
+++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
@@ -0,0 +1,25 @@
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
+
+ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
+
+ARG lightgbm_version="3.3.3"
+
+# Create conda environment
+RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
+ python=3.8 pip=22.2.2
+
+# Prepend path to AzureML conda environment
+ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
+
+# Install pip dependencies
+RUN pip install 'numpy==1.23.5' \
+ 'matplotlib==3.6.2' \
+ 'psutil==5.9.4'\
+ 'azureml-mlflow==1.48.0' \
+ 'onnxruntime==1.13.1' \
+ 'onnxmltools==1.11.1' \
+ 'onnxconverter-common==1.13.0'
+
+# install lightgbm with mpi
+RUN pip install lightgbm==${lightgbm_version} && \
+ pip install 'protobuf==3.20.3'
\ No newline at end of file
diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py
new file mode 100644
index 00000000..f6e585c9
--- /dev/null
+++ b/src/scripts/inferencing/lightgbm_ort/score.py
@@ -0,0 +1,282 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+LightGBM/ONNX Runtime inferencing script
+"""
+import os
+import sys
+import logging
+import timeit
+import numpy as np
+from distutils.util import strtobool
+import lightgbm
+
+import onnxruntime as ort
+from onnxmltools.convert import convert_lightgbm
+from onnxconverter_common.data_types import FloatTensorType
+
+COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+
+if COMMON_ROOT not in sys.path:
+ logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH")
+ sys.path.append(str(COMMON_ROOT))
+
+# useful imports from common
+from common.components import RunnableScript
+from common.io import input_file_path
+
+
+class LightGBMONNXRTInferencingScript(RunnableScript):
+ def __init__(self):
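+        # NOTE: peek at raw sys.argv before argparse runs, so the framework
+        # identifier passed to RunnableScript encodes the run mode
+        # (parallel/batch/thread count) of this particular job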
+ framework = "onnx"
+ if "--run_parallel" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_parallel") + 1]):
+ framework += "_parallel"
+ if "--run_batch" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_batch") + 1]):
+ framework += "_batch"
+ if "--num_threads" in sys.argv:
+ framework += f"_threads_{sys.argv[sys.argv.index('--num_threads') + 1]}"
+
+ super().__init__(
+ task="score",
+ framework=framework,
+ framework_version="ONNXRT." + str(ort.__version__),
+ )
+
+ @classmethod
+ def get_arg_parser(cls, parser=None):
+ """Adds component/module arguments to a given argument parser.
+
+ Args:
+ parser (argparse.ArgumentParser): an argument parser instance
+
+ Returns:
+ ArgumentParser: the argument parser instance
+
+ Notes:
+ if parser is None, creates a new parser instance
+ """
+ # add generic arguments
+ parser = RunnableScript.get_arg_parser(parser)
+
+ group_i = parser.add_argument_group(f"Input Data [{__name__}:{cls.__name__}]")
+ group_i.add_argument(
+ "--data",
+ required=True,
+ type=input_file_path,
+ help="Inferencing data location (file path)",
+ )
+ group_i.add_argument(
+ "--data_format",
+ type=str,
+ choices=["CSV", "PARQUET", "PETASTORM"],
+ default="CSV",
+ )
+ group_i.add_argument(
+ "--model",
+ required=False,
+ type=input_file_path,
+ help="Exported model location (file path)",
+ )
+ group_i.add_argument(
+ "--output",
+ required=False,
+ default=None,
+ type=str,
+ help="Inferencing output location (file path)",
+ )
+
+ group_params = parser.add_argument_group(
+ f"Scoring parameters [{__name__}:{cls.__name__}]"
+ )
+ group_params.add_argument(
+ "--num_threads",
+ required=False,
+ default=0,
+ type=int,
+ help="number of threads",
+ )
+ group_params.add_argument(
+ "--run_parallel",
+ required=False,
+ default="False",
+ type=strtobool,
+ help="allows intra sample parallelism",
+ )
+ group_params.add_argument(
+ "--run_batch",
+ required=False,
+ default="False",
+ type=strtobool,
+ help="runs inference in a single batch",
+ )
+ group_params.add_argument(
+ "--predict_disable_shape_check",
+ required=False,
+ default="False",
+ type=strtobool,
+ help="See LightGBM documentation",
+ )
+
+ return parser
+
+ def run(self, args, logger, metrics_logger, unknown_args):
+ """Run script with arguments (the core of the component)
+
+ Args:
+ args (argparse.namespace): command line arguments provided to script
+ logger (logging.getLogger() for this script)
+ metrics_logger (common.metrics.MetricLogger)
+ unknown_args (list[str]): list of arguments not recognized during argparse
+ """
+ # record relevant parameters
+ metrics_logger.log_parameters(num_threads=args.num_threads)
+
+ # register logger for lightgbm logs
+ lightgbm.register_logger(logger)
+
+ # make sure the output argument exists
+ if args.output:
+ os.makedirs(args.output, exist_ok=True)
+ args.output = os.path.join(args.output, "predictions.txt")
+
+ logger.info(f"Loading model from {args.model}")
+ # BUG: https://github.com/onnx/onnxmltools/issues/338
+ with open(args.model, "r") as mf:
+ model_str = mf.read()
+ model_str = model_str.replace(
+ "objective=lambdarank", "objective=regression"
+ )
+ booster = lightgbm.Booster(model_str=model_str)
+
+ logger.info(f"Loading data for inferencing")
+ assert args.data_format == "CSV"
+ with metrics_logger.log_time_block("time_data_loading"):
+            # NOTE: bad practice, but the Dataset loader commented below would
+            # also accept libsvm format (not just numpy-readable CSV)
+            # inference_data = lightgbm.Dataset(
+            #     args.data, free_raw_data=False
+            # ).construct()
+            # inference_raw_data = inference_data.get_data()
+            # if type(inference_raw_data) == str:
+ inference_raw_data = np.loadtxt(
+ args.data, delimiter=","
+ ).astype(np.float32)[:, : booster.num_feature()]
+
+ logger.info(f"Converting model to ONNX")
+ onnx_input_types = [
+ (
+ "input",
+ FloatTensorType([None, inference_raw_data.shape[1]]),
+ )
+ ]
+ onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)
+
+ logger.info(f"Creating inference session")
+ sess_options = ort.SessionOptions()
+
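+        # num_threads == 0 keeps onnxruntime defaults; otherwise pin both the
+        # intra-op and inter-op thread pools to the requested count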
+ if args.num_threads > 0:
+ logger.info(f"Setting number of threads to {args.num_threads}")
+ sess_options.intra_op_num_threads = args.num_threads
+ sess_options.inter_op_num_threads = args.num_threads
+
+ if args.run_parallel:
+ logger.info(f"Creating multithreaded inference session")
+
+ sess_options.execution_mode = (
+ ort.ExecutionMode.ORT_PARALLEL
+ if args.run_parallel
+ else ort.ExecutionMode.ORT_SEQUENTIAL
+ )
+ sess_options.graph_optimization_level = (
+ ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+ )
+ sessionml = ort.InferenceSession(
+ onnx_ml_model.SerializeToString(), sess_options
+ )
+
+ # capture data shape as property
+ metrics_logger.set_properties(
+ inference_data_length=inference_raw_data.shape[0],
+ inference_data_width=inference_raw_data.shape[1],
+ )
+
+ logger.info(f"Running .predict()")
+
+        # warm up the session with single-row runs (first inferences pay one-time allocation costs)
+ for _ in range(100):
+ sessionml.run(
+ [sessionml.get_outputs()[0].name],
+ {sessionml.get_inputs()[0].name: inference_raw_data[0:1]},
+ )
+
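+        # full pass to materialize the predictions persisted below (when --output is set)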
+ predictions_array = sessionml.run(
+ [sessionml.get_outputs()[0].name],
+ {sessionml.get_inputs()[0].name: inference_raw_data},
+ )[0]
+
+ time_inferencing_per_query = []
+ timeit_loops = 10
+
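+        # two timing modes: batch scores the whole matrix per timeit loop,
+        # per-query scores one row at a time to measure request latency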
+ if args.run_batch:
+ batch_length = len(inference_raw_data)
+ prediction_time = timeit.timeit(
+ lambda: sessionml.run(
+ [sessionml.get_outputs()[0].name],
+ {sessionml.get_inputs()[0].name: inference_raw_data},
+ ),
+ number=timeit_loops,
+ )
+ prediction_time /= timeit_loops
+ metrics_logger.log_metric("time_inferencing", prediction_time)
+ time_inferencing_per_query = [prediction_time]
+ else:
+ batch_length = 1
+ for i in range(len(inference_raw_data)):
+ prediction_time = timeit.timeit(
+ lambda: sessionml.run(
+ [sessionml.get_outputs()[0].name],
+ {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]},
+ ),
+ number=timeit_loops,
+ )
+ prediction_time /= timeit_loops
+ time_inferencing_per_query.append(prediction_time)
+ metrics_logger.log_metric(
+ "time_inferencing", sum(time_inferencing_per_query)
+ )
+
+ logger.info(f"Batch size: {batch_length}")
+
+ # use helper to log latency with the right metric names
+ metrics_logger.log_inferencing_latencies(
+        time_inferencing_per_query,  # per-query latencies (a single entry in batch mode)
+ batch_length=batch_length,
+ factor_to_usecs=1000000.0, # values are in seconds
+ )
+
+ if args.output:
+ np.savetxt(
+ args.output,
+ predictions_array,
+ fmt="%f",
+ delimiter=",",
+ newline="\n",
+ header="",
+ footer="",
+ comments="# ",
+ encoding=None,
+ )
+
+
+def get_arg_parser(parser=None):
+ """To ensure compatibility with shrike unit tests"""
+    return LightGBMONNXRTInferencingScript.get_arg_parser(parser)
+
+
+def main(cli_args=None):
+ """To ensure compatibility with shrike unit tests"""
+    LightGBMONNXRTInferencingScript.main(cli_args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/scripts/inferencing/lightgbm_ort/spec.additional_includes b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes
new file mode 100644
index 00000000..13e7552d
--- /dev/null
+++ b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes
@@ -0,0 +1,2 @@
+../../../common/
+../../../../docker/
diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml
new file mode 100644
index 00000000..d4288abd
--- /dev/null
+++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml
@@ -0,0 +1,65 @@
+$schema: http://azureml/sdk-2-0/CommandComponent.json
+name: lightgbm_ort_score
+version: 0.0.7
+display_name: "LightGBM Inferencing (ONNX RT)"
+type: CommandComponent
+description: "LightGBM inferencing using the ONNX Runtime."
+is_deterministic: false
+inputs:
+ data:
+ type: AnyDirectory
+    description: path to the inference data directory
+ optional: false
+ model:
+ type: AnyDirectory
+    description: path to the model directory
+ optional: false
+ predict_disable_shape_check:
+ type: Boolean
+ description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
+ default: False
+ n_threads:
+ type: Integer
+ optional: true
+ run_parallel:
+ type: Boolean
+ optional: true
+ run_batch:
+ type: Boolean
+ optional: true
+ verbose:
+ type: Boolean
+ default: False
+ custom_properties:
+ type: String
+ description: additional custom tags for the job
+ optional: true
+
+outputs:
+ predictions:
+ type: AnyDirectory
+
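+# numactl binds memory (-m) and execution (-N) to NUMA node 0, presumably to keep benchmark timings consistent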
+command: >-
+ apt update -y &&
+ apt install numactl -y &&
+ numactl -m 0 -N 0 --
+ python score.py
+ --data {inputs.data}
+ --model {inputs.model}
+ --output {outputs.predictions}
+ [--num_threads {inputs.n_threads}]
+ [--run_parallel {inputs.run_parallel}]
+ [--run_batch {inputs.run_batch}]
+ --predict_disable_shape_check {inputs.predict_disable_shape_check}
+ --verbose {inputs.verbose}
+ [--custom_properties "{inputs.custom_properties}"]
+ --cluster_auto_setup True
+
+environment:
+ docker:
+ build:
+ # file path is resolved after additional includes
+ dockerfile: file:./default.dockerfile
+ conda:
+ userManagedDependencies: true
+ os: Linux
diff --git a/src/scripts/inferencing/lightgbm_python/default.dockerfile b/src/scripts/inferencing/lightgbm_python/default.dockerfile
index 419a5444..6ceda711 100644
--- a/src/scripts/inferencing/lightgbm_python/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_python/default.dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20211210.1"
# Those arguments will NOT be used by AzureML
@@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/src/scripts/inferencing/lightgbm_python/score.py b/src/scripts/inferencing/lightgbm_python/score.py
index 94505f5c..257e6603 100644
--- a/src/scripts/inferencing/lightgbm_python/score.py
+++ b/src/scripts/inferencing/lightgbm_python/score.py
@@ -8,7 +8,7 @@
import sys
import argparse
import logging
-import time
+import timeit
import numpy as np
from distutils.util import strtobool
import lightgbm
@@ -105,13 +105,21 @@ def run(self, args, logger, metrics_logger, unknown_args):
)
logger.info(f"Running .predict()")
- batch_start_time = time.monotonic()
predictions_array = booster.predict(
data=inference_raw_data,
num_threads=args.num_threads,
- predict_disable_shape_check=bool(args.predict_disable_shape_check)
+ predict_disable_shape_check=bool(args.predict_disable_shape_check),
)
- prediction_time = (time.monotonic() - batch_start_time)
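+    # the untimed predict above produces the saved predictions and likely
+    # doubles as a warmup; timeit then averages latency over repeated runs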
+ timeit_loops = 10
+ prediction_time = timeit.timeit(
+ lambda: booster.predict(
+ data=inference_raw_data,
+ num_threads=args.num_threads,
+ predict_disable_shape_check=bool(args.predict_disable_shape_check),
+ ),
+ number=timeit_loops,
+ )
+ prediction_time /= timeit_loops
metrics_logger.log_metric("time_inferencing", prediction_time)
# use helper to log latency with the right metric names
diff --git a/src/scripts/inferencing/lightgbm_python/spec.yaml b/src/scripts/inferencing/lightgbm_python/spec.yaml
index 7fcbebca..05c30f24 100644
--- a/src/scripts/inferencing/lightgbm_python/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_python/spec.yaml
@@ -1,6 +1,6 @@
$schema: http://azureml/sdk-2-0/CommandComponent.json
name: lightgbm_python_score
-version: 1.0.1
+version: 1.0.2
display_name: "LightGBM Inferencing (Python API)"
type: CommandComponent
description: "LightGBM inferencing using the Python API."
diff --git a/src/scripts/inferencing/lightgbm_ray/default.dockerfile b/src/scripts/inferencing/lightgbm_ray/default.dockerfile
index 67a87a2b..10b639dd 100644
--- a/src/scripts/inferencing/lightgbm_ray/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_ray/default.dockerfile
@@ -5,15 +5,15 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
# Install pip dependencies
RUN HOROVOD_WITH_TENSORFLOW=1 \
- pip install 'pandas>=1.1,<1.2' \
- 'numpy>=1.10,<1.20' \
+ pip install 'pandas==1.5.2' \
+ 'numpy==1.23.5' \
'matplotlib==3.4.3' \
'scipy~=1.5.0' \
'scikit-learn~=0.24.1' \
@@ -22,5 +22,6 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
'azureml-mlflow==1.35.0' \
'azureml-telemetry==1.35.0' \
'mpi4py==3.1.1' \
- 'ray==1.9.2' \
- 'lightgbm-ray==0.1.2'
+ 'protobuf==3.20.3' \
+ 'ray==2.1.0' \
+ 'lightgbm-ray==0.1.8'
diff --git a/src/scripts/inferencing/lightgbm_ray/score.py b/src/scripts/inferencing/lightgbm_ray/score.py
index 270efc5d..105725ef 100644
--- a/src/scripts/inferencing/lightgbm_ray/score.py
+++ b/src/scripts/inferencing/lightgbm_ray/score.py
@@ -8,7 +8,7 @@
import sys
import argparse
import logging
-import time
+import timeit
import numpy as np
from distutils.util import strtobool
@@ -99,13 +99,25 @@ def run(self, args, logger, metrics_logger, unknown_args):
)
logger.info(f"Running .predict()")
- batch_start_time = time.monotonic()
+
predictions_array = lightgbm_ray.predict(
booster,
inference_data,
- ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads)
+ ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads),
+ predict_disable_shape_check=bool(args.predict_disable_shape_check),
)
- prediction_time = (time.monotonic() - batch_start_time)
+
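+    # the untimed predict above produces the saved predictions and likely
+    # doubles as a warmup; timeit then averages latency over repeated runs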
+ timeit_loops = 10
+ prediction_time = timeit.timeit(
+ lambda: lightgbm_ray.predict(
+ booster,
+ inference_data,
+ ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads),
+ predict_disable_shape_check=bool(args.predict_disable_shape_check),
+ ),
+ number=timeit_loops,
+ )
+ prediction_time /= timeit_loops
metrics_logger.log_metric("time_inferencing", prediction_time)
# use helper to log latency with the right metric names
diff --git a/src/scripts/inferencing/lightgbm_ray/spec.yaml b/src/scripts/inferencing/lightgbm_ray/spec.yaml
index d94c7ce3..482632e5 100644
--- a/src/scripts/inferencing/lightgbm_ray/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_ray/spec.yaml
@@ -1,6 +1,6 @@
$schema: http://azureml/sdk-2-0/CommandComponent.json
name: lightgbm_ray_score
-version: 1.0.1
+version: 1.0.2
display_name: "LightGBM Inferencing (Ray)"
type: CommandComponent
description: "LightGBM inferencing using the Ray Python API."
diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml
index b31a7368..3d04b774 100644
--- a/src/scripts/inferencing/treelite_python/conda_env.yaml
+++ b/src/scripts/inferencing/treelite_python/conda_env.yaml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- azureml-defaults==1.35.0
- azureml-mlflow==1.35.0
diff --git a/src/scripts/model_transformation/treelite_compile/conda_env.yaml b/src/scripts/model_transformation/treelite_compile/conda_env.yaml
index b31a7368..3d04b774 100644
--- a/src/scripts/model_transformation/treelite_compile/conda_env.yaml
+++ b/src/scripts/model_transformation/treelite_compile/conda_env.yaml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- azureml-defaults==1.35.0
- azureml-mlflow==1.35.0
diff --git a/src/scripts/sample/conda_env.yaml b/src/scripts/sample/conda_env.yaml
index 0201788d..be0745eb 100644
--- a/src/scripts/sample/conda_env.yaml
+++ b/src/scripts/sample/conda_env.yaml
@@ -3,7 +3,7 @@ channels:
- defaults
dependencies:
- python=3.8
-- pip=20.0
+- pip=22.2.2
- pip:
- azureml-defaults==1.35.0
- azureml-mlflow==1.35.0
diff --git a/src/scripts/training/lightgbm_python/default.dockerfile b/src/scripts/training/lightgbm_python/default.dockerfile
index 6848faf6..8c12a696 100644
--- a/src/scripts/training/lightgbm_python/default.dockerfile
+++ b/src/scripts/training/lightgbm_python/default.dockerfile
@@ -31,7 +31,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/src/scripts/training/lightgbm_ray/default.dockerfile b/src/scripts/training/lightgbm_ray/default.dockerfile
index 67a87a2b..0f0db1aa 100644
--- a/src/scripts/training/lightgbm_ray/default.dockerfile
+++ b/src/scripts/training/lightgbm_ray/default.dockerfile
@@ -5,7 +5,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/src/scripts/training/ray_tune/default.dockerfile b/src/scripts/training/ray_tune/default.dockerfile
index fcf659e6..350d9fd6 100644
--- a/src/scripts/training/ray_tune/default.dockerfile
+++ b/src/scripts/training/ray_tune/default.dockerfile
@@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
diff --git a/src/scripts/training/ray_tune_distributed/default.dockerfile b/src/scripts/training/ray_tune_distributed/default.dockerfile
index 5bb04a04..acba0893 100644
--- a/src/scripts/training/ray_tune_distributed/default.dockerfile
+++ b/src/scripts/training/ray_tune_distributed/default.dockerfile
@@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
- python=3.8 pip=20.2.4
+ python=3.8 pip=22.2.2
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH