diff --git a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
index 664cf987..a0e01c0a 100644
--- a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
+++ b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -31,36 +31,51 @@ lightgbm_inferencing:
   benchmark_name: "benchmark-inferencing" # need to be provided at runtime!
 
   tasks:
-    - dataset: "data-synthetic-regression-10cols-10000samples-inference"
-      model: "model-synthetic-regression-10cols-10trees-31leaves"
-    - dataset: "data-synthetic-regression-10cols-10000samples-inference"
-      model: "model-synthetic-regression-10cols-100trees-31leaves"
-    - dataset: "data-synthetic-regression-10cols-10000samples-inference"
-      model: "model-synthetic-regression-10cols-1000trees-31leaves"
-    - dataset: "data-synthetic-regression-10cols-10000samples-inference"
-      model: "model-synthetic-regression-10cols-5000trees-31leaves"
-    - dataset: "data-synthetic-regression-100cols-10000samples-inference"
-      model: "model-synthetic-regression-100cols-10trees-31leaves"
-    - dataset: "data-synthetic-regression-100cols-10000samples-inference"
-      model: "model-synthetic-regression-100cols-100trees-31leaves"
-    - dataset: "data-synthetic-regression-100cols-10000samples-inference"
-      model: "model-synthetic-regression-100cols-1000trees-31leaves"
-    - dataset: "data-synthetic-regression-100cols-10000samples-inference"
-      model: "model-synthetic-regression-100cols-5000trees-31leaves"
-    - dataset: "data-synthetic-regression-1000cols-10000samples-inference"
-      model: "model-synthetic-regression-1000cols-10trees-31leaves"
-    - dataset: "data-synthetic-regression-1000cols-10000samples-inference"
-      model: "model-synthetic-regression-1000cols-100trees-31leaves"
-    - dataset: "data-synthetic-regression-1000cols-10000samples-inference"
-      model: "model-synthetic-regression-1000cols-1000trees-31leaves"
-    - dataset: "data-synthetic-regression-1000cols-10000samples-inference"
-      model: "model-synthetic-regression-1000cols-5000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-10cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-10cols-10trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-10cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-10cols-100trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-10cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-10cols-1000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-10cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-10cols-5000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-100cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-100cols-10trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-100cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-100cols-100trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-100cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-100cols-1000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-100cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-100cols-5000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-1000cols-10trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-1000cols-100trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-1000cols-1000trees-31leaves"
+    - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference"
+      model_dataset: "model-synthetic-regression-1000cols-5000trees-31leaves"
 
   variants:
     - framework: lightgbm_python
       build: dockers/lightgbm_cpu_mpi_pip.dockerfile
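+      # scoring parameters added per variant (see inferencing_variants in src/common/tasks.py):
+      # batch_size 1 scores one sample per predict() call, data_loader "numpy"
+      # expects csv inferencing data, and n_threads pins the scoring threads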
"data-synthetic-regression-1000cols-10000samples-inference" + model_dataset: "model-synthetic-regression-1000cols-100trees-31leaves" + - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference" + model_dataset: "model-synthetic-regression-1000cols-1000trees-31leaves" + - inferencing_dataset: "data-synthetic-regression-1000cols-10000samples-inference" + model_dataset: "model-synthetic-regression-1000cols-5000trees-31leaves" variants: - framework: lightgbm_python build: dockers/lightgbm_cpu_mpi_pip.dockerfile + batch_size: 1 + data_loader: "numpy" + n_threads: 1 - framework: lightgbm_python build: dockers/lightgbm_cpu_mpi_build.dockerfile + batch_size: 1 + data_loader: "numpy" + n_threads: 1 - framework: lightgbm_python build: dockers/lightgbm_cpu_mpi_custom.dockerfile + batch_size: 1 + data_loader: "numpy" + n_threads: 1 - framework: treelite_python + batch_size: 1 + data_loader: "numpy" + n_threads: 1 diff --git a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml index 8bd1c27f..18d9a937 100644 --- a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml +++ b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml @@ -35,8 +35,8 @@ lightgbm_inferencing: # list all the data/model pairs to run inferencing with tasks: - - dataset: "data-synthetic-regression-100cols-10000samples-inference" - model: "model-synthetic-regression-100cols-10trees-31leaves" + - inferencing_dataset: "data-synthetic-regression-100cols-10000samples-inference" + inferencing_model: "model-synthetic-regression-100cols-10trees-31leaves" # list all inferencing frameworks and their builds variants: diff --git a/pipelines/azureml/pipelines/lightgbm_inferencing.py b/pipelines/azureml/pipelines/lightgbm_inferencing.py index 1823d675..8c68f3eb 100644 --- a/pipelines/azureml/pipelines/lightgbm_inferencing.py +++ b/pipelines/azureml/pipelines/lightgbm_inferencing.py @@ -13,7 +13,7 @@ import sys import json from dataclasses import dataclass -from omegaconf import MISSING +from omegaconf import MISSING, OmegaConf from typing import Optional, List from azure.ml.component import dsl from shrike.pipeline.pipeline_helper import AMLPipelineHelper @@ -27,6 +27,7 @@ sys.path.append(str(LIGHTGBM_BENCHMARK_ROOT)) from common.tasks import inferencing_task, inferencing_variants +from common.aml import dataset_from_dstore_path class LightGBMInferencing(AMLPipelineHelper): """Runnable/reusable pipeline helper class @@ -127,6 +128,9 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo inferencing_step = treelite_score_module( data = data, compiled_model = treelite_compile_step.outputs.compiled_model, + data_loader = variant.data_loader, + batch_size = variant.batch_size, + n_threads = variant.n_threads, verbose = False, custom_properties = custom_properties ) @@ -138,10 +142,13 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo data = data, model = model, predict_disable_shape_check = predict_disable_shape_check, + data_loader = variant.data_loader, + batch_size = variant.batch_size, + n_threads = variant.n_threads, verbose = False, custom_properties = custom_properties ) - self.apply_smart_runsettings(inferencing_step) + self.apply_smart_runsettings(inferencing_step, windows=(variant.os == "Windows")) else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") @@ -179,23 +186,43 @@ def pipeline_instance(self, pipeline_function, config): """ # Here 
+                # load the given inferencing dataset
+                if inferencing_task.inferencing_dataset:
+                    inferencing_data = self.dataset_load(
+                        name = inferencing_task.inferencing_dataset,
+                        version = inferencing_task.inferencing_dataset_version # use latest if None
+                    )
+                elif inferencing_task.inferencing_datastore and inferencing_task.inferencing_datastore_path:
+                    inferencing_data = dataset_from_dstore_path(self.workspace(), inferencing_task.inferencing_datastore, inferencing_task.inferencing_datastore_path, validate=inferencing_task.inferencing_datastore_path_validate)
+                else:
+                    raise ValueError(f"In inferencing_task {inferencing_task}, you need to provide either inferencing_dataset or inferencing_datastore+inferencing_datastore_path")
+
+                # load the given inferencing model (from a dataset)
+                if inferencing_task.model_dataset:
+                    model_data = self.dataset_load(
+                        name = inferencing_task.model_dataset,
+                        version = inferencing_task.model_dataset_version # use latest if None
+                    )
+                elif inferencing_task.model_datastore and inferencing_task.model_datastore_path:
+                    model_data = dataset_from_dstore_path(self.workspace(), inferencing_task.model_datastore, inferencing_task.model_datastore_path, validate=inferencing_task.model_datastore_path_validate)
+                else:
+                    raise ValueError(f"In inferencing_task {inferencing_task}, you need to provide either model_dataset or model_datastore+model_datastore_path")
 
                 # create custom properties for this task
                 benchmark_custom_properties = {
                     'benchmark_name' : config.lightgbm_inferencing.benchmark_name,
-                    'benchmark_dataset' : inferencing_task.dataset,
-                    'benchmark_model' : inferencing_task.model,
+                    'benchmark_dataset' : inferencing_task.inferencing_dataset,
+                    'benchmark_model' : inferencing_task.model_dataset,
                 }
 
                 inferencing_task_subgraph_step = pipeline_function(
-                    data=data,
-                    model=model,
+                    data=inferencing_data,
+                    model=model_data,
                     predict_disable_shape_check=inferencing_task.predict_disable_shape_check or False,
                     benchmark_custom_properties=benchmark_custom_properties
                 )
diff --git a/requirements.txt b/requirements.txt
index f434d05c..e0ef678e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,10 +2,11 @@ lightgbm==3.2.1
 pytest==6.2.4
 pytest-cov==2.12.1
 pytest-mock==3.6.1
-mlflow==1.19.0
-shrike[pipeline]==1.11.1
+mlflow==1.20.2
+shrike[pipeline]==1.11.5
 hydra-core==1.0.7
 omegaconf==2.0.6
 treelite==1.3.0
 treelite_runtime==1.3.0
 mpi4py==3.1.1
+scikit-learn~=0.24.1
\ No newline at end of file
diff --git a/src/common/io.py b/src/common/io.py
index 8512a824..a3201432 100644
--- a/src/common/io.py
+++ b/src/common/io.py
@@ -1,6 +1,7 @@
 import os
 import argparse
 import logging
+import numpy as np
 
 def input_file_path(path):
     """ Resolve input path from AzureML.
@@ -156,3 +157,56 @@ def run(self, input_path, output_path):
             self.split_by_append(input_files, output_path, self.number)
         else:
             raise NotImplementedError(f"Mode {self.mode} not implemented.")
+
+
+class DataBatch():
+    # taken from https://datascience.stackexchange.com/questions/47623/how-feed-a-numpy-array-in-batches-in-keras
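+    # slices x (and optionally y) into batches of batch_size rows; iterating it
+    # relies on __getitem__ raising IndexError past the last batch, which is
+    # what terminates a plain for loop over this object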
+    def __init__(self, x, y=None, batch_size=0):
+        self.x = x
+        self.y = y
+        if batch_size == 0:
+            self.batch_size = x.shape[0]
+            self.num_batches = 1
+        else:
+            self.batch_size = batch_size
+            self.num_batches = int(np.ceil(x.shape[0] / batch_size))
+
+        self.batch_idx = np.array_split(range(x.shape[0]), self.num_batches)
+        logging.getLogger(__name__).info(f"Creating data batch with {self.num_batches} batches")
+
+    def __len__(self):
+        return len(self.batch_idx)
+
+    def __getitem__(self, idx):
+        return self.x[self.batch_idx[idx]], (self.y[self.batch_idx[idx]] if self.y is not None else None)
+
+
+def numpy_data_load(path, delimiter=","):
+    """Loads data using numpy (csv).
+
+    Args:
+        path (str): path to data file
+    Returns:
+        numpy_array, number_of_rows (int), number_of_cols (int)
+    """
+    raw_data = np.loadtxt(path, delimiter=delimiter)
+
+    return raw_data, raw_data.shape[0], raw_data.shape[1]
+
+def libsvm_data_load(path):
+    """Loads data using libsvm.
+
+    Args:
+        path (str): path to data file
+    Returns:
+        (x, y), number_of_rows (int), number_of_cols (int)
+    """
+    # deferred import: scikit-learn is only required when using the libsvm loader
+    from sklearn.datasets import load_svmlight_file
+
+    x, y = load_svmlight_file(path)
+
+    return (x,y), x.shape[0], x.shape[1]
diff --git a/src/common/tasks.py b/src/common/tasks.py
index c5fd4417..0237933b 100644
--- a/src/common/tasks.py
+++ b/src/common/tasks.py
@@ -4,9 +4,26 @@
 
 @dataclass
 class inferencing_task:
-    dataset: str = MISSING
-    model: str = MISSING
+    # specify either by dataset name
+    inferencing_dataset: Optional[str] = None
+    inferencing_dataset_version: Optional[str] = None
+    # or by datastore+path
+    inferencing_datastore: Optional[str] = None
+    inferencing_datastore_path: Optional[str] = None
+    inferencing_datastore_path_validate: bool = True
+
+    # specify either by model dataset name
+    model_dataset: Optional[str] = None
+    model_dataset_version: Optional[str] = None
+    # or by datastore+path
+    model_datastore: Optional[str] = None
+    model_datastore_path: Optional[str] = None
+    model_datastore_path_validate: bool = True
+
+    # task tag
     task_key: Optional[str] = None
+
+    # turn to True if model and dataset have different shapes
     predict_disable_shape_check: bool = False
 
 @dataclass
@@ -15,6 +32,10 @@ class inferencing_variants:
     build: Optional[str] = None
     os: str = "Linux" # linux or windows, linux by default
 
+    data_loader: str = "lightgbm"
+    batch_size: int = 0 # all data in 1 batch by default
+    n_threads: int = 1
+
 @dataclass
 class data_generation_task:
     task: str = MISSING
diff --git a/src/scripts/lightgbm_python/dockers/lightgbm_cpu_pip_win.dockerfile b/src/scripts/lightgbm_python/dockers/lightgbm_cpu_pip_win.dockerfile
new file mode 100644
index 00000000..15775598
--- /dev/null
+++ b/src/scripts/lightgbm_python/dockers/lightgbm_cpu_pip_win.dockerfile
@@ -0,0 +1,28 @@
+FROM mcr.microsoft.com/azureml/windows-servercore-1809
+
+ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
+
+# Create conda environment
+RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
+    python=3.8 pip=20.2.4
+
+# Prepend path to AzureML conda environment
+ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
+
+# Install pip dependencies
+RUN pip install 'pandas>=1.1,<1.2' \
+                'numpy>=1.10,<1.20' \
+                'scipy~=1.5.0' \
+                'scikit-learn~=0.24.1' \
+                'azureml-core==1.30.0' \
+                'azureml-defaults==1.30.0' \
+                'azureml-mlflow==1.30.0' \
+                'azureml-telemetry==1.30.0'
+
+# Install lightgbm from pip (no MPI in this build)
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install 'cmake==3.21.0' && \
+    pip install 'lightgbm==3.2.1'
+
+# Make the conda environment libraries discoverable
+ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
diff --git a/src/scripts/lightgbm_python/score.py b/src/scripts/lightgbm_python/score.py
index 9728a517..db5bd053 100644
--- a/src/scripts/lightgbm_python/score.py
+++ b/src/scripts/lightgbm_python/score.py
@@ -11,6 +11,7 @@
 from distutils.util import strtobool
 import lightgbm
 import numpy
+import time
 
 # Add the right path to PYTHONPATH
 # so that you can import from common.*
@@ -22,8 +23,7 @@
 
 # useful imports from common
 from common.metrics import MetricsLogger
-from common.io import input_file_path
-
+from common.io import input_file_path, DataBatch, libsvm_data_load, numpy_data_load
 
 def get_arg_parser(parser=None):
     """Adds component/module arguments to a given argument parser.
@@ -44,12 +44,16 @@ def get_arg_parser(parser=None):
     group_i = parser.add_argument_group("Input Data")
     group_i.add_argument("--data",
         required=True, type=input_file_path, help="Inferencing data location (file path)")
+    group_i.add_argument("--data_loader",
+        required=False, type=str, default="lightgbm", choices=["lightgbm", "libsvm", "numpy"], help="use numpy for csv, libsvm for libsvm, or lightgbm for both")
     group_i.add_argument("--model",
         required=False, type=input_file_path, help="Exported model location (file path)")
     group_i.add_argument("--output",
         required=False, default=None, type=str, help="Inferencing output location (file path)")
 
     group_params = parser.add_argument_group("Scoring parameters")
+    group_params.add_argument("--batch_size",
+        required=False, type=int, default=0, help="size of batches (default: all data in 1 batch)")
     group_params.add_argument("--num_threads",
         required=False, default=1, type=int, help="number of threads")
     group_params.add_argument("--predict_disable_shape_check",
@@ -102,7 +106,9 @@ def run(args, unknown_args=[]):
 
     # record relevant parameters
     metrics_logger.log_parameters(
-        num_threads=args.num_threads
+        batch_size=args.batch_size,
+        data_loader=args.data_loader,
+        num_threads=args.num_threads,
     )
 
     # register logger for lightgbm logs
@@ -113,28 +119,63 @@ def run(args, unknown_args=[]):
         os.makedirs(args.output, exist_ok=True)
         args.output = os.path.join(args.output, "predictions.txt")
 
-    logger.info(f"Loading model from {args.model}")
-    booster = lightgbm.Booster(model_file=args.model)
+    if args.batch_size > 0 and args.data_loader == "lightgbm":
+        logger.warning("--data_loader lightgbm does not support --batch_size > 0 (currently)")
+        args.batch_size = 0
 
     logger.info(f"Loading data for inferencing")
     with metrics_logger.log_time_block("time_data_loading"):
-        # NOTE: this is bad, but allows for libsvm format (not just numpy)
-        inference_data = lightgbm.Dataset(args.data, free_raw_data=False).construct()
-        inference_raw_data = inference_data.get_data()
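+        # the lightgbm loader handles both csv and libsvm files; the numpy and
+        # libsvm loaders return a raw matrix that can be sliced into batches below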
+        if args.data_loader == "lightgbm":
+            inference_data = lightgbm.Dataset(args.data, free_raw_data=False).construct()
+            inference_raw_data = inference_data.get_data()
+            row_count = inference_data.num_data()
+            feature_count = inference_data.num_feature()
+        elif args.data_loader == "libsvm":
+            inference_data, row_count, feature_count = libsvm_data_load(args.data)
+            inference_raw_data = inference_data[0] # (x,y) -> x
+        elif args.data_loader == "numpy":
+            inference_data, row_count, feature_count = numpy_data_load(args.data)
+            inference_raw_data = inference_data
+        else:
+            raise NotImplementedError(f"--data_loader {args.data_loader} is not implemented.")
+
+    logger.info(f"Loading model from {args.model}")
+    booster = lightgbm.Booster(model_file=args.model)
 
     # capture data shape as property
     metrics_logger.set_properties(
-        inference_data_length = inference_data.num_data(),
-        inference_data_width = inference_data.num_feature()
+        inference_data_length = row_count,
+        inference_data_width = feature_count
     )
 
     logger.info(f"Running .predict()")
+    batch_run_times = [] # collect time for each batch
     with metrics_logger.log_time_block("time_inferencing"):
-        booster.predict(
-            data=inference_raw_data,
-            num_threads=args.num_threads,
-            predict_disable_shape_check=bool(args.predict_disable_shape_check)
-        )
+        if args.batch_size > 0:
+            inference_batches = DataBatch(x=inference_raw_data, y=None, batch_size=args.batch_size)
+            for data_batch, _ in inference_batches:
+                batch_start_time = time.time()
+                booster.predict(
+                    data=data_batch,
+                    num_threads=args.num_threads,
+                    predict_disable_shape_check=bool(args.predict_disable_shape_check)
+                )
+                batch_run_times.append(time.time() - batch_start_time)
+        else:
+            booster.predict(
+                data=inference_raw_data,
+                num_threads=args.num_threads,
+                predict_disable_shape_check=bool(args.predict_disable_shape_check)
+            )
+
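+    # per-batch latency percentiles only make sense when more than one batch was timed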
+    if len(batch_run_times) > 1:
+        batch_run_times = numpy.array(batch_run_times)
+        metrics_logger.log_metric("batch_time_inferencing_p50_usecs", numpy.percentile(batch_run_times, 50) * 1000000)
+        metrics_logger.log_metric("batch_time_inferencing_p90_usecs", numpy.percentile(batch_run_times, 90) * 1000000)
+        metrics_logger.log_metric("batch_time_inferencing_p99_usecs", numpy.percentile(batch_run_times, 99) * 1000000)
 
     # Important: close logging session before exiting
     metrics_logger.close()
diff --git a/src/scripts/lightgbm_python/score_spec.yaml b/src/scripts/lightgbm_python/score_spec.yaml
index 8348224e..5a4e73a0 100644
--- a/src/scripts/lightgbm_python/score_spec.yaml
+++ b/src/scripts/lightgbm_python/score_spec.yaml
@@ -18,6 +18,15 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     optional: true
+  data_loader:
+    type: Enum
+    enum:
+      - lightgbm
+      - libsvm
+      - numpy
+  batch_size:
+    type: Integer
+    default: 0
   n_threads:
     type: Integer
     optional: true
@@ -33,6 +42,8 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
+  --data_loader {inputs.data_loader}
+  --batch_size {inputs.batch_size}
   [--num_threads {inputs.n_threads}]
   [--predict_disable_shape_check {inputs.predict_disable_shape_check}]
   [--verbose {inputs.verbose}]
diff --git a/src/scripts/treelite_python/conda_env.yaml b/src/scripts/treelite_python/conda_env.yaml
index 10eb17c2..c2cc4254 100644
--- a/src/scripts/treelite_python/conda_env.yaml
+++ b/src/scripts/treelite_python/conda_env.yaml
@@ -9,3 +9,4 @@ dependencies:
     - azureml-mlflow==1.30.0
     - treelite==1.3.0
     - treelite_runtime==1.3.0
+    - scikit-learn~=0.24.1
diff --git a/src/scripts/treelite_python/score.py b/src/scripts/treelite_python/score.py
index 9d5e69f2..520cb007 100644
--- a/src/scripts/treelite_python/score.py
+++ b/src/scripts/treelite_python/score.py
@@ -8,6 +8,7 @@
 import sys
 import argparse
 import logging
+import time
 import numpy
 from distutils.util import strtobool
 import pandas as pd
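+                # treelite's Predictor.predict() expects a DMatrix, so each raw
+                # batch is wrapped here; note that the conversion cost is part
+                # of the measured per-batch time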
@@ -23,7 +24,7 @@
 
 # useful imports from common
 from common.metrics import MetricsLogger
-from common.io import input_file_path
+from common.io import input_file_path, DataBatch, libsvm_data_load, numpy_data_load
 
 
 def get_arg_parser(parser=None):
@@ -45,12 +46,16 @@ def get_arg_parser(parser=None):
     group_i = parser.add_argument_group("Input Data")
     group_i.add_argument("--data",
         required=True, type=input_file_path, help="Inferencing data location (file path)")
+    group_i.add_argument("--data_loader",
+        required=False, type=str, default="libsvm", choices=["libsvm", "numpy"], help="use numpy for csv or libsvm for libsvm data")
     group_i.add_argument("--so_path",
         required=False, default = "./mymodel.so" , help="full path to model so")
     group_i.add_argument("--output",
         required=False, default=None, type=str, help="Inferencing output location (file path)")
 
     group_params = parser.add_argument_group("Scoring parameters")
+    group_params.add_argument("--batch_size",
+        required=False, type=int, default=0, help="size of batches (default: all data in 1 batch)")
     group_params.add_argument("--nthreads",
         required=False, default=1, type=int, help="number of threads")
 
@@ -116,18 +121,49 @@ def run(args, unknown_args=[]):
 
     logger.info(f"Loading data for inferencing")
     with metrics_logger.log_time_block("time_data_loading"):
-        my_data = pd.read_csv(args.data).to_numpy()
-
-        predictor = treelite_runtime.Predictor(
-            args.so_path,
-            verbose=True,
-            nthread=args.nthreads
-        )
-        dmat = treelite_runtime.DMatrix(my_data)
+        if args.data_loader == "libsvm":
+            inference_data, row_count, feature_count = libsvm_data_load(args.data)
+            inference_raw_data = inference_data[0] # (x,y) -> x
+        elif args.data_loader == "numpy":
+            inference_raw_data, row_count, feature_count = numpy_data_load(args.data)
+        else:
+            raise NotImplementedError(f"--data_loader {args.data_loader} is not implemented.")
+
+        inference_dmat = treelite_runtime.DMatrix(inference_raw_data)
+
+    logger.info(f"Loading model from {args.so_path}")
+    predictor = treelite_runtime.Predictor(
+        args.so_path,
+        verbose=True,
+        nthread=args.nthreads
+    )
+
+    # capture data shape as property
+    metrics_logger.set_properties(
+        inference_data_length = row_count,
+        inference_data_width = feature_count
+    )
 
     logger.info(f"Running .predict()")
+    batch_run_times = [] # collect time for each batch
     with metrics_logger.log_time_block("time_inferencing"):
-        predictor.predict(dmat)
+        if args.batch_size > 0:
+            inference_batches = DataBatch(x=inference_raw_data, y=None, batch_size=args.batch_size)
+            for data_batch, _ in inference_batches:
+                batch_start_time = time.time()
+                predictor.predict(treelite_runtime.DMatrix(data_batch))
+                batch_run_times.append(time.time() - batch_start_time)
+        else:
+            predictor.predict(inference_dmat)
+
+    if len(batch_run_times) > 1:
+        batch_run_times = numpy.array(batch_run_times)
+        metrics_logger.log_metric("batch_time_inferencing_p50_usecs", numpy.percentile(batch_run_times, 50) * 1000000)
+        metrics_logger.log_metric("batch_time_inferencing_p90_usecs", numpy.percentile(batch_run_times, 90) * 1000000)
+        metrics_logger.log_metric("batch_time_inferencing_p99_usecs", numpy.percentile(batch_run_times, 99) * 1000000)
 
     # Important: close logging session before exiting
     metrics_logger.close()
diff --git a/src/scripts/treelite_python/score_spec.yaml b/src/scripts/treelite_python/score_spec.yaml
index d325c0a5..43805269 100644
--- a/src/scripts/treelite_python/score_spec.yaml
+++ b/src/scripts/treelite_python/score_spec.yaml
@@ -15,6 +15,19 @@ inputs:
     type: AnyDirectory
     description: directory to the model
     optional: false
+  data_loader:
+    type: Enum
+    enum:
+      - libsvm
+      - numpy
+  batch_size:
+    type: Integer
+    default: 0
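+  # n_threads matches the lightgbm score_spec input of the same name, so both
+  # frameworks accept the same variant parameters; it maps to --nthreads below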
+  n_threads:
+    type: Integer
+    optional: true
   verbose:
     type: Boolean
     optional: true
@@ -28,6 +39,9 @@ command: >-
   python score.py
   --data {inputs.data}
   --so_path {inputs.compiled_model}
+  --data_loader {inputs.data_loader}
+  --batch_size {inputs.batch_size}
+  [--nthreads {inputs.n_threads}]
   [--verbose {inputs.verbose}]
   [--custom_properties {inputs.custom_properties}]