diff --git a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
index dfe29347..abd67da4 100644
--- a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
+++ b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -28,7 +28,7 @@ module_loader: # module loading params
 
 lightgbm_inferencing: # name of your particular benchmark
-  benchmark_name: "benchmark-inferencing-20211109.3" # need to be provided at runtime!
+  benchmark_name: "benchmark-inferencing-20211124.1" # need to be provided at runtime!
 
   tasks:
     - data:
@@ -82,11 +82,19 @@ lightgbm_inferencing:
   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
       build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+    - framework: treelite_python # v1.3.0
+      num_threads: 1
+      batch_size: 0 # use whole file as batch
diff --git a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
index 79712320..9647e538 100644
--- a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
+++ b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
@@ -43,12 +43,21 @@ lightgbm_inferencing:
   # list all inferencing frameworks and their builds
   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
       build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+    - framework: treelite_python # v1.3.0
+      num_threads: 1
+      batch_size: 0 # use whole file as batch
+
diff --git a/pipelines/azureml/pipelines/lightgbm_inferencing.py b/pipelines/azureml/pipelines/lightgbm_inferencing.py
index dca94266..e4091d57 100644
--- a/pipelines/azureml/pipelines/lightgbm_inferencing.py
+++ b/pipelines/azureml/pipelines/lightgbm_inferencing.py
@@ -130,6 +130,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = treelite_score_module(
             data = data,
             compiled_model = treelite_compile_step.outputs.compiled_model,
+            num_threads = variant.num_threads,
+            batch_size = variant.batch_size,
             verbose = False,
             custom_properties = custom_properties
         )
@@ -140,6 +142,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_c_api_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -151,6 +155,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_cli_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -162,6 +168,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_python_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
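Note on the pipeline wiring above: every framework branch now forwards the variant's num_threads, but only the treelite branch also forwards batch_size; for the three lightgbm modules the batch_size argument stays commented out until their scripts support batching. A minimal sketch of that dispatch pattern (build_step and score_modules are illustrative names, not part of the pipeline code; treelite actually takes compiled_model rather than model):

    def build_step(variant, data, model, score_modules):
        # illustrative only: forward per-variant knobs to the matching module
        kwargs = dict(data=data, model=model, num_threads=variant.num_threads)
        if variant.framework == "treelite_python":
            # treelite is the only module wired for batching so far
            kwargs["batch_size"] = variant.batch_size
        return score_modules[variant.framework](**kwargs)
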
diff --git a/src/common/io.py b/src/common/io.py
index 84898249..2e8317ad 100644
--- a/src/common/io.py
+++ b/src/common/io.py
@@ -8,6 +8,7 @@
 import os
 import argparse
 import logging
+import csv
 
 def input_file_path(path):
     """ Argparse type to resolve input path as single file from directory.
@@ -225,3 +226,42 @@ def run(self, input_path, output_path):
             self.split_by_append(input_files, output_path, self.number)
         else:
             raise NotImplementedError(f"Mode {self.mode} not implemented.")
+
+
+class CustomLightGBMDataBatchIterator():
+    def __init__(self, file_path, batch_size=0, file_format="csv", **kwargs):
+        self.file_path = file_path
+        self.batch_size = batch_size
+        self.file_format = file_format
+        self.reader_options = kwargs # passed through to csv.reader
+
+    def iter(self):
+        if self.file_format == "csv":
+            with open(self.file_path, "r") as i_file:
+                reader = csv.reader(i_file, **self.reader_options)
+
+                batch = []
+                if self.batch_size == 0:
+                    # use the entire file as a batch
+                    batch = [
+                        [
+                            float(col) for col in row # convert all values to float for lightgbm
+                        ] for row in reader
+                    ]
+                elif self.batch_size >= 1:
+                    # create batches of batch_size rows each
+                    for row in reader:
+                        batch.append(
+                            [ float(col) for col in row ] # convert all values to float for lightgbm
+                        )
+                        if len(batch) >= self.batch_size:
+                            yield batch
+                            batch = [] # reset batch
+                else:
+                    raise ValueError("batch_size must be >= 0")
+
+                # any remaining batch, or the whole file
+                if len(batch) > 0:
+                    yield batch
+        else:
+            raise NotImplementedError(f"file_format={self.file_format} is not implemented yet.")
diff --git a/src/common/tasks.py b/src/common/tasks.py
index 616f1172..d8b5d39b 100644
--- a/src/common/tasks.py
+++ b/src/common/tasks.py
@@ -24,10 +24,15 @@ class inferencing_task:
 
 @dataclass
 class inferencing_variants:
+    # framework
     framework: str = MISSING
     build: Optional[str] = None
     os: str = "Linux" # linux or windows, linux by default
 
+    # parameters
+    batch_size: int = 0 # use whole file as batch
+    num_threads: int = 1 # use only one thread
+
 @dataclass
 class data_generation_task:
     task: str = MISSING
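CustomLightGBMDataBatchIterator.iter() yields lists of float rows and passes any extra keyword arguments through to csv.reader; batch_size=0 (the default, matching the dataclass above) yields the whole file as a single batch. A short usage sketch, assuming sample.csv is a placeholder for a headerless, comma-separated file of numeric features:

    from common.io import CustomLightGBMDataBatchIterator

    # yields lists of rows, each row a list of floats
    for batch in CustomLightGBMDataBatchIterator("sample.csv", batch_size=1024).iter():
        print(len(batch), len(batch[0]))  # rows in this batch, features per row

    # batch_size=0 would instead yield one batch containing every row in the file
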
diff --git a/src/scripts/inferencing/lightgbm_c_api/spec.yaml b/src/scripts/inferencing/lightgbm_c_api/spec.yaml
index 59d9bfad..fd664142 100644
--- a/src/scripts/inferencing/lightgbm_c_api/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_c_api/spec.yaml
@@ -18,7 +18,7 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
     default: 1
   verbose:
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  --num_threads {inputs.n_threads}
+  --num_threads {inputs.num_threads}
   --output {outputs.predictions}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
diff --git a/src/scripts/inferencing/lightgbm_cli/spec.yaml b/src/scripts/inferencing/lightgbm_cli/spec.yaml
index 9b51d5ab..41bfa111 100644
--- a/src/scripts/inferencing/lightgbm_cli/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_cli/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     optional: true
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   lightgbm_exec_path:
     type: String
     optional: true
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   [--lightgbm_exec_path {inputs.lightgbm_exec_path}]
   [--predict_disable_shape_check {inputs.predict_disable_shape_check}]
   [--verbose {inputs.verbose}]
diff --git a/src/scripts/inferencing/lightgbm_python/spec.yaml b/src/scripts/inferencing/lightgbm_python/spec.yaml
index 7fcbebca..cf40e260 100644
--- a/src/scripts/inferencing/lightgbm_python/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_python/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   verbose:
     type: Boolean
     default: False
@@ -38,7 +38,7 @@ command: >-
   --data {inputs.data}
   --model {inputs.model}
   --output {outputs.predictions}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml
index b31a7368..0d08ea4c 100644
--- a/src/scripts/inferencing/treelite_python/conda_env.yaml
+++ b/src/scripts/inferencing/treelite_python/conda_env.yaml
@@ -12,3 +12,4 @@ dependencies:
   - treelite_runtime==2.1.0
   - pandas>=1.1,<1.2
   - numpy>=1.10,<1.20
+  - matplotlib==3.4.3
diff --git a/src/scripts/inferencing/treelite_python/score.py b/src/scripts/inferencing/treelite_python/score.py
index 26fef4bc..e846c9a5 100644
--- a/src/scripts/inferencing/treelite_python/score.py
+++ b/src/scripts/inferencing/treelite_python/score.py
@@ -8,9 +8,9 @@
 import sys
 import argparse
 import logging
-import numpy
+import time
+import numpy as np
 from distutils.util import strtobool
-import pandas as pd
 import treelite, treelite_runtime
 
 # Add the right path to PYTHONPATH
@@ -18,20 +18,20 @@
 COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
 if COMMON_ROOT not in sys.path:
-    print(f"Adding {COMMON_ROOT} to PYTHONPATH")
+    logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH")
     sys.path.append(str(COMMON_ROOT))
 
 # useful imports from common
 from common.components import RunnableScript
-from common.io import input_file_path
+from common.io import input_file_path, CustomLightGBMDataBatchIterator
 
 
 class TreeLightInferencingScript(RunnableScript):
     def __init__(self):
         super().__init__(
-            task = 'score',
+            task = "score",
             framework = 'treelite_python',
-            framework_version = treelite.__version__
+            framework_version = "PYTHON_API." + str(treelite.__version__)
         )
 
     @classmethod
@@ -61,6 +61,8 @@ def get_arg_parser(cls, parser=None):
         group_params = parser.add_argument_group("Scoring parameters")
         group_params.add_argument("--num_threads",
             required=False, default=1, type=int, help="number of threads")
+        group_params.add_argument("--batch_size",
+            required=False, default=0, type=int, help="size of batches for predict call")
 
         return parser
 
@@ -76,31 +78,66 @@
         """
         # record relevant parameters
        metrics_logger.log_parameters(
-            num_threads=args.num_threads
+            num_threads=args.num_threads,
+            batch_size=args.batch_size,
         )
 
+        # make sure the output argument exists
         if args.output:
-            # make sure the output argument exists
             os.makedirs(args.output, exist_ok=True)
 
             # and create your own file inside the output
             args.output = os.path.join(args.output, "predictions.txt")
 
+        logger.info(f"Loading model from {args.so_path}")
+        predictor = treelite_runtime.Predictor(
+            args.so_path,
+            verbose=True,
+            nthread=args.num_threads
+        )
 
-        logger.info(f"Loading data for inferencing")
-        with metrics_logger.log_time_block("time_data_loading"):
-            my_data = pd.read_csv(args.data).to_numpy()
-
-        predictor = treelite_runtime.Predictor(
-            args.so_path,
-            verbose=True,
-            nthread=args.num_threads
-        )
-        dmat = treelite_runtime.DMatrix(my_data)
+        # accumulate predictions and per-batch latencies
+        predictions = []
+        time_inferencing_per_batch = []
+        batch_lengths = []
+
+        # loop through batches
+        for batch in CustomLightGBMDataBatchIterator(args.data, batch_size=args.batch_size, file_format="csv").iter():
+            if len(batch) == 0:
+                break
+            batch_lengths.append(len(batch))
+
+            # transform into dense matrix for treelite
+            batch_data = np.array(batch)
+            batch_dmat = treelite_runtime.DMatrix(batch_data)
+
+            # run prediction on batch
+            batch_start_time = time.monotonic()
+            predictions.extend(predictor.predict(batch_dmat))
+            time_inferencing_per_batch.append(time.monotonic() - batch_start_time) # seconds
+
+        # log overall time
+        metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_batch))
+
+        # use helper to log latency with the right metric names
+        metrics_logger.log_inferencing_latencies(
+            time_inferencing_per_batch,
+            batch_length=batch_lengths,
+            factor_to_usecs=1000000.0 # values above are in seconds
+        )
 
-        logger.info(f"Running .predict()")
-        with metrics_logger.log_time_block("time_inferencing"):
-            predictor.predict(dmat)
+        if args.output:
+            np.savetxt(
+                args.output,
+                predictions,
+                fmt='%f',
+                delimiter=',',
+                newline='\n',
+                header='',
+                footer='',
+                comments='# ',
+                encoding=None
+            )
 
 
 def get_arg_parser(parser=None):
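In the new run() above, per-batch wall-clock times are collected in seconds and handed to metrics_logger.log_inferencing_latencies together with the batch lengths and factor_to_usecs=1000000.0. A rough sketch of the kind of per-prediction aggregation such a helper can derive from those inputs (summarize_latencies and the metric names are assumptions for illustration, not the helper's actual implementation):

    import numpy as np

    def summarize_latencies(times_per_batch, batch_lengths, factor_to_usecs=1000000.0):
        # convert per-batch seconds into per-prediction microseconds
        per_prediction_usecs = (
            np.array(times_per_batch) * factor_to_usecs / np.array(batch_lengths)
        )
        return {
            "latency_avg_usecs": float(np.mean(per_prediction_usecs)),
            "latency_p50_usecs": float(np.percentile(per_prediction_usecs, 50)),
            "latency_p99_usecs": float(np.percentile(per_prediction_usecs, 99)),
        }
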
diff --git a/src/scripts/inferencing/treelite_python/spec.yaml b/src/scripts/inferencing/treelite_python/spec.yaml
index 5e9b18f6..d1e7a804 100644
--- a/src/scripts/inferencing/treelite_python/spec.yaml
+++ b/src/scripts/inferencing/treelite_python/spec.yaml
@@ -15,9 +15,12 @@ inputs:
     type: AnyDirectory
     description: directory to the model
     optional: false
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
+  batch_size:
+    type: Integer
+    default: 0 # default: use whole file as a batch
   verbose:
     type: Boolean
     default: False
@@ -31,7 +34,8 @@ command: >-
   python score.py
   --data {inputs.data}
   --so_path {inputs.compiled_model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
+  --batch_size {inputs.batch_size}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
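Taken together, the treelite scoring path now loads the compiled model once and scores dense batches against it. A minimal standalone sketch of the same treelite_runtime calls, assuming a compiled model at ./model.so and a headerless numeric CSV at ./data.csv (both placeholder paths):

    import numpy as np
    import treelite_runtime

    # load the compiled model once, pinned to a single thread
    predictor = treelite_runtime.Predictor("./model.so", verbose=True, nthread=1)

    # batch_size=0 behavior: score the whole file as one dense batch
    data = np.loadtxt("./data.csv", delimiter=",", ndmin=2)
    predictions = predictor.predict(treelite_runtime.DMatrix(data))
    np.savetxt("./predictions.txt", predictions, fmt="%f", delimiter=",")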