Adding lightgbm-transform to train component by piyushmadan · Pull Request #253 · microsoft/lightgbm-benchmark

requirements.txt

-Original file line number
+Diff line change
@@ Expand Up / @@ -8,7 +8,7 @@ psutil==5.8.0 @@
     # frameworks
     ray==1.9.2
     lightgbm-ray==0.1.2
-    lightgbm==3.3.0
+    lightgbm==3.3.1
     treelite==2.1.0
     treelite_runtime==2.1.0
     flaml==0.9.6
@@ Expand Down @@

src/scripts/data_processing/generate_data/generate.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -252,6 +252,7 @@ def run(self, args, logger, metrics_logger, unknown_args): @@
             os.makedirs(args.output_train, exist_ok=True)
             os.makedirs(args.output_test, exist_ok=True)
             os.makedirs(args.output_inference, exist_ok=True)
+            os.makedirs(args.external_header, exist_ok=True)
             # transform delimiter
@@ Expand Down @@

src/scripts/data_processing/generate_data/spec.yaml

-Original file line number
+Diff line change
@@ Expand Up / @@ -111,7 +111,7 @@ outputs: @@
         type: AnyDirectory
       external_header:
         type: AnyDirectory
     command: >-
       python generate.py
       --type {inputs.learning_task}
@@ Expand All / @@ -135,7 +135,7 @@ command: >- @@
       [--custom_properties {inputs.custom_properties}]
       [--docs_per_query {inputs.docs_per_query}]
       [--n_label_classes {inputs.n_label_classes}]
     environment:
       conda:
         # conda file path is resolved after additional includes
@@ Expand Down @@

src/scripts/training/lightgbm_python/default.dockerfile

-Original file line number
+Diff line change
@@ -1,10 +1,29 @@
     FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
-    LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20210118.1"
+    LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.1/20211210.1"
+    # https://github.com/microsoft/lightgbm-transform/blob/main/docs/Installation-Guide.rst
+    # Install CMake, gcc, g++, boost.
+    RUN apt-get update && apt-get -y upgrade && DEBIAN_FRONTEND="noninteractive" apt-get install -y libboost-all-dev gcc g++ wget cmake git curl libtinfo5
+    # Install LLVM with RTTI feature.
+    WORKDIR /root
+    RUN wget https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-3.5.1.tar.gz && tar zxvf llvmorg-3.5.1.tar.gz
+    WORKDIR /root/llvm-project-llvmorg-3.5.1/llvm
+    RUN mkdir build && cd build && cmake -DLLVM_REQUIRES_RTTI=1 .. && make -j4 && make install
+    # Install bond.
+    WORKDIR /root
+    RUN git clone --recursive https://github.com/microsoft/bond.git
+    RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y clang zlib1g-dev
+    RUN curl -sSL https://get.haskellstack.org/ | sh
+    WORKDIR /root/bond
+    RUN mkdir build && cd build && cmake -DBOND_ENABLE_GRPC=FALSE .. && make -j4 && make install
     # These arguments are NOT used by AzureML;
     # they exist only so that the lightgbm-benchmark build can check
     # the dockerfiles in a PR against their actual branch
-    ARG lightgbm_version="3.3.0"
+    ARG lightgbm_version="3.3.1"
     ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
@@ Expand All / @@ -27,7 +46,13 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ @@
                     'azureml-mlflow==1.35.0' \
                     'azureml-telemetry==1.35.0' \
                     'mpi4py==3.1.1' \
-                    lightgbm==${lightgbm_version}
+                    'omegaconf'
+    # install lightgbm with mpi
+    RUN pip install --upgrade pip setuptools wheel && \
+        pip install 'cmake==3.21.0' && \
+        pip install lightgbm==${lightgbm_version} --install-option=--mpi &&\
+        pip install lightgbm-transform==${lightgbm_version}
     # This is needed for mpi to locate libpython
     ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH

src/scripts/training/lightgbm_python/spec.yaml

-Original file line number
+Diff line change
@@ Expand Up / @@ -30,6 +30,10 @@ inputs: @@
         type: AnyDirectory
         description: Directory to the testing data
         optional: false
+      parser_config_file:
+        type: AnyDirectory
+        description: directory to the transform parser config
+        optional: true
       construct:
         type: Boolean
         description: "Use lazy initialization during data loading phase (both train and test datasets)"
@@ Expand Down Expand Up / @@ -167,6 +171,7 @@ launcher: @@
         python train.py
         --train {inputs.train}
         --test {inputs.test}
+        [--parser_config_file {inputs.parser_config_file}]
         --construct {inputs.construct}
         --header {inputs.header}
         --label_column {inputs.label_column}
@@ Expand Down @@

src/scripts/training/lightgbm_python/train.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -63,6 +63,7 @@ def get_arg_parser(cls, parser=None): @@
             group_i.add_argument("--header", required=False, default=False, type=strtobool)
             group_i.add_argument("--label_column", required=False, default="0", type=str)
             group_i.add_argument("--group_column", required=False, default=None, type=str)
+            group_i.add_argument("--parser_config_file", required=False, default=None, type=str, help="transform parser config location (file path)")
             group_o = parser.add_argument_group(f"Outputs [{__name__}:{cls.__name__}]")
             group_o.add_argument("--export_model",
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Adding lightgbm-transform to train component #253

Uh oh!

Diff view

Diff view

There are no files selected for viewing