Skip to content
This repository was archived by the owner on Apr 8, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ psutil==5.8.0
# frameworks
ray==1.9.2
lightgbm-ray==0.1.2
lightgbm==3.3.0
lightgbm==3.3.1
treelite==2.1.0
treelite_runtime==2.1.0
flaml==0.9.6
Expand Down
1 change: 1 addition & 0 deletions src/scripts/data_processing/generate_data/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def run(self, args, logger, metrics_logger, unknown_args):
os.makedirs(args.output_train, exist_ok=True)
os.makedirs(args.output_test, exist_ok=True)
os.makedirs(args.output_inference, exist_ok=True)
os.makedirs(args.external_header, exist_ok=True)


# transform delimiter
Expand Down
4 changes: 2 additions & 2 deletions src/scripts/data_processing/generate_data/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ outputs:
type: AnyDirectory
external_header:
type: AnyDirectory

command: >-
python generate.py
--type {inputs.learning_task}
Expand All @@ -135,7 +135,7 @@ command: >-
[--custom_properties {inputs.custom_properties}]
[--docs_per_query {inputs.docs_per_query}]
[--n_label_classes {inputs.n_label_classes}]

environment:
conda:
# conda file path is resolved after additional includes
Expand Down
31 changes: 28 additions & 3 deletions src/scripts/training/lightgbm_python/default.dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20210118.1"
LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.1/20211210.1"

# https://github.com/microsoft/lightgbm-transform/blob/main/docs/Installation-Guide.rst
# Install CMake, gcc, g++, boost.
RUN apt-get update && apt-get -y upgrade && DEBIAN_FRONTEND="noninteractive" apt-get install -y libboost-all-dev gcc g++ wget cmake git curl libtinfo5

# Install LLVM with RTTI feature.
WORKDIR /root
RUN wget https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-3.5.1.tar.gz && tar zxvf llvmorg-3.5.1.tar.gz
WORKDIR /root/llvm-project-llvmorg-3.5.1/llvm
RUN mkdir build && cd build && cmake -DLLVM_REQUIRES_RTTI=1 .. && make -j4 && make install

# Install bond.
WORKDIR /root
RUN git clone --recursive https://github.com/microsoft/bond.git
RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y clang zlib1g-dev
RUN curl -sSL https://get.haskellstack.org/ | sh
WORKDIR /root/bond
RUN mkdir build && cd build && cmake -DBOND_ENABLE_GRPC=FALSE .. && make -j4 && make install


# Those arguments will NOT be used by AzureML
# they are here just to allow for lightgbm-benchmark build to actually check
# dockerfiles in a PR against their actual branch
ARG lightgbm_version="3.3.0"
ARG lightgbm_version="3.3.1"

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm

Expand All @@ -27,7 +46,13 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
'azureml-mlflow==1.35.0' \
'azureml-telemetry==1.35.0' \
'mpi4py==3.1.1' \
lightgbm==${lightgbm_version}
'omegaconf'

# install lightgbm with mpi
RUN pip install --upgrade pip setuptools wheel && \
pip install 'cmake==3.21.0' && \
pip install lightgbm==${lightgbm_version} --install-option=--mpi &&\
pip install lightgbm-transform==${lightgbm_version}

# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
5 changes: 5 additions & 0 deletions src/scripts/training/lightgbm_python/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ inputs:
type: AnyDirectory
description: Directory to the testing data
optional: false
parser_config_file:
type: AnyDirectory
description: directory to the transform parser config
optional: true
construct:
type: Boolean
description: "Use lazy initialization during data loading phase (both train and test datasets)"
Expand Down Expand Up @@ -167,6 +171,7 @@ launcher:
python train.py
--train {inputs.train}
--test {inputs.test}
[--parser_config_file {inputs.parser_config_file}]
--construct {inputs.construct}
--header {inputs.header}
--label_column {inputs.label_column}
Expand Down
1 change: 1 addition & 0 deletions src/scripts/training/lightgbm_python/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def get_arg_parser(cls, parser=None):
group_i.add_argument("--header", required=False, default=False, type=strtobool)
group_i.add_argument("--label_column", required=False, default="0", type=str)
group_i.add_argument("--group_column", required=False, default=None, type=str)
group_i.add_argument("--parser_config_file", required=False, default=None, type=str, help="transform parser config location (file path)")

group_o = parser.add_argument_group(f"Outputs [{__name__}:{cls.__name__}]")
group_o.add_argument("--export_model",
Expand Down