From 18c68a57605b5ec9d35923b76436e3bc1b215092 Mon Sep 17 00:00:00 2001 From: Sze Wai Yuen Date: Fri, 23 Jun 2023 16:37:08 -0700 Subject: [PATCH 1/4] Batch processing API documentation --- .../model-usage/batch_processing_api.rst | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 docs/model-dev-guide/model-usage/batch_processing_api.rst diff --git a/docs/model-dev-guide/model-usage/batch_processing_api.rst b/docs/model-dev-guide/model-usage/batch_processing_api.rst new file mode 100644 index 00000000000..b441999cd4c --- /dev/null +++ b/docs/model-dev-guide/model-usage/batch_processing_api.rst @@ -0,0 +1,116 @@ +##################### + Torch Batch Processing API +##################### +.. caution:: + This is an experimental API and may change in the future. + +.. _torch_batch_processing_ug: + +Overview +============= +This API takes in (1) a dataset and (2) a user-defined processor class and runs distributed data +processing. + +Under the hood, the API helps you to: + +- shard a dataset by number of workers available +- apply user-defined logic to each batch of data +- handle synchronization between workers +- track job progress to enable preemption and resumption of trial + +This is a flexible API that can be used for many different tasks, including batch (offline) inference. + +The API +============= +The main arguments to torch_batch_process is processor class and dataset. + +.. code:: python + + torch_batch_process( + batch_processor_cls=MyProcessor + dataset=dataset + ) +[Placeholder for torch_batch_process API docstring pull] + +Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the __init__ and +process_batch. The other lifecycle functions are optional. + +[Placeholder for TorchBatchProcessor API docstring pull] + +During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which contains useful methods +that can be used within the TorchBatchProcessor class. 
+ +[Placeholder for TorchBatchProcessorContext API docstring pull] + +Example: Batch (offline) inference +============= + +torch_batch_process API can support batch inference use case. The example below is taken from [PLACEHOLDER]. + +Step 1: define a InferenceProcessor. You should initialize your model in the __init__ function of InferenceProcessor. + +.. code:: python + + """ + Define custom processor class + """ + class InferenceProcessor(TorchBatchProcessor): + def __init__(self, context): + self.context = context + self.model = context.prepare_model_for_inference(get_model()) + self.output = [] + self.last_index = 0 + + def process_batch(self, batch, batch_idx) -> None: + model_input = batch[0] + model_input = self.context.to_device(model_input) + + with torch.no_grad(): + with self.profiler as p: + pred = self.model(model_input) + p.step() + output = {"predictions": pred, "input": batch} + self.output.append(output) + + self.last_index = batch_idx + + def on_checkpoint_start(self): + # During checkpoint, we persist prediction result + if len(self.output) == 0: + return + file_name = f"prediction_output_{self.last_index}" + with self.context.upload_path() as path: + file_path = pathlib.Path(path, file_name) + torch.save(self.output, file_path) + + self.output = [] + +Step 2: Initialize the dataset to be processed + +.. code:: python + + """ + Initialize dataset + """ + transform = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + ) + with filelock.FileLock(os.path.join("/tmp", "inference.lock")): + inference_data = tv.datasets.CIFAR10( + root="/data", train=False, download=True, transform=transform + ) +Step 3: Pass the InferenceProcessor class and the dataset to torch_batch_process + +.. 
code:: python + + """ + Pass processor class and dataset to torch_batch_process + """ + torch_batch_process( + InferenceProcessor, + dataset, + batch_size=64, + checkpoint_interval=10 + ) + + From cfcb3a1269a7a2156d4f2327a7c63ee375e864c2 Mon Sep 17 00:00:00 2001 From: Sze Wai Yuen Date: Tue, 27 Jun 2023 12:30:36 -0700 Subject: [PATCH 2/4] incorporated Tara's edit #1 --- .../model-usage/batch-process-api-ug.rst | 154 ++++++++++++++++++ .../model-usage/batch_processing_api.rst | 116 ------------- .../api-torch-batch-process-reference.rst | 49 ++++++ 3 files changed, 203 insertions(+), 116 deletions(-) create mode 100644 docs/model-dev-guide/model-usage/batch-process-api-ug.rst delete mode 100644 docs/model-dev-guide/model-usage/batch_processing_api.rst create mode 100644 docs/reference/batch-processing/api-torch-batch-process-reference.rst diff --git a/docs/model-dev-guide/model-usage/batch-process-api-ug.rst b/docs/model-dev-guide/model-usage/batch-process-api-ug.rst new file mode 100644 index 00000000000..dd8c569bde3 --- /dev/null +++ b/docs/model-dev-guide/model-usage/batch-process-api-ug.rst @@ -0,0 +1,154 @@ +:orphan: + +.. _torch_batch_processing_ug: + +############################ + Torch Batch Processing API +############################ + +.. meta:: + :description: Learn how to use the Torch Batch Processing API. + +In this guide, you'll learn about the :ref:`torch_batch_process_api_ref` and how to perform batch +inference (also known as offline inference). + ++---------------------------------------------------------------------+ +| Visit the API reference | ++=====================================================================+ +| :ref:`torch_batch_process_api_ref` | ++---------------------------------------------------------------------+ + +.. caution:: + + This is an experimental API and may change at any time. 
+ +********** + Overview +********** + +The Torch Batch Processing API takes in (1) a dataset and (2) a user-defined processor class and +runs distributed data processing. + +With this API, you can perform the following tasks: + +- shard a dataset by number of workers available +- apply user-defined logic to each batch of data +- handle synchronization between workers +- track job progress to enable preemption and resumption of trial + +This is a flexible API that can be used for many different tasks, including batch (offline) +inference. + +******* + Usage +******* + +The main arguments to torch_batch_process are processor class and dataset. + +.. code:: python + + torch_batch_process( + batch_processor_cls=MyProcessor + dataset=dataset + ) + +class or function e.g., ``torch_batch_process`` +=============================================== + +Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the +__init__ and process_batch. The other lifecycle functions are optional. + +class or function e.g., ``torch_batch_processor`` +================================================= + +During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which +contains useful methods that can be used within the TorchBatchProcessor class. + +class or function e.g., ``torch_batch_processor`` +================================================= + +Add content here and a ref link to the class or function in the reference page. + +****************************************** + How To Perform Batch (Offline) Inference +****************************************** + +In this section, we'll learn how to perform batch inference using the ``torch_batch_process`` API. +For more information about this use case or to obtain the tutorial files, visit this URL. + +Step 1: Define a InferenceProcessor +=================================== + +The first step is to Define a InferenceProcessor. 
You should initialize your model in the __init__ +function of InferenceProcessor. + +.. code:: python + + """ + Define custom processor class + """ + class InferenceProcessor(TorchBatchProcessor): + def __init__(self, context): + self.context = context + self.model = context.prepare_model_for_inference(get_model()) + self.output = [] + self.last_index = 0 + + def process_batch(self, batch, batch_idx) -> None: + model_input = batch[0] + model_input = self.context.to_device(model_input) + + with torch.no_grad(): + with self.profiler as p: + pred = self.model(model_input) + p.step() + output = {"predictions": pred, "input": batch} + self.output.append(output) + + self.last_index = batch_idx + + def on_checkpoint_start(self): + # During checkpoint, we persist prediction result + if len(self.output) == 0: + return + file_name = f"prediction_output_{self.last_index}" + with self.context.upload_path() as path: + file_path = pathlib.Path(path, file_name) + torch.save(self.output, file_path) + + self.output = [] + +Step 2: Initialize the Dataset +============================== + +Initialize the dataset you want to process. + +.. code:: python + + """ + Initialize dataset + """ + transform = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + ) + with filelock.FileLock(os.path.join("/tmp", "inference.lock")): + inference_data = tv.datasets.CIFAR10( + root="/data", train=False, download=True, transform=transform + ) + +Step 3: Pass the InferenceProcessor Class and Dataset +===================================================== + +Finally, pass the InferenceProcessor class and the dataset to ``torch_batch_process``. + +.. 
code:: python + + """ + Pass processor class and dataset to torch_batch_process + """ + torch_batch_process( + InferenceProcessor, + dataset, + batch_size=64, + checkpoint_interval=10 + ) diff --git a/docs/model-dev-guide/model-usage/batch_processing_api.rst b/docs/model-dev-guide/model-usage/batch_processing_api.rst deleted file mode 100644 index b441999cd4c..00000000000 --- a/docs/model-dev-guide/model-usage/batch_processing_api.rst +++ /dev/null @@ -1,116 +0,0 @@ -##################### - Torch Batch Processing API -##################### -.. caution:: - This is an experimental API and may change in the future. - -.. _torch_batch_processing_ug: - -Overview -============= -This API takes in (1) a dataset and (2) a user-defined processor class and runs distributed data -processing. - -Under the hood, the API helps you to: - -- shard a dataset by number of workers available -- apply user-defined logic to each batch of data -- handle synchronization between workers -- track job progress to enable preemption and resumption of trial - -This is a flexible API that can be used for many different tasks, including batch (offline) inference. - -The API -============= -The main arguments to torch_batch_process is processor class and dataset. - -.. code:: python - - torch_batch_process( - batch_processor_cls=MyProcessor - dataset=dataset - ) -[Placeholder for torch_batch_process API docstring pull] - -Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the __init__ and -process_batch. The other lifecycle functions are optional. - -[Placeholder for TorchBatchProcessor API docstring pull] - -During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which contains useful methods -that can be used within the TorchBatchProcessor class. 
- -[Placeholder for TorchBatchProcessorContext API docstring pull] - -Example: Batch (offline) inference -============= - -torch_batch_process API can support batch inference use case. The example below is taken from [PLACEHOLDER]. - -Step 1: define a InferenceProcessor. You should initialize your model in the __init__ function of InferenceProcessor. - -.. code:: python - - """ - Define custom processor class - """ - class InferenceProcessor(TorchBatchProcessor): - def __init__(self, context): - self.context = context - self.model = context.prepare_model_for_inference(get_model()) - self.output = [] - self.last_index = 0 - - def process_batch(self, batch, batch_idx) -> None: - model_input = batch[0] - model_input = self.context.to_device(model_input) - - with torch.no_grad(): - with self.profiler as p: - pred = self.model(model_input) - p.step() - output = {"predictions": pred, "input": batch} - self.output.append(output) - - self.last_index = batch_idx - - def on_checkpoint_start(self): - # During checkpoint, we persist prediction result - if len(self.output) == 0: - return - file_name = f"prediction_output_{self.last_index}" - with self.context.upload_path() as path: - file_path = pathlib.Path(path, file_name) - torch.save(self.output, file_path) - - self.output = [] - -Step 2: Initialize the dataset to be processed - -.. code:: python - - """ - Initialize dataset - """ - transform = transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] - ) - with filelock.FileLock(os.path.join("/tmp", "inference.lock")): - inference_data = tv.datasets.CIFAR10( - root="/data", train=False, download=True, transform=transform - ) -Step 3: Pass the InferenceProcessor class and the dataset to torch_batch_process - -.. 
code:: python - - """ - Pass processor class and dataset to torch_batch_process - """ - torch_batch_process( - InferenceProcessor, - dataset, - batch_size=64, - checkpoint_interval=10 - ) - - diff --git a/docs/reference/batch-processing/api-torch-batch-process-reference.rst b/docs/reference/batch-processing/api-torch-batch-process-reference.rst new file mode 100644 index 00000000000..11e771b4888 --- /dev/null +++ b/docs/reference/batch-processing/api-torch-batch-process-reference.rst @@ -0,0 +1,49 @@ +:orphan: + +.. _torch_batch_process_api_ref: + +################################################### + ``name of det torch batch process`` API Reference +################################################### + +.. meta:: + :description: Familiarize yourself with the Torch Batch Process API. + ++--------------------------------------------+ +| User Guide | ++============================================+ +| :ref:`torch_batch_processing_ug` | ++--------------------------------------------+ + +.. caution:: + + This is an experimental API and may change at any time. + +The main arguments to torch_batch_process is processor class and dataset. + +.. code:: python + + torch_batch_process( + batch_processor_cls=MyProcessor + dataset=dataset + ) + +******************************************************** + Placeholder for torch_batch_process API docstring pull +******************************************************** + +Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the +__init__ and process_batch. The other lifecycle functions are optional. + +******************************************************** + Placeholder for TorchBatchProcessor API docstring pull +******************************************************** + +During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which +contains useful methods that can be used within the TorchBatchProcessor class. 
+ +*************************************************************** + Placeholder for TorchBatchProcessorContext API docstring pull +*************************************************************** + +Add the Sphinx autoclass directive here. From a42a822b284f3ae1d3b355faab2e53a2d8ae554f Mon Sep 17 00:00:00 2001 From: Tara Date: Thu, 29 Jun 2023 13:15:36 -0500 Subject: [PATCH 3/4] Add autoclass sections Add formatted autoclass sections for classes in determined/pytorch/experimental/_torch_batch_process.py --- .../api-torch-batch-process-reference.rst | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/reference/batch-processing/api-torch-batch-process-reference.rst b/docs/reference/batch-processing/api-torch-batch-process-reference.rst index 11e771b4888..2b0564dd65a 100644 --- a/docs/reference/batch-processing/api-torch-batch-process-reference.rst +++ b/docs/reference/batch-processing/api-torch-batch-process-reference.rst @@ -28,22 +28,20 @@ The main arguments to torch_batch_process is processor class and dataset. dataset=dataset ) -******************************************************** - Placeholder for torch_batch_process API docstring pull -******************************************************** +************************************************** + ``determined.pytorch.TorchBatchProcessorContext`` +************************************************** -Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the -__init__ and process_batch. The other lifecycle functions are optional. +.. 
autoclass:: determined.pytorch.experimental.TorchBatchProcessorContext + :members: + :member-order: bysource -******************************************************** - Placeholder for TorchBatchProcessor API docstring pull -******************************************************** -During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which -contains useful methods that can be used within the TorchBatchProcessor class. +******************************************* + ``determined.pytorch.TorchBatchProcessor`` +******************************************* -*************************************************************** - Placeholder for TorchBatchProcessorContext API docstring pull -*************************************************************** +.. autoclass:: determined.pytorch.experimental.TorchBatchProcessor + :members: + :member-order: bysource -Add the Sphinx autoclass directive here. From af68e8f4c112124bac3ba58605b9033663f30c22 Mon Sep 17 00:00:00 2001 From: Tara Charter Date: Thu, 29 Jun 2023 13:58:48 -0500 Subject: [PATCH 4/4] Add autoclasses and formatting --- .../model-usage/batch-process-api-ug.rst | 33 +++++++++---------- .../api-torch-batch-process-reference.rst | 16 ++++----- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/docs/model-dev-guide/model-usage/batch-process-api-ug.rst b/docs/model-dev-guide/model-usage/batch-process-api-ug.rst index dd8c569bde3..fa54c804860 100644 --- a/docs/model-dev-guide/model-usage/batch-process-api-ug.rst +++ b/docs/model-dev-guide/model-usage/batch-process-api-ug.rst @@ -52,35 +52,34 @@ The main arguments to torch_batch_process are processor class and dataset. dataset=dataset ) -class or function e.g., ``torch_batch_process`` -=============================================== - -Processor should be a subclass of TorchBatchProcessor. The two functions you must implement are the -__init__ and process_batch. The other lifecycle functions are optional. 
 
-class or function e.g., ``torch_batch_processor``
-=================================================
+``TorchBatchProcessor``
+=======================
 
-During __init__ of TorchBatchProcessor, we pass in a TorchBatchProcessorContext object, which
-contains useful methods that can be used within the TorchBatchProcessor class.
+Your processor should be a subclass of
+:class:`~determined.pytorch.experimental.TorchBatchProcessor`.
+The two functions you must implement are
+``__init__`` and ``process_batch``. The other lifecycle functions are optional.
 
-class or function e.g., ``torch_batch_processor``
-=================================================
+``TorchBatchProcessorContext``
+==============================
 
-Add content here and a ref link to the class or function in the reference page.
+During ``__init__`` of :class:`~determined.pytorch.experimental.TorchBatchProcessor`,
+we pass in a :class:`~determined.pytorch.experimental.TorchBatchProcessorContext` object,
+which contains useful methods that can be used within the ``TorchBatchProcessor`` class.
 
 ******************************************
  How To Perform Batch (Offline) Inference
 ******************************************
 
-In this section, we'll learn how to perform batch inference using the ``torch_batch_process`` API.
-For more information about this use case or to obtain the tutorial files, visit this URL.
+In this section, we'll learn how to perform batch inference using the Torch Batch Processing API.
+For more information about this use case or to obtain the tutorial files, visit this [placeholder
+URL].
 
 Step 1: Define a InferenceProcessor
 ===================================
 
-The first step is to Define a InferenceProcessor. You should initialize your model in the __init__
-function of InferenceProcessor.
+The first step is to define an InferenceProcessor. You should initialize your model in the ``__init__``
+function of the InferenceProcessor.
 
 .. code:: python
 
diff --git a/docs/reference/batch-processing/api-torch-batch-process-reference.rst b/docs/reference/batch-processing/api-torch-batch-process-reference.rst
index 2b0564dd65a..3dfe3ed34d1 100644
--- a/docs/reference/batch-processing/api-torch-batch-process-reference.rst
+++ b/docs/reference/batch-processing/api-torch-batch-process-reference.rst
@@ -2,9 +2,9 @@
 
 .. _torch_batch_process_api_ref:
 
-###################################################
- ``name of det torch batch process`` API Reference
-###################################################
+#######################################
+ ``Torch Batch Process`` API Reference
+#######################################
 
 .. meta::
    :description: Familiarize yourself with the Torch Batch Process API.
@@ -28,20 +28,18 @@ The main arguments to torch_batch_process is processor class and dataset.
         dataset=dataset
     )
 
-**************************************************
+***************************************************
  ``determined.pytorch.TorchBatchProcessorContext``
-**************************************************
+***************************************************
 
 .. autoclass:: determined.pytorch.experimental.TorchBatchProcessorContext
    :members:
    :member-order: bysource
 
-
-*******************************************
+********************************************
  ``determined.pytorch.TorchBatchProcessor``
-*******************************************
+********************************************
 
 .. autoclass:: determined.pytorch.experimental.TorchBatchProcessor
    :members:
    :member-order: bysource
-