From b30a9cae4f66e3c7b01ede0e1cc44984bfac7f6a Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:19:25 +0100 Subject: [PATCH 01/12] Add OPAL_PREFIX to passenv in test-gpu-postmerge This is required for horovod init to work correctly --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 35e20e9e2..0abfdd4aa 100644 --- a/tox.ini +++ b/tox.ini @@ -71,6 +71,8 @@ sitepackages=true ; need to add some back. setenv = TF_GPU_ALLOCATOR=cuda_malloc_async +passenv = + OPAL_PREFIX deps = pytest pytest-cov From 067140377a68fce1d325b1a33ec8eea509c4302a Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:30:01 +0100 Subject: [PATCH 02/12] Move merlin-models dependency to test.txt --- requirements/test-cpu.txt | 2 -- requirements/test-gpu.txt | 1 - requirements/test.txt | 2 ++ 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements/test-cpu.txt b/requirements/test-cpu.txt index e682e3549..1a59d917b 100644 --- a/requirements/test-cpu.txt +++ b/requirements/test-cpu.txt @@ -1,8 +1,6 @@ -r test.txt -merlin-models>=0.6.0 faiss-cpu==1.7.2 -tensorflow<=2.9.0 treelite==2.4.0 treelite_runtime==2.4.0 torch~=1.12 diff --git a/requirements/test-gpu.txt b/requirements/test-gpu.txt index ef1b403ac..28cd15508 100644 --- a/requirements/test-gpu.txt +++ b/requirements/test-gpu.txt @@ -1,4 +1,3 @@ -r test.txt -tensorflow faiss-gpu==1.7.2 diff --git a/requirements/test.txt b/requirements/test.txt index ea42bcb12..fc43407f2 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -18,5 +18,7 @@ feast==0.31 xgboost==1.6.2 implicit==0.6.0 +merlin-models[tensorflow,pytorch,transformers]@git+https://github.com/NVIDIA-Merlin/models.git + # TODO: do we need more of these? 
# https://github.com/NVIDIA-Merlin/Merlin/blob/a1cc48fe23c4dfc627423168436f26ef7e028204/ci/dockerfile.ci#L13-L18 From 9ce1a10617cb07613e5685c411dafc75f5123dc4 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:33:15 +0100 Subject: [PATCH 03/12] Trigger postmerge on pull_request to check they run successfully --- .github/workflows/postmerge-cpu.yml | 1 + .github/workflows/postmerge-gpu.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/postmerge-cpu.yml b/.github/workflows/postmerge-cpu.yml index 378eaab88..d3912f0b8 100644 --- a/.github/workflows/postmerge-cpu.yml +++ b/.github/workflows/postmerge-cpu.yml @@ -6,6 +6,7 @@ on: branches: [main] tags: - "v[0-9]+.[0-9]+.[0-9]+" + pull_request: jobs: cpu-ci-postmerge: diff --git a/.github/workflows/postmerge-gpu.yml b/.github/workflows/postmerge-gpu.yml index 1c8c1772e..82b203854 100644 --- a/.github/workflows/postmerge-gpu.yml +++ b/.github/workflows/postmerge-gpu.yml @@ -6,6 +6,7 @@ on: branches: [main] tags: - "v[0-9]+.[0-9]+.[0-9]+" + pull_request: jobs: gpu-ci-postmerge: From 546905649d2378ee21124becf96195da19d70426 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:50:01 +0100 Subject: [PATCH 04/12] Skip test_transformer_model if tritonserver not found --- tests/integration/tf/test_transformer_model.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/tf/test_transformer_model.py b/tests/integration/tf/test_transformer_model.py index 5e4eb662d..e56b41ea7 100644 --- a/tests/integration/tf/test_transformer_model.py +++ b/tests/integration/tf/test_transformer_model.py @@ -14,6 +14,8 @@ # limitations under the License. 
# +import shutil + import pytest tf = pytest.importorskip("tensorflow") @@ -31,7 +33,10 @@ from merlin.systems.dag.ops.tensorflow import PredictTensorflow # noqa from merlin.systems.triton.utils import run_ensemble_on_tritonserver # noqa +TRITON_SERVER_PATH = shutil.which("tritonserver") + +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") def test_serve_tf_session_based_with_libtensorflow(tmpdir): # =========================================== From 3e57380c2039db9cf77e0ddf4234fa558aecdd9b Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:05:25 +0100 Subject: [PATCH 05/12] Use match_representations for example input matching input schema --- merlin/systems/triton/conversions.py | 31 +++++++++++++++++-- tests/integration/t4r/test_pytorch_backend.py | 13 ++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/merlin/systems/triton/conversions.py b/merlin/systems/triton/conversions.py index 4a6b57f87..20663a883 100644 --- a/merlin/systems/triton/conversions.py +++ b/merlin/systems/triton/conversions.py @@ -26,12 +26,14 @@ import itertools from typing import Any, Dict, List +from functools import singledispatch import numpy as np import pandas as pd from merlin.core.compat import cudf from merlin.core.compat import cupy as cp +from merlin.core.compat.torch import torch from merlin.core.dispatch import build_cudf_list_column, is_list_dtype from merlin.dag import Supports from merlin.schema import Schema @@ -104,19 +106,23 @@ def match_representations(schema: Schema, dict_array: Dict[str, Any]) -> Dict[st return aligned -def _to_values_offsets(array): +@singledispatch +def _to_values_offsets(values): """Convert array to values/offsets representation Parameters ---------- - array : numpy.ndarray or cupy.ndarray - Array to convert + values : array or tensor + Array or tensor to convert Returns ------- values, offsets Tuple of values and offsets """ + raise 
NotImplementedError(f"_to_values_offsets not implemented for {type(values)}") + +def _to_values_offsets_array(array): num_rows = array.shape[0] row_lengths = [array.shape[1]] * num_rows offsets = [0] + list(itertools.accumulate(row_lengths)) @@ -125,6 +131,25 @@ def _to_values_offsets(array): values = array.reshape(-1, *array.shape[2:]) return values, offsets +@_to_values_offsets.register(np.ndarray) +def _(array): + return _to_values_offsets_array(array) + +if cp: + @_to_values_offsets.register(cp.ndarray) + def _(array): + return _to_values_offsets_array(array) + +if torch: + @_to_values_offsets.register(torch.Tensor) + def _(tensor): + num_rows = tensor.shape[0] + row_lengths = [tensor.shape[1]] * num_rows + offsets = [0] + list(itertools.accumulate(row_lengths)) + offsets = torch.tensor(offsets, dtype=torch.int32, device=tensor.device) + values = tensor.reshape(-1, *tensor.shape[2:]) + return values, offsets + def triton_request_to_tensor_table(request, schema): """ diff --git a/tests/integration/t4r/test_pytorch_backend.py b/tests/integration/t4r/test_pytorch_backend.py index fe16f05a7..bc4e933da 100644 --- a/tests/integration/t4r/test_pytorch_backend.py +++ b/tests/integration/t4r/test_pytorch_backend.py @@ -14,6 +14,8 @@ # limitations under the License. 
# +import shutil + import pytest np = pytest.importorskip("numpy") @@ -30,9 +32,13 @@ from merlin.core.dispatch import make_df # noqa from merlin.systems.dag import Ensemble # noqa from merlin.systems.dag.ops.pytorch import PredictPyTorch # noqa +from merlin.systems.triton.conversions import match_representations from merlin.systems.triton.utils import run_ensemble_on_tritonserver # noqa +TRITON_SERVER_PATH = shutil.which("tritonserver") + +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") def test_serve_t4r_with_torchscript(tmpdir): # =========================================== # Generate training data @@ -69,11 +75,12 @@ def test_serve_t4r_with_torchscript(tmpdir): model.eval() - traced_model = torch.jit.trace(model, torch_yoochoose_like, strict=True) + example_inputs = match_representations(model.input_schema, torch_yoochoose_like) + traced_model = torch.jit.trace(model, example_inputs, strict=True) assert isinstance(traced_model, torch.jit.TopLevelTracedModule) assert torch.allclose( - model(torch_yoochoose_like), - traced_model(torch_yoochoose_like), + model(example_inputs), + traced_model(example_inputs), ) # =========================================== From 984693bea1fbdb3c49560da0b4f1fdcc801b4f29 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:09:57 +0100 Subject: [PATCH 06/12] Update formatting of conversions.py --- merlin/systems/triton/conversions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/merlin/systems/triton/conversions.py b/merlin/systems/triton/conversions.py index 20663a883..88a1f52ab 100644 --- a/merlin/systems/triton/conversions.py +++ b/merlin/systems/triton/conversions.py @@ -25,8 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import itertools -from typing import Any, Dict, List from functools import singledispatch +from typing import Any, Dict, List import numpy as np import pandas as pd @@ -122,6 +122,7 @@ def _to_values_offsets(values): """ raise NotImplementedError(f"_to_values_offsets not implemented for {type(values)}") + def _to_values_offsets_array(array): num_rows = array.shape[0] row_lengths = [array.shape[1]] * num_rows @@ -131,16 +132,21 @@ def _to_values_offsets_array(array): values = array.reshape(-1, *array.shape[2:]) return values, offsets + @_to_values_offsets.register(np.ndarray) def _(array): return _to_values_offsets_array(array) + if cp: + @_to_values_offsets.register(cp.ndarray) def _(array): return _to_values_offsets_array(array) + if torch: + @_to_values_offsets.register(torch.Tensor) def _(tensor): num_rows = tensor.shape[0] From 584961486cf4b5c310fc67ed07339869d90cb669 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:13:51 +0100 Subject: [PATCH 07/12] Add noqa to match_representations import --- tests/integration/t4r/test_pytorch_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/t4r/test_pytorch_backend.py b/tests/integration/t4r/test_pytorch_backend.py index bc4e933da..bb23bdf51 100644 --- a/tests/integration/t4r/test_pytorch_backend.py +++ b/tests/integration/t4r/test_pytorch_backend.py @@ -32,7 +32,7 @@ from merlin.core.dispatch import make_df # noqa from merlin.systems.dag import Ensemble # noqa from merlin.systems.dag.ops.pytorch import PredictPyTorch # noqa -from merlin.systems.triton.conversions import match_representations +from merlin.systems.triton.conversions import match_representations # noqa from merlin.systems.triton.utils import run_ensemble_on_tritonserver # noqa TRITON_SERVER_PATH = shutil.which("tritonserver") From 35cc3dfcffdc3ba7157ce331aefb656a4a179f48 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy 
<1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 20:43:42 +0100 Subject: [PATCH 08/12] Try setting LD_LIBRARY_PATH for pytorch tests --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 0abfdd4aa..6cbe7c932 100644 --- a/tox.ini +++ b/tox.ini @@ -85,7 +85,7 @@ commands = python -m pip install . python -m pytest -rxs -m "postmerge" tests/ - python -m pytest -rxs tests/integration + LD_LIBRARY_PATH="/opt/tritonserver/backends/pytorch:$LD_LIBRARY_PATH" python -m pytest -rxs tests/integration [testenv:test-merlin] ; Runs in: Internal Jenkins From cd71dca5cfd4e95d21a0712a7f534a375fcf2eeb Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Thu, 1 Jun 2023 21:52:12 +0100 Subject: [PATCH 09/12] Move LD_LIBRARY_PATH config to setenv --- tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 6cbe7c932..0ee41462d 100644 --- a/tox.ini +++ b/tox.ini @@ -71,6 +71,7 @@ sitepackages=true ; need to add some back. setenv = TF_GPU_ALLOCATOR=cuda_malloc_async + LD_LIBRARY_PATH=/opt/tritonserver/backends/pytorch passenv = OPAL_PREFIX deps = @@ -85,7 +86,7 @@ commands = python -m pip install . python -m pytest -rxs -m "postmerge" tests/ - LD_LIBRARY_PATH="/opt/tritonserver/backends/pytorch:$LD_LIBRARY_PATH" python -m pytest -rxs tests/integration + python -m pytest -rxs tests/integration [testenv:test-merlin] ; Runs in: Internal Jenkins From 2407071a87a7f1b535058b15c8207038114cf1ec Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Fri, 2 Jun 2023 11:49:46 +0100 Subject: [PATCH 10/12] Add OPAL_PREFIX passenv to test-gpu tox environment --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 0ee41462d..657eac65d 100644 --- a/tox.ini +++ b/tox.ini @@ -47,6 +47,8 @@ sitepackages=true ; need to add some back. 
setenv = TF_GPU_ALLOCATOR=cuda_malloc_async +passenv = + OPAL_PREFIX deps = -rrequirements/test-gpu.txt pytest From 7b21b8d1043a6cc5af596e897842d20fa8afc9fe Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:36:12 +0100 Subject: [PATCH 11/12] Restore tensorflow constraint on test-cpu.txt --- requirements/test-cpu.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/test-cpu.txt b/requirements/test-cpu.txt index 1a59d917b..871c5c051 100644 --- a/requirements/test-cpu.txt +++ b/requirements/test-cpu.txt @@ -1,6 +1,7 @@ -r test.txt faiss-cpu==1.7.2 +tensorflow<=2.9.0 treelite==2.4.0 treelite_runtime==2.4.0 torch~=1.12 From a2ab674794faf1590ef7c659342b95d2eb7c7688 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:43:44 +0100 Subject: [PATCH 12/12] Remove pull_request trigger from postmerge workflows --- .github/workflows/postmerge-cpu.yml | 1 - .github/workflows/postmerge-gpu.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/postmerge-cpu.yml b/.github/workflows/postmerge-cpu.yml index d3912f0b8..378eaab88 100644 --- a/.github/workflows/postmerge-cpu.yml +++ b/.github/workflows/postmerge-cpu.yml @@ -6,7 +6,6 @@ on: branches: [main] tags: - "v[0-9]+.[0-9]+.[0-9]+" - pull_request: jobs: cpu-ci-postmerge: diff --git a/.github/workflows/postmerge-gpu.yml b/.github/workflows/postmerge-gpu.yml index 82b203854..1c8c1772e 100644 --- a/.github/workflows/postmerge-gpu.yml +++ b/.github/workflows/postmerge-gpu.yml @@ -6,7 +6,6 @@ on: branches: [main] tags: - "v[0-9]+.[0-9]+.[0-9]+" - pull_request: jobs: gpu-ci-postmerge: