From 326d359c7996e7ac1852e5364a588e9ba84b9e3f Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Wed, 21 Jun 2023 15:37:57 +0000 Subject: [PATCH 1/7] fix bug Signed-off-by: Zhi Lin --- python/raydp/spark/dataset.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/python/raydp/spark/dataset.py b/python/raydp/spark/dataset.py index 3b09b114..b5a4f0dd 100644 --- a/python/raydp/spark/dataset.py +++ b/python/raydp/spark/dataset.py @@ -259,14 +259,21 @@ def get_locations(blocks): ] def ray_dataset_to_spark_dataframe(spark: sql.SparkSession, - arrow_schema, + dataset_schema, blocks: List[ObjectRef], locations = None) -> DataFrame: locations = get_locations(blocks) - if not isinstance(arrow_schema, pa.lib.Schema): - if hasattr(arrow_schema, "base_schema") and \ - not isinstance(arrow_schema.base_schema, pa.lib.Schema): - raise RuntimeError(f"Schema is {type(arrow_schema)}, required pyarrow.lib.Schema. \n" \ + arrow_schema = dataset_schema + if not isinstance(dataset_schema, pa.lib.Schema): + if hasattr(dataset_schema, "base_schema"): + if isinstance(dataset_schema.base_schema, pa.lib.Schema): + arrow_schema = dataset_schema.base_schema + else: + raise RuntimeError(f"Schema is {type(dataset_schema.base_schema)}, " \ + f"required pyarrow.lib.Schema. \n" \ + f"to_spark does not support converting non-arrow ray datasets.") + else: + raise RuntimeError(f"Schema is {type(dataset_schema)}, required pyarrow.lib.Schema. \n" \ f"to_spark does not support converting non-arrow ray datasets.") schema = StructType() for field in arrow_schema: From 69e4781f8af395dbdac4f727f9072e17400f8d2a Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Wed, 21 Jun 2023 15:39:46 +0000 Subject: [PATCH 2/7] use ray 2.5.0 in CI Signed-off-by: Zhi Lin --- .github/workflows/raydp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/raydp.yml b/.github/workflows/raydp.yml index c6f6bdd1..2b64c57a 100644 --- a/.github/workflows/raydp.yml +++ b/.github/workflows/raydp.yml @@ -78,7 +78,7 @@ jobs: else pip install torch fi - pip install pyarrow==6.0.1 ray[default]==2.4.0 pytest koalas tensorflow tabulate grpcio-tools wget + pip install pyarrow==6.0.1 ray[default]==2.5.0 pytest koalas tensorflow tabulate grpcio-tools wget pip install "xgboost_ray[default]<=0.1.13" pip install torchmetrics HOROVOD_WITH_GLOO=1 From d24cc9bd431a38b048fbedda9b0d2dedefeb3f4b Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Wed, 21 Jun 2023 15:55:32 +0000 Subject: [PATCH 3/7] fix lint Signed-off-by: Zhi Lin --- python/raydp/spark/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/raydp/spark/dataset.py b/python/raydp/spark/dataset.py index b5a4f0dd..ee43c285 100644 --- a/python/raydp/spark/dataset.py +++ b/python/raydp/spark/dataset.py @@ -273,7 +273,8 @@ def ray_dataset_to_spark_dataframe(spark: sql.SparkSession, f"required pyarrow.lib.Schema. \n" \ f"to_spark does not support converting non-arrow ray datasets.") else: - raise RuntimeError(f"Schema is {type(dataset_schema)}, required pyarrow.lib.Schema. \n" \ + raise RuntimeError(f"Schema is {type(dataset_schema)}, " \ + f"required pyarrow.lib.Schema. \n" \ f"to_spark does not support converting non-arrow ray datasets.") schema = StructType() for field in arrow_schema: From 491963f9c705a6436e3832ca76f0e8cf8f041610 Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Wed, 21 Jun 2023 16:10:52 +0000 Subject: [PATCH 4/7] fix lint Signed-off-by: Zhi Lin --- python/raydp/spark/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raydp/spark/dataset.py b/python/raydp/spark/dataset.py index ee43c285..1e976da4 100644 --- a/python/raydp/spark/dataset.py +++ b/python/raydp/spark/dataset.py @@ -273,7 +273,7 @@ def ray_dataset_to_spark_dataframe(spark: sql.SparkSession, f"required pyarrow.lib.Schema. \n" \ f"to_spark does not support converting non-arrow ray datasets.") else: - raise RuntimeError(f"Schema is {type(dataset_schema)}, " \ + raise RuntimeError(f"Schema is {type(dataset_schema)}, " \ f"required pyarrow.lib.Schema. \n" \ f"to_spark does not support converting non-arrow ray datasets.") schema = StructType() From a3d3ba98c8953f0c76b33fd75a487d8c686c4a08 Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Sun, 25 Jun 2023 14:35:11 +0000 Subject: [PATCH 5/7] skip horovod test Signed-off-by: Zhi Lin --- .github/workflows/raydp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/raydp.yml b/.github/workflows/raydp.yml index 2b64c57a..af89b182 100644 --- a/.github/workflows/raydp.yml +++ b/.github/workflows/raydp.yml @@ -114,7 +114,7 @@ jobs: ray stop python examples/pytorch_nyctaxi.py python examples/tensorflow_nyctaxi.py - python examples/horovod_nyctaxi.py + # python examples/horovod_nyctaxi.py python examples/xgboost_ray_nyctaxi.py # python examples/raytrain_nyctaxi.py python examples/data_process.py From fbe6497b726bd92b0eacf97fe03c1135dcfd9a2d Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Mon, 26 Jun 2023 15:56:49 +0000 Subject: [PATCH 6/7] test ray 2.4.0 Signed-off-by: Zhi Lin --- .github/workflows/raydp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/raydp.yml b/.github/workflows/raydp.yml index af89b182..4a71c5d4 100644 --- a/.github/workflows/raydp.yml +++ b/.github/workflows/raydp.yml @@ -78,7 +78,7 @@ jobs: else pip install torch fi - pip install pyarrow==6.0.1 ray[default]==2.5.0 pytest koalas tensorflow tabulate grpcio-tools wget + pip install pyarrow==6.0.1 ray[default]==2.4.0 pytest koalas tensorflow tabulate grpcio-tools wget pip install "xgboost_ray[default]<=0.1.13" pip install torchmetrics HOROVOD_WITH_GLOO=1 From 703789a093faf802cc4dcf7c281d0e6d2f1c463d Mon Sep 17 00:00:00 2001 From: Zhi Lin Date: Tue, 27 Jun 2023 08:59:16 +0000 Subject: [PATCH 7/7] revert horovod test Signed-off-by: Zhi Lin --- .github/workflows/raydp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/raydp.yml b/.github/workflows/raydp.yml index 4a71c5d4..c6f6bdd1 100644 --- a/.github/workflows/raydp.yml +++ b/.github/workflows/raydp.yml @@ -114,7 +114,7 @@ jobs: ray stop python examples/pytorch_nyctaxi.py python examples/tensorflow_nyctaxi.py - # python examples/horovod_nyctaxi.py + python examples/horovod_nyctaxi.py python examples/xgboost_ray_nyctaxi.py # python examples/raytrain_nyctaxi.py python examples/data_process.py