From 65e1ff5bab6c7d65a3229aa97bcac8b2033e6a95 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 26 Feb 2026 15:15:33 +0800 Subject: [PATCH 01/33] from_protobuf Signed-off-by: Haoyang Li --- integration_tests/pom.xml | 2 +- integration_tests/run_pyspark_from_build.sh | 69 +- integration_tests/src/main/python/data_gen.py | 402 ++- .../src/main/python/protobuf_test.py | 2200 +++++++++++++++++ .../src/main/python/spark_init_internal.py | 29 +- .../protobuf_test/gen_nested_proto_data.sh | 34 + .../nested_proto/device_req.proto | 9 + .../nested_proto/generated/main_log.desc | Bin 0 -> 5838 bytes .../protobuf_test/nested_proto/main_log.proto | 101 + .../nested_proto/module_a_res.proto | 90 + .../nested_proto/module_b_res.proto | 27 + .../nested_proto/predictor_schema.proto | 80 + pom.xml | 2 +- .../nvidia/spark/rapids/GpuOverrides.scala | 6 +- .../com/nvidia/spark/rapids/TypeChecks.scala | 6 - .../protobuf/ProtobufDescriptorUtils.scala | 87 + .../spark/sql/rapids/GpuFromProtobuf.scala | 384 +++ .../sql/rapids/complexTypeExtractors.scala | 22 +- .../rapids/shims/ProtobufExprShims.scala | 1013 ++++++++ .../rapids/shims/Spark340PlusNonDBShims.scala | 4 +- 20 files changed, 4546 insertions(+), 21 deletions(-) create mode 100644 integration_tests/src/main/python/protobuf_test.py create mode 100755 integration_tests/src/test/resources/protobuf_test/gen_nested_proto_data.sh create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/device_req.proto create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/generated/main_log.desc create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/main_log.proto create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/module_a_res.proto create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/module_b_res.proto create mode 100644 integration_tests/src/test/resources/protobuf_test/nested_proto/predictor_schema.proto create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala create mode 100644 sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala create mode 100644 sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index e3d91be0ce3..d8ee2301e5b 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -1,6 +1,6 @@