From 2ab55570ec443419c2c540ebe157837f02961fb3 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Fri, 19 Dec 2025 17:24:22 +0800
Subject: [PATCH 1/9] AI draft for protocol buffer support

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 integration_tests/pom.xml                     |  20 ++
 integration_tests/run_pyspark_from_build.sh   |  23 +-
 integration_tests/src/main/python/data_gen.py | 110 ++++++++
 .../src/main/python/protobuf_test.py          | 229 +++++++++++++++++
 pom.xml                                       |  11 +
 .../protobuf/ProtobufDescriptorUtils.scala    |  82 ++++++
 .../sql/rapids/GpuFromProtobufSimple.scala    |  79 ++++++
 .../rapids/shims/ProtobufExprShims.scala      | 235 ++++++++++++++++++
 .../rapids/shims/Spark340PlusNonDBShims.scala |   2 +-
 9 files changed, 788 insertions(+), 3 deletions(-)
 create mode 100644 integration_tests/src/main/python/protobuf_test.py
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
 create mode 100644 sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
 create mode 100644 sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index e3d91be0ce3..825083b7fbe 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -142,6 +142,7 @@
                                     <includes>
                                         <include>parquet-hadoop*.jar</include>
                                         <include>spark-avro*.jar</include>
+                                        <include>spark-protobuf*.jar</include>
                                     </includes>
                                 </filesets>
                             </filesets>
@@ -176,6 +177,24 @@
                             </artifactItems>
                         </configuration>
                     </execution>
+                    <execution>
+                        <id>copy-spark-protobuf</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>copy</goal>
+                        </goals>
+                        <configuration>
+                            <skip>${spark.protobuf.copy.skip}</skip>
+                            <useBaseVersion>true</useBaseVersion>
+                            <artifactItems>
+                                <artifactItem>
+                                    <groupId>org.apache.spark</groupId>
+                                    <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
+                                    <version>${spark.version}</version>
+                                </artifactItem>
+                            </artifactItems>
+                        </configuration>
+                    </execution>
                 </executions>
             </plugin>
             <plugin>
@@ -216,4 +235,5 @@
             </resource>
         </resources>
     </build>
+
 </project>
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 6550a3cc59f..baf04d44282 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -29,6 +29,7 @@
 #   - SPARK_HOME: Path to your Apache Spark installation.
 #   - SKIP_TESTS: If set to true, skips running the Python integration tests.
 #   - INCLUDE_SPARK_AVRO_JAR: If set to true, includes Avro tests.
+#   - INCLUDE_SPARK_PROTOBUF_JAR: If set to true, includes spark-protobuf (Spark 3.4.0+) on the JVM classpath.
 #   - TEST: Specifies a specific test to run.
 #   - TEST_TAGS: Allows filtering tests based on tags.
 #   - TEST_TYPE: Specifies the type of tests to run.
@@ -100,6 +101,7 @@ else
     # support alternate local jars NOT building from the source code
     if [ -d "$LOCAL_JAR_PATH" ]; then
         AVRO_JARS=$(echo "$LOCAL_JAR_PATH"/spark-avro*.jar)
+        PROTOBUF_JARS=$(echo "$LOCAL_JAR_PATH"/spark-protobuf*.jar)
         PLUGIN_JAR=$(echo "$LOCAL_JAR_PATH"/rapids-4-spark_*.jar)
         if [ -f $(echo $LOCAL_JAR_PATH/parquet-hadoop*.jar) ]; then
             export INCLUDE_PARQUET_HADOOP_TEST_JAR=true
@@ -116,6 +118,7 @@ else
     else
         [[ "$SCALA_VERSION" != "2.12"  ]] && TARGET_DIR=${TARGET_DIR/integration_tests/scala$SCALA_VERSION\/integration_tests}
         AVRO_JARS=$(echo "$TARGET_DIR"/dependency/spark-avro*.jar)
+        PROTOBUF_JARS=$(echo "$TARGET_DIR"/dependency/spark-protobuf*.jar)
         PARQUET_HADOOP_TESTS=$(echo "$TARGET_DIR"/dependency/parquet-hadoop*.jar)
         # remove the log4j.properties file so it doesn't conflict with ours, ignore errors
         # if it isn't present or already removed
@@ -141,9 +144,25 @@ else
         AVRO_JARS=""
     fi
 
-    # ALL_JARS includes dist.jar integration-test.jar avro.jar parquet.jar if they exist
+    # spark-protobuf is an optional Spark module that exists in Spark 3.4.0+. When its jar has been
+    # staged, keep it in the jar list so from_protobuf() is callable from PySpark, but only if
+    #   - INCLUDE_SPARK_PROTOBUF_JAR is unset or "true" (compared case-insensitively), and
+    #   - VERSION_STRING >= 3.4.0, i.e. "3.4.0" is the first entry once both are version-sorted.
+    # In every other case the flag is exported as false and the jar list is emptied.
+    if [[ $( echo ${INCLUDE_SPARK_PROTOBUF_JAR:-true} | tr '[:upper:]' '[:lower:]' ) == "true" ]] &&
+        printf '%s\n' "3.4.0" "$VERSION_STRING" | sort -V | head -1 | grep -qx "3.4.0";
+    then
+        # Requested and supported by this Spark version.
+        export INCLUDE_SPARK_PROTOBUF_JAR=true
+    else
+        # Either disabled by the caller or running against Spark older than 3.4.0.
+        export INCLUDE_SPARK_PROTOBUF_JAR=false
+        PROTOBUF_JARS=""
+    fi
+
+    # ALL_JARS includes dist.jar integration-test.jar avro.jar protobuf.jar parquet.jar if they exist
     # Remove non-existing paths and canonicalize the paths including get rid of links and `..`
-    ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PARQUET_HADOOP_TESTS || true)
+    ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PROTOBUF_JARS $PARQUET_HADOOP_TESTS || true)
     # `:` separated jars
     ALL_JARS="${ALL_JARS//$'\n'/:}"
 
diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
index fa7decac82d..837d4990832 100644
--- a/integration_tests/src/main/python/data_gen.py
+++ b/integration_tests/src/main/python/data_gen.py
@@ -857,6 +857,116 @@ def gen_bytes():
             return bytes([ rand.randint(0, 255) for _ in range(length) ])
         self._start(rand, gen_bytes)
 
+
+# -----------------------------------------------------------------------------
+# Protobuf (simple types) generators/utilities (for from_protobuf/to_protobuf tests)
+# -----------------------------------------------------------------------------
+
+_PROTOBUF_WIRE_VARINT = 0  # used below for boolean/int32/int64 values
+_PROTOBUF_WIRE_64BIT = 1  # used below for double values
+_PROTOBUF_WIRE_LEN_DELIM = 2  # used below for string values (length prefix + bytes)
+_PROTOBUF_WIRE_32BIT = 5  # used below for float values
+
+def _encode_protobuf_uvarint(value):
+    """
+    Return the protobuf varint bytes for a non-negative integer.
+    Raises ValueError when `value` is None or negative.
+    """
+    if value is None:
+        raise ValueError("value must not be None")
+    if value < 0:
+        raise ValueError("uvarint only supports non-negative integers")
+    remaining = int(value)
+    encoded = bytearray()
+    # Least-significant 7-bit group first; a set high bit means another group follows.
+    while remaining > 0x7F:
+        encoded.append((remaining & 0x7F) | 0x80)
+        remaining >>= 7
+    encoded.append(remaining)
+    return bytes(encoded)
+
+def _encode_protobuf_key(field_number, wire_type):  # varint of (field_number << 3) | wire_type
+    return _encode_protobuf_uvarint((int(field_number) << 3) | int(wire_type))
+
+def _encode_protobuf_field(field_number, spark_type, value):
+    """
+    Serialize one scalar field as its key followed by its payload.
+
+    A None value yields empty bytes, i.e. the field is left out of the message.
+    Signed ints: protobuf `int32`/`int64` are written as the varint of the two's-complement
+    value, so a negative `int32` is sign-extended to 64 bits and occupies 10 bytes.
+    Raises ValueError for any Spark type outside boolean/int/long/float/double/string.
+    """
+    if value is None:
+        return b""
+
+    def key(wire_type):
+        return _encode_protobuf_key(field_number, wire_type)
+
+    if isinstance(spark_type, BooleanType):
+        return key(_PROTOBUF_WIRE_VARINT) + _encode_protobuf_uvarint(1 if value else 0)
+    if isinstance(spark_type, (IntegerType, LongType)):
+        # As in protobuf-java's writeInt32NoTag, negatives are written in 64-bit two's complement.
+        unsigned = int(value) & 0xFFFFFFFFFFFFFFFF
+        return key(_PROTOBUF_WIRE_VARINT) + _encode_protobuf_uvarint(unsigned)
+    if isinstance(spark_type, FloatType):
+        return key(_PROTOBUF_WIRE_32BIT) + struct.pack("<f", float(value))
+    if isinstance(spark_type, DoubleType):
+        return key(_PROTOBUF_WIRE_64BIT) + struct.pack("<d", float(value))
+    if isinstance(spark_type, StringType):
+        utf8 = value.encode("utf-8")
+        return key(_PROTOBUF_WIRE_LEN_DELIM) + _encode_protobuf_uvarint(len(utf8)) + utf8
+    raise ValueError("Unsupported type for protobuf simple generator: {}".format(spark_type))
+
+
+class ProtobufSimpleMessageRowGen(DataGen):
+    """
+    Row generator producing, for every row:
+      - one column per message field, holding the generated Spark scalar value
+      - a trailing binary column holding those values serialized as one protobuf message
+
+    Intentionally limited to the simple scalar types boolean/int32/int64/float/double/string.
+
+    A field whose generated value is None is left out of the encoded message.
+    """
+    def __init__(self, fields, binary_col_name="bin", nullable=False):
+        """
+        fields: list of (field_name, field_number, DataGen)
+        binary_col_name: name of the column that receives the encoded message
+        """
+        self._fields = fields
+        self._binary_col_name = binary_col_name
+
+        scalar_cols = [StructField(name, field_gen.data_type, nullable=field_gen.nullable)
+                       for (name, _number, field_gen) in fields]
+        binary_col = StructField(binary_col_name, BinaryType(), nullable=True)
+        super().__init__(StructType(scalar_cols + [binary_col]), nullable=nullable)
+
+    def __repr__(self):
+        labels = ["{}#{}".format(name, number) for (name, number, _gen) in self._fields]
+        return "ProtobufSimpleMessageRowGen({})".format(",".join(labels))
+
+    def _cache_repr(self):
+        labels = ["{}:{}#{}".format(name, str(field_gen.data_type), number)
+                  for (name, number, field_gen) in self._fields]
+        return super()._cache_repr() + "(" + ",".join(labels) + "," + self._binary_col_name + ")"
+
+    def start(self, rand):
+        for (_field_name, _number, field_gen) in self._fields:
+            field_gen.start(rand)
+
+        def make_row():
+            row = []
+            message = b""
+            for (_field_name, number, field_gen) in self._fields:
+                generated = field_gen.gen()
+                row.append(generated)
+                message += _encode_protobuf_field(number, field_gen.data_type, generated)
+            row.append(message)
+            return tuple(row)
+
+        self._start(rand, make_row)
+
 # Note: Current(2023/06/06) maxmium IT data size is 7282688 bytes, so LRU cache with maxsize 128
 # will lead to 7282688 * 128 = 932 MB additional memory usage in edge case, which is acceptable.
 @lru_cache(maxsize=128, typed=True)
diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
new file mode 100644
index 00000000000..f85f1384b1f
--- /dev/null
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -0,0 +1,229 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+
+import pytest
+
+from asserts import assert_gpu_and_cpu_are_equal_collect
+from data_gen import (
+    BooleanGen, IntegerGen, LongGen, FloatGen, DoubleGen, StringGen,
+    ProtobufSimpleMessageRowGen, gen_df
+)
+from marks import ignore_order
+from spark_session import with_cpu_session, is_before_spark_340
+import pyspark.sql.functions as f
+
+pytestmark = [pytest.mark.premerge_ci_1]
+
+
+def _try_import_from_protobuf():  # returns pyspark's from_protobuf, or None if the import fails
+    try:
+        from pyspark.sql.protobuf.functions import from_protobuf
+        return from_protobuf
+    except Exception:  # any failure while importing is treated as "not available"
+        return None
+
+
+def _spark_protobuf_jvm_available(spark) -> bool:
+    """
+    Tell whether the JVM half of the optional `spark-protobuf` module can be loaded.
+
+    PySpark may ship the Python wrappers while the JVM classes are absent from the classpath;
+    calling `sc._jvm.org.apache.spark.sql.protobuf.functions.from_protobuf` then fails with:
+      TypeError: 'JavaPackage' object is not callable
+    """
+    jvm = spark.sparkContext._jvm
+
+    def loadable(class_name):
+        try:
+            jvm.java.lang.Class.forName(class_name)
+        except Exception:
+            return False
+        return True
+
+    # The Scala object `functions` compiles to `functions$`; the plain class name is tried as well
+    # because some environments may expose it differently.
+    return (loadable("org.apache.spark.sql.protobuf.functions$") or
+            loadable("org.apache.spark.sql.protobuf.functions"))
+
+
+def _build_simple_descriptor_set_bytes(spark):
+    """
+    Return the serialized FileDescriptorSet, built through the JVM protobuf classes, for:
+      package test;
+      syntax = "proto2";
+      message Simple {
+        optional bool   b   = 1;
+        optional int32  i32 = 2;
+        optional int64  i64 = 3;
+        optional float  f32 = 4;
+        optional double f64 = 5;
+        optional string s   = 6;
+      }
+    """
+    jvm = spark.sparkContext._jvm
+    protos = jvm.com.google.protobuf.DescriptorProtos
+    field_proto = protos.FieldDescriptorProto
+
+    file_builder = protos.FileDescriptorProto.newBuilder() \
+        .setName("simple.proto") \
+        .setPackage("test")
+    # Older protobuf-java versions bundled with some Spark distributions have no
+    # FileDescriptorProto.Builder.setSyntax(String). Only proto2 semantics are needed here, and
+    # leaving the syntax unset is sufficient/compatible, so a failure is ignored.
+    try:
+        file_builder = file_builder.setSyntax("proto2")
+    except Exception:
+        pass
+
+    message_builder = protos.DescriptorProto.newBuilder().setName("Simple")
+    optional = field_proto.Label.LABEL_OPTIONAL
+    simple_fields = [  # (name, field number, protobuf type) in declaration order
+        ("b", 1, field_proto.Type.TYPE_BOOL),
+        ("i32", 2, field_proto.Type.TYPE_INT32),
+        ("i64", 3, field_proto.Type.TYPE_INT64),
+        ("f32", 4, field_proto.Type.TYPE_FLOAT),
+        ("f64", 5, field_proto.Type.TYPE_DOUBLE),
+        ("s", 6, field_proto.Type.TYPE_STRING),
+    ]
+    for (name, number, proto_type) in simple_fields:
+        message_builder.addField(
+            field_proto.newBuilder()
+              .setName(name)
+              .setNumber(number)
+              .setLabel(optional)
+              .setType(proto_type)
+              .build()
+        )
+
+    file_builder.addMessageType(message_builder.build())
+    descriptor_set = protos.FileDescriptorSet.newBuilder().addFile(file_builder.build()).build()
+    # py4j converts Java byte[] to a Python bytes-like object
+    return bytes(descriptor_set.toByteArray())
+
+
+def _write_bytes_to_hadoop_path(spark, path_str, data_bytes):
+    """Write data_bytes to path_str (overwriting) on the filesystem that owns the path."""
+    sc = spark.sparkContext
+    jpath = sc._jvm.org.apache.hadoop.fs.Path(path_str)
+    # Resolve via the path so a scheme-qualified path is not sent to the default FileSystem.
+    out = jpath.getFileSystem(sc._jsc.hadoopConfiguration()).create(jpath, True)
+    try:
+        out.write(bytearray(data_bytes))
+    finally:
+        out.close()
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_simple_parquet_binary_round_trip(spark_tmp_path):
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    data_path = spark_tmp_path + "/PROTOBUF_SIMPLE_PARQUET/"
+    desc_path = spark_tmp_path + "/simple.desc"
+    message_name = "test.Simple"
+
+    # Generate descriptor bytes once using the JVM (no protoc dependency)
+    desc_bytes = with_cpu_session(lambda spark: _build_simple_descriptor_set_bytes(spark))
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Build a DF with scalar columns + binary protobuf column and write to parquet
+    row_gen = ProtobufSimpleMessageRowGen([
+        ("b", 1, BooleanGen(nullable=True)),
+        ("i32", 2, IntegerGen(nullable=True, min_val=0, max_val=1 << 20)),
+        ("i64", 3, LongGen(nullable=True, min_val=0, max_val=1 << 40, special_cases=[])),
+        ("f32", 4, FloatGen(nullable=True, no_nans=True)),
+        ("f64", 5, DoubleGen(nullable=True, no_nans=True)),
+        ("s", 6, StringGen(nullable=True)),
+    ], binary_col_name="bin")
+
+    def write_parquet(spark):
+        df = gen_df(spark, row_gen, length=512)
+        df.write.mode("overwrite").parquet(data_path)
+
+    with_cpu_session(write_parquet)
+
+    # Sanity check correctness on CPU (decoded struct matches the original scalar columns)
+    def cpu_correctness_check(spark):
+        df = spark.read.parquet(data_path)
+        expected = f.struct(
+            f.col("b").alias("b"),
+            f.col("i32").alias("i32"),
+            f.col("i64").alias("i64"),
+            f.col("f32").alias("f32"),
+            f.col("f64").alias("f64"),
+            f.col("s").alias("s"),
+        ).alias("expected")
+
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(f.col("bin"), message_name, binaryDescriptorSet=bytearray(desc_bytes)).alias("decoded")
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path).alias("decoded")
+
+        rows = df.select(expected, decoded).collect()
+        for r in rows:
+            assert r["expected"] == r["decoded"]
+
+    with_cpu_session(cpu_correctness_check)
+
+    # Main assertion: CPU and GPU results match for from_protobuf on a binary column read from parquet
+    def run_on_spark(spark):
+        df = spark.read.parquet(data_path)
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(f.col("bin"), message_name, binaryDescriptorSet=bytearray(desc_bytes))
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        return df.select(decoded.alias("decoded"))
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_simple_null_input_returns_null(spark_tmp_path):
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    desc_path = spark_tmp_path + "/simple_null_input.desc"
+    message_name = "test.Simple"
+
+    # Generate descriptor bytes once using the JVM (no protoc dependency)
+    desc_bytes = with_cpu_session(lambda spark: _build_simple_descriptor_set_bytes(spark))
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Spark's ProtobufDataToCatalyst is NullIntolerant (null input -> null output).
+    def run_on_spark(spark):
+        # Row 1 is a null message; row 2 encodes b=true, i32=123
+        rows = [(None,), (bytes([0x08, 0x01, 0x10, 0x7B]),)]
+        df = spark.createDataFrame(rows, schema="bin binary")
+        # Pass the descriptor bytes directly when this PySpark supports it, else the descriptor file
+        if "binaryDescriptorSet" in inspect.signature(from_protobuf).parameters:
+            decoded = from_protobuf(f.col("bin"), message_name, binaryDescriptorSet=bytearray(desc_bytes))
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        return df.select(decoded.alias("decoded"))
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
+
+
diff --git a/pom.xml b/pom.xml
index 6eeff9d35be..8679b7ddf7e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -318,6 +318,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -338,6 +339,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -358,6 +360,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -378,6 +381,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -398,6 +402,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -895,6 +900,12 @@
         <pytest.TEST_PARALLEL/>
         <pytest.TEST_TYPE>developer</pytest.TEST_TYPE>
         <rat.consoleOutput>false</rat.consoleOutput>
+        <!--
+          spark-protobuf is an optional external Spark module (Spark 3.4.0+). The plugin must not require it
+          at load time, but some integration tests need it on the Spark JVM classpath.
+        -->
+        <!-- Default to skipping staging spark-protobuf for integration tests unless enabled by a Spark 3.4.x profile -->
+        <spark.protobuf.copy.skip>true</spark.protobuf.copy.skip>
         <!--
          If you update a dependency version so it is no longer a SNAPSHOT
          please update the snapshot-shims profile as well so it is accurate -->
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
new file mode 100644
index 00000000000..f40cc2af03f
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.protobuf
+
+import scala.collection.mutable
+import scala.collection.JavaConverters._
+
+import com.google.protobuf.DescriptorProtos
+import com.google.protobuf.Descriptors
+
+/**
+ * Minimal descriptor utilities for locating a message descriptor in a FileDescriptorSet.
+ *
+ * This is intentionally lightweight for the "simple types" from_protobuf patch: it supports
+ * descriptor sets produced by `protoc --include_imports --descriptor_set_out=...`.
+ */
+object ProtobufDescriptorUtils {
+  /**
+   * Locates `messageName` in a serialized FileDescriptorSet. A message whose full name equals
+   * `messageName` wins; otherwise a simple-name or dotted-suffix match must be unique.
+   * Throws IllegalArgumentException on a missing dependency, no match, or an ambiguous match.
+   */
+  def buildMessageDescriptor(
+      fileDescriptorSetBytes: Array[Byte],
+      messageName: String): Descriptors.Descriptor = {
+    val fds = DescriptorProtos.FileDescriptorSet.parseFrom(fileDescriptorSetBytes)
+    val protos = fds.getFileList.asScala.toSeq
+    val byName = protos.map(p => p.getName -> p).toMap
+    val cache = mutable.HashMap.empty[String, Descriptors.FileDescriptor]
+    def buildFileDescriptor(name: String): Descriptors.FileDescriptor = {
+      cache.getOrElseUpdate(name, {
+        val p = byName.getOrElse(name,
+          throw new IllegalArgumentException(s"Missing FileDescriptorProto for '$name'"))
+        val deps = p.getDependencyList.asScala.map(buildFileDescriptor _).toArray
+        Descriptors.FileDescriptor.buildFrom(p, deps)
+      })
+    }
+    val matched = protos.map(p => buildFileDescriptor(p.getName))
+      .flatMap(fd => findMessageDescriptors(fd, messageName))
+    // A looser simple-name/suffix hit must not make an exactly named message ambiguous.
+    val exact = matched.filter(_.getFullName == messageName)
+    (if (exact.nonEmpty) exact else matched) match {
+      case Seq(d) => d
+      case Seq() =>
+        throw new IllegalArgumentException(
+          s"Message '$messageName' not found in FileDescriptorSet")
+      case many =>
+        val names = many.map(_.getFullName).distinct.sorted
+        throw new IllegalArgumentException(
+          s"Message '$messageName' is ambiguous; matches: ${names.mkString(", ")}")
+    }
+  }
+
+  private def findMessageDescriptors(
+      fd: Descriptors.FileDescriptor,
+      messageName: String): Iterator[Descriptors.Descriptor] = {
+    def matches(d: Descriptors.Descriptor): Boolean =
+      d.getName == messageName || d.getFullName == messageName ||
+        d.getFullName.endsWith("." + messageName)
+    def walk(d: Descriptors.Descriptor): Iterator[Descriptors.Descriptor] = {
+      val nested = d.getNestedTypes.asScala.iterator.flatMap(walk _)
+      if (matches(d)) Iterator.single(d) ++ nested else nested
+    }
+    fd.getMessageTypes.asScala.iterator.flatMap(walk _)
+  }
+}
+
+
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
new file mode 100644
index 00000000000..73c23fe2f82
--- /dev/null
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.rapids
+
+import ai.rapids.cudf
+import ai.rapids.cudf.BinaryOp
+import ai.rapids.cudf.DType
+import com.nvidia.spark.rapids.Arm.withResource
+import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression}
+import com.nvidia.spark.rapids.jni.ProtobufSimple
+import com.nvidia.spark.rapids.shims.NullIntolerantShim
+
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
+import org.apache.spark.sql.types._
+
+/**
+ * GPU implementation for Spark's `from_protobuf` decode path (simple types only).
+ *
+ * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when supported.
+ */
+case class GpuFromProtobufSimple(
+    outputSchema: StructType,
+    fieldNumbers: Array[Int],
+    cudfTypeIds: Array[Int],
+    cudfTypeScales: Array[Int],
+    child: Expression)
+  extends GpuUnaryExpression with ExpectsInputTypes with NullIntolerantShim {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+  override def dataType: DataType = outputSchema.asNullable
+
+  override def nullable: Boolean = true
+
+  override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = {
+    // Spark BinaryType is represented in cuDF as a LIST<UINT8/INT8>.
+    // ProtobufSimple returns a non-null STRUCT with nullable children. Spark's
+    // ProtobufDataToCatalyst is NullIntolerant, so if the input binary row is null the output
+    // struct row must be null as well.
+    val binary = input.getBase
+    val decoded =
+      ProtobufSimple.decodeToStruct(binary, fieldNumbers, cudfTypeIds, cudfTypeScales)
+    // Closing `decoded` here on every path means it cannot leak if the null handling throws.
+    withResource(decoded) { _ =>
+      if (binary.hasNulls) decoded.mergeAndSetValidity(BinaryOp.BITWISE_AND, binary)
+      else decoded.incRefCount()
+    }
+  }
+}
+
+object GpuFromProtobufSimple {
+  def sparkTypeToCudfId(dt: DataType): (Int, Int) = (dt match {
+    case BooleanType => DType.BOOL8
+    case IntegerType => DType.INT32
+    case LongType => DType.INT64
+    case FloatType => DType.FLOAT32
+    case DoubleType => DType.FLOAT64
+    case StringType => DType.STRING
+    case other =>
+      throw new IllegalArgumentException(s"Unsupported Spark type for protobuf(simple): $other")
+  }).getTypeId.getNativeId -> 0 // (cuDF native type id, scale); the scale is always 0 here
+}
+
+
+
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
new file mode 100644
index 00000000000..a75dda64b14
--- /dev/null
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "340"}
+{"spark": "341"}
+{"spark": "342"}
+{"spark": "343"}
+{"spark": "344"}
+{"spark": "350"}
+{"spark": "351"}
+{"spark": "352"}
+{"spark": "353"}
+{"spark": "354"}
+{"spark": "355"}
+{"spark": "356"}
+{"spark": "357"}
+{"spark": "400"}
+{"spark": "401"}
+spark-rapids-shim-json-lines ***/
+
+package com.nvidia.spark.rapids.shims
+
+import java.nio.file.{Files, Path}
+
+import scala.util.Try
+
+import com.nvidia.spark.rapids._
+import org.apache.spark.sql.rapids.GpuFromProtobufSimple
+
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+import org.apache.spark.sql.types._
+
+/**
+ * Spark 3.4+ optional integration for spark-protobuf expressions.
+ *
+ * spark-protobuf is an external module, so these rules must be registered by reflection.
+ */
+object ProtobufExprShims {
+  private[this] val protobufDataToCatalystClassName =
+    "org.apache.spark.sql.protobuf.ProtobufDataToCatalyst"
+
+  private[this] val sparkProtobufUtilsObjectClassName =
+    "org.apache.spark.sql.protobuf.utils.ProtobufUtils$"
+
+  def exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = {
+    // spark-protobuf is optional: when its expression class cannot be loaded, add no rule.
+    try {
+      val exprClass = ShimReflectionUtils.loadClass(protobufDataToCatalystClassName)
+      Map(exprClass.asInstanceOf[Class[_ <: Expression]] -> fromProtobufRule)
+    } catch {
+      case _: ClassNotFoundException => Map.empty
+    }
+  }
+
+  private def fromProtobufRule: ExprRule[_ <: Expression] = {
+    GpuOverrides.expr[UnaryExpression](
+      "Decode a BinaryType column (protobuf) into a Spark SQL struct (simple types only)",
+      ExprChecks.unaryProject(
+        // Output is a struct; the rule does detailed checks in tagExprForGpu.
+        TypeSig.STRUCT.nested(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.STRING),
+        TypeSig.all,
+        TypeSig.BINARY,
+        TypeSig.BINARY),
+      (e, conf, p, r) => new UnaryExprMeta[UnaryExpression](e, conf, p, r) {
+
+        private var schema: StructType = _
+        private var fieldNumbers: Array[Int] = _
+        private var cudfTypeIds: Array[Int] = _
+        private var cudfTypeScales: Array[Int] = _
+
+        override def tagExprForGpu(): Unit = {
+          schema = e.dataType match {
+            case st: StructType => st
+            case other =>
+              willNotWorkOnGpu(s"Only StructType output is supported for from_protobuf(simple), got $other")
+              return
+          }
+
+          val options = getOptionsMap(e)
+          if (options.nonEmpty) {
+            willNotWorkOnGpu(s"from_protobuf options are not supported yet on GPU: ${options.keys.mkString(",")}")
+            return
+          }
+
+          val messageName = getMessageName(e)
+          val descFilePathOpt = getDescFilePath(e).orElse {
+            // Newer Spark may embed a descriptor set (binaryDescriptorSet). Write it to a temp file so we can
+            // reuse Spark's own ProtobufUtils + shaded protobuf classes to resolve the descriptor.
+            getDescriptorBytes(e).map(writeTempDescFile)
+          }
+          if (descFilePathOpt.isEmpty) {
+            willNotWorkOnGpu("from_protobuf(simple) requires a descriptor set (descFilePath or binaryDescriptorSet)")
+            return
+          }
+
+          val msgDesc = try {
+            // Spark 3.4.x builds the descriptor as: ProtobufUtils.buildDescriptor(messageName, descFilePathOpt)
+            buildMessageDescriptorWithSparkProtobuf(messageName, descFilePathOpt)
+          } catch {
+            case t: Throwable =>
+              willNotWorkOnGpu(s"Failed to resolve protobuf descriptor for message '$messageName': ${t.getMessage}")
+              return
+          }
+
+          val fields = schema.fields
+          val fnums = new Array[Int](fields.length)
+          val typeIds = new Array[Int](fields.length)
+          val scales = new Array[Int](fields.length)
+
+          fields.zipWithIndex.foreach { case (sf, idx) =>
+            sf.dataType match {
+              case BooleanType | IntegerType | LongType | FloatType | DoubleType | StringType =>
+              case other =>
+                willNotWorkOnGpu(s"Unsupported field type for from_protobuf(simple): ${sf.name}: $other")
+                return
+            }
+
+            val fd = invoke1[AnyRef](msgDesc, "findFieldByName", classOf[String], sf.name)
+            if (fd == null) {
+              willNotWorkOnGpu(s"Protobuf field '${sf.name}' not found in message '$messageName'")
+              return
+            }
+
+            val isRepeated = Try(invoke0[java.lang.Boolean](fd, "isRepeated").booleanValue()).getOrElse(false)
+            if (isRepeated) {
+              willNotWorkOnGpu(s"Repeated fields are not supported for from_protobuf(simple): ${sf.name}")
+              return
+            }
+
+            val protoType = invoke0[AnyRef](fd, "getType")
+            val protoTypeName = typeName(protoType)
+            val ok = (sf.dataType, protoTypeName) match {
+              case (BooleanType, "BOOL") => true
+              case (IntegerType, "INT32") => true
+              case (LongType, "INT64") => true
+              case (FloatType, "FLOAT") => true
+              case (DoubleType, "DOUBLE") => true
+              case (StringType, "STRING") => true
+              case _ => false
+            }
+            if (!ok) {
+              willNotWorkOnGpu(s"Field type mismatch for '${sf.name}': Spark ${sf.dataType} vs Protobuf $protoTypeName")
+              return
+            }
+
+            fnums(idx) = invoke0[java.lang.Integer](fd, "getNumber").intValue()
+            val (tid, scale) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
+            typeIds(idx) = tid
+            scales(idx) = scale
+          }
+
+          fieldNumbers = fnums
+          cudfTypeIds = typeIds
+          cudfTypeScales = scales
+        }
+
+        override def convertToGpu(child: Expression): GpuExpression = {
+          GpuFromProtobufSimple(schema, fieldNumbers, cudfTypeIds, cudfTypeScales, child)
+        }
+      }
+    )
+  }
+
+  private def getMessageName(e: Expression): String =
+    invoke0[String](e, "messageName")
+
+  /**
+   * Newer Spark versions may carry an in-expression descriptor set payload (e.g. binaryDescriptorSet).
+   * Spark 3.4.x does not, so callers should fall back to descFilePath().
+   */
+  private def getDescriptorBytes(e: Expression): Option[Array[Byte]] = {
+    // One reflective call; match on the runtime value (raw bytes, Option of bytes, or null).
+    Try(invoke0[AnyRef](e, "binaryDescriptorSet")).toOption.collect {
+      case bytes: Array[Byte] => bytes
+      case Some(bytes: Array[Byte]) => bytes
+    }
+  }
+
+  private def getDescFilePath(e: Expression): Option[String] =
+    Try(invoke0[Option[String]](e, "descFilePath")).toOption.flatten
+
+  private def writeTempDescFile(descBytes: Array[Byte]): String = {
+    val tmp: Path = Files.createTempFile("spark-rapids-protobuf-desc-", ".desc")
+    tmp.toFile.deleteOnExit() // register cleanup first so a failed write cannot leak the file
+    Files.write(tmp, descBytes)
+    tmp.toString
+  }
+
+  private def buildMessageDescriptorWithSparkProtobuf(
+      messageName: String,
+      descFilePathOpt: Option[String]): AnyRef = {
+    val cls = ShimReflectionUtils.loadClass(sparkProtobufUtilsObjectClassName)
+    val m = cls.getMethod("buildDescriptor", classOf[String], classOf[scala.Option[_]])
+    // Rethrow the wrapped cause: InvocationTargetException itself has a null getMessage.
+    try m.invoke(cls.getField("MODULE$").get(null), messageName, descFilePathOpt)
+    catch { case e: java.lang.reflect.InvocationTargetException => throw e.getCause }
+  }
+
+  private def typeName(t: AnyRef): String = {
+    // A null type is rendered as the literal "null". Otherwise prefer a reflective name()
+    // call (e.g. Enum.name()) and fall back to toString when that call fails.
+    Option(t).fold("null") { tpe =>
+      val reflectedName = Try(invoke0[String](tpe, "name"))
+      reflectedName.getOrElse(tpe.toString)
+    }
+  }
+
+  private def getOptionsMap(e: Expression): Map[String, String] = {
+    val opt = Try(invoke0[scala.collection.Map[String, String]](e, "options")).toOption
+    opt.map(_.toMap).getOrElse(Map.empty)
+  }
+
+  private def invoke0[T](obj: AnyRef, method: String): T =
+    obj.getClass.getMethod(method).invoke(obj).asInstanceOf[T]
+
+  private def invoke1[T](obj: AnyRef, method: String, arg0Cls: Class[_], arg0: AnyRef): T =
+    obj.getClass.getMethod(method, arg0Cls).invoke(obj, arg0).asInstanceOf[T]
+}
+
+
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
index 6e28a071a00..cc406a156fd 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
@@ -162,7 +162,7 @@ trait Spark340PlusNonDBShims extends Spark331PlusNonDBShims {
       ),
       GpuElementAtMeta.elementAtRule(true)
     ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
-    super.getExprs ++ shimExprs
+    super.getExprs ++ shimExprs ++ ProtobufExprShims.exprs
   }
 
   override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand],

From 084e9c2a65eb13f71e45243f65c536c0b85020b9 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Tue, 23 Dec 2025 17:00:38 +0800
Subject: [PATCH 2/9] style

---
 .../protobuf/ProtobufDescriptorUtils.scala    |  6 ++-
 .../sql/rapids/GpuFromProtobufSimple.scala    | 11 +++--
 .../rapids/shims/ProtobufExprShims.scala      | 45 ++++++++++++-------
 3 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
index f40cc2af03f..1975db14966 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -16,8 +16,8 @@
 
 package com.nvidia.spark.rapids.protobuf
 
-import scala.collection.mutable
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 import com.google.protobuf.DescriptorProtos
 import com.google.protobuf.Descriptors
@@ -67,7 +67,9 @@ object ProtobufDescriptorUtils {
       fd: Descriptors.FileDescriptor,
       messageName: String): Iterator[Descriptors.Descriptor] = {
     def matches(d: Descriptors.Descriptor): Boolean = {
-      d.getName == messageName || d.getFullName == messageName || d.getFullName.endsWith("." + messageName)
+      d.getName == messageName ||
+        d.getFullName == messageName ||
+        d.getFullName.endsWith("." + messageName)
     }
 
     def walk(d: Descriptors.Descriptor): Iterator[Descriptors.Descriptor] = {
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
index 73c23fe2f82..7d85d277e40 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
@@ -19,8 +19,8 @@ package org.apache.spark.sql.rapids
 import ai.rapids.cudf
 import ai.rapids.cudf.BinaryOp
 import ai.rapids.cudf.DType
-import com.nvidia.spark.rapids.Arm.withResource
 import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression}
+import com.nvidia.spark.rapids.Arm.withResource
 import com.nvidia.spark.rapids.jni.ProtobufSimple
 import com.nvidia.spark.rapids.shims.NullIntolerantShim
 
@@ -30,7 +30,8 @@ import org.apache.spark.sql.types._
 /**
  * GPU implementation for Spark's `from_protobuf` decode path (simple types only).
  *
- * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when supported.
+ * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when
+ * supported.
  */
 case class GpuFromProtobufSimple(
     outputSchema: StructType,
@@ -51,7 +52,11 @@ case class GpuFromProtobufSimple(
     // ProtobufSimple returns a non-null STRUCT with nullable children. Spark's
     // ProtobufDataToCatalyst is NullIntolerant, so if the input binary row is null the output
     // struct row must be null as well.
-    val decoded = ProtobufSimple.decodeToStruct(input.getBase, fieldNumbers, cudfTypeIds, cudfTypeScales)
+    val decoded = ProtobufSimple.decodeToStruct(
+      input.getBase,
+      fieldNumbers,
+      cudfTypeIds,
+      cudfTypeScales)
     if (input.getBase.hasNulls) {
       withResource(decoded) { _ =>
         decoded.mergeAndSetValidity(BinaryOp.BITWISE_AND, input.getBase)
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index a75dda64b14..629a119aaf8 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -39,10 +39,9 @@ import java.nio.file.{Files, Path}
 import scala.util.Try
 
 import com.nvidia.spark.rapids._
-import org.apache.spark.sql.rapids.GpuFromProtobufSimple
 
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
+import org.apache.spark.sql.rapids.GpuFromProtobufSimple
 import org.apache.spark.sql.types._
 
 /**
@@ -87,33 +86,42 @@ object ProtobufExprShims {
           schema = e.dataType match {
             case st: StructType => st
             case other =>
-              willNotWorkOnGpu(s"Only StructType output is supported for from_protobuf(simple), got $other")
+              willNotWorkOnGpu(
+                s"Only StructType output is supported for from_protobuf(simple), got $other")
               return
           }
 
           val options = getOptionsMap(e)
           if (options.nonEmpty) {
-            willNotWorkOnGpu(s"from_protobuf options are not supported yet on GPU: ${options.keys.mkString(",")}")
+            val keys = options.keys.mkString(",")
+            willNotWorkOnGpu(
+              s"from_protobuf options are not supported yet on GPU: $keys")
             return
           }
 
           val messageName = getMessageName(e)
           val descFilePathOpt = getDescFilePath(e).orElse {
-            // Newer Spark may embed a descriptor set (binaryDescriptorSet). Write it to a temp file so we can
-            // reuse Spark's own ProtobufUtils + shaded protobuf classes to resolve the descriptor.
+            // Newer Spark may embed a descriptor set (binaryDescriptorSet). Write it to a temp file
+            // so we can reuse Spark's ProtobufUtils (and its shaded protobuf classes) to resolve
+            // the descriptor.
             getDescriptorBytes(e).map(writeTempDescFile)
           }
           if (descFilePathOpt.isEmpty) {
-            willNotWorkOnGpu("from_protobuf(simple) requires a descriptor set (descFilePath or binaryDescriptorSet)")
+            willNotWorkOnGpu(
+              "from_protobuf(simple) requires a descriptor set " +
+                "(descFilePath or binaryDescriptorSet)")
             return
           }
 
           val msgDesc = try {
-            // Spark 3.4.x builds the descriptor as: ProtobufUtils.buildDescriptor(messageName, descFilePathOpt)
+            // Spark 3.4.x builds the descriptor as:
+            // ProtobufUtils.buildDescriptor(messageName, descFilePathOpt)
             buildMessageDescriptorWithSparkProtobuf(messageName, descFilePathOpt)
           } catch {
             case t: Throwable =>
-              willNotWorkOnGpu(s"Failed to resolve protobuf descriptor for message '$messageName': ${t.getMessage}")
+              willNotWorkOnGpu(
+                s"Failed to resolve protobuf descriptor for message '$messageName': " +
+                  s"${t.getMessage}")
               return
           }
 
@@ -126,7 +134,8 @@ object ProtobufExprShims {
             sf.dataType match {
               case BooleanType | IntegerType | LongType | FloatType | DoubleType | StringType =>
               case other =>
-                willNotWorkOnGpu(s"Unsupported field type for from_protobuf(simple): ${sf.name}: $other")
+                willNotWorkOnGpu(
+                  s"Unsupported field type for from_protobuf(simple): ${sf.name}: $other")
                 return
             }
 
@@ -136,9 +145,12 @@ object ProtobufExprShims {
               return
             }
 
-            val isRepeated = Try(invoke0[java.lang.Boolean](fd, "isRepeated").booleanValue()).getOrElse(false)
+            val isRepeated = Try {
+              invoke0[java.lang.Boolean](fd, "isRepeated").booleanValue()
+            }.getOrElse(false)
             if (isRepeated) {
-              willNotWorkOnGpu(s"Repeated fields are not supported for from_protobuf(simple): ${sf.name}")
+              willNotWorkOnGpu(
+                s"Repeated fields are not supported for from_protobuf(simple): ${sf.name}")
               return
             }
 
@@ -154,7 +166,9 @@ object ProtobufExprShims {
               case _ => false
             }
             if (!ok) {
-              willNotWorkOnGpu(s"Field type mismatch for '${sf.name}': Spark ${sf.dataType} vs Protobuf $protoTypeName")
+              willNotWorkOnGpu(
+                s"Field type mismatch for '${sf.name}': Spark ${sf.dataType} vs " +
+                  s"Protobuf $protoTypeName")
               return
             }
 
@@ -180,7 +194,8 @@ object ProtobufExprShims {
     invoke0[String](e, "messageName")
 
   /**
-   * Newer Spark versions may carry an in-expression descriptor set payload (e.g. binaryDescriptorSet).
+   * Newer Spark versions may carry an in-expression descriptor set payload
+   * (e.g. binaryDescriptorSet).
    * Spark 3.4.x does not, so callers should fall back to descFilePath().
    */
   private def getDescriptorBytes(e: Expression): Option[Array[Byte]] = {

From 7606925907476c407bbeef5202a1e3599ddc65d3 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Thu, 25 Dec 2025 14:11:11 +0800
Subject: [PATCH 3/9] address comments

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 .../src/main/python/protobuf_test.py          | 20 ++++---
 .../src/main/python/spark_init_internal.py    | 27 +++++++++
 .../protobuf/ProtobufDescriptorUtils.scala    |  2 -
 .../sql/rapids/GpuFromProtobufSimple.scala    | 25 ++++----
 .../rapids/shims/ProtobufExprShims.scala      | 58 +++++++++++++------
 5 files changed, 94 insertions(+), 38 deletions(-)

diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
index f85f1384b1f..3e9d0a1d1cd 100644
--- a/integration_tests/src/main/python/protobuf_test.py
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -85,6 +85,7 @@ def _build_simple_descriptor_set_bytes(spark):
     try:
         fd = fd.setSyntax("proto2")
     except Exception:
+        # If setSyntax is unavailable (older protobuf-java), we intentionally leave syntax unset.
         pass
 
     msg = D.DescriptorProto.newBuilder().setName("Simple")
@@ -130,17 +131,17 @@ def _write_bytes_to_hadoop_path(spark, path_str, data_bytes):
 @ignore_order(local=True)
 def test_from_protobuf_simple_parquet_binary_round_trip(spark_tmp_path):
     from_protobuf = _try_import_from_protobuf()
-    # if from_protobuf is None:
-    #     pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
-    # if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
-    #     pytest.skip("spark-protobuf JVM module is not available on the classpath")
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
 
     data_path = spark_tmp_path + "/PROTOBUF_SIMPLE_PARQUET/"
     desc_path = spark_tmp_path + "/simple.desc"
     message_name = "test.Simple"
 
     # Generate descriptor bytes once using the JVM (no protoc dependency)
-    desc_bytes = with_cpu_session(lambda spark: _build_simple_descriptor_set_bytes(spark))
+    desc_bytes = with_cpu_session(_build_simple_descriptor_set_bytes)
     with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
 
     # Build a DF with scalar columns + binary protobuf column and write to parquet
@@ -200,11 +201,16 @@ def run_on_spark(spark):
 @ignore_order(local=True)
 def test_from_protobuf_simple_null_input_returns_null(spark_tmp_path):
     from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
     desc_path = spark_tmp_path + "/simple_null_input.desc"
     message_name = "test.Simple"
 
     # Generate descriptor bytes once using the JVM (no protoc dependency)
-    desc_bytes = with_cpu_session(lambda spark: _build_simple_descriptor_set_bytes(spark))
+    desc_bytes = with_cpu_session(_build_simple_descriptor_set_bytes)
     with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
 
     # Spark's ProtobufDataToCatalyst is NullIntolerant (null input -> null output).
@@ -225,5 +231,3 @@ def run_on_spark(spark):
         return df.select(decoded.alias("decoded"))
 
     assert_gpu_and_cpu_are_equal_collect(run_on_spark)
-
-
diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py
index 90861746b64..787dc2d7eb0 100644
--- a/integration_tests/src/main/python/spark_init_internal.py
+++ b/integration_tests/src/main/python/spark_init_internal.py
@@ -61,11 +61,38 @@ def findspark_init():
     if spark_jars is not None:
         logging.info(f"Adding to findspark jars: {spark_jars}")
         findspark.add_jars(spark_jars)
+        # Also add to driver classpath so classes are available to Class.forName()
+        # This is needed for optional modules like spark-protobuf
+        _add_driver_classpath(spark_jars)
 
     if spark_jars_packages is not None:
         logging.info(f"Adding to findspark packages: {spark_jars_packages}")
         findspark.add_packages(spark_jars_packages)
 
+
+def _add_driver_classpath(jars):
+    """
+    Add jars to the driver classpath via PYSPARK_SUBMIT_ARGS.
+    findspark.add_jars() only adds --jars, which doesn't make classes available
+    to Class.forName() on the driver. This function adds --driver-class-path.
+    """
+    if not jars:
+        return
+    current_args = os.environ.get('PYSPARK_SUBMIT_ARGS', '')
+    # Remove trailing 'pyspark-shell' if present
+    if current_args.endswith('pyspark-shell'):
+        current_args = current_args[:-len('pyspark-shell')].strip()
+    # Skip if driver-class-path is already present
+    if '--driver-class-path' in current_args:
+        logging.info("driver-class-path already in PYSPARK_SUBMIT_ARGS, skipping")
+        return
+    # Add driver-class-path for each jar
+    jar_list = jars.replace(',', ' ').split()
+    driver_cp = ':'.join(jar_list)
+    new_args = f"{current_args} --driver-class-path {driver_cp} pyspark-shell".strip()
+    os.environ['PYSPARK_SUBMIT_ARGS'] = new_args
+    logging.info(f"Updated PYSPARK_SUBMIT_ARGS with driver-class-path")
+
 def running_with_xdist(session, is_worker):
     try:
         import xdist
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
index 1975db14966..89ed22f6e2d 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -80,5 +80,3 @@ object ProtobufDescriptorUtils {
     fd.getMessageTypes.asScala.iterator.flatMap(walk _)
   }
 }
-
-
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
index 7d85d277e40..6153f3f96ab 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
@@ -38,6 +38,7 @@ case class GpuFromProtobufSimple(
     fieldNumbers: Array[Int],
     cudfTypeIds: Array[Int],
     cudfTypeScales: Array[Int],
+    failOnErrors: Boolean,
     child: Expression)
   extends GpuUnaryExpression with ExpectsInputTypes with NullIntolerantShim {
 
@@ -56,7 +57,8 @@ case class GpuFromProtobufSimple(
       input.getBase,
       fieldNumbers,
       cudfTypeIds,
-      cudfTypeScales)
+      cudfTypeScales,
+      failOnErrors)
     if (input.getBase.hasNulls) {
       withResource(decoded) { _ =>
         decoded.mergeAndSetValidity(BinaryOp.BITWISE_AND, input.getBase)
@@ -68,17 +70,20 @@ case class GpuFromProtobufSimple(
 }
 
 object GpuFromProtobufSimple {
+  // Encodings from com.nvidia.spark.rapids.jni.ProtobufSimple
+  val ENC_DEFAULT = 0
+  val ENC_FIXED   = 1
+  val ENC_ZIGZAG  = 2
+
   def sparkTypeToCudfId(dt: DataType): (Int, Int) = dt match {
-    case BooleanType => (DType.BOOL8.getTypeId.getNativeId, 0)
-    case IntegerType => (DType.INT32.getTypeId.getNativeId, 0)
-    case LongType => (DType.INT64.getTypeId.getNativeId, 0)
-    case FloatType => (DType.FLOAT32.getTypeId.getNativeId, 0)
-    case DoubleType => (DType.FLOAT64.getTypeId.getNativeId, 0)
-    case StringType => (DType.STRING.getTypeId.getNativeId, 0)
+    case BooleanType => (DType.BOOL8.getTypeId.getNativeId, ENC_DEFAULT)
+    case IntegerType => (DType.INT32.getTypeId.getNativeId, ENC_DEFAULT)
+    case LongType => (DType.INT64.getTypeId.getNativeId, ENC_DEFAULT)
+    case FloatType => (DType.FLOAT32.getTypeId.getNativeId, ENC_DEFAULT)
+    case DoubleType => (DType.FLOAT64.getTypeId.getNativeId, ENC_DEFAULT)
+    case StringType => (DType.STRING.getTypeId.getNativeId, ENC_DEFAULT)
+    case BinaryType => (DType.LIST.getTypeId.getNativeId, ENC_DEFAULT)
     case other =>
       throw new IllegalArgumentException(s"Unsupported Spark type for protobuf(simple): $other")
   }
 }
-
-
-
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 629a119aaf8..a4ad3f42145 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -71,7 +71,8 @@ object ProtobufExprShims {
       "Decode a BinaryType column (protobuf) into a Spark SQL struct (simple types only)",
       ExprChecks.unaryProject(
         // Output is a struct; the rule does detailed checks in tagExprForGpu.
-        TypeSig.STRUCT.nested(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.STRING),
+        TypeSig.STRUCT.nested(
+          TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.STRING + TypeSig.BINARY),
         TypeSig.all,
         TypeSig.BINARY,
         TypeSig.BINARY),
@@ -81,6 +82,7 @@ object ProtobufExprShims {
         private var fieldNumbers: Array[Int] = _
         private var cudfTypeIds: Array[Int] = _
         private var cudfTypeScales: Array[Int] = _
+        private var failOnErrors: Boolean = _
 
         override def tagExprForGpu(): Unit = {
           schema = e.dataType match {
@@ -92,13 +94,17 @@ object ProtobufExprShims {
           }
 
           val options = getOptionsMap(e)
-          if (options.nonEmpty) {
-            val keys = options.keys.mkString(",")
+          val supportedOptions = Set("enums.as.ints", "mode")
+          val unsupportedOptions = options.keys.filterNot(supportedOptions.contains)
+          if (unsupportedOptions.nonEmpty) {
+            val keys = unsupportedOptions.mkString(",")
             willNotWorkOnGpu(
               s"from_protobuf options are not supported yet on GPU: $keys")
             return
           }
 
+          val enumsAsInts = options.getOrElse("enums.as.ints", "false").toBoolean
+          failOnErrors = options.getOrElse("mode", "PERMISSIVE").equalsIgnoreCase("FAILFAST")
           val messageName = getMessageName(e)
           val descFilePathOpt = getDescFilePath(e).orElse {
             // Newer Spark may embed a descriptor set (binaryDescriptorSet). Write it to a temp file
@@ -132,7 +138,8 @@ object ProtobufExprShims {
 
           fields.zipWithIndex.foreach { case (sf, idx) =>
             sf.dataType match {
-              case BooleanType | IntegerType | LongType | FloatType | DoubleType | StringType =>
+              case BooleanType | IntegerType | LongType | FloatType | DoubleType |
+                   StringType | BinaryType =>
               case other =>
                 willNotWorkOnGpu(
                   s"Unsupported field type for from_protobuf(simple): ${sf.name}: $other")
@@ -156,16 +163,32 @@ object ProtobufExprShims {
 
             val protoType = invoke0[AnyRef](fd, "getType")
             val protoTypeName = typeName(protoType)
-            val ok = (sf.dataType, protoTypeName) match {
-              case (BooleanType, "BOOL") => true
-              case (IntegerType, "INT32") => true
-              case (LongType, "INT64") => true
-              case (FloatType, "FLOAT") => true
-              case (DoubleType, "DOUBLE") => true
-              case (StringType, "STRING") => true
-              case _ => false
+
+            val encoding = (sf.dataType, protoTypeName) match {
+              case (BooleanType, "BOOL") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (IntegerType, "INT32" | "UINT32") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (IntegerType, "SINT32") => Some(GpuFromProtobufSimple.ENC_ZIGZAG)
+              case (IntegerType, "FIXED32" | "SFIXED32") => Some(GpuFromProtobufSimple.ENC_FIXED)
+              case (LongType, "INT64" | "UINT64") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (LongType, "SINT64") => Some(GpuFromProtobufSimple.ENC_ZIGZAG)
+              case (LongType, "FIXED64" | "SFIXED64") => Some(GpuFromProtobufSimple.ENC_FIXED)
+              // Spark may upcast smaller integers to LongType
+              case (LongType, "INT32" | "UINT32" | "SINT32" | "FIXED32" | "SFIXED32") =>
+                val enc = protoTypeName match {
+                  case "SINT32" => GpuFromProtobufSimple.ENC_ZIGZAG
+                  case "FIXED32" | "SFIXED32" => GpuFromProtobufSimple.ENC_FIXED
+                  case _ => GpuFromProtobufSimple.ENC_DEFAULT
+                }
+                Some(enc)
+              case (FloatType, "FLOAT") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (DoubleType, "DOUBLE") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (StringType, "STRING") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (BinaryType, "BYTES") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case (IntegerType, "ENUM") if enumsAsInts => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+              case _ => None
             }
-            if (!ok) {
+
+            if (encoding.isEmpty) {
               willNotWorkOnGpu(
                 s"Field type mismatch for '${sf.name}': Spark ${sf.dataType} vs " +
                   s"Protobuf $protoTypeName")
@@ -173,9 +196,9 @@ object ProtobufExprShims {
             }
 
             fnums(idx) = invoke0[java.lang.Integer](fd, "getNumber").intValue()
-            val (tid, scale) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
+            val (tid, _) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
             typeIds(idx) = tid
-            scales(idx) = scale
+            scales(idx) = encoding.get
           }
 
           fieldNumbers = fnums
@@ -184,7 +207,8 @@ object ProtobufExprShims {
         }
 
         override def convertToGpu(child: Expression): GpuExpression = {
-          GpuFromProtobufSimple(schema, fieldNumbers, cudfTypeIds, cudfTypeScales, child)
+          GpuFromProtobufSimple(
+            schema, fieldNumbers, cudfTypeIds, cudfTypeScales, failOnErrors, child)
         }
       }
     )
@@ -246,5 +270,3 @@ object ProtobufExprShims {
   private def invoke1[T](obj: AnyRef, method: String, arg0Cls: Class[_], arg0: AnyRef): T =
     obj.getClass.getMethod(method, arg0Cls).invoke(obj, arg0).asInstanceOf[T]
 }
-
-

From c6cde2d4f87d4d5d6d5077947d5d9d2e43a8ce00 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Thu, 25 Dec 2025 15:38:38 +0800
Subject: [PATCH 4/9] address comments

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 integration_tests/src/main/python/data_gen.py |  2 +-
 .../src/main/python/protobuf_test.py          |  4 ++--
 .../src/main/python/spark_init_internal.py    |  4 ++--
 .../protobuf/ProtobufDescriptorUtils.scala    |  7 +++++-
 .../sql/rapids/GpuFromProtobufSimple.scala    | 23 +++++++++++--------
 .../rapids/shims/ProtobufExprShims.scala      |  5 ++--
 6 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
index 837d4990832..9298bd189ec 100644
--- a/integration_tests/src/main/python/data_gen.py
+++ b/integration_tests/src/main/python/data_gen.py
@@ -925,7 +925,7 @@ class ProtobufSimpleMessageRowGen(DataGen):
       - one column per message field (Spark scalar types)
       - a binary column containing a serialized protobuf message containing those fields
 
-    This is intentionally limited to the simple scalar types supported in Patch 1:
+    This is intentionally limited to the simple scalar types currently supported:
     boolean/int32/int64/float/double/string.
 
     Fields are omitted from the encoded message if the corresponding value is None.
diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
index 3e9d0a1d1cd..4694d0aaa38 100644
--- a/integration_tests/src/main/python/protobuf_test.py
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -133,7 +133,7 @@ def test_from_protobuf_simple_parquet_binary_round_trip(spark_tmp_path):
     from_protobuf = _try_import_from_protobuf()
     if from_protobuf is None:
         pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
-    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+    if not with_cpu_session(_spark_protobuf_jvm_available):
         pytest.skip("spark-protobuf JVM module is not available on the classpath")
 
     data_path = spark_tmp_path + "/PROTOBUF_SIMPLE_PARQUET/"
@@ -203,7 +203,7 @@ def test_from_protobuf_simple_null_input_returns_null(spark_tmp_path):
     from_protobuf = _try_import_from_protobuf()
     if from_protobuf is None:
         pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
-    if not with_cpu_session(lambda spark: _spark_protobuf_jvm_available(spark)):
+    if not with_cpu_session(_spark_protobuf_jvm_available):
         pytest.skip("spark-protobuf JVM module is not available on the classpath")
 
     desc_path = spark_tmp_path + "/simple_null_input.desc"
diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py
index 787dc2d7eb0..e9fc3ca8413 100644
--- a/integration_tests/src/main/python/spark_init_internal.py
+++ b/integration_tests/src/main/python/spark_init_internal.py
@@ -86,9 +86,9 @@ def _add_driver_classpath(jars):
     if '--driver-class-path' in current_args:
         logging.info("driver-class-path already in PYSPARK_SUBMIT_ARGS, skipping")
         return
-    # Add driver-class-path for each jar
+    # Add driver-class-path for each jar (use os.pathsep for platform independence)
     jar_list = jars.replace(',', ' ').split()
-    driver_cp = ':'.join(jar_list)
+    driver_cp = os.pathsep.join(jar_list)
     new_args = f"{current_args} --driver-class-path {driver_cp} pyspark-shell".strip()
     os.environ['PYSPARK_SUBMIT_ARGS'] = new_args
     logging.info(f"Updated PYSPARK_SUBMIT_ARGS with driver-class-path")
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
index 89ed22f6e2d..aade729f000 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -27,6 +27,11 @@ import com.google.protobuf.Descriptors
  *
  * This is intentionally lightweight for the "simple types" from_protobuf patch: it supports
  * descriptor sets produced by `protoc --include_imports --descriptor_set_out=...`.
+ *
+ * NOTE: This utility is currently not used in the initial implementation, which relies on
+ * Spark's ProtobufUtils via reflection (buildMessageDescriptorWithSparkProtobuf). This class
+ * is preserved for potential future use cases where direct descriptor parsing is needed
+ * without depending on Spark's shaded protobuf classes.
  */
 object ProtobufDescriptorUtils {
 
@@ -79,4 +84,4 @@ object ProtobufDescriptorUtils {
 
     fd.getMessageTypes.asScala.iterator.flatMap(walk _)
   }
-}
+}
\ No newline at end of file
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
index 6153f3f96ab..c41269f418f 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
@@ -75,15 +75,20 @@ object GpuFromProtobufSimple {
   val ENC_FIXED   = 1
   val ENC_ZIGZAG  = 2
 
-  def sparkTypeToCudfId(dt: DataType): (Int, Int) = dt match {
-    case BooleanType => (DType.BOOL8.getTypeId.getNativeId, ENC_DEFAULT)
-    case IntegerType => (DType.INT32.getTypeId.getNativeId, ENC_DEFAULT)
-    case LongType => (DType.INT64.getTypeId.getNativeId, ENC_DEFAULT)
-    case FloatType => (DType.FLOAT32.getTypeId.getNativeId, ENC_DEFAULT)
-    case DoubleType => (DType.FLOAT64.getTypeId.getNativeId, ENC_DEFAULT)
-    case StringType => (DType.STRING.getTypeId.getNativeId, ENC_DEFAULT)
-    case BinaryType => (DType.LIST.getTypeId.getNativeId, ENC_DEFAULT)
+  /**
+   * Maps a Spark DataType to the corresponding cuDF native type ID.
+   * Note: The encoding (varint/zigzag/fixed) is determined by the protobuf field type,
+   * not the Spark data type, so it must be set separately based on the protobuf schema.
+   */
+  def sparkTypeToCudfId(dt: DataType): Int = dt match {
+    case BooleanType => DType.BOOL8.getTypeId.getNativeId
+    case IntegerType => DType.INT32.getTypeId.getNativeId
+    case LongType => DType.INT64.getTypeId.getNativeId
+    case FloatType => DType.FLOAT32.getTypeId.getNativeId
+    case DoubleType => DType.FLOAT64.getTypeId.getNativeId
+    case StringType => DType.STRING.getTypeId.getNativeId
+    case BinaryType => DType.LIST.getTypeId.getNativeId
     case other =>
       throw new IllegalArgumentException(s"Unsupported Spark type for protobuf(simple): $other")
   }
-}
+}
\ No newline at end of file
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index a4ad3f42145..8aeab9b34b2 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -196,8 +196,7 @@ object ProtobufExprShims {
             }
 
             fnums(idx) = invoke0[java.lang.Integer](fd, "getNumber").intValue()
-            val (tid, _) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
-            typeIds(idx) = tid
+            typeIds(idx) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
             scales(idx) = encoding.get
           }
 
@@ -269,4 +268,4 @@ object ProtobufExprShims {
 
   private def invoke1[T](obj: AnyRef, method: String, arg0Cls: Class[_], arg0: AnyRef): T =
     obj.getClass.getMethod(method, arg0Cls).invoke(obj, arg0).asInstanceOf[T]
-}
+}
\ No newline at end of file

From 6d4eb166202ada65418598a1efddfdc581060f0b Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Sun, 4 Jan 2026 15:43:57 +0800
Subject: [PATCH 5/9] copyrights

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 integration_tests/pom.xml                                       | 2 +-
 integration_tests/run_pyspark_from_build.sh                     | 2 +-
 integration_tests/src/main/python/data_gen.py                   | 2 +-
 integration_tests/src/main/python/protobuf_test.py              | 2 +-
 integration_tests/src/main/python/spark_init_internal.py        | 2 +-
 pom.xml                                                         | 2 +-
 .../nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala  | 2 +-
 .../org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala     | 2 +-
 .../scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala | 2 +-
 .../com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index 825083b7fbe..8180c1dbd49 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2020-2025, NVIDIA CORPORATION.
+  Copyright (c) 2020-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index baf04d44282..a976d50a16c 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
index 9298bd189ec..1ca10736db3 100644
--- a/integration_tests/src/main/python/data_gen.py
+++ b/integration_tests/src/main/python/data_gen.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
index 4694d0aaa38..218f6db3dff 100644
--- a/integration_tests/src/main/python/protobuf_test.py
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# Copyright (c) 2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py
index e9fc3ca8413..765bcac036d 100644
--- a/integration_tests/src/main/python/spark_init_internal.py
+++ b/integration_tests/src/main/python/spark_init_internal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/pom.xml b/pom.xml
index 8679b7ddf7e..92cc83b8b4d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2020-2025, NVIDIA CORPORATION.
+  Copyright (c) 2020-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
index aade729f000..cabc8d2905d 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
index c41269f418f..2d57ae11150 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 8aeab9b34b2..e53a2893653 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
index cc406a156fd..56bfa229051 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From 044ea96792a0b4462d93ad73cb23b587a582c87a Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Fri, 9 Jan 2026 14:10:31 +0800
Subject: [PATCH 6/9] column pruning

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 .../src/main/python/protobuf_test.py          | 139 +++++++
 .../spark/sql/rapids/GpuFromProtobuf.scala    | 199 ++++++++++
 .../sql/rapids/GpuFromProtobufSimple.scala    |  94 -----
 .../rapids/shims/ProtobufExprShims.scala      | 353 ++++++++++++++----
 4 files changed, 625 insertions(+), 160 deletions(-)
 create mode 100644 sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
 delete mode 100644 sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala

diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
index 4694d0aaa38..adfb2a287f8 100644
--- a/integration_tests/src/main/python/protobuf_test.py
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -231,3 +231,142 @@ def run_on_spark(spark):
         return df.select(decoded.alias("decoded"))
 
     assert_gpu_and_cpu_are_equal_collect(run_on_spark)
+
+
+def _build_nested_descriptor_set_bytes(spark):
+    """
+    Build, through the JVM, a FileDescriptorSet and return its serialized bytes. Schema:
+      package test;
+      syntax = "proto2";
+      message Nested {
+        optional int32 x = 1;
+      }
+      message WithNested {
+        optional int32  simple_int  = 1;
+        optional string simple_str  = 2;
+        optional Nested nested_msg  = 3;   // nested message - not supported by GPU
+        optional int64  simple_long = 4;
+      }
+    """
+    jvm = spark.sparkContext._jvm
+    D = jvm.com.google.protobuf.DescriptorProtos
+
+    fd = D.FileDescriptorProto.newBuilder() \
+        .setName("nested.proto") \
+        .setPackage("test")
+    try:
+        fd = fd.setSyntax("proto2")
+    except Exception:
+        pass
+
+    label_opt = D.FieldDescriptorProto.Label.LABEL_OPTIONAL
+
+    # Message Nested { optional int32 x = 1; }
+    nested_msg = D.DescriptorProto.newBuilder().setName("Nested")
+    nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("x")
+            .setNumber(1)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT32)
+            .build()
+    )
+    fd.addMessageType(nested_msg.build())
+
+    # Message WithNested: three scalar fields plus one field of type Nested
+    with_nested_msg = D.DescriptorProto.newBuilder().setName("WithNested")
+    # field 1: optional int32 simple_int
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_int")
+            .setNumber(1)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT32)
+            .build()
+    )
+    # field 2: optional string simple_str
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_str")
+            .setNumber(2)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_STRING)
+            .build()
+    )
+    # field 3: optional Nested nested_msg (message-typed, referenced by its full name)
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("nested_msg")
+            .setNumber(3)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_MESSAGE)
+            .setTypeName(".test.Nested")
+            .build()
+    )
+    # field 4: optional int64 simple_long
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_long")
+            .setNumber(4)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT64)
+            .build()
+    )
+    fd.addMessageType(with_nested_msg.build())
+
+    fds = D.FileDescriptorSet.newBuilder().addFile(fd.build()).build()
+    return bytes(fds.toByteArray())
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_schema_projection_simple_fields_only(spark_tmp_path):
+    """
+    Schema projection: only the scalar fields are selected from a message that also holds an
+    unsupported nested-message field, so the GPU should decode them without a CPU fallback.
+    Input is one hand-encoded row plus one null row, checked with the CPU-vs-GPU collect assert.
+    """
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(_spark_protobuf_jvm_available):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    desc_path = spark_tmp_path + "/nested.desc"
+    message_name = "test.WithNested"
+
+    desc_bytes = with_cpu_session(_build_nested_descriptor_set_bytes)
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Hand-encoded wire bytes (tag = field_number << 3 | wire_type); nested_msg is left unset
+    # Field 1 (simple_int): varint 42 -> 0x08 0x2A
+    # Field 2 (simple_str): length-delimited "hello" -> 0x12 0x05 h e l l o
+    # Field 4 (simple_long): varint 12345 -> 0x20 0xB9 0x60
+    test_data = bytes([
+        0x08, 0x2A,  # simple_int = 42
+        0x12, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F,  # simple_str = "hello"
+        0x20, 0xB9, 0x60,  # simple_long = 12345
+    ])
+
+    def run_on_spark(spark):
+        df = spark.createDataFrame(
+            [(test_data,), (None,)],
+            schema="bin binary",
+        )
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(
+                f.col("bin"),
+                message_name,
+                binaryDescriptorSet=bytearray(desc_bytes),
+            )
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        # Project only the scalar fields; nested_msg (field 3) is never referenced
+        return df.select(
+            decoded.getField("simple_int").alias("simple_int"),
+            decoded.getField("simple_str").alias("simple_str"),
+            decoded.getField("simple_long").alias("simple_long")
+        )
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
new file mode 100644
index 00000000000..be3a28a11e1
--- /dev/null
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.rapids
+
+import ai.rapids.cudf
+import ai.rapids.cudf.{BinaryOp, CudfException, DType}
+import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression}
+import com.nvidia.spark.rapids.Arm.withResource
+import com.nvidia.spark.rapids.jni.Protobuf
+import com.nvidia.spark.rapids.shims.NullIntolerantShim
+
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.types._
+
+/**
+ * GPU implementation for Spark's `from_protobuf` decode path.
+ *
+ * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when
+ * supported.
+ *
+ * @param fullSchema The complete output schema (must match the original expression's dataType)
+ * @param decodedFieldIndices Indices into fullSchema of the GPU-decoded fields, in ascending
+ *                            order (doColumnar matches them in one pass); others are null.
+ * @param fieldNumbers Protobuf field numbers for decoded fields (parallel to decodedFieldIndices)
+ * @param cudfTypeIds cuDF type IDs for decoded fields (parallel to decodedFieldIndices)
+ * @param cudfTypeScales Encodings (companion ENC_* values), parallel to decodedFieldIndices
+ * @param failOnErrors If true, throw exception on malformed data; if false, return null
+ */
+case class GpuFromProtobuf(
+    fullSchema: StructType,
+    decodedFieldIndices: Array[Int],
+    fieldNumbers: Array[Int],
+    cudfTypeIds: Array[Int],
+    cudfTypeScales: Array[Int],
+    failOnErrors: Boolean,
+    child: Expression)
+  extends GpuUnaryExpression with ExpectsInputTypes with NullIntolerantShim {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+  override def dataType: DataType = fullSchema.asNullable
+
+  override def nullable: Boolean = true
+
+  override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = {
+    val numRows = input.getRowCount.toInt
+
+    // Decode only the fields listed in fieldNumbers; the result has one child per entry
+    val decoded = try {
+      Protobuf.decodeToStruct(
+        input.getBase,
+        fieldNumbers,
+        cudfTypeIds,
+        cudfTypeScales,
+        failOnErrors)
+    } catch {
+      case e: CudfException if failOnErrors =>
+        // Rethrow as Spark's standard malformed-protobuf error so that callers see the same
+        // exception type on CPU and GPU. Without failOnErrors the CudfException propagates.
+        throw QueryExecutionErrors.malformedProtobufMessageDetectedInMessageParsingError(e)
+    }
+
+    // Rebuild a struct with every field of fullSchema: decoded children are copied out of
+    // the decoder result, all remaining fields are filled with null placeholder columns.
+    val result = withResource(decoded) { decodedStruct =>
+      val fullChildren = new Array[cudf.ColumnVector](fullSchema.fields.length)
+      var decodedIdx = 0
+
+      try {
+        for (i <- fullSchema.fields.indices) {
+          if (decodedIdx < decodedFieldIndices.length && decodedFieldIndices(decodedIdx) == i) {
+            // Next decoded field: copy the child out of the decoded struct
+            fullChildren(i) = decodedStruct.getChildColumnView(decodedIdx).copyToColumnVector()
+            decodedIdx += 1
+          } else {
+            // Field was pruned from decoding: substitute a null column of the same Spark type
+            fullChildren(i) = GpuFromProtobuf.createNullColumn(
+                fullSchema.fields(i).dataType, numRows)
+          }
+        }
+        cudf.ColumnVector.makeStruct(numRows, fullChildren: _*)
+      } finally {
+        fullChildren.foreach(c => if (c != null) c.close())
+      }
+    }
+
+    // Propagate null input rows: give the result struct the input column's validity
+    if (input.getBase.hasNulls) {
+      withResource(result) { _ =>
+        result.mergeAndSetValidity(BinaryOp.BITWISE_AND, input.getBase)
+      }
+    } else {
+      result
+    }
+  }
+}
+
+object GpuFromProtobuf {
+  // Field encodings (used for cudfTypeScales), from com.nvidia.spark.rapids.jni.Protobuf
+  val ENC_DEFAULT = 0
+  val ENC_FIXED   = 1
+  val ENC_ZIGZAG  = 2
+
+  /**
+   * Maps a Spark DataType to the cuDF native type ID; BinaryType maps to LIST. The encoding
+   * (varint/zigzag/fixed) comes from the protobuf field type and is supplied separately.
+   * Throws IllegalArgumentException for any other Spark type.
+   */
+  def sparkTypeToCudfId(dt: DataType): Int = dt match {
+    case BooleanType => DType.BOOL8.getTypeId.getNativeId
+    case IntegerType => DType.INT32.getTypeId.getNativeId
+    case LongType => DType.INT64.getTypeId.getNativeId
+    case FloatType => DType.FLOAT32.getTypeId.getNativeId
+    case DoubleType => DType.FLOAT64.getTypeId.getNativeId
+    case StringType => DType.STRING.getTypeId.getNativeId
+    case BinaryType => DType.LIST.getTypeId.getNativeId
+    case other =>
+      throw new IllegalArgumentException(s"Unsupported Spark type for protobuf: $other")
+  }
+
+  /**
+   * Creates an all-null column of `numRows` rows for `dataType`; the caller must close it.
+   * Used for fields that are not decoded (schema projection optimization).
+   */
+  def createNullColumn(dataType: DataType, numRows: Int): cudf.ColumnVector = {
+    val cudfType = dataType match {
+      case BooleanType => DType.BOOL8
+      case IntegerType => DType.INT32
+      case LongType => DType.INT64
+      case FloatType => DType.FLOAT32
+      case DoubleType => DType.FLOAT64
+      case StringType => DType.STRING
+      case BinaryType =>
+        // Binary is LIST<INT8> in cuDF
+        return withResource(cudf.Scalar.listFromNull(
+          new cudf.HostColumnVector.BasicType(false, DType.INT8))) { nullScalar =>
+          withResource(cudf.ColumnVector.fromScalar(nullScalar, numRows)) { col =>
+            col.incRefCount()
+          }
+        }
+      case st: StructType =>
+        // For nested struct, create struct with null children and set all rows to null
+        val nullChildren = st.fields.map(f => createNullColumn(f.dataType, numRows))
+        return withResource(new AutoCloseableArray(nullChildren)) { _ =>
+          withResource(cudf.ColumnVector.makeStruct(numRows, nullChildren: _*)) { struct =>
+            // mergeAndSetValidity merges null masks, not values: use an all-null column
+            withResource(cudf.Scalar.fromNull(DType.BOOL8)) { nullBool =>
+              withResource(cudf.ColumnVector.fromScalar(nullBool, numRows)) { allNull =>
+                struct.mergeAndSetValidity(BinaryOp.BITWISE_AND, allNull)
+              }
+            }
+          }
+        }
+      case ArrayType(elementType, _) =>
+        val elementDType = elementType match {
+          case BooleanType => DType.BOOL8
+          case IntegerType => DType.INT32
+          case LongType => DType.INT64
+          case FloatType => DType.FLOAT32
+          case DoubleType => DType.FLOAT64
+          case StringType => DType.STRING
+          case _ => DType.INT8 // fallback: the real element type is not preserved
+        }
+        return withResource(cudf.Scalar.listFromNull(
+          new cudf.HostColumnVector.BasicType(false, elementDType))) { nullScalar =>
+          withResource(cudf.ColumnVector.fromScalar(nullScalar, numRows)) { col =>
+            col.incRefCount()
+          }
+        }
+      case _ =>
+        // Fallback: use INT8 and hope for the best (shouldn't happen for supported types)
+        DType.INT8
+    }
+
+    withResource(cudf.Scalar.fromNull(cudfType)) { nullScalar =>
+      cudf.ColumnVector.fromScalar(nullScalar, numRows)
+    }
+  }
+
+  /** Helper class to auto-close an array of ColumnVectors */
+  private class AutoCloseableArray(cols: Array[cudf.ColumnVector]) extends AutoCloseable {
+    override def close(): Unit = cols.foreach(c => if (c != null) c.close())
+  }
+}
\ No newline at end of file
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
deleted file mode 100644
index c41269f418f..00000000000
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobufSimple.scala
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.rapids
-
-import ai.rapids.cudf
-import ai.rapids.cudf.BinaryOp
-import ai.rapids.cudf.DType
-import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression}
-import com.nvidia.spark.rapids.Arm.withResource
-import com.nvidia.spark.rapids.jni.ProtobufSimple
-import com.nvidia.spark.rapids.shims.NullIntolerantShim
-
-import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
-import org.apache.spark.sql.types._
-
-/**
- * GPU implementation for Spark's `from_protobuf` decode path (simple types only).
- *
- * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when
- * supported.
- */
-case class GpuFromProtobufSimple(
-    outputSchema: StructType,
-    fieldNumbers: Array[Int],
-    cudfTypeIds: Array[Int],
-    cudfTypeScales: Array[Int],
-    failOnErrors: Boolean,
-    child: Expression)
-  extends GpuUnaryExpression with ExpectsInputTypes with NullIntolerantShim {
-
-  override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
-
-  override def dataType: DataType = outputSchema.asNullable
-
-  override def nullable: Boolean = true
-
-  override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = {
-    // Spark BinaryType is represented in cuDF as a LIST<UINT8/INT8>.
-    // ProtobufSimple returns a non-null STRUCT with nullable children. Spark's
-    // ProtobufDataToCatalyst is NullIntolerant, so if the input binary row is null the output
-    // struct row must be null as well.
-    val decoded = ProtobufSimple.decodeToStruct(
-      input.getBase,
-      fieldNumbers,
-      cudfTypeIds,
-      cudfTypeScales,
-      failOnErrors)
-    if (input.getBase.hasNulls) {
-      withResource(decoded) { _ =>
-        decoded.mergeAndSetValidity(BinaryOp.BITWISE_AND, input.getBase)
-      }
-    } else {
-      decoded
-    }
-  }
-}
-
-object GpuFromProtobufSimple {
-  // Encodings from com.nvidia.spark.rapids.jni.ProtobufSimple
-  val ENC_DEFAULT = 0
-  val ENC_FIXED   = 1
-  val ENC_ZIGZAG  = 2
-
-  /**
-   * Maps a Spark DataType to the corresponding cuDF native type ID.
-   * Note: The encoding (varint/zigzag/fixed) is determined by the protobuf field type,
-   * not the Spark data type, so it must be set separately based on the protobuf schema.
-   */
-  def sparkTypeToCudfId(dt: DataType): Int = dt match {
-    case BooleanType => DType.BOOL8.getTypeId.getNativeId
-    case IntegerType => DType.INT32.getTypeId.getNativeId
-    case LongType => DType.INT64.getTypeId.getNativeId
-    case FloatType => DType.FLOAT32.getTypeId.getNativeId
-    case DoubleType => DType.FLOAT64.getTypeId.getNativeId
-    case StringType => DType.STRING.getTypeId.getNativeId
-    case BinaryType => DType.LIST.getTypeId.getNativeId
-    case other =>
-      throw new IllegalArgumentException(s"Unsupported Spark type for protobuf(simple): $other")
-  }
-}
\ No newline at end of file
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 8aeab9b34b2..a1929de348a 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -36,14 +36,28 @@ package com.nvidia.spark.rapids.shims
 
 import java.nio.file.{Files, Path}
 
+import scala.collection.mutable
 import scala.util.Try
 
 import com.nvidia.spark.rapids._
 
-import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
-import org.apache.spark.sql.rapids.GpuFromProtobufSimple
+import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField, UnaryExpression}
+import org.apache.spark.sql.execution.ProjectExec
+import org.apache.spark.sql.rapids.GpuFromProtobuf
 import org.apache.spark.sql.types._
 
+/**
+ * Per-field metadata collected while tagging from_protobuf, used for schema projection.
+ */
+private[shims] case class ProtobufFieldInfo(
+    fieldNumber: Int, // protobuf field number read from the field descriptor
+    protoTypeName: String, // protobuf type name of the field (e.g. "INT32", "STRING")
+    sparkType: DataType, // Spark SQL type of the matching schema field
+    encoding: Int, // one of the GpuFromProtobuf.ENC_* values
+    isSupported: Boolean, // whether the field passed the GPU support checks
+    unsupportedReason: Option[String] // explanation reported when isSupported is false
+)
+
 /**
  * Spark 3.4+ optional integration for spark-protobuf expressions.
  *
@@ -68,28 +82,31 @@ object ProtobufExprShims {
 
   private def fromProtobufRule: ExprRule[_ <: Expression] = {
     GpuOverrides.expr[UnaryExpression](
-      "Decode a BinaryType column (protobuf) into a Spark SQL struct (simple types only)",
+      "Decode a BinaryType column (protobuf) into a Spark SQL struct",
       ExprChecks.unaryProject(
-        // Output is a struct; the rule does detailed checks in tagExprForGpu.
-        TypeSig.STRUCT.nested(
-          TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.STRING + TypeSig.BINARY),
+        // Use TypeSig.all here because schema projection determines which fields
+        // actually need GPU support. Detailed type checking is done in tagExprForGpu.
+        TypeSig.all,
         TypeSig.all,
         TypeSig.BINARY,
         TypeSig.BINARY),
       (e, conf, p, r) => new UnaryExprMeta[UnaryExpression](e, conf, p, r) {
 
-        private var schema: StructType = _
+        // Full schema from the expression (must match original dataType for compatibility)
+        private var fullSchema: StructType = _
+        // Indices into fullSchema for fields that will be decoded by GPU
+        private var decodedFieldIndices: Array[Int] = _
         private var fieldNumbers: Array[Int] = _
         private var cudfTypeIds: Array[Int] = _
         private var cudfTypeScales: Array[Int] = _
         private var failOnErrors: Boolean = _
 
         override def tagExprForGpu(): Unit = {
-          schema = e.dataType match {
+          fullSchema = e.dataType match {
             case st: StructType => st
             case other =>
               willNotWorkOnGpu(
-                s"Only StructType output is supported for from_protobuf(simple), got $other")
+                s"Only StructType output is supported for from_protobuf, got $other")
               return
           }
 
@@ -114,7 +131,7 @@ object ProtobufExprShims {
           }
           if (descFilePathOpt.isEmpty) {
             willNotWorkOnGpu(
-              "from_protobuf(simple) requires a descriptor set " +
+              "from_protobuf requires a descriptor set " +
                 "(descFilePath or binaryDescriptorSet)")
             return
           }
@@ -131,83 +148,287 @@ object ProtobufExprShims {
               return
           }
 
-          val fields = schema.fields
-          val fnums = new Array[Int](fields.length)
-          val typeIds = new Array[Int](fields.length)
-          val scales = new Array[Int](fields.length)
-
-          fields.zipWithIndex.foreach { case (sf, idx) =>
-            sf.dataType match {
-              case BooleanType | IntegerType | LongType | FloatType | DoubleType |
-                   StringType | BinaryType =>
-              case other =>
-                willNotWorkOnGpu(
-                  s"Unsupported field type for from_protobuf(simple): ${sf.name}: $other")
-                return
+          // Step 1: Analyze all fields and build field info map
+          val allFieldsInfo = analyzeAllFields(fullSchema, msgDesc, enumsAsInts, messageName)
+          if (allFieldsInfo.isEmpty) {
+            // Error was already reported in analyzeAllFields
+            return
+          }
+          val fieldsInfoMap = allFieldsInfo.get
+
+          // Step 2: Determine which fields are actually required by downstream operations
+          val requiredFieldNames = analyzeRequiredFields(fieldsInfoMap.keySet)
+
+          // Step 3: Check if all required fields are supported
+          val unsupportedRequired = requiredFieldNames.filter { name =>
+            fieldsInfoMap.get(name).exists(!_.isSupported)
+          }
+
+          if (unsupportedRequired.nonEmpty) {
+            val reasons = unsupportedRequired.map { name =>
+              val info = fieldsInfoMap(name)
+              s"${name}: ${info.unsupportedReason.getOrElse("unknown reason")}"
             }
+            willNotWorkOnGpu(
+              s"Required fields not supported for from_protobuf: ${reasons.mkString(", ")}")
+            return
+          }
+
+          // Step 4: Identify which fields in fullSchema need to be decoded
+          // These are fields that are required AND supported
+          val indicesToDecode = fullSchema.fields.zipWithIndex.collect {
+            case (sf, idx) if requiredFieldNames.contains(sf.name) => idx
+          }
+          decodedFieldIndices = indicesToDecode
+
+          // Step 5: Build arrays for the fields to decode (parallel to decodedFieldIndices)
+          val fnums = new Array[Int](indicesToDecode.length)
+          val typeIds = new Array[Int](indicesToDecode.length)
+          val scales = new Array[Int](indicesToDecode.length)
+
+          indicesToDecode.zipWithIndex.foreach { case (schemaIdx, arrIdx) =>
+            val sf = fullSchema.fields(schemaIdx)
+            val info = fieldsInfoMap(sf.name)
+            fnums(arrIdx) = info.fieldNumber
+            typeIds(arrIdx) = GpuFromProtobuf.sparkTypeToCudfId(sf.dataType)
+            scales(arrIdx) = info.encoding
+          }
+
+          fieldNumbers = fnums
+          cudfTypeIds = typeIds
+          cudfTypeScales = scales
+        }
 
+        /**
+         * Looks up every schema field in the message descriptor and records a ProtobufFieldInfo
+         * keyed by field name. Returns None, after tagging, if the descriptor lacks a field.
+         */
+        private def analyzeAllFields(
+            schema: StructType,
+            msgDesc: AnyRef,
+            enumsAsInts: Boolean,
+            messageName: String): Option[Map[String, ProtobufFieldInfo]] = {
+          val result = mutable.Map[String, ProtobufFieldInfo]()
+
+          for (sf <- schema.fields) {
             val fd = invoke1[AnyRef](msgDesc, "findFieldByName", classOf[String], sf.name)
             if (fd == null) {
-              willNotWorkOnGpu(s"Protobuf field '${sf.name}' not found in message '$messageName'")
-              return
+              willNotWorkOnGpu(
+                s"Protobuf field '${sf.name}' not found in message '$messageName'")
+              return None // tagging done above; abort the whole analysis
             }
 
             val isRepeated = Try {
               invoke0[java.lang.Boolean](fd, "isRepeated").booleanValue()
             }.getOrElse(false)
-            if (isRepeated) {
-              willNotWorkOnGpu(
-                s"Repeated fields are not supported for from_protobuf(simple): ${sf.name}")
-              return
-            }
 
             val protoType = invoke0[AnyRef](fd, "getType")
             val protoTypeName = typeName(protoType)
+            val fieldNumber = invoke0[java.lang.Integer](fd, "getNumber").intValue()
+
+            // Record support status and encoding; unsupported fields are not rejected here
+            val (isSupported, unsupportedReason, encoding) =
+              checkFieldSupport(sf.dataType, protoTypeName, isRepeated, enumsAsInts)
+
+            result(sf.name) = ProtobufFieldInfo(
+              fieldNumber = fieldNumber,
+              protoTypeName = protoTypeName,
+              sparkType = sf.dataType,
+              encoding = encoding,
+              isSupported = isSupported,
+              unsupportedReason = unsupportedReason
+            )
+          }
 
-            val encoding = (sf.dataType, protoTypeName) match {
-              case (BooleanType, "BOOL") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (IntegerType, "INT32" | "UINT32") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (IntegerType, "SINT32") => Some(GpuFromProtobufSimple.ENC_ZIGZAG)
-              case (IntegerType, "FIXED32" | "SFIXED32") => Some(GpuFromProtobufSimple.ENC_FIXED)
-              case (LongType, "INT64" | "UINT64") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (LongType, "SINT64") => Some(GpuFromProtobufSimple.ENC_ZIGZAG)
-              case (LongType, "FIXED64" | "SFIXED64") => Some(GpuFromProtobufSimple.ENC_FIXED)
-              // Spark may upcast smaller integers to LongType
-              case (LongType, "INT32" | "UINT32" | "SINT32" | "FIXED32" | "SFIXED32") =>
-                val enc = protoTypeName match {
-                  case "SINT32" => GpuFromProtobufSimple.ENC_ZIGZAG
-                  case "FIXED32" | "SFIXED32" => GpuFromProtobufSimple.ENC_FIXED
-                  case _ => GpuFromProtobufSimple.ENC_DEFAULT
-                }
-                Some(enc)
-              case (FloatType, "FLOAT") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (DoubleType, "DOUBLE") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (StringType, "STRING") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (BinaryType, "BYTES") => Some(GpuFromProtobufSimple.ENC_DEFAULT)
-              case (IntegerType, "ENUM") if enumsAsInts => Some(GpuFromProtobufSimple.ENC_DEFAULT)
+          Some(result.toMap)
+        }
+
+        /**
+         * Decides whether a single protobuf field can be decoded on the GPU and, if so, how.
+         *
+         * @param sparkType Spark SQL type of the field in the output schema
+         * @param protoTypeName protobuf type name of the field, e.g. "SINT32"
+         * @param isRepeated whether the field is repeated; repeated fields are rejected
+         * @param enumsAsInts when true, an ENUM field paired with IntegerType is accepted
+         * @return (isSupported, unsupportedReason, encoding); ENC_DEFAULT when rejected
+         */
+        private def checkFieldSupport(
+            sparkType: DataType,
+            protoTypeName: String,
+            isRepeated: Boolean,
+            enumsAsInts: Boolean): (Boolean, Option[String], Int) = {
+          import GpuFromProtobuf.{ENC_DEFAULT, ENC_FIXED, ENC_ZIGZAG}
+
+          // Every rejection carries a reason and ENC_DEFAULT as a placeholder encoding.
+          def rejected(reason: String): (Boolean, Option[String], Int) =
+            (false, Some(reason), ENC_DEFAULT)
+
+          // Only these simple Spark types are handled.
+          val simpleSparkType = sparkType match {
+            case BooleanType | IntegerType | LongType | FloatType | DoubleType |
+                 StringType | BinaryType => true
+            case _ => false
+          }
+
+          // Wire encoding for each accepted (Spark type, protobuf type) pairing. The 32-bit
+          // protobuf integers are accepted for LongType too, since Spark may upcast them.
+          val encoding: Option[Int] = (sparkType, protoTypeName) match {
+            case (IntegerType | LongType, "INT32" | "UINT32") => Some(ENC_DEFAULT)
+            case (IntegerType | LongType, "SINT32") => Some(ENC_ZIGZAG)
+            case (IntegerType | LongType, "FIXED32" | "SFIXED32") => Some(ENC_FIXED)
+            case (LongType, "INT64" | "UINT64") => Some(ENC_DEFAULT)
+            case (LongType, "SINT64") => Some(ENC_ZIGZAG)
+            case (LongType, "FIXED64" | "SFIXED64") => Some(ENC_FIXED)
+            case (BooleanType, "BOOL") => Some(ENC_DEFAULT)
+            case (FloatType, "FLOAT") => Some(ENC_DEFAULT)
+            case (DoubleType, "DOUBLE") => Some(ENC_DEFAULT)
+            case (StringType, "STRING") => Some(ENC_DEFAULT)
+            case (BinaryType, "BYTES") => Some(ENC_DEFAULT)
+            case (IntegerType, "ENUM") if enumsAsInts => Some(ENC_DEFAULT)
+            case _ => None
+          }
+
+          if (isRepeated) {
+            rejected("repeated fields are not supported")
+          } else if (!simpleSparkType) {
+            rejected(s"unsupported Spark type: $sparkType")
+          } else {
+            encoding match {
+              case Some(enc) => (true, None, enc)
+              case None => rejected(s"type mismatch: Spark $sparkType vs Protobuf $protoTypeName")
+            }
+          }
+        }
+
+        /**
+         * Works out which schema fields the enclosing plan reads from the decoded struct.
+         * Only a parent ProjectExec is analyzed; otherwise all fields are treated as required.
+         *
+         * @param allFieldNames All field names in the full schema
+         * @return Set of field names that are actually required
+         */
+        private def analyzeRequiredFields(allFieldNames: Set[String]): Set[String] = {
+          // Locate the nearest enclosing plan meta, then inspect it for field extraction
+          val parentPlanOpt = findParentPlanMeta()
+
+          parentPlanOpt match {
+            case Some(planMeta) =>
+              analyzeDownstreamProject(planMeta) match {
+                case Some(fields) if fields.nonEmpty =>
+                  // Successfully identified required fields via schema projection
+                  fields
+                case _ =>
+                  // Could not identify specific fields from the plan, assume all are needed
+                  allFieldNames
+              }
+            case None =>
+              // No parent SparkPlanMeta found in the meta tree, assume all fields are needed
+              allFieldNames
+          }
+        }
+
+        /**
+         * Walks up the meta parent chain and returns the nearest SparkPlanMeta, if any.
+         */
+        private def findParentPlanMeta(): Option[SparkPlanMeta[_]] = {
+          def traverse(meta: Option[RapidsMeta[_, _, _]]): Option[SparkPlanMeta[_]] = {
+            meta match {
+              case Some(p: SparkPlanMeta[_]) => Some(p) // reached a plan-level meta
+              case Some(p: RapidsMeta[_, _, _]) => traverse(p.parent) // keep climbing
               case _ => None
             }
+          }
+          traverse(parent)
+        }
 
-            if (encoding.isEmpty) {
-              willNotWorkOnGpu(
-                s"Field type mismatch for '${sf.name}': Spark ${sf.dataType} vs " +
-                  s"Protobuf $protoTypeName")
-              return
-            }
+        /**
+         * Inspects a plan node and, when it is a ProjectExec, gathers the names of the protobuf
+         * struct fields that its project list extracts through GetStructField.
+         *
+         * @param planMeta meta of the plan node to inspect
+         * @return the extracted field names; None when the node is not a ProjectExec, when the
+         *         struct is referenced as a whole, or when no field extraction was found
+         */
+        private def analyzeDownstreamProject(planMeta: SparkPlanMeta[_]): Option[Set[String]] = {
+          planMeta.wrapped match {
+            case project: ProjectExec =>
+              // Field names pulled out of the protobuf struct by this project list.
+              val extracted = mutable.Set[String]()
+              var wholeStructUsed = false
+              // Invoked by the collector whenever the struct is used without field extraction.
+              val markWholeStructUsed: () => Unit = () => { wholeStructUsed = true }
+
+              for (projected <- project.projectList) {
+                collectStructFieldReferences(projected, extracted, markWholeStructUsed)
+              }
+
+              // A whole-struct use needs every field, and an empty result gives nothing to
+              // prune by, so both cases report None and leave the decision to the caller.
+              if (wholeStructUsed || extracted.isEmpty) {
+                None
+              } else {
+                Some(extracted.toSet)
+              }
+            case _ =>
+              // Only ProjectExec nodes are analyzed for schema projection.
+              None
+          }
+        }
 
-            fnums(idx) = invoke0[java.lang.Integer](fd, "getNumber").intValue()
-            typeIds(idx) = GpuFromProtobufSimple.sparkTypeToCudfId(sf.dataType)
-            scales(idx) = encoding.get
+        /**
+         * Walks `expr` and adds to `fieldRefs` the name of each field that a GetStructField
+         * extracts from the protobuf struct; calls the holder when the struct is used whole.
+         */
+        private def collectStructFieldReferences(
+            expr: Expression,
+            fieldRefs: mutable.Set[String],
+            hasDirectStructRefHolder: () => Unit): Unit = {
+          expr match {
+            case GetStructField(child, ordinal, nameOpt) =>
+              // Check if this GetStructField extracts from our protobuf struct
+              if (isProtobufStructReference(child)) {
+                // Prefer the name carried by GetStructField; otherwise look it up by ordinal
+                val fieldName = nameOpt.getOrElse {
+                  if (ordinal < fullSchema.fields.length) {
+                    fullSchema.fields(ordinal).name
+                  } else {
+                    s"_$ordinal" // ordinal outside fullSchema: placeholder name
+                  }
+                }
+                fieldRefs += fieldName
+                // Don't recurse into child - we've handled this protobuf reference
+              } else {
+                // Child is not a protobuf struct, recurse to check for nested access
+                collectStructFieldReferences(child, fieldRefs, hasDirectStructRefHolder)
+              }
+
+            case _ =>
+              // Check if this expression directly references our protobuf struct
+              // without extracting a field (e.g., passing the whole struct to a function)
+              if (isProtobufStructReference(expr)) {
+                hasDirectStructRefHolder()
+              }
+              // Recursively check children
+              expr.children.foreach { child =>
+                collectStructFieldReferences(child, fieldRefs, hasDirectStructRefHolder)
+              }
           }
+        }
 
-          fieldNumbers = fnums
-          cudfTypeIds = typeIds
-          cudfTypeScales = scales
+        /**
+         * Check if an expression references the output of a protobuf decode expression.
+         * We check the expression type by class name since the class is in an optional module.
+         */
+        private def isProtobufStructReference(expr: Expression): Boolean = {
+          // Check if expr is a ProtobufDataToCatalyst expression
+          // We use class name check because the class is in an optional external module
+          expr.getClass.getName.contains("ProtobufDataToCatalyst")
         }
 
         override def convertToGpu(child: Expression): GpuExpression = {
-          GpuFromProtobufSimple(
-            schema, fieldNumbers, cudfTypeIds, cudfTypeScales, failOnErrors, child)
+          GpuFromProtobuf(
+            fullSchema, decodedFieldIndices, fieldNumbers, cudfTypeIds, cudfTypeScales,
+            failOnErrors, child)
         }
       }
     )
@@ -268,4 +489,4 @@ object ProtobufExprShims {
 
   private def invoke1[T](obj: AnyRef, method: String, arg0Cls: Class[_], arg0: AnyRef): T =
     obj.getClass.getMethod(method, arg0Cls).invoke(obj, arg0).asInstanceOf[T]
-}
\ No newline at end of file
+}

From a77d90eae6cbfb9e4594ecc832f91d3ef5633438 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Fri, 9 Jan 2026 16:26:18 +0800
Subject: [PATCH 7/9] Fix schema projection detection for aliased from_protobuf output

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 .../rapids/shims/ProtobufExprShims.scala      | 42 ++++++++++++++++---
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 92915902631..35261ec4c33 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -41,7 +41,7 @@ import scala.util.Try
 
 import com.nvidia.spark.rapids._
 
-import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField, UnaryExpression}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, GetStructField, UnaryExpression}
 import org.apache.spark.sql.execution.ProjectExec
 import org.apache.spark.sql.rapids.GpuFromProtobuf
 import org.apache.spark.sql.types._
@@ -313,13 +313,22 @@ object ProtobufExprShims {
 
           parentPlanOpt match {
             case Some(planMeta) =>
+              // First, try to analyze the immediate parent
               analyzeDownstreamProject(planMeta) match {
                 case Some(fields) if fields.nonEmpty =>
                   // Successfully identified required fields via schema projection
                   fields
                 case _ =>
-                  // Could not identify specific fields from the plan, assume all are needed
-                  allFieldNames
+                  // The parent may only alias the struct, so fall back to the grandparent plan.
+                  // NOTE(review): fields the parent itself reads are not merged in here - confirm.
+                  planMeta.parent match {
+                    case Some(grandParentMeta: SparkPlanMeta[_]) =>
+                      analyzeDownstreamProject(grandParentMeta) match {
+                        case Some(fields) if fields.nonEmpty => fields
+                        case _ => allFieldNames
+                      }
+                    case _ => allFieldNames
+                  }
               }
             case None =>
               // No parent SparkPlanMeta found in the meta tree, assume all fields are needed
@@ -417,12 +426,33 @@ object ProtobufExprShims {
 
         /**
          * Check if an expression references the output of a protobuf decode expression.
-         * We check the expression type by class name since the class is in an optional module.
+         * Two shapes are recognized:
+         * 1. The ProtobufDataToCatalyst expression itself (matched by class name)
+         * 2. An AttributeReference whose struct type has the same field names and types as
+         *    fullSchema; any attribute with that shape matches, not only the decode output
          */
         private def isProtobufStructReference(expr: Expression): Boolean = {
           // Check if expr is a ProtobufDataToCatalyst expression
-          // We use class name check because the class is in an optional external module
-          expr.getClass.getName.contains("ProtobufDataToCatalyst")
+          if (expr.getClass.getName.contains("ProtobufDataToCatalyst")) {
+            return true
+          }
+          
+          // Otherwise accept an AttributeReference whose struct type mirrors fullSchema.
+          // This handles the case where GetStructField references a column from a parent Project
+          expr match {
+            case attr: AttributeReference =>
+              // Check if the data type matches our full schema (struct type from protobuf)
+              attr.dataType match {
+                case st: StructType => 
+                  // Compare field names and types - StructType equality can be tricky
+                  st.fields.length == fullSchema.fields.length &&
+                    st.fields.zip(fullSchema.fields).forall { case (a, b) =>
+                      a.name == b.name && a.dataType == b.dataType
+                    }
+                case _ => false
+              }
+            case _ => false
+          }
         }
 
         override def convertToGpu(child: Expression): GpuExpression = {

From 2a66f9a7d91e361b90cbbf4fa88239085d7d4548 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Tue, 13 Jan 2026 11:23:20 +0800
Subject: [PATCH 8/9] address comments

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
---
 integration_tests/pom.xml                         |  1 -
 integration_tests/src/main/python/data_gen.py     | 15 +++++++++++++++
 .../apache/spark/sql/rapids/GpuFromProtobuf.scala |  7 ++++++-
 .../spark/rapids/shims/ProtobufExprShims.scala    | 10 +++++++++-
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index 8180c1dbd49..f178e84ffba 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -235,5 +235,4 @@
             </resource>
         </resources>
     </build>
-
 </project>
diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
index 1ca10736db3..23397364c7b 100644
--- a/integration_tests/src/main/python/data_gen.py
+++ b/integration_tests/src/main/python/data_gen.py
@@ -951,6 +951,21 @@ def _cache_repr(self):
         kids = ",".join(["{}:{}#{}".format(n, str(g.data_type), num) for (n, num, g) in self._fields])
         return super()._cache_repr() + "(" + kids + "," + self._binary_col_name + ")"
 
+    def __eq__(self, other):
+        # Generators are equal when their (name, number, data type) field lists, binary
+        # column names, and nullability all agree.
+        if not isinstance(other, ProtobufSimpleMessageRowGen):
+            return False
+        mine = [(n, num, g.data_type) for (n, num, g) in self._fields]
+        theirs = [(n, num, g.data_type) for (n, num, g) in other._fields]
+        return (mine == theirs and
+                self._binary_col_name == other._binary_col_name and
+                self.nullable == other.nullable)
+
+    def __hash__(self):
+        key = tuple((name, number, str(gen.data_type)) for (name, number, gen) in self._fields)
+        return hash((key, self._binary_col_name, self.nullable))
+
     def start(self, rand):
         for (_name, _num, gen) in self._fields:
             gen.start(rand)
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
index be3a28a11e1..52e10feef7b 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION.
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -93,8 +93,13 @@ case class GpuFromProtobuf(
                 fullSchema.fields(i).dataType, numRows)
           }
         }
+        // cuDF's makeStruct increments the reference count of child columns, so the struct
+        // owns its own references. We must close our original references in the finally block
+        // regardless of whether makeStruct succeeds or fails.
         cudf.ColumnVector.makeStruct(numRows, fullChildren: _*)
       } finally {
+        // Safe to close: if loop failed mid-way, only non-null entries are closed.
+        // If makeStruct succeeded, struct has its own refs; if it failed, we clean up.
         fullChildren.foreach(c => if (c != null) c.close())
       }
     }
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 35261ec4c33..9d79de00ccf 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -444,7 +444,11 @@ object ProtobufExprShims {
               // Check if the data type matches our full schema (struct type from protobuf)
               attr.dataType match {
                 case st: StructType => 
-                  // Compare field names and types - StructType equality can be tricky
+                  // Compare field names and types only. We intentionally do not compare
+                  // nullable flags because schema transformations (like projections or
+                  // certain optimizations) may change nullability while the underlying
+                  // schema structure remains the same. For schema projection detection,
+                  // matching names and types is sufficient to identify protobuf output.
                   st.fields.length == fullSchema.fields.length &&
                     st.fields.zip(fullSchema.fields).forall { case (a, b) =>
                       a.name == b.name && a.dataType == b.dataType
@@ -486,6 +490,10 @@ object ProtobufExprShims {
   private def writeTempDescFile(descBytes: Array[Byte]): String = {
     val tmp: Path = Files.createTempFile("spark-rapids-protobuf-desc-", ".desc")
     Files.write(tmp, descBytes)
+    // deleteOnExit() is not guaranteed to run on abnormal JVM termination, but these
+    // descriptor files are small (typically < 10KB) and only created when using
+    // binaryDescriptorSet (Spark 4.0+). The risk of temporary file accumulation is
+    // acceptable for this use case.
     tmp.toFile.deleteOnExit()
     tmp.toString
   }

From f175207b2aa2f1161686b2dba5ab423b97d20608 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Tue, 20 Jan 2026 19:33:09 +0800
Subject: [PATCH 9/9] Merge

---
 integration_tests/run_pyspark_from_build.sh   |   3 +
 .../spark/sql/rapids/GpuFromProtobuf.scala    | 216 ++++++++++--------
 .../rapids/shims/ProtobufExprShims.scala      |  20 +-
 3 files changed, 140 insertions(+), 99 deletions(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index a976d50a16c..772bb1b0852 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -47,6 +47,9 @@
 #   To run all tests, including Avro tests:
 #     INCLUDE_SPARK_AVRO_JAR=true ./run_pyspark_from_build.sh
 #
+#   To run all tests, excluding Protobuf tests (Protobuf is included by default):
+#     INCLUDE_SPARK_PROTOBUF_JAR=false ./run_pyspark_from_build.sh
+#
 #   To run a specific test:
 #     TEST=my_test ./run_pyspark_from_build.sh
 #
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
index 52e10feef7b..0437a85a5a4 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
@@ -24,7 +24,6 @@ import com.nvidia.spark.rapids.jni.Protobuf
 import com.nvidia.spark.rapids.shims.NullIntolerantShim
 
 import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
-import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.types._
 
 /**
@@ -33,11 +32,18 @@ import org.apache.spark.sql.types._
  * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when
  * supported.
  *
+ * The implementation uses a two-pass approach in the CUDA kernel:
+ * - Pass 1: Scan all messages once, recording (offset, length) for each requested field
+ * - Pass 2: Extract data in parallel using the recorded locations
+ *
+ * This is significantly faster than per-field parsing when decoding multiple fields,
+ * as each message is only parsed once regardless of the number of fields.
+ *
  * @param fullSchema The complete output schema (must match the original expression's dataType)
  * @param decodedFieldIndices Indices into fullSchema for fields that will be decoded by GPU.
  *                            Fields not in this array will be null columns.
  * @param fieldNumbers Protobuf field numbers for decoded fields (parallel to decodedFieldIndices)
- * @param cudfTypeIds cuDF type IDs for decoded fields (parallel to decodedFieldIndices)
+ * @param cudfTypeIds cuDF type IDs for ALL fields in fullSchema
  * @param cudfTypeScales Encodings for decoded fields (parallel to decodedFieldIndices)
  * @param failOnErrors If true, throw exception on malformed data; if false, return null
  */
@@ -57,50 +63,58 @@ case class GpuFromProtobuf(
 
   override def nullable: Boolean = true
 
+  // Lazy computation of unsupported field indices (complex types like StructType)
+  @transient
+  private lazy val unsupportedFieldIndices: Set[Int] = {
+    fullSchema.fields.zipWithIndex.collect {
+      case (sf, idx) if !GpuFromProtobuf.isTypeSupported(sf.dataType) => idx
+    }.toSet
+  }
+
   override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = {
     val numRows = input.getRowCount.toInt
 
-    // Decode only the requested fields from protobuf
-    val decoded = try {
+    // Call the optimized JNI API that:
+    // 1. Uses fused kernel to scan all fields in one pass
+    // 2. Creates LIST<INT8> directly for bytes fields (no intermediate strings column)
+    // 3. Returns a struct with decoded fields + null columns for every non-decoded field
+    val jniResult = try {
       Protobuf.decodeToStruct(
         input.getBase,
-        fieldNumbers,
-        cudfTypeIds,
-        cudfTypeScales,
+        fullSchema.fields.length,  // total number of fields in output
+        decodedFieldIndices,       // which fields to decode
+        fieldNumbers,              // protobuf field numbers
+        cudfTypeIds,               // types for ALL fields (INT8 placeholder for unsupported)
+        cudfTypeScales,            // encodings for decoded fields
         failOnErrors)
     } catch {
       case e: CudfException if failOnErrors =>
-        // Convert CudfException to Spark's standard protobuf error for consistent error handling.
-        // This allows user code to catch the same exception type regardless of CPU/GPU execution.
-        throw QueryExecutionErrors.malformedProtobufMessageDetectedInMessageParsingError(e)
+        // Re-throw as a SparkException for consistent error handling
+        throw new org.apache.spark.SparkException("Malformed protobuf message", e)
     }
 
-    // Build the full struct with all fields from fullSchema
-    // Decoded fields come from the GPU result, others are null columns
-    val result = withResource(decoded) { decodedStruct =>
-      val fullChildren = new Array[cudf.ColumnVector](fullSchema.fields.length)
-      var decodedIdx = 0
-
-      try {
-        for (i <- fullSchema.fields.indices) {
-          if (decodedIdx < decodedFieldIndices.length && decodedFieldIndices(decodedIdx) == i) {
-            // This field was decoded - extract from decoded struct
-            fullChildren(i) = decodedStruct.getChildColumnView(decodedIdx).copyToColumnVector()
-            decodedIdx += 1
-          } else {
-            // This field was not decoded - create null column
-            fullChildren(i) = GpuFromProtobuf.createNullColumn(
-                fullSchema.fields(i).dataType, numRows)
+    // If there are fields with unsupported types, we need to replace placeholder columns
+    // with properly typed null columns
+    val result = if (unsupportedFieldIndices.isEmpty) {
+      jniResult
+    } else {
+      withResource(jniResult) { struct =>
+        // Build children array, replacing placeholders with properly typed null columns
+        val children = new Array[cudf.ColumnVector](fullSchema.fields.length)
+        try {
+          for (i <- fullSchema.fields.indices) {
+            if (unsupportedFieldIndices.contains(i)) {
+              // Replace the JNI placeholder with a correctly typed all-null column
+              children(i) = GpuFromProtobuf.createNullColumn(fullSchema.fields(i).dataType, numRows)
+            } else {
+              // Deep-copy the child; the temporary child view must be closed or it leaks
+              children(i) = withResource(struct.getChildColumnView(i))(_.copyToColumnVector())
+            }
           }
+          cudf.ColumnVector.makeStruct(numRows, children: _*)
+        } finally {
+          children.foreach(c => if (c != null) c.close())
         }
-        // cuDF's makeStruct increments the reference count of child columns, so the struct
-        // owns its own references. We must close our original references in the finally block
-        // regardless of whether makeStruct succeeds or fails.
-        cudf.ColumnVector.makeStruct(numRows, fullChildren: _*)
-      } finally {
-        // Safe to close: if loop failed mid-way, only non-null entries are closed.
-        // If makeStruct succeeded, struct has its own refs; if it failed, we clean up.
-        fullChildren.foreach(c => if (c != null) c.close())
       }
     }
 
@@ -125,80 +139,94 @@ object GpuFromProtobuf {
    * Maps a Spark DataType to the corresponding cuDF native type ID.
    * Note: The encoding (varint/zigzag/fixed) is determined by the protobuf field type,
    * not the Spark data type, so it must be set separately based on the protobuf schema.
+   *
+   * @return Some(typeId) for supported types, None for unsupported types
    */
-  def sparkTypeToCudfId(dt: DataType): Int = dt match {
-    case BooleanType => DType.BOOL8.getTypeId.getNativeId
-    case IntegerType => DType.INT32.getTypeId.getNativeId
-    case LongType => DType.INT64.getTypeId.getNativeId
-    case FloatType => DType.FLOAT32.getTypeId.getNativeId
-    case DoubleType => DType.FLOAT64.getTypeId.getNativeId
-    case StringType => DType.STRING.getTypeId.getNativeId
-    case BinaryType => DType.LIST.getTypeId.getNativeId
-    case other =>
-      throw new IllegalArgumentException(s"Unsupported Spark type for protobuf: $other")
+  def sparkTypeToCudfIdOpt(dt: DataType): Option[Int] = dt match {
+    case BooleanType => Some(DType.BOOL8.getTypeId.getNativeId)
+    case IntegerType => Some(DType.INT32.getTypeId.getNativeId)
+    case LongType => Some(DType.INT64.getTypeId.getNativeId)
+    case FloatType => Some(DType.FLOAT32.getTypeId.getNativeId)
+    case DoubleType => Some(DType.FLOAT64.getTypeId.getNativeId)
+    case StringType => Some(DType.STRING.getTypeId.getNativeId)
+    case BinaryType => Some(DType.LIST.getTypeId.getNativeId)
+    case _ => None
   }
 
   /**
-   * Creates a null column of the specified Spark data type with the given number of rows.
-   * Used for fields that are not decoded (schema projection optimization).
+   * Check if a Spark DataType is supported by the GPU protobuf decoder.
+   */
+  def isTypeSupported(dt: DataType): Boolean = sparkTypeToCudfIdOpt(dt).isDefined
+
+  /**
+   * Create an all-null column of the specified Spark DataType.
+   * This is used for fields with unsupported types (nested structs, arrays, etc.)
+   * that are not decoded but need to be present in the output struct.
    */
-  def createNullColumn(dataType: DataType, numRows: Int): cudf.ColumnVector = {
-    val cudfType = dataType match {
-      case BooleanType => DType.BOOL8
-      case IntegerType => DType.INT32
-      case LongType => DType.INT64
-      case FloatType => DType.FLOAT32
-      case DoubleType => DType.FLOAT64
-      case StringType => DType.STRING
+  def createNullColumn(dt: DataType, numRows: Int): cudf.ColumnVector = {
+    // Helper to create null arrays for boxed types
+    def nullBools = Array.fill[java.lang.Boolean](numRows)(null)
+    def nullInts = Array.fill[java.lang.Integer](numRows)(null)
+    def nullLongs = Array.fill[java.lang.Long](numRows)(null)
+    def nullFloats = Array.fill[java.lang.Float](numRows)(null)
+    def nullDoubles = Array.fill[java.lang.Double](numRows)(null)
+
+    dt match {
+      case BooleanType => cudf.ColumnVector.fromBoxedBooleans(nullBools: _*)
+      case IntegerType => cudf.ColumnVector.fromBoxedInts(nullInts: _*)
+      case LongType => cudf.ColumnVector.fromBoxedLongs(nullLongs: _*)
+      case FloatType => cudf.ColumnVector.fromBoxedFloats(nullFloats: _*)
+      case DoubleType => cudf.ColumnVector.fromBoxedDoubles(nullDoubles: _*)
+      case StringType => cudf.ColumnVector.fromStrings(Array.fill[String](numRows)(null): _*)
       case BinaryType =>
-        // Binary is LIST<INT8> in cuDF
-        return withResource(cudf.Scalar.listFromNull(
-          new cudf.HostColumnVector.BasicType(false, DType.INT8))) { nullScalar =>
-          withResource(cudf.ColumnVector.fromScalar(nullScalar, numRows)) { col =>
-            col.incRefCount()
-          }
+        // Binary is LIST<INT8> - create all-null list column using Scalar API
+        val elementType = new cudf.HostColumnVector.BasicType(true, DType.INT8)
+        withResource(cudf.Scalar.listFromNull(elementType)) { nullScalar =>
+          cudf.ColumnVector.fromScalar(nullScalar, numRows)
         }
       case st: StructType =>
-        // For nested struct, create struct with null children and set all rows to null
-        val nullChildren = st.fields.map(f => createNullColumn(f.dataType, numRows))
-        return withResource(new AutoCloseableArray(nullChildren)) { _ =>
-          withResource(cudf.ColumnVector.makeStruct(numRows, nullChildren: _*)) { struct =>
-            // Create a validity mask of all nulls
-            withResource(cudf.Scalar.fromBool(false)) { falseBool =>
-              withResource(cudf.ColumnVector.fromScalar(falseBool, numRows)) { allFalse =>
-                struct.mergeAndSetValidity(BinaryOp.BITWISE_AND, allFalse)
-              }
+        // Recursively create null columns for struct fields
+        val children = st.fields.map(f => createNullColumn(f.dataType, numRows))
+        try {
+          withResource(cudf.ColumnVector.makeStruct(numRows, children: _*)) { structCol =>
+            // Set all rows to null - mergeAndSetValidity returns a NEW column
+            withResource(cudf.ColumnVector.fromBoxedBooleans(nullBools: _*)) { nullMask =>
+              structCol.mergeAndSetValidity(BinaryOp.BITWISE_AND, nullMask)
             }
           }
+        } finally {
+          children.foreach(_.close())
         }
       case ArrayType(elementType, _) =>
-        val elementDType = elementType match {
-          case BooleanType => DType.BOOL8
-          case IntegerType => DType.INT32
-          case LongType => DType.INT64
-          case FloatType => DType.FLOAT32
-          case DoubleType => DType.FLOAT64
-          case StringType => DType.STRING
-          case _ => DType.INT8 // fallback
+        // Create an all-null list column (every row is a null array) using the Scalar API
+        val cudfElementDType = sparkTypeToCudfIdOpt(elementType)
+          .map(id => DType.fromNative(id, 0))
+          .getOrElse(DType.INT8)  // fallback for nested complex types
+        val elemType = new cudf.HostColumnVector.BasicType(true, cudfElementDType)
+        withResource(cudf.Scalar.listFromNull(elemType)) { nullScalar =>
+          cudf.ColumnVector.fromScalar(nullScalar, numRows)
         }
-        return withResource(cudf.Scalar.listFromNull(
-          new cudf.HostColumnVector.BasicType(false, elementDType))) { nullScalar =>
-          withResource(cudf.ColumnVector.fromScalar(nullScalar, numRows)) { col =>
-            col.incRefCount()
-          }
+      case MapType(keyType, valueType, _) =>
+        // Maps are represented as LIST<STRUCT<key, value>> in cuDF
+        // For all-null maps, we create a list column with STRUCT<key, value> element type
+        val cudfKeyDType = sparkTypeToCudfIdOpt(keyType)
+          .map(id => DType.fromNative(id, 0))
+          .getOrElse(DType.INT8)
+        val cudfValueDType = sparkTypeToCudfIdOpt(valueType)
+          .map(id => DType.fromNative(id, 0))
+          .getOrElse(DType.INT8)
+        // Create the struct type for map entries (key, value)
+        val keyFieldType = new cudf.HostColumnVector.BasicType(true, cudfKeyDType)
+        val valueFieldType = new cudf.HostColumnVector.BasicType(true, cudfValueDType)
+        val structType = new cudf.HostColumnVector.StructType(true, keyFieldType, valueFieldType)
+        // Create an all-null map column (list of structs)
+        withResource(cudf.Scalar.listFromNull(structType)) { nullScalar =>
+          cudf.ColumnVector.fromScalar(nullScalar, numRows)
         }
       case _ =>
-        // Fallback: use INT8 and hope for the best (shouldn't happen for supported types)
-        DType.INT8
-    }
-
-    withResource(cudf.Scalar.fromNull(cudfType)) { nullScalar =>
-      cudf.ColumnVector.fromScalar(nullScalar, numRows)
+        // Fallback for any other types - create INT8 nulls as placeholder
+        // This should not happen in practice since unsupported types should be caught earlier
+        cudf.ColumnVector.fromBoxedBytes(Array.fill[java.lang.Byte](numRows)(null): _*)
     }
   }
-
-  /** Helper class to auto-close an array of ColumnVectors */
-  private class AutoCloseableArray(cols: Array[cudf.ColumnVector]) extends AutoCloseable {
-    override def close(): Unit = cols.foreach(c => if (c != null) c.close())
-  }
-}
\ No newline at end of file
+}
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
index 9d79de00ccf..927747516fb 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -39,9 +39,12 @@ import java.nio.file.{Files, Path}
 import scala.collection.mutable
 import scala.util.Try
 
+import ai.rapids.cudf.DType
 import com.nvidia.spark.rapids._
 
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, GetStructField, UnaryExpression}
+import org.apache.spark.sql.catalyst.expressions.{
+  AttributeReference, Expression, GetStructField, UnaryExpression
+}
 import org.apache.spark.sql.execution.ProjectExec
 import org.apache.spark.sql.rapids.GpuFromProtobuf
 import org.apache.spark.sql.types._
@@ -97,7 +100,9 @@ object ProtobufExprShims {
         // Indices into fullSchema for fields that will be decoded by GPU
         private var decodedFieldIndices: Array[Int] = _
         private var fieldNumbers: Array[Int] = _
+        // cudfTypeIds contains type IDs for ALL fields in fullSchema (for the new optimized API)
         private var cudfTypeIds: Array[Int] = _
+        // cudfTypeScales: encodings for decoded fields (parallel to decodedFieldIndices)
         private var cudfTypeScales: Array[Int] = _
         private var failOnErrors: Boolean = _
 
@@ -181,21 +186,26 @@ object ProtobufExprShims {
           }
           decodedFieldIndices = indicesToDecode
 
-          // Step 5: Build arrays for the fields to decode (parallel to decodedFieldIndices)
+          // Step 5: Build cudfTypeIds for ALL fields in fullSchema
+          // For unsupported types (nested struct, array, etc.), use INT8 as placeholder.
+          // These placeholder columns will be replaced with properly typed null columns in Scala.
+          cudfTypeIds = fullSchema.fields.map { sf =>
+            GpuFromProtobuf.sparkTypeToCudfIdOpt(sf.dataType)
+              .getOrElse(DType.INT8.getTypeId.getNativeId)  // placeholder for unsupported types
+          }
+
+          // Step 6: Build arrays for decoded fields only (parallel to decodedFieldIndices)
           val fnums = new Array[Int](indicesToDecode.length)
-          val typeIds = new Array[Int](indicesToDecode.length)
           val scales = new Array[Int](indicesToDecode.length)
 
           indicesToDecode.zipWithIndex.foreach { case (schemaIdx, arrIdx) =>
             val sf = fullSchema.fields(schemaIdx)
             val info = fieldsInfoMap(sf.name)
             fnums(arrIdx) = info.fieldNumber
-            typeIds(arrIdx) = GpuFromProtobuf.sparkTypeToCudfId(sf.dataType)
             scales(arrIdx) = info.encoding
           }
 
           fieldNumbers = fnums
-          cudfTypeIds = typeIds
           cudfTypeScales = scales
         }