diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index e3d91be0ce3..f178e84ffba 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2020-2025, NVIDIA CORPORATION.
+  Copyright (c) 2020-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
@@ -142,6 +142,7 @@
                                     <includes>
                                         <include>parquet-hadoop*.jar</include>
                                         <include>spark-avro*.jar</include>
+                                        <include>spark-protobuf*.jar</include>
                                     </includes>
                                 </filesets>
                             </filesets>
@@ -176,6 +177,24 @@
                             </artifactItems>
                         </configuration>
                     </execution>
+                    <execution>
+                        <id>copy-spark-protobuf</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>copy</goal>
+                        </goals>
+                        <configuration>
+                            <skip>${spark.protobuf.copy.skip}</skip>
+                            <useBaseVersion>true</useBaseVersion>
+                            <artifactItems>
+                                <artifactItem>
+                                    <groupId>org.apache.spark</groupId>
+                                    <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
+                                    <version>${spark.version}</version>
+                                </artifactItem>
+                            </artifactItems>
+                        </configuration>
+                    </execution>
                 </executions>
             </plugin>
             <plugin>
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 6550a3cc59f..772bb1b0852 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
 #   - SPARK_HOME: Path to your Apache Spark installation.
 #   - SKIP_TESTS: If set to true, skips running the Python integration tests.
 #   - INCLUDE_SPARK_AVRO_JAR: If set to true, includes Avro tests.
+#   - INCLUDE_SPARK_PROTOBUF_JAR: Defaults to true. When true and Spark is 3.4.0+, adds the spark-protobuf jar to the JVM classpath.
 #   - TEST: Specifies a specific test to run.
 #   - TEST_TAGS: Allows filtering tests based on tags.
 #   - TEST_TYPE: Specifies the type of tests to run.
@@ -46,6 +47,9 @@
 #   To run all tests, including Avro tests:
 #     INCLUDE_SPARK_AVRO_JAR=true ./run_pyspark_from_build.sh
 #
+#   To run tests WITHOUT Protobuf tests (protobuf is included by default):
+#     INCLUDE_SPARK_PROTOBUF_JAR=false ./run_pyspark_from_build.sh
+#
 #   To run a specific test:
 #     TEST=my_test ./run_pyspark_from_build.sh
 #
@@ -100,6 +104,7 @@ else
     # support alternate local jars NOT building from the source code
     if [ -d "$LOCAL_JAR_PATH" ]; then
         AVRO_JARS=$(echo "$LOCAL_JAR_PATH"/spark-avro*.jar)
+        PROTOBUF_JARS=$(echo "$LOCAL_JAR_PATH"/spark-protobuf*.jar)
         PLUGIN_JAR=$(echo "$LOCAL_JAR_PATH"/rapids-4-spark_*.jar)
         if [ -f $(echo $LOCAL_JAR_PATH/parquet-hadoop*.jar) ]; then
             export INCLUDE_PARQUET_HADOOP_TEST_JAR=true
@@ -116,6 +121,7 @@ else
     else
         [[ "$SCALA_VERSION" != "2.12"  ]] && TARGET_DIR=${TARGET_DIR/integration_tests/scala$SCALA_VERSION\/integration_tests}
         AVRO_JARS=$(echo "$TARGET_DIR"/dependency/spark-avro*.jar)
+        PROTOBUF_JARS=$(echo "$TARGET_DIR"/dependency/spark-protobuf*.jar)
         PARQUET_HADOOP_TESTS=$(echo "$TARGET_DIR"/dependency/parquet-hadoop*.jar)
         # remove the log4j.properties file so it doesn't conflict with ours, ignore errors
         # if it isn't present or already removed
@@ -141,9 +147,25 @@ else
         AVRO_JARS=""
     fi
 
-    # ALL_JARS includes dist.jar integration-test.jar avro.jar parquet.jar if they exist
+    # spark-protobuf is an optional Spark module that exists in Spark 3.4.0+. If a jar was located above
+    # (under LOCAL_JAR_PATH or target/dependency), include it so from_protobuf() is callable from PySpark.
+    if [[ $( echo ${INCLUDE_SPARK_PROTOBUF_JAR:-true} | tr '[:upper:]' '[:lower:]' ) == "true" ]];
+    then
+        # VERSION_STRING >= 3.4.0 ?
+        if printf '%s\n' "3.4.0" "$VERSION_STRING" | sort -V | head -1 | grep -qx "3.4.0"; then
+            export INCLUDE_SPARK_PROTOBUF_JAR=true
+        else
+            export INCLUDE_SPARK_PROTOBUF_JAR=false
+            PROTOBUF_JARS=""
+        fi
+    else
+        export INCLUDE_SPARK_PROTOBUF_JAR=false
+        PROTOBUF_JARS=""
+    fi
+
+    # ALL_JARS includes dist.jar integration-test.jar avro.jar protobuf.jar parquet.jar if they exist
     # Remove non-existing paths and canonicalize the paths including get rid of links and `..`
-    ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PARQUET_HADOOP_TESTS || true)
+    ALL_JARS=$(readlink -e $PLUGIN_JAR $TEST_JARS $AVRO_JARS $PROTOBUF_JARS $PARQUET_HADOOP_TESTS || true)
     # `:` separated jars
     ALL_JARS="${ALL_JARS//$'\n'/:}"
 
diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
index fa7decac82d..23397364c7b 100644
--- a/integration_tests/src/main/python/data_gen.py
+++ b/integration_tests/src/main/python/data_gen.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -857,6 +857,131 @@ def gen_bytes():
             return bytes([ rand.randint(0, 255) for _ in range(length) ])
         self._start(rand, gen_bytes)
 
+
+# -----------------------------------------------------------------------------
+# Protobuf (simple types) generators/utilities (for from_protobuf/to_protobuf tests)
+# -----------------------------------------------------------------------------
+
+_PROTOBUF_WIRE_VARINT = 0
+_PROTOBUF_WIRE_64BIT = 1
+_PROTOBUF_WIRE_LEN_DELIM = 2
+_PROTOBUF_WIRE_32BIT = 5
+
+def _encode_protobuf_uvarint(value):
+    """
+    Serialize a non-negative integer using protobuf's base-128 varint layout.
+    Each byte carries 7 payload bits, least-significant group first; the high bit is set
+    on every byte except the last. Raises ValueError for None or negative input.
+    """
+    if value is None:
+        raise ValueError("value must not be None")
+    if value < 0:
+        raise ValueError("uvarint only supports non-negative integers")
+    remaining = int(value)
+    encoded = bytearray()
+    while remaining > 0x7F:
+        encoded.append((remaining & 0x7F) | 0x80)
+        remaining >>= 7
+    encoded.append(remaining)
+    return bytes(encoded)
+
+def _encode_protobuf_key(field_number, wire_type):
+    return _encode_protobuf_uvarint((int(field_number) << 3) | int(wire_type))
+
+def _encode_protobuf_field(field_number, spark_type, value):
+    """
+    Serialize one scalar field (key followed by payload) for the protobuf test generators.
+
+    Returns b"" when value is None, which leaves the field out of the message. Handles
+    Boolean/Integer/Long/Float/Double/String Spark types; any other type raises ValueError.
+    Signed ints follow protobuf `int32`/`int64` rules: the two's-complement value is written
+    as a varint, so a negative `int32` is sign-extended to 64 bits and takes 10 bytes.
+    """
+    if value is None:
+        return b""
+
+    def key(wire_type):
+        return _encode_protobuf_key(field_number, wire_type)
+
+    if isinstance(spark_type, BooleanType):
+        return key(_PROTOBUF_WIRE_VARINT) + _encode_protobuf_uvarint(1 if value else 0)
+    if isinstance(spark_type, (IntegerType, LongType)):
+        # As protobuf-java's writeInt32NoTag does, negatives are sign-extended and written as uint64.
+        u64 = int(value) & 0xFFFFFFFFFFFFFFFF
+        return key(_PROTOBUF_WIRE_VARINT) + _encode_protobuf_uvarint(u64)
+    if isinstance(spark_type, FloatType):
+        return key(_PROTOBUF_WIRE_32BIT) + struct.pack("<f", float(value))
+    if isinstance(spark_type, DoubleType):
+        return key(_PROTOBUF_WIRE_64BIT) + struct.pack("<d", float(value))
+    if isinstance(spark_type, StringType):
+        utf8 = value.encode("utf-8")
+        return key(_PROTOBUF_WIRE_LEN_DELIM) + _encode_protobuf_uvarint(len(utf8)) + utf8
+    raise ValueError("Unsupported type for protobuf simple generator: {}".format(spark_type))
+
+
+class ProtobufSimpleMessageRowGen(DataGen):
+    """
+    Generates rows that include:
+      - one column per message field (Spark scalar types)
+      - a binary column containing a serialized protobuf message containing those fields
+
+    This is intentionally limited to the simple scalar types currently supported:
+    boolean/int32/int64/float/double/string.
+
+    Fields are omitted from the encoded message if the corresponding value is None.
+    """
+    def __init__(self, fields, binary_col_name="bin", nullable=False):
+        """
+        fields: list of (field_name, field_number, DataGen), in output column order
+        binary_col_name: name of the trailing column holding the encoded message
+        """
+        self._fields = list(fields)
+        self._binary_col_name = binary_col_name
+        columns = [StructField(n, g.data_type, nullable=g.nullable) for (n, _num, g) in self._fields]
+        columns.append(StructField(binary_col_name, BinaryType(), nullable=True))
+        super().__init__(StructType(columns), nullable=nullable)
+
+    def _protobuf_cache_key(self):
+        """Identity tuple that also reflects how every child generator is configured."""
+        # Children are keyed by nullability (it changes the schema built in __init__) and by
+        # their own _cache_repr() instead of only their data type, so differently configured
+        # children never look identical. Assumes the child's _cache_repr() reflects its config.
+        kids = tuple((n, num, g.nullable, g._cache_repr()) for (n, num, g) in self._fields)
+        return (kids, self._binary_col_name, self.nullable)
+
+    def __repr__(self):
+        return "ProtobufSimpleMessageRowGen({})".format(
+            ",".join(["{}#{}".format(n, num) for (n, num, _g) in self._fields]))
+
+    def _cache_repr(self):
+        kids = ",".join(["{}:{}#{}".format(n, g._cache_repr(), num) for (n, num, g) in self._fields])
+        return super()._cache_repr() + "(" + kids + "," + self._binary_col_name + ")"
+
+    def __eq__(self, other):
+        # NOTE(review): presumably equality/hash key the cached generated data — confirm.
+        return (isinstance(other, ProtobufSimpleMessageRowGen) and
+                self._protobuf_cache_key() == other._protobuf_cache_key())
+
+    def __hash__(self):
+        return hash(self._protobuf_cache_key())
+
+    def start(self, rand):
+        """Start each child generator, then produce (field values..., encoded message) tuples."""
+        for (_name, _num, gen) in self._fields:
+            gen.start(rand)
+
+        def make_row():
+            values = []
+            encoded_parts = []
+            for (_name, num, gen) in self._fields:
+                v = gen.gen()
+                values.append(v)
+                # A None value encodes to b"", leaving the field out of the message
+                encoded_parts.append(_encode_protobuf_field(num, gen.data_type, v))
+            return tuple(values + [b"".join(encoded_parts)])
+
+        self._start(rand, make_row)
+
 # Note: Current(2023/06/06) maxmium IT data size is 7282688 bytes, so LRU cache with maxsize 128
 # will lead to 7282688 * 128 = 932 MB additional memory usage in edge case, which is acceptable.
 @lru_cache(maxsize=128, typed=True)
diff --git a/integration_tests/src/main/python/protobuf_test.py b/integration_tests/src/main/python/protobuf_test.py
new file mode 100644
index 00000000000..2e181141d5a
--- /dev/null
+++ b/integration_tests/src/main/python/protobuf_test.py
@@ -0,0 +1,372 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+
+import pytest
+
+from asserts import assert_gpu_and_cpu_are_equal_collect
+from data_gen import (
+    BooleanGen, IntegerGen, LongGen, FloatGen, DoubleGen, StringGen,
+    ProtobufSimpleMessageRowGen, gen_df
+)
+from marks import ignore_order
+from spark_session import with_cpu_session, is_before_spark_340
+import pyspark.sql.functions as f
+
+pytestmark = [pytest.mark.premerge_ci_1]
+
+
+def _try_import_from_protobuf():
+    try:
+        from pyspark.sql.protobuf.functions import from_protobuf
+        return from_protobuf
+    except Exception:
+        return None
+
+
+def _spark_protobuf_jvm_available(spark) -> bool:
+    """
+    `spark-protobuf` is an optional external module. PySpark may have the Python wrappers
+    even when the JVM side isn't present on the classpath, which manifests as:
+      TypeError: 'JavaPackage' object is not callable
+    when calling into `sc._jvm.org.apache.spark.sql.protobuf.functions.from_protobuf`.
+    """
+    jvm = spark.sparkContext._jvm
+    candidates = [
+        # Scala object `functions` compiles to `functions$`
+        "org.apache.spark.sql.protobuf.functions$",
+        # Some environments may expose it differently
+        "org.apache.spark.sql.protobuf.functions",
+    ]
+    for cls in candidates:
+        try:
+            jvm.java.lang.Class.forName(cls)
+            return True
+        except Exception:
+            continue
+    return False
+
+
+def _build_simple_descriptor_set_bytes(spark):
+    """
+    Build a FileDescriptorSet for:
+      package test;
+      syntax = "proto2";
+      message Simple {
+        optional bool   b   = 1;
+        optional int32  i32 = 2;
+        optional int64  i64 = 3;
+        optional float  f32 = 4;
+        optional double f64 = 5;
+        optional string s   = 6;
+      }
+    """
+    jvm = spark.sparkContext._jvm
+    D = jvm.com.google.protobuf.DescriptorProtos
+
+    fd = D.FileDescriptorProto.newBuilder() \
+        .setName("simple.proto") \
+        .setPackage("test")
+    # Some Spark distributions bring an older protobuf-java where FileDescriptorProto.Builder
+    # does not expose setSyntax(String). For this test we only need proto2 semantics, and
+    # leaving syntax unset is sufficient/compatible.
+    try:
+        fd = fd.setSyntax("proto2")
+    except Exception:
+        # If setSyntax is unavailable (older protobuf-java), we intentionally leave syntax unset.
+        pass
+
+    msg = D.DescriptorProto.newBuilder().setName("Simple")
+    label_opt = D.FieldDescriptorProto.Label.LABEL_OPTIONAL
+
+    def add_field(name, number, ftype):
+        msg.addField(
+            D.FieldDescriptorProto.newBuilder()
+              .setName(name)
+              .setNumber(number)
+              .setLabel(label_opt)
+              .setType(ftype)
+              .build()
+        )
+
+    add_field("b", 1, D.FieldDescriptorProto.Type.TYPE_BOOL)
+    add_field("i32", 2, D.FieldDescriptorProto.Type.TYPE_INT32)
+    add_field("i64", 3, D.FieldDescriptorProto.Type.TYPE_INT64)
+    add_field("f32", 4, D.FieldDescriptorProto.Type.TYPE_FLOAT)
+    add_field("f64", 5, D.FieldDescriptorProto.Type.TYPE_DOUBLE)
+    add_field("s", 6, D.FieldDescriptorProto.Type.TYPE_STRING)
+
+    fd.addMessageType(msg.build())
+
+    fds = D.FileDescriptorSet.newBuilder().addFile(fd.build()).build()
+    # py4j converts Java byte[] to a Python bytes-like object
+    return bytes(fds.toByteArray())
+
+
+def _write_bytes_to_hadoop_path(spark, path_str, data_bytes):
+    """Write data_bytes to path_str (overwriting it) through the Hadoop FileSystem that owns the path."""
+    sc = spark.sparkContext
+    jpath = sc._jvm.org.apache.hadoop.fs.Path(path_str)
+    # Resolve the FileSystem from the path, not the default FS, so paths with another scheme work.
+    out = jpath.getFileSystem(sc._jsc.hadoopConfiguration()).create(jpath, True)
+    try:
+        out.write(bytearray(data_bytes))
+    finally:
+        out.close()
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_simple_parquet_binary_round_trip(spark_tmp_path):
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(_spark_protobuf_jvm_available):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    data_path = spark_tmp_path + "/PROTOBUF_SIMPLE_PARQUET/"
+    desc_path = spark_tmp_path + "/simple.desc"
+    message_name = "test.Simple"
+
+    # Generate descriptor bytes once using the JVM (no protoc dependency)
+    desc_bytes = with_cpu_session(_build_simple_descriptor_set_bytes)
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Build a DF with scalar columns + binary protobuf column and write to parquet
+    row_gen = ProtobufSimpleMessageRowGen([
+        ("b", 1, BooleanGen(nullable=True)),
+        ("i32", 2, IntegerGen(nullable=True, min_val=0, max_val=1 << 20)),
+        ("i64", 3, LongGen(nullable=True, min_val=0, max_val=1 << 40, special_cases=[])),
+        ("f32", 4, FloatGen(nullable=True, no_nans=True)),
+        ("f64", 5, DoubleGen(nullable=True, no_nans=True)),
+        ("s", 6, StringGen(nullable=True)),
+    ], binary_col_name="bin")
+
+    def write_parquet(spark):
+        df = gen_df(spark, row_gen, length=512)
+        df.write.mode("overwrite").parquet(data_path)
+
+    with_cpu_session(write_parquet)
+
+    # Sanity check correctness on CPU (decoded struct matches the original scalar columns)
+    def cpu_correctness_check(spark):
+        df = spark.read.parquet(data_path)
+        expected = f.struct(
+            f.col("b").alias("b"),
+            f.col("i32").alias("i32"),
+            f.col("i64").alias("i64"),
+            f.col("f32").alias("f32"),
+            f.col("f64").alias("f64"),
+            f.col("s").alias("s"),
+        ).alias("expected")
+
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(f.col("bin"), message_name, binaryDescriptorSet=bytearray(desc_bytes)).alias("decoded")
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path).alias("decoded")
+
+        rows = df.select(expected, decoded).collect()
+        for r in rows:
+            assert r["expected"] == r["decoded"]
+
+    with_cpu_session(cpu_correctness_check)
+
+    # Main assertion: CPU and GPU results match for from_protobuf on a binary column read from parquet
+    def run_on_spark(spark):
+        df = spark.read.parquet(data_path)
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(f.col("bin"), message_name, binaryDescriptorSet=bytearray(desc_bytes))
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        return df.select(decoded.alias("decoded"))
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_simple_null_input_returns_null(spark_tmp_path):
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(_spark_protobuf_jvm_available):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    desc_path = spark_tmp_path + "/simple_null_input.desc"
+    message_name = "test.Simple"
+
+    # Generate descriptor bytes once using the JVM (no protoc dependency)
+    desc_bytes = with_cpu_session(_build_simple_descriptor_set_bytes)
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Spark's ProtobufDataToCatalyst is NullIntolerant (null input -> null output).
+    def run_on_spark(spark):
+        df = spark.createDataFrame(
+            [(None,), (bytes([0x08, 0x01, 0x10, 0x7B]),)],  # b=true, i32=123
+            schema="bin binary",
+        )
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(
+                f.col("bin"),
+                message_name,
+                binaryDescriptorSet=bytearray(desc_bytes),
+            )
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        return df.select(decoded.alias("decoded"))
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
+
+
+def _build_nested_descriptor_set_bytes(spark):
+    """
+    Build a FileDescriptorSet for a message with both simple fields and nested message:
+      package test;
+      syntax = "proto2";
+      message Nested {
+        optional int32 x = 1;
+      }
+      message WithNested {
+        optional int32  simple_int  = 1;
+        optional string simple_str  = 2;
+        optional Nested nested_msg  = 3;   // nested message - not supported by GPU
+        optional int64  simple_long = 4;
+      }
+    """
+    jvm = spark.sparkContext._jvm
+    D = jvm.com.google.protobuf.DescriptorProtos
+
+    fd = D.FileDescriptorProto.newBuilder() \
+        .setName("nested.proto") \
+        .setPackage("test")
+    try:
+        fd = fd.setSyntax("proto2")
+    except Exception:
+        pass
+
+    label_opt = D.FieldDescriptorProto.Label.LABEL_OPTIONAL
+
+    # Define Nested message
+    nested_msg = D.DescriptorProto.newBuilder().setName("Nested")
+    nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("x")
+            .setNumber(1)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT32)
+            .build()
+    )
+    fd.addMessageType(nested_msg.build())
+
+    # Define WithNested message
+    with_nested_msg = D.DescriptorProto.newBuilder().setName("WithNested")
+    # simple_int
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_int")
+            .setNumber(1)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT32)
+            .build()
+    )
+    # simple_str
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_str")
+            .setNumber(2)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_STRING)
+            .build()
+    )
+    # nested_msg (nested message type)
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("nested_msg")
+            .setNumber(3)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_MESSAGE)
+            .setTypeName(".test.Nested")
+            .build()
+    )
+    # simple_long
+    with_nested_msg.addField(
+        D.FieldDescriptorProto.newBuilder()
+            .setName("simple_long")
+            .setNumber(4)
+            .setLabel(label_opt)
+            .setType(D.FieldDescriptorProto.Type.TYPE_INT64)
+            .build()
+    )
+    fd.addMessageType(with_nested_msg.build())
+
+    fds = D.FileDescriptorSet.newBuilder().addFile(fd.build()).build()
+    return bytes(fds.toByteArray())
+
+
+@pytest.mark.skipif(is_before_spark_340(), reason="from_protobuf is Spark 3.4.0+")
+@ignore_order(local=True)
+def test_from_protobuf_schema_projection_simple_fields_only(spark_tmp_path):
+    """
+    Test schema projection: when only simple fields are selected from a protobuf message
+    that also contains unsupported types (nested message), GPU should be able to decode
+    just the simple fields without falling back to CPU.
+    """
+    from_protobuf = _try_import_from_protobuf()
+    if from_protobuf is None:
+        pytest.skip("pyspark.sql.protobuf.functions.from_protobuf is not available")
+    if not with_cpu_session(_spark_protobuf_jvm_available):
+        pytest.skip("spark-protobuf JVM module is not available on the classpath")
+
+    desc_path = spark_tmp_path + "/nested.desc"
+    message_name = "test.WithNested"
+
+    desc_bytes = with_cpu_session(_build_nested_descriptor_set_bytes)
+    with_cpu_session(lambda spark: _write_bytes_to_hadoop_path(spark, desc_path, desc_bytes))
+
+    # Create test data: protobuf binary with simple fields set
+    # Field 1 (simple_int): varint 42 -> 0x08 0x2A
+    # Field 2 (simple_str): length-delimited "hello" -> 0x12 0x05 h e l l o
+    # Field 4 (simple_long): varint 12345 -> 0x20 0xB9 0x60
+    test_data = bytes([
+        0x08, 0x2A,  # simple_int = 42
+        0x12, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F,  # simple_str = "hello"
+        0x20, 0xB9, 0x60,  # simple_long = 12345
+    ])
+
+    def run_on_spark(spark):
+        df = spark.createDataFrame(
+            [(test_data,), (None,)],
+            schema="bin binary",
+        )
+        sig = inspect.signature(from_protobuf)
+        if "binaryDescriptorSet" in sig.parameters:
+            decoded = from_protobuf(
+                f.col("bin"),
+                message_name,
+                binaryDescriptorSet=bytearray(desc_bytes),
+            )
+        else:
+            decoded = from_protobuf(f.col("bin"), message_name, desc_path)
+        # Only select simple fields, not the nested_msg field
+        return df.select(
+            decoded.getField("simple_int").alias("simple_int"),
+            decoded.getField("simple_str").alias("simple_str"),
+            decoded.getField("simple_long").alias("simple_long")
+        )
+
+    assert_gpu_and_cpu_are_equal_collect(run_on_spark)
diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py
index 90861746b64..765bcac036d 100644
--- a/integration_tests/src/main/python/spark_init_internal.py
+++ b/integration_tests/src/main/python/spark_init_internal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -61,11 +61,38 @@ def findspark_init():
     if spark_jars is not None:
         logging.info(f"Adding to findspark jars: {spark_jars}")
         findspark.add_jars(spark_jars)
+        # Also add to driver classpath so classes are available to Class.forName()
+        # This is needed for optional modules like spark-protobuf
+        _add_driver_classpath(spark_jars)
 
     if spark_jars_packages is not None:
         logging.info(f"Adding to findspark packages: {spark_jars_packages}")
         findspark.add_packages(spark_jars_packages)
 
+
+def _add_driver_classpath(jars):
+    """
+    Add jars to the driver classpath via PYSPARK_SUBMIT_ARGS.
+    findspark.add_jars() only adds --jars, which doesn't make classes available
+    to Class.forName() on the driver. This function adds --driver-class-path.
+    jars: jar paths separated by commas and/or whitespace; blank entries are ignored.
+    """
+    jar_list = jars.replace(',', ' ').split() if jars else []
+    if not jar_list:
+        # Nothing usable; an empty value would make spark-submit consume 'pyspark-shell' instead.
+        return
+    current_args = os.environ.get('PYSPARK_SUBMIT_ARGS', '').strip()
+    # Drop the trailing 'pyspark-shell' so it can be re-appended after the new option
+    if current_args.endswith('pyspark-shell'):
+        current_args = current_args[:-len('pyspark-shell')].strip()
+    if '--driver-class-path' in current_args:
+        logging.info("driver-class-path already in PYSPARK_SUBMIT_ARGS, skipping")
+        return
+    # Classpath entries are joined with os.pathsep for platform independence
+    driver_cp = os.pathsep.join(jar_list)
+    os.environ['PYSPARK_SUBMIT_ARGS'] = f"{current_args} --driver-class-path {driver_cp} pyspark-shell".strip()
+    logging.info(f"Updated PYSPARK_SUBMIT_ARGS with driver-class-path: {driver_cp}")
+
 def running_with_xdist(session, is_worker):
     try:
         import xdist
diff --git a/pom.xml b/pom.xml
index 6eeff9d35be..92cc83b8b4d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-  Copyright (c) 2020-2025, NVIDIA CORPORATION.
+  Copyright (c) 2020-2026, NVIDIA CORPORATION.
 
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
@@ -318,6 +318,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -338,6 +339,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -358,6 +360,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -378,6 +381,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -398,6 +402,7 @@
                 <parquet.hadoop.version>1.12.3</parquet.hadoop.version>
                 <rapids.delta.artifactId1>rapids-4-spark-delta-24x</rapids.delta.artifactId1>
                 <slf4j.version>2.0.6</slf4j.version>
+                <spark.protobuf.copy.skip>false</spark.protobuf.copy.skip>
             </properties>
             <modules>
                 <module>delta-lake/delta-24x</module>
@@ -895,6 +900,12 @@
         <pytest.TEST_PARALLEL/>
         <pytest.TEST_TYPE>developer</pytest.TEST_TYPE>
         <rat.consoleOutput>false</rat.consoleOutput>
+        <!--
+          spark-protobuf is an optional external Spark module (Spark 3.4.0+). The plugin must not require it
+          at load time, but some integration tests need it on the Spark JVM classpath.
+        -->
+        <!-- Default to skipping staging spark-protobuf for integration tests unless enabled by a Spark 3.4.x profile -->
+        <spark.protobuf.copy.skip>true</spark.protobuf.copy.skip>
         <!--
          If you update a dependency version so it is no longer a SNAPSHOT
          please update the snapshot-shims profile as well so it is accurate -->
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
new file mode 100644
index 00000000000..cabc8d2905d
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/protobuf/ProtobufDescriptorUtils.scala
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2026, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.protobuf
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import com.google.protobuf.DescriptorProtos
+import com.google.protobuf.Descriptors
+
+/**
+ * Minimal descriptor utilities for locating a message descriptor in a FileDescriptorSet.
+ *
+ * This is intentionally lightweight for the "simple types" from_protobuf patch: it supports
+ * descriptor sets produced by `protoc --include_imports --descriptor_set_out=...`.
+ *
+ * NOTE: This utility is currently not used in the initial implementation, which relies on
+ * Spark's ProtobufUtils via reflection (buildMessageDescriptorWithSparkProtobuf). This class
+ * is preserved for potential future use cases where direct descriptor parsing is needed
+ * without depending on Spark's shaded protobuf classes.
+ */
+object ProtobufDescriptorUtils {
+
+  /**
+   * Resolves `messageName` (a simple name, a full name, or a dotted suffix of a full name; nested
+   * messages included) in a serialized FileDescriptorSet. An exact full-name match wins over
+   * looser ones. Throws IllegalArgumentException on a missing import, no match, or ambiguity.
+   */
+  def buildMessageDescriptor(
+      fileDescriptorSetBytes: Array[Byte],
+      messageName: String): Descriptors.Descriptor = {
+    val fds = DescriptorProtos.FileDescriptorSet.parseFrom(fileDescriptorSetBytes)
+    val byName = fds.getFileList.asScala.map(p => p.getName -> p).toMap
+    val cache = mutable.HashMap.empty[String, Descriptors.FileDescriptor]
+
+    def buildFileDescriptor(name: String): Descriptors.FileDescriptor = {
+      cache.getOrElseUpdate(name, {
+        val p = byName.getOrElse(name,
+          throw new IllegalArgumentException(s"Missing FileDescriptorProto for '$name'"))
+        val deps = p.getDependencyList.asScala.map(buildFileDescriptor _).toArray
+        Descriptors.FileDescriptor.buildFrom(p, deps)
+      })
+    }
+    // `distinct`: a file listed twice must not make its messages look ambiguous.
+    val files = fds.getFileList.asScala.map(_.getName).toSeq.distinct.map(buildFileDescriptor _)
+    val matched = files.flatMap(fd => findMessageDescriptors(fd, messageName))
+    val exact = matched.filter(_.getFullName == messageName)
+    (if (exact.nonEmpty) exact else matched) match {
+      case Seq(d) => d
+      case Seq() =>
+        throw new IllegalArgumentException(
+          s"Message '$messageName' not found in FileDescriptorSet")
+      case many =>
+        val names = many.map(_.getFullName).distinct.sorted
+        throw new IllegalArgumentException(
+          s"Message '$messageName' is ambiguous; matches: ${names.mkString(", ")}")
+    }
+  }
+
+  /** Finds messages in `fd`, nested included, matching by simple, full or dotted-suffix name. */
+  private def findMessageDescriptors(
+      fd: Descriptors.FileDescriptor,
+      messageName: String): Iterator[Descriptors.Descriptor] = {
+    def matches(d: Descriptors.Descriptor): Boolean =
+      d.getName == messageName || d.getFullName == messageName ||
+        d.getFullName.endsWith("." + messageName)
+    def walk(d: Descriptors.Descriptor): Iterator[Descriptors.Descriptor] = {
+      val nested = d.getNestedTypes.asScala.iterator.flatMap(walk _)
+      if (matches(d)) Iterator.single(d) ++ nested else nested
+    }
+    fd.getMessageTypes.asScala.iterator.flatMap(walk _)
+  }
+}
\ No newline at end of file
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
new file mode 100644
index 00000000000..0437a85a5a4
--- /dev/null
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFromProtobuf.scala
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.rapids
+
+import ai.rapids.cudf
+import ai.rapids.cudf.{BinaryOp, CudfException, DType}
+import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression}
+import com.nvidia.spark.rapids.Arm.withResource
+import com.nvidia.spark.rapids.jni.Protobuf
+import com.nvidia.spark.rapids.shims.NullIntolerantShim
+
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
+import org.apache.spark.sql.types._
+
+/**
+ * GPU implementation for Spark's `from_protobuf` decode path.
+ *
+ * This is designed to replace `org.apache.spark.sql.protobuf.ProtobufDataToCatalyst` when
+ * supported.
+ *
+ * The implementation uses a two-pass approach in the CUDA kernel:
+ * - Pass 1: Scan all messages once, recording (offset, length) for each requested field
+ * - Pass 2: Extract data in parallel using the recorded locations
+ *
+ * This is significantly faster than per-field parsing when decoding multiple fields,
+ * as each message is only parsed once regardless of the number of fields.
+ *
+ * @param fullSchema The complete output schema (must match the original expression's dataType)
+ * @param decodedFieldIndices Indices into fullSchema for fields that will be decoded by GPU.
+ *                            Fields not in this array will be null columns.
+ * @param fieldNumbers Protobuf field numbers for decoded fields (parallel to decodedFieldIndices)
+ * @param cudfTypeIds cuDF type IDs for ALL fields in fullSchema
+ * @param cudfTypeScales Encodings for decoded fields (parallel to decodedFieldIndices)
+ * @param failOnErrors If true, throw exception on malformed data; if false, return null
+ */
+case class GpuFromProtobuf(
+    fullSchema: StructType,
+    decodedFieldIndices: Array[Int],
+    fieldNumbers: Array[Int],
+    cudfTypeIds: Array[Int],
+    cudfTypeScales: Array[Int],
+    failOnErrors: Boolean,
+    child: Expression)
+  extends GpuUnaryExpression with ExpectsInputTypes with NullIntolerantShim {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+  override def dataType: DataType = fullSchema.asNullable
+
+  override def nullable: Boolean = true
+
+  // Schema positions whose Spark type the decoder cannot produce (complex types like StructType)
+  @transient
+  private lazy val unsupportedFieldIndices: Set[Int] = {
+    fullSchema.fields.zipWithIndex.collect {
+      case (sf, idx) if !GpuFromProtobuf.isTypeSupported(sf.dataType) => idx
+    }.toSet
+  }
+
+  /**
+   * Decodes every row of `input` through the JNI protobuf decoder and returns a struct column
+   * laid out like `fullSchema`. A row is null if `input` or the decoder output is null there.
+   */
+  override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = {
+    val numRows = input.getRowCount.toInt
+
+    val decoded = try {
+      Protobuf.decodeToStruct(
+        input.getBase,
+        fullSchema.fields.length,  // total number of fields in output
+        decodedFieldIndices,       // which fields to decode
+        fieldNumbers,              // protobuf field numbers
+        cudfTypeIds,               // types for ALL fields (INT8 placeholder for unsupported)
+        cudfTypeScales,            // encodings for decoded fields
+        failOnErrors)
+    } catch {
+      case e: CudfException if failOnErrors =>
+        // Re-throw as a SparkException for consistent error handling
+        throw new org.apache.spark.SparkException("Malformed protobuf message", e)
+    }
+
+    withResource(decoded) { struct =>
+      val rebuild = unsupportedFieldIndices.nonEmpty
+      // incRefCount lets the untouched decoder output outlive the enclosing withResource.
+      val assembled = if (rebuild) withTypedNullColumns(struct, numRows) else struct.incRefCount()
+      // mergeAndSetValidity derives the new mask from the listed columns only, so the decoder's
+      // own row validity is listed alongside the input's. A rebuilt struct starts out all-valid
+      // (makeStruct) and therefore also needs the merge when only the decoder produced nulls.
+      if (input.getBase.hasNulls || (rebuild && struct.hasNulls)) {
+        withResource(assembled) { col =>
+          col.mergeAndSetValidity(BinaryOp.BITWISE_AND, struct, input.getBase)
+        }
+      } else {
+        assembled
+      }
+    }
+  }
+
+  /** Rebuilds `struct`, swapping each unsupported field's placeholder for a typed null column. */
+  private def withTypedNullColumns(struct: cudf.ColumnVector, numRows: Int): cudf.ColumnVector = {
+    val children = new Array[cudf.ColumnVector](fullSchema.fields.length)
+    try {
+      for (i <- fullSchema.fields.indices) {
+        children(i) = if (unsupportedFieldIndices.contains(i)) {
+          GpuFromProtobuf.createNullColumn(fullSchema.fields(i).dataType, numRows)
+        } else {
+          // getChildColumnView returns a view that the caller has to close.
+          withResource(struct.getChildColumnView(i)) { view => view.copyToColumnVector() }
+        }
+      }
+      cudf.ColumnVector.makeStruct(numRows, children: _*)
+    } finally {
+      children.foreach(c => if (c != null) c.close())
+    }
+  }
+}
+
+object GpuFromProtobuf {
+  // Encodings from com.nvidia.spark.rapids.jni.Protobuf
+  val ENC_DEFAULT = 0
+  val ENC_FIXED   = 1
+  val ENC_ZIGZAG  = 2
+
+  /**
+   * Maps a Spark DataType to the corresponding cuDF native type ID.
+   * Note: The encoding (varint/zigzag/fixed) is determined by the protobuf field type,
+   * not the Spark data type, so it must be set separately based on the protobuf schema.
+   *
+   * @return Some(typeId) for supported types, None for unsupported types
+   */
+  def sparkTypeToCudfIdOpt(dt: DataType): Option[Int] = dt match {
+    case BooleanType => Some(DType.BOOL8.getTypeId.getNativeId)
+    case IntegerType => Some(DType.INT32.getTypeId.getNativeId)
+    case LongType => Some(DType.INT64.getTypeId.getNativeId)
+    case FloatType => Some(DType.FLOAT32.getTypeId.getNativeId)
+    case DoubleType => Some(DType.FLOAT64.getTypeId.getNativeId)
+    case StringType => Some(DType.STRING.getTypeId.getNativeId)
+    case BinaryType => Some(DType.LIST.getTypeId.getNativeId)
+    case _ => None
+  }
+
+  /**
+   * Check if a Spark DataType is supported by the GPU protobuf decoder.
+   */
+  def isTypeSupported(dt: DataType): Boolean = sparkTypeToCudfIdOpt(dt).isDefined
+
+  /**
+   * Describes `dt` as a nullable cuDF host type. Arrays, maps and structs are converted
+   * recursively so that nested element types are kept; binary is LIST<INT8> and a map is
+   * LIST<STRUCT<key, value>>.
+   */
+  private def toCudfHostType(dt: DataType): cudf.HostColumnVector.DataType = dt match {
+    case BinaryType | _: ArrayType | _: MapType =>
+      new cudf.HostColumnVector.ListType(true, listElementType(dt))
+    case st: StructType =>
+      val fieldTypes = st.fields.map(f => toCudfHostType(f.dataType))
+      new cudf.HostColumnVector.StructType(true, fieldTypes: _*)
+    case other =>
+      new cudf.HostColumnVector.BasicType(true, leafDType(other))
+  }
+
+  /**
+   * Element type of the cuDF list behind a binary, array or map column: INT8 for binary, the
+   * converted element type for an array, and STRUCT<key, value> for a map.
+   */
+  private def listElementType(dt: DataType): cudf.HostColumnVector.DataType = dt match {
+    case ArrayType(elementType, _) => toCudfHostType(elementType)
+    case MapType(keyType, valueType, _) =>
+      new cudf.HostColumnVector.StructType(true,
+        toCudfHostType(keyType), toCudfHostType(valueType))
+    case _ => new cudf.HostColumnVector.BasicType(true, DType.INT8)
+  }
+
+  /** cuDF type of a non-nested Spark type; INT8 is the placeholder for unmapped types. */
+  private def leafDType(dt: DataType): DType =
+    sparkTypeToCudfIdOpt(dt).map(DType.fromNative(_, 0)).getOrElse(DType.INT8)
+
+  /**
+   * Create an all-null column of the specified Spark DataType.
+   * This is used for fields with unsupported types (nested structs, arrays, etc.)
+   * that are not decoded but need to be present in the output struct. Element types of
+   * arrays and maps are preserved, including nested and binary ones.
+   *
+   * @param dt Spark type of the column to create
+   * @param numRows number of (null) rows in the result
+   * @return a new column that the caller must close
+   */
+  def createNullColumn(dt: DataType, numRows: Int): cudf.ColumnVector = {
+    // Expands a null scalar into `numRows` rows and closes the scalar afterwards.
+    def nullsOf(scalar: cudf.Scalar): cudf.ColumnVector =
+      withResource(scalar) { s => cudf.ColumnVector.fromScalar(s, numRows) }
+
+    dt match {
+      case st: StructType =>
+        val children = new Array[cudf.ColumnVector](st.fields.length)
+        try {
+          for (i <- st.fields.indices) {
+            children(i) = createNullColumn(st.fields(i).dataType, numRows)
+          }
+          withResource(cudf.ColumnVector.makeStruct(numRows, children: _*)) { structCol =>
+            // Set all rows to null - mergeAndSetValidity returns a NEW column
+            withResource(nullsOf(cudf.Scalar.fromNull(DType.BOOL8))) { nullMask =>
+              structCol.mergeAndSetValidity(BinaryOp.BITWISE_AND, nullMask)
+            }
+          }
+        } finally {
+          // Also releases children built before a failure part-way through the loop.
+          children.foreach(c => if (c != null) c.close())
+        }
+      case BinaryType | _: ArrayType | _: MapType =>
+        // All three are lists in cuDF; a null list scalar is typed by its element type.
+        nullsOf(cudf.Scalar.listFromNull(listElementType(dt)))
+      case other =>
+        // Leaf types, including the INT8 placeholder for anything without a cuDF mapping.
+        nullsOf(cudf.Scalar.fromNull(leafDType(other)))
+    }
+  }
+}
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
new file mode 100644
index 00000000000..927747516fb
--- /dev/null
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ProtobufExprShims.scala
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2026, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "340"}
+{"spark": "341"}
+{"spark": "342"}
+{"spark": "343"}
+{"spark": "344"}
+{"spark": "350"}
+{"spark": "351"}
+{"spark": "352"}
+{"spark": "353"}
+{"spark": "354"}
+{"spark": "355"}
+{"spark": "356"}
+{"spark": "357"}
+{"spark": "400"}
+{"spark": "401"}
+spark-rapids-shim-json-lines ***/
+
+package com.nvidia.spark.rapids.shims
+
+import java.nio.file.{Files, Path}
+
+import scala.collection.mutable
+import scala.util.Try
+
+import ai.rapids.cudf.DType
+import com.nvidia.spark.rapids._
+
+import org.apache.spark.sql.catalyst.expressions.{
+  AttributeReference, Expression, GetStructField, UnaryExpression
+}
+import org.apache.spark.sql.execution.ProjectExec
+import org.apache.spark.sql.rapids.GpuFromProtobuf
+import org.apache.spark.sql.types._
+
+/**
+ * Information about one protobuf field, collected per schema field for schema projection support.
+ */
+private[shims] case class ProtobufFieldInfo(
+    fieldNumber: Int,                  // protobuf field number, read via `getNumber`
+    protoTypeName: String,             // name of the protobuf field type, e.g. "INT32"
+    sparkType: DataType,               // Spark type of the matching schema field
+    encoding: Int,                     // one of the GpuFromProtobuf.ENC_* values
+    isSupported: Boolean,              // result of the per-field support check
+    unsupportedReason: Option[String]  // explanation reported when the field is not supported
+)
+
+/**
+ * Spark 3.4+ optional integration for spark-protobuf expressions.
+ *
+ * spark-protobuf is an external module, so these rules must be registered by reflection.
+ */
+object ProtobufExprShims {
+  private[this] val protobufDataToCatalystClassName =
+    "org.apache.spark.sql.protobuf.ProtobufDataToCatalyst"
+
+  private[this] val sparkProtobufUtilsObjectClassName =
+    "org.apache.spark.sql.protobuf.utils.ProtobufUtils$"
+
+  def exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = {
+    // spark-protobuf is optional: if its expression class is absent, no rule is registered.
+    try {
+      val sparkExprClass = ShimReflectionUtils.loadClass(protobufDataToCatalystClassName)
+      Map(sparkExprClass.asInstanceOf[Class[_ <: Expression]] -> fromProtobufRule)
+    } catch {
+      case _: ClassNotFoundException => Map.empty
+    }
+  }
+
+  private def fromProtobufRule: ExprRule[_ <: Expression] = {
+    GpuOverrides.expr[UnaryExpression](
+      "Decode a BinaryType column (protobuf) into a Spark SQL struct",
+      ExprChecks.unaryProject(
+        // Use TypeSig.all here because schema projection determines which fields
+        // actually need GPU support. Detailed type checking is done in tagExprForGpu.
+        TypeSig.all,
+        TypeSig.all,
+        TypeSig.BINARY,
+        TypeSig.BINARY),
+      (e, conf, p, r) => new UnaryExprMeta[UnaryExpression](e, conf, p, r) {
+
+        // Full schema from the expression (must match original dataType for compatibility)
+        private var fullSchema: StructType = _
+        // Indices into fullSchema for fields that will be decoded by GPU
+        private var decodedFieldIndices: Array[Int] = _
+        private var fieldNumbers: Array[Int] = _
+        // cudfTypeIds contains type IDs for ALL fields in fullSchema (for the new optimized API)
+        private var cudfTypeIds: Array[Int] = _
+        // cudfTypeScales: encodings for decoded fields (parallel to decodedFieldIndices)
+        private var cudfTypeScales: Array[Int] = _
+        private var failOnErrors: Boolean = _
+
+        override def tagExprForGpu(): Unit = {
+          fullSchema = e.dataType match {
+            case st: StructType => st
+            case other =>
+              willNotWorkOnGpu(
+                s"Only StructType output is supported for from_protobuf, got $other")
+              return
+          }
+
+          val options = getOptionsMap(e)
+          val supportedOptions = Set("enums.as.ints", "mode")
+          val unsupportedOptions = options.keys.filterNot(supportedOptions.contains)
+          if (unsupportedOptions.nonEmpty) {
+            val keys = unsupportedOptions.mkString(",")
+            willNotWorkOnGpu(
+              s"from_protobuf options are not supported yet on GPU: $keys")
+            return
+          }
+
+          val enumsAsInts = options.getOrElse("enums.as.ints", "false").toBoolean
+
+          // Spark's ProtobufOptions uses FAILFAST when `mode` is not set, and
+          // ProtobufDataToCatalyst accepts only FAILFAST and PERMISSIVE. DROPMALFORMED is left
+          // to the CPU so that Spark reports it as unsupported.
+          val mode = options.getOrElse("mode", "FAILFAST")
+          if (mode.equalsIgnoreCase("DROPMALFORMED")) {
+            willNotWorkOnGpu(s"from_protobuf does not support mode '$mode'")
+            return
+          }
+          failOnErrors = mode.equalsIgnoreCase("FAILFAST")
+
+          val messageName = getMessageName(e)
+          val descFilePathOpt = getDescFilePath(e).orElse {
+            // Newer Spark may embed a descriptor set (binaryDescriptorSet). Write it to a temp file
+            // so we can reuse Spark's ProtobufUtils (and its shaded protobuf classes) to resolve
+            // the descriptor.
+            getDescriptorBytes(e).map(writeTempDescFile)
+          }
+          if (descFilePathOpt.isEmpty) {
+            willNotWorkOnGpu(
+              "from_protobuf requires a descriptor set " +
+                "(descFilePath or binaryDescriptorSet)")
+            return
+          }
+
+          val msgDesc = try {
+            // Spark 3.4.x builds the descriptor as:
+            // ProtobufUtils.buildDescriptor(messageName, descFilePathOpt)
+            buildMessageDescriptorWithSparkProtobuf(messageName, descFilePathOpt)
+          } catch {
+            // Fall back for ordinary failures and for linkage errors from the optional
+            // spark-protobuf classes; other fatal errors (e.g. OutOfMemoryError) propagate.
+            case t if scala.util.control.NonFatal(t) || t.isInstanceOf[LinkageError] =>
+              willNotWorkOnGpu(
+                s"Failed to resolve protobuf descriptor for message '$messageName': " +
+                  s"${t.getMessage}")
+              return
+          }
+
+          // Step 1: Analyze all fields and build field info map
+          val fieldsInfoMap =
+            analyzeAllFields(fullSchema, msgDesc, enumsAsInts, messageName) match {
+              case Some(infos) => infos
+              case None => return // the reason was already reported by analyzeAllFields
+            }
+
+          // Step 2: Determine which fields are actually required by downstream operations
+          val requiredFieldNames = analyzeRequiredFields(fieldsInfoMap.keySet)
+
+          // Step 3: Check if all required fields are supported
+          val unsupportedRequired = requiredFieldNames.filter { name =>
+            fieldsInfoMap.get(name).exists(!_.isSupported)
+          }
+          if (unsupportedRequired.nonEmpty) {
+            val reasons = unsupportedRequired.map { name =>
+              val info = fieldsInfoMap(name)
+              s"${name}: ${info.unsupportedReason.getOrElse("unknown reason")}"
+            }
+            willNotWorkOnGpu(
+              s"Required fields not supported for from_protobuf: ${reasons.mkString(", ")}")
+            return
+          }
+
+          // Step 4: Identify which fields in fullSchema need to be decoded. These are the
+          // required fields, all of which are supported once step 3 has passed.
+          decodedFieldIndices = fullSchema.fields.zipWithIndex.collect {
+            case (sf, idx) if requiredFieldNames.contains(sf.name) => idx
+          }
+
+          // Step 5: Build cudfTypeIds for ALL fields in fullSchema
+          // For unsupported types (nested struct, array, etc.), use INT8 as placeholder.
+          // These placeholder columns will be replaced with properly typed null columns in Scala.
+          cudfTypeIds = fullSchema.fields.map { sf =>
+            GpuFromProtobuf.sparkTypeToCudfIdOpt(sf.dataType)
+              .getOrElse(DType.INT8.getTypeId.getNativeId)  // placeholder for unsupported types
+          }
+
+          // Step 6: Build arrays for decoded fields only (parallel to decodedFieldIndices)
+          val decodedInfos = decodedFieldIndices.map(i => fieldsInfoMap(fullSchema.fields(i).name))
+          fieldNumbers = decodedInfos.map(_.fieldNumber)
+          cudfTypeScales = decodedInfos.map(_.encoding)
+        }
+
+        /**
+         * Analyze all fields in the schema and build a map of field name to ProtobufFieldInfo.
+         * Returns None if there's an error that should abort processing.
+         */
+        private def analyzeAllFields(
+            schema: StructType,
+            msgDesc: AnyRef,
+            enumsAsInts: Boolean,
+            messageName: String): Option[Map[String, ProtobufFieldInfo]] = {
+          // Look up every schema field first; a null result means the message lacks the field.
+          val described = schema.fields.map { sf =>
+            sf -> invoke1[AnyRef](msgDesc, "findFieldByName", classOf[String], sf.name)
+          }
+
+          described.collectFirst { case (sf, null) => sf.name } match {
+            case Some(missing) =>
+              // Abort on the first schema field the message does not define.
+              willNotWorkOnGpu(
+                s"Protobuf field '$missing' not found in message '$messageName'")
+              None
+            case None =>
+              Some(described.map { case (sf, fd) =>
+                // NOTE(review): a failing isRepeated lookup is treated as "not repeated".
+                val isRepeated = Try {
+                  invoke0[java.lang.Boolean](fd, "isRepeated").booleanValue()
+                }.getOrElse(false)
+                val protoTypeName = typeName(invoke0[AnyRef](fd, "getType"))
+                val fieldNumber = invoke0[java.lang.Integer](fd, "getNumber").intValue()
+
+                // Check field support and determine encoding
+                val (isSupported, unsupportedReason, encoding) =
+                  checkFieldSupport(sf.dataType, protoTypeName, isRepeated, enumsAsInts)
+
+                sf.name -> ProtobufFieldInfo(
+                  fieldNumber = fieldNumber,
+                  protoTypeName = protoTypeName,
+                  sparkType = sf.dataType,
+                  encoding = encoding,
+                  isSupported = isSupported,
+                  unsupportedReason = unsupportedReason)
+              }.toMap)
+          }
+        }
+
+        /**
+         * Check if a field type is supported and return encoding information.
+         * @return (isSupported, unsupportedReason, encoding)
+         */
+        private def checkFieldSupport(
+            sparkType: DataType,
+            protoTypeName: String,
+            isRepeated: Boolean,
+            enumsAsInts: Boolean): (Boolean, Option[String], Int) = {
+          // Every rejection carries the default encoding; only accepted fields get a real one.
+          def rejected(reason: String): (Boolean, Option[String], Int) =
+            (false, Some(reason), GpuFromProtobuf.ENC_DEFAULT)
+
+          // Only these simple Spark types are accepted.
+          val simpleSparkType = sparkType match {
+            case BooleanType | IntegerType | LongType | FloatType | DoubleType |
+                 StringType | BinaryType => true
+            case _ => false
+          }
+
+          // Which protobuf field types may back each Spark type.
+          val compatible = (sparkType, protoTypeName) match {
+            case (BooleanType, "BOOL") => true
+            case (IntegerType, "INT32" | "UINT32" | "SINT32" | "FIXED32" | "SFIXED32") => true
+            // An enum is accepted as an integer only when enums.as.ints is enabled.
+            case (IntegerType, "ENUM") => enumsAsInts
+            case (LongType, "INT64" | "UINT64" | "SINT64" | "FIXED64" | "SFIXED64") => true
+            // Spark may upcast smaller integers to LongType
+            // NOTE(review): FIXED32/SFIXED32 keep ENC_FIXED here although the output is INT64;
+            // confirm the decoder still reads four bytes for such a field.
+            case (LongType, "INT32" | "UINT32" | "SINT32" | "FIXED32" | "SFIXED32") => true
+            case (FloatType, "FLOAT") | (DoubleType, "DOUBLE") => true
+            case (StringType, "STRING") | (BinaryType, "BYTES") => true
+            case _ => false
+          }
+
+          // For a compatible pair the encoding depends on the protobuf type alone: zigzag for
+          // the SINT types, fixed for the (S)FIXED types, and the default for everything else.
+          val encoding = protoTypeName match {
+            case "SINT32" | "SINT64" => GpuFromProtobuf.ENC_ZIGZAG
+            case "FIXED32" | "SFIXED32" | "FIXED64" | "SFIXED64" => GpuFromProtobuf.ENC_FIXED
+            case _ => GpuFromProtobuf.ENC_DEFAULT
+          }
+
+          // The checks apply in this order: repeated, Spark type, Spark/protobuf pairing.
+          if (isRepeated) {
+            rejected("repeated fields are not supported")
+          } else if (!simpleSparkType) {
+            rejected(s"unsupported Spark type: $sparkType")
+          } else if (!compatible) {
+            rejected(s"type mismatch: Spark $sparkType vs Protobuf $protoTypeName")
+          } else {
+            // Supported: no reason to report, and the encoding selected above applies.
+            (true, None, encoding)
+          }
+        }
+
+        /**
+         * Analyze which fields are actually required by downstream operations.
+         * Currently supports analyzing parent Project expressions.
+         *
+         * @param allFieldNames All field names in the full schema
+         * @return Set of field names that are actually required
+         */
+        private def analyzeRequiredFields(allFieldNames: Set[String]): Set[String] = {
+          // The lookup takes up to two steps, both based on analyzeDownstreamProject. When
+          // neither step produces a non-empty set of names, every field of the schema is
+          // treated as required.
+
+          // Field names read by the Project wrapped in `planMeta`; an empty set counts as none.
+          def referencedBy(planMeta: SparkPlanMeta[_]): Option[Set[String]] =
+            analyzeDownstreamProject(planMeta).filter(_.nonEmpty)
+
+          // The nearest plan node might be a ProjectExec that just aliases the output, so the
+          // GetStructField references may only show up in its parent (the grandparent).
+          def referencedByGrandParent(planMeta: SparkPlanMeta[_]): Option[Set[String]] =
+            planMeta.parent match {
+              case Some(grandParentMeta: SparkPlanMeta[_]) => referencedBy(grandParentMeta)
+              case _ => None
+            }
+
+          val nearestPlan = findParentPlanMeta()
+          val required = nearestPlan.flatMap { planMeta =>
+            // Step 1: the nearest enclosing plan node.
+            referencedBy(planMeta)
+              // Step 2: only consulted when step 1 found nothing.
+              .orElse(referencedByGrandParent(planMeta))
+          }
+
+          // No enclosing plan node, or no usable projection: assume all fields are needed.
+          required.getOrElse(allFieldNames)
+        }
+
+        /**
+         * Find the parent SparkPlanMeta by traversing up the parent chain.
+         */
+        private def findParentPlanMeta(): Option[SparkPlanMeta[_]] = {
+          // Lazily enumerate the ancestors of this meta, nearest first, until the parent
+          // chain runs out.
+          val ancestors = Iterator
+            .iterate[Option[RapidsMeta[_, _, _]]](parent)(_.flatMap(_.parent))
+            .takeWhile(_.isDefined)
+
+          // The first ancestor that is a SparkPlanMeta is the answer; None if there is none.
+          ancestors.collectFirst { case Some(planMeta: SparkPlanMeta[_]) => planMeta }
+        }
+
+        /**
+         * Analyze a Project plan to find which struct fields are actually used.
+         * This looks for GetStructField expressions that reference our protobuf output.
+         */
+        private def analyzeDownstreamProject(planMeta: SparkPlanMeta[_]): Option[Set[String]] = {
+          planMeta.wrapped match {
+            case project: ProjectExec =>
+              val referenced = mutable.Set.empty[String]
+              var wholeStructUsed = false
+              // Callback handed to the collector; it flips the flag above.
+              val markWholeStructUsed = () => { wholeStructUsed = true }
+
+              // Scan every projected expression. The helper adds the names of fields read
+              // through GetStructField to `referenced` and calls the callback whenever the
+              // struct is referenced directly instead.
+              for (projected <- project.projectList) {
+                collectStructFieldReferences(projected, referenced, markWholeStructUsed)
+              }
+
+              // A direct reference to the entire struct means all fields are needed, and
+              // without any GetStructField reference there is nothing to prune by; both
+              // cases are reported as None.
+              if (wholeStructUsed || referenced.isEmpty) {
+                None
+              } else {
+                Some(referenced.toSet)
+              }
+
+            case _ =>
+              // Not a ProjectExec, cannot analyze schema projection
+              None
+          }
+        }
+
+        /**
+         * Recursively collect field names from GetStructField expressions.
+         * Also tracks if the struct is used directly without field extraction.
+         */
+        private def collectStructFieldReferences(
+            expr: Expression,
+            fieldRefs: mutable.Set[String],
+            hasDirectStructRefHolder: () => Unit): Unit = {
+          // Continues the search in a sub-expression with the same accumulator and callback.
+          def descend(next: Expression): Unit =
+            collectStructFieldReferences(next, fieldRefs, hasDirectStructRefHolder)
+
+          expr match {
+            case GetStructField(src, ordinal, nameOpt) if isProtobufStructReference(src) =>
+              // A field extracted from the protobuf struct. The name carried by the
+              // expression is preferred; otherwise it is taken from the schema by ordinal,
+              // with "_<ordinal>" as the last resort for an ordinal beyond the schema.
+              def nameFromSchema: String =
+                if (ordinal < fullSchema.fields.length) fullSchema.fields(ordinal).name
+                else s"_$ordinal"
+
+              // `src` is not searched any further: this reference has been handled.
+              fieldRefs += nameOpt.getOrElse(nameFromSchema)
+
+            case GetStructField(src, _, _) =>
+              // The source is not a protobuf struct; look inside it for nested access.
+              descend(src)
+
+            case other =>
+              // Using the protobuf struct without extracting a field (e.g. passing the
+              // whole struct to a function) is reported through the callback.
+              if (isProtobufStructReference(other)) {
+                hasDirectStructRefHolder()
+              }
+
+              // Sub-expressions are searched in either case.
+              other.children.foreach(descend)
+          }
+        }
+
+        /**
+         * Check whether `expr` may denote the struct produced by the protobuf decode.
+         *
+         * Two shapes are accepted:
+         * 1. the decode expression itself (its class name contains "ProtobufDataToCatalyst")
+         * 2. an AttributeReference whose struct type has the same top-level field names and
+         *    types as `fullSchema`, e.g. when a downstream ProjectExec reads the decoded column
+         *
+         * Shape 2 is a structural heuristic: an unrelated column with an identical schema
+         * matches as well.
+         *
+         * @param expr expression to classify
+         * @return true when `expr` has one of the two shapes above
+         */
+        private def isProtobufStructReference(expr: Expression): Boolean = expr match {
+          // The decode expression is recognized by class name rather than by type; this
+          // case is tried first, before the schema comparison below.
+          case decode if decode.getClass.getName.contains("ProtobufDataToCatalyst") => true
+          case attr: AttributeReference =>
+            attr.dataType match {
+              case st: StructType =>
+                // Top-level nullable flags are deliberately left out of the comparison,
+                // because projections and optimizer rewrites may change them while the
+                // structure stays the same.
+                // NOTE(review): `a.dataType == b.dataType` still compares nullability inside
+                // nested types - confirm that this is intended.
+                st.fields.length == fullSchema.fields.length &&
+                  st.fields.zip(fullSchema.fields).forall { case (a, b) =>
+                    a.name == b.name && a.dataType == b.dataType
+                  }
+              case _ => false
+            }
+          case _ => false
+        }
+
+        override def convertToGpu(child: Expression): GpuExpression =
+          // Everything except `child` is taken from the enclosing scope, not from parameters.
+          GpuFromProtobuf(
+            fullSchema, decodedFieldIndices, fieldNumbers, cudfTypeIds, cudfTypeScales,
+            failOnErrors, child)
+      }
+    )
+  }
+
+  private def getMessageName(e: Expression): String = // read through the public accessor
+    e.getClass.getMethod("messageName").invoke(e).asInstanceOf[String]
+
+  /**
+   * Descriptor-set bytes carried by the expression itself, if any. The accessor is absent on
+   * Spark 3.4.x (callers fall back to descFilePath) and may be typed Array[Byte] or
+   * Option[Array[Byte]], so the runtime value is matched rather than cast to an erased type.
+   */
+  private def getDescriptorBytes(e: Expression): Option[Array[Byte]] =
+    // Apache Spark 3.5+ names the accessor binaryFileDescriptorSet; the old name is kept too.
+    Seq("binaryFileDescriptorSet", "binaryDescriptorSet").iterator
+      .flatMap(name => Try(invoke0[AnyRef](e, name)).toOption).collectFirst {
+        case bytes: Array[Byte] => bytes
+        case Some(bytes: Array[Byte]) => bytes
+      }
+
+  private def getDescFilePath(e: Expression): Option[String] = // None when the lookup fails
+    Try(invoke0[Option[String]](e, "descFilePath")).getOrElse(None)
+
+  private def writeTempDescFile(descBytes: Array[Byte]): String = {
+    // Cleanup is best-effort: deleteOnExit() is not guaranteed to run when the JVM
+    // terminates abnormally, so a descriptor file may be left behind. That risk is accepted
+    // because these files are small (typically < 10KB) and are only created when using
+    // binaryDescriptorSet (Spark 4.0+).
+    val descFile: Path =
+      Files.write(Files.createTempFile("spark-rapids-protobuf-desc-", ".desc"), descBytes)
+    descFile.toFile.deleteOnExit()
+    descFile.toString
+  }
+
+  private def buildMessageDescriptorWithSparkProtobuf(
+      messageName: String,
+      descFilePathOpt: Option[String]): AnyRef = {
+    // Calls buildDescriptor(messageName, descFilePath) on the Scala object's MODULE$ instance.
+    val utilsClass = ShimReflectionUtils.loadClass(sparkProtobufUtilsObjectClassName)
+    val utilsObject = utilsClass.getField("MODULE$").get(null)
+    utilsClass.getMethod("buildDescriptor", classOf[String], classOf[scala.Option[_]])
+      .invoke(utilsObject, messageName, descFilePathOpt)
+  }
+
+  private def typeName(t: AnyRef): String = t match {
+    // A null input is rendered as the literal string "null".
+    case null => "null"
+    case value =>
+      // Use the public no-arg name() accessor when the value has one (as enums do);
+      // if that reflective call fails for any reason, render the value with toString.
+      Try(invoke0[String](value, "name")).getOrElse(value.toString)
+  }
+
+  private def getOptionsMap(e: Expression): Map[String, String] =
+    // An empty map is returned when the reflective `options` lookup fails.
+    Try(invoke0[scala.collection.Map[String, String]](e, "options")).toOption
+      .fold(Map.empty[String, String])(_.toMap)
+
+  private def invoke0[T](obj: AnyRef, method: String): T = // public no-arg method, by name
+    obj.getClass.getMethod(method).invoke(obj).asInstanceOf[T] // cast to T is unchecked
+
+  private def invoke1[T](obj: AnyRef, method: String, arg0Cls: Class[_], arg0: AnyRef): T =
+    obj.getClass.getMethod(method, arg0Cls).invoke(obj, arg0).asInstanceOf[T] // unchecked cast
+}
diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
index 6e28a071a00..56bfa229051 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2025, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2026, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -162,7 +162,7 @@ trait Spark340PlusNonDBShims extends Spark331PlusNonDBShims {
       ),
       GpuElementAtMeta.elementAtRule(true)
     ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap
-    super.getExprs ++ shimExprs
+    super.getExprs ++ shimExprs ++ ProtobufExprShims.exprs
   }
 
   override def getDataWriteCmds: Map[Class[_ <: DataWritingCommand],