From 2b4ef0bdb8100c1f544d4127d2105718cb372dd5 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 20 Sep 2025 20:20:21 +0200 Subject: [PATCH 1/4] Add pyarrow-stubs minus their docstrings --- python/pyarrow-stubs/__init__.pyi | 676 ++++++++ python/pyarrow-stubs/_azurefs.pyi | 33 + python/pyarrow-stubs/_compute.pyi | 586 +++++++ python/pyarrow-stubs/_csv.pyi | 135 ++ python/pyarrow-stubs/_cuda.pyi | 164 ++ python/pyarrow-stubs/_dataset.pyi | 669 ++++++++ python/pyarrow-stubs/_dataset_orc.pyi | 24 + python/pyarrow-stubs/_dataset_parquet.pyi | 163 ++ .../_dataset_parquet_encryption.pyi | 59 + python/pyarrow-stubs/_feather.pyi | 50 + python/pyarrow-stubs/_flight.pyi | 656 +++++++ python/pyarrow-stubs/_fs.pyi | 216 +++ python/pyarrow-stubs/_gcsfs.pyi | 44 + python/pyarrow-stubs/_hdfs.pyi | 38 + python/pyarrow-stubs/_ipc.pyi | 301 ++++ python/pyarrow-stubs/_json.pyi | 70 + python/pyarrow-stubs/_orc.pyi | 73 + python/pyarrow-stubs/_parquet.pyi | 492 ++++++ python/pyarrow-stubs/_parquet_encryption.pyi | 93 + python/pyarrow-stubs/_s3fs.pyi | 103 ++ python/pyarrow-stubs/_stubs_typing.pyi | 117 ++ python/pyarrow-stubs/_substrait.pyi | 63 + python/pyarrow-stubs/_types.pyi | 992 +++++++++++ python/pyarrow-stubs/acero.pyi | 113 ++ python/pyarrow-stubs/array.pyi | 860 ++++++++++ python/pyarrow-stubs/builder.pyi | 53 + python/pyarrow-stubs/cffi.pyi | 21 + python/pyarrow-stubs/compat.pyi | 22 + python/pyarrow-stubs/compute.pyi | 1518 +++++++++++++++++ python/pyarrow-stubs/config.pyi | 64 + python/pyarrow-stubs/csv.pyi | 44 + python/pyarrow-stubs/cuda.pyi | 42 + python/pyarrow-stubs/dataset.pyi | 272 +++ python/pyarrow-stubs/device.pyi | 68 + python/pyarrow-stubs/error.pyi | 70 + python/pyarrow-stubs/feather.pyi | 78 + python/pyarrow-stubs/flight.pyi | 112 ++ python/pyarrow-stubs/fs.pyi | 97 ++ python/pyarrow-stubs/gandiva.pyi | 82 + python/pyarrow-stubs/interchange/__init__.pyi | 16 + python/pyarrow-stubs/interchange/buffer.pyi | 41 + python/pyarrow-stubs/interchange/column.pyi | 90 + .../pyarrow-stubs/interchange/dataframe.pyi | 49 + .../interchange/from_dataframe.pyi | 89 + python/pyarrow-stubs/io.pyi | 428 +++++ python/pyarrow-stubs/ipc.pyi | 157 ++ python/pyarrow-stubs/json.pyi | 20 + python/pyarrow-stubs/lib.pyi | 93 + python/pyarrow-stubs/memory.pyi | 91 + python/pyarrow-stubs/orc.pyi | 146 ++ python/pyarrow-stubs/pandas_compat.pyi | 82 + python/pyarrow-stubs/pandas_shim.pyi | 68 + python/pyarrow-stubs/parquet/__init__.pyi | 18 + python/pyarrow-stubs/parquet/core.pyi | 355 ++++ python/pyarrow-stubs/parquet/encryption.pyi | 32 + python/pyarrow-stubs/py.typed | 16 + python/pyarrow-stubs/scalar.pyi | 391 +++++ python/pyarrow-stubs/substrait.pyi | 38 + python/pyarrow-stubs/table.pyi | 653 +++++++ python/pyarrow-stubs/tensor.pyi | 253 +++ python/pyarrow-stubs/types.pyi | 217 +++ python/pyarrow-stubs/util.pyi | 48 + 62 files changed, 12724 insertions(+) create mode 100644 python/pyarrow-stubs/__init__.pyi create mode 100644 python/pyarrow-stubs/_azurefs.pyi create mode 100644 python/pyarrow-stubs/_compute.pyi create mode 100644 python/pyarrow-stubs/_csv.pyi create mode 100644 python/pyarrow-stubs/_cuda.pyi create mode 100644 python/pyarrow-stubs/_dataset.pyi create mode 100644 python/pyarrow-stubs/_dataset_orc.pyi create mode 100644 python/pyarrow-stubs/_dataset_parquet.pyi create mode 100644 python/pyarrow-stubs/_dataset_parquet_encryption.pyi create mode 100644 python/pyarrow-stubs/_feather.pyi create mode 100644 python/pyarrow-stubs/_flight.pyi create mode 100644 python/pyarrow-stubs/_fs.pyi create mode
100644 python/pyarrow-stubs/_gcsfs.pyi create mode 100644 python/pyarrow-stubs/_hdfs.pyi create mode 100644 python/pyarrow-stubs/_ipc.pyi create mode 100644 python/pyarrow-stubs/_json.pyi create mode 100644 python/pyarrow-stubs/_orc.pyi create mode 100644 python/pyarrow-stubs/_parquet.pyi create mode 100644 python/pyarrow-stubs/_parquet_encryption.pyi create mode 100644 python/pyarrow-stubs/_s3fs.pyi create mode 100644 python/pyarrow-stubs/_stubs_typing.pyi create mode 100644 python/pyarrow-stubs/_substrait.pyi create mode 100644 python/pyarrow-stubs/_types.pyi create mode 100644 python/pyarrow-stubs/acero.pyi create mode 100644 python/pyarrow-stubs/array.pyi create mode 100644 python/pyarrow-stubs/builder.pyi create mode 100644 python/pyarrow-stubs/cffi.pyi create mode 100644 python/pyarrow-stubs/compat.pyi create mode 100644 python/pyarrow-stubs/compute.pyi create mode 100644 python/pyarrow-stubs/config.pyi create mode 100644 python/pyarrow-stubs/csv.pyi create mode 100644 python/pyarrow-stubs/cuda.pyi create mode 100644 python/pyarrow-stubs/dataset.pyi create mode 100644 python/pyarrow-stubs/device.pyi create mode 100644 python/pyarrow-stubs/error.pyi create mode 100644 python/pyarrow-stubs/feather.pyi create mode 100644 python/pyarrow-stubs/flight.pyi create mode 100644 python/pyarrow-stubs/fs.pyi create mode 100644 python/pyarrow-stubs/gandiva.pyi create mode 100644 python/pyarrow-stubs/interchange/__init__.pyi create mode 100644 python/pyarrow-stubs/interchange/buffer.pyi create mode 100644 python/pyarrow-stubs/interchange/column.pyi create mode 100644 python/pyarrow-stubs/interchange/dataframe.pyi create mode 100644 python/pyarrow-stubs/interchange/from_dataframe.pyi create mode 100644 python/pyarrow-stubs/io.pyi create mode 100644 python/pyarrow-stubs/ipc.pyi create mode 100644 python/pyarrow-stubs/json.pyi create mode 100644 python/pyarrow-stubs/lib.pyi create mode 100644 python/pyarrow-stubs/memory.pyi create mode 100644 python/pyarrow-stubs/orc.pyi create mode 100644 python/pyarrow-stubs/pandas_compat.pyi create mode 100644 python/pyarrow-stubs/pandas_shim.pyi create mode 100644 python/pyarrow-stubs/parquet/__init__.pyi create mode 100644 python/pyarrow-stubs/parquet/core.pyi create mode 100644 python/pyarrow-stubs/parquet/encryption.pyi create mode 100644 python/pyarrow-stubs/py.typed create mode 100644 python/pyarrow-stubs/scalar.pyi create mode 100644 python/pyarrow-stubs/substrait.pyi create mode 100644 python/pyarrow-stubs/table.pyi create mode 100644 python/pyarrow-stubs/tensor.pyi create mode 100644 python/pyarrow-stubs/types.pyi create mode 100644 python/pyarrow-stubs/util.pyi diff --git a/python/pyarrow-stubs/__init__.pyi b/python/pyarrow-stubs/__init__.pyi new file mode 100644 index 00000000000..1a188eccd45 --- /dev/null +++ b/python/pyarrow-stubs/__init__.pyi @@ -0,0 +1,676 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# ruff: noqa: F401, I001, E402 +__version__: str + +import pyarrow.lib as _lib + +_gc_enabled: bool + +from pyarrow.lib import ( + BuildInfo, + RuntimeInfo, + set_timezone_db_path, + MonthDayNano, + VersionInfo, + cpp_build_info, + cpp_version, + cpp_version_info, + runtime_info, + cpu_count, + set_cpu_count, + enable_signal_handlers, + io_thread_count, + set_io_thread_count, +) + + +def show_versions() -> None: ... +def show_info() -> None: ... +def _module_is_available(module: str) -> bool: ... +def _filesystem_is_available(fs: str) -> bool: ... + + +from pyarrow.lib import ( + null, + bool_, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + time32, + time64, + timestamp, + date32, + date64, + duration, + month_day_nano_interval, + float16, + float32, + float64, + binary, + string, + utf8, + binary_view, + string_view, + large_binary, + large_string, + large_utf8, + decimal32, + decimal64, + decimal128, + decimal256, + list_, + large_list, + list_view, + large_list_view, + map_, + struct, + union, + sparse_union, + dense_union, + dictionary, + run_end_encoded, + json_, + uuid, + fixed_shape_tensor, + bool8, + opaque, + field, + type_for_alias, + DataType, + DictionaryType, + StructType, + ListType, + LargeListType, + FixedSizeListType, + ListViewType, + LargeListViewType, + MapType, + UnionType, + SparseUnionType, + DenseUnionType, + TimestampType, + Time32Type, + Time64Type, + DurationType, + FixedSizeBinaryType, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + BaseExtensionType, + ExtensionType, + RunEndEncodedType, + FixedShapeTensorType, + Bool8Type, + UuidType, + JsonType, + OpaqueType, + UnknownExtensionType, + register_extension_type, + unregister_extension_type, + DictionaryMemo, + KeyValueMetadata, + Field, + Schema, + schema, + unify_schemas, + Array, + Tensor, + array, + chunked_array, + record_batch, + nulls, + repeat, + SparseCOOTensor, + SparseCSRMatrix, + SparseCSCMatrix, + SparseCSFTensor, + infer_type, + from_numpy_dtype, + NullArray, + NumericArray, + IntegerArray, + FloatingPointArray, + BooleanArray, + Int8Array, + UInt8Array, + Int16Array, + UInt16Array, + Int32Array, + UInt32Array, + Int64Array, + UInt64Array, + HalfFloatArray, + FloatArray, + DoubleArray, + ListArray, + LargeListArray, + FixedSizeListArray, + ListViewArray, + LargeListViewArray, + MapArray, + UnionArray, + BinaryArray, + StringArray, + LargeBinaryArray, + LargeStringArray, + BinaryViewArray, + StringViewArray, + FixedSizeBinaryArray, + DictionaryArray, + Date32Array, + Date64Array, + TimestampArray, + Time32Array, + Time64Array, + DurationArray, + MonthDayNanoIntervalArray, + Decimal32Array, + Decimal64Array, + Decimal128Array, + Decimal256Array, + StructArray, + ExtensionArray, + RunEndEncodedArray, + FixedShapeTensorArray, + Bool8Array, + UuidArray, + JsonArray, + OpaqueArray, + scalar, + NA, + _NULL as NULL, + Scalar, + NullScalar, + BooleanScalar, + Int8Scalar, + Int16Scalar, + Int32Scalar, + Int64Scalar, + UInt8Scalar, + UInt16Scalar, + UInt32Scalar, + UInt64Scalar, + HalfFloatScalar, + FloatScalar, + DoubleScalar, + Decimal32Scalar, + Decimal64Scalar, + Decimal128Scalar, + Decimal256Scalar, + ListScalar, + LargeListScalar, + FixedSizeListScalar, + ListViewScalar, + LargeListViewScalar, + Date32Scalar, + Date64Scalar, + Time32Scalar, + Time64Scalar, + TimestampScalar, + DurationScalar, + MonthDayNanoIntervalScalar, + BinaryScalar, + 
LargeBinaryScalar, + BinaryViewScalar, + StringScalar, + LargeStringScalar, + StringViewScalar, + FixedSizeBinaryScalar, + DictionaryScalar, + MapScalar, + StructScalar, + UnionScalar, + RunEndEncodedScalar, + ExtensionScalar, + Bool8Scalar, + UuidScalar, + JsonScalar, + OpaqueScalar, +) + +# Buffers, allocation +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager + +from pyarrow.lib import ( + Buffer, + ResizableBuffer, + foreign_buffer, + py_buffer, + Codec, + compress, + decompress, + allocate_buffer, +) + +from pyarrow.lib import ( + MemoryPool, + LoggingMemoryPool, + ProxyMemoryPool, + total_allocated_bytes, + set_memory_pool, + default_memory_pool, + system_memory_pool, + jemalloc_memory_pool, + mimalloc_memory_pool, + logging_memory_pool, + proxy_memory_pool, + log_memory_allocations, + jemalloc_set_decay_ms, + supported_memory_backends, +) + +# I/O +from pyarrow.lib import ( + NativeFile, + PythonFile, + BufferedInputStream, + BufferedOutputStream, + CacheOptions, + CompressedInputStream, + CompressedOutputStream, + TransformInputStream, + transcoding_input_stream, + FixedSizeBufferWriter, + BufferReader, + BufferOutputStream, + OSFile, + MemoryMappedFile, + memory_map, + create_memory_map, + MockOutputStream, + input_stream, + output_stream, + have_libhdfs, +) + +from pyarrow.lib import ( + ChunkedArray, + RecordBatch, + Table, + table, + concat_arrays, + concat_tables, + TableGroupBy, + RecordBatchReader, +) + +# Exceptions +from pyarrow.lib import ( + ArrowCancelled, + ArrowCapacityError, + ArrowException, + ArrowKeyError, + ArrowIndexError, + ArrowInvalid, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError, + ArrowTypeError, + ArrowSerializationError, +) + +from pyarrow.ipc import serialize_pandas, deserialize_pandas +import pyarrow.ipc as ipc + +import pyarrow.types as types + +# ---------------------------------------------------------------------- +# Deprecations + +from pyarrow.util import _deprecate_api, _deprecate_class + +from pyarrow.ipc import ( + Message, + MessageReader, + MetadataVersion, + RecordBatchFileReader, + RecordBatchFileWriter, + RecordBatchStreamReader, + RecordBatchStreamWriter, +) + +# ---------------------------------------------------------------------- +# Returning absolute path to the pyarrow include directory (if bundled, e.g. in +# wheels) + + +def get_include() -> str: ... +def _get_pkg_config_executable() -> str: ... +def _has_pkg_config(pkgname: str) -> bool: ... +def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ... +def get_libraries() -> list[str]: ... +def create_library_symlinks() -> None: ... +def get_library_dirs() -> list[str]: ... 
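+# Illustrative only (a sketch, not part of the stubs; assumes mypy as the
+# type checker): with this stub package installed alongside pyarrow, calls
+# through the top-level API resolve to the annotations above, e.g.
+#
+#   import pyarrow as pa
+#   t = pa.table({"a": [1, 2, 3]})
+#   reveal_type(t)  # mypy reports: pyarrow.lib.Table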
+ + +__all__ = [ + "__version__", + "_lib", + "_gc_enabled", + "BuildInfo", + "RuntimeInfo", + "set_timezone_db_path", + "MonthDayNano", + "VersionInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "cpu_count", + "set_cpu_count", + "enable_signal_handlers", + "io_thread_count", + "set_io_thread_count", + "show_versions", + "show_info", + "_module_is_available", + "_filesystem_is_available", + "null", + "bool_", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "time32", + "time64", + "timestamp", + "date32", + "date64", + "duration", + "month_day_nano_interval", + "float16", + "float32", + "float64", + "binary", + "string", + "utf8", + "binary_view", + "string_view", + "large_binary", + "large_string", + "large_utf8", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "struct", + "union", + "sparse_union", + "dense_union", + "dictionary", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "field", + "type_for_alias", + "DataType", + "DictionaryType", + "StructType", + "ListType", + "LargeListType", + "FixedSizeListType", + "ListViewType", + "LargeListViewType", + "MapType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "BaseExtensionType", + "ExtensionType", + "RunEndEncodedType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "DictionaryMemo", + "KeyValueMetadata", + "Field", + "Schema", + "schema", + "unify_schemas", + "Array", + "Tensor", + "array", + "chunked_array", + "record_batch", + "nulls", + "repeat", + "SparseCOOTensor", + "SparseCSRMatrix", + "SparseCSCMatrix", + "SparseCSFTensor", + "infer_type", + "from_numpy_dtype", + "NullArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "BooleanArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "ListArray", + "LargeListArray", + "FixedSizeListArray", + "ListViewArray", + "LargeListViewArray", + "MapArray", + "UnionArray", + "BinaryArray", + "StringArray", + "LargeBinaryArray", + "LargeStringArray", + "BinaryViewArray", + "StringViewArray", + "FixedSizeBinaryArray", + "DictionaryArray", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "StructArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "RunEndEncodedArray", + "FixedShapeTensorArray", + "scalar", + "NA", + "NULL", + "Scalar", + "NullScalar", + "BooleanScalar", + "Int8Scalar", + "Int16Scalar", + "Int32Scalar", + "Int64Scalar", + "UInt8Scalar", + "UInt16Scalar", + "UInt32Scalar", + "UInt64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "ListScalar", + "LargeListScalar", + "FixedSizeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + "Time64Scalar", + 
"TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "BinaryViewScalar", + "StringScalar", + "LargeStringScalar", + "StringViewScalar", + "FixedSizeBinaryScalar", + "DictionaryScalar", + "MapScalar", + "StructScalar", + "UnionScalar", + "RunEndEncodedScalar", + "ExtensionScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "DeviceAllocationType", + "Device", + "MemoryManager", + "default_cpu_memory_manager", + "Buffer", + "ResizableBuffer", + "foreign_buffer", + "py_buffer", + "Codec", + "compress", + "decompress", + "allocate_buffer", + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "total_allocated_bytes", + "set_memory_pool", + "default_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "logging_memory_pool", + "proxy_memory_pool", + "log_memory_allocations", + "jemalloc_set_decay_ms", + "supported_memory_backends", + "NativeFile", + "PythonFile", + "BufferedInputStream", + "BufferedOutputStream", + "CacheOptions", + "CompressedInputStream", + "CompressedOutputStream", + "TransformInputStream", + "transcoding_input_stream", + "FixedSizeBufferWriter", + "BufferReader", + "BufferOutputStream", + "OSFile", + "MemoryMappedFile", + "memory_map", + "create_memory_map", + "MockOutputStream", + "input_stream", + "output_stream", + "have_libhdfs", + "ChunkedArray", + "RecordBatch", + "Table", + "table", + "concat_arrays", + "concat_tables", + "TableGroupBy", + "RecordBatchReader", + "ArrowCancelled", + "ArrowCapacityError", + "ArrowException", + "ArrowKeyError", + "ArrowIndexError", + "ArrowInvalid", + "ArrowIOError", + "ArrowMemoryError", + "ArrowNotImplementedError", + "ArrowTypeError", + "ArrowSerializationError", + "serialize_pandas", + "deserialize_pandas", + "ipc", + "types", + "_deprecate_api", + "_deprecate_class", + "Message", + "MessageReader", + "MetadataVersion", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "get_include", + "_get_pkg_config_executable", + "_has_pkg_config", + "_read_pkg_config_variable", + "get_libraries", + "create_library_symlinks", + "get_library_dirs", +] diff --git a/python/pyarrow-stubs/_azurefs.pyi b/python/pyarrow-stubs/_azurefs.pyi new file mode 100644 index 00000000000..2d866f34dbd --- /dev/null +++ b/python/pyarrow-stubs/_azurefs.pyi @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Literal + +from ._fs import FileSystem + + +class AzureFileSystem(FileSystem): + def __init__( + self, + account_name: str, + account_key: str | None = None, + blob_storage_authority: str | None = None, + dfs_storage_authority: str | None = None, + blob_storage_scheme: Literal["http", "https"] = "https", + dfs_storage_scheme: Literal["http", "https"] = "https", + sas_token: str | None = None, + ) -> None: ... diff --git a/python/pyarrow-stubs/_compute.pyi b/python/pyarrow-stubs/_compute.pyi new file mode 100644 index 00000000000..7742dbda539 --- /dev/null +++ b/python/pyarrow-stubs/_compute.pyi @@ -0,0 +1,586 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + Any, + Callable, + Iterable, + Literal, + Sequence, + TypeAlias, + TypedDict, + overload, +) + +from . import lib + +_Order: TypeAlias = Literal["ascending", "descending"] +_Placement: TypeAlias = Literal["at_start", "at_end"] + + +class Kernel(lib._Weakrefable): + ... + + +class Function(lib._Weakrefable): + @property + def arity(self) -> int: ... + + @property + def kind( + self, + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: ... + @property + def name(self) -> str: ... + @property + def num_kernels(self) -> int: ... + + def call( + self, + args: Iterable, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, + ) -> Any: ... + + +class FunctionOptions(lib._Weakrefable): + def serialize(self) -> lib.Buffer: ... + @classmethod + def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + + +class FunctionRegistry(lib._Weakrefable): + def get_function(self, name: str) -> Function: ... + def list_functions(self) -> list[str]: ... + + +class HashAggregateFunction(Function): + ... + + +class HashAggregateKernel(Kernel): + ... + + +class ScalarAggregateFunction(Function): + ... + + +class ScalarAggregateKernel(Kernel): + ... + + +class ScalarFunction(Function): + ... + + +class ScalarKernel(Kernel): + ... + + +class VectorFunction(Function): + ... + + +class VectorKernel(Kernel): + ... + +# ==================== _compute.pyx Option classes ==================== + + +class ArraySortOptions(FunctionOptions): + def __init__( + self, + order: _Order = "ascending", + null_placement: _Placement = "at_end", + ) -> None: ... + + +class AssumeTimezoneOptions(FunctionOptions): + def __init__( + self, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + ) -> None: ...
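+# Illustrative usage of the option classes above (a sketch, not part of the
+# stubs; assumes a standard pyarrow installation):
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   arr = pa.array([3, 1, 2])
+#   idx = pc.array_sort_indices(arr, options=pc.ArraySortOptions(order="descending"))
+#   arr.take(idx)  # -> [3, 2, 1]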
+ + +class CastOptions(FunctionOptions): + allow_int_overflow: bool + allow_time_truncate: bool + allow_time_overflow: bool + allow_decimal_truncate: bool + allow_float_truncate: bool + allow_invalid_utf8: bool + + def __init__( + self, + target_type: lib.DataType | None = None, + *, + allow_int_overflow: bool | None = None, + allow_time_truncate: bool | None = None, + allow_time_overflow: bool | None = None, + allow_decimal_truncate: bool | None = None, + allow_float_truncate: bool | None = None, + allow_invalid_utf8: bool | None = None, + ) -> None: ... + @staticmethod + def safe(target_type: lib.DataType | None = None) -> CastOptions: ... + @staticmethod + def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... + def is_safe(self) -> bool: ... + + +class CountOptions(FunctionOptions): + def __init__(self, mode: Literal["only_valid", + "only_null", "all"] = "only_valid") -> None: ... + + +class CumulativeOptions(FunctionOptions): + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... + + +class CumulativeSumOptions(FunctionOptions): + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... + + +class DayOfWeekOptions(FunctionOptions): + def __init__(self, *, count_from_zero: bool = True, + week_start: int = 1) -> None: ... + + +class DictionaryEncodeOptions(FunctionOptions): + def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... + + +class RunEndEncodeOptions(FunctionOptions): + # TODO: default is DataType(int32) + def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + + +class ElementWiseAggregateOptions(FunctionOptions): + def __init__(self, *, skip_nulls: bool = True) -> None: ... + + +class ExtractRegexOptions(FunctionOptions): + def __init__(self, pattern: str) -> None: ... + + +class ExtractRegexSpanOptions(FunctionOptions): + def __init__(self, pattern: str) -> None: ... + + +class FilterOptions(FunctionOptions): + def __init__( + self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... + + +class IndexOptions(FunctionOptions): + def __init__(self, value: lib.Scalar) -> None: ... + + +class JoinOptions(FunctionOptions): + @overload + def __init__( + self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + + @overload + def __init__(self, null_handling: Literal["replace"], + null_replacement: str = "") -> None: ... + + +class ListSliceOptions(FunctionOptions): + def __init__( + self, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + ) -> None: ... + + +class ListFlattenOptions(FunctionOptions): + def __init__(self, recursive: bool = False) -> None: ... + + +class MakeStructOptions(FunctionOptions): + def __init__( + self, + field_names: Sequence[str] = (), + *, + field_nullability: Sequence[bool] | None = None, + field_metadata: Sequence[lib.KeyValueMetadata] | None = None, + ) -> None: ... + + +class MapLookupOptions(FunctionOptions): + # TODO: query_key: Scalar or Object can be converted to Scalar + def __init__( + self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] + ) -> None: ... + + +class MatchSubstringOptions(FunctionOptions): + def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + + +class ModeOptions(FunctionOptions): + def __init__(self, n: int = 1, *, skip_nulls: bool = True, + min_count: int = 0) -> None: ... 
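+# Illustrative CastOptions usage (a sketch, not part of the stubs): the
+# safe()/unsafe() constructors above bundle the allow_* flags for pc.cast:
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   arr = pa.array([1.5, 2.7])
+#   pc.cast(arr, options=pc.CastOptions.unsafe(pa.int32()))  # truncates to [1, 2]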
+ + +class NullOptions(FunctionOptions): + def __init__(self, *, nan_is_null: bool = False) -> None: ... + + +class PadOptions(FunctionOptions): + def __init__( + self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True + ) -> None: ... + + +class PairwiseOptions(FunctionOptions): + def __init__(self, period: int = 1) -> None: ... + + +class PartitionNthOptions(FunctionOptions): + def __init__(self, pivot: int, *, + null_placement: _Placement = "at_end") -> None: ... + + +class WinsorizeOptions(FunctionOptions): + def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + + +class QuantileOptions(FunctionOptions): + def __init__( + self, + q: float | Sequence[float], + *, + interpolation: Literal["linear", "lower", + "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + + +class RandomOptions(FunctionOptions): + def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + + +class RankOptions(FunctionOptions): + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + ) -> None: ... + + +class RankQuantileOptions(FunctionOptions): + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + ) -> None: ... + + +class PivotWiderOptions(FunctionOptions): + def __init__( + self, + key_names: Sequence[str], + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + ) -> None: ... + + +class ReplaceSliceOptions(FunctionOptions): + def __init__(self, start: int, stop: int, replacement: str) -> None: ... + + +class ReplaceSubstringOptions(FunctionOptions): + def __init__( + self, pattern: str, replacement: str, *, max_replacements: int | None = None + ) -> None: ... + + +_RoundMode: TypeAlias = Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", +] + + +class RoundBinaryOptions(FunctionOptions): + def __init__( + self, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + + +class RoundOptions(FunctionOptions): + def __init__( + self, + ndigits: int = 0, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + + +_DateTimeUnit: TypeAlias = Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", +] + + +class RoundTemporalOptions(FunctionOptions): + def __init__( + self, + multiple: int = 1, + unit: _DateTimeUnit = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + ) -> None: ... + + +class RoundToMultipleOptions(FunctionOptions): + def __init__(self, multiple: float = 1.0, + round_mode: _RoundMode = "half_to_even") -> None: ... + + +class ScalarAggregateOptions(FunctionOptions): + def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... + + +class SelectKOptions(FunctionOptions): + def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + + +class SetLookupOptions(FunctionOptions): + def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... + + +class SliceOptions(FunctionOptions): + def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ...
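+# Illustrative rounding (a sketch, not part of the stubs): RoundOptions pairs
+# with pc.round; "half_to_even" is banker's rounding:
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   pc.round(pa.array([0.5, 1.5, 2.5]),
+#            options=pc.RoundOptions(ndigits=0, round_mode="half_to_even"))
+#   # -> [0, 2, 2]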
+ + +class SortOptions(FunctionOptions): + def __init__( + self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end" + ) -> None: ... + + +class SplitOptions(FunctionOptions): + def __init__(self, *, max_splits: int | None = None, + reverse: bool = False) -> None: ... + + +class SplitPatternOptions(FunctionOptions): + def __init__( + self, pattern: str, *, max_splits: int | None = None, reverse: bool = False + ) -> None: ... + + +class StrftimeOptions(FunctionOptions): + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C") -> None: ... + + +class StrptimeOptions(FunctionOptions): + def __init__( + self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False + ) -> None: ... + + +class StructFieldOptions(FunctionOptions): + def __init__( + self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int + ) -> None: ... + + +class TakeOptions(FunctionOptions): + def __init__(self, boundscheck: bool = True) -> None: ... + + +class TDigestOptions(FunctionOptions): + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + + +class TrimOptions(FunctionOptions): + def __init__(self, characters: str) -> None: ... + + +class Utf8NormalizeOptions(FunctionOptions): + def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + + +class VarianceOptions(FunctionOptions): + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, + min_count: int = 0) -> None: ... + + +class SkewOptions(FunctionOptions): + def __init__( + self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 + ) -> None: ... + + +class WeekOptions(FunctionOptions): + def __init__( + self, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + ) -> None: ... + +# ==================== _compute.pyx Functions ==================== + + +def call_function( + name: str, + args: list, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, +) -> Any: ... +def function_registry() -> FunctionRegistry: ... +def get_function(name: str) -> Function: ... +def list_functions() -> list[str]: ... + +# ==================== _compute.pyx Udf ==================== + + +def call_tabular_function( + function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None +) -> lib.RecordBatchReader: ... + + +class _FunctionDoc(TypedDict): + summary: str + description: str + + +def register_scalar_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +def register_tabular_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +def register_aggregate_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... 
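+# Illustrative scalar UDF registration via register_scalar_function above (a
+# sketch, not part of the stubs; the "add_one" function and its doc dict are
+# hypothetical). The wrapped callable receives a UdfContext followed by one
+# argument per entry in in_types:
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#
+#   def add_one(ctx, x):
+#       return pc.add(x, 1)
+#
+#   pc.register_scalar_function(
+#       add_one, "add_one",
+#       {"summary": "add one", "description": "Adds 1 to each element."},
+#       {"x": pa.int64()}, pa.int64())
+#   pc.call_function("add_one", [pa.array([1, 2, 3])])  # -> [2, 3, 4]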
+ + +def register_vector_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +class UdfContext: + @property + def batch_length(self) -> int: ... + @property + def memory_pool(self) -> lib.MemoryPool: ... + +# ==================== _compute.pyx Expression ==================== + + +class Expression(lib._Weakrefable): + @staticmethod + def from_substrait(buffer: bytes | lib.Buffer) -> Expression: ... + def to_substrait(self, schema: lib.Schema, + allow_arrow_extensions: bool = False) -> lib.Buffer: ... + + def __invert__(self) -> Expression: ... + def __and__(self, other) -> Expression: ... + def __or__(self, other) -> Expression: ... + def __add__(self, other) -> Expression: ... + def __mul__(self, other) -> Expression: ... + def __sub__(self, other) -> Expression: ... + def __eq__(self, value: object) -> Expression: ... # type: ignore[override] + def __ne__(self, value: object) -> Expression: ... # type: ignore[override] + def __gt__(self, value: object) -> Expression: ... # type: ignore[override] + def __lt__(self, value: object) -> Expression: ... # type: ignore[override] + def __ge__(self, value: object) -> Expression: ... # type: ignore[override] + def __le__(self, value: object) -> Expression: ... # type: ignore[override] + def __truediv__(self, other) -> Expression: ... + def is_valid(self) -> bool: ... + def is_null(self, nan_is_null: bool = False) -> Expression: ... + def is_nan(self) -> Expression: ... + + def cast( + self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None + ) -> Expression: ... + def isin(self, values: lib.Array | Iterable) -> Expression: ... + +# ==================== _compute.py ==================== diff --git a/python/pyarrow-stubs/_csv.pyi b/python/pyarrow-stubs/_csv.pyi new file mode 100644 index 00000000000..c62ae725ec1 --- /dev/null +++ b/python/pyarrow-stubs/_csv.pyi @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from dataclasses import dataclass, field +from typing import IO, Any, Callable, Literal + +from _typeshed import StrPath + +from . import lib + + +@dataclass(kw_only=True) +class ReadOptions(lib._Weakrefable): + use_threads: bool = field(default=True, kw_only=False) + block_size: int | None = None + skip_rows: int = 0 + skip_rows_after_names: int = 0 + column_names: list[str] | None = None + autogenerate_column_names: bool = False + encoding: str = "utf8" + def validate(self) -> None: ... 
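+# Illustrative ReadOptions usage (a sketch, not part of the stubs; "data.csv"
+# is a hypothetical path):
+#
+#   from pyarrow import csv
+#   opts = csv.ReadOptions(block_size=1 << 20, encoding="utf8")
+#   table = csv.read_csv("data.csv", read_options=opts)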
+ + +@dataclass(kw_only=True) +class ParseOptions(lib._Weakrefable): + + delimiter: str = field(default=",", kw_only=False) + quote_char: str | Literal[False] = '"' + double_quote: bool = True + escape_char: str | Literal[False] = False + newlines_in_values: bool = False + ignore_empty_lines: bool = True + invalid_row_handler: Callable[[InvalidRow], Literal["skip", "error"]] | None = None + + def validate(self) -> None: ... + + +@dataclass(kw_only=True) +class ConvertOptions(lib._Weakrefable): + + check_utf8: bool = field(default=True, kw_only=False) + column_types: lib.Schema | dict | None = None + null_values: list[str] | None = None + true_values: list[str] | None = None + false_values: list[str] | None = None + decimal_point: str = "." + strings_can_be_null: bool = False + quoted_strings_can_be_null: bool = True + include_columns: list[str] | None = None + include_missing_columns: bool = False + auto_dict_encode: bool = False + auto_dict_max_cardinality: int | None = None + timestamp_parsers: list[str] | None = None + + def validate(self) -> None: ... + + +@dataclass(kw_only=True) +class WriteOptions(lib._Weakrefable): + + include_header: bool = field(default=True, kw_only=False) + batch_size: int = 1024 + delimiter: str = "," + quoting_style: Literal["needed", "all_valid", "none"] = "needed" + + def validate(self) -> None: ... + + +@dataclass +class InvalidRow(lib._Weakrefable): + + expected_columns: int + actual_columns: int + number: int | None + text: str + + +class CSVWriter(lib._CRecordBatchWriter): + + def __init__( + self, + # TODO: OutputStream + sink: StrPath | IO[Any], + schema: lib.Schema, + write_options: WriteOptions | None = None, + *, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + + +class CSVStreamingReader(lib.RecordBatchReader): + ... + + +ISO8601: lib._Weakrefable + + +def open_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> CSVStreamingReader: ... + + +def read_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Table: ... + + +def write_csv( + data: lib.RecordBatch | lib.Table, + output_file: StrPath | lib.NativeFile | IO[Any], + write_options: WriteOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> None: ... diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi new file mode 100644 index 00000000000..929f448f396 --- /dev/null +++ b/python/pyarrow-stubs/_cuda.pyi @@ -0,0 +1,164 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +import cuda # type: ignore[import-not-found] + +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] + +from . import lib +from ._stubs_typing import ArrayLike + + +class Context(lib._Weakrefable): + + def __init__(self, device_number: int = 0, handle: int | None = None) -> None: ... + + @staticmethod + def from_numba(context: _numba_driver.Context | None = None) -> Context: ... + + def to_numba(self) -> _numba_driver.Context: ... + + @staticmethod + def get_num_devices() -> int: ... + + @property + def device_number(self) -> int: ... + + @property + def handle(self) -> int: ... + + def synchronize(self) -> None: ... + + @property + def bytes_allocated(self) -> int: ... + + def get_device_address(self, address: int) -> int: ... + + def new_buffer(self, nbytes: int) -> CudaBuffer: ... + + @property + def memory_manager(self) -> lib.MemoryManager: ... + + @property + def device(self) -> lib.Device: ... + + def foreign_buffer(self, address: int, size: int, base: Any | + None = None) -> CudaBuffer: ... + + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: ... + + def buffer_from_data( + self, + data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, + offset: int = 0, + size: int = -1, + ) -> CudaBuffer: ... + + def buffer_from_object(self, obj: Any) -> CudaBuffer: ... + + +class IpcMemHandle(lib._Weakrefable): + + @staticmethod + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: ... + + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: ... + + +class CudaBuffer(lib.Buffer): + + @staticmethod + def from_buffer(buf: lib.Buffer) -> CudaBuffer: ... + + @staticmethod + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: ... + + def to_numba(self) -> _numba_driver.MemoryPointer: ... + + def copy_to_host( + self, + position: int = 0, + nbytes: int = -1, + buf: lib.Buffer | None = None, + memory_pool: lib.MemoryPool | None = None, + resizable: bool = False, + ) -> lib.Buffer: ... + + def copy_from_host( + self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + ) -> int: ... + + def copy_from_device(self, buf: CudaBuffer, position: int = 0, + nbytes: int = -1) -> int: ... + + def export_for_ipc(self) -> IpcMemHandle: ... + + @property + def context(self) -> Context: ... + + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: ... + + def to_pybytes(self) -> bytes: ... + + +class HostBuffer(lib.Buffer): + + @property + def size(self) -> int: ... + + +class BufferReader(lib.NativeFile): + + def __init__(self, obj: CudaBuffer) -> None: ... + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: ... + + +class BufferWriter(lib.NativeFile): + + def __init__(self, obj: CudaBuffer) -> None: ... + def writeat(self, position: int, data: ArrayLike) -> None: ... + + @property + def buffer_size(self) -> int: ... + + @buffer_size.setter + def buffer_size(self, buffer_size: int): ... + + @property + def num_bytes_buffered(self) -> int: ... + + +def new_host_buffer(size: int, device: int = 0) -> HostBuffer: ... + + +def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: ... + + +def read_message( + source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None +) -> lib.Message: ... 
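+# Illustrative device/host round-trip (a sketch, not part of the stubs;
+# requires a CUDA-enabled pyarrow build and a visible GPU):
+#
+#   from pyarrow import cuda
+#   ctx = cuda.Context(0)
+#   cbuf = ctx.new_buffer(64)   # device allocation
+#   host = cbuf.copy_to_host()  # back to a CPU pyarrow.Buffer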
+ + +def read_record_batch( + buffer: lib.Buffer, + object: lib.Schema, + *, + dictionary_memo: lib.DictionaryMemo | None = None, + pool: lib.MemoryPool | None = None, +) -> lib.RecordBatch: ... diff --git a/python/pyarrow-stubs/_dataset.pyi b/python/pyarrow-stubs/_dataset.pyi new file mode 100644 index 00000000000..c3b3c4d9bec --- /dev/null +++ b/python/pyarrow-stubs/_dataset.pyi @@ -0,0 +1,669 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + IO, + Any, + Callable, + Generic, + Iterator, + Literal, + NamedTuple, + TypeVar, + overload, +) + +from _typeshed import StrPath + +from . import csv, _json, _parquet, lib +from ._fs import FileSelector, FileSystem, SupportedFileSystem +from ._stubs_typing import Indices, JoinType, Order +from .acero import ExecNodeOptions +from .compute import Expression +from .ipc import IpcWriteOptions, RecordBatchReader + + +class Dataset(lib._Weakrefable): + + @property + def partition_expression(self) -> Expression: ... + + def replace_schema(self, schema: lib.Schema) -> None: ... + + def get_fragments(self, filter: Expression | None = None): ... + + def scanner( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + + def to_batches( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: ... + + def to_table( + self, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... + + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... 
+ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... + + def count_rows( + self, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: ... + + @property + def schema(self) -> lib.Schema: ... + + def filter(self, expression: Expression) -> Self: ... + + def sort_by(self, sorting: str | + list[tuple[str, Order]], **kwargs) -> InMemoryDataset: ... + + def join( + self, + right_dataset: Dataset, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> InMemoryDataset: ... + + def join_asof( + self, + right_dataset: Dataset, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> InMemoryDataset: ... + + +class InMemoryDataset(Dataset): + ... + + +class UnionDataset(Dataset): + + @property + def children(self) -> list[Dataset]: ... + + +class FileSystemDataset(Dataset): + + def __init__( + self, + fragments: list[Fragment], + schema: lib.Schema, + format: FileFormat, + filesystem: SupportedFileSystem | None = None, + root_partition: Expression | None = None, + ) -> None: ... + + @classmethod + def from_paths( + cls, + paths: list[str], + schema: lib.Schema | None = None, + format: FileFormat | None = None, + filesystem: SupportedFileSystem | None = None, + partitions: list[Expression] | None = None, + root_partition: Expression | None = None, + ) -> FileSystemDataset: ... + + @property + def filesystem(self) -> FileSystem: ... + @property + def partitioning(self) -> Partitioning | None: ... + + @property + def files(self) -> list[str]: ... + + @property + def format(self) -> FileFormat: ... + + +class FileWriteOptions(lib._Weakrefable): + @property + def format(self) -> FileFormat: ... + + +class FileFormat(lib._Weakrefable): + def inspect( + self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None + ) -> lib.Schema: ... + + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + *, + file_size: int | None = None, + ) -> Fragment: ... + + def make_write_options(self) -> FileWriteOptions: ... + @property + def default_extname(self) -> str: ... + @property + def default_fragment_scan_options(self) -> FragmentScanOptions: ... + @default_fragment_scan_options.setter + def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... + + +class Fragment(lib._Weakrefable): + + @property + def physical_schema(self) -> lib.Schema: ... + + @property + def partition_expression(self) -> Expression: ... 
+ + def scanner( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + + def to_batches( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: ... + + def to_table( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... + + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... + + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: ... + + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: ... + + +class FileFragment(Fragment): + + def open(self) -> lib.NativeFile: ... + + @property + def path(self) -> str: ... + + @property + def filesystem(self) -> FileSystem: ... + + @property + def buffer(self) -> lib.Buffer: ... + + @property + def format(self) -> FileFormat: ... + + +class FragmentScanOptions(lib._Weakrefable): + + @property + def type_name(self) -> str: ... + + +class IpcFileWriteOptions(FileWriteOptions): + @property + def write_options(self) -> IpcWriteOptions: ... + @write_options.setter + def write_options(self, write_options: IpcWriteOptions) -> None: ... + + +class IpcFileFormat(FileFormat): + def equals(self, other: IpcFileFormat) -> bool: ... + def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... + @property + def default_extname(self) -> str: ... + + +class FeatherFileFormat(IpcFileFormat): + ... + + +class CsvFileFormat(FileFormat): + + def __init__( + self, + parse_options: csv.ParseOptions | None = None, + default_fragment_scan_options: CsvFragmentScanOptions | None = None, + convert_options: csv.ConvertOptions | None = None, + read_options: csv.ReadOptions | None = None, + ) -> None: ... 
+ def make_write_options(self) -> csv.WriteOptions: ... # type: ignore[override] + @property + def parse_options(self) -> csv.ParseOptions: ... + @parse_options.setter + def parse_options(self, parse_options: csv.ParseOptions) -> None: ... + def equals(self, other: CsvFileFormat) -> bool: ... + + +class CsvFragmentScanOptions(FragmentScanOptions): + + convert_options: csv.ConvertOptions + read_options: csv.ReadOptions + + def __init__( + self, convert_options: csv.ConvertOptions, read_options: csv.ReadOptions + ) -> None: ... + def equals(self, other: CsvFragmentScanOptions) -> bool: ... + + +class CsvFileWriteOptions(FileWriteOptions): + write_options: csv.WriteOptions + + +class JsonFileFormat(FileFormat): + + def __init__( + self, + default_fragment_scan_options: JsonFragmentScanOptions | None = None, + parse_options: _json.ParseOptions | None = None, + read_options: _json.ReadOptions | None = None, + ) -> None: ... + def equals(self, other: JsonFileFormat) -> bool: ... + + +class JsonFragmentScanOptions(FragmentScanOptions): + + parse_options: _json.ParseOptions + read_options: _json.ReadOptions + + def __init__( + self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions + ) -> None: ... + def equals(self, other: JsonFragmentScanOptions) -> bool: ... + + +class Partitioning(lib._Weakrefable): + def parse(self, path: str) -> Expression: ... + + def format(self, expr: Expression) -> tuple[str, str]: ... + + @property + def schema(self) -> lib.Schema: ... + + +class PartitioningFactory(lib._Weakrefable): + @property + def type_name(self) -> str: ... + + +class KeyValuePartitioning(Partitioning): + @property + def dictionaries(self) -> list[lib.Array | None]: ... + + +class DirectoryPartitioning(KeyValuePartitioning): + + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: ... + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + + +class HivePartitioning(KeyValuePartitioning): + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + null_fallback: str = "__HIVE_DEFAULT_PARTITION__", + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + + @staticmethod + def discover( + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + null_fallback="__HIVE_DEFAULT_PARTITION__", + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: ... + + +class FilenamePartitioning(KeyValuePartitioning): + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: ... + + +class DatasetFactory(lib._Weakrefable): + + root_partition: Expression + def finish(self, schema: lib.Schema | None = None) -> Dataset: ... + + def inspect(self) -> lib.Schema: ... + + def inspect_schemas(self) -> list[lib.Schema]: ... 
+
+
+class FileSystemFactoryOptions(lib._Weakrefable):
+
+    partitioning: Partitioning
+    partitioning_factory: PartitioningFactory
+    partition_base_dir: str
+    exclude_invalid_files: bool
+    selector_ignore_prefixes: list[str]
+
+    def __init__(
+        self,
+        partition_base_dir: str | None = None,
+        partitioning: Partitioning | PartitioningFactory | None = None,
+        exclude_invalid_files: bool = True,
+        selector_ignore_prefixes: list[str] | None = None,
+    ) -> None: ...
+
+
+class FileSystemDatasetFactory(DatasetFactory):
+
+    def __init__(
+        self,
+        filesystem: SupportedFileSystem,
+        paths_or_selector: FileSelector,
+        format: FileFormat,
+        options: FileSystemFactoryOptions | None = None,
+    ) -> None: ...
+
+
+class UnionDatasetFactory(DatasetFactory):
+
+    def __init__(self, factories: list[DatasetFactory]) -> None: ...
+
+
+_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch)
+
+
+class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]):
+
+    def __iter__(self) -> Self: ...
+    def __next__(self) -> _RecordBatchT: ...
+
+
+class TaggedRecordBatch(NamedTuple):
+
+    record_batch: lib.RecordBatch
+    fragment: Fragment
+
+
+class TaggedRecordBatchIterator(lib._Weakrefable):
+
+    def __iter__(self) -> Self: ...
+    def __next__(self) -> TaggedRecordBatch: ...
+
+
+class Scanner(lib._Weakrefable):
+
+    @staticmethod
+    def from_dataset(
+        dataset: Dataset,
+        *,
+        columns: list[str] | dict[str, Expression] | None = None,
+        filter: Expression | None = None,
+        batch_size: int = ...,
+        batch_readahead: int = 16,
+        fragment_readahead: int = 4,
+        fragment_scan_options: FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        cache_metadata: bool = True,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> Scanner: ...
+
+    @staticmethod
+    def from_fragment(
+        fragment: Fragment,
+        *,
+        schema: lib.Schema | None = None,
+        columns: list[str] | dict[str, Expression] | None = None,
+        filter: Expression | None = None,
+        batch_size: int = ...,
+        batch_readahead: int = 16,
+        fragment_readahead: int = 4,
+        fragment_scan_options: FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        cache_metadata: bool = True,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> Scanner: ...
+
+    @staticmethod
+    def from_batches(
+        source: Iterator[lib.RecordBatch] | RecordBatchReader,
+        *,
+        schema: lib.Schema | None = None,
+        columns: list[str] | dict[str, Expression] | None = None,
+        filter: Expression | None = None,
+        batch_size: int = ...,
+        batch_readahead: int = 16,
+        fragment_readahead: int = 4,
+        fragment_scan_options: FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        cache_metadata: bool = True,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> Scanner: ...
+
+    @property
+    def dataset_schema(self) -> lib.Schema: ...
+
+    @property
+    def projected_schema(self) -> lib.Schema: ...
+
+    def to_batches(self) -> Iterator[lib.RecordBatch]: ...
+
+    def scan_batches(self) -> TaggedRecordBatchIterator: ...
+
+    def to_table(self) -> lib.Table: ...
+
+    def take(self, indices: Indices) -> lib.Table: ...
+
+    def head(self, num_rows: int) -> lib.Table: ...
+
+    def count_rows(self) -> int: ...
+
+    def to_reader(self) -> RecordBatchReader: ...
+
+
+def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: ...
+
+
+class WrittenFile(lib._Weakrefable):
+
+    def __init__(self, path: str, metadata: _parquet.FileMetaData | None,
+                 size: int) -> None: ...
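
For context, a typical round trip through the Scanner declared above looks roughly like this; the column names and the predicate are hypothetical:

    import pyarrow.dataset as ds
    from pyarrow.dataset import Scanner

    dataset = ds.dataset("data/", format="parquet")
    scanner = Scanner.from_dataset(
        dataset,
        columns=["id", "value"],          # projection
        filter=ds.field("value") > 0,     # predicate pushed into the scan
    )
    table = scanner.to_table()            # materialize the filtered projection

    # Counting can skip materialization entirely by going through the dataset.
    n = dataset.count_rows(filter=ds.field("value") > 0)
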
+
+
+def _filesystemdataset_write(
+    data: Scanner,
+    base_dir: StrPath,
+    basename_template: str,
+    filesystem: SupportedFileSystem,
+    partitioning: Partitioning,
+    file_options: FileWriteOptions,
+    max_partitions: int,
+    file_visitor: Callable[[str], None],
+    existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"],
+    max_open_files: int,
+    max_rows_per_file: int,
+    min_rows_per_group: int,
+    max_rows_per_group: int,
+    create_dir: bool,
+) -> None: ...
+
+
+class _ScanNodeOptions(ExecNodeOptions):
+    def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ...
+
+
+class ScanNodeOptions(_ScanNodeOptions):
+
+    def __init__(
+        self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs
+    ) -> None: ...
diff --git a/python/pyarrow-stubs/_dataset_orc.pyi b/python/pyarrow-stubs/_dataset_orc.pyi
new file mode 100644
index 00000000000..62f49bf5d30
--- /dev/null
+++ b/python/pyarrow-stubs/_dataset_orc.pyi
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._dataset import FileFormat
+
+
+class OrcFileFormat(FileFormat):
+    def equals(self, other: OrcFileFormat) -> bool: ...
+    @property
+    def default_extname(self) -> str: ...
diff --git a/python/pyarrow-stubs/_dataset_parquet.pyi b/python/pyarrow-stubs/_dataset_parquet.pyi
new file mode 100644
index 00000000000..df9536ef725
--- /dev/null
+++ b/python/pyarrow-stubs/_dataset_parquet.pyi
@@ -0,0 +1,163 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from dataclasses import dataclass
+from typing import IO, Any, Iterable, TypedDict
+
+from _typeshed import StrPath
+
+from ._compute import Expression
+from ._dataset import (
+    DatasetFactory,
+    FileFormat,
+    FileFragment,
+    FileWriteOptions,
+    Fragment,
+    FragmentScanOptions,
+    Partitioning,
+    PartitioningFactory,
+)
+from ._dataset_parquet_encryption import ParquetDecryptionConfig
+from ._fs import SupportedFileSystem
+from ._parquet import FileDecryptionProperties, FileMetaData
+from .lib import CacheOptions, Schema, _Weakrefable
+
+parquet_encryption_enabled: bool
+
+
+class ParquetFileFormat(FileFormat):
+
+    def __init__(
+        self,
+        read_options: ParquetReadOptions | None = None,
+        default_fragment_scan_options: ParquetFragmentScanOptions | None = None,
+        **kwargs,
+    ) -> None: ...
+    @property
+    def read_options(self) -> ParquetReadOptions: ...
+    def make_write_options(
+        self) -> ParquetFileWriteOptions: ...  # type: ignore[override]
+
+    def equals(self, other: ParquetFileFormat) -> bool: ...
+    @property
+    def default_extname(self) -> str: ...
+
+    def make_fragment(
+        self,
+        file: StrPath | IO,
+        filesystem: SupportedFileSystem | None = None,
+        partition_expression: Expression | None = None,
+        row_groups: Iterable[int] | None = None,
+        *,
+        file_size: int | None = None,
+    ) -> Fragment: ...
+
+
+class _NameStats(TypedDict):
+    min: Any
+    max: Any
+
+
+class RowGroupInfo:
+
+    id: int
+    metadata: FileMetaData
+    schema: Schema
+
+    def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ...
+    @property
+    def num_rows(self) -> int: ...
+    @property
+    def total_byte_size(self) -> int: ...
+    @property
+    def statistics(self) -> dict[str, _NameStats]: ...
+
+
+class ParquetFileFragment(FileFragment):
+
+    def ensure_complete_metadata(self) -> None: ...
+    @property
+    def row_groups(self) -> list[RowGroupInfo]: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def num_row_groups(self) -> int: ...
+
+    def split_by_row_group(
+        self, filter: Expression | None = None, schema: Schema | None = None
+    ) -> list[Fragment]: ...
+
+    def subset(
+        self,
+        filter: Expression | None = None,
+        schema: Schema | None = None,
+        row_group_ids: list[int] | None = None,
+    ) -> ParquetFileFragment: ...
+
+
+class ParquetReadOptions(_Weakrefable):
+
+    def __init__(
+        self, dictionary_columns: list[str] | None = None, coerce_int96_timestamp_unit: str | None = None
+    ) -> None: ...
+    @property
+    def coerce_int96_timestamp_unit(self) -> str: ...
+    @coerce_int96_timestamp_unit.setter
+    def coerce_int96_timestamp_unit(self, unit: str) -> None: ...
+    def equals(self, other: ParquetReadOptions) -> bool: ...
+
+
+class ParquetFileWriteOptions(FileWriteOptions):
+    def update(self, **kwargs) -> None: ...
+    def _set_properties(self) -> None: ...
+    def _set_arrow_properties(self) -> None: ...
+    def _set_encryption_config(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ParquetFragmentScanOptions(FragmentScanOptions):
+
+    use_buffered_stream: bool = False
+    buffer_size: int = 8192
+    pre_buffer: bool = True
+    cache_options: CacheOptions | None = None
+    thrift_string_size_limit: int | None = None
+    thrift_container_size_limit: int | None = None
+    decryption_config: ParquetDecryptionConfig | None = None
+    decryption_properties: FileDecryptionProperties | None = None
+    page_checksum_verification: bool = False
+
+    def equals(self, other: ParquetFragmentScanOptions) -> bool: ...
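
As a usage sketch, the scan options above are usually attached to a ParquetFileFormat and handed to the dataset factory; the buffer size and path below are illustrative only:

    import pyarrow.dataset as ds

    fmt = ds.ParquetFileFormat(
        default_fragment_scan_options=ds.ParquetFragmentScanOptions(
            use_buffered_stream=True,
            buffer_size=64 * 1024,
            pre_buffer=True,  # coalesce reads; helps on high-latency filesystems
        )
    )
    dataset = ds.dataset("data/", format=fmt)
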
+
+
+@dataclass
+class ParquetFactoryOptions(_Weakrefable):
+
+    partition_base_dir: str | None = None
+    partitioning: Partitioning | PartitioningFactory | None = None
+    validate_column_chunk_paths: bool = False
+
+
+class ParquetDatasetFactory(DatasetFactory):
+
+    def __init__(
+        self,
+        metadata_path: str,
+        filesystem: SupportedFileSystem,
+        format: FileFormat,
+        options: ParquetFactoryOptions | None = None,
+    ) -> None: ...
diff --git a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi
new file mode 100644
index 00000000000..d8338776481
--- /dev/null
+++ b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions
+from ._parquet import FileDecryptionProperties
+from ._parquet_encryption import CryptoFactory, DecryptionConfiguration, EncryptionConfiguration, KmsConnectionConfig
+from .lib import _Weakrefable
+
+
+class ParquetEncryptionConfig(_Weakrefable):
+
+    def __init__(
+        self,
+        crypto_factory: CryptoFactory,
+        kms_connection_config: KmsConnectionConfig,
+        encryption_config: EncryptionConfiguration,
+    ) -> None: ...
+
+
+class ParquetDecryptionConfig(_Weakrefable):
+
+    def __init__(
+        self,
+        crypto_factory: CryptoFactory,
+        kms_connection_config: KmsConnectionConfig,
+        decryption_config: DecryptionConfiguration,
+    ) -> None: ...
+
+
+def set_encryption_config(
+    opts: ParquetFileWriteOptions,
+    config: ParquetEncryptionConfig,
+) -> None: ...
+
+
+def set_decryption_properties(
+    opts: ParquetFragmentScanOptions,
+    config: FileDecryptionProperties,
+) -> None: ...
+
+
+def set_decryption_config(
+    opts: ParquetFragmentScanOptions,
+    config: ParquetDecryptionConfig,
+) -> None: ...
diff --git a/python/pyarrow-stubs/_feather.pyi b/python/pyarrow-stubs/_feather.pyi
new file mode 100644
index 00000000000..edd3a089f82
--- /dev/null
+++ b/python/pyarrow-stubs/_feather.pyi
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +from typing import IO + +from _typeshed import StrPath + +from .lib import Buffer, NativeFile, Table, _Weakrefable + + +class FeatherError(Exception): + ... + + +def write_feather( + table: Table, + dest: StrPath | IO | NativeFile, + compression: str | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: int = 2, +): ... + + +class FeatherReader(_Weakrefable): + def __init__( + self, + source: StrPath | IO | NativeFile | Buffer, + use_memory_map: bool, + use_threads: bool, + ) -> None: ... + @property + def version(self) -> str: ... + def read(self) -> Table: ... + def read_indices(self, indices: list[int]) -> Table: ... + def read_names(self, names: list[str]) -> Table: ... diff --git a/python/pyarrow-stubs/_flight.pyi b/python/pyarrow-stubs/_flight.pyi new file mode 100644 index 00000000000..e4d226a9f60 --- /dev/null +++ b/python/pyarrow-stubs/_flight.pyi @@ -0,0 +1,656 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +import enum +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Generator, Generic, Iterable, Iterator, NamedTuple, TypeVar + +from typing_extensions import deprecated + +from .ipc import _ReadPandasMixin +from .lib import ( + ArrowCancelled, + ArrowException, + ArrowInvalid, + Buffer, + IpcReadOptions, + IpcWriteOptions, + RecordBatch, + RecordBatchReader, + Schema, + Table, + TimestampScalar, + _CRecordBatchWriter, + _Weakrefable, +) + +_T = TypeVar("_T") + +class FlightCallOptions(_Weakrefable): + + + def __init__( + self, + timeout: float | None = None, + write_options: IpcWriteOptions | None = None, + headers: list[tuple[str, str]] | None = None, + read_options: IpcReadOptions | None = None, + ) -> None: ... + + +class CertKeyPair(NamedTuple): + + + cert: str + key: str + +class FlightError(Exception): + + + extra_info: bytes + +class FlightInternalError(FlightError, ArrowException): ... + + +class FlightTimedOutError(FlightError, ArrowException): ... + + +class FlightCancelledError(FlightError, ArrowCancelled): ... + + +class FlightServerError(FlightError, ArrowException): ... + + +class FlightUnauthenticatedError(FlightError, ArrowException): ... + + +class FlightUnauthorizedError(FlightError, ArrowException): ... + + +class FlightUnavailableError(FlightError, ArrowException): ... + + +class FlightWriteSizeExceededError(ArrowInvalid): + + + limit: int + actual: int + +class Action(_Weakrefable): + + + def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: ... + + @property + def type(self) -> str: ... + + @property + def body(self) -> Buffer: ... + + def serialize(self) -> bytes: ... + + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... 
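
For orientation, `Action` and `Result` form the request/response pair of the DoAction RPC; a minimal client-side sketch (the server location and the action name are hypothetical):

    import pyarrow.flight as flight

    client = flight.connect("grpc://localhost:8815")
    action = flight.Action("refresh-cache", b"")  # action type plus opaque body
    for result in client.do_action(action):
        print(result.body.to_pybytes())           # each Result carries a Buffer
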
+ + +class ActionType(NamedTuple): + + + type: str + description: str + + def make_action(self, buf: Buffer | bytes) -> Action: ... + + +class Result(_Weakrefable): + + def __init__(self, buf: Buffer | bytes) -> None: ... + + @property + def body(self) -> Buffer: ... + + def serialize(self) -> bytes: ... + + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + + +class BasicAuth(_Weakrefable): + + def __init__( + self, username: str | bytes | None = None, password: str | bytes | None = None + ) -> None: ... + + @property + def username(self) -> bytes: ... + @property + def password(self) -> bytes: ... + def serialize(self) -> str: ... + @staticmethod + def deserialize(serialized: str | bytes) -> BasicAuth: ... + +class DescriptorType(enum.Enum): + + + UNKNOWN = 0 + PATH = 1 + CMD = 2 + +class FlightMethod(enum.Enum): + + + INVALID = 0 + HANDSHAKE = 1 + LIST_FLIGHTS = 2 + GET_FLIGHT_INFO = 3 + GET_SCHEMA = 4 + DO_GET = 5 + DO_PUT = 6 + DO_ACTION = 7 + LIST_ACTIONS = 8 + DO_EXCHANGE = 9 + +class FlightDescriptor(_Weakrefable): + + @staticmethod + def for_path(*path: str | bytes) -> FlightDescriptor: ... + + + @staticmethod + def for_command(command: str | bytes) -> FlightDescriptor: ... + + @property + def descriptor_type(self) -> DescriptorType: ... + + @property + def path(self) -> list[bytes] | None: ... + + @property + def command(self) -> bytes | None: ... + + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Ticket(_Weakrefable): + + def __init__(self, ticket: str | bytes) -> None: ... + @property + def ticket(self) -> bytes: ... + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Location(_Weakrefable): + + def __init__(self, uri: str | bytes) -> None: ... + @property + def uri(self) -> bytes: ... + def equals(self, other: Location) -> bool: ... + @staticmethod + def for_grpc_tcp(host: str | bytes, port: int) -> Location: ... + + @staticmethod + def for_grpc_tls(host: str | bytes, port: int) -> Location: ... + + @staticmethod + def for_grpc_unix(path: str | bytes) -> Location: ... + + +class FlightEndpoint(_Weakrefable): + + def __init__( + self, + ticket: Ticket | str | bytes, + locations: list[str | Location], + expiration_time: TimestampScalar | None = ..., + app_metadata: bytes | str = ..., + ): ... + + @property + def ticket(self) -> Ticket: ... + + @property + def locations(self) -> list[Location]: ... + + def serialize(self) -> bytes: ... + @property + def expiration_time(self) -> TimestampScalar | None: ... + + @property + def app_metadata(self) -> bytes | str: ... + + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class SchemaResult(_Weakrefable): + + def __init__(self, schema: Schema) -> None: ... + + @property + def schema(self) -> Schema: ... + + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightInfo(_Weakrefable): + + def __init__( + self, + schema: Schema, + descriptor: FlightDescriptor, + endpoints: list[FlightEndpoint], + total_records: int = ..., + total_bytes: int = ..., + ordered: bool = ..., + app_metadata: bytes | str = ..., + ) -> None: ... + + @property + def schema(self) -> Schema: ... + + @property + def descriptor(self) -> FlightDescriptor: ... + + @property + def endpoints(self) -> list[FlightEndpoint]: ... + + @property + def total_records(self) -> int: ... + + @property + def total_bytes(self) -> int: ... 
+ + @property + def ordered(self) -> bool: ... + + @property + def app_metadata(self) -> bytes | str: ... + + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightStreamChunk(_Weakrefable): + + @property + def data(self) -> RecordBatch | None: ... + @property + def app_metadata(self) -> Buffer | None: ... + def __iter__(self): ... + +class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): + + + # Needs to be separate class so the "real" class can subclass the + # pure-Python mixin class + + def __iter__(self) -> Self: ... + def __next__(self) -> FlightStreamChunk: ... + @property + def schema(self) -> Schema: ... + + def read_all(self) -> Table: ... + + def read_chunk(self) -> FlightStreamChunk: ... + + def to_reader(self) -> RecordBatchReader: ... + + +class MetadataRecordBatchReader(_MetadataRecordBatchReader): ... + + +class FlightStreamReader(MetadataRecordBatchReader): + + def cancel(self) -> None: ... + + def read_all(self) -> Table: ... + + +class MetadataRecordBatchWriter(_CRecordBatchWriter): + + + def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: ... + + def write_metadata(self, buf: Buffer) -> None: ... + + def write_batch(self, batch: RecordBatch) -> None: ... # type: ignore[override] + + def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: ... + + def close(self) -> None: ... + + def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: ... + + +class FlightStreamWriter(MetadataRecordBatchWriter): + + def done_writing(self) -> None: ... + + +class FlightMetadataReader(_Weakrefable): + + def read(self) -> Buffer | None: ... + + +class FlightMetadataWriter(_Weakrefable): + + def write(self, message: Buffer) -> None: ... + + +class AsyncioCall(Generic[_T]): + + + _future: asyncio.Future[_T] + + def as_awaitable(self) -> asyncio.Future[_T]: ... + def wakeup(self, result_or_exception: BaseException | _T) -> None: ... + +class AsyncioFlightClient: + + + def __init__(self, client: FlightClient) -> None: ... + async def get_flight_info( + self, + descriptor: FlightDescriptor, + *, + options: FlightCallOptions | None = None, + ): ... + +class FlightClient(_Weakrefable): + + def __init__( + self, + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, + ): ... + @property + def supports_async(self) -> bool: ... + def as_async(self) -> AsyncioFlightClient: ... + def wait_for_available(self, timeout: int = 5) -> None: ... + + @deprecated( + "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." + ) + @classmethod + def connect( + cls, + location: str | tuple[str, int] | Location, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + disable_server_verification: bool = False, + ) -> FlightClient: ... + + def authenticate( + self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None + ) -> None: ... + + def authenticate_basic_token( + self, username: str, password: str, options: FlightCallOptions | None = None + ) -> tuple[str, str]: ... 
+ + def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: ... + + def do_action( + self, action: Action, options: FlightCallOptions | None = None + ) -> Iterator[Result]: ... + + def list_flights( + self, criteria: str | None = None, options: FlightCallOptions | None = None + ) -> Generator[FlightInfo, None, None]: ... + + def get_flight_info( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> FlightInfo: ... + + def get_schema( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> Schema: ... + + def do_get( + self, ticket: Ticket, options: FlightCallOptions | None = None + ) -> FlightStreamReader: ... + + def do_put( + self, + descriptor: FlightDescriptor, + schema: Schema, + options: FlightCallOptions | None = None, + ) -> tuple[FlightStreamWriter, FlightStreamReader]: ... + + def do_exchange( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> tuple[FlightStreamWriter, FlightStreamReader]: ... + + def close(self) -> None: ... + + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... + +class FlightDataStream(_Weakrefable): ... + + +class RecordBatchStream(FlightDataStream): + + def __init__( + self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None + ) -> None: ... + + +class GeneratorStream(FlightDataStream): + + def __init__( + self, + schema: Schema, + generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], + options: IpcWriteOptions | None = None, + ) -> None: ... + + +class ServerCallContext(_Weakrefable): + + def peer_identity(self) -> bytes: ... + + def peer(self) -> str: ... + + # Set safe=True as gRPC on Windows sometimes gives garbage bytes + def is_cancelled(self) -> bool: ... + + def add_header(self, key: str, value: str) -> None: ... + + def add_trailer(self, key: str, value: str) -> None: ... + + def get_middleware(self, key: str) -> ServerMiddleware | None: ... + + +class ServerAuthReader(_Weakrefable): + + def read(self) -> str: ... + +class ServerAuthSender(_Weakrefable): + + def write(self, message: str) -> None: ... + +class ClientAuthReader(_Weakrefable): + + def read(self) -> str: ... + +class ClientAuthSender(_Weakrefable): + + def write(self, message: str) -> None: ... + +class ServerAuthHandler(_Weakrefable): + + def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): ... + + def is_valid(self, token: str) -> bool: ... + + +class ClientAuthHandler(_Weakrefable): + + def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): ... + + def get_token(self) -> str: ... + + +class CallInfo(NamedTuple): + + + method: FlightMethod + +class ClientMiddlewareFactory(_Weakrefable): + + def start_call(self, info: CallInfo) -> ClientMiddleware | None: ... + + +class ClientMiddleware(_Weakrefable): + + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ... + + + def received_headers(self, headers: dict[str, list[str] | list[bytes]]): ... + + + def call_completed(self, exception: ArrowException): ... + + +class ServerMiddlewareFactory(_Weakrefable): + + + def start_call( + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> ServerMiddleware | None: ... + + +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): ... + + +class ServerMiddleware(_Weakrefable): + + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ... 
+
+    def call_completed(self, exception: ArrowException): ...
+
+
+class TracingServerMiddleware(ServerMiddleware):
+    trace_context: dict
+    def __init__(self, trace_context: dict) -> None: ...
+
+class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory):
+
+
+    def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ...
+    def start_call(  # type: ignore[override]
+        self, info: CallInfo, headers: dict[str, list[str] | list[bytes]]
+    ) -> _ServerMiddlewareWrapper | None: ...
+
+class _ServerMiddlewareWrapper(ServerMiddleware):
+    def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ...
+    def sending_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ...
+    def call_completed(self, exception: ArrowException) -> None: ...
+
+class _FlightServerFinalizer(_Weakrefable):
+
+
+    def finalize(self) -> None: ...
+
+class FlightServerBase(_Weakrefable):
+
+    def __init__(
+        self,
+        location: str | tuple[str, int] | Location | None = None,
+        auth_handler: ServerAuthHandler | None = None,
+        tls_certificates: list[tuple[str, str]] | None = None,
+        verify_client: bool = False,
+        root_certificates: str | None = None,
+        middleware: dict[str, ServerMiddlewareFactory] | None = None,
+    ): ...
+    @property
+    def port(self) -> int: ...
+
+    def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: ...
+
+    def get_flight_info(
+        self, context: ServerCallContext, descriptor: FlightDescriptor
+    ) -> FlightInfo: ...
+
+    def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: ...
+
+    def do_put(
+        self,
+        context: ServerCallContext,
+        descriptor: FlightDescriptor,
+        reader: MetadataRecordBatchReader,
+        writer: FlightMetadataWriter,
+    ) -> None: ...
+
+    def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: ...
+
+    def do_exchange(
+        self,
+        context: ServerCallContext,
+        descriptor: FlightDescriptor,
+        reader: MetadataRecordBatchReader,
+        writer: MetadataRecordBatchWriter,
+    ) -> None: ...
+
+    def list_actions(self, context: ServerCallContext) -> Iterable[Action]: ...
+
+    def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: ...
+
+    def serve(self) -> None: ...
+
+    def run(self) -> None: ...
+
+    def shutdown(self) -> None: ...
+
+    def wait(self) -> None: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_value, traceback): ...
+
+def connect(
+    location: str | tuple[str, int] | Location,
+    *,
+    tls_root_certs: str | None = None,
+    cert_chain: str | None = None,
+    private_key: str | None = None,
+    override_hostname: str | None = None,
+    middleware: list[ClientMiddlewareFactory] | None = None,
+    write_size_limit_bytes: int | None = None,
+    disable_server_verification: bool = False,
+    generic_options: list[tuple[str, int | str]] | None = None,
+) -> FlightClient: ...
diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi
new file mode 100644
index 00000000000..9ec5c543c58
--- /dev/null
+++ b/python/pyarrow-stubs/_fs.pyi
@@ -0,0 +1,216 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+import enum
+import sys
+
+from abc import ABC, abstractmethod
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+
+from typing import Union
+
+from fsspec import AbstractFileSystem  # type: ignore
+
+from .lib import NativeFile, _Weakrefable
+
+
+class FileType(enum.IntFlag):
+    NotFound = enum.auto()
+    Unknown = enum.auto()
+    File = enum.auto()
+    Directory = enum.auto()
+
+
+class FileInfo(_Weakrefable):
+
+    def __init__(
+        self,
+        path: str,
+        type: FileType = FileType.Unknown,
+        *,
+        mtime: dt.datetime | float | None = None,
+        mtime_ns: int | None = None,
+        size: int | None = None,
+    ): ...
+    @property
+    def type(self) -> FileType: ...
+
+    @property
+    def is_file(self) -> bool: ...
+    @property
+    def path(self) -> str: ...
+
+    @property
+    def base_name(self) -> str: ...
+
+    @property
+    def size(self) -> int: ...
+
+    @property
+    def extension(self) -> str: ...
+
+    @property
+    def mtime(self) -> dt.datetime | None: ...
+
+    @property
+    def mtime_ns(self) -> int | None: ...
+
+
+class FileSelector(_Weakrefable):
+
+    base_dir: str
+    allow_not_found: bool
+    recursive: bool
+    def __init__(self, base_dir: str, allow_not_found: bool = False,
+                 recursive: bool = False): ...
+
+
+class FileSystem(_Weakrefable):
+
+    @classmethod
+    def from_uri(cls, uri: str) -> tuple[Self, str]: ...
+
+    def equals(self, other: FileSystem) -> bool: ...
+
+    @property
+    def type_name(self) -> str: ...
+
+    def get_file_info(self, paths_or_selector: str | FileSelector |
+                      list[str]) -> FileInfo | list[FileInfo]: ...
+
+    def create_dir(self, path: str, *, recursive: bool = True) -> None: ...
+
+    def delete_dir(self, path: str) -> None: ...
+
+    def delete_dir_contents(
+        self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False
+    ) -> None: ...
+
+    def move(self, src: str, dest: str) -> None: ...
+
+    def copy_file(self, src: str, dest: str) -> None: ...
+
+    def delete_file(self, path: str) -> None: ...
+
+    def open_input_file(self, path: str) -> NativeFile: ...
+
+    def open_input_stream(
+        self, path: str, compression: str | None = "detect", buffer_size: int | None = None
+    ) -> NativeFile: ...
+
+    def open_output_stream(
+        self,
+        path: str,
+        compression: str | None = "detect",
+        buffer_size: int | None = None,
+        metadata: dict[str, str] | None = None,
+    ) -> NativeFile: ...
+
+    def open_append_stream(
+        self,
+        path: str,
+        compression: str | None = "detect",
+        buffer_size: int | None = None,
+        metadata: dict[str, str] | None = None,
+    ): ...
+
+    def normalize_path(self, path: str) -> str: ...
+
+
+class LocalFileSystem(FileSystem):
+
+    def __init__(self, *, use_mmap: bool = False) -> None: ...
+
+
+class SubTreeFileSystem(FileSystem):
+
+    def __init__(self, base_path: str, base_fs: FileSystem): ...
+    @property
+    def base_path(self) -> str: ...
+    @property
+    def base_fs(self) -> FileSystem: ...
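
A short sketch of how the FileSystem surface above is typically driven; the paths are hypothetical:

    from pyarrow import fs

    local = fs.LocalFileSystem()
    for info in local.get_file_info(fs.FileSelector("data/", recursive=True)):
        if info.type == fs.FileType.File:
            print(info.path, info.size)

    # from_uri() splits a URI into a concrete filesystem plus the path within it.
    s3, path = fs.FileSystem.from_uri("s3://bucket/key.parquet")
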
+ + +class _MockFileSystem(FileSystem): + def __init__(self, current_time: dt.datetime | None = None) -> None: ... + + +class PyFileSystem(FileSystem): + + def __init__(self, handler: FileSystemHandler) -> None: ... + @property + def handler(self) -> FileSystemHandler: ... + + +class FileSystemHandler(ABC): + + @abstractmethod + def get_type_name(self) -> str: ... + + @abstractmethod + def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: ... + + @abstractmethod + def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: ... + + @abstractmethod + def create_dir(self, path: str, recursive: bool) -> None: ... + + @abstractmethod + def delete_dir(self, path: str) -> None: ... + + @abstractmethod + def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: ... + + @abstractmethod + def delete_root_dir_contents(self) -> None: ... + + @abstractmethod + def delete_file(self, path: str) -> None: ... + + @abstractmethod + def move(self, src: str, dest: str) -> None: ... + + @abstractmethod + def copy_file(self, src: str, dest: str) -> None: ... + + @abstractmethod + def open_input_stream(self, path: str) -> NativeFile: ... + + @abstractmethod + def open_input_file(self, path: str) -> NativeFile: ... + + @abstractmethod + def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ... + + @abstractmethod + def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ... + + @abstractmethod + def normalize_path(self, path: str) -> str: ... + + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] diff --git a/python/pyarrow-stubs/_gcsfs.pyi b/python/pyarrow-stubs/_gcsfs.pyi new file mode 100644 index 00000000000..631c7ae4932 --- /dev/null +++ b/python/pyarrow-stubs/_gcsfs.pyi @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from ._fs import FileSystem +from .lib import KeyValueMetadata + + +class GcsFileSystem(FileSystem): + + def __init__( + self, + *, + anonymous: bool = False, + access_token: str | None = None, + target_service_account: str | None = None, + credential_token_expiration: dt.datetime | None = None, + default_bucket_location: str = "US", + scheme: str = "https", + endpoint_override: str | None = None, + default_metadata: dict | KeyValueMetadata | None = None, + retry_time_limit: dt.timedelta | None = None, + project_id: str | None = None, + ): ... + @property + def default_bucket_location(self) -> str: ... + + @property + def project_id(self) -> str: ... 
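
As an illustration of the constructor above, anonymous read-only access to a public bucket might look like this; the bucket and object names are made up:

    from pyarrow import fs
    import pyarrow.parquet as pq

    gcs = fs.GcsFileSystem(anonymous=True, default_bucket_location="US")
    table = pq.read_table("some-public-bucket/path/data.parquet", filesystem=gcs)
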
diff --git a/python/pyarrow-stubs/_hdfs.pyi b/python/pyarrow-stubs/_hdfs.pyi
new file mode 100644
index 00000000000..ee1253d64b6
--- /dev/null
+++ b/python/pyarrow-stubs/_hdfs.pyi
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from _typeshed import StrPath
+
+from ._fs import FileSystem
+
+
+class HadoopFileSystem(FileSystem):
+
+    def __init__(
+        self,
+        host: str,
+        port: int = 8020,
+        *,
+        user: str | None = None,
+        replication: int = 3,
+        buffer_size: int = 0,
+        default_block_size: int | None = None,
+        kerb_ticket: StrPath | None = None,
+        extra_conf: dict | None = None,
+    ): ...
+    @staticmethod
+    def from_uri(uri: str) -> HadoopFileSystem: ...  # type: ignore[override]
diff --git a/python/pyarrow-stubs/_ipc.pyi b/python/pyarrow-stubs/_ipc.pyi
new file mode 100644
index 00000000000..6e83541bf5c
--- /dev/null
+++ b/python/pyarrow-stubs/_ipc.pyi
@@ -0,0 +1,287 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+import sys
+
+from io import IOBase
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+from typing import Iterable, Iterator, Literal, Mapping, NamedTuple
+
+import pandas as pd
+
+from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer
+from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable
+
+from .io import Buffer, Codec, NativeFile
+from ._types import DictionaryMemo, KeyValueMetadata
+
+
+class MetadataVersion(enum.IntEnum):
+    V1 = enum.auto()
+    V2 = enum.auto()
+    V3 = enum.auto()
+    V4 = enum.auto()
+    V5 = enum.auto()
+
+
+class WriteStats(NamedTuple):
+    num_messages: int
+    num_record_batches: int
+    num_dictionary_batches: int
+    num_dictionary_deltas: int
+    num_replaced_dictionaries: int
+
+
+class ReadStats(NamedTuple):
+    num_messages: int
+    num_record_batches: int
+    num_dictionary_batches: int
+    num_dictionary_deltas: int
+    num_replaced_dictionaries: int
+
+
+class IpcReadOptions(_Weakrefable):
+
+    ensure_native_endian: bool
+    use_threads: bool
+    included_fields: list[int]
+
+    def __init__(
+        self,
+        *,
+        ensure_native_endian: bool = True,
+        use_threads: bool = True,
+        included_fields: list[int] | None = None,
+    ) -> None: ...
+
+
+class IpcWriteOptions(_Weakrefable):
+    metadata_version: MetadataVersion
+    allow_64bit: bool
+    use_legacy_format: bool
+    compression: Codec | Literal["lz4", "zstd"] | None
+    use_threads: bool
+    emit_dictionary_deltas: bool
+    unify_dictionaries: bool
+
+    def __init__(
+        self,
+        *,
+        metadata_version: MetadataVersion = MetadataVersion.V5,
+        allow_64bit: bool = False,
+        use_legacy_format: bool = False,
+        compression: Codec | Literal["lz4", "zstd"] | None = None,
+        use_threads: bool = True,
+        emit_dictionary_deltas: bool = False,
+        unify_dictionaries: bool = False,
+    ) -> None: ...
+
+
+class Message(_Weakrefable):
+    @property
+    def type(self) -> str: ...
+    @property
+    def metadata(self) -> Buffer: ...
+    @property
+    def metadata_version(self) -> MetadataVersion: ...
+    @property
+    def body(self) -> Buffer | None: ...
+    def equals(self, other: Message) -> bool: ...
+
+    def serialize_to(
+        self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None
+    ): ...
+
+    def serialize(self, alignment: int = 8, memory_pool: MemoryPool |
+                  None = None) -> Buffer: ...
+
+
+class MessageReader(_Weakrefable):
+    @classmethod
+    def open_stream(cls, source: bytes | NativeFile |
+                    IOBase | SupportPyBuffer) -> Self: ...
+
+    def __iter__(self) -> Self: ...
+    def read_next_message(self) -> Message: ...
+
+    __next__ = read_next_message
+
+# ----------------------------------------------------------------------
+# File and stream readers and writers
+
+
+class _CRecordBatchWriter(_Weakrefable):
+    def write(self, table_or_batch: Table | RecordBatch): ...
+
+    def write_batch(
+        self,
+        batch: RecordBatch,
+        custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None,
+    ): ...
+
+    def write_table(self, table: Table, max_chunksize: int | None = None) -> None: ...
+
+    def close(self) -> None: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    @property
+    def stats(self) -> WriteStats: ...
+
+
+class _RecordBatchStreamWriter(_CRecordBatchWriter):
+    @property
+    def _use_legacy_format(self) -> bool: ...
+    @property
+    def _metadata_version(self) -> MetadataVersion: ...
+    def _open(self, sink, schema: Schema,
+              options: IpcWriteOptions = IpcWriteOptions()): ...
+
+
+class _ReadPandasMixin:
+    def read_pandas(self, **options) -> pd.DataFrame: ...
+
+
+class RecordBatchReader(_Weakrefable):
+    def __iter__(self) -> Self: ...
+    def read_next_batch(self) -> RecordBatch: ...
+
+    __next__ = read_next_batch
+    @property
+    def schema(self) -> Schema: ...
+
+    def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: ...
+
+    def iter_batches_with_custom_metadata(
+        self,
+    ) -> Iterator[RecordBatchWithMetadata]: ...
+
+    def read_all(self) -> Table: ...
+
+    # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
+    read_pandas = _ReadPandasMixin.read_pandas
+    def close(self) -> None: ...
+
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    def cast(self, target_schema: Schema) -> Self: ...
+
+    def _export_to_c(self, out_ptr: int) -> None: ...
+
+    @classmethod
+    def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+    def __arrow_c_stream__(self, requested_schema=None): ...
+
+    @classmethod
+    def _import_from_c_capsule(cls, stream) -> Self: ...
+
+    @classmethod
+    def from_stream(cls, data: SupportArrowStream,
+                    schema: Schema | None = None) -> Self: ...
+
+    @classmethod
+    def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: ...
+
+
+class _RecordBatchStreamReader(RecordBatchReader):
+    @property
+    def stats(self) -> ReadStats: ...
+
+
+class _RecordBatchFileWriter(_RecordBatchStreamWriter):
+    ...
+
+
+class RecordBatchWithMetadata(NamedTuple):
+    batch: RecordBatch
+    custom_metadata: KeyValueMetadata
+
+
+class _RecordBatchFileReader(_Weakrefable):
+    @property
+    def num_record_batches(self) -> int: ...
+
+    def get_batch(self, i: int) -> RecordBatch: ...
+
+    get_record_batch = get_batch
+    def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: ...
+
+    def read_all(self) -> Table: ...
+
+    # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
+    read_pandas = _ReadPandasMixin.read_pandas
+    def __enter__(self) -> Self: ...
+    def __exit__(self, exc_type, exc_val, exc_tb): ...
+    @property
+    def schema(self) -> Schema: ...
+    @property
+    def stats(self) -> ReadStats: ...
+
+
+def get_tensor_size(tensor: Tensor) -> int: ...
+
+
+def get_record_batch_size(batch: RecordBatch) -> int: ...
+
+
+def write_tensor(tensor: Tensor, dest: NativeFile) -> int: ...
+
+
+def read_tensor(source: NativeFile) -> Tensor: ...
+
+
+def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: ...
+
+
+def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo |
+                None = None) -> Schema: ...
+
+
+def read_record_batch(
+    obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None
+) -> RecordBatch: ...
+
+
+__all__ = [
+    "MetadataVersion",
+    "WriteStats",
+    "ReadStats",
+    "IpcReadOptions",
+    "IpcWriteOptions",
+    "Message",
+    "MessageReader",
+    "_CRecordBatchWriter",
+    "_RecordBatchStreamWriter",
+    "_ReadPandasMixin",
+    "RecordBatchReader",
+    "_RecordBatchStreamReader",
+    "_RecordBatchFileWriter",
+    "RecordBatchWithMetadata",
+    "_RecordBatchFileReader",
+    "get_tensor_size",
+    "get_record_batch_size",
+    "write_tensor",
+    "read_tensor",
+    "read_message",
+    "read_schema",
+    "read_record_batch",
+]
diff --git a/python/pyarrow-stubs/_json.pyi b/python/pyarrow-stubs/_json.pyi
new file mode 100644
index 00000000000..b52be2bf028
--- /dev/null
+++ b/python/pyarrow-stubs/_json.pyi
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +from typing import IO, Any, Literal + +from _typeshed import StrPath + +from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable + +class ReadOptions(_Weakrefable): + + + use_threads: bool + + block_size: int + + def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... + def equals(self, other: ReadOptions) -> bool: ... + + +class ParseOptions(_Weakrefable): + + + explicit_schema: Schema + + newlines_in_values: bool + + unexpected_field_behavior: Literal["ignore", "error", "infer"] + + def __init__( + self, + explicit_schema: Schema | None = None, + newlines_in_values: bool | None = None, + unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", + ): ... + def equals(self, other: ParseOptions) -> bool: ... + + +class JSONStreamingReader(RecordBatchReader): ... + + +def read_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> Table: ... + + +def open_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> JSONStreamingReader: ... + diff --git a/python/pyarrow-stubs/_orc.pyi b/python/pyarrow-stubs/_orc.pyi new file mode 100644 index 00000000000..7587cc121c3 --- /dev/null +++ b/python/pyarrow-stubs/_orc.pyi @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import IO, Literal + +from .lib import ( + Buffer, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +class ORCReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ... + def metadata(self) -> KeyValueMetadata: ... + def schema(self) -> Schema: ... + def nrows(self) -> int: ... + def nstripes(self) -> int: ... + def file_version(self) -> str: ... + def software_version(self) -> str: ... + def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + def compression_size(self) -> int: ... + def row_index_stride(self) -> int: ... + def writer(self) -> str: ... + def writer_version(self) -> str: ... + def nstripe_statistics(self) -> int: ... + def content_length(self) -> int: ... + def stripe_statistics_length(self) -> int: ... + def file_footer_length(self) -> int: ... + def file_postscript_length(self) -> int: ... + def file_length(self) -> int: ... + def serialized_file_tail(self) -> int: ... + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + def read(self, columns: list[str] | None = None) -> Table: ... 
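
A small sketch of the JSON entry points stubbed above; the file name and schema are hypothetical:

    import pyarrow as pa
    from pyarrow import json

    opts = json.ParseOptions(
        explicit_schema=pa.schema([("id", pa.int64()), ("name", pa.string())]),
        unexpected_field_behavior="ignore",  # skip fields not in the schema
    )
    table = json.read_json("records.jsonl", parse_options=opts)
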
+ +class ORCWriter(_Weakrefable): + def open( + self, + where: str | NativeFile | IO, + *, + file_version: str | None = None, + batch_size: int | None = None, + stripe_size: int | None = None, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] | None = None, + compression_block_size: int | None = None, + compression_strategy: Literal["COMPRESSION", "SPEED"] | None = None, + row_index_stride: int | None = None, + padding_tolerance: float | None = None, + dictionary_key_size_threshold: float | None = None, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float | None = None, + ) -> None: ... + def write(self, table: Table) -> None: ... + def close(self) -> None: ... diff --git a/python/pyarrow-stubs/_parquet.pyi b/python/pyarrow-stubs/_parquet.pyi new file mode 100644 index 00000000000..35ee2b41fde --- /dev/null +++ b/python/pyarrow-stubs/_parquet.pyi @@ -0,0 +1,492 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict + +from _typeshed import StrPath + +from ._stubs_typing import Order +from .lib import ( + Buffer, + ChunkedArray, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +_PhysicalType: TypeAlias = Literal[ + "BOOLEAN", + "INT32", + "INT64", + "INT96", + "FLOAT", + "DOUBLE", + "BYTE_ARRAY", + "FIXED_LEN_BYTE_ARRAY", + "UNKNOWN", +] +_LogicTypeName: TypeAlias = Literal[ + "UNDEFINED", + "STRING", + "MAP", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME", + "TIMESTAMP", + "INT", + "FLOAT16", + "JSON", + "BSON", + "UUID", + "NONE", + "UNKNOWN", +] +_ConvertedType: TypeAlias = Literal[ + "NONE", + "UTF8", + "MAP", + "MAP_KEY_VALUE", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME_MILLIS", + "TIME_MICROS", + "TIMESTAMP_MILLIS", + "TIMESTAMP_MICROS", + "UINT_8", + "UINT_16", + "UINT_32", + "UINT_64", + "INT_8", + "INT_16", + "INT_32", + "INT_64", + "JSON", + "BSON", + "INTERVAL", + "UNKNOWN", +] +_Encoding: TypeAlias = Literal[ + "PLAIN", + "PLAIN_DICTIONARY", + "RLE", + "BIT_PACKED", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "RLE_DICTIONARY", + "BYTE_STREAM_SPLIT", + "UNKNOWN", +] +_Compression: TypeAlias = Literal[ + "UNCOMPRESSED", + "SNAPPY", + "GZIP", + "LZO", + "BROTLI", + "LZ4", + "ZSTD", + "UNKNOWN", +] + + +class _Statistics(TypedDict): + has_min_max: bool + min: Any | None + max: Any | None + null_count: int | None + distinct_count: int | None + num_values: int + physical_type: _PhysicalType + + +class Statistics(_Weakrefable): + def to_dict(self) -> _Statistics: ... + def equals(self, other: Statistics) -> bool: ... + @property + def has_min_max(self) -> bool: ... + @property + def has_null_count(self) -> bool: ... 
+ @property + def has_distinct_count(self) -> bool: ... + @property + def min_raw(self) -> Any | None: ... + @property + def max_raw(self) -> Any | None: ... + @property + def min(self) -> Any | None: ... + @property + def max(self) -> Any | None: ... + @property + def null_count(self) -> int | None: ... + @property + def distinct_count(self) -> int | None: ... + @property + def num_values(self) -> int: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def logical_type(self) -> ParquetLogicalType: ... + @property + def converted_type(self) -> _ConvertedType | None: ... + + +class ParquetLogicalType(_Weakrefable): + def to_json(self) -> str: ... + @property + def type(self) -> _LogicTypeName: ... + + +class _ColumnChunkMetaData(TypedDict): + file_offset: int + file_path: str | None + physical_type: _PhysicalType + num_values: int + path_in_schema: str + is_stats_set: bool + statistics: Statistics | None + compression: _Compression + encodings: tuple[_Encoding, ...] + has_dictionary_page: bool + dictionary_page_offset: int | None + data_page_offset: int + total_compressed_size: int + total_uncompressed_size: int + + +class ColumnChunkMetaData(_Weakrefable): + def to_dict(self) -> _ColumnChunkMetaData: ... + def equals(self, other: ColumnChunkMetaData) -> bool: ... + @property + def file_offset(self) -> int: ... + @property + def file_path(self) -> str | None: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def num_values(self) -> int: ... + @property + def path_in_schema(self) -> str: ... + @property + def is_stats_set(self) -> bool: ... + @property + def statistics(self) -> Statistics | None: ... + @property + def compression(self) -> _Compression: ... + @property + def encodings(self) -> tuple[_Encoding, ...]: ... + @property + def has_dictionary_page(self) -> bool: ... + @property + def dictionary_page_offset(self) -> int | None: ... + @property + def data_page_offset(self) -> int: ... + @property + def has_index_page(self) -> bool: ... + @property + def index_page_offset(self) -> int: ... + @property + def total_compressed_size(self) -> int: ... + @property + def total_uncompressed_size(self) -> int: ... + @property + def has_offset_index(self) -> bool: ... + @property + def has_column_index(self) -> bool: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + + +class _SortingColumn(TypedDict): + column_index: int + descending: bool + nulls_first: bool + + +class SortingColumn: + def __init__( + self, column_index: int, descending: bool = False, nulls_first: bool = False + ) -> None: ... + + @classmethod + def from_ordering( + cls, + schema: Schema, + sort_keys: Sequence[tuple[str, Order]], + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> tuple[SortingColumn, ...]: ... + + @staticmethod + def to_ordering( + schema: Schema, sorting_columns: tuple[SortingColumn, ...] + ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ... + def __hash__(self) -> int: ... + @property + def column_index(self) -> int: ... + @property + def descending(self) -> bool: ... + @property + def nulls_first(self) -> bool: ... + def to_dict(self) -> _SortingColumn: ... + + +class _RowGroupMetaData(TypedDict): + num_columns: int + num_rows: int + total_byte_size: int + columns: list[ColumnChunkMetaData] + sorting_columns: list[SortingColumn] + + +class RowGroupMetaData(_Weakrefable): + def __init__(self, parent: FileMetaData, index: int) -> None: ... 
+    def equals(self, other: RowGroupMetaData) -> bool: ...
+    def column(self, i: int) -> ColumnChunkMetaData: ...
+    def to_dict(self) -> _RowGroupMetaData: ...
+    @property
+    def num_columns(self) -> int: ...
+    @property
+    def num_rows(self) -> int: ...
+    @property
+    def total_byte_size(self) -> int: ...
+    @property
+    def sorting_columns(self) -> list[SortingColumn]: ...
+
+
+class _FileMetaData(TypedDict):
+    created_by: str
+    num_columns: int
+    num_rows: int
+    num_row_groups: int
+    format_version: str
+    serialized_size: int
+
+
+class FileMetaData(_Weakrefable):
+    def __hash__(self) -> int: ...
+    def to_dict(self) -> _FileMetaData: ...
+    def equals(self, other: FileMetaData) -> bool: ...
+    @property
+    def schema(self) -> ParquetSchema: ...
+    @property
+    def serialized_size(self) -> int: ...
+    @property
+    def num_columns(self) -> int: ...
+    @property
+    def num_rows(self) -> int: ...
+    @property
+    def num_row_groups(self) -> int: ...
+    @property
+    def format_version(self) -> str: ...
+    @property
+    def created_by(self) -> str: ...
+    @property
+    def metadata(self) -> dict[bytes, bytes] | None: ...
+    def row_group(self, i: int) -> RowGroupMetaData: ...
+    def set_file_path(self, path: str) -> None: ...
+    def append_row_groups(self, other: FileMetaData) -> None: ...
+    def write_metadata_file(self, where: StrPath | Buffer |
+                            NativeFile | IO) -> None: ...
+
+
+class ParquetSchema(_Weakrefable):
+    def __init__(self, container: FileMetaData) -> None: ...
+    def __getitem__(self, i: int) -> ColumnSchema: ...
+    def __hash__(self) -> int: ...
+    def __len__(self) -> int: ...
+    @property
+    def names(self) -> list[str]: ...
+    def to_arrow_schema(self) -> Schema: ...
+    def equals(self, other: ParquetSchema) -> bool: ...
+    def column(self, i: int) -> ColumnSchema: ...
+
+
+class ColumnSchema(_Weakrefable):
+    def __init__(self, schema: ParquetSchema, index: int) -> None: ...
+    def equals(self, other: ColumnSchema) -> bool: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def path(self) -> str: ...
+    @property
+    def max_definition_level(self) -> int: ...
+    @property
+    def max_repetition_level(self) -> int: ...
+    @property
+    def physical_type(self) -> _PhysicalType: ...
+    @property
+    def logical_type(self) -> ParquetLogicalType: ...
+    @property
+    def converted_type(self) -> _ConvertedType | None: ...
+    @property
+    def length(self) -> int | None: ...
+    @property
+    def precision(self) -> int | None: ...
+    @property
+    def scale(self) -> int | None: ...
+
+
+class ParquetReader(_Weakrefable):
+    def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+
+    def open(
+        self,
+        source: StrPath | NativeFile | IO,
+        *,
+        use_memory_map: bool = False,
+        read_dictionary: Iterable[int] | Iterable[str] | None = None,
+        metadata: FileMetaData | None = None,
+        buffer_size: int = 0,
+        pre_buffer: bool = False,
+        coerce_int96_timestamp_unit: str | None = None,
+        decryption_properties: FileDecryptionProperties | None = None,
+        thrift_string_size_limit: int | None = None,
+        thrift_container_size_limit: int | None = None,
+        page_checksum_verification: bool = False,
+    ) -> None: ...
+    @property
+    def column_paths(self) -> list[str]: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def schema_arrow(self) -> Schema: ...
+    @property
+    def num_row_groups(self) -> int: ...
+    def set_use_threads(self, use_threads: bool) -> None: ...
+    def set_batch_size(self, batch_size: int) -> None: ...
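+
+    # Illustrative usage sketch, not part of the stub API surface; the file
+    # name "data.parquet" is a hypothetical placeholder:
+    #
+    #   reader = ParquetReader()
+    #   reader.open("data.parquet")
+    #   table = reader.read_all()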
+
+    def iter_batches(
+        self,
+        batch_size: int,
+        row_groups: list[int],
+        column_indices: list[int] | None = None,
+        use_threads: bool = True,
+    ) -> Iterator[RecordBatch]: ...
+
+    def read_row_group(
+        self, i: int, column_indices: list[int] | None = None, use_threads: bool = True
+    ) -> Table: ...
+
+    def read_row_groups(
+        self,
+        row_groups: list[int],
+        column_indices: list[int] | None = None,
+        use_threads: bool = True,
+    ) -> Table: ...
+
+    def read_all(
+        self, column_indices: list[int] | None = None, use_threads: bool = True
+    ) -> Table: ...
+    def scan_contents(
+        self, column_indices: list[int] | None = None, batch_size: int = 65536) -> int: ...
+
+    def column_name_idx(self, column_name: str) -> int: ...
+    def read_column(self, column_index: int) -> ChunkedArray: ...
+    def close(self) -> None: ...
+    @property
+    def closed(self) -> bool: ...
+
+
+class ParquetWriter(_Weakrefable):
+    def __init__(
+        self,
+        where: StrPath | NativeFile | IO,
+        schema: Schema,
+        use_dictionary: bool | list[str] | None = None,
+        compression: _Compression | dict[str, _Compression] | None = None,
+        version: str | None = None,
+        write_statistics: bool | list[str] | None = None,
+        memory_pool: MemoryPool | None = None,
+        use_deprecated_int96_timestamps: bool = False,
+        coerce_timestamps: Literal["ms", "us"] | None = None,
+        data_page_size: int | None = None,
+        allow_truncated_timestamps: bool = False,
+        compression_level: int | dict[str, int] | None = None,
+        use_byte_stream_split: bool | list[str] = False,
+        column_encoding: _Encoding | dict[str, _Encoding] | None = None,
+        writer_engine_version: str | None = None,
+        data_page_version: str | None = None,
+        use_compliant_nested_type: bool = True,
+        encryption_properties: FileEncryptionProperties | None = None,
+        write_batch_size: int | None = None,
+        dictionary_pagesize_limit: int | None = None,
+        store_schema: bool = True,
+        write_page_index: bool = False,
+        write_page_checksum: bool = False,
+        sorting_columns: tuple[SortingColumn, ...] | None = None,
+        store_decimal_as_integer: bool = False,
+    ) -> None: ...
+    def close(self) -> None: ...
+    def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...
+    def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def use_dictionary(self) -> bool | list[str] | None: ...
+    @property
+    def use_deprecated_int96_timestamps(self) -> bool: ...
+    @property
+    def use_byte_stream_split(self) -> bool | list[str]: ...
+    @property
+    def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ...
+    @property
+    def coerce_timestamps(self) -> Literal["ms", "us"] | None: ...
+    @property
+    def allow_truncated_timestamps(self) -> bool: ...
+    @property
+    def compression(self) -> _Compression | dict[str, _Compression] | None: ...
+    @property
+    def compression_level(self) -> int | dict[str, int] | None: ...
+    @property
+    def data_page_version(self) -> str | None: ...
+    @property
+    def use_compliant_nested_type(self) -> bool: ...
+    @property
+    def version(self) -> str | None: ...
+    @property
+    def write_statistics(self) -> bool | list[str] | None: ...
+    @property
+    def writer_engine_version(self) -> str: ...
+    @property
+    def row_group_size(self) -> int: ...
+    @property
+    def data_page_size(self) -> int: ...
+    @property
+    def encryption_properties(self) -> FileEncryptionProperties: ...
+    @property
+    def write_batch_size(self) -> int: ...
+    @property
+    def dictionary_pagesize_limit(self) -> int: ...
+ @property + def store_schema(self) -> bool: ... + @property + def store_decimal_as_integer(self) -> bool: ... + + +class FileEncryptionProperties: + ... + + +class FileDecryptionProperties: + ... diff --git a/python/pyarrow-stubs/_parquet_encryption.pyi b/python/pyarrow-stubs/_parquet_encryption.pyi new file mode 100644 index 00000000000..cf09b6ee39c --- /dev/null +++ b/python/pyarrow-stubs/_parquet_encryption.pyi @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from typing import Callable + +from ._parquet import FileDecryptionProperties, FileEncryptionProperties +from .lib import _Weakrefable + + +class EncryptionConfiguration(_Weakrefable): + footer_key: str + column_keys: dict[str, list[str]] + encryption_algorithm: str + plaintext_footer: bool + double_wrapping: bool + cache_lifetime: dt.timedelta + internal_key_material: bool + data_key_length_bits: int + + def __init__( + self, + footer_key: str, + *, + column_keys: dict[str, str | list[str]] | None = None, + encryption_algorithm: str | None = None, + plaintext_footer: bool | None = None, + double_wrapping: bool | None = None, + cache_lifetime: dt.timedelta | None = None, + internal_key_material: bool | None = None, + data_key_length_bits: int | None = None, + ) -> None: ... + + +class DecryptionConfiguration(_Weakrefable): + cache_lifetime: dt.timedelta + def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... + + +class KmsConnectionConfig(_Weakrefable): + kms_instance_id: str + kms_instance_url: str + key_access_token: str + custom_kms_conf: dict[str, str] + + def __init__( + self, + *, + kms_instance_id: str | None = None, + kms_instance_url: str | None = None, + key_access_token: str | None = None, + custom_kms_conf: dict[str, str] | None = None, + ) -> None: ... + def refresh_key_access_token(self, value: str) -> None: ... + + +class KmsClient(_Weakrefable): + def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... + def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... + + +class CryptoFactory(_Weakrefable): + def __init__(self, kms_client_factory: Callable[[ + KmsConnectionConfig], KmsClient]): ... + + def file_encryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> FileEncryptionProperties: ... + + def file_decryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + decryption_config: DecryptionConfiguration | None = None, + ) -> FileDecryptionProperties: ... + def remove_cache_entries_for_token(self, access_token: str) -> None: ... + def remove_cache_entries_for_all_tokens(self) -> None: ... 
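+
+
+# Illustrative usage sketch, not part of the stubs: how these classes are
+# meant to compose for Parquet column encryption. The KMS client factory,
+# endpoint, and key identifiers below are hypothetical placeholders.
+#
+#   factory = CryptoFactory(my_kms_client_factory)
+#   encryption_config = EncryptionConfiguration(
+#       footer_key="footer-key-id",
+#       column_keys={"column-key-id": ["secret_column"]},
+#   )
+#   file_encryption_properties = factory.file_encryption_properties(
+#       KmsConnectionConfig(kms_instance_url="https://kms.example.com"),
+#       encryption_config,
+#   )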
diff --git a/python/pyarrow-stubs/_s3fs.pyi b/python/pyarrow-stubs/_s3fs.pyi
new file mode 100644
index 00000000000..f065d78f993
--- /dev/null
+++ b/python/pyarrow-stubs/_s3fs.pyi
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+
+from typing import Literal, TypedDict
+from typing_extensions import Required, NotRequired
+
+from ._fs import FileSystem
+from .lib import KeyValueMetadata
+
+
+class _ProxyOptions(TypedDict):
+    scheme: Required[Literal["http", "https"]]
+    host: Required[str]
+    port: Required[int]
+    username: NotRequired[str]
+    password: NotRequired[str]
+
+
+class S3LogLevel(enum.IntEnum):
+    Off = enum.auto()
+    Fatal = enum.auto()
+    Error = enum.auto()
+    Warn = enum.auto()
+    Info = enum.auto()
+    Debug = enum.auto()
+    Trace = enum.auto()
+
+
+Off = S3LogLevel.Off
+Fatal = S3LogLevel.Fatal
+Error = S3LogLevel.Error
+Warn = S3LogLevel.Warn
+Info = S3LogLevel.Info
+Debug = S3LogLevel.Debug
+Trace = S3LogLevel.Trace
+
+
+def initialize_s3(
+    log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1
+) -> None: ...
+def ensure_s3_initialized() -> None: ...
+def finalize_s3() -> None: ...
+def ensure_s3_finalized() -> None: ...
+def resolve_s3_region(bucket: str) -> str: ...
+
+
+class S3RetryStrategy:
+    max_attempts: int
+    def __init__(self, max_attempts: int = 3) -> None: ...
+
+
+class AwsStandardS3RetryStrategy(S3RetryStrategy):
+    ...
+
+
+class AwsDefaultS3RetryStrategy(S3RetryStrategy):
+    ...
+
+
+class S3FileSystem(FileSystem):
+    def __init__(
+        self,
+        *,
+        access_key: str | None = None,
+        secret_key: str | None = None,
+        session_token: str | None = None,
+        anonymous: bool = False,
+        region: str | None = None,
+        request_timeout: float | None = None,
+        connect_timeout: float | None = None,
+        scheme: Literal["http", "https"] = "https",
+        endpoint_override: str | None = None,
+        background_writes: bool = True,
+        default_metadata: dict | KeyValueMetadata | None = None,
+        role_arn: str | None = None,
+        session_name: str | None = None,
+        external_id: str | None = None,
+        load_frequency: int = 900,
+        proxy_options: _ProxyOptions | str | None = None,
+        allow_bucket_creation: bool = False,
+        allow_bucket_deletion: bool = False,
+        check_directory_existence_before_creation: bool = False,
+        retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3),
+        force_virtual_addressing: bool = False,
+    ): ...
+    @property
+    def region(self) -> str: ...
diff --git a/python/pyarrow-stubs/_stubs_typing.pyi b/python/pyarrow-stubs/_stubs_typing.pyi
new file mode 100644
index 00000000000..56aa7fd1123
--- /dev/null
+++ b/python/pyarrow-stubs/_stubs_typing.pyi
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+
+from collections.abc import Sequence
+from decimal import Decimal
+from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar, Iterator
+
+import numpy as np
+
+from numpy.typing import NDArray
+
+from pyarrow.lib import BooleanArray, IntegerArray
+
+ArrayLike: TypeAlias = Any
+ScalarLike: TypeAlias = Any
+Order: TypeAlias = Literal["ascending", "descending"]
+JoinType: TypeAlias = Literal[
+    "left semi",
+    "right semi",
+    "left anti",
+    "right anti",
+    "inner",
+    "left outer",
+    "right outer",
+    "full outer",
+]
+Compression: TypeAlias = Literal[
+    "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy"
+]
+NullEncoding: TypeAlias = Literal["mask", "encode"]
+NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"]
+Mask: TypeAlias = Sequence[bool | None] | NDArray[np.bool_] | BooleanArray
+Indices: TypeAlias = Sequence[int] | NDArray[np.integer[Any]] | IntegerArray
+PyScalar: TypeAlias = (
+    bool | int | float | Decimal | str | bytes | dt.date | dt.datetime | dt.time | dt.timedelta
+)
+
+_T = TypeVar("_T")
+_V = TypeVar("_V", covariant=True)
+
+SingleOrList: TypeAlias = list[_T] | _T
+
+
+class SupportEq(Protocol):
+    def __eq__(self, other) -> bool: ...
+
+
+class SupportLt(Protocol):
+    def __lt__(self, other) -> bool: ...
+
+
+class SupportGt(Protocol):
+    def __gt__(self, other) -> bool: ...
+
+
+class SupportLe(Protocol):
+    def __le__(self, other) -> bool: ...
+
+
+class SupportGe(Protocol):
+    def __ge__(self, other) -> bool: ...
+
+
+FilterTuple: TypeAlias = (
+    tuple[str, Literal["=", "==", "!="], SupportEq]
+    | tuple[str, Literal["<"], SupportLt]
+    | tuple[str, Literal[">"], SupportGt]
+    | tuple[str, Literal["<="], SupportLe]
+    | tuple[str, Literal[">="], SupportGe]
+    | tuple[str, Literal["in", "not in"], Collection]
+)
+
+
+class Buffer(Protocol):
+    ...
+
+
+class SupportPyBuffer(Protocol):
+    ...
+
+
+class SupportArrowStream(Protocol):
+    def __arrow_c_stream__(self, requested_schema=None) -> Any: ...
+
+
+class SupportArrowArray(Protocol):
+    def __arrow_c_array__(self, requested_schema=None) -> Any: ...
+
+
+class SupportArrowDeviceArray(Protocol):
+    def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+
+class SupportArrowSchema(Protocol):
+    def __arrow_c_schema__(self) -> Any: ...
+
+
+class NullableCollection(Protocol[_V]):  # pyright: ignore[reportInvalidTypeVarUse]
+    def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ...
+    def __len__(self) -> int: ...
+    def __contains__(self, item: Any, /) -> bool: ...
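+
+
+# Illustrative only: FilterTuple models the DNF-style filter triples used by
+# the Parquet and dataset readers, e.g.
+#
+#   filters: list[FilterTuple] = [("year", ">=", 2020), ("city", "in", {"NYC"})]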
diff --git a/python/pyarrow-stubs/_substrait.pyi b/python/pyarrow-stubs/_substrait.pyi
new file mode 100644
index 00000000000..12dd437412f
--- /dev/null
+++ b/python/pyarrow-stubs/_substrait.pyi
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Callable
+
+from ._compute import Expression
+from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable
+
+
+def run_query(
+    plan: Buffer | bytes,
+    *,
+    table_provider: Callable[[list[str], Schema], Table] | None = None,
+    use_threads: bool = True,
+) -> RecordBatchReader: ...
+def _parse_json_plan(plan: bytes) -> Buffer: ...
+
+
+class SubstraitSchema:
+    schema: Schema
+    expression: Expression
+    def __init__(self, schema: Schema, expression: Expression) -> None: ...
+    def to_pysubstrait(self) -> Any: ...
+
+
+def serialize_schema(schema: Schema) -> SubstraitSchema: ...
+def deserialize_schema(buf: Buffer | bytes) -> Schema: ...
+
+
+def serialize_expressions(
+    exprs: list[Expression],
+    names: list[str],
+    schema: Schema,
+    *,
+    allow_arrow_extensions: bool = False,
+) -> Buffer: ...
+
+
+class BoundExpressions(_Weakrefable):
+    @property
+    def schema(self) -> Schema: ...
+    @property
+    def expressions(self) -> dict[str, Expression]: ...
+    @classmethod
+    def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ...
+
+
+def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ...
+def get_supported_functions() -> list[str]: ...
diff --git a/python/pyarrow-stubs/_types.pyi b/python/pyarrow-stubs/_types.pyi
new file mode 100644
index 00000000000..0cb4bba6a6f
--- /dev/null
+++ b/python/pyarrow-stubs/_types.pyi
@@ -0,0 +1,992 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import datetime as dt +import sys + +from collections.abc import Mapping, Sequence +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Iterable, Iterator, Literal + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar + + +class _Weakrefable: + ... + + +class _Metadata(_Weakrefable): + ... + + +class DataType(_Weakrefable): + + def field(self, i: int) -> Field: ... + + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + + @property + def byte_width(self) -> int: ... + + @property + def num_fields(self) -> int: ... + + @property + def num_buffers(self) -> int: ... + + def __hash__(self) -> int: ... + + def equals(self, other: DataType | str, *, + check_metadata: bool = False) -> bool: ... + + def to_pandas_dtype(self) -> np.generic: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + + +class _BasicDataType(DataType, Generic[_AsPyType]): + ... + + +class NullType(_BasicDataType[None]): + ... + + +class BoolType(_BasicDataType[bool]): + ... + + +class UInt8Type(_BasicDataType[int]): + ... + + +class Int8Type(_BasicDataType[int]): + ... + + +class UInt16Type(_BasicDataType[int]): + ... + + +class Int16Type(_BasicDataType[int]): + ... + + +class Uint32Type(_BasicDataType[int]): + ... + + +class Int32Type(_BasicDataType[int]): + ... + + +class UInt64Type(_BasicDataType[int]): + ... + + +class Int64Type(_BasicDataType[int]): + ... + + +class Float16Type(_BasicDataType[float]): + ... + + +class Float32Type(_BasicDataType[float]): + ... + + +class Float64Type(_BasicDataType[float]): + ... + + +class Date32Type(_BasicDataType[dt.date]): + ... + + +class Date64Type(_BasicDataType[dt.date]): + ... + + +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): + ... + + +class StringType(_BasicDataType[str]): + ... + + +class LargeStringType(_BasicDataType[str]): + ... + + +class StringViewType(_BasicDataType[str]): + ... + + +class BinaryType(_BasicDataType[bytes]): + ... + + +class LargeBinaryType(_BasicDataType[bytes]): + ... + + +class BinaryViewType(_BasicDataType[bytes]): + ... + + +_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + + @property + def unit(self) -> _Unit: ... + + @property + def tz(self) -> _Tz: ... + + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + + @property + def unit(self) -> _Time32Unit: ... + + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + + @property + def unit(self) -> _Time64Unit: ... + + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + + @property + def unit(self) -> _Unit: ... 
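+
+
+# Illustrative only: the _Unit/_Tz parameters above let type checkers carry
+# unit and timezone information through the factory functions defined later
+# in this module, e.g.
+#
+#   t = timestamp("ms", tz="UTC")  # checkers see TimestampType[Literal["ms"], str]
+#   d = duration("s")              # checkers see DurationType[Literal["s"]]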
+ + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + ... + + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class ListType(DataType, Generic[_DataTypeT]): + + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListType(DataType, Generic[_DataTypeT]): + + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + + +class ListViewType(DataType, Generic[_DataTypeT]): + + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListViewType(DataType, Generic[_DataTypeT]): + + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + @property + def list_size(self) -> _Size: ... + + +class DictionaryMemo(_Weakrefable): + ... + + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + Uint32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + + @property + def ordered(self) -> _Ordered: ... + + @property + def index_type(self) -> _IndexT: ... + + @property + def value_type(self) -> _BasicValueT: ... + + +_K = TypeVar("_K", bound=DataType) + + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + + @property + def key_field(self) -> Field[_K]: ... + + @property + def key_type(self) -> _K: ... + + @property + def item_field(self) -> Field[_ValueT]: ... + + @property + def item_type(self) -> _ValueT: ... + + @property + def keys_sorted(self) -> _Ordered: ... + + +_Size = TypeVar("_Size", default=int) + + +class StructType(DataType): + + def get_field_index(self, name: str) -> int: ... + + def field(self, i: int | str) -> Field: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + @property + def names(self) -> list[str]: ... + + @property + def fields(self) -> list[Field]: ... + + +class UnionType(DataType): + + @property + def mode(self) -> Literal["sparse", "dense"]: ... + + @property + def type_codes(self) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... 
+ + def field(self, i: int) -> Field: ... + + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + + +class SparseUnionType(UnionType): + + @property + def mode(self) -> Literal["sparse"]: ... + + +class DenseUnionType(UnionType): + + @property + def mode(self) -> Literal["dense"]: ... + + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + + +class BaseExtensionType(DataType): + + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + + @property + def extension_name(self) -> str: ... + + @property + def storage_type(self) -> DataType: ... + + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + + +class ExtensionType(BaseExtensionType): + + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + + def __arrow_ext_serialize__(self) -> bytes: ... + + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes) -> Self: ... + + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + + @property + def value_type(self) -> _ValueT: ... + + @property + def shape(self) -> list[int]: ... + + @property + def dim_names(self) -> list[str] | None: ... + + @property + def permutation(self) -> list[int] | None: ... + + +class Bool8Type(BaseExtensionType): + ... + + +class UuidType(BaseExtensionType): + ... + + +class JsonType(BaseExtensionType): + ... + + +class OpaqueType(BaseExtensionType): + + @property + def type_name(self) -> str: ... + + @property + def vendor_name(self) -> str: ... + + +# TODO +# @deprecated( +# "This class is deprecated and its deserialization is disabled by default. " +# ":class:`ExtensionType` is recommended instead." +# ) +# class PyExtensionType(ExtensionType): +# """ +# Concrete base class for Python-defined extension types based on pickle +# for (de)serialization. +# +# .. warning:: +# This class is deprecated and its deserialization is disabled by default. +# :class:`ExtensionType` is recommended instead. +# +# Parameters +# ---------- +# storage_type : DataType +# The storage type for which the extension is built. +# """ +# def __init__(self, storage_type: DataType) -> None: ... +# @classmethod +# def set_auto_load(cls, value: bool) -> None: +# """ +# Enable or disable auto-loading of serialized PyExtensionType instances. +# +# Parameters +# ---------- +# value : bool +# Whether to enable auto-loading. +# """ + +class UnknownExtensionType(ExtensionType): # type: ignore + + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + + +def register_extension_type(ext_type: ExtensionType) -> None: ... # type: ignore + + +def unregister_extension_type(type_name: str) -> None: ... + + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + + def __init__(self, __arg0__: Mapping[bytes, bytes] + | None = None, **kwargs) -> None: ... + + def equals(self, other: KeyValueMetadata) -> bool: ... + + def __len__(self) -> int: ... + + def __contains__(self, __key: object) -> bool: ... + + def __getitem__(self, __key: Any) -> Any: ... + + def __iter__(self) -> Iterator[bytes]: ... + + def get_all(self, key: str) -> list[bytes]: ... + + def to_dict(self) -> dict[bytes, bytes]: ... 
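+
+
+# Illustrative only: a minimal ExtensionType subclass of the shape the
+# registration hooks above expect; "myorg.rational" is a hypothetical name.
+#
+#   class RationalType(ExtensionType):
+#       def __init__(self) -> None:
+#           storage = struct([("num", int64()), ("den", int64())])
+#           super().__init__(storage, "myorg.rational")
+#       def __arrow_ext_serialize__(self) -> bytes:
+#           return b""
+#       @classmethod
+#       def __arrow_ext_deserialize__(cls, storage_type, serialized):
+#           return cls()
+#
+#   register_extension_type(RationalType())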
+
+
+class Field(_Weakrefable, Generic[_DataTypeT]):
+
+    def equals(self, other: Field, check_metadata: bool = False) -> bool: ...
+
+    def __hash__(self) -> int: ...
+
+    @property
+    def nullable(self) -> bool: ...
+
+    @property
+    def name(self) -> str: ...
+
+    @property
+    def metadata(self) -> dict[bytes, bytes] | None: ...
+
+    @property
+    def type(self) -> _DataTypeT: ...
+    def with_metadata(self, metadata: dict[bytes | str, bytes | str]) -> Self: ...
+
+    def remove_metadata(self) -> Self: ...
+
+    def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ...
+
+    def with_name(self, name: str) -> Self: ...
+
+    def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ...
+
+    def flatten(self) -> list[Field]: ...
+
+    def _export_to_c(self, out_ptr: int) -> None: ...
+
+    @classmethod
+    def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+    def __arrow_c_schema__(self) -> Any: ...
+
+    @classmethod
+    def _import_from_c_capsule(cls, schema) -> Self: ...
+
+
+class Schema(_Weakrefable):
+
+    def __len__(self) -> int: ...
+
+    def __getitem__(self, key: int | str) -> Field: ...
+
+    _field = __getitem__  # pyright: ignore[reportUnknownVariableType]
+    def __iter__(self) -> Iterator[Field]: ...
+
+    def __hash__(self) -> int: ...
+
+    def __sizeof__(self) -> int: ...
+    @property
+    def pandas_metadata(self) -> dict: ...
+
+    @property
+    def names(self) -> list[str]: ...
+
+    @property
+    def types(self) -> list[DataType]: ...
+
+    @property
+    def metadata(self) -> dict[bytes, bytes]: ...
+
+    def empty_table(self) -> Table: ...
+
+    def equals(self, other: Schema, check_metadata: bool = False) -> bool: ...
+
+    @classmethod
+    def from_pandas(cls, df: pd.DataFrame, preserve_index: bool |
+                    None = None) -> Schema: ...
+
+    def field(self, i: int | str | bytes) -> Field: ...
+
+    @deprecated("Use 'field' instead")
+    def field_by_name(self, name: str) -> Field: ...
+
+    def get_field_index(self, name: str) -> int: ...
+
+    def get_all_field_indices(self, name: str) -> list[int]: ...
+
+    def append(self, field: Field) -> Schema: ...
+
+    def insert(self, i: int, field: Field) -> Schema: ...
+
+    def remove(self, i: int) -> Schema: ...
+
+    def set(self, i: int, field: Field) -> Schema: ...
+
+    @deprecated("Use 'with_metadata' instead")
+    def add_metadata(self, metadata: dict) -> Schema: ...
+
+    def with_metadata(self, metadata: dict) -> Schema: ...
+
+    def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ...
+
+    def remove_metadata(self) -> Schema: ...
+
+    def to_string(
+        self,
+        truncate_metadata: bool = True,
+        show_field_metadata: bool = True,
+        show_schema_metadata: bool = True,
+    ) -> str: ...
+
+    def _export_to_c(self, out_ptr: int) -> None: ...
+
+    @classmethod
+    def _import_from_c(cls, in_ptr: int) -> Schema: ...
+
+    def __arrow_c_schema__(self) -> Any: ...
+
+    @staticmethod
+    def _import_from_c_capsule(schema: Any) -> Schema: ...
+
+
+def unify_schemas(
+    schemas: list[Schema], *, promote_options: Literal["default", "permissive"] = "default"
+) -> Schema: ...
+
+
+def field(
+    name: SupportArrowSchema | str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None
+) -> Field[_DataTypeT] | Field[Any]: ...
+
+
+def null() -> NullType: ...
+
+
+def bool_() -> BoolType: ...
+
+
+def uint8() -> UInt8Type: ...
+
+
+def int8() -> Int8Type: ...
+
+
+def uint16() -> UInt16Type: ...
+
+
+def int16() -> Int16Type: ...
+
+
+def uint32() -> Uint32Type: ...
+
+
+def int32() -> Int32Type: ...
+
+
+def int64() -> Int64Type: ...
+
+
+def uint64() -> UInt64Type: ...
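+
+
+# Illustrative only: combined with the factories above, field() infers a
+# parameterized Field, e.g. field("x", int32(), nullable=False) is typed as
+# Field[Int32Type].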
+
+
+def timestamp(unit: _Unit, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ...
+
+
+def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: ...
+
+
+def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: ...
+
+
+def duration(unit: _Unit) -> DurationType[_Unit]: ...
+
+
+def month_day_nano_interval() -> MonthDayNanoIntervalType: ...
+
+
+def date32() -> Date32Type: ...
+
+
+def date64() -> Date64Type: ...
+
+
+def float16() -> Float16Type: ...
+
+
+def float32() -> Float32Type: ...
+
+
+def float64() -> Float64Type: ...
+
+
+def decimal32(precision: _Precision, scale: _Scale |
+              None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal64(precision: _Precision, scale: _Scale |
+              None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal128(precision: _Precision, scale: _Scale |
+               None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal256(precision: _Precision, scale: _Scale |
+               None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def string() -> StringType: ...
+
+
+utf8 = string
+
+
+def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ...
+
+
+def large_binary() -> LargeBinaryType: ...
+
+
+def large_string() -> LargeStringType: ...
+
+
+large_utf8 = large_string
+
+
+def binary_view() -> BinaryViewType: ...
+
+
+def string_view() -> StringViewType: ...
+
+
+def list_(
+    value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | _Size | None = None
+) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ...
+
+
+def large_list(value_type: _DataTypeT |
+               Field[_DataTypeT]) -> LargeListType[_DataTypeT]: ...
+
+
+def list_view(value_type: _DataTypeT |
+              Field[_DataTypeT]) -> ListViewType[_DataTypeT]: ...
+
+
+def large_list_view(
+    value_type: _DataTypeT | Field[_DataTypeT],
+) -> LargeListViewType[_DataTypeT]: ...
+
+
+def map_(
+    key_type: _K, item_type: _ValueT, keys_sorted: _Ordered | None = None
+) -> MapType[_K, _ValueT, _Ordered]: ...
+
+
+def dictionary(
+    index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered | None = None
+) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ...
+
+
+def struct(
+    fields: Iterable[Field[Any] | tuple[str, Field[Any]] | tuple[str, DataType]]
+    | Mapping[str, Field[Any]],
+) -> StructType: ...
+
+
+def sparse_union(
+    child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> SparseUnionType: ...
+
+
+def dense_union(
+    child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> DenseUnionType: ...
+
+
+def union(
+    child_fields: list[Field[Any]], mode: Literal["sparse"] | Literal["dense"], type_codes: list[int] | None = None
+) -> SparseUnionType | DenseUnionType: ...
+
+
+def run_end_encoded(
+    run_end_type: _RunEndType, value_type: _BasicValueT
+) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ...
+
+
+def json_(storage_type: DataType = ...) -> JsonType: ...
+
+
+def uuid() -> UuidType: ...
+
+
+def fixed_shape_tensor(
+    value_type: _ValueT,
+    shape: Sequence[int],
+    dim_names: Sequence[str] | None = None,
+    permutation: Sequence[int] | None = None,
+) -> FixedShapeTensorType[_ValueT]: ...
+
+
+def bool8() -> Bool8Type: ...
+
+
+def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ...
+
+
+def type_for_alias(name: Any) -> DataType: ...
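+
+
+# Illustrative only: type_for_alias resolves string aliases to types, e.g.
+# type_for_alias("i4") returns int32() and type_for_alias("timestamp[ms]")
+# returns timestamp("ms").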
+ + +def schema( + fields: Iterable[Field[Any]] | Iterable[tuple[str, DataType]] | Mapping[str, DataType], + metadata: dict[bytes | str, bytes | str] | None = None, +) -> Schema: ... + + +def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: ... + + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "Uint32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] diff --git a/python/pyarrow-stubs/acero.pyi b/python/pyarrow-stubs/acero.pyi new file mode 100644 index 00000000000..b3bc83382fb --- /dev/null +++ b/python/pyarrow-stubs/acero.pyi @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Literal + +from . 
import lib +from .compute import Expression, FunctionOptions + +_StrOrExpr: TypeAlias = str | Expression + + +class Declaration(lib._Weakrefable): + def __init__( + self, + factory_name: str, + options: ExecNodeOptions, + inputs: list[Declaration] | None = None, + ) -> None: ... + @classmethod + def from_sequence(cls, decls: list[Declaration]) -> Self: ... + def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... + def to_table(self, use_threads: bool = True) -> lib.Table: ... + + +class ExecNodeOptions(lib._Weakrefable): + ... + + +class TableSourceNodeOptions(ExecNodeOptions): + def __init__(self, table: lib.Table) -> None: ... + + +class FilterNodeOptions(ExecNodeOptions): + def __init__(self, filter_expression: Expression) -> None: ... + + +class ProjectNodeOptions(ExecNodeOptions): + def __init__(self, expressions: list[Expression], + names: list[str] | None = None) -> None: ... + + +class AggregateNodeOptions(ExecNodeOptions): + def __init__( + self, + aggregates: list[tuple[list[str], str, FunctionOptions, str]], + keys: list[_StrOrExpr] | None = None, + ) -> None: ... + + +class OrderByNodeOptions(ExecNodeOptions): + def __init__( + self, + sort_keys: tuple[tuple[str, Literal["ascending", "descending"]], ...] = (), + *, + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> None: ... + + +class HashJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + join_type: Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", + ], + left_keys: _StrOrExpr | list[_StrOrExpr], + right_keys: _StrOrExpr | list[_StrOrExpr], + left_output: list[_StrOrExpr] | None = None, + right_output: list[_StrOrExpr] | None = None, + output_suffix_for_left: str = "", + output_suffix_for_right: str = "", + ) -> None: ... + + +class AsofJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + left_on: _StrOrExpr, + left_by: _StrOrExpr | list[_StrOrExpr], + right_on: _StrOrExpr, + right_by: _StrOrExpr | list[_StrOrExpr], + tolerance: int, + ) -> None: ... diff --git a/python/pyarrow-stubs/array.pyi b/python/pyarrow-stubs/array.pyi new file mode 100644 index 00000000000..7aa67fc8955 --- /dev/null +++ b/python/pyarrow-stubs/array.pyi @@ -0,0 +1,860 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+import sys
+
+from collections.abc import Callable
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+from typing import (
+    Any,
+    Generic,
+    Iterable,
+    Iterator,
+    Literal,
+    TypeVar,
+)
+
+import numpy as np
+import pandas as pd
+
+from pandas.core.dtypes.base import ExtensionDtype
+from pyarrow._compute import CastOptions  # type: ignore[import-not-found]
+from pyarrow._stubs_typing import (
+    ArrayLike,
+    Indices,
+    Mask,
+    Order,
+    SupportArrowArray,
+    SupportArrowDeviceArray,
+)
+from pyarrow.lib import (  # type: ignore[attr-defined]
+    Buffer,
+    Device,  # type: ignore[reportAttributeAccessIssue]
+    MemoryManager,  # type: ignore[reportAttributeAccessIssue]
+    MemoryPool,
+    Tensor,
+    _Weakrefable,
+)
+from typing_extensions import deprecated
+import builtins
+
+from .scalar import *
+from .device import DeviceAllocationType  # type: ignore[import-not-found]
+from ._types import (
+    BaseExtensionType,
+    BinaryType,
+    DataType,
+    Field,
+    Float64Type,
+    Int16Type,
+    Int32Type,
+    Int64Type,
+    MapType,
+    StringType,
+    StructType,
+    _AsPyType,
+    _BasicDataType,
+    _BasicValueT,
+    _DataTypeT,
+    _IndexT,
+    _RunEndType,
+    _Size,
+    _Time32Unit,
+    _Time64Unit,
+    _Tz,
+    _Unit,
+)
+from ._stubs_typing import NullableCollection
+
+
+def array(
+    values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray,
+    type: Any | None = None,
+    mask: Mask | None = None,
+    size: int | None = None,
+    from_pandas: bool | None = None,
+    safe: bool = True,
+    memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def asarray(
+    values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray,
+    type: _DataTypeT | Any | None = None,
+) -> Array[Scalar[_DataTypeT]] | ArrayLike: ...
+
+
+def nulls(
+    size: int,
+    type: Any | None = None,
+    memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def repeat(
+    value: Any,
+    size: int,
+    memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def infer_type(values: Iterable[Any], mask: Mask | None = None,
+               from_pandas: bool = False) -> DataType: ...
+
+
+class ArrayStatistics(_Weakrefable):
+
+    @property
+    def null_count(self) -> int: ...
+
+    @property
+    def distinct_count(self) -> int: ...
+
+    @property
+    def min(self) -> Any: ...
+
+    @property
+    def is_min_exact(self) -> bool: ...
+
+    @property
+    def max(self) -> Any: ...
+
+    @property
+    def is_max_exact(self) -> bool: ...
+
+
+_ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series)
+
+
+class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]):
+    def to_pandas(
+        self,
+        memory_pool: MemoryPool | None = None,
+        categories: list | None = None,
+        strings_to_categorical: bool = False,
+        zero_copy_only: bool = False,
+        integer_object_nulls: bool = False,
+        date_as_object: bool = True,
+        timestamp_as_object: bool = False,
+        use_threads: bool = True,
+        deduplicate_objects: bool = True,
+        ignore_metadata: bool = False,
+        safe: bool = True,
+        split_blocks: bool = False,
+        self_destruct: bool = False,
+        maps_as_pydicts: Literal["lossy", "strict"] | None = None,
+        types_mapper: Callable[[DataType], ExtensionDtype | None] | None = None,
+        coerce_temporal_nanoseconds: bool = False,
+    ) -> _ConvertAs: ...
+
+
+_CastAs = TypeVar("_CastAs", bound=DataType)
+_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=Scalar)
+
+
+class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]):
+
+    def diff(self, other: Self) -> str: ...
+ + def cast( + self, + target_type: _CastAs, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_CastAs]]: ... + + def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: ... + + def sum(self, **kwargs) -> _Scalar_co: ... + + @property + def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ... + def unique(self) -> Self: ... + + def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: ... + + def value_counts(self) -> StructArray: ... + + @staticmethod + def from_pandas( + obj: pd.Series | np.ndarray | ArrayLike, + *, + mask: Mask | None = None, + type: _DataTypeT | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: ... + + @staticmethod + def from_buffers( + type: _DataTypeT, + length: int, + buffers: list[Buffer], + null_count: int = -1, + offset=0, + children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None, + ) -> Array[Scalar[_DataTypeT]]: ... + + @property + def null_count(self) -> int: ... + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + def __iter__(self) -> Iterator[_Scalar_co]: ... + + def to_string( + self, + *, + indent: int = 2, + top_level_indent: int = 0, + window: int = 10, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: ... + + format = to_string + def equals(self, other: Self) -> bool: ... + + def __len__(self) -> int: ... + + def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: ... + + def is_nan(self) -> BooleanArray: ... + + def is_valid(self) -> BooleanArray: ... + + def fill_null( + self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType + ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: ... + + def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + + def drop_null(self) -> Self: ... + + def filter( + self, + mask: Mask, + *, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + ) -> Self: ... + + def index( + self: Array[_ScalarT] | Array[Scalar[_BasicDataType[_AsPyType]]], + value: _ScalarT | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + + def sort(self, order: Order = "ascending", **kwargs) -> Self: ... + + def __array__(self, dtype: np.dtype | None = None, + copy: bool | None = None) -> np.ndarray: ... + + def to_numpy(self, zero_copy_only: bool = True, + writable: bool = False) -> np.ndarray: ... + + def to_pylist( + self: Array[Scalar[_BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: ... + + tolist = to_pylist + def validate(self, *, full: bool = False) -> None: ... + + @property + def offset(self) -> int: ... + + def buffers(self) -> list[Buffer | None]: ... + + def copy_to(self, destination: MemoryManager | Device) -> Self: ... + + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: ... + + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... 
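+
+    # The __arrow_c_array__/__arrow_c_device_array__ methods here implement
+    # the Arrow PyCapsule interface, which is what the SupportArrowArray and
+    # SupportArrowDeviceArray protocols in _stubs_typing.pyi match against.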
+    def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ...
+
+    @classmethod
+    def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: ...
+
+    def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+    @classmethod
+    def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+    def __dlpack__(self, stream: int | None = None) -> Any: ...
+
+    def __dlpack_device__(self) -> tuple[int, int]: ...
+
+    @property
+    def device_type(self) -> DeviceAllocationType: ...
+
+    @property
+    def is_cpu(self) -> bool: ...
+
+    @property
+    def statistics(self) -> ArrayStatistics | None: ...
+
+
+class NullArray(Array[NullScalar]):
+    ...
+
+
+class BooleanArray(Array[BooleanScalar]):
+
+    @property
+    def false_count(self) -> int: ...
+    @property
+    def true_count(self) -> int: ...
+
+
+class NumericArray(Array[_ScalarT]):
+    ...
+
+
+class IntegerArray(NumericArray[_ScalarT]):
+    ...
+
+
+class FloatingPointArray(NumericArray[_ScalarT]):
+    ...
+
+
+class Int8Array(IntegerArray[Int8Scalar]):
+    ...
+
+
+class UInt8Array(IntegerArray[UInt8Scalar]):
+    ...
+
+
+class Int16Array(IntegerArray[Int16Scalar]):
+    ...
+
+
+class UInt16Array(IntegerArray[UInt16Scalar]):
+    ...
+
+
+class Int32Array(IntegerArray[Int32Scalar]):
+    ...
+
+
+class UInt32Array(IntegerArray[UInt32Scalar]):
+    ...
+
+
+class Int64Array(IntegerArray[Int64Scalar]):
+    ...
+
+
+class UInt64Array(IntegerArray[UInt64Scalar]):
+    ...
+
+
+class Date32Array(NumericArray[Date32Scalar]):
+    ...
+
+
+class Date64Array(NumericArray[Date64Scalar]):
+    ...
+
+
+class TimestampArray(NumericArray[TimestampScalar[_Unit, _Tz]]):
+    ...
+
+
+class Time32Array(NumericArray[Time32Scalar[_Time32Unit]]):
+    ...
+
+
+class Time64Array(NumericArray[Time64Scalar[_Time64Unit]]):
+    ...
+
+
+class DurationArray(NumericArray[DurationScalar[_Unit]]):
+    ...
+
+
+class MonthDayNanoIntervalArray(Array[MonthDayNanoIntervalScalar]):
+    ...
+
+
+class HalfFloatArray(FloatingPointArray[HalfFloatScalar]):
+    ...
+
+
+class FloatArray(FloatingPointArray[FloatScalar]):
+    ...
+
+
+class DoubleArray(FloatingPointArray[DoubleScalar]):
+    ...
+
+
+class FixedSizeBinaryArray(Array[FixedSizeBinaryScalar]):
+    ...
+
+
+class Decimal32Array(FixedSizeBinaryArray):
+    ...
+
+
+class Decimal64Array(FixedSizeBinaryArray):
+    ...
+
+
+class Decimal128Array(FixedSizeBinaryArray):
+    ...
+
+
+class Decimal256Array(FixedSizeBinaryArray):
+    ...
+
+
+class BaseListArray(Array[_ScalarT]):
+    def flatten(self, recursive: bool = False) -> Array: ...
+
+    def value_parent_indices(self) -> Int64Array: ...
+
+    def value_lengths(self) -> Int32Array: ...
+
+
+class ListArray(BaseListArray[_ScalarT]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        offsets: Int32Array | list[int],
+        values: Array[Scalar[_DataTypeT]] | list[int] | list[float] | list[str] | list[bytes] | list,
+        *,
+        type: _DataTypeT | None = None,
+        pool: MemoryPool | None = None,
+        mask: Mask | None = None,
+    ) -> ListArray[ListScalar[_DataTypeT | Int64Type | Float64Type | StringType | BinaryType]] | ListArray: ...
+
+    @property
+    def values(self) -> Array: ...
+
+    @property
+    def offsets(self) -> Int32Array: ...
+
+
+class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        offsets: Int64Array,
+        values: Array[Scalar[_DataTypeT]] | Array,
+        *,
+        type: _DataTypeT | None = None,
+        pool: MemoryPool | None = None,
+        mask: Mask | None = None,
+    ) -> LargeListArray[_DataTypeT]: ...
+
+    @property
+    def values(self) -> Array: ...
+
+    @property
+    def offsets(self) -> Int64Array: ...
+
+
+class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        offsets: Int32Array,
+        values: Array[Scalar[_DataTypeT]] | Array,
+        *,
+        type: _DataTypeT | None = None,
+        pool: MemoryPool | None = None,
+        mask: Mask | None = None,
+    ) -> ListViewArray[_DataTypeT]: ...
+
+    @property
+    def values(self) -> Array: ...
+
+    @property
+    def offsets(self) -> Int32Array: ...
+
+    @property
+    def sizes(self) -> Int32Array: ...
+
+
+class LargeListViewArray(BaseListArray[LargeListViewScalar[_DataTypeT]]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        offsets: Int64Array,
+        values: Array[Scalar[_DataTypeT]] | Array,
+        *,
+        type: _DataTypeT | None = None,
+        pool: MemoryPool | None = None,
+        mask: Mask | None = None,
+    ) -> LargeListViewArray[_DataTypeT]: ...
+
+    @property
+    def values(self) -> Array: ...
+
+    @property
+    def offsets(self) -> Int64Array: ...
+
+    @property
+    def sizes(self) -> Int64Array: ...
+
+
+class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        values: Array[Scalar[_DataTypeT]],
+        list_size: _Size | None = None,
+        *,
+        type: None = None,
+        mask: Mask | None = None,
+    ) -> FixedSizeListArray[_DataTypeT, _Size | None]: ...
+
+    @property
+    def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: ...
+
+
+_MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType)
+_MapItemT = TypeVar("_MapItemT", bound=_BasicDataType)
+
+
+class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]):
+
+    @classmethod
+    def from_arrays(
+        cls,
+        offsets: Int64Array | list[int] | None,
+        keys: Array[Scalar[_MapKeyT]] | None = None,
+        items: Array[Scalar[_MapItemT]] | None = None,
+        values: Array | None = None,
+        *,
+        type: MapType[_MapKeyT, _MapItemT] | None = None,
+        pool: MemoryPool | None = None,
+        mask: Mask | None = None,
+    ) -> MapArray[_MapKeyT, _MapItemT]: ...
+
+    @property
+    def keys(self) -> Array: ...
+
+    @property
+    def items(self) -> Array: ...
+
+
+class UnionArray(Array[UnionScalar]):
+
+    @deprecated("Use field() instead")
+    def child(self, pos: int) -> Array: ...
+
+    def field(self, pos: int) -> Array: ...
+
+    @property
+    def type_codes(self) -> Int8Array: ...
+
+    @property
+    def offsets(self) -> Int32Array: ...
+
+    @staticmethod
+    def from_dense(
+        types: Int8Array,
+        value_offsets: Int32Array,
+        children: NullableCollection[Array],
+        field_names: list[str] | None = None,
+        type_codes: Int8Array | None = None,
+    ) -> UnionArray: ...
+
+    @staticmethod
+    def from_sparse(
+        types: Int8Array,
+        children: NullableCollection[Array],
+        field_names: list[str] | None = None,
+        type_codes: Int8Array | None = None,
+    ) -> UnionArray: ...
+
+
+class StringArray(Array[StringScalar]):
+
+    @staticmethod
+    def from_buffers(  # type: ignore[override]
+        length: int,
+        value_offsets: Buffer,
+        data: Buffer,
+        null_bitmap: Buffer | None = None,
+        null_count: int | None = -1,
+        offset: int | None = 0,
+    ) -> StringArray: ...
+
+
+class LargeStringArray(Array[LargeStringScalar]):
+
+    @staticmethod
+    def from_buffers(  # type: ignore[override]
+        length: int,
+        value_offsets: Buffer,
+        data: Buffer,
+        null_bitmap: Buffer | None = None,
+        null_count: int | None = -1,
+        offset: int | None = 0,
+    ) -> LargeStringArray: ...
+
+
+class StringViewArray(Array[StringViewScalar]):
+    ...
+
+
+class BinaryArray(Array[BinaryScalar]):
+
+    @property
+    def total_values_length(self) -> int: ...
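+
+
+# Illustrative only, not part of the stubs: offsets-based construction as
+# typed by the list-array classes above, e.g.
+#
+#   arr = ListArray.from_arrays(offsets=[0, 2, 3], values=[1, 2, 3])
+#   arr.offsets  # -> Int32Array
+#   arr.values   # -> Array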
+ + +class LargeBinaryArray(Array[LargeBinaryScalar]): + + @property + def total_values_length(self) -> int: ... + + +class BinaryViewArray(Array[BinaryViewScalar]): + ... + + +class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): + + def dictionary_encode(self) -> Self: ... # type: ignore[override] + def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: ... + + @property + def indices(self) -> Array[Scalar[_IndexT]]: ... + @property + def dictionary(self) -> Array[Scalar[_BasicValueT]]: ... + + @staticmethod + def from_buffers( # type: ignore[override] + type: _BasicValueT, + length: int, + buffers: list[Buffer], + dictionary: Array | np.ndarray | pd.Series, + null_count: int = -1, + offset: int = 0, + ) -> DictionaryArray[Any, _BasicValueT]: ... + + @staticmethod + def from_arrays( + indices: Indices, + dictionary: Array | np.ndarray | pd.Series, + mask: np.ndarray | pd.Series | BooleanArray | None = None, + ordered: bool = False, + from_pandas: bool = False, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> DictionaryArray: ... + + +class StructArray(Array[StructScalar]): + + def field(self, index: int | str) -> Array: ... + + def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ... + + @staticmethod + def from_arrays( + arrays: Iterable[Array], + names: list[str] | None = None, + fields: list[Field] | None = None, + mask=None, + memory_pool: MemoryPool | None = None, + type: StructType | None = None, + ) -> StructArray: ... + + def sort(self, order: Order = "ascending", by: str | + None = None, **kwargs) -> StructArray: ... + + +class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]): + + @staticmethod + def from_arrays( + run_ends: Int16Array | Int32Array | Int64Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[Int16Type | Int32Type | Int64Type, _BasicValueT]: ... # type: ignore[type-var] + + @staticmethod + def from_buffers( # type: ignore[override] + type: DataType, + length: int, + buffers: list[Buffer], + null_count: int = -1, + offset=0, + children: tuple[Array, Array] | None = None, + ) -> RunEndEncodedArray[Any, _BasicValueT]: ... + + @property + def run_ends(self) -> Array[Scalar[_RunEndType]]: ... + + @property + def values(self) -> Array[Scalar[_BasicValueT]]: ... + + def find_physical_offset(self) -> int: ... + + def find_physical_length(self) -> int: ... + + +_ArrayT = TypeVar("_ArrayT", bound=Array) + + +class ExtensionArray(Array[ExtensionScalar], Generic[_ArrayT]): + + @property + def storage(self) -> Any: ... + + @staticmethod + def from_storage(typ: BaseExtensionType, + storage: _ArrayT) -> ExtensionArray[_ArrayT]: ... + + +class JsonArray(ExtensionArray[_ArrayT]): + ... + + +class UuidArray(ExtensionArray[_ArrayT]): + ... + + +class FixedShapeTensorArray(ExtensionArray[_ArrayT]): + + def to_numpy_ndarray(self) -> np.ndarray: ... + + def to_tensor(self) -> Tensor: ... + + @classmethod + def from_numpy_ndarray(cls, obj: np.ndarray) -> Self: ... + + +class OpaqueArray(ExtensionArray[_ArrayT]): + ... + + +class Bool8Array(ExtensionArray): + + def to_numpy(self, zero_copy_only: bool = ..., + writable: bool = ...) -> np.ndarray: ... + + @classmethod + def from_storage(cls, storage: Int8Array) -> Self: ... # type: ignore[override] + + @classmethod + def from_numpy(cls, obj: np.ndarray) -> Self: ... + + +def concat_arrays(arrays: Iterable[_ArrayT], + memory_pool: MemoryPool | None = None) -> _ArrayT: ... 
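+
+# Illustrative usage of concat_arrays (a sketch, not part of the stub API; the
+# example values are assumed):
+#
+#   import pyarrow as pa
+#   combined = pa.concat_arrays([pa.array([1, 2]), pa.array([3, 4])])
+#   # combined is an Int64Array of [1, 2, 3, 4]; arrays of differing
+#   # types raise ArrowInvalid, which is why the return type is _ArrayT.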
+ + +def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: ... + + +__all__ = [ + "array", + "asarray", + "nulls", + "repeat", + "infer_type", + "_PandasConvertible", + "Array", + "NullArray", + "BooleanArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "FixedSizeBinaryArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "BaseListArray", + "ListArray", + "LargeListArray", + "ListViewArray", + "LargeListViewArray", + "FixedSizeListArray", + "MapArray", + "UnionArray", + "StringArray", + "LargeStringArray", + "StringViewArray", + "BinaryArray", + "LargeBinaryArray", + "BinaryViewArray", + "DictionaryArray", + "StructArray", + "RunEndEncodedArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "FixedShapeTensorArray", + "concat_arrays", + "_empty_array", + "_CastAs", +] diff --git a/python/pyarrow-stubs/builder.pyi b/python/pyarrow-stubs/builder.pyi new file mode 100644 index 00000000000..c379bd83afb --- /dev/null +++ b/python/pyarrow-stubs/builder.pyi @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Iterable + +from pyarrow.lib import MemoryPool, _Weakrefable + +from .array import StringArray, StringViewArray + + +class StringBuilder(_Weakrefable): + + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): ... + + def append_values(self, values: Iterable[str | bytes | None]): ... + + def finish(self) -> StringArray: ... + + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + + +class StringViewBuilder(_Weakrefable): + + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): ... + + def append_values(self, values: Iterable[str | bytes | None]): ... + + def finish(self) -> StringViewArray: ... + + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + + +__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow-stubs/cffi.pyi b/python/pyarrow-stubs/cffi.pyi new file mode 100644 index 00000000000..e4f077d7155 --- /dev/null +++ b/python/pyarrow-stubs/cffi.pyi @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import cffi + +c_source: str +ffi: cffi.FFI diff --git a/python/pyarrow-stubs/compat.pyi b/python/pyarrow-stubs/compat.pyi new file mode 100644 index 00000000000..2ea013555c0 --- /dev/null +++ b/python/pyarrow-stubs/compat.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def encode_file_path(path: str | bytes) -> bytes: ... +def tobytes(o: str | bytes) -> bytes: ... +def frombytes(o: bytes, *, safe: bool = False): ... + +__all__ = ["encode_file_path", "tobytes", "frombytes"] diff --git a/python/pyarrow-stubs/compute.pyi b/python/pyarrow-stubs/compute.pyi new file mode 100644 index 00000000000..235e8ffc34d --- /dev/null +++ b/python/pyarrow-stubs/compute.pyi @@ -0,0 +1,1518 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# ruff: noqa: I001 +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from collections.abc import Callable + +# Option classes +from pyarrow._compute import ArraySortOptions as ArraySortOptions +from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions +from pyarrow._compute import CastOptions as CastOptions +from pyarrow._compute import CountOptions as CountOptions +from pyarrow._compute import CumulativeOptions as CumulativeOptions +from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions +from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions +from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions +from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions + +# Expressions +from pyarrow._compute import Expression as Expression +from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions +from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions +from pyarrow._compute import FilterOptions as FilterOptions +from pyarrow._compute import Function as Function +from pyarrow._compute import FunctionOptions as FunctionOptions +from pyarrow._compute import FunctionRegistry as FunctionRegistry +from pyarrow._compute import HashAggregateFunction as HashAggregateFunction +from pyarrow._compute import HashAggregateKernel as HashAggregateKernel +from pyarrow._compute import IndexOptions as IndexOptions +from pyarrow._compute import JoinOptions as JoinOptions +from pyarrow._compute import Kernel as Kernel +from pyarrow._compute import ListFlattenOptions as ListFlattenOptions +from pyarrow._compute import ListSliceOptions as ListSliceOptions +from pyarrow._compute import MakeStructOptions as MakeStructOptions +from pyarrow._compute import MapLookupOptions as MapLookupOptions +from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions +from pyarrow._compute import ModeOptions as ModeOptions +from pyarrow._compute import NullOptions as NullOptions +from pyarrow._compute import PadOptions as PadOptions +from pyarrow._compute import PairwiseOptions as PairwiseOptions +from pyarrow._compute import PartitionNthOptions as PartitionNthOptions +from pyarrow._compute import PivotWiderOptions as PivotWiderOptions +from pyarrow._compute import QuantileOptions as QuantileOptions +from pyarrow._compute import RandomOptions as RandomOptions +from pyarrow._compute import RankOptions as RankOptions +from pyarrow._compute import RankQuantileOptions as RankQuantileOptions +from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions +from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions +from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions +from pyarrow._compute import RoundOptions as RoundOptions +from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions +from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions +from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions +from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction +from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel +from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions +from pyarrow._compute import ScalarFunction as ScalarFunction +from pyarrow._compute import ScalarKernel as ScalarKernel +from pyarrow._compute import SelectKOptions as SelectKOptions +from pyarrow._compute import 
SetLookupOptions as SetLookupOptions +from pyarrow._compute import SkewOptions as SkewOptions +from pyarrow._compute import SliceOptions as SliceOptions +from pyarrow._compute import SortOptions as SortOptions +from pyarrow._compute import SplitOptions as SplitOptions +from pyarrow._compute import SplitPatternOptions as SplitPatternOptions +from pyarrow._compute import StrftimeOptions as StrftimeOptions +from pyarrow._compute import StrptimeOptions as StrptimeOptions +from pyarrow._compute import StructFieldOptions as StructFieldOptions +from pyarrow._compute import TakeOptions as TakeOptions +from pyarrow._compute import TDigestOptions as TDigestOptions +from pyarrow._compute import TrimOptions as TrimOptions +from pyarrow._compute import UdfContext as UdfContext +from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import VarianceOptions as VarianceOptions +from pyarrow._compute import VectorFunction as VectorFunction +from pyarrow._compute import VectorKernel as VectorKernel +from pyarrow._compute import WeekOptions as WeekOptions +from pyarrow._compute import WinsorizeOptions as WinsorizeOptions + +# Functions +from pyarrow._compute import call_function as call_function + +# Udf +from pyarrow._compute import call_tabular_function as call_tabular_function +from pyarrow._compute import function_registry as function_registry +from pyarrow._compute import get_function as get_function +from pyarrow._compute import list_functions as list_functions +from pyarrow._compute import register_aggregate_function as register_aggregate_function +from pyarrow._compute import register_scalar_function as register_scalar_function +from pyarrow._compute import register_tabular_function as register_tabular_function +from pyarrow._compute import register_vector_function as register_vector_function + +from pyarrow._compute import _Order, _Placement +from pyarrow._stubs_typing import ArrayLike, ScalarLike +from . import lib + +_P = ParamSpec("_P") +_R = TypeVar("_R") + +def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: ... + + +def scalar(value: bool | float | str) -> Expression: ... + + +def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ... 
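+
+# Note on the pattern used throughout this file: _clone_signature is a
+# typing-only helper. Aliases such as `any = _clone_signature(all)` re-export
+# a kernel under a second name while reusing the donor's annotated signature,
+# so type checkers see the same parameters for both. Illustrative (values
+# assumed):
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   pc.any(pa.array([False, True]))  # checked like pc.all -> BooleanScalar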
+
+# ============= compute functions =============
+_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType)
+_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar)
+_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray)
+_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray)
+ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]
+ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT
+
+SignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.Int8Type]
+    | lib.Scalar[lib.Int16Type]
+    | lib.Scalar[lib.Int32Type]
+    | lib.Scalar[lib.Int64Type]
+)
+UnsignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.UInt8Type]
+    | lib.Scalar[lib.UInt16Type]
+    | lib.Scalar[lib.UInt32Type]
+    | lib.Scalar[lib.UInt64Type]
+)
+IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar
+FloatScalar: TypeAlias = (
+    lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type]
+)
+DecimalScalar: TypeAlias = (
+    lib.Scalar[lib.Decimal32Type]
+    | lib.Scalar[lib.Decimal64Type]
+    | lib.Scalar[lib.Decimal128Type]
+    | lib.Scalar[lib.Decimal256Type]
+)
+NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar
+NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar
+BinaryScalar: TypeAlias = (
+    lib.Scalar[lib.BinaryType]
+    | lib.Scalar[lib.LargeBinaryType]
+    | lib.Scalar[lib.FixedSizeBinaryType]
+)
+StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType]
+StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar
+_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any]
+_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT]
+ListScalar: TypeAlias = (
+    lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT]
+)
+TemporalScalar: TypeAlias = (
+    lib.Date32Scalar
+    | lib.Date64Scalar
+    | lib.Time32Scalar[Any]
+    | lib.Time64Scalar[Any]
+    | lib.TimestampScalar[Any]
+    | lib.DurationScalar[Any]
+    | lib.MonthDayNanoIntervalScalar
+)
+NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar
+NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar
+
+_NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar)
+_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar)
+NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT]
+_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray)
+_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar)
+NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar]
+_NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray)
+NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT]
+_NumericOrTemporalArrayT = TypeVar("_NumericOrTemporalArrayT", bound=NumericOrTemporalArray)
+BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar]
+_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray)
+IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar]
+_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar)
+FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar]
+_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray)
+_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar)
+StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar]
+_StringArrayT = TypeVar("_StringArrayT", bound=StringArray)
+_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar)
+BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar]
+_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray)
+_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar)
+StringOrBinaryArray: TypeAlias = StringArray | BinaryArray
+_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray)
+_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar)
+TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar]
+_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray)
+_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]]
+_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]]
+ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]]
+# =============================== 1. Aggregation ===============================
+
+# ========================= 1.1 functions =========================
+
+def all(
+    array: lib.BooleanScalar | BooleanArray,
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar: ...
+
+
+any = _clone_signature(all)
+
+def approximate_median(
+    array: NumericScalar | NumericArray,
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def count(
+    array: lib.Array | lib.ChunkedArray,
+    /,
+    mode: Literal["only_valid", "only_null", "all"] = "only_valid",
+    *,
+    options: CountOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+def count_distinct(
+    array: lib.Array | lib.ChunkedArray,
+    /,
+    mode: Literal["only_valid", "only_null", "all"] = "only_valid",
+    *,
+    options: CountOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+def first(
+    array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarT: ...
+
+
+def first_last(
+    array: lib.Array[Any] | lib.ChunkedArray[Any],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar: ...
+
+
+def index(
+    data: lib.Array[Any] | lib.ChunkedArray[Any],
+    value,
+    start: int | None = None,
+    end: int | None = None,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+last = _clone_signature(first)
+max = _clone_signature(first)
+min = _clone_signature(first)
+min_max = _clone_signature(first_last)
+
+def mean(
+    array: FloatScalar | FloatArray
+    | lib.NumericArray[lib.Scalar[Any]]
+    | lib.ChunkedArray[lib.Scalar[Any]]
+    | lib.Scalar[Any],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Scalar[Any]: ...
+
+
+def mode(
+    array: NumericScalar | NumericArray,
+    /,
+    n: int = 1,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 0,
+    options: ModeOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructArray: ...
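+
+# Illustrative (values assumed): mode returns a StructArray of
+# {"mode", "count"} pairs for the n most common values:
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   pc.mode(pa.array([1, 1, 2, 3]))  # -> [{"mode": 1, "count": 2}]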
+ + +def product( + array: _ScalarT | lib.NumericArray[_ScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarT: ... + + +def quantile( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + options: QuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +def stddev( + array: NumericScalar | NumericArray, + /, + *, + ddof: float = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: ... + + +def sum( + array: _NumericScalarT | NumericArray[_NumericScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... + + +def tdigest( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + options: TDigestOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +def variance( + array: NumericScalar | NumericArray, + /, + *, + ddof: int = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: ... + + +def top_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: ... + + +def bottom_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: ... + + +# ========================= 2. Element-wise (“scalar”) functions ========================= + +# ========================= 2.1 Arithmetic ========================= +def abs( + x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: ... + + +abs_checked = _clone_signature(abs) + +def add( + x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + y: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ... + + +add_checked = _clone_signature(add) + +def divide( + x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + y: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ... + + +divide_checked = _clone_signature(divide) + +def exp( + exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT | NonFloatNumericScalar | lib.DoubleScalar, + /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression: ... 
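+
+# Illustrative (values assumed): the arithmetic kernels in this section
+# broadcast scalars against arrays and preserve the input type:
+#
+#   import pyarrow as pa
+#   import pyarrow.compute as pc
+#   pc.add(pa.array([1, 2, 3]), 2)           # -> Int64Array [3, 4, 5]
+#   pc.add(pa.scalar(1.5), pa.scalar(2.5))   # -> DoubleScalar 4.0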
+
+
+multiply = _clone_signature(add)
+multiply_checked = _clone_signature(add)
+
+def negate(
+    x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: ...
+
+
+negate_checked = _clone_signature(negate)
+
+def power(
+    base: _NumericScalarT | _NumericArrayT | Expression | NumericScalar,
+    exponent: _NumericScalarT | _NumericArrayT | Expression | NumericScalar,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+power_checked = _clone_signature(power)
+
+def sign(
+    x: NumericOrDurationArray | NumericOrDurationScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+    lib.NumericArray[lib.Int8Scalar]
+    | lib.NumericArray[lib.FloatScalar]
+    | lib.NumericArray[lib.DoubleScalar]
+    | lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar | Expression
+): ...
+
+
+def sqrt(x: NumericArray | NumericScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray | FloatScalar | Expression: ...
+
+
+sqrt_checked = _clone_signature(sqrt)
+
+subtract = _clone_signature(add)
+subtract_checked = _clone_signature(add)
+
+# ========================= 2.1 Bit-wise functions =========================
+def bit_wise_and(
+    x: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar],
+    y: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar],
+    /, *, memory_pool: lib.MemoryPool | None = None
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+def bit_wise_not(
+    x: _NumericScalarT | _NumericArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+bit_wise_or = _clone_signature(bit_wise_and)
+bit_wise_xor = _clone_signature(bit_wise_and)
+shift_left = _clone_signature(bit_wise_and)
+shift_left_checked = _clone_signature(bit_wise_and)
+shift_right = _clone_signature(bit_wise_and)
+shift_right_checked = _clone_signature(bit_wise_and)
+
+# ========================= 2.2 Rounding functions =========================
+def ceil(x: _FloatScalarT | _FloatArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT | _FloatArrayT | Expression: ...
+
+
+floor = _clone_signature(ceil)
+
+def round(
+    x: _NumericScalarT | _NumericArrayT | Expression,
+    /,
+    ndigits: int = 0,
+    round_mode: Literal[
+        "down",
+        "up",
+        "towards_zero",
+        "towards_infinity",
+        "half_down",
+        "half_up",
+        "half_towards_zero",
+        "half_towards_infinity",
+        "half_to_even",
+        "half_to_odd",
+    ] = "half_to_even",
+    *,
+    options: RoundOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+def round_to_multiple(
+    x: _NumericScalarT | _NumericArrayT | Expression,
+    /,
+    multiple: float = 1.0,
+    round_mode: Literal[
+        "down",
+        "up",
+        "towards_zero",
+        "towards_infinity",
+        "half_down",
+        "half_up",
+        "half_towards_zero",
+        "half_towards_infinity",
+        "half_to_even",
+        "half_to_odd",
+    ] = "half_to_even",
+    *,
+    options: RoundToMultipleOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+ + +def round_binary( + x: _NumericScalarT | _NumericArrayT | Expression, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT | Expression: ... + + +trunc = _clone_signature(ceil) + +# ========================= 2.3 Logarithmic functions ========================= +def ln( + x: FloatScalar | FloatArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: ... + + +ln_checked = _clone_signature(ln) +log10 = _clone_signature(ln) +log10_checked = _clone_signature(ln) +log1p = _clone_signature(ln) +log1p_checked = _clone_signature(ln) +log2 = _clone_signature(ln) +log2_checked = _clone_signature(ln) + +def logb( + x: FloatScalar | FloatArray | Expression | Any, b: FloatScalar | FloatArray | Expression | Any, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression | Any: ... + + +logb_checked = _clone_signature(logb) + +# ========================= 2.4 Trigonometric functions ========================= +acos = _clone_signature(ln) +acos_checked = _clone_signature(ln) +asin = _clone_signature(ln) +asin_checked = _clone_signature(ln) +atan = _clone_signature(ln) +cos = _clone_signature(ln) +cos_checked = _clone_signature(ln) +sin = _clone_signature(ln) +sin_checked = _clone_signature(ln) +tan = _clone_signature(ln) +tan_checked = _clone_signature(ln) + +def atan2( + y: FloatScalar | FloatArray | Expression | Any, x: FloatScalar | FloatArray | Expression | Any, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: ... + + +# ========================= 2.5 Comparisons functions ========================= +def equal( + x: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + y: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +greater = _clone_signature(equal) +greater_equal = _clone_signature(equal) +less = _clone_signature(equal) +less_equal = _clone_signature(equal) +not_equal = _clone_signature(equal) + +def max_element_wise( + *args: ScalarOrArray[_Scalar_CoT] | Expression, + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _Scalar_CoT | Expression: ... + + +min_element_wise = _clone_signature(max_element_wise) + +# ========================= 2.6 Logical functions ========================= +def and_( + x: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], + y: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar]: ... 
+ + +and_kleene = _clone_signature(and_) +and_not = _clone_signature(and_) +and_not_kleene = _clone_signature(and_) +or_ = _clone_signature(and_) +or_kleene = _clone_signature(and_) +xor = _clone_signature(and_) + +def invert( + x: lib.BooleanScalar | _BooleanArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | _BooleanArrayT | Expression: ... + + +# ========================= 2.10 String predicates ========================= +def ascii_is_alnum( + strings: StringScalar | StringArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +ascii_is_alpha = _clone_signature(ascii_is_alnum) +ascii_is_decimal = _clone_signature(ascii_is_alnum) +ascii_is_lower = _clone_signature(ascii_is_alnum) +ascii_is_printable = _clone_signature(ascii_is_alnum) +ascii_is_space = _clone_signature(ascii_is_alnum) +ascii_is_upper = _clone_signature(ascii_is_alnum) +utf8_is_alnum = _clone_signature(ascii_is_alnum) +utf8_is_alpha = _clone_signature(ascii_is_alnum) +utf8_is_decimal = _clone_signature(ascii_is_alnum) +utf8_is_digit = _clone_signature(ascii_is_alnum) +utf8_is_lower = _clone_signature(ascii_is_alnum) +utf8_is_numeric = _clone_signature(ascii_is_alnum) +utf8_is_printable = _clone_signature(ascii_is_alnum) +utf8_is_space = _clone_signature(ascii_is_alnum) +utf8_is_upper = _clone_signature(ascii_is_alnum) +ascii_is_title = _clone_signature(ascii_is_alnum) +utf8_is_title = _clone_signature(ascii_is_alnum) +string_is_ascii = _clone_signature(ascii_is_alnum) + +# ========================= 2.11 String transforms ========================= +def ascii_capitalize( + strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT | _StringArrayT | Expression: ... + + +ascii_lower = _clone_signature(ascii_capitalize) +ascii_reverse = _clone_signature(ascii_capitalize) +ascii_swapcase = _clone_signature(ascii_capitalize) +ascii_title = _clone_signature(ascii_capitalize) +ascii_upper = _clone_signature(ascii_capitalize) + +def binary_length( + strings: lib.BinaryScalar | lib.StringScalar | lib.LargeBinaryScalar | lib.LargeStringScalar + | lib.BinaryArray | lib.StringArray + | lib.ChunkedArray[lib.BinaryScalar] | lib.ChunkedArray[lib.StringScalar] + | lib.LargeBinaryArray | lib.LargeStringArray + | lib.ChunkedArray[lib.LargeBinaryScalar] | lib.ChunkedArray[lib.LargeStringScalar] + | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... + + +def binary_repeat( + strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, + num_repeats: int | list[int] | list[int | None], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT | Expression: ... + + +def binary_replace_slice( + strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ... + + +def binary_reverse( + strings: _BinaryScalarT | _BinaryArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _BinaryScalarT | _BinaryArrayT | Expression: ... 
+ + +def replace_substring( + strings: _StringScalarT | _StringArrayT | Expression, + /, + pattern: str | bytes, + replacement: str | bytes, + *, + max_replacements: int | None = None, + options: ReplaceSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... + + +replace_substring_regex = _clone_signature(replace_substring) + +def utf8_capitalize( + strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT | _StringArrayT | Expression: ... + + +def utf8_length( + strings: lib.StringScalar | lib.LargeStringScalar | lib.StringArray | lib.ChunkedArray[lib.StringScalar] + | lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... + + +utf8_lower = _clone_signature(utf8_capitalize) + +def utf8_replace_slice( + strings: _StringScalarT | _StringArrayT | Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... + + +utf8_reverse = _clone_signature(utf8_capitalize) +utf8_swapcase = _clone_signature(utf8_capitalize) +utf8_title = _clone_signature(utf8_capitalize) +utf8_upper = _clone_signature(utf8_capitalize) + +# ========================= 2.12 String padding ========================= +def ascii_center( + strings: _StringScalarT | _StringArrayT | Expression, + /, + width: int, + padding: str = " ", + lean_left_on_odd_padding: bool = True, + *, + options: PadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... + + +ascii_lpad = _clone_signature(ascii_center) +ascii_rpad = _clone_signature(ascii_center) +utf8_center = _clone_signature(ascii_center) +utf8_lpad = _clone_signature(ascii_center) +utf8_rpad = _clone_signature(ascii_center) + +# ========================= 2.13 String trimming ========================= +def ascii_ltrim( + strings: _StringScalarT | _StringArrayT | Expression, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... + + +ascii_rtrim = _clone_signature(ascii_ltrim) +ascii_trim = _clone_signature(ascii_ltrim) +utf8_ltrim = _clone_signature(ascii_ltrim) +utf8_rtrim = _clone_signature(ascii_ltrim) +utf8_trim = _clone_signature(ascii_ltrim) + +def ascii_ltrim_whitespace( + strings: _StringScalarT | _StringArrayT | Expression, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... 
+ + +ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) + +# ========================= 2.14 String splitting ========================= +def ascii_split_whitespace( + strings: _StringScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression, + /, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: ... + + +def split_pattern( + strings: _StringOrBinaryScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression, + /, + pattern: str, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: ... + + +split_pattern_regex = _clone_signature(split_pattern) +utf8_split_whitespace = _clone_signature(ascii_split_whitespace) + +# ========================= 2.15 String component extraction ========================= +def extract_regex( + strings: StringOrBinaryScalar | StringOrBinaryArray | Expression, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar | lib.StructArray | Expression: ... + + +# ========================= 2.16 String join ========================= +def binary_join( + strings, separator, /, *, memory_pool: lib.MemoryPool | None = None +) -> StringScalar | StringArray: ... + + +def binary_join_element_wise( + *strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ... + + +# ========================= 2.17 String Slicing ========================= +def binary_slice( + strings: _BinaryScalarT | _BinaryArrayT | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryScalarT | _BinaryArrayT | Expression: ... + + +def utf8_slice_codeunits( + strings: _StringScalarT | _StringArrayT | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: ... + + +# ========================= 2.18 Containment tests ========================= +def count_substring( + strings: lib.StringScalar | lib.BinaryScalar | lib.LargeStringScalar | lib.LargeBinaryScalar + | lib.StringArray | lib.BinaryArray + | lib.ChunkedArray[lib.StringScalar] | lib.ChunkedArray[lib.BinaryScalar] + | lib.LargeStringArray | lib.LargeBinaryArray + | lib.ChunkedArray[lib.LargeStringScalar] | lib.ChunkedArray[lib.LargeBinaryScalar] + | Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... 
+ + +count_substring_regex = _clone_signature(count_substring) + +def ends_with( + strings: StringScalar | BinaryScalar | StringArray | BinaryArray | Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +find_substring = _clone_signature(count_substring) +find_substring_regex = _clone_signature(count_substring) + +def index_in( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + value_set: lib.Array | lib.ChunkedArray | Expression, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar | lib.Int32Array | Expression: ... + + +def is_in( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + value_set: lib.Array | lib.ChunkedArray | Expression, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray: ... + + +match_like = _clone_signature(ends_with) +match_substring = _clone_signature(ends_with) +match_substring_regex = _clone_signature(ends_with) +starts_with = _clone_signature(ends_with) + +# ========================= 2.19 Categorizations ========================= +def is_finite( + values: NumericScalar | lib.NullScalar | NumericArray | lib.NullArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +is_inf = _clone_signature(is_finite) +is_nan = _clone_signature(is_finite) + +def is_null( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +def is_valid( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +true_unless_null = _clone_signature(is_valid) + +# ========================= 2.20 Selecting / multiplexing ========================= +def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): ... + + +def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): ... + + +def coalesce( + *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT: ... + + +fill_null = coalesce + +def if_else( + cond: ArrayLike | ScalarLike, + left: ArrayLike | ScalarLike, + right: ArrayLike | ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: ... + + +# ========================= 2.21 Structural transforms ========================= + +def list_value_length( + lists: _ListArray[Any] | _LargeListArray[Any] | ListArray[Any] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array | lib.Int64Array | Expression: ... + + +def make_struct( + *args: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar | lib.StructArray | Expression: ... 
+ + +# ========================= 2.22 Conversions ========================= +def ceil_temporal( + timestamps: _TemporalScalarT | _TemporalArrayT | Expression, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalScalarT | _TemporalArrayT | Expression: ... + + +floor_temporal = _clone_signature(ceil_temporal) +round_temporal = _clone_signature(ceil_temporal) + +def cast( + arr: lib.Scalar | lib.Array | lib.ChunkedArray, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Scalar[_DataTypeT] | lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... + + +def strftime( + timestamps: TemporalScalar | TemporalArray | Expression, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringScalar | lib.StringArray | Expression: ... + + +def strptime( + strings: StringScalar | StringArray | Expression, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... + + +# ========================= 2.23 Temporal component extraction ========================= +def day( + values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +def day_of_week( + values: TemporalScalar | TemporalArray | Expression, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +day_of_year = _clone_signature(day) + +def hour( + values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any] + | lib.TimestampArray[Any] | lib.Time32Array[Any] | lib.Time64Array[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] + | lib.ChunkedArray[lib.Time32Scalar[Any]] + | lib.ChunkedArray[lib.Time64Scalar[Any]] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +def is_dst( + values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +def iso_week( + values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... 
+ + +iso_year = _clone_signature(iso_week) + +def is_leap_year( + values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar | lib.TimestampArray + | lib.Date32Array + | lib.Date64Array + | lib.ChunkedArray[lib.TimestampScalar] + | lib.ChunkedArray[lib.Date32Scalar] + | lib.ChunkedArray[lib.Date64Scalar] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +microsecond = _clone_signature(iso_week) +millisecond = _clone_signature(iso_week) +minute = _clone_signature(iso_week) +month = _clone_signature(day_of_week) +nanosecond = _clone_signature(hour) +quarter = _clone_signature(day_of_week) +second = _clone_signature(hour) +subsecond = _clone_signature(hour) +us_week = _clone_signature(iso_week) +us_year = _clone_signature(iso_week) +year = _clone_signature(iso_week) + +def week( + values: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +def year_month_day( + values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructScalar | lib.StructArray | Expression: ... + + +# ========================= 2.24 Temporal difference ========================= +def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): ... + + +def days_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array: ... + + +hours_between = _clone_signature(days_between) +microseconds_between = _clone_signature(days_between) +milliseconds_between = _clone_signature(days_between) +minutes_between = _clone_signature(days_between) + +def month_day_nano_interval_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: ... + + +def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): ... + + +nanoseconds_between = _clone_signature(days_between) +quarters_between = _clone_signature(days_between) +seconds_between = _clone_signature(days_between) + +def weeks_between( + start, + end, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array: ... + + +years_between = _clone_signature(days_between) + +# ========================= 2.25 Timezone handling ========================= +def assume_timezone( + timestamps: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression: ... + + +def local_timestamp( + timestamps: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... 
+ + +# ========================= 2.26 Random number generation ========================= +def random( + n: int, + *, + initializer: Literal["system"] | int = "system", + options: RandomOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +# ========================= 3. Array-wise (“vector”) functions ========================= + +# ========================= 3.1 Cumulative Functions ========================= +def cumulative_sum( + values: _NumericArrayT | Expression, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT | Expression: ... + + +cumulative_sum_checked = _clone_signature(cumulative_sum) +cumulative_prod = _clone_signature(cumulative_sum) +cumulative_prod_checked = _clone_signature(cumulative_sum) +cumulative_max = _clone_signature(cumulative_sum) +cumulative_min = _clone_signature(cumulative_sum) +cumulative_mean = _clone_signature(cumulative_sum) +# ========================= 3.2 Associative transforms ========================= + +def dictionary_encode( + array: _ScalarOrArrayT | Expression, + /, + null_encoding: Literal["mask", "encode"] = "mask", + *, + options=None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | Expression: ... +def unique(array: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT | Expression: ... +def value_counts( + array: lib.Array | lib.ChunkedArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructArray | Expression: ... + +# ========================= 3.3 Selections ========================= +@overload +def array_filter( + array: _ArrayT, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... +@overload +def array_filter( + array: Expression, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def array_take( + array: _ArrayT, + indices: list[int] + | list[int | None] + | lib.Int16Array + | lib.Int32Array + | lib.Int64Array + | lib.ChunkedArray[lib.Int16Scalar] + | lib.ChunkedArray[lib.Int32Scalar] + | lib.ChunkedArray[lib.Int64Scalar], + /, + *, + boundscheck: bool = True, + options: TakeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... +@overload +def array_take( + array: Expression, + indices: list[int] + | list[int | None] + | lib.Int16Array + | lib.Int32Array + | lib.Int64Array + | lib.ChunkedArray[lib.Int16Scalar] + | lib.ChunkedArray[lib.Int32Scalar] + | lib.ChunkedArray[lib.Int64Scalar], + /, + *, + boundscheck: bool = True, + options: TakeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def drop_null(input: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... +@overload +def drop_null( + input: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... 
+ +filter = array_filter +take = array_take + +# ========================= 3.4 Containment tests ========================= +def indices_nonzero( + values: lib.BooleanArray + | lib.NullArray + | NumericArray + | lib.Decimal128Array + | lib.Decimal256Array | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +# ========================= 3.5 Sorts and partitions ========================= +def array_sort_indices( + array: lib.Array | lib.ChunkedArray | Expression, + /, + order: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: ArraySortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def partition_nth_indices( + array: lib.Array | lib.ChunkedArray | Expression, + /, + pivot: int, + *, + null_placement: _Placement = "at_end", + options: PartitionNthOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def rank( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + options: RankOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... + + +def select_k_unstable( + input: lib.Array | lib.ChunkedArray | Expression, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def sort_indices( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +# ========================= 3.6 Structural transforms ========================= +def list_element( + lists: lib.Array[ListScalar[_DataTypeT]] | lib.ChunkedArray[ListScalar[_DataTypeT]] | ListScalar[_DataTypeT] | Expression, + index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] | _DataTypeT | Expression: ... + + +def list_flatten( + lists: ArrayOrChunkedArray[ListScalar[Any]] | Expression, + /, + recursive: bool = False, + *, + options: ListFlattenOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[Any] | Expression: ... + + +def list_parent_indices( + lists: ArrayOrChunkedArray[Any] | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Array | Expression: ... + + +def list_slice( + lists: ArrayOrChunkedArray[Any] | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + *, + options: ListSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[Any] | Expression: ... + + +def map_lookup( + container, + /, + query_key, + occurrence: str, + *, + options: MapLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): ... + + +def struct_field( + values, + /, + indices, + *, + options: StructFieldOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): ... + + +def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): ... 
+ + +def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): ... + + +def replace_with_mask( + values, + mask: list[bool] | list[bool | None] | BooleanArray, + replacements, + /, + *, + memory_pool: lib.MemoryPool | None = None, +): ... + + +# ========================= 3.7 Pairwise functions ========================= +def pairwise_diff( + input: _NumericOrTemporalArrayT | Expression, + /, + period: int = 1, + *, + options: PairwiseOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT | Expression: ... + + +pairwise_diff_checked = _clone_signature(pairwise_diff) diff --git a/python/pyarrow-stubs/config.pyi b/python/pyarrow-stubs/config.pyi new file mode 100644 index 00000000000..62555a506f3 --- /dev/null +++ b/python/pyarrow-stubs/config.pyi @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import NamedTuple + + +class VersionInfo(NamedTuple): + major: int + minor: int + patch: int + + +class BuildInfo(NamedTuple): + version: str + version_info: VersionInfo + so_version: str + full_so_version: str + compiler_id: str + compiler_version: str + compiler_flags: str + git_id: str + git_description: str + package_kind: str + build_type: str + + +class RuntimeInfo(NamedTuple): + simd_level: str + detected_simd_level: str + + +cpp_build_info: BuildInfo +cpp_version: str +cpp_version_info: VersionInfo + + +def runtime_info() -> RuntimeInfo: ... +def set_timezone_db_path(path: str) -> None: ... + + +__all__ = [ + "VersionInfo", + "BuildInfo", + "RuntimeInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "set_timezone_db_path", +] diff --git a/python/pyarrow-stubs/csv.pyi b/python/pyarrow-stubs/csv.pyi new file mode 100644 index 00000000000..a7abd413aab --- /dev/null +++ b/python/pyarrow-stubs/csv.pyi @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyarrow._csv import ( + ISO8601, + ConvertOptions, + CSVStreamingReader, + CSVWriter, + InvalidRow, + ParseOptions, + ReadOptions, + WriteOptions, + open_csv, + read_csv, + write_csv, +) + +__all__ = [ + "ISO8601", + "ConvertOptions", + "CSVStreamingReader", + "CSVWriter", + "InvalidRow", + "ParseOptions", + "ReadOptions", + "WriteOptions", + "open_csv", + "read_csv", + "write_csv", +] diff --git a/python/pyarrow-stubs/cuda.pyi b/python/pyarrow-stubs/cuda.pyi new file mode 100644 index 00000000000..0394965bb73 --- /dev/null +++ b/python/pyarrow-stubs/cuda.pyi @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow._cuda import ( + BufferReader, + BufferWriter, + Context, + CudaBuffer, + HostBuffer, + IpcMemHandle, + new_host_buffer, + read_message, + read_record_batch, + serialize_record_batch, +) + +__all__ = [ + "BufferReader", + "BufferWriter", + "Context", + "CudaBuffer", + "HostBuffer", + "IpcMemHandle", + "new_host_buffer", + "read_message", + "read_record_batch", + "serialize_record_batch", +] diff --git a/python/pyarrow-stubs/dataset.pyi b/python/pyarrow-stubs/dataset.pyi new file mode 100644 index 00000000000..160ed19ee4b --- /dev/null +++ b/python/pyarrow-stubs/dataset.pyi @@ -0,0 +1,272 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload
+
+from _typeshed import StrPath
+from pyarrow._dataset import (
+    CsvFileFormat,
+    CsvFragmentScanOptions,
+    Dataset,
+    DatasetFactory,
+    DirectoryPartitioning,
+    FeatherFileFormat,
+    FileFormat,
+    FileFragment,
+    FilenamePartitioning,
+    FileSystemDataset,
+    FileSystemDatasetFactory,
+    FileSystemFactoryOptions,
+    FileWriteOptions,
+    Fragment,
+    FragmentScanOptions,
+    HivePartitioning,
+    InMemoryDataset,
+    IpcFileFormat,
+    IpcFileWriteOptions,
+    JsonFileFormat,
+    JsonFragmentScanOptions,
+    Partitioning,
+    PartitioningFactory,
+    Scanner,
+    TaggedRecordBatch,
+    UnionDataset,
+    UnionDatasetFactory,
+    WrittenFile,
+    get_partition_keys,
+)
+from pyarrow._dataset_orc import OrcFileFormat
+from pyarrow._dataset_parquet import (
+    ParquetDatasetFactory,
+    ParquetFactoryOptions,
+    ParquetFileFormat,
+    ParquetFileFragment,
+    ParquetFileWriteOptions,
+    ParquetFragmentScanOptions,
+    ParquetReadOptions,
+    RowGroupInfo,
+)
+from pyarrow._dataset_parquet_encryption import (
+    ParquetDecryptionConfig,
+    ParquetEncryptionConfig,
+)
+from pyarrow.compute import Expression, field, scalar
+from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table
+
+from ._fs import SupportedFileSystem
+
+_orc_available: bool
+_parquet_available: bool
+
+__all__ = [
+    "CsvFileFormat",
+    "CsvFragmentScanOptions",
+    "Dataset",
+    "DatasetFactory",
+    "DirectoryPartitioning",
+    "FeatherFileFormat",
+    "FileFormat",
+    "FileFragment",
+    "FilenamePartitioning",
+    "FileSystemDataset",
+    "FileSystemDatasetFactory",
+    "FileSystemFactoryOptions",
+    "FileWriteOptions",
+    "Fragment",
+    "FragmentScanOptions",
+    "HivePartitioning",
+    "InMemoryDataset",
+    "IpcFileFormat",
+    "IpcFileWriteOptions",
+    "JsonFileFormat",
+    "JsonFragmentScanOptions",
+    "Partitioning",
+    "PartitioningFactory",
+    "Scanner",
+    "TaggedRecordBatch",
+    "UnionDataset",
+    "UnionDatasetFactory",
+    "WrittenFile",
+    "get_partition_keys",
+    # Orc
+    "OrcFileFormat",
+    # Parquet
+    "ParquetDatasetFactory",
+    "ParquetFactoryOptions",
+    "ParquetFileFormat",
+    "ParquetFileFragment",
+    "ParquetFileWriteOptions",
+    "ParquetFragmentScanOptions",
+    "ParquetReadOptions",
+    "RowGroupInfo",
+    # Parquet Encryption
+    "ParquetDecryptionConfig",
+    "ParquetEncryptionConfig",
+    # Compute
+    "Expression",
+    "field",
+    "scalar",
+    # Dataset
+    "dataset",
+    "partitioning",
+    "parquet_dataset",
+    "write_dataset",
+]
+
+_DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"]
+
+
+@overload
+def partitioning(
+    schema: Schema,
+) -> Partitioning: ...
+
+
+@overload
+def partitioning(
+    schema: Schema,
+    *,
+    flavor: Literal["filename"],
+    dictionaries: dict[str, Array] | None = None,
+) -> Partitioning: ...
+
+
+@overload
+def partitioning(
+    schema: Schema,
+    *,
+    flavor: Literal["filename"],
+    dictionaries: Literal["infer"],
+) -> PartitioningFactory: ...
+
+
+@overload
+def partitioning(
+    field_names: list[str],
+    *,
+    flavor: Literal["filename"],
+) -> PartitioningFactory: ...
+
+
+@overload
+def partitioning(
+    schema: Schema,
+    *,
+    flavor: Literal["hive"],
+    dictionaries: Literal["infer"],
+) -> PartitioningFactory: ...
+
+
+@overload
+def partitioning(
+    *,
+    flavor: Literal["hive"],
+) -> PartitioningFactory: ...
+
+
+@overload
+def partitioning(
+    schema: Schema,
+    *,
+    flavor: Literal["hive"],
+    dictionaries: dict[str, Array] | None = None,
+) -> Partitioning: ...
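The overloads above encode the rule that a concrete Partitioning is returned
when the schema and dictionaries are fully specified, while a
PartitioningFactory is returned when something must be inferred. A sketch of
the common calls:

    import pyarrow as pa
    import pyarrow.dataset as ds

    # Directory layout like /2024/11/... -> concrete Partitioning
    part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())]))

    # Hive layout like /year=2024/... with a fixed schema
    hive = ds.partitioning(pa.schema([("year", pa.int16())]), flavor="hive")

    # Hive layout with keys and types inferred -> PartitioningFactory
    factory = ds.partitioning(flavor="hive")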
+ + +def parquet_dataset( + metadata_path: StrPath, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + format: ParquetFileFormat | None = None, + partitioning: Partitioning | PartitioningFactory | None = None, + partition_base_dir: str | None = None, +) -> FileSystemDataset: ... + + +@overload +def dataset( + source: StrPath | Sequence[StrPath], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> FileSystemDataset: ... + + +@overload +def dataset( + source: list[Dataset], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> UnionDataset: ... + + +@overload +def dataset( + source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... + + +@overload +def dataset( + source: RecordBatch | Table, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... + + +def write_dataset( + data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], + base_dir: StrPath, + *, + basename_template: str | None = None, + format: FileFormat | _DatasetFormat | None = None, + partitioning: Partitioning | list[str] | None = None, + partitioning_flavor: str | None = None, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + file_options: FileWriteOptions | None = None, + use_threads: bool = True, + max_partitions: int = 1024, + max_open_files: int = 1024, + max_rows_per_file: int = 0, + min_rows_per_group: int = 0, + max_rows_per_group: int = 1024 * 1024, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["error", + "overwrite_or_ignore", "delete_matching"] = "error", + create_dir: bool = True, +): ... diff --git a/python/pyarrow-stubs/device.pyi b/python/pyarrow-stubs/device.pyi new file mode 100644 index 00000000000..d77fe2504af --- /dev/null +++ b/python/pyarrow-stubs/device.pyi @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import enum + +from pyarrow.lib import _Weakrefable + + +class DeviceAllocationType(enum.Flag): + CPU = enum.auto() + CUDA = enum.auto() + CUDA_HOST = enum.auto() + OPENCL = enum.auto() + VULKAN = enum.auto() + METAL = enum.auto() + VPI = enum.auto() + ROCM = enum.auto() + ROCM_HOST = enum.auto() + EXT_DEV = enum.auto() + CUDA_MANAGED = enum.auto() + ONEAPI = enum.auto() + WEBGPU = enum.auto() + HEXAGON = enum.auto() + + +class Device(_Weakrefable): + + @property + def type_name(self) -> str: ... + + @property + def device_id(self) -> int: ... + + @property + def is_cpu(self) -> bool: ... + + @property + def device_type(self) -> DeviceAllocationType: ... + + +class MemoryManager(_Weakrefable): + + @property + def device(self) -> Device: ... + + @property + def is_cpu(self) -> bool: ... + + +def default_cpu_memory_manager() -> MemoryManager: ... + + +__all__ = ["DeviceAllocationType", "Device", + "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow-stubs/error.pyi b/python/pyarrow-stubs/error.pyi new file mode 100644 index 00000000000..c1e1a04ee40 --- /dev/null +++ b/python/pyarrow-stubs/error.pyi @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + +ArrowIOError = IOError + +class StopToken: ... + +def enable_signal_handlers(enable: bool) -> None: ... + +have_signal_refcycle: bool + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... 
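Because each Arrow exception above also subclasses the matching built-in,
callers may catch either form; a small sketch:

    import pyarrow as pa

    try:
        pa.array(["not a number"]).cast(pa.int32())
    except pa.ArrowInvalid as exc:  # a ValueError subclass, per the hierarchy above
        print(f"cast failed: {exc}")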
+ +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow-stubs/feather.pyi b/python/pyarrow-stubs/feather.pyi new file mode 100644 index 00000000000..10281e91152 --- /dev/null +++ b/python/pyarrow-stubs/feather.pyi @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import IO, Literal + +import pandas as pd + +from _typeshed import StrPath +from pyarrow._feather import FeatherError +from pyarrow.lib import Table + +__all__ = [ + "FeatherError", + "FeatherDataset", + "check_chunked_overflow", + "write_feather", + "read_feather", + "read_table", +] + + +class FeatherDataset: + path_or_paths: str | list[str] + validate_schema: bool + + def __init__(self, path_or_paths: str | + list[str], validate_schema: bool = True) -> None: ... + + def read_table(self, columns: list[str] | None = None) -> Table: ... + def validate_schemas(self, piece, table: Table) -> None: ... + + def read_pandas( + self, columns: list[str] | None = None, use_threads: bool = True + ) -> pd.DataFrame: ... + + +def check_chunked_overflow(name: str, col) -> None: ... + + +def write_feather( + df: pd.DataFrame | Table, + dest: StrPath | IO, + compression: Literal["zstd", "lz4", "uncompressed"] | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: Literal[1, 2] = 2, +) -> None: ... + + +def read_feather( + source: StrPath | IO, + columns: list[str] | None = None, + use_threads: bool = True, + memory_map: bool = False, + **kwargs, +) -> pd.DataFrame: ... + + +def read_table( + source: StrPath | IO, + columns: list[str] | None = None, + memory_map: bool = False, + use_threads: bool = True, +) -> Table: ... diff --git a/python/pyarrow-stubs/flight.pyi b/python/pyarrow-stubs/flight.pyi new file mode 100644 index 00000000000..dcc6ee2244b --- /dev/null +++ b/python/pyarrow-stubs/flight.pyi @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow._flight import ( + Action, + ActionType, + BasicAuth, + CallInfo, + CertKeyPair, + ClientAuthHandler, + ClientMiddleware, + ClientMiddlewareFactory, + DescriptorType, + FlightCallOptions, + FlightCancelledError, + FlightClient, + FlightDataStream, + FlightDescriptor, + FlightEndpoint, + FlightError, + FlightInfo, + FlightInternalError, + FlightMetadataReader, + FlightMetadataWriter, + FlightMethod, + FlightServerBase, + FlightServerError, + FlightStreamChunk, + FlightStreamReader, + FlightStreamWriter, + FlightTimedOutError, + FlightUnauthenticatedError, + FlightUnauthorizedError, + FlightUnavailableError, + FlightWriteSizeExceededError, + GeneratorStream, + Location, + MetadataRecordBatchReader, + MetadataRecordBatchWriter, + RecordBatchStream, + Result, + SchemaResult, + ServerAuthHandler, + ServerCallContext, + ServerMiddleware, + ServerMiddlewareFactory, + Ticket, + TracingServerMiddlewareFactory, + connect, +) + +__all__ = [ + "Action", + "ActionType", + "BasicAuth", + "CallInfo", + "CertKeyPair", + "ClientAuthHandler", + "ClientMiddleware", + "ClientMiddlewareFactory", + "DescriptorType", + "FlightCallOptions", + "FlightCancelledError", + "FlightClient", + "FlightDataStream", + "FlightDescriptor", + "FlightEndpoint", + "FlightError", + "FlightInfo", + "FlightInternalError", + "FlightMetadataReader", + "FlightMetadataWriter", + "FlightMethod", + "FlightServerBase", + "FlightServerError", + "FlightStreamChunk", + "FlightStreamReader", + "FlightStreamWriter", + "FlightTimedOutError", + "FlightUnauthenticatedError", + "FlightUnauthorizedError", + "FlightUnavailableError", + "FlightWriteSizeExceededError", + "GeneratorStream", + "Location", + "MetadataRecordBatchReader", + "MetadataRecordBatchWriter", + "RecordBatchStream", + "Result", + "SchemaResult", + "ServerAuthHandler", + "ServerCallContext", + "ServerMiddleware", + "ServerMiddlewareFactory", + "Ticket", + "TracingServerMiddlewareFactory", + "connect", +] diff --git a/python/pyarrow-stubs/fs.pyi b/python/pyarrow-stubs/fs.pyi new file mode 100644 index 00000000000..61a557ea428 --- /dev/null +++ b/python/pyarrow-stubs/fs.pyi @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyarrow._fs import ( # noqa + FileSelector, + FileType, + FileInfo, + FileSystem, + LocalFileSystem, + SubTreeFileSystem, + _MockFileSystem, + FileSystemHandler, + PyFileSystem, + SupportedFileSystem, +) +from pyarrow._azurefs import AzureFileSystem +from pyarrow._hdfs import HadoopFileSystem +from pyarrow._gcsfs import GcsFileSystem +from pyarrow._s3fs import ( # noqa + AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, + S3FileSystem, + S3LogLevel, + S3RetryStrategy, + ensure_s3_initialized, + finalize_s3, + ensure_s3_finalized, + initialize_s3, + resolve_s3_region, +) + +FileStats = FileInfo + + +def copy_files( + source: str, + destination: str, + source_filesystem: SupportedFileSystem | None = None, + destination_filesystem: SupportedFileSystem | None = None, + *, + chunk_size: int = 1024 * 1024, + use_threads: bool = True, +) -> None: ... + + +class FSSpecHandler(FileSystemHandler): # type: ignore[misc] + fs: SupportedFileSystem + def __init__(self, fs: SupportedFileSystem) -> None: ... + + +__all__ = [ + # _fs + "FileSelector", + "FileType", + "FileInfo", + "FileSystem", + "LocalFileSystem", + "SubTreeFileSystem", + "_MockFileSystem", + "FileSystemHandler", + "PyFileSystem", + # _azurefs + "AzureFileSystem", + # _hdfs + "HadoopFileSystem", + # _gcsfs + "GcsFileSystem", + # _s3fs + "AwsDefaultS3RetryStrategy", + "AwsStandardS3RetryStrategy", + "S3FileSystem", + "S3LogLevel", + "S3RetryStrategy", + "ensure_s3_initialized", + "finalize_s3", + "ensure_s3_finalized", + "initialize_s3", + "resolve_s3_region", + # fs + "FileStats", + "copy_files", + "FSSpecHandler", +] diff --git a/python/pyarrow-stubs/gandiva.pyi b/python/pyarrow-stubs/gandiva.pyi new file mode 100644 index 00000000000..bc07e15c4a6 --- /dev/null +++ b/python/pyarrow-stubs/gandiva.pyi @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Iterable, Literal + +from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable + +class Node(_Weakrefable): + def return_type(self) -> DataType: ... + +class Expression(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class Condition(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class SelectionVector(_Weakrefable): + def to_array(self) -> Array: ... + +class Projector(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, selection: SelectionVector | None = None + ) -> list[Array]: ... + +class Filter(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32" + ) -> SelectionVector: ... 
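A projection sketch tying these classes together (TreeExprBuilder and
make_projector are declared just below). This assumes a Gandiva-enabled
pyarrow build, which not every binary distribution ships:

    import pyarrow as pa
    import pyarrow.gandiva as gandiva

    schema = pa.schema([("x", pa.float64())])
    builder = gandiva.TreeExprBuilder()
    node = builder.make_function(
        "multiply",
        [builder.make_field(schema.field("x")), builder.make_literal(2.0, pa.float64())],
        pa.float64(),
    )
    expr = builder.make_expression(node, pa.field("y", pa.float64()))
    projector = gandiva.make_projector(schema, [expr], pa.default_memory_pool())

    batch = pa.record_batch([pa.array([1.0, 3.5])], schema=schema)
    (result,) = projector.evaluate(batch)  # DoubleArray: [2.0, 7.0]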
+ +class TreeExprBuilder(_Weakrefable): + def make_literal(self, value: float | str | bytes | bool, dtype: DataType) -> Node: ... + def make_expression(self, root_node: Node, return_field: Field) -> Expression: ... + def make_function(self, name: str, children: list[Node], return_type: DataType) -> Node: ... + def make_field(self, field: Field) -> Node: ... + def make_if( + self, condition: Node, this_node: Node, else_node: Node, return_type: DataType + ) -> Node: ... + def make_and(self, children: list[Node]) -> Node: ... + def make_or(self, children: list[Node]) -> Node: ... + def make_in_expression(self, node: Node, values: Iterable, dtype: DataType) -> Node: ... + def make_condition(self, condition: Node) -> Condition: ... + +class Configuration(_Weakrefable): + def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ... + +def make_projector( + schema: Schema, + children: list[Expression], + pool: MemoryPool, + selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE", + configuration: Configuration | None = None, +) -> Projector: ... +def make_filter( + schema: Schema, condition: Condition, configuration: Configuration | None = None +) -> Filter: ... + +class FunctionSignature(_Weakrefable): + def return_type(self) -> DataType: ... + def param_types(self) -> list[DataType]: ... + def name(self) -> str: ... + +def get_registered_function_signatures() -> list[FunctionSignature]: ... diff --git a/python/pyarrow-stubs/interchange/__init__.pyi b/python/pyarrow-stubs/interchange/__init__.pyi new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/python/pyarrow-stubs/interchange/__init__.pyi @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyarrow-stubs/interchange/buffer.pyi b/python/pyarrow-stubs/interchange/buffer.pyi new file mode 100644 index 00000000000..e1d8ae949c9 --- /dev/null +++ b/python/pyarrow-stubs/interchange/buffer.pyi @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import enum + +from pyarrow.lib import Buffer + + +class DlpackDeviceType(enum.IntEnum): + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + + +class _PyArrowBuffer: + def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... + @property + def bufsize(self) -> int: ... + @property + def ptr(self) -> int: ... + def __dlpack__(self): ... + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: ... diff --git a/python/pyarrow-stubs/interchange/column.pyi b/python/pyarrow-stubs/interchange/column.pyi new file mode 100644 index 00000000000..04861a72b0b --- /dev/null +++ b/python/pyarrow-stubs/interchange/column.pyi @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import enum + +from typing import Any, Iterable, TypeAlias, TypedDict + +from pyarrow.lib import Array, ChunkedArray + +from .buffer import _PyArrowBuffer + + +class DtypeKind(enum.IntEnum): + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + + +Dtype: TypeAlias = tuple[DtypeKind, int, str, str] + + +class ColumnNullType(enum.IntEnum): + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + + +class ColumnBuffers(TypedDict): + data: tuple[_PyArrowBuffer, Dtype] + validity: tuple[_PyArrowBuffer, Dtype] | None + offsets: tuple[_PyArrowBuffer, Dtype] | None + + +class CategoricalDescription(TypedDict): + is_ordered: bool + is_dictionary: bool + categories: _PyArrowColumn | None + + +class Endianness(enum.Enum): + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + + +class NoBufferPresent(Exception): + ... + + +class _PyArrowColumn: + def __init__(self, column: Array | ChunkedArray, + allow_copy: bool = True) -> None: ... + + def size(self) -> int: ... + @property + def offset(self) -> int: ... + @property + def dtype(self) -> tuple[DtypeKind, int, str, str]: ... + @property + def describe_categorical(self) -> CategoricalDescription: ... + @property + def describe_null(self) -> tuple[ColumnNullType, Any]: ... + @property + def null_count(self) -> int: ... + @property + def metadata(self) -> dict[str, Any]: ... + def num_chunks(self) -> int: ... + def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: ... + def get_buffers(self) -> ColumnBuffers: ... diff --git a/python/pyarrow-stubs/interchange/dataframe.pyi b/python/pyarrow-stubs/interchange/dataframe.pyi new file mode 100644 index 00000000000..cafbe0fc200 --- /dev/null +++ b/python/pyarrow-stubs/interchange/dataframe.pyi @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Any, Iterable, Sequence + +from pyarrow.interchange.column import _PyArrowColumn +from pyarrow.lib import RecordBatch, Table + + +class _PyArrowDataFrame: + def __init__( + self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: ... + + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: ... + @property + def metadata(self) -> dict[str, Any]: ... + def num_columns(self) -> int: ... + def num_rows(self) -> int: ... + def num_chunks(self) -> int: ... + def column_names(self) -> Iterable[str]: ... + def get_column(self, i: int) -> _PyArrowColumn: ... + def get_column_by_name(self, name: str) -> _PyArrowColumn: ... + def get_columns(self) -> Iterable[_PyArrowColumn]: ... + def select_columns(self, indices: Sequence[int]) -> Self: ... + def select_columns_by_name(self, names: Sequence[str]) -> Self: ... + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: ... diff --git a/python/pyarrow-stubs/interchange/from_dataframe.pyi b/python/pyarrow-stubs/interchange/from_dataframe.pyi new file mode 100644 index 00000000000..e7f1c6e91ff --- /dev/null +++ b/python/pyarrow-stubs/interchange/from_dataframe.pyi @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Protocol, TypeAlias + +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table + +from .column import ( + ColumnBuffers, + ColumnNullType, + Dtype, + DtypeKind, +) + + +class DataFrameObject(Protocol): + def __dataframe__(self, nan_as_null: bool = False, + allow_copy: bool = True) -> Any: ... + + +ColumnObject: TypeAlias = Any + + +def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: ... + + +def protocol_df_chunk_to_pyarrow( + df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: ... + + +def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... + + +def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... 
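The public entry point is from_dataframe, which accepts any object exposing
__dataframe__; a minimal sketch, assuming pandas is installed:

    import pandas as pd
    import pyarrow.interchange as pi

    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    table = pi.from_dataframe(df)  # a pyarrow.Table built via the protocol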
+ + +def categorical_column_to_dictionary( + col: ColumnObject, allow_copy: bool = True +) -> DictionaryArray: ... + + +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: ... + + +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: ... + + +def buffers_to_array( + buffers: ColumnBuffers, + data_type: tuple[DtypeKind, int, str, str], + length: int, + describe_null: ColumnNullType, + offset: int = 0, + allow_copy: bool = True, +) -> Array: ... + + +def validity_buffer_from_mask( + validity_buff: Buffer, + validity_dtype: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: ... + + +def validity_buffer_nan_sentinel( + data_pa_buffer: Buffer, + data_type: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: ... diff --git a/python/pyarrow-stubs/io.pyi b/python/pyarrow-stubs/io.pyi new file mode 100644 index 00000000000..ea259f02142 --- /dev/null +++ b/python/pyarrow-stubs/io.pyi @@ -0,0 +1,428 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +from collections.abc import Callable +from io import IOBase + +from _typeshed import StrPath + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Any, Literal, SupportsIndex +import builtins + +from pyarrow._stubs_typing import Compression, SupportPyBuffer +from pyarrow.lib import MemoryPool, _Weakrefable + +from .device import Device, DeviceAllocationType, MemoryManager +from ._types import KeyValueMetadata + +def have_libhdfs() -> bool: ... + + +def io_thread_count() -> int: ... + + +def set_io_thread_count(count: int) -> None: ... + + +Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"] + +class NativeFile(_Weakrefable): + + + _default_chunk_size: int + + def __enter__(self) -> Self: ... + def __exit__(self, *args) -> None: ... + @property + def mode(self) -> Mode: ... + + def readable(self) -> bool: ... + def seekable(self) -> bool: ... + def isatty(self) -> bool: ... + def fileno(self) -> int: ... + + @property + def closed(self) -> bool: ... + def close(self) -> None: ... + def size(self) -> int: ... + + def metadata(self) -> KeyValueMetadata: ... + + def tell(self) -> int: ... + + def seek(self, position: int, whence: int = 0) -> int: ... + + def flush(self) -> None: ... + + def write(self, data: bytes | SupportPyBuffer) -> int: ... + + def read(self, nbytes: int | None = None) -> bytes: ... + + def get_stream(self, file_offset: int, nbytes: int) -> Self: ... + + def read_at(self, nbytes: int, offset: int) -> bytes: ... 
+
+    def read1(self, nbytes: int | None = None) -> bytes: ...
+
+    def readall(self) -> bytes: ...
+    def readinto(self, b: SupportPyBuffer) -> int: ...
+
+
+    def readline(self, size: int | None = None) -> bytes: ...
+
+    def readlines(self, hint: int | None = None) -> list[bytes]: ...
+
+    def __iter__(self) -> Self: ...
+
+    def __next__(self) -> bytes: ...
+    def read_buffer(self, nbytes: int | None = None) -> Buffer: ...
+
+    def truncate(self) -> None: ...
+
+    def writelines(self, lines: list[bytes]) -> None: ...
+
+    def download(self, stream_or_path: StrPath | IOBase, buffer_size: int | None = None) -> None: ...
+
+    def upload(self, stream: IOBase, buffer_size: int | None = None) -> None: ...
+
+
+    def writable(self) -> bool: ...
+
+# ----------------------------------------------------------------------
+# Python file-like objects
+
+class PythonFile(NativeFile):
+
+    def __init__(self, handle: IOBase, mode: Literal["r", "w"] | None = None) -> None: ...
+    def truncate(self, pos: int | None = None) -> None: ...
+
+
+class MemoryMappedFile(NativeFile):
+
+    @classmethod
+    def create(cls, path: str, size: int) -> Self: ...
+
+    def _open(self, path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"): ...
+    def resize(self, new_size: int) -> None: ...
+
+
+def memory_map(
+    path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"
+) -> MemoryMappedFile: ...
+
+
+create_memory_map = MemoryMappedFile.create
+
+class OSFile(NativeFile):
+
+    def __init__(
+        self,
+        path: str,
+        mode: Literal["r", "rb", "w", "wb", "a", "ab"],
+        memory_pool: MemoryPool | None = None,
+    ) -> None: ...
+
+class FixedSizeBufferWriter(NativeFile):
+
+    def __init__(self, buffer: Buffer) -> None: ...
+    def set_memcopy_threads(self, num_threads: int) -> None: ...
+
+    def set_memcopy_blocksize(self, blocksize: int) -> None: ...
+
+    def set_memcopy_threshold(self, threshold: int) -> None: ...
+
+
+# ----------------------------------------------------------------------
+# Arrow buffers
+
+class Buffer(_Weakrefable):
+
+    def __len__(self) -> int: ...
+
+    def _assert_cpu(self) -> None: ...
+    @property
+    def size(self) -> int: ...
+
+    @property
+    def address(self) -> int: ...
+
+    def hex(self) -> bytes: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+
+    @property
+    def is_cpu(self) -> bool: ...
+
+    @property
+    def device(self) -> Device: ...
+
+    @property
+    def memory_manager(self) -> MemoryManager: ...
+
+    @property
+    def device_type(self) -> DeviceAllocationType: ...
+
+    @property
+    def parent(self) -> Buffer | None: ...
+    def __getitem__(self, key: builtins.slice | int) -> Self | int: ...
+
+    def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]: ...
+    def to_pybytes(self) -> bytes: ...
+
+
+class ResizableBuffer(Buffer):
+
+
+    def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: ...
+
+
+def allocate_buffer(
+    size: int, memory_pool: MemoryPool | None = None, resizable: Literal[False] | Literal[True] | None = None
+) -> Buffer | ResizableBuffer: ...
+
+
+# ----------------------------------------------------------------------
+# Arrow Stream
+class BufferOutputStream(NativeFile):
+
+    def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+    def getvalue(self) -> Buffer: ...
+
+
+class MockOutputStream(NativeFile): ...
+
+class BufferReader(NativeFile):
+
+    def __init__(self, obj) -> None: ...
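A small sketch of the buffer/reader pairing (py_buffer is declared further
below): py_buffer wraps any buffer-protocol object zero-copy, and BufferReader
exposes the result as a seekable NativeFile:

    import pyarrow as pa

    buf = pa.py_buffer(b"hello world")  # zero-copy view of the bytes object
    reader = pa.BufferReader(buf)
    assert reader.read(5) == b"hello"
    reader.seek(0)
    assert reader.size() == 11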
+ + +class CompressedInputStream(NativeFile): + + + def __init__( + self, + stream: StrPath | NativeFile | IOBase, + compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], + ) -> None: ... + + +class CompressedOutputStream(NativeFile): + + def __init__( + self, + stream: StrPath | NativeFile | IOBase, + compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], + ) -> None: ... + + +class BufferedInputStream(NativeFile): + + def __init__( + self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None + ) -> None: ... + + def detach(self) -> NativeFile: ... + + +class BufferedOutputStream(NativeFile): + + def __init__( + self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None + ) -> None: ... + + def detach(self) -> NativeFile: ... + + +class TransformInputStream(NativeFile): + + def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: ... + + +class Transcoder: + def __init__(self, decoder, encoder) -> None: ... + def __call__(self, buf: Buffer): ... + +def transcoding_input_stream( + stream: NativeFile, src_encoding: str, dest_encoding: str +) -> TransformInputStream: ... + + +def py_buffer(obj: SupportPyBuffer) -> Buffer: ... + + +def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: ... + + +def as_buffer(o: Buffer | SupportPyBuffer) -> Buffer: ... + +# --------------------------------------------------------------------- + +class CacheOptions(_Weakrefable): + + + hole_size_limit: int + range_size_limit: int + lazy: bool + prefetch_limit: int + def __init__( + self, + *, + hole_size_limit: int | None = None, + range_size_limit: int | None = None, + lazy: bool = True, + prefetch_limit: int = 0, + ) -> None: ... + + @classmethod + def from_network_metrics( + cls, + time_to_first_byte_millis: int, + transfer_bandwidth_mib_per_sec: int, + ideal_bandwidth_utilization_frac: float = 0.9, + max_ideal_request_size_mib: int = 64, + ) -> Self: ... + + +class Codec(_Weakrefable): + + def __init__(self, compression: Compression, compression_level: int | None = None) -> None: ... + + @classmethod + def detect(cls, path: StrPath) -> Self: ... + + @staticmethod + def is_available(compression: Compression) -> bool: ... + + @staticmethod + def supports_compression_level(compression: Compression) -> int: ... + + @staticmethod + def default_compression_level(compression: Compression) -> int: ... + + @staticmethod + def minimum_compression_level(compression: Compression) -> int: ... + + @staticmethod + def maximum_compression_level(compression: Compression) -> int: ... + + @property + def name(self) -> Compression: ... + + @property + def compression_level(self) -> int: ... + + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + asbytes: Literal[False] | Literal[True] | None = None, + memory_pool: MemoryPool | None = None, + ) -> Buffer | bytes: ... + + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + asbytes: Literal[False] | Literal[True] | None = None, + memory_pool: MemoryPool | None = None, + ) -> Buffer | bytes: ... + + +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + asbytes: Literal[False] | Literal[True] | None = None, + memory_pool: MemoryPool | None = None, +) -> Buffer | bytes: ... 
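A one-shot compression sketch (decompress is declared just below). For codecs
whose frames do not record the uncompressed length, decompress needs it passed
explicitly:

    import pyarrow as pa

    data = b"abc" * 1000
    compressed = pa.compress(data, codec="gzip", asbytes=True)
    restored = pa.decompress(
        compressed, decompressed_size=len(data), codec="gzip", asbytes=True
    )
    assert restored == data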
+
+
+def decompress(
+    buf: Buffer | bytes | SupportPyBuffer,
+    decompressed_size: int | None = None,
+    codec: Compression = "lz4",
+    *,
+    asbytes: Literal[False] | Literal[True] | None = None,
+    memory_pool: MemoryPool | None = None,
+) -> Buffer | bytes: ...
+
+
+def input_stream(
+    source: StrPath | Buffer | IOBase,
+    compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect",
+    buffer_size: int | None = None,
+) -> BufferReader: ...
+
+
+def output_stream(
+    source: StrPath | Buffer | IOBase,
+    compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect",
+    buffer_size: int | None = None,
+) -> NativeFile: ...
+
+
+__all__ = [
+    "have_libhdfs",
+    "io_thread_count",
+    "set_io_thread_count",
+    "NativeFile",
+    "PythonFile",
+    "MemoryMappedFile",
+    "memory_map",
+    "create_memory_map",
+    "OSFile",
+    "FixedSizeBufferWriter",
+    "Buffer",
+    "ResizableBuffer",
+    "allocate_buffer",
+    "BufferOutputStream",
+    "MockOutputStream",
+    "BufferReader",
+    "CompressedInputStream",
+    "CompressedOutputStream",
+    "BufferedInputStream",
+    "BufferedOutputStream",
+    "TransformInputStream",
+    "Transcoder",
+    "transcoding_input_stream",
+    "py_buffer",
+    "foreign_buffer",
+    "as_buffer",
+    "CacheOptions",
+    "Codec",
+    "compress",
+    "decompress",
+    "input_stream",
+    "output_stream",
+]
diff --git a/python/pyarrow-stubs/ipc.pyi b/python/pyarrow-stubs/ipc.pyi
new file mode 100644
index 00000000000..a6e7c71dd12
--- /dev/null
+++ b/python/pyarrow-stubs/ipc.pyi
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from io import IOBase
+
+import pandas as pd
+import pyarrow.lib as lib
+
+from pyarrow.lib import (
+    IpcReadOptions,
+    IpcWriteOptions,
+    Message,
+    MessageReader,
+    MetadataVersion,
+    ReadStats,
+    RecordBatchReader,
+    WriteStats,
+    _ReadPandasMixin,
+    get_record_batch_size,
+    get_tensor_size,
+    read_message,
+    read_record_batch,
+    read_schema,
+    read_tensor,
+    write_tensor,
+)
+
+
+class RecordBatchStreamReader(lib._RecordBatchStreamReader):
+    def __init__(
+        self,
+        source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+        *,
+        options: IpcReadOptions | None = None,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> None: ...
+
+
+class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
+    def __init__(
+        self,
+        sink: str | lib.NativeFile | IOBase,
+        schema: lib.Schema,
+        *,
+        use_legacy_format: bool | None = None,
+        options: IpcWriteOptions | None = None,
+    ) -> None: ...
+
+
+class RecordBatchFileReader(lib._RecordBatchFileReader):
+    def __init__(
+        self,
+        source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+        footer_offset: int | None = None,
+        *,
+        options: IpcReadOptions | None = None,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> None: ...
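A stream round trip using the convenience constructors declared below
(new_stream/open_stream); the reader and writer classes above are what those
calls return:

    import pyarrow as pa
    import pyarrow.ipc as ipc

    table = pa.table({"x": [1, 2, 3]})
    sink = pa.BufferOutputStream()
    with ipc.new_stream(sink, table.schema) as writer:
        writer.write_table(table)

    with ipc.open_stream(sink.getvalue()) as reader:
        assert reader.read_all().equals(table)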
+ + +class RecordBatchFileWriter(lib._RecordBatchFileWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + + +def new_stream( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchStreamWriter: ... + + +def open_stream( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchStreamReader: ... + + +def new_file( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchFileWriter: ... + + +def open_file( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchFileReader: ... + + +def serialize_pandas( + df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None +) -> lib.Buffer: ... + + +def deserialize_pandas( + buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... + + +__all__ = [ + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "MetadataVersion", + "ReadStats", + "RecordBatchReader", + "WriteStats", + "_ReadPandasMixin", + "get_record_batch_size", + "get_tensor_size", + "read_message", + "read_record_batch", + "read_schema", + "read_tensor", + "write_tensor", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "new_stream", + "open_stream", + "new_file", + "open_file", + "serialize_pandas", + "deserialize_pandas", +] diff --git a/python/pyarrow-stubs/json.pyi b/python/pyarrow-stubs/json.pyi new file mode 100644 index 00000000000..67768db42e4 --- /dev/null +++ b/python/pyarrow-stubs/json.pyi @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json + +__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi new file mode 100644 index 00000000000..43c40b61cf8 --- /dev/null +++ b/python/pyarrow-stubs/lib.pyi @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# ruff: noqa: F403 +from typing import NamedTuple + +from .array import * +from .builder import * +from .compat import * +from .config import * +from .device import * +from .error import * +from .io import * +from ._ipc import * +from .memory import * +from .pandas_shim import * +from .scalar import * +from .table import * +from .tensor import * +from ._types import * + + +class MonthDayNano(NamedTuple): + days: int + months: int + nanoseconds: int + + +def cpu_count() -> int: ... + + +def set_cpu_count(count: int) -> None: ... + + +def is_threading_enabled() -> bool: ... + + +Type_NA: int +Type_BOOL: int +Type_UINT8: int +Type_INT8: int +Type_UINT16: int +Type_INT16: int +Type_UINT32: int +Type_INT32: int +Type_UINT64: int +Type_INT64: int +Type_HALF_FLOAT: int +Type_FLOAT: int +Type_DOUBLE: int +Type_DECIMAL128: int +Type_DECIMAL256: int +Type_DATE32: int +Type_DATE64: int +Type_TIMESTAMP: int +Type_TIME32: int +Type_TIME64: int +Type_DURATION: int +Type_INTERVAL_MONTH_DAY_NANO: int +Type_BINARY: int +Type_STRING: int +Type_LARGE_BINARY: int +Type_LARGE_STRING: int +Type_FIXED_SIZE_BINARY: int +Type_BINARY_VIEW: int +Type_STRING_VIEW: int +Type_LIST: int +Type_LARGE_LIST: int +Type_LIST_VIEW: int +Type_LARGE_LIST_VIEW: int +Type_MAP: int +Type_FIXED_SIZE_LIST: int +Type_STRUCT: int +Type_SPARSE_UNION: int +Type_DENSE_UNION: int +Type_DICTIONARY: int +Type_RUN_END_ENCODED: int +UnionMode_SPARSE: int +UnionMode_DENSE: int diff --git a/python/pyarrow-stubs/memory.pyi b/python/pyarrow-stubs/memory.pyi new file mode 100644 index 00000000000..ab5db5b1f06 --- /dev/null +++ b/python/pyarrow-stubs/memory.pyi @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow.lib import _Weakrefable + +class MemoryPool(_Weakrefable): + + + def release_unused(self) -> None: ... + + def bytes_allocated(self) -> int: ... + + def total_bytes_allocated(self) -> int: ... + + def max_memory(self) -> int | None: ... + + def num_allocations(self) -> int: ... + + def print_stats(self) -> None: ... + + @property + def backend_name(self) -> str: ... + + +class LoggingMemoryPool(MemoryPool): ... +class ProxyMemoryPool(MemoryPool): ... + + +def default_memory_pool() -> MemoryPool: ... 
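A quick sketch of pool introspection with the accessors above:

    import pyarrow as pa

    pool = pa.default_memory_pool()
    before = pool.bytes_allocated()
    arr = pa.array(range(1_000_000))
    print(pool.backend_name, pool.bytes_allocated() - before)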
+ + +def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: ... + + +def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: ... + + +def system_memory_pool() -> MemoryPool: ... + + +def jemalloc_memory_pool() -> MemoryPool: ... + + +def mimalloc_memory_pool() -> MemoryPool: ... + + +def set_memory_pool(pool: MemoryPool) -> None: ... + + +def log_memory_allocations(enable: bool = True) -> None: ... + + +def total_allocated_bytes() -> int: ... + + +def jemalloc_set_decay_ms(decay_ms: int) -> None: ... + + +def supported_memory_backends() -> list[str]: ... + + +__all__ = [ + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "default_memory_pool", + "proxy_memory_pool", + "logging_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "set_memory_pool", + "log_memory_allocations", + "total_allocated_bytes", + "jemalloc_set_decay_ms", + "supported_memory_backends", +] diff --git a/python/pyarrow-stubs/orc.pyi b/python/pyarrow-stubs/orc.pyi new file mode 100644 index 00000000000..5e0289e61f7 --- /dev/null +++ b/python/pyarrow-stubs/orc.pyi @@ -0,0 +1,146 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Literal + +from _typeshed import StrPath + +from . import _orc +from ._fs import SupportedFileSystem +from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table + + +class ORCFile: + + reader: _orc.ORCReader + def __init__(self, source: StrPath | NativeFile | IO) -> None: ... + @property + def metadata(self) -> KeyValueMetadata: ... + + @property + def schema(self) -> Schema: ... + + @property + def nrows(self) -> int: ... + + @property + def nstripes(self) -> int: ... + + @property + def file_version(self) -> str: ... + + @property + def software_version(self) -> str: ... + + @property + def compression(self) -> Literal["UNCOMPRESSED", + "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + + @property + def compression_size(self) -> int: ... + + @property + def writer(self) -> str: ... + + @property + def writer_version(self) -> str: ... + + @property + def row_index_stride(self) -> int: ... + + @property + def nstripe_statistics(self) -> int: ... + + @property + def content_length(self) -> int: ... + + @property + def stripe_statistics_length(self) -> int: ... + + @property + def file_footer_length(self) -> int: ... + + @property + def file_postscript_length(self) -> int: ... + + @property + def file_length(self) -> int: ... + + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + + def read(self, columns: list[str] | None = None) -> Table: ... 
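+# Example (editorial sketch, not part of the stub API): reading through the
+# ORCFile class above; "data.orc" is a hypothetical path.
+#
+#     import pyarrow.orc as orc
+#
+#     f = orc.ORCFile("data.orc")
+#     print(f.nstripes, f.compression)  # e.g. 4 "ZLIB"
+#     batch = f.read_stripe(0)          # -> RecordBatch
+#     table = f.read(columns=["a"])     # -> Table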
+ + +class ORCWriter: + + writer: _orc.ORCWriter + is_open: bool + + def __init__( + self, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", + "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + def write(self, table: Table) -> None: ... + + def close(self) -> None: ... + + +def read_table( + source: StrPath | NativeFile | IO, + columns: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Table: ... + + +def write_table( + table: Table, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", + "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, +) -> None: ... diff --git a/python/pyarrow-stubs/pandas_compat.pyi b/python/pyarrow-stubs/pandas_compat.pyi new file mode 100644 index 00000000000..f25d1ad24a6 --- /dev/null +++ b/python/pyarrow-stubs/pandas_compat.pyi @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, TypedDict, TypeVar + +import numpy as np +import pandas as pd + +from pandas import DatetimeTZDtype + +from .lib import Array, DataType, Schema, Table + +_T = TypeVar("_T") + + +def get_logical_type_map() -> dict[int, str]: ... +def get_logical_type(arrow_type: DataType) -> str: ... +def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... +def get_logical_type_from_numpy(pandas_collection) -> str: ... +def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... + + +class _ColumnMetadata(TypedDict): + name: str + field_name: str + pandas_type: int + numpy_type: str + metadata: dict | None + + +def get_column_metadata( + column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str +) -> _ColumnMetadata: ... 
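+# Example (editorial sketch, not part of the stub API): these helpers back
+# the pandas conversion path, and the metadata they build surfaces on the
+# converted schema.
+#
+#     import pandas as pd
+#     import pyarrow as pa
+#
+#     df = pd.DataFrame({"x": [1, 2, 3]})
+#     table = pa.Table.from_pandas(df)
+#     print(table.schema.pandas_metadata)  # decoded b"pandas" key/value metadata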
+ + +def construct_metadata( + columns_to_convert: list[pd.Series], + df: pd.DataFrame, + column_names: list[str], + index_levels: list[pd.Index], + index_descriptors: list[dict], + preserve_index: bool, + types: list[DataType], + column_field_names: list[str] = ..., +) -> dict[bytes, bytes]: ... + + +def dataframe_to_types( + df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None +) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... + + +def dataframe_to_arrays( + df: pd.DataFrame, + schema: Schema, + preserve_index: bool | None, + nthreads: int = 1, + columns: list[str] | None = None, + safe: bool = True, +) -> tuple[Array, Schema, int]: ... +def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... +def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... + + +def table_to_dataframe( + options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None +) -> pd.DataFrame: ... +def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ... diff --git a/python/pyarrow-stubs/pandas_shim.pyi b/python/pyarrow-stubs/pandas_shim.pyi new file mode 100644 index 00000000000..e62767b1591 --- /dev/null +++ b/python/pyarrow-stubs/pandas_shim.pyi @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from types import ModuleType +from typing import Any, Iterable, TypeGuard + +from pandas import Categorical, DatetimeTZDtype, Index, Series, DataFrame + +from numpy import dtype +from pandas.core.dtypes.base import ExtensionDtype + +class _PandasAPIShim: + has_sparse: bool + + def series(self, *args, **kwargs) -> Series: ... + def data_frame(self, *args, **kwargs) -> DataFrame: ... + @property + def have_pandas(self) -> bool: ... + @property + def compat(self) -> ModuleType: ... + @property + def pd(self) -> ModuleType: ... + def infer_dtype(self, obj: Iterable) -> str: ... + def pandas_dtype(self, dtype: str) -> dtype: ... + @property + def loose_version(self) -> Any: ... + @property + def version(self) -> str: ... + def is_v1(self) -> bool: ... + def is_ge_v21(self) -> bool: ... + def is_ge_v23(self) -> bool: ... + def is_ge_v3(self) -> bool: ... + @property + def categorical_type(self) -> type[Categorical]: ... + @property + def datetimetz_type(self) -> type[DatetimeTZDtype]: ... + @property + def extension_dtype(self) -> type[ExtensionDtype]: ... + def is_array_like( + self, obj: Any + ) -> TypeGuard[Series | Index | Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[DatetimeTZDtype]: ... + def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... + def is_sparse(self, obj: Any) -> bool: ... 
+ def is_data_frame(self, obj: Any) -> TypeGuard[DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[Series]: ... + def is_index(self, obj: Any) -> TypeGuard[Index]: ... + def get_values(self, obj: Any) -> bool: ... + def get_rangeindex_attribute(self, level, name): ... + +_pandas_api: _PandasAPIShim + +__all__ = ["_PandasAPIShim", "_pandas_api"] diff --git a/python/pyarrow-stubs/parquet/__init__.pyi b/python/pyarrow-stubs/parquet/__init__.pyi new file mode 100644 index 00000000000..8d0b5374ea0 --- /dev/null +++ b/python/pyarrow-stubs/parquet/__init__.pyi @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .core import * # noqa diff --git a/python/pyarrow-stubs/parquet/core.pyi b/python/pyarrow-stubs/parquet/core.pyi new file mode 100644 index 00000000000..8cb4f152ff7 --- /dev/null +++ b/python/pyarrow-stubs/parquet/core.pyi @@ -0,0 +1,355 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import sys + +from pathlib import Path + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Callable, Iterator, Literal, Sequence + +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow import _parquet +from pyarrow._compute import Expression +from pyarrow._fs import FileSystem, SupportedFileSystem +from pyarrow._parquet import ( + ColumnChunkMetaData, + ColumnSchema, + FileDecryptionProperties, + FileEncryptionProperties, + FileMetaData, + ParquetLogicalType, + ParquetReader, + ParquetSchema, + RowGroupMetaData, + SortingColumn, + Statistics, +) +from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow.dataset import ParquetFileFragment, Partitioning +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from typing_extensions import deprecated + +__all__ = ( + "ColumnChunkMetaData", + "ColumnSchema", + "FileDecryptionProperties", + "FileEncryptionProperties", + "FileMetaData", + "ParquetDataset", + "ParquetFile", + "ParquetLogicalType", + "ParquetReader", + "ParquetSchema", + "ParquetWriter", + "RowGroupMetaData", + "SortingColumn", + "Statistics", + "read_metadata", + "read_pandas", + "read_schema", + "read_table", + "write_metadata", + "write_table", + "write_to_dataset", + "_filters_to_expression", + "filters_to_expression", +) + + +def filters_to_expression( + filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + + +@deprecated("use filters_to_expression") +def _filters_to_expression( + filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + + +_Compression: TypeAlias = Literal["gzip", "bz2", + "brotli", "lz4", "zstd", "snappy", "none"] + + +class ParquetFile: + reader: ParquetReader + common_metadata: FileMetaData + + def __init__( + self, + source: str | Path | NativeFile | IO, + *, + metadata: FileMetaData | None = None, + common_metadata: FileMetaData | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + filesystem: SupportedFileSystem | None = None, + page_checksum_verification: bool = False, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def schema(self) -> ParquetSchema: ... + @property + def schema_arrow(self) -> Schema: ... + @property + def num_row_groups(self) -> int: ... + def close(self, force: bool = False) -> None: ... + @property + def closed(self) -> bool: ... + + def read_row_group( + self, + i: int, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: ... + + def read_row_groups( + self, + row_groups: list, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: ... + + def iter_batches( + self, + batch_size: int = 65536, + row_groups: list | None = None, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Iterator[RecordBatch]: ... + + def read( + self, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: ... 
+ def scan_contents(self, columns: list | None = None, + batch_size: int = 65536) -> int: ... + + +class ParquetWriter: + flavor: str + schema_changed: bool + schema: ParquetSchema + where: str | Path | IO + file_handler: NativeFile | None + writer: _parquet.ParquetWriter + is_open: bool + + def __init__( + self, + where: str | Path | IO | NativeFile, + schema: Schema, + filesystem: SupportedFileSystem | None = None, + flavor: str | None = None, + version: Literal["1.0", "2.4", "2.6"] = ..., + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool | list = False, + column_encoding: str | dict | None = None, + writer_engine_version=None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **options, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> Literal[False]: ... + + def write( + self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None + ) -> None: ... + def write_batch(self, batch: RecordBatch, + row_group_size: int | None = None) -> None: ... + + def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... + def close(self) -> None: ... + def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: ... + + +class ParquetDataset: + def __init__( + self, + path_or_paths: SingleOrList[str] + | SingleOrList[Path] + | SingleOrList[NativeFile] + | SingleOrList[IO], + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + *, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + def equals(self, other: ParquetDataset) -> bool: ... + @property + def schema(self) -> Schema: ... + + def read( + self, + columns: list[str] | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: ... + def read_pandas(self, **kwargs) -> Table: ... + @property + def fragments(self) -> list[ParquetFileFragment]: ... + @property + def files(self) -> list[str]: ... + @property + def filesystem(self) -> FileSystem: ... + @property + def partitioning(self) -> Partitioning: ... 
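+# Example (editorial sketch, not part of the stub API): reading a partitioned
+# dataset through the class above; "data/" is a hypothetical hive-partitioned
+# directory.
+#
+#     import pyarrow.parquet as pq
+#
+#     ds = pq.ParquetDataset("data/", partitioning="hive")
+#     table = ds.read(columns=["x"])  # -> Table
+#     print(ds.files)                 # fragment paths discovered on disk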
+ + +def read_table( + source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], + *, + columns: list | None = None, + use_threads: bool = True, + schema: Schema | None = None, + use_pandas_metadata: bool = False, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + filesystem: SupportedFileSystem | None = None, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, +) -> Table: ... + + +def read_pandas( + source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs +) -> Table: ... + + +def write_table( + table: Table, + where: str | Path | NativeFile | IO, + row_group_size: int | None = None, + version: Literal["1.0", "2.4", "2.6"] = "2.6", + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + coerce_timestamps: str | None = None, + allow_truncated_timestamps: bool = False, + data_page_size: int | None = None, + flavor: str | None = None, + filesystem: SupportedFileSystem | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool = False, + column_encoding: str | dict | None = None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **kwargs, +) -> None: ... + + +def write_to_dataset( + table: Table, + root_path: str | Path, + partition_cols: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + partitioning: Partitioning | list[str] | None = None, + basename_template: str | None = None, + use_threads: bool | None = None, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] + | None = None, + **kwargs, +) -> None: ... + + +def write_metadata( + schema: Schema, + where: str | NativeFile, + metadata_collector: list[FileMetaData] | None = None, + filesystem: SupportedFileSystem | None = None, + **kwargs, +) -> None: ... + + +def read_metadata( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> FileMetaData: ... + + +def read_schema( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Schema: ... 
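+# Example (editorial sketch, not part of the stub API): the round trip the
+# module-level signatures above describe; "t.parquet" is a hypothetical path.
+#
+#     import pyarrow as pa
+#     import pyarrow.parquet as pq
+#
+#     t = pa.table({"x": [1, 2, 3]})
+#     pq.write_table(t, "t.parquet", compression="zstd")
+#     back = pq.read_table("t.parquet", filters=[("x", ">", 1)])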
diff --git a/python/pyarrow-stubs/parquet/encryption.pyi b/python/pyarrow-stubs/parquet/encryption.pyi new file mode 100644 index 00000000000..fe9a454e593 --- /dev/null +++ b/python/pyarrow-stubs/parquet/encryption.pyi @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow._parquet_encryption import ( + CryptoFactory, + DecryptionConfiguration, + EncryptionConfiguration, + KmsClient, + KmsConnectionConfig, +) + +__all__ = [ + "CryptoFactory", + "DecryptionConfiguration", + "EncryptionConfiguration", + "KmsClient", + "KmsConnectionConfig", +] diff --git a/python/pyarrow-stubs/py.typed b/python/pyarrow-stubs/py.typed new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/python/pyarrow-stubs/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyarrow-stubs/scalar.pyi b/python/pyarrow-stubs/scalar.pyi new file mode 100644 index 00000000000..4563b97fef7 --- /dev/null +++ b/python/pyarrow-stubs/scalar.pyi @@ -0,0 +1,391 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import collections.abc +import datetime as dt +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Any, Generic, Iterator, Literal + +import numpy as np + +from pyarrow._compute import CastOptions # type: ignore[import-not-found] +from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable +from typing_extensions import TypeVar + +from ._types import ( + _DataTypeT, + _Time32Unit, + _Time64Unit, + _Tz, + _Unit, + DataType, + ListType, + LargeListType, + ListViewType, + LargeListViewType, + FixedSizeListType, +) +from ._types import ( + Decimal256Type, _Precision, _Scale, NullType, BoolType, UInt8Type, Int8Type, + UInt16Type, Int16Type, Uint32Type, Int32Type, UInt64Type, Int64Type, + Float16Type, Float32Type, Float64Type, Decimal32Type, Decimal64Type, + Decimal128Type, Date32Type, Date64Type, Time32Type, Time64Type, TimestampType, + _Size, DurationType, MonthDayNanoIntervalType, BinaryType, LargeBinaryType, + FixedSizeBinaryType, StringType, LargeStringType, BinaryViewType, StringViewType, + StructType, _K, _ValueT, _IndexT, _BasicValueT, RunEndEncodedType, _RunEndType, + UnionType, ExtensionType, BaseExtensionType, Bool8Type, UuidType, JsonType, + OpaqueType, DictionaryType, MapType, _BasicDataType, +) + +_AsPyTypeK = TypeVar("_AsPyTypeK") +_AsPyTypeV = TypeVar("_AsPyTypeV") +_DataType_co = TypeVar("_DataType_co", bound=DataType, covariant=True) + +class Scalar(_Weakrefable, Generic[_DataType_co]): + + @property + def type(self) -> _DataType_co: ... + + @property + def is_valid(self) -> bool: ... + + def cast( + self, + target_type: None | _DataTypeT, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Self | Scalar[_DataTypeT]: ... + + def validate(self, *, full: bool = False) -> None: ... + + def equals(self, other: Scalar) -> bool: ... + + def __hash__(self) -> int: ... + + def as_py(self: Scalar[Any], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None) -> Any: ... + + +_NULL: TypeAlias = None +NA = _NULL + +class NullScalar(Scalar[NullType]): ... + +class BooleanScalar(Scalar[BoolType]): ... + +class UInt8Scalar(Scalar[UInt8Type]): ... + +class Int8Scalar(Scalar[Int8Type]): ... + +class UInt16Scalar(Scalar[UInt16Type]): ... + +class Int16Scalar(Scalar[Int16Type]): ... + +class UInt32Scalar(Scalar[Uint32Type]): ... + +class Int32Scalar(Scalar[Int32Type]): ... + +class UInt64Scalar(Scalar[UInt64Type]): ... + +class Int64Scalar(Scalar[Int64Type]): ... + +class HalfFloatScalar(Scalar[Float16Type]): ... + +class FloatScalar(Scalar[Float32Type]): ... + +class DoubleScalar(Scalar[Float64Type]): ... + +class Decimal32Scalar(Scalar[Decimal32Type[_Precision, _Scale]]): ... + +class Decimal64Scalar(Scalar[Decimal64Type[_Precision, _Scale]]): ... + +class Decimal128Scalar(Scalar[Decimal128Type[_Precision, _Scale]]): ... + +class Decimal256Scalar(Scalar[Decimal256Type[_Precision, _Scale]]): ... + +class Date32Scalar(Scalar[Date32Type]): ... + + +class Date64Scalar(Scalar[Date64Type]): + + @property + def value(self) -> dt.date | None: ... + +class Time32Scalar(Scalar[Time32Type[_Time32Unit]]): + + @property + def value(self) -> dt.time | None: ... + +class Time64Scalar(Scalar[Time64Type[_Time64Unit]]): + + @property + def value(self) -> dt.time | None: ... 
+ +class TimestampScalar(Scalar[TimestampType[_Unit, _Tz]]): + + @property + def value(self) -> int | None: ... + +class DurationScalar(Scalar[DurationType[_Unit]]): + + @property + def value(self) -> dt.timedelta | None: ... + +class MonthDayNanoIntervalScalar(Scalar[MonthDayNanoIntervalType]): + + @property + def value(self) -> MonthDayNano | None: ... + + +class BinaryScalar(Scalar[BinaryType]): + + def as_buffer(self) -> Buffer: ... + + +class LargeBinaryScalar(Scalar[LargeBinaryType]): + + def as_buffer(self) -> Buffer: ... + + +class FixedSizeBinaryScalar(Scalar[FixedSizeBinaryType]): + + def as_buffer(self) -> Buffer: ... + + +class StringScalar(Scalar[StringType]): + + def as_buffer(self) -> Buffer: ... + + +class LargeStringScalar(Scalar[LargeStringType]): + + def as_buffer(self) -> Buffer: ... + + +class BinaryViewScalar(Scalar[BinaryViewType]): + + def as_buffer(self) -> Buffer: ... + + +class StringViewScalar(Scalar[StringViewType]): + + def as_buffer(self) -> Buffer: ... + + +class ListScalar(Scalar[ListType[_DataTypeT]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + + +class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, _Size]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + + +class LargeListScalar(Scalar[LargeListType[_DataTypeT]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + + +class ListViewScalar(Scalar[ListViewType[_DataTypeT]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + + +class LargeListViewScalar(Scalar[LargeListViewType[_DataTypeT]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + + +class StructScalar(Scalar[StructType], collections.abc.Mapping[str, Scalar]): + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[str]: ... + + def __getitem__(self, key: int | str) -> Scalar[Any]: ... + + def _as_py_tuple(self) -> list[tuple[str, Any]]: ... + +class MapScalar(Scalar[MapType[_K, _ValueT]]): + + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> tuple[Scalar[_K], _ValueT, Any]: ... + + def __iter__( + self: Scalar[ + MapType[_BasicDataType[_AsPyTypeK], _BasicDataType[_AsPyTypeV]],] + | Scalar[MapType[Any, _BasicDataType[_AsPyTypeV]]] + | Scalar[MapType[_BasicDataType[_AsPyTypeK], Any]] + ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]] | Iterator[tuple[Any, _AsPyTypeV]] | Iterator[tuple[_AsPyTypeK, Any]]: ... + + +class DictionaryScalar(Scalar[DictionaryType[_IndexT, _BasicValueT]]): + + @property + def index(self) -> Scalar[_IndexT]: ... + + @property + def value(self) -> Scalar[_BasicValueT]: ... + + @property + def dictionary(self) -> Array: ... + +class RunEndEncodedScalar(Scalar[RunEndEncodedType[_RunEndType, _BasicValueT]]): + + @property + def value(self) -> tuple[int, _BasicValueT] | None: ... 
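+# Example (editorial sketch, not part of the stub API): how the Scalar
+# generics above narrow types for a type checker.
+#
+#     import datetime as dt
+#     import pyarrow as pa
+#
+#     i = pa.scalar(1, type=pa.int64())  # Int64Scalar; i.as_py() -> 1
+#     ts = pa.scalar(dt.datetime(2024, 1, 1), type=pa.timestamp("s"))
+#     ts.value  # epoch ticks as int (int | None per the stub above)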
+ + +class UnionScalar(Scalar[UnionType]): + + @property + def value(self) -> Any | None: ... + + @property + def type_code(self) -> str: ... + + +class ExtensionScalar(Scalar[ExtensionType]): + + @property + def value(self) -> Any | None: ... + + @staticmethod + def from_storage(typ: BaseExtensionType, value) -> ExtensionScalar: ... + + +class Bool8Scalar(Scalar[Bool8Type]): ... + +class UuidScalar(Scalar[UuidType]): ... + +class JsonScalar(Scalar[JsonType]): ... + +class OpaqueScalar(Scalar[OpaqueType]): ... + + +class FixedShapeTensorScalar(ExtensionScalar): + + def to_numpy(self) -> np.ndarray: ... + + def to_tensor(self) -> Tensor: ... + + +def scalar( + value: Any, + type: _DataTypeT, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Scalar[_DataTypeT]: ... + + +__all__ = [ + "Scalar", + "_NULL", + "NA", + "NullScalar", + "BooleanScalar", + "UInt8Scalar", + "Int8Scalar", + "UInt16Scalar", + "Int16Scalar", + "UInt32Scalar", + "Int32Scalar", + "UInt64Scalar", + "Int64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + "Time64Scalar", + "TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "FixedSizeBinaryScalar", + "StringScalar", + "LargeStringScalar", + "BinaryViewScalar", + "StringViewScalar", + "ListScalar", + "FixedSizeListScalar", + "LargeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "StructScalar", + "MapScalar", + "DictionaryScalar", + "RunEndEncodedScalar", + "UnionScalar", + "ExtensionScalar", + "FixedShapeTensorScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "scalar", +] diff --git a/python/pyarrow-stubs/substrait.pyi b/python/pyarrow-stubs/substrait.pyi new file mode 100644 index 00000000000..b78bbd8aebd --- /dev/null +++ b/python/pyarrow-stubs/substrait.pyi @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow._substrait import ( + BoundExpressions, + SubstraitSchema, + deserialize_expressions, + deserialize_schema, + get_supported_functions, + run_query, + serialize_expressions, + serialize_schema, +) + +__all__ = [ + "BoundExpressions", + "get_supported_functions", + "run_query", + "deserialize_expressions", + "serialize_expressions", + "deserialize_schema", + "serialize_schema", + "SubstraitSchema", +] diff --git a/python/pyarrow-stubs/table.pyi b/python/pyarrow-stubs/table.pyi new file mode 100644 index 00000000000..29784d274df --- /dev/null +++ b/python/pyarrow-stubs/table.pyi @@ -0,0 +1,653 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import ( + Any, + Collection, + Generator, + Generic, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + TypeVar, +) +import builtins + +import numpy as np +import pandas as pd + +from numpy.typing import NDArray +from pyarrow._compute import ( + CastOptions, + CountOptions, + FunctionOptions, + ScalarAggregateOptions, + TDigestOptions, + VarianceOptions, +) +from pyarrow._stubs_typing import ( + Indices, + Mask, + NullEncoding, + NullSelectionBehavior, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportArrowStream, +) +from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.interchange.dataframe import _PyArrowDataFrame +from pyarrow.lib import Device, MemoryManager, MemoryPool, Schema +from pyarrow.lib import Field as _Field + +from .array import Array, StructArray, _CastAs, _PandasConvertible +from .device import DeviceAllocationType +from .io import Buffer +from ._ipc import RecordBatchReader +from .scalar import BooleanScalar, Int64Scalar, Scalar, StructScalar +from .tensor import Tensor +from ._stubs_typing import NullableCollection +from ._types import DataType, _AsPyType, _BasicDataType, _DataTypeT + +Field: TypeAlias = _Field[DataType] +_ScalarT = TypeVar("_ScalarT", bound=Scalar) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) + +_Aggregation: TypeAlias = Literal[ + "all", + "any", + "approximate_median", + "count", + "count_all", + "count_distinct", + "distinct", + "first", + "first_last", + "last", + "list", + "max", + "mean", + "min", + "min_max", + "one", + "product", + "stddev", + "sum", + "tdigest", + "variance", +] +_AggregationPrefixed: TypeAlias = Literal[ + "hash_all", + "hash_any", + "hash_approximate_median", + "hash_count", + "hash_count_all", + "hash_count_distinct", + "hash_distinct", + "hash_first", + "hash_first_last", + "hash_last", + "hash_list", + "hash_max", + "hash_mean", + "hash_min", + "hash_min_max", + "hash_one", + "hash_product", + "hash_stddev", + "hash_sum", + "hash_tdigest", + "hash_variance", +] +Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed +AggregateOptions: TypeAlias = ( + ScalarAggregateOptions | CountOptions | TDigestOptions | VarianceOptions | FunctionOptions +) + +UnarySelector: TypeAlias = str +NullarySelector: TypeAlias = tuple[()] +NarySelector: TypeAlias = list[str] | tuple[str, ...] +ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector + +class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + + + @property + def data(self) -> Self: ... 
+    @property
+    def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: ...
+
+    def length(self) -> int: ...
+
+    __len__ = length
+    def to_string(
+        self,
+        *,
+        indent: int = 0,
+        window: int = 5,
+        container_window: int = 2,
+        skip_new_lines: bool = False,
+    ) -> str: ...
+
+    format = to_string
+    def validate(self, *, full: bool = False) -> None: ...
+
+    @property
+    def null_count(self) -> int: ...
+
+    @property
+    def nbytes(self) -> int: ...
+
+    def get_total_buffer_size(self) -> int: ...
+
+    def __sizeof__(self) -> int: ...
+    def __getitem__(self, key: int | builtins.slice) -> Self | _Scalar_co: ...
+
+    def getitem(self, i: int) -> Scalar: ...
+    def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: ...
+
+    def is_nan(self) -> ChunkedArray[BooleanScalar]: ...
+
+    def is_valid(self) -> ChunkedArray[BooleanScalar]: ...
+
+    def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: ...
+
+    def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ...
+    def cast(
+        self,
+        target_type: None | _CastAs = None,
+        safe: bool | None = None,
+        options: CastOptions | None = None,
+    ) -> Self | ChunkedArray[Scalar[_CastAs]]: ...
+
+    def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: ...
+
+    def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: ...
+
+    def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: ...
+
+    def unique(self) -> Array[_Scalar_co]: ...
+
+    def value_counts(self) -> StructArray: ...
+
+    def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+    def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ...
+
+    def index(
+        self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]],
+        value: Scalar[_DataTypeT] | _AsPyType,
+        start: int | None = None,
+        end: int | None = None,
+        *,
+        memory_pool: MemoryPool | None = None,
+    ) -> Int64Scalar: ...
+
+    def take(self, indices: Indices) -> Self: ...
+
+    def drop_null(self) -> Self: ...
+
+    def sort(self, order: Order = "ascending", **kwargs) -> Self: ...
+
+    def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+    @property
+    def num_chunks(self) -> int: ...
+
+    def chunk(self, i: int) -> Array[_Scalar_co]: ...
+
+    @property
+    def chunks(self) -> list[Array[_Scalar_co]]: ...
+
+    def iterchunks(
+        self: ArrayOrChunkedArray[_ScalarT],
+    ) -> Generator[Array, None, None]: ...
+
+    def __iter__(self) -> Iterator[_Scalar_co]: ...
+    def to_pylist(
+        self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]],
+        *,
+        maps_as_pydicts: Literal["lossy", "strict"] | None = None,
+    ) -> list[_AsPyType | None]: ...
+
+    def __arrow_c_stream__(self, requested_schema=None) -> Any: ...
+
+    @classmethod
+    def _import_from_c_capsule(cls, stream) -> Self: ...
+
+    @property
+    def is_cpu(self) -> bool: ...
+
+
+def chunked_array(
+    arrays: Iterable[NullableCollection[Any]] | Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray] | Iterable[Array[_ScalarT]],
+    type: DataType | str | None = None,
+) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: ...
+
+
+_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any])
+
+class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]):
+    def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ...
+ def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: ... + + def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: ... + + def __len__(self) -> int: ... + def column(self, i: int | str) -> _ColumnT: ... + + @property + def column_names(self) -> list[str]: ... + + @property + def columns(self) -> list[_ColumnT]: ... + + def drop_null(self) -> Self: ... + + def field(self, i: int | str) -> Field: ... + + @classmethod + def from_pydict( + cls, + mapping: Mapping[str, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: ... + + @classmethod + def from_pylist( + cls, + mapping: Sequence[Mapping[str, Any]], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: ... + + def itercolumns(self) -> Generator[_ColumnT, None, None]: ... + + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def shape(self) -> tuple[int, int]: ... + + @property + def schema(self) -> Schema: ... + @property + def nbytes(self) -> int: ... + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + + def filter( + self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" + ) -> Self: ... + + def to_pydict( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> dict[str, list[Any]]: ... + + def to_pylist( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> list[dict[str, Any]]: ... + + def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: ... + + def remove_column(self, i: int) -> Self: ... + def drop_columns(self, columns: str | list[str]) -> Self: ... + + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + def append_column( + self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + + +class RecordBatch(_Tabular[Array]): + + + def validate(self, *, full: bool = False) -> None: ... + + def replace_schema_metadata( + self, metadata: dict[str | bytes, str | bytes] | None = None + ) -> Self: ... + + @property + def num_columns(self) -> int: ... + + + @property + def num_rows(self) -> int: ... + + @property + def schema(self) -> Schema: ... + + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def equals(self, other: Self, check_metadata: bool = False) -> bool: ... + + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: ... + + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: ... 
+
+    @classmethod
+    def from_arrays(
+        cls,
+        arrays: Collection[Array],
+        names: list[str] | None = None,
+        schema: Schema | None = None,
+        metadata: Mapping[str | bytes, str | bytes] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_pandas(
+        cls,
+        df: pd.DataFrame,
+        schema: Schema | None = None,
+        preserve_index: bool | None = None,
+        nthreads: int | None = None,
+        columns: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_struct_array(
+        cls, struct_array: StructArray | ChunkedArray[StructScalar]
+    ) -> Self: ...
+
+    def to_struct_array(self) -> StructArray: ...
+
+    def to_tensor(
+        self,
+        null_to_nan: bool = False,
+        row_major: bool = True,
+        memory_pool: MemoryPool | None = None,
+    ) -> Tensor: ...
+
+    def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): ...
+
+    @classmethod
+    def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: ...
+
+    def __arrow_c_array__(self, requested_schema=None): ...
+
+    def __arrow_c_stream__(self, requested_schema=None): ...
+
+    @classmethod
+    def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+
+    def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ...
+
+    @classmethod
+    def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: ...
+
+    def __arrow_c_device_array__(self, requested_schema=None, **kwargs): ...
+
+    @classmethod
+    def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+
+    @property
+    def device_type(self) -> DeviceAllocationType: ...
+
+    @property
+    def is_cpu(self) -> bool: ...
+
+    def copy_to(self, destination: MemoryManager | Device) -> Self: ...
+
+
+def table_to_blocks(options, table: Table, categories, extension_columns): ...
+
+JoinType: TypeAlias = Literal[
+    "left semi",
+    "right semi",
+    "left anti",
+    "right anti",
+    "inner",
+    "left outer",
+    "right outer",
+    "full outer",
+]
+
+class Table(_Tabular[ChunkedArray[Any]]):
+
+
+    def validate(self, *, full: bool = False) -> None: ...
+
+    def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+    def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: ...
+
+    def replace_schema_metadata(
+        self, metadata: dict[str | bytes, str | bytes] | None = None
+    ) -> Self: ...
+
+    def flatten(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+    def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+    def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+    def equals(self, other: Self, check_metadata: bool = False) -> bool: ...
+
+    def cast(
+        self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None
+    ) -> Self: ...
+
+    @classmethod
+    def from_pandas(
+        cls,
+        df: pd.DataFrame,
+        schema: Schema | None = None,
+        preserve_index: bool | None = None,
+        nthreads: int | None = None,
+        columns: list[str] | None = None,
+        safe: bool = True,
+    ) -> Self: ...
+
+    @classmethod
+    def from_arrays(
+        cls,
+        arrays: Collection[ArrayOrChunkedArray[Any]],
+        names: list[str] | None = None,
+        schema: Schema | None = None,
+        metadata: Mapping[str | bytes, str | bytes] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_struct_array(
+        cls, struct_array: StructArray | ChunkedArray[StructScalar]
+    ) -> Self: ...
+
+    def to_struct_array(
+        self, max_chunksize: int | None = None
+    ) -> ChunkedArray[StructScalar]: ...
+
+    @classmethod
+    def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: ...
+ + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: ... + + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: ... + + @property + def schema(self) -> Schema: ... + + @property + def num_columns(self) -> int: ... + + @property + def num_rows(self) -> int: ... + + @property + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + + def set_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... + + def drop(self, columns: str | list[str]) -> Self: ... + + def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: ... + + def join( + self, + right_table: Self, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> Self: ... + + def join_asof( + self, + right_table: Self, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> Self: ... + + def __arrow_c_stream__(self, requested_schema=None): ... + + @property + def is_cpu(self) -> bool: ... + + +def record_batch( + data: dict[str, list[Any] | Array[Any]] + | Collection[Array[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, +) -> RecordBatch: ... + + +def table( + data: dict[str, list[Any] | Array[Any]] + | Collection[ArrayOrChunkedArray[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowStream + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + nthreads: int | None = None, +) -> Table: ... + + +def concat_tables( + tables: Iterable[Table], + memory_pool: MemoryPool | None = None, + promote_options: Literal["none", "default", "permissive"] = "none", + **kwargs: Any, +) -> Table: ... + + +class TableGroupBy: + + + keys: str | list[str] + def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... + def aggregate( + self, + aggregations: Iterable[ + tuple[ColumnSelector, Aggregation] + | tuple[ColumnSelector, Aggregation, AggregateOptions | None] + ], + ) -> Table: ... + + def _table(self) -> Table: ... + @property + def _use_threads(self) -> bool: ... + +def concat_batches( + recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None +) -> RecordBatch: ... + + +__all__ = [ + "ChunkedArray", + "chunked_array", + "_Tabular", + "RecordBatch", + "table_to_blocks", + "Table", + "record_batch", + "table", + "concat_tables", + "TableGroupBy", + "concat_batches", +] diff --git a/python/pyarrow-stubs/tensor.pyi b/python/pyarrow-stubs/tensor.pyi new file mode 100644 index 00000000000..471f0ec1e98 --- /dev/null +++ b/python/pyarrow-stubs/tensor.pyi @@ -0,0 +1,253 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+import numpy as np
+
+from pyarrow.lib import _Weakrefable
+from scipy.sparse import coo_matrix, csc_matrix, csr_matrix
+from sparse import COO  # type: ignore
+
+class Tensor(_Weakrefable):
+
+
+    @classmethod
+    def from_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+    def to_numpy(self) -> np.ndarray: ...
+
+    def equals(self, other: Tensor) -> bool: ...
+
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+
+    @property
+    def is_contiguous(self) -> bool: ...
+
+    @property
+    def ndim(self) -> int: ...
+
+    @property
+    def size(self) -> int: ...
+
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+
+    @property
+    def strides(self) -> tuple[int, ...]: ...
+
+
+class SparseCOOTensor(_Weakrefable):
+
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        coords: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: coo_matrix, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_pydata_sparse(cls, obj: COO, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> coo_matrix: ...
+
+    def to_pydata_sparse(self) -> COO: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+    @property
+    def has_canonical_format(self) -> bool: ...
+
+class SparseCSRMatrix(_Weakrefable):
+
+
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> csr_matrix: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+class SparseCSRMatrix(_Weakrefable):
+
+
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> csr_matrix: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+
+class SparseCSCMatrix(_Weakrefable):
+
+
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: tuple[int, ...],
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_scipy(cls, obj: csc_matrix, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_scipy(self) -> csc_matrix: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+
+class SparseCSFTensor(_Weakrefable):
+
+
+    @classmethod
+    def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ...
+
+    @classmethod
+    def from_numpy(
+        cls,
+        data: np.ndarray,
+        indptr: np.ndarray,
+        indices: np.ndarray,
+        shape: tuple[int, ...],
+        axis_order: list[int] | None = None,
+        dim_names: list[str] | None = None,
+    ) -> Self: ...
+
+    @classmethod
+    def from_tensor(cls, obj: Tensor) -> Self: ...
+
+    def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+    def to_tensor(self) -> Tensor: ...
+
+    def equals(self, other: Self) -> bool: ...
+
+    @property
+    def is_mutable(self) -> bool: ...
+    @property
+    def ndim(self) -> int: ...
+    @property
+    def size(self) -> int: ...
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+    def dim_name(self, i: int) -> str: ...
+
+    @property
+    def dim_names(self) -> list[str]: ...
+    @property
+    def non_zero_length(self) -> int: ...
+
+__all__ = [
+    "Tensor",
+    "SparseCOOTensor",
+    "SparseCSRMatrix",
+    "SparseCSCMatrix",
+    "SparseCSFTensor",
+]
diff --git a/python/pyarrow-stubs/types.pyi b/python/pyarrow-stubs/types.pyi
new file mode 100644
index 00000000000..def5e3771ab
--- /dev/null
+++ b/python/pyarrow-stubs/types.pyi
@@ -0,0 +1,217 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+from typing import Any
+
+if sys.version_info >= (3, 13):
+    from typing import TypeIs
+else:
+    from typing_extensions import TypeIs
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+
+from pyarrow.lib import (
+    BinaryType,
+    BinaryViewType,
+    BoolType,
+    DataType,
+    Date32Type,
+    Date64Type,
+    Decimal32Type,
+    Decimal64Type,
+    Decimal128Type,
+    Decimal256Type,
+    DenseUnionType,
+    DictionaryType,
+    DurationType,
+    FixedSizeBinaryType,
+    FixedSizeListType,
+    Float16Type,
+    Float32Type,
+    Float64Type,
+    Int8Type,
+    Int16Type,
+    Int32Type,
+    Int64Type,
+    LargeBinaryType,
+    LargeListType,
+    LargeListViewType,
+    LargeStringType,
+    ListType,
+    ListViewType,
+    MapType,
+    MonthDayNanoIntervalType,
+    NullType,
+    RunEndEncodedType,
+    SparseUnionType,
+    StringType,
+    StringViewType,
+    StructType,
+    Time32Type,
+    Time64Type,
+    TimestampType,
+    UInt8Type,
+    UInt16Type,
+    Uint32Type,
+    UInt64Type,
+)
+
+_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type
+_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | Uint32Type | UInt64Type
+_Integer: TypeAlias = _SignedInteger | _UnsignedInteger
+_Floating: TypeAlias = Float16Type | Float32Type | Float64Type
+_Decimal: TypeAlias = (
+    Decimal32Type[Any, Any]
+    | Decimal64Type[Any, Any]
+    | Decimal128Type[Any, Any]
+    | Decimal256Type[Any, Any]
+)
+_Date: TypeAlias = Date32Type | Date64Type
+_Time: TypeAlias = Time32Type[Any] | Time64Type[Any]
+_Interval: TypeAlias = MonthDayNanoIntervalType
+_Temporal: TypeAlias = (
+    TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval
+)
+_Union: TypeAlias = SparseUnionType | DenseUnionType
+_Nested: TypeAlias = (
+    ListType[Any]
+    | FixedSizeListType[Any, Any]
+    | LargeListType[Any]
+    | ListViewType[Any]
+    | LargeListViewType[Any]
+    | StructType
+    | MapType[Any, Any, Any]
+    | _Union
+)
+
+
+def is_null(t: DataType) -> TypeIs[NullType]: ...
+def is_boolean(t: DataType) -> TypeIs[BoolType]: ...
+def is_integer(t: DataType) -> TypeIs[_Integer]: ...
+def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ...
+def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ...
+def is_int8(t: DataType) -> TypeIs[Int8Type]: ...
+def is_int16(t: DataType) -> TypeIs[Int16Type]: ...
+def is_int32(t: DataType) -> TypeIs[Int32Type]: ...
+def is_int64(t: DataType) -> TypeIs[Int64Type]: ...
+def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ...
+def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ...
+def is_uint32(t: DataType) -> TypeIs[Uint32Type]: ...
+def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ...
+def is_floating(t: DataType) -> TypeIs[_Floating]: ...
+def is_float16(t: DataType) -> TypeIs[Float16Type]: ...
+def is_float32(t: DataType) -> TypeIs[Float32Type]: ...
+def is_float64(t: DataType) -> TypeIs[Float64Type]: ...
+def is_list(t: DataType) -> TypeIs[ListType[Any]]: ...
+def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ...
+def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ...
+def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ...
+def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ...
+def is_struct(t: DataType) -> TypeIs[StructType]: ...
+def is_union(t: DataType) -> TypeIs[_Union]: ...
+def is_nested(t: DataType) -> TypeIs[_Nested]: ...
+def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ...
+def is_temporal(t: DataType) -> TypeIs[_Temporal]: ...
+def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ...
+def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ...
+def is_time(t: DataType) -> TypeIs[_Time]: ...
+def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ...
+def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ...
+def is_binary(t: DataType) -> TypeIs[BinaryType]: ...
+def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ...
+def is_unicode(t: DataType) -> TypeIs[StringType]: ...
+def is_string(t: DataType) -> TypeIs[StringType]: ...
+def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ...
+def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ...
+def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ...
+def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ...
+def is_string_view(t: DataType) -> TypeIs[StringViewType]: ...
+def is_date(t: DataType) -> TypeIs[_Date]: ...
+def is_date32(t: DataType) -> TypeIs[Date32Type]: ...
+def is_date64(t: DataType) -> TypeIs[Date64Type]: ...
+def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ...
+def is_decimal(t: DataType) -> TypeIs[_Decimal]: ...
+def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ...
+def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ...
+def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ...
+def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ...
+def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ...
+def is_interval(t: DataType) -> TypeIs[_Interval]: ...
+def is_primitive(t: DataType) -> bool: ...
+def is_boolean_value(obj: Any) -> bool: ...
+def is_integer_value(obj: Any) -> bool: ...
+def is_float_value(obj: Any) -> bool: ...
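+
+# Illustrative sketch only (not part of the stubs): because the predicates
+# above return TypeIs[...], a TypeIs-aware checker narrows the DataType
+# inside the guarded branch. Variable names here are ours.
+#
+#   import pyarrow as pa
+#   import pyarrow.types as pat
+#
+#   t: pa.DataType = pa.list_(pa.int64())
+#   if pat.is_list(t):
+#       # t is narrowed to ListType here, so .value_type is well-typed
+#       element_type = t.value_type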
+
+
+__all__ = [
+    "is_binary",
+    "is_binary_view",
+    "is_boolean",
+    "is_date",
+    "is_date32",
+    "is_date64",
+    "is_decimal",
+    "is_decimal128",
+    "is_decimal256",
+    "is_decimal32",
+    "is_decimal64",
+    "is_dictionary",
+    "is_duration",
+    "is_fixed_size_binary",
+    "is_fixed_size_list",
+    "is_float16",
+    "is_float32",
+    "is_float64",
+    "is_floating",
+    "is_int16",
+    "is_int32",
+    "is_int64",
+    "is_int8",
+    "is_integer",
+    "is_interval",
+    "is_large_binary",
+    "is_large_list",
+    "is_large_list_view",
+    "is_large_string",
+    "is_large_unicode",
+    "is_list",
+    "is_list_view",
+    "is_map",
+    "is_nested",
+    "is_null",
+    "is_primitive",
+    "is_run_end_encoded",
+    "is_signed_integer",
+    "is_string",
+    "is_string_view",
+    "is_struct",
+    "is_temporal",
+    "is_time",
+    "is_time32",
+    "is_time64",
+    "is_timestamp",
+    "is_uint16",
+    "is_uint32",
+    "is_uint64",
+    "is_uint8",
+    "is_unicode",
+    "is_union",
+    "is_unsigned_integer",
+]
diff --git a/python/pyarrow-stubs/util.pyi b/python/pyarrow-stubs/util.pyi
new file mode 100644
index 00000000000..db74524d77d
--- /dev/null
+++ b/python/pyarrow-stubs/util.pyi
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections.abc import Callable +from os import PathLike +from typing import Any, Protocol, Sequence, TypeVar + +_F = TypeVar("_F", bound=Callable) +_N = TypeVar("_N") + + +class _DocStringComponents(Protocol): + _docstring_components: list[str] + + +def doc( + *docstrings: str | _DocStringComponents | Callable | None, **params: Any +) -> Callable[[_F], _F]: ... +def _is_iterable(obj) -> bool: ... +def _is_path_like(path) -> bool: ... +def _stringify_path(path: str | PathLike) -> str: ... +def product(seq: Sequence[_N]) -> _N: ... + + +def get_contiguous_span( + shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int +) -> tuple[int, int]: ... +def find_free_port() -> int: ... +def guid() -> str: ... +def _download_urllib(url, out_path) -> None: ... +def _download_requests(url, out_path) -> None: ... +def download_tzdata_on_windows() -> None: ... +def _deprecate_api(old_name, new_name, api, next_version, type=...): ... +def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ... From 144aef59154c211c2906595aa684637c74ff303d Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 20 Sep 2025 20:21:14 +0200 Subject: [PATCH 2/4] Minor changes to pyarrow so some typechecks pass --- python/pyarrow/array.pxi | 2 +- python/pyarrow/scalar.pxi | 2 +- python/pyarrow/tests/strategies.py | 38 ++++++++++++++++-------------- python/pyarrow/tests/test_array.py | 8 +++---- python/pyarrow/tests/test_io.py | 20 +++++++++------- 5 files changed, 37 insertions(+), 33 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index bf5beab589d..109d8ebe597 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3634,7 +3634,7 @@ cdef class FixedSizeListArray(BaseListArray): Or create from a values array, list size and matching type: >>> typ = pa.list_(pa.field("values", pa.int64()), 2) - >>> arr = pa.FixedSizeListArray.from_arrays(values,type=typ) + >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) >>> arr [ diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 5934a7aa8cf..d26933e3f39 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -1036,7 +1036,7 @@ cdef class StructScalar(Scalar, Mapping): Parameters ---------- - index : Union[int, str] + key : Union[int, str] Index / position or name of the field. 
Returns diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 450cce74f1d..6d7ec6f724f 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -18,21 +18,21 @@ import datetime import sys -import pytest -import hypothesis as h -import hypothesis.strategies as st +import pytest # type: ignore[import-not-found] +import hypothesis as h # type: ignore[import-not-found] +import hypothesis.strategies as st # type: ignore[import-not-found] try: - import hypothesis.extra.numpy as npst + import hypothesis.extra.numpy as npst # type: ignore[import-not-found] except ImportError: - npst = None + npst = None # type: ignore[assignment] try: - import hypothesis.extra.pytz as tzst + import hypothesis.extra.pytz as tzst # type: ignore[import-not-found] except ImportError: - tzst = None + tzst = None # type: ignore[assignment] try: import zoneinfo except ImportError: - zoneinfo = None + zoneinfo = None # type: ignore[assignment] if sys.platform == 'win32': try: import tzdata # noqa:F401 @@ -41,7 +41,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa @@ -234,13 +234,13 @@ def schemas(type_strategy=primitive_types, max_fields=None): all_types = st.deferred( lambda: ( - primitive_types | - list_types() | - struct_types() | - dictionary_types() | - map_types() | - list_types(all_types) | - struct_types(all_types) + primitive_types + | list_types() + | struct_types() + | dictionary_types() + | map_types() + | list_types(all_types) # type: ignore[has-type] + | struct_types(all_types) # type: ignore[has-type] ) ) all_fields = fields(all_types) @@ -467,7 +467,9 @@ def pandas_compatible_list_types( dictionary_types( value_strategy=pandas_compatible_dictionary_value_types ), - pandas_compatible_list_types(pandas_compatible_types), - struct_types(pandas_compatible_types) + pandas_compatible_list_types( + pandas_compatible_types # type: ignore[has-type] + ), + struct_types(pandas_compatible_types) # type: ignore[has-type] ) ) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 009ab1e849b..5686420c688 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -18,10 +18,10 @@ from collections.abc import Iterable import datetime import decimal -import hypothesis as h -import hypothesis.strategies as st +import hypothesis as h # type: ignore[import-not-found] +import hypothesis.strategies as st # type: ignore[import-not-found] import itertools -import pytest +import pytest # type: ignore[import-not-found] import struct import subprocess import sys @@ -30,7 +30,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index a6d3546e57c..7c86f37587c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -24,7 +24,7 @@ import math import os import pathlib -import pytest +import pytest # type: ignore[import-not-found] import random import sys import tempfile @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] from pyarrow.util import guid from pyarrow import Codec @@ -811,8 +811,9 @@ def test_cache_options_pickling(pickle_module): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ - pytest.param( - "bz2", 
marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("bz2", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) ), "brotli", "gzip", @@ -852,8 +853,9 @@ def test_compress_decompress(compression): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ - pytest.param( - "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("bz2", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) ), "brotli", "gzip", @@ -1748,9 +1750,9 @@ def test_unknown_compression_raises(): "gzip", "lz4", "zstd", - pytest.param( - "snappy", - marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("snappy", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) ) ]) def test_compressed_roundtrip(compression): From 8e6c6c83978602a3336aee1bee646b0503eeb8d3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 20 Sep 2025 20:21:38 +0200 Subject: [PATCH 3/4] Add utility for adding docstrings into annotations --- dev/update_stub_docstrings.py | 214 ++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 dev/update_stub_docstrings.py diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py new file mode 100644 index 00000000000..7eb1ee2925d --- /dev/null +++ b/dev/update_stub_docstrings.py @@ -0,0 +1,214 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Utility to extract docstrings from pyarrow and update +# docstrings in stubfiles. +# +# Usage +# ===== +# +# python ./dev/update_stub_docstrings.py -f ./python/pyarrow-stubs + + +from pathlib import Path +from textwrap import indent + +import click +# TODO: perhaps replace griffe with importlib +import griffe +from griffe import AliasResolutionError +import libcst +from libcst import matchers as m + + +def _get_docstring(name, package, indentation): + # print("extract_docstrings", name) + try: + obj = package.get_member(name) + except (KeyError, ValueError, AliasResolutionError): + # Some cython __init__ symbols can't be found + # e.g. 
pyarrow.lib.OSFile.__init__ + stack = name.split(".") + parent_name = ".".join(stack[:-1]) + + try: + obj = package.get_member(parent_name).all_members[stack[-1]] + except (KeyError, ValueError, AliasResolutionError): + print(f"{name} not found in {package.name}, it's probably ok.") + return None + + if obj.has_docstring: + docstring = obj.docstring.value + # Remove signature if present in docstring + if docstring.startswith(obj.name) or ( + (hasattr(obj.parent, "name") and + docstring.startswith(f"{obj.parent.name}.{obj.name}"))): + docstring = "\n".join(docstring.splitlines()[2:]) + # Skip empty docstrings + if docstring.strip() == "": + return None + # Indent docstring + indentation_prefix = indentation * " " + docstring = indent(docstring + '\n"""', indentation_prefix) + docstring = '"""\n' + docstring + return docstring + return None + + +class ReplaceEllipsis(libcst.CSTTransformer): + def __init__(self, package, namespace): + self.package = package + self.base_namespace = namespace + self.stack = [] + self.indentation = 0 + + # Insert module level docstring if _clone_signature is used + def leave_Module(self, original_node, updated_node): + new_body = [] + clone_matcher = m.SimpleStatementLine( + body=[m.Assign( + value=m.Call(func=m.Name(value="_clone_signature")) + ), m.ZeroOrMore()] + ) + for statement in updated_node.body: + new_body.append(statement) + if m.matches(statement, clone_matcher): + name = statement.body[0].targets[0].target.value + if self.base_namespace: + name = f"{self.base_namespace}.{name}" + docstring = _get_docstring(name, self.package, 0) + if docstring is not None: + new_expr = libcst.Expr(value=libcst.SimpleString(docstring)) + new_line = libcst.SimpleStatementLine(body=[new_expr]) + new_body.append(new_line) + + return updated_node.with_changes(body=new_body) + + def visit_ClassDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + + def leave_ClassDef(self, original_node, updated_node): + name = ".".join(self.stack) + if self.base_namespace: + name = self.base_namespace + "." + name + + class_matcher_1 = m.ClassDef( + name=m.Name(), + body=m.IndentedBlock( + body=[m.SimpleStatementLine( + body=[m.Expr(m.Ellipsis()), m.ZeroOrMore()] + ), m.ZeroOrMore()] + ) + ) + class_matcher_2 = m.ClassDef( + name=m.Name(), + body=m.IndentedBlock( + body=[m.FunctionDef(), m.ZeroOrMore()] + ) + ) + + if m.matches(updated_node, class_matcher_1): + docstring = _get_docstring(name, self.package, self.indentation) + if docstring is not None: + new_node = libcst.SimpleString(value=docstring) + updated_node = updated_node.deep_replace( + updated_node.body.body[0].body[0].value, new_node) + + if m.matches(updated_node, class_matcher_2): + docstring = _get_docstring(name, self.package, self.indentation) + if docstring is not None: + new_docstring = libcst.SimpleString(value=docstring) + new_body = [ + libcst.SimpleWhitespace(self.indentation * " "), + libcst.Expr(value=new_docstring), + libcst.Newline() + ] + list(updated_node.body.body) + new_body = libcst.IndentedBlock(body=new_body) + updated_node = updated_node.with_changes(body=new_body) + + self.stack.pop() + self.indentation -= 1 + return updated_node + + def visit_FunctionDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + + def leave_FunctionDef(self, original_node, updated_node): + name = ".".join(self.stack) + if self.base_namespace: + name = self.base_namespace + "." 
+ name
+
+        function_matcher = m.FunctionDef(
+            name=m.Name(),
+            body=m.SimpleStatementSuite(
+                body=[m.Expr(
+                    m.Ellipsis()
+                )]))
+        if m.matches(original_node, function_matcher):
+            docstring = _get_docstring(name, self.package, self.indentation)
+            if docstring is not None:
+                new_docstring = libcst.SimpleString(value=docstring)
+                new_body = [
+                    libcst.SimpleWhitespace(self.indentation * " "),
+                    libcst.Expr(value=new_docstring),
+                    libcst.Newline()
+                ]
+                new_body = libcst.IndentedBlock(body=new_body)
+                updated_node = updated_node.with_changes(body=new_body)
+
+        self.stack.pop()
+        self.indentation -= 1
+        return updated_node
+
+
+@click.command()
+@click.option('--pyarrow_folder', '-f', type=click.Path(resolve_path=True))
+def add_docs_to_stub_files(pyarrow_folder):
+    print("Updating docstrings of stub files in:", pyarrow_folder)
+    package = griffe.load("pyarrow", try_relative_path=True,
+                          force_inspection=True, resolve_aliases=True)
+    lib_modules = ["array", "builder", "compat", "config", "device", "error", "io",
+                   "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor",
+                   "_types"]
+
+    for stub_file in Path(pyarrow_folder).rglob('*.pyi'):
+        if stub_file.name == "_stubs_typing.pyi":
+            continue
+        module = stub_file.with_suffix('').name
+        print(f"[{stub_file} {module}]")
+
+        with open(stub_file, 'r') as f:
+            tree = libcst.parse_module(f.read())
+
+        if module in lib_modules:
+            module = "lib"
+        elif stub_file.parent.name in ["parquet", "interchange"]:
+            module = f"{stub_file.parent.name}.{module}"
+        elif module == "__init__":
+            module = ""
+
+        modified_tree = tree.visit(ReplaceEllipsis(package, module))
+        with open(stub_file, "w") as f:
+            f.write(modified_tree.code)
+        print("\n")
+
+
+if __name__ == "__main__":
+    # click parses CLI arguments from sys.argv; obj={} seeds the context object
+    add_docs_to_stub_files(obj={})

From a0ce53c19216c44bc5011e74af1dd131df2b1594 Mon Sep 17 00:00:00 2001
From: Rok Mihevc
Date: Sat, 20 Sep 2025 20:21:59 +0200
Subject: [PATCH 4/4] Add CI check

---
 .github/workflows/python.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 0d12accda4e..1c0faed062f 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -138,6 +138,15 @@ jobs:
         continue-on-error: true
         run: archery docker push ${{ matrix.image }}
 
+      - name: Type check with mypy and pyright
+        run: |-
+          python -m pip install mypy pyright griffe libcst scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests types-cffi
+          pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pyarrow
+          cd python
+          mypy ./pyarrow-stubs ./pyarrow/tests/test_array.py ./pyarrow/tests/test_io.py
+          pyright ./pyarrow-stubs
+          python ../dev/update_stub_docstrings.py -f ./pyarrow-stubs
+
   macos:
     name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3
     runs-on: macos-${{ matrix.macos-version }}