From ec67a62e97057c1d0ba0d47a6e8c7733cfaaaab9 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Wed, 14 Jan 2026 11:55:49 +0100 Subject: [PATCH 1/2] [VL] Delta write / Spark 3.2, 3.3 write: Fix null sink memory pool causing crash Fixes the following error: ``` facebook::velox::filesystems::S3WriteFile::Impl::Impl(std::basic_string_view >, Aws::S3::S3Client*, facebook::velox::memory::MemoryPool*)+0xb6)[0x7f9892b99726] facebook::velox::filesystems::S3WriteFile::S3WriteFile(std::basic_string_view >, Aws::S3::S3Client*, facebook::velox::memory::MemoryPool*)+0x81)[0x7f9892b973e1] facebook::velox::filesystems::S3FileSystem::openFileForWrite(std::basic_string_view >, facebook::velox::filesystems::FileOptions const&)+0x54)[0x7f9892b85e64] facebook::velox::filesystems::s3WriteFileSinkGenerator(std::__cxx11::basic_string, std::allocator > const&, facebook::velox::dwio::common::FileSink::Options const&)+0x155)[0x7f9892b83335] std::_Function_handler > (std::__cxx11::basic_string, std::allocator > const&, facebook::velox::dwio::common::FileSink::Options const&), std::unique_ptr > (*)(std::__cxx11::basic_string, std::allocator > const&, facebook::velox::dwio::common::FileSink::Options const&)>::_M_invoke(std::_Any_data const&, std::__cxx11::basic_string, std::allocator > const&, facebook::velox::dwio::common::FileSink::Options const&)+0x11)[0x7f9891b68111] facebook::velox::dwio::common::FileSink::create(std::__cxx11::basic_string, std::allocator > const&, facebook::velox::dwio::common::FileSink::Options const&)+0x5f)[0x7f9891b668bf] gluten::VeloxParquetDataSourceS3::initSink(std::unordered_map, std::allocator >, std::__cxx11::basic_string, std::allocator >, std::hash, std::allocator > >, std::equal_to, std::allocator > >, std::allocator, std::allocator > const, std::__cxx11::basic_string, std::allocator > > > > const&)+0x205)[0x7f98916d41a5] gluten::VeloxParquetDataSource::init(std::unordered_map, std::allocator >, std::__cxx11::basic_string, std::allocator >, std::hash, 
std::allocator > >, std::equal_to, std::allocator > >, std::allocator, std::allocator > const, std::__cxx11::basic_string, std::allocator > > > > const&)+0x16)[0x7f9891774266] Java_org_apache_gluten_datasource_VeloxDataSourceJniWrapper_init+0x370)[0x7f98916f5d70] ``` --- cpp/velox/operators/writer/VeloxParquetDataSource.cc | 2 +- cpp/velox/operators/writer/VeloxParquetDataSource.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/velox/operators/writer/VeloxParquetDataSource.cc b/cpp/velox/operators/writer/VeloxParquetDataSource.cc index cd5063a61d4a..5e9f4251368e 100644 --- a/cpp/velox/operators/writer/VeloxParquetDataSource.cc +++ b/cpp/velox/operators/writer/VeloxParquetDataSource.cc @@ -36,7 +36,7 @@ namespace gluten { void VeloxParquetDataSource::initSink(const std::unordered_map& /* sparkConfs */) { if (strncmp(filePath_.c_str(), "file:", 5) == 0) { - sink_ = dwio::common::FileSink::create(filePath_, {.pool = pool_.get()}); + sink_ = dwio::common::FileSink::create(filePath_, {.pool = sinkPool_.get()}); } else { throw std::runtime_error("The file path is not local when writing data with parquet format in velox runtime!"); } diff --git a/cpp/velox/operators/writer/VeloxParquetDataSource.h b/cpp/velox/operators/writer/VeloxParquetDataSource.h index 4dde5c69aa85..30130c826499 100644 --- a/cpp/velox/operators/writer/VeloxParquetDataSource.h +++ b/cpp/velox/operators/writer/VeloxParquetDataSource.h @@ -84,7 +84,11 @@ class VeloxParquetDataSource : public VeloxDataSource { std::shared_ptr veloxPool, std::shared_ptr sinkPool, std::shared_ptr schema) - : VeloxDataSource(filePath, schema), filePath_(filePath), schema_(schema), pool_(std::move(veloxPool)) {} + : VeloxDataSource(filePath, schema), + filePath_(filePath), + sinkPool_(sinkPool), + schema_(schema), + pool_(std::move(veloxPool)) {} void init(const std::unordered_map& sparkConfs) override; virtual void initSink(const std::unordered_map& sparkConfs); From 
75ae605680335f7d735dcb005e1b6c887321cb36 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Wed, 14 Jan 2026 12:47:21 +0100 Subject: [PATCH 2/2] fixup: move sinkPool into sinkPool_ instead of copying it --- cpp/velox/operators/writer/VeloxParquetDataSource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/velox/operators/writer/VeloxParquetDataSource.h b/cpp/velox/operators/writer/VeloxParquetDataSource.h index 30130c826499..dac97168d5a0 100644 --- a/cpp/velox/operators/writer/VeloxParquetDataSource.h +++ b/cpp/velox/operators/writer/VeloxParquetDataSource.h @@ -86,7 +86,7 @@ class VeloxParquetDataSource : public VeloxDataSource { std::shared_ptr schema) : VeloxDataSource(filePath, schema), filePath_(filePath), - sinkPool_(sinkPool), + sinkPool_(std::move(sinkPool)), schema_(schema), pool_(std::move(veloxPool)) {}