From e95e705cbb33286dc56b7cbad1e31ef6c720276d Mon Sep 17 00:00:00 2001 From: Ankita Victor-Levi Date: Mon, 16 Feb 2026 12:27:23 +0530 Subject: [PATCH] Replace C-style casts with C++ casts in shuffle writers Replace 6 C-style pointer casts with reinterpret_cast or static_cast in VeloxHashShuffleWriter and VeloxGpuShuffleWriter. C-style casts bypass type safety checks and are harder to search for during code review. This aligns with the existing style in the same files (e.g. line 111 of VeloxHashShuffleWriter.cc already uses reinterpret_cast). --- cpp/velox/shuffle/VeloxGpuShuffleWriter.cc | 4 ++-- cpp/velox/shuffle/VeloxHashShuffleWriter.cc | 6 +++--- cpp/velox/shuffle/VeloxHashShuffleWriter.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/velox/shuffle/VeloxGpuShuffleWriter.cc b/cpp/velox/shuffle/VeloxGpuShuffleWriter.cc index f7233e7c1a49..36ee60c309e3 100644 --- a/cpp/velox/shuffle/VeloxGpuShuffleWriter.cc +++ b/cpp/velox/shuffle/VeloxGpuShuffleWriter.cc @@ -26,7 +26,7 @@ void VeloxGpuHashShuffleWriter::splitBoolValueType(const uint8_t* srcAddr, const if (dstaddr == nullptr) { continue; } - auto dstPidBase = (uint8_t*)(dstaddr + partitionBufferBase_[pid] * sizeof(uint8_t)); + auto dstPidBase = reinterpret_cast(dstaddr + partitionBufferBase_[pid] * sizeof(uint8_t)); auto pos = partition2RowOffsetBase_[pid]; auto end = partition2RowOffsetBase_[pid + 1]; for (; pos < end; ++pos) { @@ -42,7 +42,7 @@ void VeloxGpuHashShuffleWriter::splitBoolValueType(const uint8_t* srcAddr, const // Split timestamp from int128_t to int64_t, both of them represents the timestamp nanoseconds. arrow::Status VeloxGpuHashShuffleWriter::splitTimestamp(const uint8_t* srcAddr, const std::vector& dstAddrs) { for (auto& pid : partitionUsed_) { - auto dstPidBase = (int64_t*)(dstAddrs[pid] + partitionBufferBase_[pid] * sizeof(int64_t)); + auto dstPidBase = reinterpret_cast(dstAddrs[pid] + partitionBufferBase_[pid] * sizeof(int64_t)); auto pos = partition2RowOffsetBase_[pid]; auto end = partition2RowOffsetBase_[pid + 1]; for (; pos < end; ++pos) { diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc index dda2c42a2e7d..e8b455e448f5 100644 --- a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc +++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc @@ -637,7 +637,7 @@ arrow::Status VeloxHashShuffleWriter::splitBinaryType( auto& binaryBuf = dst[pid]; // use 32bit offset - auto dstLengthBase = (StringLengthType*)(binaryBuf.lengthPtr) + partitionBufferBase_[pid]; + auto dstLengthBase = reinterpret_cast(binaryBuf.lengthPtr) + partitionBufferBase_[pid]; auto valueOffset = binaryBuf.valueOffset; auto dstValuePtr = binaryBuf.valuePtr + valueOffset; @@ -661,7 +661,7 @@ arrow::Status VeloxHashShuffleWriter::splitBinaryType( if (valueOffset >= capacity) { auto oldCapacity = capacity; (void)oldCapacity; // suppress warning - capacity = capacity + std::max((capacity >> multiply), (uint64_t)stringLen); + capacity = capacity + std::max((capacity >> multiply), static_cast(stringLen)); multiply = std::min(3, multiply + 1); const auto& valueBuffer = partitionBuffers_[fixedWidthColumnCount_ + binaryIdx][pid][kBinaryValueBufferIndex]; @@ -675,7 +675,7 @@ arrow::Status VeloxHashShuffleWriter::splitBinaryType( binaryBuf.valueCapacity = capacity; dstValuePtr = binaryBuf.valuePtr + valueOffset - stringLen; // Need to update dstLengthBase because lengthPtr can be updated if Reserve triggers spill. - dstLengthBase = (StringLengthType*)(binaryBuf.lengthPtr) + partitionBufferBase_[pid]; + dstLengthBase = reinterpret_cast(binaryBuf.lengthPtr) + partitionBufferBase_[pid]; } // 2. copy value diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.h b/cpp/velox/shuffle/VeloxHashShuffleWriter.h index da67413eb256..fd889d4c327e 100644 --- a/cpp/velox/shuffle/VeloxHashShuffleWriter.h +++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.h @@ -270,7 +270,7 @@ class VeloxHashShuffleWriter : public VeloxShuffleWriter { template arrow::Status splitFixedType(const uint8_t* srcAddr, const std::vector& dstAddrs) { for (auto& pid : partitionUsed_) { - auto dstPidBase = (T*)(dstAddrs[pid] + partitionBufferBase_[pid] * sizeof(T)); + auto dstPidBase = reinterpret_cast(dstAddrs[pid] + partitionBufferBase_[pid] * sizeof(T)); auto pos = partition2RowOffsetBase_[pid]; auto end = partition2RowOffsetBase_[pid + 1]; for (; pos < end; ++pos) {