diff --git a/store_handler/bigtable_handler.cpp b/store_handler/bigtable_handler.cpp index 52a712de..172c321f 100644 --- a/store_handler/bigtable_handler.cpp +++ b/store_handler/bigtable_handler.cpp @@ -710,6 +710,13 @@ void EloqDS::BigTableHandler::FetchRangeSlices( fetch_cc)); } +void EloqDS::BigTableHandler::FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) +{ + LOG(ERROR) << "BigTableHandler::FetchTableRangeSize not implemented"; + assert(false); +} + void EloqDS::BigTableHandler::OnFetchRangeSlices( google::cloud::future>> f, diff --git a/store_handler/bigtable_handler.h b/store_handler/bigtable_handler.h index 10006bbe..e3ccd39c 100644 --- a/store_handler/bigtable_handler.h +++ b/store_handler/bigtable_handler.h @@ -82,6 +82,9 @@ class BigTableHandler : public txservice::store::DataStoreHandler void FetchRangeSlices(txservice::FetchRangeSlicesReq *fetch_cc) override; + void FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) override; + /** * @brief Read a row from base table or skindex table in datastore with * specified key. Caller should pass in complete primary key or skindex key. 
diff --git a/store_handler/data_store_service_client.cpp b/store_handler/data_store_service_client.cpp index 1c55d901..2fd359b8 100644 --- a/store_handler/data_store_service_client.cpp +++ b/store_handler/data_store_service_client.cpp @@ -1059,6 +1059,30 @@ void DataStoreServiceClient::FetchRangeSlices( &FetchRangeSlicesCallback); } +void DataStoreServiceClient::FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) +{ + txservice::TableName range_table_name(fetch_cc->table_name_->StringView(), + txservice::TableType::RangePartition, + fetch_cc->table_name_->Engine()); + + int32_t kv_partition_id = + KvPartitionIdOfRangeSlices(range_table_name, fetch_cc->partition_id_); + uint32_t shard_id = GetShardIdByPartitionId(kv_partition_id, false); + + auto catalog_factory = GetCatalogFactory(range_table_name.Engine()); + assert(catalog_factory != nullptr); + fetch_cc->kv_start_key_ = + EncodeRangeKey(catalog_factory, range_table_name, fetch_cc->start_key_); + + Read(kv_range_table_name, + kv_partition_id, + shard_id, + fetch_cc->kv_start_key_, + fetch_cc, + &FetchRangeSizeCallback); +} + /** * @brief Deletes data that is out of the specified range. * @@ -1275,16 +1299,19 @@ std::string DataStoreServiceClient::EncodeRangeKey( * @param range_version The version of the range. * @param version The general version number. * @param segment_cnt The number of segments in the range. + * @param range_size The size of the range. * @return Binary string containing the encoded range value. 
*/ std::string DataStoreServiceClient::EncodeRangeValue(int32_t range_id, uint64_t range_version, uint64_t version, - uint32_t segment_cnt) + uint32_t segment_cnt, + int32_t range_size) { std::string kv_range_record; kv_range_record.reserve(sizeof(int32_t) + sizeof(uint64_t) + - sizeof(uint64_t) + sizeof(uint32_t)); + sizeof(uint64_t) + sizeof(uint32_t) + + sizeof(int32_t)); kv_range_record.append(reinterpret_cast<const char *>(&range_id), sizeof(int32_t)); kv_range_record.append(reinterpret_cast<const char *>(&range_version), @@ -1294,6 +1321,8 @@ std::string DataStoreServiceClient::EncodeRangeValue(int32_t range_id, // segment_cnt of slices kv_range_record.append(reinterpret_cast<const char *>(&segment_cnt), sizeof(uint32_t)); + kv_range_record.append(reinterpret_cast<const char *>(&range_size), + sizeof(int32_t)); return kv_range_record; } @@ -1361,6 +1390,7 @@ RangeSliceBatchPlan DataStoreServiceClient::PrepareRangeSliceBatches( RangeSliceBatchPlan plan; plan.segment_cnt = 0; plan.version = version; + plan.range_size = 0; // Estimate capacity based on slices size plan.segment_keys.reserve(slices.size() / 10 + 1); // Rough estimate @@ -1409,6 +1439,7 @@ RangeSliceBatchPlan DataStoreServiceClient::PrepareRangeSliceBatches( sizeof(uint32_t)); segment_record.append(slice_start_key.Data(), key_size); uint32_t slice_size = static_cast<uint32_t>(slices[i]->Size()); + plan.range_size += static_cast<int32_t>(slice_size); segment_record.append(reinterpret_cast<const char *>(&slice_size), sizeof(uint32_t)); } @@ -1574,6 +1605,7 @@ void DataStoreServiceClient::EnqueueRangeMetadataRecord( uint64_t range_version, uint64_t version, uint32_t segment_cnt, + int32_t range_size, RangeMetadataAccumulator &accumulator) { // Compute kv_table_name and kv_partition_id @@ -1584,8 +1616,8 @@ void DataStoreServiceClient::EnqueueRangeMetadataRecord( // Encode key and value std::string key_str = EncodeRangeKey(catalog_factory, table_name, range_start_key); - std::string rec_str = - EncodeRangeValue(partition_id, range_version, version, segment_cnt); + std::string rec_str = 
EncodeRangeValue( + partition_id, range_version, version, segment_cnt, range_size); // Get or create entry in accumulator auto key = std::make_pair(kv_table_name, kv_partition_id); @@ -1753,6 +1785,7 @@ bool DataStoreServiceClient::UpdateRangeSlices( req.range_slices_, req.partition_id_); uint32_t segment_cnt = slice_plan.segment_cnt; + int32_t range_size = slice_plan.range_size; int32_t kv_partition_id = KvPartitionIdOfRangeSlices(*req.table_name_, req.partition_id_); auto iter = slice_plans.find(kv_partition_id); @@ -1777,6 +1810,7 @@ bool DataStoreServiceClient::UpdateRangeSlices( req.range_version_, req.ckpt_ts_, segment_cnt, + range_size, meta_acc); } @@ -1978,6 +2012,7 @@ bool DataStoreServiceClient::UpdateRangeSlices( range_version, version, segment_cnt, + slice_plans[0].range_size, meta_acc); SyncConcurrentRequest *meta_sync_concurrent = @@ -2069,6 +2104,7 @@ bool DataStoreServiceClient::UpsertRanges( auto slice_plan = PrepareRangeSliceBatches( table_name, version, range.slices_, range.partition_id_); uint32_t segment_cnt = slice_plan.segment_cnt; + int32_t range_size = slice_plan.range_size; int32_t kv_partition_id = KvPartitionIdOfRangeSlices(table_name, range.partition_id_); @@ -2092,6 +2128,7 @@ bool DataStoreServiceClient::UpsertRanges( version, // range_version (using version for now) version, segment_cnt, + range_size, meta_acc); } @@ -4683,7 +4720,8 @@ bool DataStoreServiceClient::InitTableRanges( std::string key_str = EncodeRangeKey(catalog_factory, table_name, *neg_inf_key); - std::string rec_str = EncodeRangeValue(init_range_id, version, version, 0); + std::string rec_str = + EncodeRangeValue(init_range_id, version, version, 0, 0); keys.emplace_back(std::string_view(key_str.data(), key_str.size())); records.emplace_back(std::string_view(rec_str.data(), rec_str.size())); diff --git a/store_handler/data_store_service_client.h b/store_handler/data_store_service_client.h index 4d860174..fb877d1e 100644 --- a/store_handler/data_store_service_client.h 
+++ b/store_handler/data_store_service_client.h @@ -66,6 +66,7 @@ struct RangeSliceBatchPlan std::vector segment_keys; // Owned string buffers std::vector segment_records; // Owned string buffers size_t version; + int32_t range_size{0}; // Clear method for reuse void Clear() @@ -74,6 +75,7 @@ struct RangeSliceBatchPlan segment_keys.clear(); segment_records.clear(); version = 0; + range_size = 0; } }; @@ -278,6 +280,9 @@ class DataStoreServiceClient : public txservice::store::DataStoreHandler void FetchRangeSlices(txservice::FetchRangeSlicesReq *fetch_cc) override; + void FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) override; + bool DeleteOutOfRangeData( const txservice::TableName &table_name, int32_t partition_id, @@ -346,7 +351,8 @@ class DataStoreServiceClient : public txservice::store::DataStoreHandler std::string EncodeRangeValue(int32_t range_id, uint64_t range_version, uint64_t version, - uint32_t segment_cnt); + uint32_t segment_cnt, + int32_t range_size); std::string EncodeRangeSliceKey(const txservice::TableName &table_name, int32_t range_id, uint32_t segment_id); @@ -654,6 +660,7 @@ class DataStoreServiceClient : public txservice::store::DataStoreHandler uint64_t range_version, uint64_t version, uint32_t segment_cnt, + int32_t range_size, RangeMetadataAccumulator &accumulator); void DispatchRangeMetadataBatches( @@ -934,6 +941,11 @@ class DataStoreServiceClient : public txservice::store::DataStoreHandler ::google::protobuf::Closure *closure, DataStoreServiceClient &client, const remote::CommonResult &result); + + friend void FetchRangeSizeCallback(void *data, + ::google::protobuf::Closure *closure, + DataStoreServiceClient &client, + const remote::CommonResult &result); }; struct UpsertTableData diff --git a/store_handler/data_store_service_client_closure.cpp b/store_handler/data_store_service_client_closure.cpp index ab11ce5b..bdddbec3 100644 --- a/store_handler/data_store_service_client_closure.cpp +++ 
b/store_handler/data_store_service_client_closure.cpp @@ -811,8 +811,9 @@ void FetchTableRangesCallback(void *data, for (uint32_t i = 0; i < items_size; i++) { scan_next_closure->GetItem(i, key, value, ts, ttl); - assert(value.size() == (sizeof(int32_t) + sizeof(uint64_t) + - sizeof(uint64_t) + sizeof(uint32_t))); + assert(value.size() == + (sizeof(int32_t) + sizeof(uint64_t) + sizeof(uint64_t) + + sizeof(uint32_t) + sizeof(int32_t))); const char *buf = value.data(); int32_t partition_id = *(reinterpret_cast<const int32_t *>(buf)); buf += sizeof(partition_id); @@ -925,6 +926,45 @@ void FetchTableRangesCallback(void *data, } } +void FetchRangeSizeCallback(void *data, + ::google::protobuf::Closure *closure, + DataStoreServiceClient &client, + const remote::CommonResult &result) +{ + txservice::FetchTableRangeSizeCc *fetch_range_size_cc = + static_cast<txservice::FetchTableRangeSizeCc *>(data); + + if (result.error_code() == remote::DataStoreError::KEY_NOT_FOUND) + { + fetch_range_size_cc->store_range_size_ = 0; + fetch_range_size_cc->SetFinish( + static_cast<uint32_t>(txservice::CcErrorCode::NO_ERROR)); + } + else if (result.error_code() != remote::DataStoreError::NO_ERROR) + { + LOG(ERROR) << "Fetch range size failed with error code: " + << result.error_code(); + fetch_range_size_cc->SetFinish( + static_cast<uint32_t>(txservice::CcErrorCode::DATA_STORE_ERR)); + } + else + { + ReadClosure *read_closure = static_cast<ReadClosure *>(closure); + std::string_view read_val = read_closure->Value(); + assert(read_closure->TableName() == kv_range_table_name); + assert(read_val.size() == + (sizeof(int32_t) + sizeof(uint64_t) + sizeof(uint64_t) + + sizeof(uint32_t) + sizeof(int32_t))); + const char *buf = read_val.data(); + buf += read_val.size() - sizeof(int32_t); + fetch_range_size_cc->store_range_size_ = + *reinterpret_cast<const int32_t *>(buf); + + fetch_range_size_cc->SetFinish( + static_cast<uint32_t>(txservice::CcErrorCode::NO_ERROR)); + } +} + void FetchRangeSlicesCallback(void *data, ::google::protobuf::Closure *closure, DataStoreServiceClient &client, @@ -965,8 +1005,9 @@ void 
FetchRangeSlicesCallback(void *data, else { assert(read_closure->TableName() == kv_range_table_name); - assert(read_val.size() == (sizeof(int32_t) + sizeof(uint64_t) + - sizeof(uint64_t) + sizeof(uint32_t))); + assert(read_val.size() == + (sizeof(int32_t) + sizeof(uint64_t) + sizeof(uint64_t) + + sizeof(uint32_t) + sizeof(int32_t))); const char *buf = read_val.data(); int32_t range_partition_id = *(reinterpret_cast<const int32_t *>(buf)); diff --git a/store_handler/data_store_service_client_closure.h index 4bb72373..b8c3813c 100644 --- a/store_handler/data_store_service_client_closure.h +++ b/store_handler/data_store_service_client_closure.h @@ -3102,6 +3102,14 @@ void FetchTableRangesCallback(void *data, DataStoreServiceClient &client, const remote::CommonResult &result); +/** + * Callback for fetching range size from table_ranges. + */ +void FetchRangeSizeCallback(void *data, + ::google::protobuf::Closure *closure, + DataStoreServiceClient &client, + const remote::CommonResult &result); + /** * Callback for fetching range slices. 
* diff --git a/store_handler/dynamo_handler.cpp b/store_handler/dynamo_handler.cpp index 0aa7ef78..5bfa9029 100644 --- a/store_handler/dynamo_handler.cpp +++ b/store_handler/dynamo_handler.cpp @@ -2534,6 +2534,12 @@ void EloqDS::DynamoHandler::FetchRangeSlices(FetchRangeSlicesReq *fetch_cc) assert(false); } +void EloqDS::DynamoHandler::FetchTableRangeSize(FetchTableRangeSizeCc *fetch_cc) +{ + LOG(ERROR) << "DynamoHandler::FetchTableRangeSize not implemented"; + assert(false); +} + void EloqDS::DynamoHandler::OnFetchRangeSlices( const Aws::DynamoDB::DynamoDBClient *client, const Aws::DynamoDB::Model::GetItemRequest &request, diff --git a/store_handler/dynamo_handler.h b/store_handler/dynamo_handler.h index f2fc9ba5..704200e6 100644 --- a/store_handler/dynamo_handler.h +++ b/store_handler/dynamo_handler.h @@ -158,6 +158,7 @@ class DynamoHandler : public txservice::store::DataStoreHandler //-- range partition void FetchTableRanges(FetchTableRangesCc *fetch_cc) override; void FetchRangeSlices(FetchRangeSlicesReq *fetch_cc) override; + void FetchTableRangeSize(FetchTableRangeSizeCc *fetch_cc) override; bool DeleteOutOfRangeData( const txservice::TableName &table_name, diff --git a/store_handler/rocksdb_handler.cpp b/store_handler/rocksdb_handler.cpp index e741748b..47c039aa 100644 --- a/store_handler/rocksdb_handler.cpp +++ b/store_handler/rocksdb_handler.cpp @@ -1128,6 +1128,13 @@ void RocksDBHandler::FetchRangeSlices(txservice::FetchRangeSlicesReq *fetch_cc) assert(false); } +void RocksDBHandler::FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) +{ + LOG(ERROR) << "RocksDBHandler::FetchTableRangeSize not implemented"; + assert(false); +} + bool DeleteOutOfRangeDataInternal(std::string delete_from_partition_sql, int32_t partition_id, const txservice::TxKey *start_k) diff --git a/store_handler/rocksdb_handler.h b/store_handler/rocksdb_handler.h index c8717a49..8742b064 100644 --- a/store_handler/rocksdb_handler.h +++ b/store_handler/rocksdb_handler.h @@ 
-346,6 +346,9 @@ class RocksDBHandler : public txservice::store::DataStoreHandler void FetchRangeSlices(txservice::FetchRangeSlicesReq *fetch_cc) override; + void FetchTableRangeSize( + txservice::FetchTableRangeSizeCc *fetch_cc) override; + bool DeleteOutOfRangeDataInternal(std::string delete_from_partition_sql, int32_t partition_id, const txservice::TxKey *start_k); diff --git a/tx_service/include/cc/cc_handler.h b/tx_service/include/cc/cc_handler.h index 3d4640b8..cad6db33 100644 --- a/tx_service/include/cc/cc_handler.h +++ b/tx_service/include/cc/cc_handler.h @@ -166,7 +166,9 @@ class CcHandler const TxRecord *record, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult &hres) = 0; + CcHandlerResult &hres, + int32_t partition_id = -1, + bool on_dirty_range = false) = 0; /** * @briefPost-processes a read/scan key. Post-processing clears the read diff --git a/tx_service/include/cc/cc_map.h b/tx_service/include/cc/cc_map.h index 0d1434b6..9aaa8c58 100644 --- a/tx_service/include/cc/cc_map.h +++ b/tx_service/include/cc/cc_map.h @@ -21,10 +21,12 @@ */ #pragma once +#include #include #include #include // std::pair +#include "absl/container/flat_hash_map.h" #include "cc/cc_req_base.h" #include "cc_protocol.h" #include "error_messages.h" // CcErrorCode @@ -260,6 +262,20 @@ class CcMap virtual const txservice::KeySchema *KeySchema() const = 0; virtual const txservice::RecordSchema *RecordSchema() const = 0; + /** + * Called by FetchTableRangeSizeCc::Execute when async load completes. + * Merges loaded size with accumulated delta (second), or resets to + * kNotInitialized on failure. + * When emplace is true and partition_id is absent, inserts (partition_id, + * (0,0)) before merging; used for new ranges after split. 
+ */ + bool InitRangeSize(uint32_t partition_id, + int32_t persisted_size, + bool succeed = true, + bool emplace = false); + + void ResetRangeStatus(uint32_t partition_id); + uint64_t SchemaTs() const { return schema_ts_; @@ -294,6 +310,15 @@ class CcMap uint64_t last_dirty_commit_ts_{0}; protected: + // Range id -> (range_size, delta_range_size). Only used when + // RangePartitioned. + // - first: current range size; RangeSizeState::Loading (-1) = loading from + // store; RangeSizeState::Uninitialized (-2) = not yet loaded. + // - second: delta accumulated during load (first==-1) or split (first>=0). + // - third: True if a split task been triggered due to reaching a threshold. + absl::flat_hash_map> + range_sizes_; + /** * @brief After the input request is executed at the current shard, moves * the request to another shard for execution. diff --git a/tx_service/include/cc/cc_page_clean_guard.h b/tx_service/include/cc/cc_page_clean_guard.h index 39c1c316..c2a8d94d 100644 --- a/tx_service/include/cc/cc_page_clean_guard.h +++ b/tx_service/include/cc/cc_page_clean_guard.h @@ -263,8 +263,7 @@ struct CcPageCleanGuard cce->PayloadStatus() != RecordStatus::Unknown) || cce->PayloadStatus() == RecordStatus::Deleted)) { - store_range->DeleteKey( - key, cc_shard_->core_id_, store_slice); + store_range->DeleteKey(key, store_slice); } MarkClean(cc_ng_id_, idx, delay_free); diff --git a/tx_service/include/cc/cc_req_misc.h b/tx_service/include/cc/cc_req_misc.h index 2c1807dd..eedae7e7 100644 --- a/tx_service/include/cc/cc_req_misc.h +++ b/tx_service/include/cc/cc_req_misc.h @@ -367,7 +367,6 @@ struct InitKeyCacheCc : public CcRequestBase void Reset(StoreRange *range, StoreSlice *slice, - uint16_t core_cnt, const TableName &tbl_name, int64_t term, NodeGroupId ng_id) @@ -380,18 +379,15 @@ struct InitKeyCacheCc : public CcRequestBase ng_id_ = ng_id; range_ = range; slice_ = slice; - unfinished_cnt_ = core_cnt; - - pause_pos_.clear(); - pause_pos_.resize(core_cnt); + pause_pos_ = 
TxKey(); } bool Execute(CcShard &ccs) override; - bool SetFinish(uint16_t core, bool succ); + void SetFinish(bool succ); StoreSlice &Slice(); StoreRange &Range(); - void SetPauseKey(TxKey &key, uint16_t core_id); - TxKey &PauseKey(uint16_t core_id); + void SetPauseKey(TxKey &key); + TxKey &PauseKey(); private: TableName tbl_name_{std::string(""), TableType::Primary, TableEngine::None}; @@ -399,8 +395,7 @@ struct InitKeyCacheCc : public CcRequestBase NodeGroupId ng_id_; StoreRange *range_; StoreSlice *slice_; - std::atomic unfinished_cnt_{0}; - std::vector pause_pos_; + TxKey pause_pos_; }; struct FillStoreSliceCc : public CcRequestBase @@ -426,10 +421,9 @@ struct FillStoreSliceCc : public CcRequestBase bool Execute(CcShard &ccs) override; - std::deque &SliceData(uint16_t core_id) + std::deque &SliceData() { - assert(core_id < partitioned_slice_data_.size()); - return partitioned_slice_data_[core_id]; + return slice_data_; } void AddDataItem(TxKey key, @@ -437,8 +431,8 @@ struct FillStoreSliceCc : public CcRequestBase uint64_t version_ts, bool is_deleted); - bool SetFinish(CcShard *cc_shard); - bool SetError(CcErrorCode err_code); + void SetFinish(CcShard *cc_shard); + void SetError(CcErrorCode err_code); void SetKvFinish(bool success); @@ -447,12 +441,9 @@ struct FillStoreSliceCc : public CcRequestBase assert(err_code != CcErrorCode::NO_ERROR); DLOG(ERROR) << "Abort this FillStoreSliceCc request with error: " << CcErrorMessage(err_code); - bool finish_all = SetError(err_code); + SetError(err_code); // Recycle request - if (finish_all) - { - Free(); - } + Free(); } const TableName &TblName() const @@ -485,17 +476,16 @@ struct FillStoreSliceCc : public CcRequestBase force_load_ = force_load; } - size_t NextIndex(size_t core_idx) const + size_t NextIndex() const { - size_t next_idx = next_idxs_[core_idx]; - assert(next_idx <= partitioned_slice_data_[core_idx].size()); - return next_idx; + assert(next_idx_ <= slice_data_.size()); + return next_idx_; } - void 
SetNextIndex(size_t core_idx, size_t index) + void SetNextIndex(size_t index) { - assert(index <= partitioned_slice_data_[core_idx].size()); - next_idxs_[core_idx] = index; + assert(index <= slice_data_.size()); + next_idx_ = index; } NodeGroupId NodeGroup() const @@ -533,6 +523,8 @@ struct FillStoreSliceCc : public CcRequestBase return true; } + int32_t PartitionId() const; + metrics::TimePoint start_; private: @@ -540,13 +532,11 @@ struct FillStoreSliceCc : public CcRequestBase NodeGroupId cc_ng_id_; int64_t cc_ng_term_; bool force_load_; - uint16_t finish_cnt_; - uint16_t core_cnt_; std::mutex mux_; CcErrorCode err_code_{CcErrorCode::NO_ERROR}; - std::vector next_idxs_; - std::vector> partitioned_slice_data_; + size_t next_idx_; + std::deque slice_data_; StoreSlice *range_slice_ = nullptr; StoreRange *range_ = nullptr; @@ -1157,4 +1147,35 @@ struct ShardCleanCc : public CcRequestBase private: size_t free_count_{0}; }; + +struct FetchTableRangeSizeCc : public CcRequestBase +{ +public: + FetchTableRangeSizeCc() = default; + ~FetchTableRangeSizeCc() = default; + + void Reset(const TableName &table_name, + int32_t partition_id, + const TxKey &start_key, + CcShard *ccs, + NodeGroupId ng_id, + int64_t ng_term); + + bool ValidTermCheck(); + bool Execute(CcShard &ccs) override; + void SetFinish(uint32_t error); + + const TableName *table_name_; + int32_t partition_id_{0}; + TxKey start_key_{}; + NodeGroupId node_group_id_{0}; + int64_t node_group_term_{-1}; + CcShard *ccs_{nullptr}; + + uint32_t error_code_{0}; + int32_t store_range_size_{0}; + + // Only used in DataStoreHandler + std::string kv_start_key_; +}; } // namespace txservice diff --git a/tx_service/include/cc/cc_request.h b/tx_service/include/cc/cc_request.h index 97e93fae..8096672a 100644 --- a/tx_service/include/cc/cc_request.h +++ b/tx_service/include/cc/cc_request.h @@ -740,7 +740,9 @@ struct PostWriteCc : public TemplatedCcRequest const TxRecord *rec, OperationType operation_type, uint32_t 
key_shard_code, - CcHandlerResult *res) + CcHandlerResult *res, + int32_t partition_id = -1, + bool on_dirty_range = false) { TemplatedCcRequest::Reset( nullptr, res, addr->NodeGroupId(), tx_number, tx_term); @@ -754,6 +756,8 @@ struct PostWriteCc : public TemplatedCcRequest is_remote_ = false; ccm_ = nullptr; is_initial_insert_ = false; + partition_id_ = partition_id; + on_dirty_range_ = on_dirty_range; } void Reset(const TxKey *key, @@ -767,7 +771,9 @@ struct PostWriteCc : public TemplatedCcRequest uint32_t key_shard_code, CcHandlerResult *res, bool initial_insertion = false, - int64_t ng_term = INIT_TERM) + int64_t ng_term = INIT_TERM, + int32_t partition_id = -1, + bool on_dirty_range = false) { TemplatedCcRequest::Reset( &table_name, @@ -788,6 +794,8 @@ struct PostWriteCc : public TemplatedCcRequest is_remote_ = false; ccm_ = nullptr; is_initial_insert_ = initial_insertion; + partition_id_ = partition_id; + on_dirty_range_ = on_dirty_range; } void Reset(const CcEntryAddr *addr, @@ -797,7 +805,9 @@ struct PostWriteCc : public TemplatedCcRequest const std::string *rec, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult *res) + CcHandlerResult *res, + int32_t partition_id = -1, + bool on_dirty_range = false) { TemplatedCcRequest::Reset( nullptr, res, addr->NodeGroupId(), tx_number, tx_term); @@ -811,6 +821,8 @@ struct PostWriteCc : public TemplatedCcRequest is_remote_ = true; ccm_ = nullptr; is_initial_insert_ = false; + partition_id_ = partition_id; + on_dirty_range_ = on_dirty_range; } void Reset(const TableName *table_name, @@ -824,7 +836,9 @@ struct PostWriteCc : public TemplatedCcRequest uint32_t key_shard_code, CcHandlerResult *res, bool initial_insertion = false, - int64_t ng_term = INIT_TERM) + int64_t ng_term = INIT_TERM, + int32_t partition_id = -1, + bool on_dirty_range = false) { TemplatedCcRequest::Reset( table_name, @@ -845,6 +859,8 @@ struct PostWriteCc : public TemplatedCcRequest is_remote_ = true; ccm_ = nullptr; 
is_initial_insert_ = initial_insertion; + partition_id_ = partition_id; + on_dirty_range_ = on_dirty_range; } const CcEntryAddr *CceAddr() const @@ -877,6 +893,11 @@ struct PostWriteCc : public TemplatedCcRequest return key_shard_code_; } + int32_t PartitionId() const + { + return partition_id_; + } + const void *Key() const { return is_remote_ ? nullptr : key_; @@ -892,6 +913,16 @@ struct PostWriteCc : public TemplatedCcRequest return is_initial_insert_; } + bool OnDirtyRange() const + { + return on_dirty_range_; + } + + bool NeedUpdateRangeSize() const + { + return partition_id_ >= 0; + } + private: const CcEntryAddr *cce_addr_; uint64_t commit_ts_; @@ -909,6 +940,9 @@ struct PostWriteCc : public TemplatedCcRequest const void *key_; const std::string *key_str_; }; + int32_t partition_id_{-1}; + // True if the key is located in a splitting range. + bool on_dirty_range_{false}; }; struct PostWriteAllCc @@ -2341,7 +2375,6 @@ struct ScanSliceCc end_key_type_(RangeKeyType::RawPtr), schema_version_(0) { - parallel_req_ = true; } ~ScanSliceCc() @@ -2409,12 +2442,12 @@ struct ScanSliceCc is_require_keys_ = is_require_keys; is_require_recs_ = is_require_recs; - unfinished_core_cnt_.store(1, std::memory_order_relaxed); range_slice_id_.Reset(); last_pinned_slice_ = nullptr; prefetch_size_ = prefetch_size; - err_.store(CcErrorCode::NO_ERROR, std::memory_order_relaxed); + err_ = CcErrorCode::NO_ERROR; cache_hit_miss_collected_ = false; + blocking_info_.Reset(); } void Set(const TableName &tbl_name, @@ -2472,11 +2505,11 @@ struct ScanSliceCc is_require_recs_ = is_require_recs; prefetch_size_ = prefetch_size; - unfinished_core_cnt_.store(1, std::memory_order_relaxed); range_slice_id_.Reset(); last_pinned_slice_ = nullptr; - err_.store(CcErrorCode::NO_ERROR, std::memory_order_relaxed); + err_ = CcErrorCode::NO_ERROR; cache_hit_miss_collected_ = false; + blocking_info_.Reset(); } bool Execute(CcShard &ccs) override @@ -2485,7 +2518,8 @@ struct ScanSliceCc { // Do not modify res_ 
directly since there could be other cores // still working on this cc req. - return SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + return true; } CcMap *ccm = nullptr; @@ -2518,7 +2552,8 @@ struct ScanSliceCc // is marked as errored. if (init_res.error != CcErrorCode::NO_ERROR) { - return SetError(init_res.error); + SetError(init_res.error); + return true; } // The req will be re-enqueued. return false; @@ -2545,16 +2580,13 @@ struct ScanSliceCc void AbortCcRequest(CcErrorCode err_code) override { - if (SetError(err_code)) + SetError(err_code); + // If the request has pinned any slice, unpin it. + if (range_slice_id_.Range() != nullptr) { - // Last core finished. If the request has pinned any slice, unpin - // it. - if (range_slice_id_.Range() != nullptr) - { - UnpinSlices(); - } - Free(); + UnpinSlices(); } + Free(); } bool IsLocal() const @@ -2685,18 +2717,18 @@ struct ScanSliceCc return ts_; } - ScanCache *GetLocalScanCache(size_t shard_id) + ScanCache *GetLocalScanCache() { assert(IsLocal()); - return res_->Value().ccm_scanner_->Cache(shard_id); + return res_->Value().ccm_scanner_->Cache(0); } - RemoteScanSliceCache *GetRemoteScanCache(size_t shard_id) + RemoteScanSliceCache *GetRemoteScanCache() { assert(!IsLocal()); RangeScanSliceResult &slice_result = res_->Value(); - assert(shard_id < slice_result.remote_scan_caches_->size()); - return &slice_result.remote_scan_caches_->at(shard_id); + assert(slice_result.remote_scan_caches_ != nullptr); + return slice_result.remote_scan_caches_; } CcScanner *GetLocalScanner() @@ -2704,161 +2736,70 @@ struct ScanSliceCc return IsLocal() ? 
res_->Value().ccm_scanner_ : nullptr; } - uint64_t BlockingCceLockAddr(uint16_t core_id) + uint64_t BlockingCceLockAddr() const { - assert(core_id < blocking_vec_.size()); - return blocking_vec_[core_id].cce_lock_addr_; + return blocking_info_.cce_lock_addr_; } - std::pair BlockingPair(uint16_t core_id) + std::pair BlockingPair() const { - assert(core_id < blocking_vec_.size()); - return {blocking_vec_[core_id].type_, - blocking_vec_[core_id].scan_type_}; + return {blocking_info_.type_, blocking_info_.scan_type_}; } - void SetBlockingInfo(uint16_t core_id, - uint64_t cce_lock_addr, + void SetBlockingInfo(uint64_t cce_lock_addr, ScanType scan_type, ScanBlockingType blocking_type) { - assert(core_id < blocking_vec_.size()); - blocking_vec_[core_id] = {cce_lock_addr, scan_type, blocking_type}; + blocking_info_.cce_lock_addr_ = cce_lock_addr; + blocking_info_.scan_type_ = scan_type; + blocking_info_.type_ = blocking_type; } - void SetShardCount(uint16_t shard_cnt) + void SetPriorCceLockAddr(uint64_t addr) { - blocking_vec_.resize(shard_cnt); - for (auto &it : blocking_vec_) - { - it.cce_lock_addr_ = 0; - it.scan_type_ = ScanType::ScanUnknow; - it.type_ = ScanBlockingType::NoBlocking; - } - - wait_for_snapshot_cnt_.resize(shard_cnt); - for (uint16_t i = 0; i < shard_cnt; ++i) - { - wait_for_snapshot_cnt_[i] = 0; - } - } - - uint64_t GetShardCount() const - { - return blocking_vec_.size(); - } - - void SetUnfinishedCoreCnt(uint16_t core_cnt) - { - unfinished_core_cnt_.store(core_cnt, std::memory_order_release); - } - - void SetPriorCceLockAddr(uint64_t addr, uint16_t shard_id) - { - assert(shard_id < blocking_vec_.size()); - blocking_vec_[shard_id] = { - addr, ScanType::ScanUnknow, ScanBlockingType::NoBlocking}; + blocking_info_.cce_lock_addr_ = addr; + blocking_info_.scan_type_ = ScanType::ScanUnknow; + blocking_info_.type_ = ScanBlockingType::NoBlocking; } /** * @brief Notifies the scan slice request that the scan at the calling core * has finished. 
* - * @return true, if all cores have finished the scan. - * @return false, if the scan is not completed in all cores. */ - bool SetFinish() + void SetFinish() { - uint16_t remaining_cnt = - unfinished_core_cnt_.fetch_sub(1, std::memory_order_acq_rel); - - if (remaining_cnt == 1) + if (err_ == CcErrorCode::NO_ERROR) { - // Only update result if this is local request. Remote request - // result will be updated by dedicated core. - if (res_->Value().is_local_) - { - if (err_.load(std::memory_order_relaxed) == - CcErrorCode::NO_ERROR) - { - res_->Value().ccm_scanner_->FinalizeCommit(); - - res_->SetFinished(); - } - else - { - res_->SetError(err_.load(std::memory_order_relaxed)); - } - } + res_->SetFinished(); + } + else + { + res_->SetError(err_); } - - return remaining_cnt == 1; } - bool SetError(CcErrorCode err) + void SetError(CcErrorCode err) { - CcErrorCode expected = CcErrorCode::NO_ERROR; - err_.compare_exchange_strong(expected, - err, - std::memory_order_relaxed, - std::memory_order_relaxed); - uint16_t remaining_cnt = - unfinished_core_cnt_.fetch_sub(1, std::memory_order_acq_rel); - - // remaining_cnt might be 0 if all cores have finished and the req is - // put back into the result sending core's queue. - if (remaining_cnt <= 1) + if (err_ == CcErrorCode::NO_ERROR) { - res_->SetError(err_.load(std::memory_order_relaxed)); + err_ = err; } - return remaining_cnt <= 1; + res_->SetError(err_); } void DeferSetError(CcErrorCode err) { - CcErrorCode expected = CcErrorCode::NO_ERROR; - err_.compare_exchange_strong(expected, - err, - std::memory_order_relaxed, - std::memory_order_relaxed); - } - - CcErrorCode GetError() const - { - return err_.load(std::memory_order_acquire); - } - - /** - * @brief Send response to src node if all cores have finished. - * We use this method to send scan slice response if this request is - * a remote request. 
- * We assign a dedicated core to be the response sender instead of directly - * sending the response on the last finished core. This is to avoid - * serialization of response message causing one core to become - * significantly slower than others and would end up being the sender of all - * scan slice response. - */ - bool SendResponseIfFinished() - { - if (unfinished_core_cnt_.load(std::memory_order_relaxed) == 0) + if (err_ == CcErrorCode::NO_ERROR) { - if (err_.load(std::memory_order_relaxed) == CcErrorCode::NO_ERROR) - { - res_->SetFinished(); - } - else - { - res_->SetError(err_.load(std::memory_order_relaxed)); - } - return true; + err_ = err; } - return false; } - bool IsResponseSender(uint16_t core_id) const + CcErrorCode GetError() const { - return ((tx_number_ & 0x3FF) % blocking_vec_.size()) == core_id; + return err_; } bool IsForWrite() const @@ -2931,30 +2872,30 @@ struct ScanSliceCc cache_hit_miss_collected_ = true; } - bool IsWaitForSnapshot(uint16_t core_id) const + bool IsWaitForSnapshot() const { - return blocking_vec_[core_id].type_ == - ScanBlockingType::BlockOnWaitSnapshots; + return blocking_info_.type_ == ScanBlockingType::BlockOnWaitSnapshots; } - void SetIsWaitForSnapshot(uint16_t core_id) + void SetIsWaitForSnapshot() { - blocking_vec_[core_id].type_ = ScanBlockingType::BlockOnWaitSnapshots; + blocking_info_.type_ = ScanBlockingType::BlockOnWaitSnapshots; } - size_t WaitForSnapshotCnt(uint16_t core_id) const + size_t WaitForSnapshotCnt() const { - return wait_for_snapshot_cnt_[core_id]; + return wait_for_snapshot_cnt_; } - void DecreaseWaitForSnapshotCnt(uint16_t core_id) + void DecreaseWaitForSnapshotCnt() { - wait_for_snapshot_cnt_[core_id]--; + assert(wait_for_snapshot_cnt_ > 0); + wait_for_snapshot_cnt_--; } - void IncreaseWaitForSnapshotCnt(uint16_t core_id) + void IncreaseWaitForSnapshotCnt() { - wait_for_snapshot_cnt_[core_id]++; + wait_for_snapshot_cnt_++; } bool AbortIfOom() const override @@ -3008,8 +2949,7 @@ struct ScanSliceCc 
uint32_t range_id_{0}; - std::atomic unfinished_core_cnt_{1}; - std::atomic err_{CcErrorCode::NO_ERROR}; + CcErrorCode err_{CcErrorCode::NO_ERROR}; uint64_t ts_{0}; @@ -3019,13 +2959,20 @@ struct ScanSliceCc struct ScanBlockingInfo { - uint64_t cce_lock_addr_; - ScanType scan_type_; - ScanBlockingType type_; + void Reset() + { + cce_lock_addr_ = 0; + scan_type_ = ScanType::ScanUnknow; + type_ = ScanBlockingType::NoBlocking; + } + + uint64_t cce_lock_addr_{0}; + ScanType scan_type_{ScanType::ScanUnknow}; + ScanBlockingType type_{ScanBlockingType::NoBlocking}; }; - std::vector blocking_vec_; + ScanBlockingInfo blocking_info_; - std::vector wait_for_snapshot_cnt_; + size_t wait_for_snapshot_cnt_{0}; RangeSliceId range_slice_id_; @@ -3234,36 +3181,14 @@ struct ProcessRemoteScanRespCc : public CcRequestBase void Reset(remote::CcStreamReceiver *receiver, std::unique_ptr resp_msg, - std::vector &&offset_tables, - CcHandlerResult *hd_res, - size_t worker_cnt) + CcHandlerResult *hd_res) { receiver_ = receiver; resp_msg_ = std::move(resp_msg); - offset_tables_ = std::move(offset_tables); hd_res_ = hd_res; - - unfinished_cnt_ = worker_cnt; - next_remote_core_idx_ = worker_cnt; - - assert(offset_tables_.size() == RemoteCoreCnt()); - assert(worker_cnt <= RemoteCoreCnt()); - - cur_idxs_.clear(); - key_offsets_.clear(); - rec_offsets_.clear(); - - assert(cur_idxs_.empty()); - assert(key_offsets_.empty()); - assert(rec_offsets_.empty()); - - for (size_t worker_idx = 0; worker_idx < worker_cnt; ++worker_idx) - { - // worker idx must be less or equal than remote core count - cur_idxs_.push_back({worker_idx, 0}); - key_offsets_.push_back(KeyStartOffset(worker_idx)); - rec_offsets_.push_back(RecStartOffset(worker_idx)); - } + cur_tuple_idx_ = 0; + key_offset_ = 0; + rec_offset_ = 0; } ProcessRemoteScanRespCc(const ProcessRemoteScanRespCc &) = delete; @@ -3276,74 +3201,56 @@ struct ProcessRemoteScanRespCc : public CcRequestBase do { - auto &[remote_core_idx, tuple_idx] = 
cur_idxs_.at(ccs.core_id_); - + uint32_t remote_core_idx = resp_msg_->core_id(); const uint64_t *key_ts_ptr = (const uint64_t *) resp_msg_->key_ts().data(); - key_ts_ptr += MetaOffset(remote_core_idx); const uint64_t *gap_ts_ptr = (const uint64_t *) resp_msg_->gap_ts().data(); - gap_ts_ptr += MetaOffset(remote_core_idx); const uint64_t *term_ptr = (const uint64_t *) resp_msg_->term().data(); - term_ptr += MetaOffset(remote_core_idx); const uint64_t *cce_lock_ptr_ptr = (const uint64_t *) resp_msg_->cce_lock_ptr().data(); - cce_lock_ptr_ptr += MetaOffset(remote_core_idx); const remote::RecordStatusType *rec_status_ptr = (const remote::RecordStatusType *) resp_msg_->rec_status() .data(); - rec_status_ptr += MetaOffset(remote_core_idx); RangeScanSliceResult &scan_slice_result = hd_res_->Value(); CcScanner &range_scanner = *scan_slice_result.ccm_scanner_; - ScanCache *shard_cache = range_scanner.Cache(remote_core_idx); + ScanCache *shard_cache = range_scanner.Cache(0); - size_t &key_offset = key_offsets_[ccs.core_id_]; - size_t &rec_offset = rec_offsets_[ccs.core_id_]; - size_t tuple_cnt = TupleCnt(remote_core_idx); + size_t tuple_cnt = TupleCnt(); - for (; tuple_idx < tuple_cnt && scan_cnt < SCAN_BATCH_SIZE; - ++tuple_idx, ++scan_cnt) + for (; cur_tuple_idx_ < tuple_cnt && scan_cnt < SCAN_BATCH_SIZE; + ++cur_tuple_idx_, ++scan_cnt) { RecordStatus rec_status = remote::ToLocalType::ConvertRecordStatusType( - rec_status_ptr[tuple_idx]); + rec_status_ptr[cur_tuple_idx_]); shard_cache->AddScanTuple(resp_msg_->keys(), - key_offset, - key_ts_ptr[tuple_idx], + key_offset_, + key_ts_ptr[cur_tuple_idx_], resp_msg_->records(), - rec_offset, + rec_offset_, rec_status, -1, - gap_ts_ptr[tuple_idx], - cce_lock_ptr_ptr[tuple_idx], - term_ptr[tuple_idx], + gap_ts_ptr[cur_tuple_idx_], + cce_lock_ptr_ptr[cur_tuple_idx_], + term_ptr[cur_tuple_idx_], remote_core_idx, scan_slice_result.cc_ng_id_, true); } - if (tuple_idx == tuple_cnt) + if (cur_tuple_idx_ == tuple_cnt) { - size_t 
trailing_cnt = TrailingCnt(remote_core_idx); - while (trailing_cnt-- > 0) - { - shard_cache->RemoveLast(); - } - - range_scanner.CommitAtCore(remote_core_idx); - - if (!MoveForward(ccs.core_id_)) - { - // No more data - return SetFinished(); - } + // No more data + SetFinished(); + return true; } // To avoid blocking other request for a long time, we only process @@ -3355,115 +3262,43 @@ struct ProcessRemoteScanRespCc : public CcRequestBase return false; } - bool SetFinished() + void SetFinished() { - // This core is last finished worker. We need to set handler result and - // recycle message. - if (unfinished_cnt_.fetch_sub(1, std::memory_order_release) == 1) + if (resp_msg_->error_code() != 0) { - if (resp_msg_->error_code() != 0) - { - hd_res_->SetError(remote::ToLocalType::ConvertCcErrorCode( - resp_msg_->error_code())); - } - else - { - hd_res_->Value().ccm_scanner_->FinalizeCommit(); - - hd_res_->SetFinished(); - } - - TransactionExecution *txm = - reinterpret_cast(resp_msg_->txm_addr()); - txm->ReleaseSharedForwardLatch(); - - // Recycle message - receiver_->RecycleScanSliceResp(std::move(resp_msg_)); - - // Return true to recycle this request - return true; + hd_res_->SetError(remote::ToLocalType::ConvertCcErrorCode( + resp_msg_->error_code())); } - - return false; - } - -private: - bool MoveForward(size_t worker_idx) - { - size_t new_remote_core_idx = next_remote_core_idx_.fetch_add(1); - if (new_remote_core_idx < RemoteCoreCnt()) + else { - cur_idxs_.at(worker_idx) = {new_remote_core_idx, 0}; - key_offsets_.at(worker_idx) = KeyStartOffset(new_remote_core_idx); - rec_offsets_.at(worker_idx) = RecStartOffset(new_remote_core_idx); - - return true; + hd_res_->SetFinished(); } - // No more data - return false; - } - - size_t KeyStartOffset(size_t remote_core_idx) const - { - const size_t *ptr = reinterpret_cast( - resp_msg_->key_start_offsets().data()); - ptr += remote_core_idx; - return *ptr; - } - - size_t RecStartOffset(size_t remote_core_idx) const - { - 
const size_t *ptr = reinterpret_cast( - resp_msg_->record_start_offsets().data()); - ptr += remote_core_idx; - return *ptr; - } + TransactionExecution *txm = + reinterpret_cast(resp_msg_->txm_addr()); + txm->ReleaseSharedForwardLatch(); - size_t MetaOffset(size_t remote_core_idx) const - { - return offset_tables_[remote_core_idx]; + // Recycle message + receiver_->RecycleScanSliceResp(std::move(resp_msg_)); } - size_t TupleCnt(size_t remote_core_idx) const +private: + size_t TupleCnt() const { const char *tuple_cnt_info = resp_msg_->tuple_cnt().data(); - // remote core count - tuple_cnt_info += sizeof(uint16_t); - // tuple count - tuple_cnt_info += remote_core_idx * sizeof(size_t); return *(reinterpret_cast(tuple_cnt_info)); } - size_t TrailingCnt(size_t remote_core_idx) const - { - const size_t *ptr = - reinterpret_cast(resp_msg_->trailing_cnts().data()); - ptr += remote_core_idx; - return *ptr; - } - - uint16_t RemoteCoreCnt() const - { - const char *tuple_cnt_info = resp_msg_->tuple_cnt().data(); - return *reinterpret_cast(tuple_cnt_info); - } - remote::CcStreamReceiver *receiver_{nullptr}; std::unique_ptr resp_msg_{nullptr}; - // Store the start postition of meta data like `key_ts`. - std::vector offset_tables_; - // The vector of {remote_core_idx, current_tuple_idx}. - std::vector> cur_idxs_; + // Index of the current tuple being processed in resp_msg_. + size_t cur_tuple_idx_; // We need to store key/rec offset so that we could restart from pause // point. - std::vector key_offsets_; - std::vector rec_offsets_; + size_t key_offset_; + size_t rec_offset_; - // Unfinished worker count. std::min(this_node_core_count, - // remote_core_count) - std::atomic unfinished_cnt_{0}; // Next remote core idx we need to process. 
std::atomic next_remote_core_idx_{0}; CcHandlerResult *hd_res_{nullptr}; @@ -4037,7 +3872,6 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase uint64_t data_sync_ts, uint64_t node_group_id, int64_t node_group_term, - uint16_t core_cnt, size_t scan_batch_size, uint64_t txn, const TxKey *target_start_key, @@ -4052,14 +3886,13 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase table_name_(&table_name), node_group_id_(node_group_id), node_group_term_(node_group_term), - core_cnt_(core_cnt), last_data_sync_ts_(last_data_sync_ts), data_sync_ts_(data_sync_ts), start_key_(target_start_key), end_key_(target_end_key), scan_batch_size_(scan_batch_size), err_(CcErrorCode::NO_ERROR), - unfinished_cnt_(core_cnt_), + finished_(false), mux_(), cv_(), export_base_table_item_(export_base_table_item), @@ -4082,24 +3915,19 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase false); }); } - for (size_t i = 0; i < core_cnt; i++) + data_sync_vec_.resize(scan_batch_size); + if (!export_base_table_item_only_) { - data_sync_vec_.emplace_back(); - data_sync_vec_.back().resize(scan_batch_size); - if (!export_base_table_item_only_) - { - archive_vec_.emplace_back(); - archive_vec_.back().reserve(scan_batch_size); - mv_base_idx_vec_.emplace_back(); - mv_base_idx_vec_.back().reserve(scan_batch_size); - } - - pause_pos_.emplace_back(TxKey(), false); - curr_slice_index_.emplace_back(0); - accumulated_scan_cnt_.emplace_back(0); - accumulated_flush_data_size_.emplace_back(0); - scan_heap_is_full_.emplace_back(0); + archive_vec_.reserve(scan_batch_size); + mv_base_idx_vec_.reserve(scan_batch_size); } + + pause_pos_.first = std::move(TxKey()); + pause_pos_.second = false; + curr_slice_index_ = 0; + accumulated_scan_cnt_ = 0; + accumulated_flush_data_size_ = 0; + scan_heap_is_full_ = 0; } bool ValidTermCheck() @@ -4133,7 +3961,6 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); return false; } - scan_count_++; 
CcMap *ccm = ccs.GetCcm(*table_name_, node_group_id_); if (ccm == nullptr) { @@ -4169,49 +3996,44 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return false; } - bool IsDrained(size_t core_idx) const + bool IsDrained() const { - return pause_pos_[core_idx].second; + return pause_pos_.second; } - std::pair &PausePos(size_t core_idx) + std::pair &PausePos() { - return pause_pos_[core_idx]; + return pause_pos_; } void Wait() { std::unique_lock lk(mux_); - cv_.wait(lk, [this] { return unfinished_cnt_ == 0; }); + cv_.wait(lk, [this] { return finished_; }); } void Reset(OpType op_type = OpType::Normal) { std::lock_guard lk(mux_); - unfinished_cnt_ = 1; - for (size_t i = 0; i < core_cnt_; i++) + finished_ = false; + if (!export_base_table_item_only_) { - if (!export_base_table_item_only_) - { - archive_vec_.at(i).clear(); - archive_vec_.at(i).reserve(scan_batch_size_); - mv_base_idx_vec_.at(i).clear(); - mv_base_idx_vec_.at(i).reserve(scan_batch_size_); - } + archive_vec_.clear(); + mv_base_idx_vec_.clear(); + } - accumulated_scan_cnt_.at(i) = 0; - accumulated_flush_data_size_.at(i) = 0; - if (scan_heap_is_full_[i] == 1) - { - // vec has been cleared during ReleaseDataSyncScanHeapCc, - // resize to prepared size - data_sync_vec_[i].resize(scan_batch_size_); - scan_heap_is_full_[i] = 0; - } - if (export_base_table_item_) - { - curr_slice_index_[i] = 0; - } + accumulated_scan_cnt_ = 0; + accumulated_flush_data_size_ = 0; + if (scan_heap_is_full_ == 1) + { + // vec has been cleared during ReleaseDataSyncScanHeapCc, + // resize to prepared size + data_sync_vec_.resize(scan_batch_size_); + scan_heap_is_full_ = 0; + } + if (export_base_table_item_) + { + curr_slice_index_ = 0; } err_ = CcErrorCode::NO_ERROR; @@ -4223,12 +4045,9 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase { std::lock_guard lk(mux_); err_ = err; - --unfinished_cnt_; - if (unfinished_cnt_ == 0) - { - UnpinSlices(); - cv_.notify_one(); - } + finished_ = true; + UnpinSlices(); + 
cv_.notify_one(); } void AbortCcRequest(CcErrorCode err_code) override @@ -4249,26 +4068,22 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return err_; } - void SetFinish(size_t core_id) + void SetFinish() { std::unique_lock lk(mux_); - --unfinished_cnt_; - if (export_base_table_item_ && !pause_pos_[core_id].second) + finished_ = true; + if (export_base_table_item_ && !pause_pos_.second) { // Only not drained on this core, should set the paused key. - UpdateMinPausedSlice(&pause_pos_[core_id].first); + UpdateMinPausedSlice(&pause_pos_.first); } else if (!export_base_table_item_) { - UpdateMinPausedSlice(curr_slice_index_[core_id]); - } - - if (unfinished_cnt_ == 0) - { - // Unpin the slices - UnpinSlices(); - cv_.notify_one(); + UpdateMinPausedSlice(curr_slice_index_); } + // Unpin the slices + UnpinSlices(); + cv_.notify_one(); } uint32_t NodeGroupId() @@ -4276,19 +4091,19 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return node_group_id_; } - std::vector &DataSyncVec(uint16_t core_id) + std::vector &DataSyncVec() { - return data_sync_vec_[core_id]; + return data_sync_vec_; } - std::vector &ArchiveVec(uint16_t core_id) + std::vector &ArchiveVec() { - return archive_vec_[core_id]; + return archive_vec_; } - std::vector &MoveBaseIdxVec(uint16_t core_id) + std::vector &MoveBaseIdxVec() { - return mv_base_idx_vec_[core_id]; + return mv_base_idx_vec_; } int64_t NodeGroupTerm() const @@ -4312,66 +4127,47 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return store_range_; } - void FixCurrentSliceIndex(uint16_t core_id) + StoreSlice *CurrentSlice() const { - assert(export_base_table_item_); - if (pause_pos_[core_id].first.KeyPtr() != nullptr) - { - size_t curr_slice_idx = 0; - StoreSlice *curr_slice = - slice_coordinator_.pinned_slices_[curr_slice_idx]; - while (curr_slice->EndTxKey() < pause_pos_[core_id].first) - { - ++curr_slice_idx; - assert(curr_slice_idx < - slice_coordinator_.pinned_slices_.size()); - curr_slice = 
slice_coordinator_.pinned_slices_[curr_slice_idx]; - } - curr_slice_index_[core_id] = curr_slice_idx; - } - } - - StoreSlice *CurrentSlice(uint16_t core_id) const - { - size_t curr_slice_idx = curr_slice_index_[core_id]; if (export_base_table_item_) { - assert(curr_slice_idx < slice_coordinator_.pinned_slices_.size()); - return slice_coordinator_.pinned_slices_.at(curr_slice_idx); + assert(curr_slice_index_ < + slice_coordinator_.pinned_slices_.size()); + return slice_coordinator_.pinned_slices_.at(curr_slice_index_); } - assert(curr_slice_idx < slices_to_scan_.size()); - const TxKey &curr_slice_key = slices_to_scan_.at(curr_slice_idx).first; + assert(curr_slice_index_ < slices_to_scan_.size()); + const TxKey &curr_slice_key = + slices_to_scan_.at(curr_slice_index_).first; return store_range_->FindSlice(curr_slice_key); } - const TxKey &CurrentSliceKey(uint16_t core_id) const + const TxKey &CurrentSliceKey() const { assert(!export_base_table_item_); - size_t curr_slice_index = curr_slice_index_[core_id]; - assert(curr_slice_index < slices_to_scan_.size()); - return slices_to_scan_[curr_slice_index].first; + assert(curr_slice_index_ < slices_to_scan_.size()); + return slices_to_scan_[curr_slice_index_].first; } - void MoveToNextSlice(uint16_t core_id) + void MoveToNextSlice() { - curr_slice_index_[core_id]++; + curr_slice_index_++; } - bool TheBatchEnd(uint16_t core_id) const + bool TheBatchEnd() const { - return curr_slice_index_[core_id] >= + return curr_slice_index_ >= (export_base_table_item_ ? slice_coordinator_.pinned_slices_.size() : slice_coordinator_.batch_end_slice_index_); } - bool IsSlicePinned(uint16_t core_id) const + bool IsSlicePinned() const { assert(export_base_table_item_ || - curr_slice_index_[core_id] < slices_to_scan_.size()); + curr_slice_index_ < slices_to_scan_.size()); return export_base_table_item_ ? 
true - : slices_to_scan_[curr_slice_index_[core_id]].second; + : slices_to_scan_[curr_slice_index_].second; } uint64_t SchemaVersion() const override @@ -4379,11 +4175,6 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return schema_version_; } - void SetUnfinishedCoreCnt(uint16_t core_cnt) - { - unfinished_cnt_ = core_cnt; - } - void UnpinSlices() { if (slice_coordinator_.first_slice_id_.Range() != nullptr) @@ -4427,13 +4218,10 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase return last_data_sync_ts_; } - std::vector accumulated_scan_cnt_; - std::vector accumulated_flush_data_size_; + size_t accumulated_scan_cnt_; + uint64_t accumulated_flush_data_size_; - // std::vector is not safe to use in multi-threaded environment, - std::vector scan_heap_is_full_{0}; - - size_t scan_count_{0}; + uint32_t scan_heap_is_full_{0}; private: struct SliceCoordinator @@ -4553,7 +4341,6 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase const TableName *table_name_{nullptr}; uint32_t node_group_id_; int64_t node_group_term_; - uint16_t core_cnt_; // It is used as a hint to decide if a page has dirty data since last round // of checkpoint. It is guaranteed that all entries committed before this ts // are synced into data store. @@ -4561,10 +4348,10 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase // Target ts. Collect all data changes committed before this ts into data // sync vec. uint64_t data_sync_ts_; - std::vector> data_sync_vec_; - std::vector> archive_vec_; + std::vector data_sync_vec_; + std::vector archive_vec_; // Cache the entries to move record from "base" table to "archive" table - std::vector> mv_base_idx_vec_; + std::vector mv_base_idx_vec_; // Start/end key of target range if the scan is on a range only, nullptr if // it's on entire table. @@ -4573,11 +4360,11 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase // Position that we left off during last round of ckpt scan. 
// pause_pos_.first is the key that we stopped at (has not been scanned // though), bool is if this core has finished scanning all keys already. - std::vector> pause_pos_; + std::pair pause_pos_; size_t scan_batch_size_; CcErrorCode err_{CcErrorCode::NO_ERROR}; - uint32_t unfinished_cnt_; + bool finished_{false}; std::mutex mux_; std::condition_variable cv_; @@ -4595,7 +4382,7 @@ struct RangePartitionDataSyncScanCc : public CcRequestBase // The index of the current slice to be scanned. If export_base_table_item_ // is true, it is the index of the SliceCoordinator::pinned_slices_ vector, // and if false, it is the index of the slices_to_scan_ vector. - std::vector curr_slice_index_; + size_t curr_slice_index_; // keep schema vesion after acquire read lock on catalog, to prevent the // concurrency issue with Truncate Table, detail ref to tx issue #1130 // If schema_version_ is 0, the check will be bypassed, since this data sync @@ -4837,7 +4624,10 @@ struct ReplayLogCc : public TemplatedCcRequest std::shared_ptr range_split_started = nullptr, std::unordered_set *range_splitting = nullptr, uint16_t first_core = 0, - ParseDataLogCc *parse_cc = nullptr) + ParseDataLogCc *parse_cc = nullptr, + const std::unordered_map> + *split_range_info = nullptr) { table_name_holder_ = TableName(table_name_view, table_type, table_engine); @@ -4865,6 +4655,15 @@ struct ReplayLogCc : public TemplatedCcRequest is_lock_recovery_ = is_lock_recovery; upsert_kv_err_code_ = {true, CcErrorCode::NO_ERROR}; parse_cc_ = parse_cc; + split_ranges_ = nullptr; + if (split_range_info != nullptr) + { + auto table_it = split_range_info->find(table_name_holder_); + if (table_it != split_range_info->end()) + { + split_ranges_ = &table_it->second; + } + } } ReplayLogCc(const ReplayLogCc &rhs) = delete; @@ -5063,6 +4862,16 @@ struct ReplayLogCc : public TemplatedCcRequest return first_core_; } + uint64_t RangeSplitCommitTs(int32_t range_id) const + { + if (split_ranges_ == nullptr) + { + return 0; + } + 
auto it = split_ranges_->find(range_id); + return it == split_ranges_->end() ? 0 : it->second; + } + void SetOffset(size_t offset) { offset_ = offset; @@ -5130,6 +4939,9 @@ struct ReplayLogCc : public TemplatedCcRequest CcErrorCode::NO_ERROR}; ParseDataLogCc *parse_cc_{nullptr}; + // Range split commit ts per range for the current table, if available. + const std::unordered_map *split_ranges_{nullptr}; + friend std::ostream &operator<<(std::ostream &outs, txservice::ReplayLogCc *r); }; @@ -5146,7 +4958,10 @@ struct ParseDataLogCc : public CcRequestBase std::atomic &status, std::atomic &on_fly_cnt, bool &recovery_error, - const bool is_lock_recovery = false) + const bool is_lock_recovery = false, + const std::unordered_map> + *split_range_info = nullptr) { log_records_sv_ = std::string_view(log_records.data(), log_records.size()); @@ -5158,6 +4973,7 @@ struct ParseDataLogCc : public CcRequestBase on_fly_cnt_ = &on_fly_cnt; recovery_error_ = &recovery_error; is_lock_recovery_ = is_lock_recovery; + split_range_info_ = split_range_info; } void Reset(::txlog::ReplayMessage &&replay_message, @@ -5167,7 +4983,10 @@ struct ParseDataLogCc : public CcRequestBase std::atomic &status, std::atomic &on_fly_cnt, bool &recovery_error, - const bool is_lock_recovery = false) + const bool is_lock_recovery = false, + const std::unordered_map> + *split_range_info = nullptr) { replay_message_ = std::make_unique<::txlog::ReplayMessage>(std::move(replay_message)); @@ -5182,13 +5001,15 @@ struct ParseDataLogCc : public CcRequestBase on_fly_cnt_ = &on_fly_cnt; recovery_error_ = &recovery_error; is_lock_recovery_ = is_lock_recovery; + split_range_info_ = split_range_info; } bool Execute(CcShard &ccs) override { size_t offset = 0; // core of first key in log - int dest_core = 0; + uint32_t core_rand = butil::fast_rand(); + uint16_t dest_core = static_cast(core_rand % ccs.core_cnt_); std::vector replay_cc_list; replay_cc_list.reserve(160); while (offset < log_records_sv_.size()) @@ -5259,10 
+5080,19 @@ struct ParseDataLogCc : public CcRequestBase uint32_t kv_len = *reinterpret_cast( blob.data() + blob_offset); blob_offset += sizeof(uint32_t); - size_t hash = ccs.GetCatalogFactory(table_engine) - ->KeyHash(blob.data(), blob_offset, nullptr); - dest_core = hash ? (hash & 0x3FF) % ccs.core_cnt_ - : (dest_core + 1) % ccs.core_cnt_; + if (table_engine == TableEngine::EloqSql || + table_engine == TableEngine::EloqDoc) + { + dest_core = (dest_core + 1) % ccs.core_cnt_; + } + else + { + size_t hash = + ccs.GetCatalogFactory(table_engine) + ->KeyHash(blob.data(), blob_offset, nullptr); + dest_core = hash ? (hash & 0x3FF) % ccs.core_cnt_ + : (dest_core + 1) % ccs.core_cnt_; + } ReplayLogCc *cc_req = replay_cc_pool_.NextRequest(); replay_cc_list.push_back(cc_req); assert(cc_ng_term_ >= 0); @@ -5283,7 +5113,8 @@ struct ParseDataLogCc : public CcRequestBase nullptr, nullptr, dest_core, - this); + this, + split_range_info_); blob_offset += kv_len; } @@ -5321,6 +5152,8 @@ struct ParseDataLogCc : public CcRequestBase std::atomic *on_fly_cnt_; bool *recovery_error_; bool is_lock_recovery_; + const std::unordered_map> + *split_range_info_{nullptr}; }; struct BroadcastStatisticsCc @@ -6649,7 +6482,6 @@ struct UpdateKeyCacheCc : public CcRequestBase void Reset(const TableName &tbl_name, uint32_t ng_id, int64_t ng_term, - size_t core_cnt, const TxKey &start_key, const TxKey &end_key, StoreRange *range, @@ -6663,10 +6495,8 @@ struct UpdateKeyCacheCc : public CcRequestBase start_key_ = &start_key; end_key_ = &end_key; store_range_ = range; - unfinished_core_ = core_cnt; hd_res_ = res; - paused_pos_.clear(); - paused_pos_.resize(core_cnt); + paused_pos_ = TxKey(); } bool Execute(CcShard &ccs) override @@ -6674,7 +6504,8 @@ struct UpdateKeyCacheCc : public CcRequestBase int64_t ng_term = Sharder::Instance().LeaderTerm(node_group_id_); if (ng_term < 0 || ng_term != ng_term_) { - return SetFinish(); + SetFinish(); + return true; } CcMap *ccm = ccs.GetCcm(*table_name_, 
node_group_id_); @@ -6683,14 +6514,9 @@ struct UpdateKeyCacheCc : public CcRequestBase return ccm->Execute(*this); } - bool SetFinish() + void SetFinish() { - if (unfinished_core_.fetch_sub(1, std::memory_order_acq_rel) == 1) - { - hd_res_->SetFinished(); - return true; - } - return false; + hd_res_->SetFinished(); } const TableName *table_name_{nullptr}; @@ -6699,8 +6525,7 @@ struct UpdateKeyCacheCc : public CcRequestBase const TxKey *start_key_{nullptr}; const TxKey *end_key_{nullptr}; StoreRange *store_range_{nullptr}; - std::vector paused_pos_; - std::atomic unfinished_core_; + TxKey paused_pos_; CcHandlerResult *hd_res_{nullptr}; }; @@ -7714,7 +7539,9 @@ struct CollectMemStatsCc : public CcRequestBase struct UploadBatchCc : public CcRequestBase { + // keys, records, commit_ts, rec_status, range_size_flags using WriteEntryTuple = std::tuple; @@ -7731,10 +7558,10 @@ struct UploadBatchCc : public CcRequestBase void Reset(const TableName &table_name, txservice::NodeGroupId ng_id, int64_t &ng_term, - size_t core_cnt, + int32_t partition_id, size_t batch_size, size_t start_key_idx, - const std::vector &entry_vec, + const std::vector> &entry_vec, bthread::Mutex &req_mux, bthread::ConditionVariable &req_cv, size_t &finished_req_cnt, @@ -7745,6 +7572,7 @@ struct UploadBatchCc : public CcRequestBase node_group_id_ = ng_id; node_group_term_ = &ng_term; is_remote_ = false; + partition_id_ = partition_id; batch_size_ = batch_size; start_key_idx_ = start_key_idx; entry_vector_ = &entry_vec; @@ -7752,16 +7580,17 @@ struct UploadBatchCc : public CcRequestBase req_cv_ = &req_cv; finished_req_cnt_ = &finished_req_cnt; req_result_ = &req_result; - unfinished_cnt_.store(core_cnt, std::memory_order_relaxed); + unfinished_cnt_.store(1, std::memory_order_relaxed); err_code_.store(CcErrorCode::NO_ERROR, std::memory_order_relaxed); paused_pos_.clear(); - paused_pos_.resize(core_cnt, {}); + paused_pos_.resize(1, {}); data_type_ = data_type; } void Reset(const TableName &table_name, 
txservice::NodeGroupId ng_id, int64_t &ng_term, + int32_t partition_id, size_t core_cnt, uint32_t batch_size, const WriteEntryTuple &entry_tuple, @@ -7774,6 +7603,7 @@ struct UploadBatchCc : public CcRequestBase node_group_id_ = ng_id; node_group_term_ = &ng_term; is_remote_ = true; + partition_id_ = partition_id; batch_size_ = batch_size; start_key_idx_ = 0; entry_tuples_ = &entry_tuple; @@ -7916,7 +7746,12 @@ struct UploadBatchCc : public CcRequestBase return batch_size_; } - const std::vector *EntryVector() const + int32_t PartitionId() const + { + return partition_id_; + } + + const std::vector> *EntryVector() const { return is_remote_ ? nullptr : entry_vector_; } @@ -7931,19 +7766,23 @@ struct UploadBatchCc : public CcRequestBase size_t key_off, size_t rec_off, size_t ts_off, - size_t status_off) + size_t status_off, + size_t flags_off) { + core_id = partition_id_ >= 0 ? 0 : core_id; auto &key_pos = paused_pos_.at(core_id); std::get<0>(key_pos) = key_index; std::get<1>(key_pos) = key_off; std::get<2>(key_pos) = rec_off; std::get<3>(key_pos) = ts_off; std::get<4>(key_pos) = status_off; + std::get<5>(key_pos) = flags_off; } - const std::tuple &GetPausedPosition( - uint16_t core_id) const + const std::tuple & + GetPausedPosition(uint16_t core_id) const { + core_id = partition_id_ >= 0 ? 0 : core_id; return paused_pos_.at(core_id); } @@ -7967,12 +7806,14 @@ struct UploadBatchCc : public CcRequestBase uint32_t node_group_id_{0}; int64_t *node_group_term_{nullptr}; bool is_remote_{false}; + // -1 means broadcast to all shards(used by hash partition) + int32_t partition_id_{-1}; uint32_t batch_size_{0}; size_t start_key_idx_{0}; union { - // for local request - const std::vector *entry_vector_; + // for local request: (range_size_flags, WriteEntry*) + const std::vector> *entry_vector_; // for remote request const WriteEntryTuple *entry_tuples_; }; @@ -7984,8 +7825,10 @@ struct UploadBatchCc : public CcRequestBase // This two variables may be accessed by multi-cores. 
std::atomic unfinished_cnt_{0}; std::atomic err_code_{CcErrorCode::NO_ERROR}; - // key index, key offset, record offset, ts offset, record status offset - std::vector> paused_pos_; + // key index, key offset, record offset, ts offset, record status offset, + // range_size_flags offset + std::vector> + paused_pos_; UploadBatchType data_type_{UploadBatchType::SkIndexData}; }; @@ -8270,25 +8113,19 @@ struct UploadBatchSlicesCc : public CcRequestBase void Reset(const TableName &table_name, txservice::NodeGroupId ng_id, int64_t &ng_term, - size_t core_cnt, const WriteEntryTuple &entry_tuple, std::shared_ptr slice_info) { table_name_ = &table_name; node_group_id_ = ng_id; node_group_term_ = &ng_term; - core_cnt_ = core_cnt; - partitioned_slice_data_.resize(core_cnt); - next_idxs_.resize(core_cnt); - for (size_t i = 0; i < core_cnt; i++) - { - next_idxs_[i] = 0; - } + slice_data_.clear(); + next_idx_ = 0; entry_tuples_ = &entry_tuple; slices_info_ = slice_info; - unfinished_cnt_ = core_cnt; + finished_ = false; err_code_ = CcErrorCode::NO_ERROR; } @@ -8354,14 +8191,12 @@ struct UploadBatchSlicesCc : public CcRequestBase std::pair> SetFinish() { std::unique_lock req_lk(req_mux_); - if (--unfinished_cnt_ == 0) - { - // Make a copy of slices_info_ to avoid race condition. - std::shared_ptr slices_info = slices_info_; - req_cv_.notify_one(); - return {true, std::move(slices_info)}; - } - return {false, nullptr}; + finished_ = true; + + // Make a copy of slices_info_ to avoid race condition. 
+ std::shared_ptr slices_info = slices_info_; + req_cv_.notify_one(); + return {true, std::move(slices_info)}; } bool SetError(CcErrorCode err_code) @@ -8371,13 +8206,9 @@ struct UploadBatchSlicesCc : public CcRequestBase { err_code_ = err_code; } - if (--unfinished_cnt_ == 0) - { - req_cv_.notify_one(); - - return true; - } - return false; + finished_ = true; + req_cv_.notify_one(); + return true; } void AbortCcRequest(CcErrorCode err_code) override @@ -8394,7 +8225,7 @@ struct UploadBatchSlicesCc : public CcRequestBase void Wait() { std::unique_lock lk(req_mux_); - while (unfinished_cnt_ != 0) + while (!finished_) { req_cv_.wait(lk); } @@ -8457,7 +8288,7 @@ struct UploadBatchSlicesCc : public CcRequestBase } void SetParsed() { - parsed_.store(true, std::memory_order_release); + parsed_ = true; } void AddDataItem(TxKey key, @@ -8465,34 +8296,26 @@ struct UploadBatchSlicesCc : public CcRequestBase uint64_t version_ts, bool is_deleted) { - size_t hash = key.Hash(); - // Uses the lower 10 bits of the hash code to shard the key across - // CPU cores at this node. - uint16_t core_code = hash & 0x3FF; - uint16_t core_id = core_code % core_cnt_; - - partitioned_slice_data_[core_id].emplace_back( + slice_data_.emplace_back( std::move(key), std::move(record), version_ts, is_deleted); } - size_t NextIndex(size_t core_idx) const + size_t NextIndex() const { - size_t next_idx = next_idxs_[core_idx]; - assert(next_idx <= partitioned_slice_data_[core_idx].size()); - return next_idx; + assert(next_idx_ <= slice_data_.size()); + return next_idx_; } - void SetNextIndex(size_t core_idx, size_t index) + void SetNextIndex(size_t index) { - assert(index <= partitioned_slice_data_[core_idx].size()); - next_idxs_[core_idx] = index; + assert(index <= slice_data_.size()); + next_idx_ = index; } // Notice: these data items belong to multi slices. 
- std::deque &SliceData(uint16_t core_id) + std::deque &SliceData() { - assert(core_id < partitioned_slice_data_.size()); - return partitioned_slice_data_[core_id]; + return slice_data_; } bool AbortIfOom() const override @@ -8501,7 +8324,6 @@ struct UploadBatchSlicesCc : public CcRequestBase } private: - uint16_t core_cnt_; const TableName *table_name_{nullptr}; uint32_t node_group_id_{0}; int64_t *node_group_term_{nullptr}; @@ -8514,17 +8336,16 @@ struct UploadBatchSlicesCc : public CcRequestBase // key offset, record offset, ts offset, record status offset // when parse items std::tuple parse_offset_{0, 0, 0, 0}; - // parse items on one core, then put the req to other cores. - std::atomic_bool parsed_{false}; + bool parsed_{false}; - std::vector> partitioned_slice_data_; + std::deque slice_data_; // pause position when emplace keys into ccmap in batches - std::vector next_idxs_; + size_t next_idx_; bthread::Mutex req_mux_{}; bthread::ConditionVariable req_cv_{}; // This two variables may be accessed by multi-cores. - size_t unfinished_cnt_{0}; + bool finished_{false}; CcErrorCode err_code_{CcErrorCode::NO_ERROR}; }; @@ -8747,7 +8568,6 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase uint64_t scan_ts, uint64_t ng_id, int64_t ng_term, - uint64_t core_cnt, uint64_t txn, const TxKey &target_start_key, const TxKey &target_end_key, @@ -8764,20 +8584,14 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase store_range_(store_range), is_dirty_(is_dirty), has_dml_since_ddl_(false), - unfinished_cnt_(core_cnt), + finished_(false), schema_version_(schema_version) { tx_number_ = txn; - pause_pos_.resize(core_cnt); + pause_pos_.first = std::move(TxKey()); + pause_pos_.second = nullptr; size_t slice_cnt = store_range ? 
store_range->SlicesCount() : 0; - for (size_t i = 0; i < core_cnt; ++i) - { - slice_delta_size_.emplace_back(); - if (slice_cnt > 0) - { - slice_delta_size_.back().reserve(slice_cnt); - } - } + slice_delta_size_.reserve(slice_cnt); } bool ValidTermCheck() const @@ -8820,26 +8634,22 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase void Wait() { std::unique_lock lk(mux_); - cv_.wait(lk, [this] { return unfinished_cnt_ == 0; }); + cv_.wait(lk, [this] { return finished_; }); } void SetFinish() { std::unique_lock lk(mux_); - if (--unfinished_cnt_ == 0) - { - cv_.notify_one(); - } + finished_ = true; + cv_.notify_one(); } void SetError(CcErrorCode err) { std::unique_lock lk(mux_); err_ = err; - if (--unfinished_cnt_ == 0) - { - cv_.notify_one(); - } + finished_ = true; + cv_.notify_one(); } bool IsError() @@ -8901,18 +8711,18 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase assert(store_range); bool res = store_range_.compare_exchange_strong( expect, store_range, std::memory_order_acq_rel); - slice_delta_size_[core_id].reserve(store_range->SlicesCount()); + slice_delta_size_.reserve(store_range->SlicesCount()); return res; } - std::pair &PausedPos(size_t core_id) + std::pair &PausedPos() { - return pause_pos_[core_id]; + return pause_pos_; } - std::vector> &SliceDeltaSize(size_t core_id) + std::vector> &SliceDeltaSize() { - return slice_delta_size_[core_id]; + return slice_delta_size_; } bool IsDirty() const @@ -8956,10 +8766,10 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase // pause_pos_.first is the key that we stopped at (has not been scanned // though), .second is the slice that we stopped in (has not been scanned // completed yet). - std::vector> pause_pos_; + std::pair pause_pos_; // The delta size of the slices. First is the TxKey of the slice, second is // the delta size. The TxKey is not the owner of the key. 
- std::vector>> slice_delta_size_; + std::vector> slice_delta_size_; // Generally, if the size of a key in the data store is unknown (the // data_store_size_ is INT32_MAX), we need to read the storage (via @@ -8977,7 +8787,7 @@ struct ScanSliceDeltaSizeCcForRangePartition : public CcRequestBase std::atomic has_dml_since_ddl_{false}; CcErrorCode err_{CcErrorCode::NO_ERROR}; - uint32_t unfinished_cnt_; + bool finished_{false}; uint64_t schema_version_; std::mutex mux_; std::condition_variable cv_; diff --git a/tx_service/include/cc/cc_shard.h b/tx_service/include/cc/cc_shard.h index 09e4081d..c1554381 100644 --- a/tx_service/include/cc/cc_shard.h +++ b/tx_service/include/cc/cc_shard.h @@ -315,6 +315,11 @@ class CcShard */ CcMap *GetCcm(const TableName &table_name, uint32_t node_group); + void FetchTableRangeSize(const TableName &table_name, + int32_t partition_id, + NodeGroupId cc_ng_id, + int64_t cc_ng_term); + void AdjustDataKeyStats(const TableName &table_name, int64_t size_delta, int64_t dirty_delta); @@ -1138,6 +1143,10 @@ class CcShard } } + void ResetRangeSplittingStatus(const TableName &table_name, + uint32_t ng_id, + uint32_t range_id); + FillStoreSliceCc *NewFillStoreSliceCc() { return fill_store_slice_cc_pool_.NextRequest(); @@ -1156,6 +1165,12 @@ class CcShard void DeleteSchemaCntl(const TableName &tbl_name); + void CreateSplitRangeDataSyncTask(const TableName &table_name, + uint32_t ng_id, + int64_t ng_term, + int32_t range_id, + uint64_t data_sync_ts); + void ClearNativeSchemaCntl(); void CollectCacheHit(); void CollectCacheMiss(); @@ -1222,6 +1237,7 @@ class CcShard CcRequestPool fill_store_slice_cc_pool_; CcRequestPool init_key_cache_cc_pool_; + CcRequestPool fetch_range_size_cc_pool_; // CcRequest queue on this shard/core. 
moodycamel::ConcurrentQueue cc_queue_; diff --git a/tx_service/include/cc/ccm_scanner.h b/tx_service/include/cc/ccm_scanner.h index 7de8dbb2..96c5d898 100644 --- a/tx_service/include/cc/ccm_scanner.h +++ b/tx_service/include/cc/ccm_scanner.h @@ -424,7 +424,6 @@ class CcScanner return TxKey(); } - virtual void ResetShards(size_t shard_cnt) = 0; virtual void ResetCaches() = 0; virtual void Reset(const KeySchema *key_schema) = 0; virtual void Close() = 0; @@ -466,16 +465,6 @@ class CcScanner virtual uint32_t ShardCount() const = 0; - virtual void CommitAtCore(uint16_t core_id) - { - assert(false); - } - - virtual void FinalizeCommit() - { - assert(false); - } - ScanDirection Direction() const { return direct_; @@ -841,12 +830,6 @@ class HashParitionCcScanner : public CcScanner { } - void ResetShards(size_t shard_cnt) override - { - assert(false && - "ResetShards is designed for RangePartitionedCcmScanner."); - } - void ResetCaches() override { for (auto &[shard_code, cache] : shard_caches_) @@ -1199,7 +1182,9 @@ class RangePartitionedCcmScanner : public CcScanner RangePartitionedCcmScanner(ScanDirection direct, ScanIndexType index_type, const KeySchema *schema) - : CcScanner(direct, index_type), scans_(), key_schema_(schema) + : CcScanner(direct, index_type), + scan_cache_(this, schema), + key_schema_(schema) { } @@ -1207,113 +1192,59 @@ class RangePartitionedCcmScanner : public CcScanner { } - void ResetShards(size_t shard_cnt) override - { - size_t old_size = scans_.size(); - if (shard_cnt > old_size) - { - scans_.reserve(shard_cnt); - index_chain_.reserve(shard_cnt); - for (size_t idx = old_size; idx < shard_cnt; ++idx) - { - scans_.emplace_back(this, key_schema_); - index_chain_.emplace_back(); - } - } - else if (shard_cnt < old_size) - { - for (size_t idx = shard_cnt; idx < old_size; ++idx) - { - scans_.pop_back(); - } - index_chain_.resize(shard_cnt); - } - - assert(scans_.size() == shard_cnt); - - for (size_t idx = 0; idx < old_size && idx < shard_cnt; ++idx) - 
{ - scans_[idx].Reset(); - index_chain_[idx].clear(); - } - - std::unique_lock lk(mux_); - head_index_ = Inf(); - head_occupied_ = false; - } - void ResetCaches() override { - for (size_t core_id = 0; core_id < scans_.size(); ++core_id) - { - scans_[core_id].Reset(); - index_chain_[core_id].clear(); - } - - head_index_ = Inf(); - head_occupied_ = false; + scan_cache_.Reset(); } ScanCache *Cache(uint32_t shard_code) override { - // For RangePartitionedCcmScanner, shard_code is core_id. - return &scans_[shard_code]; + (void) shard_code; + return &scan_cache_; } void ShardCacheSizes(std::vector> *shard_code_and_sizes) const override { - for (size_t core_id = 0; core_id < scans_.size(); ++core_id) - { - shard_code_and_sizes->emplace_back(core_id, scans_[core_id].Size()); - } + shard_code_and_sizes->emplace_back(0u, scan_cache_.Size()); } void MemoryShardCacheLastTuples( std::vector *last_tuples) const override { - last_tuples->reserve(scans_.size()); - for (size_t core_id = 0; core_id < scans_.size(); ++core_id) - { - last_tuples->emplace_back(scans_[core_id].LastTuple()); - } + last_tuples->emplace_back(scan_cache_.LastTuple()); } void MemoryShardCacheTrailingTuples( std::vector *trailing_tuples) const override { - for (size_t core_id = 0; core_id < scans_.size(); ++core_id) - { - scans_[core_id].TrailingTuples(*trailing_tuples); - } + scan_cache_.TrailingTuples(*trailing_tuples); } const ScanTuple *Current() override { - if (head_index_ == Inf()) + if (status_ != ScannerStatus::Open) { - status_ = ScannerStatus::Blocked; return nullptr; } - else + + const TemplateScanTuple *tuple = scan_cache_.Current(); + if (tuple == nullptr) { - assert(status_ == ScannerStatus::Open); - return At(head_index_); + status_ = ScannerStatus::Blocked; } + + return tuple; } void MoveNext() override { - if (head_index_ == Inf()) + if (status_ != ScannerStatus::Open) { return; } - head_index_ = AdvanceMergeIndex(head_index_); - if (head_index_ == Inf()) - { - status_ = 
ScannerStatus::Blocked; - } + scan_cache_.MoveNext(); } CcmScannerType Type() const override @@ -1342,7 +1273,7 @@ class RangePartitionedCcmScanner : public CcScanner uint32_t ShardCount() const override { - return scans_.size(); + return 1; } void Reset(const KeySchema *key_schema) override @@ -1354,289 +1285,11 @@ class RangePartitionedCcmScanner : public CcScanner void Close() override { status_ = ScannerStatus::Closed; - scans_.clear(); - index_chain_.clear(); - head_index_ = Inf(); - head_occupied_ = false; - } - - /** - * @brief Commits the scan at the specified core. - * - * @param core_id - */ - void CommitAtCore(uint16_t core_id) override - { - size_t sz = scans_[core_id].Size(); - if (sz > 0) - { - std::vector &next_chain = index_chain_[core_id]; - assert(next_chain.empty()); - next_chain.reserve(sz); - - for (uint32_t idx = 0; idx < sz - 1; ++idx) - { - next_chain.emplace_back(core_id, idx + 1); - } - // The next index of the last tuple is infinity. - next_chain.emplace_back(Inf()); - assert(next_chain.size() == sz); - - if (is_require_sort_) - { - CompoundIndex head_index(core_id, 0); - MergeCompoundIndex(head_index); - } - else - { - // Concat. Delay concat to FinalizeCommit() to avoid lock. - } - } - } - - void FinalizeCommit() override - { - if (is_require_sort_) - { - // Already sorted by CommitAtCore(). 
- } - else - { - ConcatAll(); - } + scan_cache_.Reset(); } private: - struct CompoundIndex - { - public: - CompoundIndex() : index_(UINT32_MAX) - { - } - - CompoundIndex(uint16_t core_id, uint32_t offset) - { - index_ = (offset << 10) | core_id; - } - - friend bool operator==(const CompoundIndex &lhs, - const CompoundIndex &rhs) - { - return lhs.index_ == rhs.index_; - } - - friend bool operator!=(const CompoundIndex &lhs, - const CompoundIndex &rhs) - { - return !(lhs == rhs); - } - - uint16_t CoreId() const - { - return index_ & 0x3FF; - } - - uint32_t Offset() const - { - return index_ >> 10; - } - - private: - /** - * @brief The lower 10 bits represent the core ID. The remaining higher - * bits represent the offset in the scan result vector. - * - */ - uint32_t index_; - }; - - const CompoundIndex &Inf() const - { - static CompoundIndex inf; - return inf; - } - - void MergeCompoundIndex(CompoundIndex head) - { - std::unique_lock lk(mux_); - if (!head_occupied_) - { - // The head is empty. There is nothing to merge. Sets the head to - // the input scan list's head. - head_index_ = head; - head_occupied_ = true; - } - else if (head != Inf()) - { - // Merges the input scan list with the list pointed by the head. - if (head_index_ == Inf()) - { - head_index_ = head; - return; - } - CompoundIndex curr_head = head_index_; - head_occupied_ = false; - - lk.unlock(); - MergeCompoundIndex(head, curr_head); - } - } - - void MergeCompoundIndex(CompoundIndex left, CompoundIndex right) - { - CompoundIndex merge_head; - CompoundIndex prev_index; - - if (left == Inf()) - { - // The left is empty. - return MergeCompoundIndex(right); - } - else if (right == Inf()) - { - // The right is empty. 
- return MergeCompoundIndex(left); - } - - const TemplateScanTuple *left_tuple = At(left); - const TemplateScanTuple *right_tuple = At(right); - - if (IsForward) - { - if (left_tuple->KeyObj() < right_tuple->KeyObj()) - { - merge_head = left; - prev_index = left; - left = AdvanceMergeIndex(left); - } - else - { - merge_head = right; - prev_index = right; - right = AdvanceMergeIndex(right); - } - - while (left != Inf() && right != Inf()) - { - left_tuple = At(left); - right_tuple = At(right); - - if (left_tuple->KeyObj() < right_tuple->KeyObj()) - { - UpdateNextIndex(prev_index, left); - prev_index = left; - left = AdvanceMergeIndex(left); - } - else - { - UpdateNextIndex(prev_index, right); - prev_index = right; - right = AdvanceMergeIndex(right); - } - } - } - else - { - if (left_tuple->KeyObj() < right_tuple->KeyObj()) - { - merge_head = right; - prev_index = right; - right = AdvanceMergeIndex(right); - } - else - { - merge_head = left; - prev_index = left; - left = AdvanceMergeIndex(left); - } - - while (left != Inf() && right != Inf()) - { - left_tuple = At(left); - right_tuple = At(right); - - if (left_tuple->KeyObj() < right_tuple->KeyObj()) - { - UpdateNextIndex(prev_index, right); - prev_index = right; - right = AdvanceMergeIndex(right); - } - else - { - UpdateNextIndex(prev_index, left); - prev_index = left; - left = AdvanceMergeIndex(left); - } - } - } - - if (left != Inf()) - { - UpdateNextIndex(prev_index, left); - } - - if (right != Inf()) - { - UpdateNextIndex(prev_index, right); - } - - MergeCompoundIndex(merge_head); - } - - /** - * @brief Concat all chains at last finished core to avoid lock. 
- */ - void ConcatAll() - { - assert(head_index_ == Inf()); - for (uint16_t core_id = 0; core_id < index_chain_.size(); ++core_id) - { - std::vector &chain = index_chain_[core_id]; - if (!chain.empty()) - { - ConcatLockFree(core_id, chain); - } - } - } - - void ConcatLockFree(uint16_t core_id, std::vector &chain) - { - chain.back() = head_index_; - head_index_ = {core_id, 0}; - } - - CompoundIndex AdvanceMergeIndex(CompoundIndex index) - { - assert(index.CoreId() < index_chain_.size()); - assert(index.Offset() < index_chain_[index.CoreId()].size()); - - return index_chain_[index.CoreId()][index.Offset()]; - } - - const TemplateScanTuple *At(CompoundIndex index) const - { - assert(index.CoreId() < scans_.size()); - assert(index.Offset() < scans_[index.CoreId()].Size()); - - return scans_[index.CoreId()].At(index.Offset()); - } - - void UpdateNextIndex(CompoundIndex prev_index, CompoundIndex index) - { - assert(prev_index.CoreId() < index_chain_.size()); - assert(prev_index.Offset() < index_chain_[prev_index.CoreId()].size()); - - index_chain_[prev_index.CoreId()][prev_index.Offset()] = index; - } - - // Scan caches of the target node group. Its size is core count of the - // target node. - std::vector> scans_; - std::vector> index_chain_; - std::mutex mux_; - bool head_occupied_{false}; - CompoundIndex head_index_{Inf()}; - + TemplateScanCache scan_cache_; const KeySchema *key_schema_; /** * @brief The term of the cc node group where the range partition resides. 
diff --git a/tx_service/include/cc/local_cc_handler.h b/tx_service/include/cc/local_cc_handler.h index eae6ba46..8e0fb115 100644 --- a/tx_service/include/cc/local_cc_handler.h +++ b/tx_service/include/cc/local_cc_handler.h @@ -103,7 +103,9 @@ class LocalCcHandler : public CcHandler const TxRecord *record, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult &hres) override; + CcHandlerResult &hres, + int32_t partition_id = -1, + bool on_dirty_range = false) override; CcReqStatus PostRead( uint64_t tx_number, diff --git a/tx_service/include/cc/local_cc_shards.h b/tx_service/include/cc/local_cc_shards.h index 961bee52..870eb7a0 100644 --- a/tx_service/include/cc/local_cc_shards.h +++ b/tx_service/include/cc/local_cc_shards.h @@ -1129,7 +1129,6 @@ class LocalCcShards template RangeSliceOpStatus AddKeyToKeyCache(const TableName &table_name, NodeGroupId cc_ng_id, - uint16_t core_id, const KeyT &key) { std::shared_lock lk(meta_data_mux_); @@ -1156,7 +1155,7 @@ class LocalCcShards return RangeSliceOpStatus::Error; } store_range->UpdateLastAccessedTs(ClockTs()); - return store_range->AddKey(key, core_id); + return store_range->AddKey(key); } template @@ -1757,6 +1756,12 @@ class LocalCcShards uint64_t txn, CcHandlerResult *hres); + void CreateSplitRangeDataSyncTask(const TableName &table_name, + uint32_t ng_id, + int64_t ng_term, + int32_t range_id, + uint64_t data_sync_ts); + std::pair PinStoreRange( const TableName &table_name, const NodeGroupId ng_id, @@ -1913,7 +1918,8 @@ class LocalCcShards bool can_be_skipped, uint64_t &last_sync_ts, std::shared_ptr status, - CcHandlerResult *hres); + CcHandlerResult *hres, + bool high_priority = false); bool EnqueueDataSyncTaskToCore( const TableName &table_name, uint32_t ng_id, @@ -2120,7 +2126,6 @@ class LocalCcShards .GetLocalCcShards() ->GetRangeOwner(new_range_id_, ng_id_) ->BucketOwner(); - assert(new_range_owner_ != ng_id_); dest_node_id_ = Sharder::Instance().LeaderNodeId(new_range_owner_); channel_ = @@ 
-2303,7 +2308,7 @@ class LocalCcShards { // `0` means no pending task uint64_t latest_pending_task_ts_{0}; - std::queue> pending_tasks_; + std::deque> pending_tasks_; uint64_t UnsetLatestPendingTs() { diff --git a/tx_service/include/cc/object_cc_map.h b/tx_service/include/cc/object_cc_map.h index a2b31c8e..bbd4d17b 100644 --- a/tx_service/include/cc/object_cc_map.h +++ b/tx_service/include/cc/object_cc_map.h @@ -1571,7 +1571,8 @@ class ObjectCcMap : public TemplateCcMap next_ts_offset = ts_offset; next_status_offset = status_offset; - auto [key_str, rec_str, ts_str, status_str] = *entry_tuples; + auto [key_str, rec_str, ts_str, status_str, flags_str] = + *entry_tuples; // deserialize key decoded_key.Deserialize( key_str.data(), next_key_offset, KeySchema()); @@ -1739,7 +1740,8 @@ class ObjectCcMap : public TemplateCcMap key_offset, rec_offset, ts_offset, - status_offset); + status_offset, + 0); shard_->Enqueue(shard_->LocalCoreId(), &req); return false; } diff --git a/tx_service/include/cc/range_cc_map.h b/tx_service/include/cc/range_cc_map.h index 29b679a5..d2a39d50 100644 --- a/tx_service/include/cc/range_cc_map.h +++ b/tx_service/include/cc/range_cc_map.h @@ -743,7 +743,56 @@ class RangeCcMap : public TemplateCcMap // update previous cce's end key cce->SetCommitTsPayloadStatus(new_range_info->version_ts_, RecordStatus::Normal); + + // Reset new range size on the data table ccmap (emplace if + // absent). 
+ int32_t new_range_id = new_range_info->PartitionId(); + NodeGroupId new_range_owner = + shard_->GetRangeOwner(new_range_id, this->cc_ng_id_) + ->BucketOwner(); + if (new_range_owner == this->cc_ng_id_ && + static_cast((new_range_id & 0x3FF) % + shard_->core_cnt_) == + shard_->core_id_) + { + TableType data_table_type = + TableName::Type(this->table_name_.StringView()); + TableName data_table_name(this->table_name_.StringView(), + data_table_type, + this->table_name_.Engine()); + CcMap *ccm = + shard_->GetCcm(data_table_name, this->cc_ng_id_); + assert(ccm != nullptr); + size_t range_size = new_range_entries.at(idx) + ->TypedStoreRange() + ->PostCkptSize(); + ccm->InitRangeSize(static_cast(new_range_id), + static_cast(range_size), + true, + true); + } + } + // Reset old range size on the data table ccmap (no emplace). + int32_t old_partition_id = + upload_range_rec->GetRangeInfo()->PartitionId(); + if (range_owner == this->cc_ng_id_ && + static_cast((old_partition_id & 0x3FF) % + shard_->core_cnt_) == shard_->core_id_) + { + TableType data_table_type = + TableName::Type(this->table_name_.StringView()); + TableName data_table_name(this->table_name_.StringView(), + data_table_type, + this->table_name_.Engine()); + CcMap *ccm = shard_->GetCcm(data_table_name, this->cc_ng_id_); + assert(ccm != nullptr); + size_t old_range_size = + old_entry->TypedStoreRange()->PostCkptSize(); + ccm->InitRangeSize(static_cast(old_partition_id), + static_cast(old_range_size)); + ccm->ResetRangeStatus(static_cast(old_partition_id)); } + // range_owner_rec_ needs to be reset on each core since they point // to bucket records on different cores. 
upload_range_rec->range_owner_rec_ = @@ -1159,6 +1208,14 @@ class RangeCcMap : public TemplateCcMap // add new range entry to range cc map auto bucket_map = static_cast( shard_->GetCcm(range_bucket_ccm_name, this->cc_ng_id_)); + TableType data_table_type = + TableName::Type(this->table_name_.StringView()); + TableName data_table_name(this->table_name_.StringView(), + data_table_type, + this->table_name_.Engine()); + CcMap *data_ccm = shard_->GetCcm(data_table_name, this->cc_ng_id_); + assert(data_ccm != nullptr); + for (uint idx = 0; idx < new_range_infos.size(); idx++) { const TemplateRangeInfo *new_range_info = @@ -1181,6 +1238,51 @@ class RangeCcMap : public TemplateCcMap new_range_info->PartitionId())); cce->SetCommitTsPayloadStatus(new_range_info->version_ts_, RecordStatus::Normal); + + // Reset new range size on data table ccmap if this core owns + // it. + int32_t new_range_id = new_range_info->PartitionId(); + NodeGroupId new_range_owner = + shard_->GetRangeOwner(new_range_id, this->cc_ng_id_) + ->BucketOwner(); + if (new_range_owner == this->cc_ng_id_ && + static_cast((new_range_id & 0x3FF) % + shard_->core_cnt_) == + shard_->core_id_) + { + const TableRangeEntry *new_range_entry = + shard_->GetTableRangeEntry( + this->table_name_, this->cc_ng_id_, new_range_id); + assert(new_range_entry != nullptr); + size_t range_size = + static_cast *>( + new_range_entry) + ->TypedStoreRange() + ->PostCkptSize(); + data_ccm->InitRangeSize(static_cast(new_range_id), + static_cast(range_size), + true, + true); + } + } + + // Reset old range size on the data table ccmap if this core owns + // it. 
+ int32_t old_range_id = + old_table_range_entry->GetRangeInfo()->PartitionId(); + NodeGroupId range_owner = + shard_->GetRangeOwner(old_range_id, this->cc_ng_id_) + ->BucketOwner(); + if (range_owner == this->cc_ng_id_ && + static_cast((old_range_id & 0x3FF) % + shard_->core_cnt_) == shard_->core_id_) + { + size_t old_range_size = + old_table_range_entry->RangeSlices()->PostCkptSize(); + data_ccm->InitRangeSize(static_cast(old_range_id), + static_cast(old_range_size), + true, + true); } } diff --git a/tx_service/include/cc/range_slice.h b/tx_service/include/cc/range_slice.h index 0291d224..0961534c 100644 --- a/tx_service/include/cc/range_slice.h +++ b/tx_service/include/cc/range_slice.h @@ -303,22 +303,12 @@ class StoreSlice SliceStatus status, bool init_key_cache, bool empty_slice) - : size_(size), - status_(status), - fetch_slice_cc_(nullptr), - cache_validity_((txservice_enable_key_cache && init_key_cache) - ? Sharder::Instance().GetLocalCcShardsCount() - : 0) - { - if (empty_slice && !cache_validity_.empty()) + : size_(size), status_(status), fetch_slice_cc_(nullptr) + { + if (empty_slice && (txservice_enable_key_cache && init_key_cache)) { // If slice is empty, set the key cache as valid at the start. 
- for (uint16_t i = 0; - i < Sharder::Instance().GetLocalCcShardsCount(); - i++) - { - SetKeyCacheValidity(i, true); - } + SetKeyCacheValidity(true); } } @@ -419,42 +409,38 @@ class StoreSlice last_load_ts_ = load_ts; } - bool IsValidInKeyCache(uint16_t core_id) const + bool IsValidInKeyCache() const { - assert(!cache_validity_.empty()); - return cache_validity_[core_id] & 1; + return cache_validity_ & 1; } - void SetKeyCacheValidity(uint16_t core_id, bool valid) + void SetKeyCacheValidity(bool valid) { - assert(!cache_validity_.empty()); if (valid) { - cache_validity_[core_id] |= 1; + cache_validity_ |= 1; } else { - cache_validity_[core_id] &= ~(1); + cache_validity_ &= ~(1); } } - void SetLoadingKeyCache(uint16_t core_id, bool status) + void SetLoadingKeyCache(bool status) { - assert(!cache_validity_.empty()); if (status) { - cache_validity_[core_id] |= (1 << 1); + cache_validity_ |= (1 << 1); } else { - cache_validity_[core_id] &= ~(1 << 1); + cache_validity_ &= ~(1 << 1); } } - bool IsLoadingKeyCache(uint16_t core_id) + bool IsLoadingKeyCache() { - assert(!cache_validity_.empty()); - return cache_validity_[core_id] & (1 << 1); + return cache_validity_ & (1 << 1); } void InitKeyCache(CcShard *cc_shard, @@ -508,13 +494,12 @@ class StoreSlice std::mutex slice_mux_; - // If this slice is included in the range key filter. Each core should only - // access its own bitset, so we do not need mutex protection. - // Note that byte is the smallest unit c++ sync across threads. To avoid - // data corruption we need at least 1 byte for each core mask. - // The first bit implies if the key cache is valid on this core, the second - // bit implies if the key cache is being loaded on this core. - std::vector cache_validity_; + // If this slice is included in the range key filter. The first bit implies + // if the key cache is valid, the second bit implies if the key cache is + // being loaded. 
+ // All keys in this range are sharding to the same core, so we only need to + // maintain one cache validity for this range. + uint8_t cache_validity_{0}; friend class StoreRange; template @@ -722,10 +707,9 @@ class StoreRange return last_accessed_ts_.load(std::memory_order_relaxed); } - std::string KeyCacheInfo(uint16_t core_id) const + std::string KeyCacheInfo() const { - assert(core_id < key_cache_.size()); - return key_cache_[core_id]->Info(); + return key_cache_->Info(); } void SetHasDmlSinceDdl() @@ -856,8 +840,9 @@ class StoreRange // cache. Removing keys from cache when they are evicted reduces the number // of look ups to find the slice of the key since we can evict the keys in // batch. - std::vector>> - key_cache_; + // All keys in this range are sharding to the same core, so we only need to + // maintain one key cache for this range. + std::unique_ptr> key_cache_; std::atomic last_init_key_cache_time_{0}; // This variable is used during the upsert table scheme transaction(such as, @@ -957,7 +942,7 @@ class TemplateStoreRange : public StoreRange slice_end, slice_size, slice_status, - !key_cache_.empty()); + key_cache_ != nullptr); slices_.emplace_back(std::move(slice)); @@ -970,12 +955,12 @@ class TemplateStoreRange : public StoreRange slice_size = slice_keys[idx].size_; slice_status = slice_keys[idx].status_; - slice = - std::make_unique>(slice_start, - slice_end, - slice_size, - slice_status, - !key_cache_.empty()); + slice = std::make_unique>( + slice_start, + slice_end, + slice_size, + slice_status, + key_cache_ != nullptr); slices_.emplace_back(std::move(slice)); @@ -1063,25 +1048,24 @@ class TemplateStoreRange : public StoreRange return slices_; } - void InvalidateKeyCache(uint16_t core_id) + void InvalidateKeyCache() { - if (key_cache_.empty()) + if (key_cache_ == nullptr) { return; } LOG(INFO) << "Invalidate key cache of range " << partition_id_ - << " on core " << core_id << " due to collision"; + << " due to collision"; std::shared_lock 
s_lk(mux_); // shared lock to avoid slice split for (auto &slice : slices_) { - slice->SetKeyCacheValidity(core_id, false); + slice->SetKeyCacheValidity(false); } // Create a larger key cache if the old one cannot hold enough keys. - size_t last_key_cache_size = key_cache_[core_id]->Size(); - key_cache_[core_id] = - std::make_unique>( - last_key_cache_size * 1.2); + size_t last_key_cache_size = key_cache_->Size(); + key_cache_ = std::make_unique>( + last_key_cache_size * 1.2); } /** * @brief Split the range with new_end. new_end will be the new @@ -1212,7 +1196,7 @@ class TemplateStoreRange : public StoreRange } CODE_FAULT_INJECTOR("PinSlices_Fail", { LOG(INFO) << "FaultInject PinSlices_Fail, " << check_key_cache - << ", is valid " << slice->IsValidInKeyCache(shard_id); + << ", is valid " << slice->IsValidInKeyCache(); if (slice->status_ == SliceStatus::FullyCached) { slice->status_ = SliceStatus::PartiallyCached; @@ -1305,9 +1289,9 @@ class TemplateStoreRange : public StoreRange else if (check_key_cache) { assert(to_prefetch == false); - if (slice->IsValidInKeyCache(shard_id)) + if (slice->IsValidInKeyCache()) { - bool found = ContainsKey(search_key, shard_id); + bool found = ContainsKey(search_key); if (!found) { // If the key is not found in range, directly return and @@ -1318,7 +1302,7 @@ class TemplateStoreRange : public StoreRange // If key is found in range key cache, the key must exist in kv // store. Load slice from kv to get the value. 
} - else if (!slice->IsLoadingKeyCache(shard_id)) + else if (!slice->IsLoadingKeyCache()) { // If this slice can use key cache but the key cache is not // intialized, always load slice from kv to initialize the key @@ -1628,17 +1612,16 @@ class TemplateStoreRange : public StoreRange return true; } - void DeleteKey(const KeyT &key, uint16_t core_id, StoreSlice *slice) + void DeleteKey(const KeyT &key, StoreSlice *slice) { if (slice == nullptr) { TxKey search_key(&key); slice = FindSlice(search_key); } - if (slice->IsValidInKeyCache(core_id)) + if (slice->IsValidInKeyCache()) { - cuckoofilter::Status status = - key_cache_[core_id]->Delete(key.Hash()); + cuckoofilter::Status status = key_cache_->Delete(key.Hash()); // We should not try to delete a non-existing key. if (status == cuckoofilter::Status::NotFound) { @@ -1651,9 +1634,9 @@ class TemplateStoreRange : public StoreRange } // NOTE: The slice to which the @@key belong must be valid in key cache. - void DeleteKey(const KeyT &key, uint16_t core_id) + void DeleteKey(const KeyT &key) { - cuckoofilter::Status status = key_cache_[core_id]->Delete(key.Hash()); + cuckoofilter::Status status = key_cache_->Delete(key.Hash()); // We should not try to delete a non-existing key. 
if (status == cuckoofilter::Status::NotFound) { @@ -1663,7 +1646,6 @@ class TemplateStoreRange : public StoreRange } RangeSliceOpStatus AddKey(const KeyT &key, - uint16_t core_id, StoreSlice *slice = nullptr, bool init = false) { @@ -1673,10 +1655,10 @@ class TemplateStoreRange : public StoreRange TxKey search_key(&key); slice = FindSlice(search_key); } - if (init || slice->IsValidInKeyCache(core_id)) + if (init || slice->IsValidInKeyCache()) { - assert(init || !slice->IsLoadingKeyCache(core_id)); - cuckoofilter::Status status = key_cache_[core_id]->Add(key.Hash()); + assert(init || !slice->IsLoadingKeyCache()); + cuckoofilter::Status status = key_cache_->Add(key.Hash()); if (status == cuckoofilter::Status::Ok) { return RangeSliceOpStatus::Successful; @@ -1685,11 +1667,11 @@ class TemplateStoreRange : public StoreRange { assert(status == cuckoofilter::Status::NotEnoughSpace); // Add failed, we need to invalidate the filter. - InvalidateKeyCache(core_id); + InvalidateKeyCache(); return RangeSliceOpStatus::Error; } } - else if (slice->IsLoadingKeyCache(core_id)) + else if (slice->IsLoadingKeyCache()) { // Retry later when key cache is initialized. 
return RangeSliceOpStatus::Retry; @@ -1720,10 +1702,9 @@ class TemplateStoreRange : public StoreRange } } - bool ContainsKey(const KeyT &key, uint16_t core_id) + bool ContainsKey(const KeyT &key) { - return key_cache_[core_id]->Contain(key.Hash()) == - cuckoofilter::Status::Ok; + return key_cache_->Contain(key.Hash()) == cuckoofilter::Status::Ok; } size_t PostCkptSize() override @@ -1940,7 +1921,7 @@ class TemplateStoreRange : public StoreRange sub_slice_end, split_keys[idx].cur_size_, SliceStatus::PartiallyCached, - !slice->cache_validity_.empty()); + slice->cache_validity_ != 0); sub_slice->post_ckpt_size_ = split_keys[idx].post_update_size_; sub_slice->status_ = slice->status_; diff --git a/tx_service/include/cc/template_cc_map.h b/tx_service/include/cc/template_cc_map.h index 77fb5be1..136b9b00 100644 --- a/tx_service/include/cc/template_cc_map.h +++ b/tx_service/include/cc/template_cc_map.h @@ -38,6 +38,7 @@ #include #include +#include "absl/container/flat_hash_map.h" #include "cc_entry.h" #include "cc_map.h" #include "cc_page_clean_guard.h" @@ -250,7 +251,7 @@ class TemplateCcMap : public CcMap auto it = Iterator(cce_ptr, ccp, &neg_inf_); target_key = it->first; auto res = shard_->local_shards_.AddKeyToKeyCache( - table_name_, cc_ng_id_, shard_->core_id_, *target_key); + table_name_, cc_ng_id_, *target_key); if (res == RangeSliceOpStatus::Retry) { // If the insert fails due to key cache is being @@ -418,10 +419,7 @@ class TemplateCcMap : public CcMap // or auto incr pk insert, the ReadCc is skipped and we // need to update key cache here. 
auto res = shard_->local_shards_.AddKeyToKeyCache( - table_name_, - cc_ng_id_, - shard_->core_id_, - *target_key); + table_name_, cc_ng_id_, *target_key); if (res == RangeSliceOpStatus::Retry) { // If the insert fails due to key cache is being @@ -591,6 +589,8 @@ class TemplateCcMap : public CcMap cce->ArchiveBeforeUpdate(); } + [[maybe_unused]] const size_t old_payload_size = + cce->PayloadSize(); if (is_del) { cce->payload_.SetCurrentPayload(nullptr); @@ -612,6 +612,42 @@ class TemplateCcMap : public CcMap bool was_dirty = cce->IsDirty(); cce->SetCommitTsPayloadStatus(commit_ts, new_status); + if constexpr (RangePartitioned) + { + if (req.NeedUpdateRangeSize()) + { + const int64_t key_delta_size = + (new_status == RecordStatus::Deleted) + ? (-static_cast(write_key->Size() + + old_payload_size)) + : (cce_old_status != RecordStatus::Normal + ? static_cast( + write_key->Size() + + cce->PayloadSize()) + : static_cast( + cce->PayloadSize() - + old_payload_size)); + const uint32_t range_id = req.PartitionId(); + // is_dirty: true when range is splitting. + bool need_split = UpdateRangeSize( + range_id, + static_cast(key_delta_size), + req.OnDirtyRange()); + + if (need_split) + { + assert(!req.OnDirtyRange()); + // Create a data sync task for the range. + shard_->CreateSplitRangeDataSyncTask( + table_name_, + cc_ng_id_, + cce_addr->Term(), + range_id, + commit_ts); + } + } + } + if (req.IsInitialInsert()) { // Updates the ckpt ts after commit ts is set. 
@@ -1673,7 +1709,6 @@ class TemplateCcMap : public CcMap static_cast *>( slice_id.Range()); auto res = range->AddKey(*look_key, - shard_->core_id_, slice_id.Slice()); if (res == RangeSliceOpStatus::Error) { @@ -3434,7 +3469,8 @@ class TemplateCcMap : public CcMap if (ng_term < 0 || (req.RangeCcNgTerm() > 0 && req.RangeCcNgTerm() != ng_term)) { - return req.SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + req.SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + return true; } if (req.SchemaVersion() != 0 && req.SchemaVersion() != schema_ts_) @@ -3443,41 +3479,14 @@ class TemplateCcMap : public CcMap return true; } - if (req.SendResponseIfFinished()) + if (req.IsWaitForSnapshot()) { + assert(req.WaitForSnapshotCnt() == 0); req.UnpinSlices(); + req.SetFinish(); return true; } - if (req.IsWaitForSnapshot(shard_->core_id_)) - { - assert(req.WaitForSnapshotCnt(shard_->core_id_) == 0); - if (req.SetFinish()) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } - } - CcOperation cc_op; bool is_read_snapshot; if (table_name_.Type() == TableType::Secondary || @@ -3544,18 +3553,17 @@ class TemplateCcMap : public CcMap req.SetEndKey(TxKey(std::move(decoded_end_key))); } - uint16_t core_id = shard_->LocalCoreId(); TemplateScanCache *scan_cache = nullptr; RemoteScanSliceCache *remote_scan_cache = nullptr; if (req.IsLocal()) { scan_cache = static_cast *>( - req.GetLocalScanCache(core_id)); + req.GetLocalScanCache()); assert(scan_cache != nullptr); } else { - remote_scan_cache = req.GetRemoteScanCache(core_id); + remote_scan_cache = req.GetRemoteScanCache(); assert(remote_scan_cache != nullptr); } @@ -3597,10 +3605,6 @@ class TemplateCcMap : public CcMap if 
(req.SliceId().Slice() == nullptr) { - // The scan slice request is first dispatched to one core, which - // pins the slice in memory. After the slice is pinned, the request - // is dispatched to other cores to scan in parallel. The slice is - // unpinned by the last core finishing the scan batch. RangeSliceOpStatus pin_status = RangeSliceOpStatus::NotPinned; uint32_t max_pin_cnt = req.PrefetchSize(); const StoreSlice *last_pinned_slice; @@ -3650,7 +3654,8 @@ class TemplateCcMap : public CcMap { if (slice_id.Range()->HasLock()) { - return req.SetError(CcErrorCode::OUT_OF_MEMORY); + req.SetError(CcErrorCode::OUT_OF_MEMORY); + return true; } else { @@ -3667,27 +3672,12 @@ class TemplateCcMap : public CcMap { // If the pin operation returns an error, the data store // is inaccessible. - return req.SetError(CcErrorCode::PIN_RANGE_SLICE_FAILED); + req.SetError(CcErrorCode::PIN_RANGE_SLICE_FAILED); + return true; } assert(pin_status == RangeSliceOpStatus::Successful); req.PinSlices(slice_id, last_pinned_slice); - // Update unfinished cnt before dispatching to remaining cores. - req.SetUnfinishedCoreCnt(req.GetShardCount()); - - // Dispatches to remaining cores to scan pinned slice(s) in - // parallel. 
- for (uint16_t core_id = 0; core_id < shard_->local_shards_.Count(); - ++core_id) - { - if (core_id == shard_->core_id_) - { - continue; - } - - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, core_id, &req); - } } Iterator scan_ccm_it; @@ -3745,7 +3735,6 @@ class TemplateCcMap : public CcMap case CcErrorCode::MVCC_READ_MUST_WAIT_WRITE: { req.SetBlockingInfo( - shard_->core_id_, reinterpret_cast(cce->GetLockAddr()), scan_type, ScanBlockingType::BlockOnFuture); @@ -3754,7 +3743,6 @@ class TemplateCcMap : public CcMap case CcErrorCode::ACQUIRE_LOCK_BLOCKED: { req.SetBlockingInfo( - shard_->core_id_, reinterpret_cast(cce->GetLockAddr()), scan_type, ScanBlockingType::BlockOnLock); @@ -3814,7 +3802,7 @@ class TemplateCcMap : public CcMap assert(fetch_ret_status == store::DataStoreHandler::DataStoreOpStatus::Success); (void) fetch_ret_status; - req.IncreaseWaitForSnapshotCnt(shard_->core_id_); + req.IncreaseWaitForSnapshotCnt(); } } else @@ -3864,14 +3852,14 @@ class TemplateCcMap : public CcMap assert(fetch_ret_status == store::DataStoreHandler::DataStoreOpStatus::Success); (void) fetch_ret_status; - req.IncreaseWaitForSnapshotCnt(shard_->core_id_); + req.IncreaseWaitForSnapshotCnt(); } } return {ScanReturnType::Success, CcErrorCode::NO_ERROR}; }; - uint64_t cce_lock_addr = req.BlockingCceLockAddr(core_id); + uint64_t cce_lock_addr = req.BlockingCceLockAddr(); if (cce_lock_addr != 0) { KeyGapLockAndExtraData *lock = @@ -3881,7 +3869,7 @@ class TemplateCcMap : public CcMap CcEntry *>( lock->GetCcEntry()); - auto [blocking_type, scan_type] = req.BlockingPair(core_id); + auto [blocking_type, scan_type] = req.BlockingPair(); CcPage *ccp = static_cast< CcPage *>( @@ -3936,43 +3924,16 @@ class TemplateCcMap : public CcMap assert(lock_pair.second == CcErrorCode::MVCC_READ_FOR_WRITE_CONFLICT); - if (req.IsLocal()) - { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && - req.WaitForSnapshotCnt(shard_->core_id_) > 0) + if 
(is_read_snapshot && req.WaitForSnapshotCnt() > 0) { - req.SetIsWaitForSnapshot(shard_->core_id_); + req.SetIsWaitForSnapshot(); req.DeferSetError(lock_pair.second); return false; } - if (req.SetError(lock_pair.second)) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } + req.UnpinSlices(); + req.SetError(lock_pair.second); + return true; } is_locked = lock_pair.first != LockType::NoLock; @@ -4019,7 +3980,7 @@ class TemplateCcMap : public CcMap store::DataStoreHandler::DataStoreOpStatus:: Success); (void) fetch_ret_status; - req.IncreaseWaitForSnapshotCnt(shard_->core_id_); + req.IncreaseWaitForSnapshotCnt(); } } else @@ -4070,7 +4031,7 @@ class TemplateCcMap : public CcMap store::DataStoreHandler::DataStoreOpStatus:: Success); (void) fetch_ret_status; - req.IncreaseWaitForSnapshotCnt(shard_->core_id_); + req.IncreaseWaitForSnapshotCnt(); } } } @@ -4116,90 +4077,39 @@ class TemplateCcMap : public CcMap } RangeScanSliceResult &slice_result = hd_res->Value(); - auto [final_end_tx_key, end_finalized] = slice_result.PeekLastKey(); if (req.Direction() == ScanDirection::Forward) { const TemplateStoreSlice *last_slice = static_cast *>( req.LastPinnedSlice()); - // The scan at core 0 sets the scan's end key. By default, the - // scan's end is the exclusive end of the slice or the request's - // specified end key, whichever is smaller. In case keys in the - // slice are too many to fit into the scan cache, the key right - // after the last scanned tuple at core 0 becomes the exclusive end - // of scans at other cores. In such a case, it is mandatory that all - // keys smaller than the end key at other cores are returned in this - // batch. 
So, scans at other cores may slightly exceed the scan - // cache's capacity. - + // By default, the scan's end is the exclusive end of the slice or + // the request's specified end key, whichever is smaller. In case + // keys in the slice are too many to fit into the scan cache, the + // key right after the last scanned tuple becomes the exclusive end + // of scans. const KeyT *initial_end = nullptr; bool init_end_inclusive = false; - // Given the scan batch's final end key, deduces the local scan's - // end and inclusiveness. - auto deduce_scan_end = - [](const KeyT *batch_end_key, - const KeyT *req_end_key, - bool req_inclusive) -> std::pair - { - const KeyT *end = nullptr; - bool inclusive = false; - - assert(batch_end_key != nullptr); - // If the request specifies the end key and it is the scan - // batch's end key, the scan's inclusiveness is determined by - // the request. Or, the scan batch's end must be the exclusive - // end of a slice or positive infinity. - if (batch_end_key == req_end_key) - { - end = req_end_key; - inclusive = req_inclusive; - } - else - { - end = batch_end_key; - inclusive = false; - } - - return {end, inclusive}; - }; + // Takes the smaller of the slice's last key and the request's end + // key as the local scan's initial end. + const KeyT *slice_end = last_slice->EndKey(); + assert(slice_end != nullptr); - if (!end_finalized) + // If the request specifies the end key and it falls into the + // slice, initializes the local scan's end to the request's end + // key. Or, the scan end is the slice's end. + if (req_end_key != nullptr && + (*req_end_key < *slice_end || + (*req_end_key == *slice_end && !req.EndInclusive()))) { - // This scan batch's end key has not been set. Takes the smaller - // of the slice's last key and the request's end key as the - // local scan's initial end. The initial end may be modified, if - // another core finishes earlier and finalizes the batch's end - // before this core. 
The final end may be smaller or greater - // than the initial end. - const KeyT *slice_end = last_slice->EndKey(); - assert(slice_end != nullptr); - - // If the request specifies the end key and it falls into the - // slice, initializes the local scan's end to the request's end - // key. Or, the scan end is the slice's end. - if (req_end_key != nullptr && - (*req_end_key < *slice_end || - (*req_end_key == *slice_end && !req.EndInclusive()))) - { - initial_end = req_end_key; - init_end_inclusive = req.EndInclusive(); - } - else - { - initial_end = slice_end; - init_end_inclusive = false; - } + initial_end = req_end_key; + init_end_inclusive = req.EndInclusive(); } else { - // This scan batch's end key has been finalized by one of the - // cores. Deduces the local scan's end and inclusiveness. - std::tie(initial_end, init_end_inclusive) = - deduce_scan_end(final_end_tx_key->GetKey(), - req_end_key, - req.EndInclusive()); + initial_end = slice_end; + init_end_inclusive = false; } auto scan_batch_func = @@ -4226,12 +4136,11 @@ class TemplateCcMap : public CcMap return {scan_ret, err_code}; }; - auto scan_loop_func = [this, &scan_batch_func, &is_cache_full]( - Iterator &scan_ccm_it, - const KeyT &end_key, - bool inclusive, - bool end_finalized) - -> std::pair + auto scan_loop_func = + [this, &scan_batch_func, &is_cache_full]( + Iterator &scan_ccm_it, + const KeyT &end_key, + bool inclusive) -> std::pair { ScanReturnType scan_ret = ScanReturnType::Success; CcErrorCode err_code = CcErrorCode::NO_ERROR; @@ -4283,7 +4192,7 @@ class TemplateCcMap : public CcMap scan_ccm_it = End(); ccp = nullptr; } - else if (!end_finalized && is_cache_full()) + else if (is_cache_full()) { scan_ccm_it = Iterator(ccp->next_page_, 0, &neg_inf_); @@ -4305,50 +4214,23 @@ class TemplateCcMap : public CcMap return {scan_ret, err_code}; }; - auto [scan_ret, err] = scan_loop_func( - scan_ccm_it, *initial_end, init_end_inclusive, end_finalized); + auto [scan_ret, err] = + scan_loop_func(scan_ccm_it, 
*initial_end, init_end_inclusive); switch (scan_ret) { case ScanReturnType::Blocked: return false; case ScanReturnType::Error: - if (req.IsLocal()) + if (is_read_snapshot && req.WaitForSnapshotCnt() > 0) { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && - req.WaitForSnapshotCnt(shard_->core_id_) > 0) - { - req.SetIsWaitForSnapshot(shard_->core_id_); + req.SetIsWaitForSnapshot(); req.DeferSetError(err); return false; } - if (req.SetError(err)) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } + req.UnpinSlices(); + req.SetError(err); + return true; case ScanReturnType::Yield: shard_->Enqueue(shard_->core_id_, &req); return false; @@ -4356,234 +4238,61 @@ class TemplateCcMap : public CcMap break; } - // If the end of this scan batch is not finalized when the local - // scan at this core started, tries to set the batch's end using the - // local end. If another core has finalized the batch's end, the - // scan at this core may need to be adjusted: if the batch's final - // end is less than the end at this core, keys after the final end - // needs to be removed from the local scan cache; if the batch's - // final end is greater than the end of this core, keys smaller than - // the batch's final end but greater than the local end need to be - // included in the local scan cache. - if (!end_finalized) - { - const KeyT *local_end = nullptr; - SlicePosition slice_position; - - // scan_ccm_it points to the entry after the last scanned tuple. - // If the slice ends with positive infinity and has been fully - // scanned, scan_ccm_it would point to positive infinity. 
- auto pos_inf_it = End(); - if (scan_ccm_it != pos_inf_it && - (*scan_ccm_it->first < *initial_end || - (init_end_inclusive && - *scan_ccm_it->first == *initial_end))) - { - // The slice is too large. The scan has not fully scanned - // the slice, before reaching the cache's size limit. - // Pretends the slice's exclusive end to be the key after - // the last scanned tuple, from which the next scan batch - // resume. - local_end = scan_ccm_it->first; - slice_position = SlicePosition::Middle; - } - else - { - // The slice has been fully scanned. If the request - // specifies the end key, which falls into the slice, given - // that the slice has been fully scanned, no future scan - // batches are needed. So, we pretend that the scan has - // reached the last slice ending with positive infinity. - // The calling tx will terminate the scan. - if (initial_end == KeyT::PositiveInfinity() || - req_end_key == initial_end) - { - local_end = initial_end; - slice_position = SlicePosition::LastSlice; - } - else - { - // The local scan end must be the end of the slice. - local_end = initial_end; - const TemplateStoreRange *range = - static_cast *>( - req.SliceId().Range()); - const KeyT *range_end = range->RangeEndKey(); - if (range_end != nullptr && *initial_end == *range_end) - { - slice_position = SlicePosition::LastSliceInRange; - } - else - { - slice_position = SlicePosition::Middle; - } - } - } - - auto [batch_end, set_success] = - slice_result.UpdateLastKey(local_end, slice_position); + const KeyT *local_end = nullptr; + SlicePosition slice_position; - if (set_success) + // scan_ccm_it points to the entry after the last scanned tuple. + // If the slice ends with positive infinity and has been fully + // scanned, scan_ccm_it would point to positive infinity. + auto pos_inf_it = End(); + if (scan_ccm_it != pos_inf_it && + (*scan_ccm_it->first < *initial_end || + (init_end_inclusive && *scan_ccm_it->first == *initial_end))) + { + // The slice is too large. 
The scan has not fully scanned + // the slice, before reaching the cache's size limit. + // Pretends the slice's exclusive end to be the key after + // the last scanned tuple, from which the next scan batch + // resume. + local_end = scan_ccm_it->first; + slice_position = SlicePosition::Middle; + } + else + { + // The slice has been fully scanned. If the request + // specifies the end key, which falls into the slice, given + // that the slice has been fully scanned, no future scan + // batches are needed. So, we pretend that the scan has + // reached the last slice ending with positive infinity. + // The calling tx will terminate the scan. + if (initial_end == KeyT::PositiveInfinity() || + req_end_key == initial_end) { - req.SetRangeCcNgTerm(ng_term); + local_end = initial_end; + slice_position = SlicePosition::LastSlice; } else { - // The local scan tries to set the scan batch's end, but the - // scan at another core have set the batch's end. The scan - // results need to be adjusted, if the results include the - // keys greater than the batch's end, or the results miss - // some keys smaller than the batch's end. - auto [end_key, end_inclusive] = deduce_scan_end( - batch_end, req_end_key, req.EndInclusive()); - size_t trailing_cnt = 0; - - // Excludes keys from the scan cache greater than the - // batch's end. - if (req.IsLocal()) + // The local scan end must be the end of the slice. + local_end = initial_end; + const TemplateStoreRange *range = + static_cast *>( + req.SliceId().Range()); + const KeyT *range_end = range->RangeEndKey(); + if (range_end != nullptr && *initial_end == *range_end) { - while (scan_cache->Size() > 0) - { - // If req.is_require_keys_ is false, the KeyT object - // in scan cache is invalid, so, should use the cce, - // which is valid in any situation, to get the - // corresponding key. 
- auto last_cce = - reinterpret_cast *>( - scan_cache->Last()->cce_ptr_); - while (scan_ccm_it->second != last_cce) - { - --scan_ccm_it; - assert(scan_ccm_it != Begin()); - } - const KeyT *last_key = - static_cast(scan_ccm_it->first); - if (*end_key < *last_key || - (*end_key == *last_key && !end_inclusive)) - { - ++trailing_cnt; - // Remove cce from scan cache, but keep possible - // locks, because those locks might acquired by - // other ScanSliceCc/ReadCc from the - // transaction. - scan_cache->RemoveLast(); - } - else - { - // Reset iterator to the key after the last - // scanned tuple since we might need to continue - // scanning if trailing_cnt == 0. - ++scan_ccm_it; - break; - } - } + slice_position = SlicePosition::LastSliceInRange; } else { - while (remote_scan_cache->Size() > 0) - { - // Cc entry pointers here are always valid since - // the slices are still pinned so the cce cannot - // be kicked from memory regardless of the lock - // type. - auto last_remote_cce = - reinterpret_cast *>( - remote_scan_cache->LastCce()); - while (scan_ccm_it->second != last_remote_cce) - { - // As long as remote scan cache is not empty, - // iterator should not reach neg inf. - --scan_ccm_it; - assert(scan_ccm_it != Begin()); - } - const KeyT *last_key = - static_cast(scan_ccm_it->first); - if (*end_key < *last_key || - (*end_key == *last_key && !end_inclusive)) - { - trailing_cnt++; - // Remove cce from scan cache, but keep possible - // locks, because those locks might acquired by - // other ScanSliceCc/ReadCc from the - // transaction. - remote_scan_cache->RemoveLast(); - } - else - { - // Reset iterator to the key after the last - // scanned tuple since we might need to continue - // scanning if trailing_cnt == 0. - ++scan_ccm_it; - break; - } - } - } - - // If no key is removed from the scan cache, it's possible - // that the local scan may miss keys smaller than the - // batch's end. Re-scans the cc map using the batch's end. 
- if (trailing_cnt == 0) - { - auto [scan_ret, err] = scan_loop_func( - scan_ccm_it, *end_key, end_inclusive, true); - switch (scan_ret) - { - case ScanReturnType::Blocked: - return false; - case ScanReturnType::Error: - if (req.IsLocal()) - { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && - req.WaitForSnapshotCnt(shard_->core_id_) > 0) - { - req.SetIsWaitForSnapshot(shard_->core_id_); - req.DeferSetError(err); - return false; - } - - if (req.SetError(err)) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } - case ScanReturnType::Yield: - shard_->Enqueue(shard_->core_id_, &req); - return false; - default: - break; - } + slice_position = SlicePosition::Middle; } } } + slice_result.SetLastKey(local_end, slice_position); + req.SetRangeCcNgTerm(ng_term); + // Sets the iterator to the last cce, which may need to be pinned to // resume the next scan batch. 
if (CcEntry @@ -4596,7 +4305,7 @@ class TemplateCcMap : public CcMap } } } - else + else // Backward scan { const TemplateStoreSlice *last_slice = static_cast *>( @@ -4605,53 +4314,19 @@ class TemplateCcMap : public CcMap const KeyT *initial_end = nullptr; bool init_end_inclusive = false; - auto deduce_scan_end = - [](const KeyT *batch_end_key, - const KeyT *req_end_key, - bool req_inclusive) -> std::pair - { - const KeyT *end = nullptr; - bool inclusive = false; - - if (batch_end_key == req_end_key) - { - end = req_end_key; - inclusive = req_inclusive; - } - else - { - end = batch_end_key; - inclusive = true; - } - - return {end, inclusive}; - }; + const KeyT *slice_begin = last_slice->StartKey(); + assert(slice_begin != nullptr); - if (!end_finalized) + if (req_end_key != nullptr && + (*slice_begin < *req_end_key || *slice_begin == *req_end_key)) { - const KeyT *slice_begin = last_slice->StartKey(); - assert(slice_begin != nullptr); - - if (req_end_key != nullptr && (*slice_begin < *req_end_key || - *slice_begin == *req_end_key)) - { - initial_end = req_end_key; - init_end_inclusive = req.EndInclusive(); - } - else - { - initial_end = slice_begin; - init_end_inclusive = true; - } + initial_end = req_end_key; + init_end_inclusive = req.EndInclusive(); } else { - // This scan batch's end key has been finalized by one of the - // cores. Deduces the local scan's end and inclusiveness. 
- std::tie(initial_end, init_end_inclusive) = - deduce_scan_end(final_end_tx_key->GetKey(), - req_end_key, - req.EndInclusive()); + initial_end = slice_begin; + init_end_inclusive = true; } auto scan_batch_func = @@ -4678,12 +4353,11 @@ class TemplateCcMap : public CcMap return {scan_ret, err_code}; }; - auto scan_loop_func = [this, &scan_batch_func, &is_cache_full]( - Iterator &scan_ccm_it, - const KeyT &end_key, - bool inclusive, - bool end_finalized) - -> std::pair + auto scan_loop_func = + [this, &scan_batch_func, &is_cache_full]( + Iterator &scan_ccm_it, + const KeyT &end_key, + bool inclusive) -> std::pair { ScanReturnType scan_ret = ScanReturnType::Success; CcErrorCode err_code = CcErrorCode::NO_ERROR; @@ -4738,7 +4412,7 @@ class TemplateCcMap : public CcMap scan_ccm_it = Begin(); ccp = nullptr; } - else if (!end_finalized && is_cache_full()) + else if (is_cache_full()) { scan_ccm_it = Iterator(ccp->prev_page_, ccp->prev_page_->Size() - 1, @@ -4761,50 +4435,23 @@ class TemplateCcMap : public CcMap return {scan_ret, err_code}; }; - auto [scan_ret, err] = scan_loop_func( - scan_ccm_it, *initial_end, init_end_inclusive, end_finalized); + auto [scan_ret, err] = + scan_loop_func(scan_ccm_it, *initial_end, init_end_inclusive); switch (scan_ret) { case ScanReturnType::Blocked: return false; case ScanReturnType::Error: - if (req.IsLocal()) - { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && - req.WaitForSnapshotCnt(shard_->core_id_) > 0) + if (is_read_snapshot && req.WaitForSnapshotCnt() > 0) { - req.SetIsWaitForSnapshot(shard_->core_id_); + req.SetIsWaitForSnapshot(); req.DeferSetError(err); return false; } - if (req.SetError(err)) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - 
return false; - } - } - else - { - return false; - } + req.UnpinSlices(); + req.SetError(err); + return true; case ScanReturnType::Yield: shard_->Enqueue(shard_->core_id_, &req); return false; @@ -4812,234 +4459,61 @@ class TemplateCcMap : public CcMap break; } - // If the end of this scan batch is not finalized when the local - // scan at this core started, tries to set the batch's end using the - // local end. If another core has finalized the batch's end, the - // scan at this core may need to be adjusted: if the batch's final - // end is less than the end at this core, keys before the final end - // needs to be removed from the local scan cache; if the batch's - // final end is smaller than the end of this core, keys greater than - // the batch's final end but less than the local end need to be - // included in the local scan cache. - - if (!end_finalized) - { - const KeyT *local_end = nullptr; - SlicePosition slice_position; - - // scan_ccm_it points to the entry before the last scanned - // tuple. - auto neg_inf_it = Begin(); - if (scan_ccm_it != neg_inf_it && - (*initial_end < *scan_ccm_it->first || - (init_end_inclusive && - *scan_ccm_it->first == *initial_end))) - { - // The slice is too large. The scan has not fully scanned - // the slice, before reaching the cache's size limit. - // Pretends the slice's inclusive start to be the last - // scanned key, from which the next scan batch resumes. - ++scan_ccm_it; - local_end = scan_ccm_it->first; - slice_position = SlicePosition::Middle; - } - else - { - // The slice has been fully scanned. If the request - // specifies the end key, which falls into the slice, given - // that the slice has been fully scanned, no future scan - // batches are needed. So, we pretend that the scan has - // reached the first slice (starting with negative - // infinity). The calling tx will terminate the scan. 
- if (initial_end == KeyT::NegativeInfinity() || - req_end_key == initial_end) - { - local_end = initial_end; - slice_position = SlicePosition::FirstSlice; - } - else - { - // The local scan end must be the start of the slice. - local_end = initial_end; - - const TemplateStoreRange *range = - static_cast *>( - req.SliceId().Range()); - const KeyT *range_start = range->RangeStartKey(); - if (range_start != nullptr && - *initial_end == *range_start) - { - slice_position = SlicePosition::FirstSliceInRange; - } - else - { - slice_position = SlicePosition::Middle; - } - } - } - - auto [batch_end, set_success] = - slice_result.UpdateLastKey(local_end, slice_position); + const KeyT *local_end = nullptr; + SlicePosition slice_position; - if (set_success) + // scan_ccm_it points to the entry before the last scanned + // tuple. + auto neg_inf_it = Begin(); + if (scan_ccm_it != neg_inf_it && + (*initial_end < *scan_ccm_it->first || + (init_end_inclusive && *scan_ccm_it->first == *initial_end))) + { + // The slice is too large. The scan has not fully scanned + // the slice, before reaching the cache's size limit. + // Pretends the slice's inclusive start to be the last + // scanned key, from which the next scan batch resumes. + ++scan_ccm_it; + local_end = scan_ccm_it->first; + slice_position = SlicePosition::Middle; + } + else + { + // The slice has been fully scanned. If the request + // specifies the end key, which falls into the slice, given + // that the slice has been fully scanned, no future scan + // batches are needed. So, we pretend that the scan has + // reached the first slice (starting with negative + // infinity). The calling tx will terminate the scan. + if (initial_end == KeyT::NegativeInfinity() || + req_end_key == initial_end) { - req.SetRangeCcNgTerm(ng_term); + local_end = initial_end; + slice_position = SlicePosition::FirstSlice; } else { - // The local scan tries to set the scan batch's end, but the - // scan at another core have set the batch's end. 
The scan - // results need to be adjusted, if the results include the - // keys smaller than the batch's end, or the results miss - // some keys greater than the batch's end. - auto [end_key, end_inclusive] = deduce_scan_end( - batch_end, req_end_key, req.EndInclusive()); - size_t trailing_cnt = 0; - - // Excludes keys from the scan cache smaller than the - // batch's end. - if (req.IsLocal()) + // The local scan end must be the start of the slice. + local_end = initial_end; + + const TemplateStoreRange *range = + static_cast *>( + req.SliceId().Range()); + const KeyT *range_start = range->RangeStartKey(); + if (range_start != nullptr && *initial_end == *range_start) { - while (scan_cache->Size() > 0) - { - // If req.is_require_keys_ is false, the KeyT object - // in scan cache is invalid, so, should use the cce, - // which is valid in any situation, to get the - // corresponding key. - CcEntry *last_cce = - reinterpret_cast *>( - scan_cache->Last()->cce_ptr_); - while (scan_ccm_it->second != last_cce) - { - ++scan_ccm_it; - assert(scan_ccm_it != End()); - } - const KeyT *last_key = - static_cast(scan_ccm_it->first); - if (*last_key < *end_key || - (*last_key == *end_key && !end_inclusive)) - { - ++trailing_cnt; - scan_cache->RemoveLast(); - } - else - { - // Reset iterator to the key after the last - // scanned tuple since we might need to continue - // scanning if trailing_cnt == 0. - --scan_ccm_it; - break; - } - } + slice_position = SlicePosition::FirstSliceInRange; } else { - while (remote_scan_cache->Size() > 0) - { - // Cc entry pointers here are always valid since - // the slices are still pinned so the cce cannot - // be kicked from memory regardless of the lock - // type. - CcEntry *last_remote_cce = - reinterpret_cast *>( - remote_scan_cache->LastCce()); - while (scan_ccm_it->second != last_remote_cce) - { - // As long as remote scan cache is not empty, - // iterator should not reach pos inf. 
- ++scan_ccm_it; - assert(scan_ccm_it != End()); - } - const KeyT *last_key = - static_cast(scan_ccm_it->first); - if (*last_key < *end_key || - (*last_key == *end_key && !end_inclusive)) - { - trailing_cnt++; - remote_scan_cache->RemoveLast(); - } - else - { - // Reset iterator to the key after the last - // scanned tuple since we might need to continue - // scanning if trailing_cnt == 0. - --scan_ccm_it; - break; - } - } - } - - // If no key is removed from the scan cache, it's possible - // that the local scan may miss keys greater than the - // batch's end. Re-scans the cc map using the batch's end. - if (trailing_cnt == 0) - { - auto [scan_ret, err] = scan_loop_func( - scan_ccm_it, *end_key, end_inclusive, true); - switch (scan_ret) - { - case ScanReturnType::Blocked: - return false; - case ScanReturnType::Error: - if (req.IsLocal()) - { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && - req.WaitForSnapshotCnt(shard_->core_id_) > 0) - { - req.SetIsWaitForSnapshot(shard_->core_id_); - req.DeferSetError(err); - return false; - } - - if (req.SetError(err)) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } - case ScanReturnType::Yield: - shard_->Enqueue(shard_->core_id_, &req); - return false; - default: - break; - } + slice_position = SlicePosition::Middle; } } } + slice_result.SetLastKey(local_end, slice_position); + req.SetRangeCcNgTerm(ng_term); + // Sets the iterator to the last cce, which may need to be pinned to // resume the next scan batch. 
if (CcEntry @@ -5090,47 +4564,15 @@ class TemplateCcMap : public CcMap } } - if (req.IsLocal()) - { - req.GetLocalScanner()->CommitAtCore(core_id); - } - - if (is_read_snapshot && req.WaitForSnapshotCnt(shard_->core_id_) > 0) + if (is_read_snapshot && req.WaitForSnapshotCnt() > 0) { - req.SetIsWaitForSnapshot(shard_->core_id_); + req.SetIsWaitForSnapshot(); return false; } - if (req.SetFinish()) - { - if (req.Result()->Value().is_local_) - { - req.UnpinSlices(); - return true; - } - else if (req.IsResponseSender(shard_->core_id_)) - { - req.SendResponseIfFinished(); - req.UnpinSlices(); - return true; - } - else - { - // Renqueue the cc req to the sender req list. - // We assign a dedicated core to be the response sender instead - // of directly sending the response on the last finished core. - // This is to avoid serialization of response message causing - // one core to become significantly slower than others and would - // end up being the sender of all scan slice response. - shard_->local_shards_.EnqueueCcRequest( - shard_->core_id_, req.Txn(), &req); - return false; - } - } - else - { - return false; - } + req.UnpinSlices(); + req.SetFinish(); + return true; } /** @@ -5524,37 +4966,17 @@ class TemplateCcMap : public CcMap req.slice_coordinator_.UpdatePreparedSliceCnt(prepared_slice_cnt); req.slice_coordinator_.UpdateBatchEnd(); - if (req.export_base_table_item_) - { - // Fix the slice index of the current core - for (uint16_t core_id = 0; core_id < shard_->core_cnt_; - ++core_id) - { - req.FixCurrentSliceIndex(core_id); - } - } req.slice_coordinator_.SetReadyForScan(); - req.SetUnfinishedCoreCnt(shard_->core_cnt_); - - // Dispatch the request to the cores - for (uint16_t core_id = 0; core_id < shard_->core_cnt_; ++core_id) - { - if (core_id == shard_->core_id_) - { - continue; - } - shard_->Enqueue(shard_->LocalCoreId(), core_id, &req); - } } - if (req.IsDrained(shard_->core_id_)) + if (req.IsDrained()) { // scan is already finished on this core - 
req.SetFinish(shard_->core_id_); + req.SetFinish(); return false; } - auto &pause_key_and_is_drained = req.PausePos(shard_->core_id_); + auto &pause_key_and_is_drained = req.PausePos(); auto find_non_empty_slice = [this, &req, &deduce_iterator](const KeyT &search_key) @@ -5568,8 +4990,7 @@ class TemplateCcMap : public CcMap } else { - const TxKey &curr_start_tx_key = - req.CurrentSliceKey(shard_->core_id_); + const TxKey &curr_start_tx_key = req.CurrentSliceKey(); const KeyT *curr_start_key = curr_start_tx_key.GetKey(); start_key = (*curr_start_key < search_key ? &search_key : curr_start_key); @@ -5594,7 +5015,7 @@ class TemplateCcMap : public CcMap const KeyT *slice_end_key = nullptr; do { - store_slice = req.CurrentSlice(shard_->core_id_); + store_slice = req.CurrentSlice(); const TemplateStoreSlice *typed_slice = static_cast *>(store_slice); start_key = @@ -5611,11 +5032,11 @@ class TemplateCcMap : public CcMap } // The current slice is empty, try to find next slice. - req.MoveToNextSlice(shard_->core_id_); + req.MoveToNextSlice(); start_key = nullptr; // Continue to handle the next slice if not the batch end - } while (!req.TheBatchEnd(shard_->core_id_)); + } while (!req.TheBatchEnd()); return {it, end_it, slice_end_key}; }; @@ -5656,16 +5077,14 @@ class TemplateCcMap : public CcMap // If reach to the batch end, it means there are no slices that need to // be scanned. - bool slice_pinned = req.TheBatchEnd(shard_->core_id_) - ? false - : req.IsSlicePinned(shard_->core_id_); + bool slice_pinned = req.TheBatchEnd() ? false : req.IsSlicePinned(); // The following flag is used to mark the behavior of one slice. // Only need to export the key if the key is already persisted, this // will happen when the slice need to split, and should export all the // keys in this slice to get the subslice keys. 
bool export_persisted_key_only = !req.export_base_table_item_ && slice_pinned; - assert(key_it != slice_end_it || req.TheBatchEnd(shard_->core_id_)); + assert(key_it != slice_end_it || req.TheBatchEnd()); // 3. Loop to scan keys // DataSyncScanCc is running on TxProcessor thread. To avoid @@ -5674,8 +5093,7 @@ class TemplateCcMap : public CcMap for (size_t scan_cnt = 0; key_it != slice_end_it && key_it != slice_end_next_page_it && scan_cnt < RangePartitionDataSyncScanCc::DataSyncScanBatchSize && - req.accumulated_scan_cnt_.at(shard_->core_id_) < - req.scan_batch_size_; + req.accumulated_scan_cnt_ < req.scan_batch_size_; ++scan_cnt) { const KeyT *key = key_it->first; @@ -5708,8 +5126,8 @@ class TemplateCcMap : public CcMap { // Reach to the end of current slice. // Move to the next slice. - req.MoveToNextSlice(shard_->core_id_); - if (!req.TheBatchEnd(shard_->core_id_)) + req.MoveToNextSlice(); + if (!req.TheBatchEnd()) { search_start_key = slice_end_key; std::tie(key_it, slice_end_it, slice_end_key) = @@ -5720,9 +5138,7 @@ class TemplateCcMap : public CcMap // If reach to the batch end, it means there are no // slices that need to be scanned. slice_pinned = - req.TheBatchEnd(shard_->core_id_) - ? false - : req.IsSlicePinned(shard_->core_id_); + req.TheBatchEnd() ? 
false : req.IsSlicePinned(); export_persisted_key_only = !req.export_base_table_item_ && slice_pinned; } @@ -5776,20 +5192,19 @@ class TemplateCcMap : public CcMap auto export_result = ExportForCkpt(cce, *key, - req.DataSyncVec(shard_->core_id_), - req.ArchiveVec(shard_->core_id_), - req.MoveBaseIdxVec(shard_->core_id_), + req.DataSyncVec(), + req.ArchiveVec(), + req.MoveBaseIdxVec(), req.data_sync_ts_, recycle_ts, shard_->EnableMvcc(), - req.accumulated_scan_cnt_[shard_->core_id_], + req.accumulated_scan_cnt_, req.export_base_table_item_, req.export_base_table_item_only_, export_persisted_key_only, flush_size); - req.accumulated_flush_data_size_[shard_->core_id_] += - flush_size; + req.accumulated_flush_data_size_ += flush_size; if (export_result.second) { @@ -5806,8 +5221,8 @@ class TemplateCcMap : public CcMap { slice_pinned = false; // Reach to the end of current slice. Move to the next slice. - req.MoveToNextSlice(shard_->core_id_); - if (!req.TheBatchEnd(shard_->core_id_)) + req.MoveToNextSlice(); + if (!req.TheBatchEnd()) { search_start_key = slice_end_key; std::tie(key_it, slice_end_it, slice_end_key) = @@ -5817,9 +5232,8 @@ class TemplateCcMap : public CcMap // If reach to the batch end, it means there are no slices // that need to be scanned. - slice_pinned = req.TheBatchEnd(shard_->core_id_) - ? false - : req.IsSlicePinned(shard_->core_id_); + slice_pinned = + req.TheBatchEnd() ? false : req.IsSlicePinned(); export_persisted_key_only = !req.export_base_table_item_ && slice_pinned; } @@ -5830,7 +5244,7 @@ class TemplateCcMap : public CcMap // scan batch size, or reach to the end slice of the current batch // slices. assert((key_it != slice_end_it && key_it != slice_end_next_page_it) || - req.TheBatchEnd(shard_->core_id_)); + req.TheBatchEnd()); // 4. Check whether the request is finished. 
TxKey next_pause_key; bool no_more_data = @@ -5852,16 +5266,15 @@ class TemplateCcMap : public CcMap if (is_scan_mem_full) { - req.scan_heap_is_full_[shard_->core_id_] = 1; + req.scan_heap_is_full_ = 1; } if (is_scan_mem_full || no_more_data || - req.accumulated_scan_cnt_[shard_->core_id_] >= - req.scan_batch_size_ || - req.TheBatchEnd(shard_->core_id_)) + req.accumulated_scan_cnt_ >= req.scan_batch_size_ || + req.TheBatchEnd()) { // Request is finished - req.SetFinish(shard_->core_id_); + req.SetFinish(); return false; } @@ -6858,37 +6271,33 @@ class TemplateCcMap : public CcMap offset += sizeof(uint8_t); - uint16_t core_id = (key.Hash() & 0x3FF) % shard_->core_cnt_; - if (core_id != shard_->core_id_) - { - // Skips the key in the log record that is not sharded - // to this core. - if (op_type == OperationType::Insert || - op_type == OperationType::Update) - { - rec.Deserialize(log_blob.data(), offset); - } - if (shard_->core_id_ == req.FirstCore() || - (core_id != req.FirstCore() && core_id > shard_->core_id_)) - { - // Move to the smallest unvisited core id - next_core = std::min(core_id, next_core); - } - continue; - } + uint16_t core_id = 0; + bool is_dirty = false; + bool need_update_size = true; + int32_t partition_id = -1; - // Skip records that no longer belong to this ng. - if (RangePartitioned) + if constexpr (RangePartitioned) { const TableRangeEntry *range_entry = shard_->GetTableRangeEntry( table_name_, cc_ng_id_, TxKey(&key)); + if (range_entry == nullptr) + { + // range metadata missing, conservative handling: only + // consume value / skip. 
+ if (op_type == OperationType::Insert || + op_type == OperationType::Update) + { + rec.Deserialize(log_blob.data(), offset); + } + continue; + } + partition_id = range_entry->GetRangeInfo()->PartitionId(); const BucketInfo *bucket_info = shard_->GetBucketInfo( - Sharder::MapRangeIdToBucketId( - range_entry->GetRangeInfo()->PartitionId()), - cc_ng_id_); - // Check if range bucket belongs to this ng or is migrating - // to this ng. + Sharder::MapRangeIdToBucketId(partition_id), cc_ng_id_); + + // Old range bucket does not belong to this ng, nor is it a + // "dirty bucket" migrating to this ng. if (bucket_info->BucketOwner() != cc_ng_id_ && bucket_info->DirtyBucketOwner() != cc_ng_id_) { @@ -6901,20 +6310,60 @@ class TemplateCcMap : public CcMap { const BucketInfo *new_bucket_info = shard_->GetBucketInfo( - Sharder::MapRangeIdToBucketId( - range_entry->GetRangeInfo()->PartitionId()), + Sharder::MapRangeIdToBucketId(new_range_id), cc_ng_id_); if (new_bucket_info->BucketOwner() != cc_ng_id_ && new_bucket_info->DirtyBucketOwner() != cc_ng_id_) { + // Neither old bucket nor new bucket belongs to this + // ng: only consume value and continue. if (op_type != OperationType::Delete) { rec.Deserialize(log_blob.data(), offset); } continue; } + + // new range belongs to this ng: determine core based on + // new_range_id and mark dirty. + core_id = static_cast((new_range_id & 0x3FF) % + shard_->core_cnt_); + is_dirty = true; + + uint64_t range_split_commit_ts = + req.RangeSplitCommitTs(partition_id); + // Only update range size for keys updated during the + // double-write phase. + need_update_size = + (range_split_commit_ts == 0) || + (req.CommitTs() > range_split_commit_ts); + } + else + { + // new_range_id < 0: key still belongs to old range, but + // old range bucket does not belong to this ng. + // Semantically, it should not be applied to this ng: + // only consume and continue. 
+ if (op_type != OperationType::Delete) + { + rec.Deserialize(log_blob.data(), offset); + } + continue; } } + else + { + // Old range bucket belongs to this ng or is migrating to + // this ng. + core_id = static_cast((partition_id & 0x3FF) % + shard_->core_cnt_); + is_dirty = range_entry->GetRangeInfo()->IsDirty(); + + uint64_t range_split_commit_ts = + req.RangeSplitCommitTs(partition_id); + need_update_size = (range_split_commit_ts == 0) || + (req.CommitTs() > range_split_commit_ts); + } } else { @@ -6926,6 +6375,26 @@ class TemplateCcMap : public CcMap { continue; } + core_id = static_cast((key.Hash() & 0x3FF) % + shard_->core_cnt_); + } + + if (core_id != shard_->core_id_) + { + // Skips the key in the log record that is not sharded + // to this core. + if (op_type == OperationType::Insert || + op_type == OperationType::Update) + { + rec.Deserialize(log_blob.data(), offset); + } + if (shard_->core_id_ == req.FirstCore() || + (core_id != req.FirstCore() && core_id > shard_->core_id_)) + { + // Move to the smallest unvisited core id + next_core = std::min(core_id, next_core); + } + continue; } Iterator it = FindEmplace(key); @@ -7000,6 +6469,12 @@ class TemplateCcMap : public CcMap { cce->ArchiveBeforeUpdate(); } + + [[maybe_unused]] const size_t old_payload_size = + cce->PayloadSize(); + [[maybe_unused]] const RecordStatus cce_old_status = + cce->PayloadStatus(); + RecordStatus rec_status; if (op_type == OperationType::Insert || op_type == OperationType::Update) @@ -7021,6 +6496,26 @@ class TemplateCcMap : public CcMap cce->SetCommitTsPayloadStatus(commit_ts, rec_status); OnCommittedUpdate(cce, was_dirty); + if constexpr (RangePartitioned) + { + if (need_update_size) + { + int32_t delta_size = + (rec_status == RecordStatus::Deleted) + ? -static_cast(key.Size() + + old_payload_size) + : static_cast( + cce_old_status != RecordStatus::Normal + ? 
(key.Size() + cce->PayloadSize()) + : (cce->PayloadSize() - + old_payload_size)); + + UpdateRangeSize(static_cast(partition_id), + delta_size, + is_dirty); + } + } + if (commit_ts > last_dirty_commit_ts_) { last_dirty_commit_ts_ = commit_ts; @@ -7205,9 +6700,9 @@ class TemplateCcMap : public CcMap bool Execute(FillStoreSliceCc &req) override { - std::deque &slice_vec = req.SliceData(shard_->core_id_); + std::deque &slice_vec = req.SliceData(); - size_t index = req.NextIndex(shard_->core_id_); + size_t index = req.NextIndex(); size_t last_index = std::min(index + FillStoreSliceCc::MaxScanBatchSize, slice_vec.size()); @@ -7224,11 +6719,12 @@ class TemplateCcMap : public CcMap if (index == slice_vec.size()) { slice_vec.clear(); - return req.SetFinish(shard_); + req.SetFinish(shard_); + return true; } else { - req.SetNextIndex(shard_->core_id_, index); + req.SetNextIndex(index); shard_->Enqueue(shard_->LocalCoreId(), &req); return false; } @@ -7237,17 +6733,18 @@ class TemplateCcMap : public CcMap bool Execute(InitKeyCacheCc &req) override { Iterator map_it, map_end_it; - TxKey &resume_key = req.PauseKey(shard_->core_id_); + TxKey &resume_key = req.PauseKey(); const KeyT *start_key = nullptr; if (!resume_key.KeyPtr()) { // First time being processed. - if (req.Slice().IsValidInKeyCache(shard_->core_id_)) + if (req.Slice().IsValidInKeyCache()) { // No need to init key cache. - return req.SetFinish(shard_->core_id_, true); + req.SetFinish(true); + return true; } - req.Slice().SetLoadingKeyCache(shard_->core_id_, true); + req.Slice().SetLoadingKeyCache(true); start_key = req.Slice().StartTxKey().GetKey(); } else @@ -7307,24 +6804,25 @@ class TemplateCcMap : public CcMap continue; } const KeyT *key = map_it->first; - auto ret = - range->AddKey(*key, shard_->core_id_, &req.Slice(), true); + auto ret = range->AddKey(*key, &req.Slice(), true); if (ret == RangeSliceOpStatus::Error) { // Stop immediately if one of the add key fails. 
- return req.SetFinish(shard_->core_id_, false); + req.SetFinish(false); + return true; } } if (map_it == map_end_it) { - return req.SetFinish(shard_->core_id_, true); + req.SetFinish(true); + return true; } else { // record pause position and resume in next round. TxKey pause_key(map_it->first); - req.SetPauseKey(pause_key, shard_->core_id_); + req.SetPauseKey(pause_key); shard_->Enqueue(&req); return false; } @@ -7399,9 +6897,12 @@ class TemplateCcMap : public CcMap } LruPage *lru_page; uint16_t pause_idx = shard_->core_id_; - if (req.GetCleanType() == CleanType::CleanBucketData) + CleanType clean_type = req.GetCleanType(); + if (clean_type == CleanType::CleanBucketData || + clean_type == CleanType::CleanRangeData) { - // For clean bucket data, cc req is only sent to 1 core. + // For clean bucket data and range data, cc req is only sent to 1 + // core. pause_idx = 0; } if (req.ResumeKey(pause_idx)->KeyPtr() != nullptr) @@ -7501,8 +7002,8 @@ class TemplateCcMap : public CcMap : KeyT::PositiveInfinity(); const KeyT *start_key = - req.paused_pos_[shard_->core_id_].KeyPtr() != nullptr - ? req.paused_pos_[shard_->core_id_].GetKey() + req.paused_pos_.KeyPtr() != nullptr + ? req.paused_pos_.GetKey() : (req.end_key_ != nullptr ? 
req.start_key_->GetKey() : KeyT::NegativeInfinity()); @@ -7539,8 +7040,7 @@ class TemplateCcMap : public CcMap curr_slice = range->FindSlice(*key); it = deduce_iterator(*key); end_it = deduce_iterator(*(curr_slice->EndKey())); - if ((!curr_slice->IsValidInKeyCache(shard_->core_id_) || - it == end_it) && + if ((!curr_slice->IsValidInKeyCache() || it == end_it) && end_it != req_end_it) { // The slice is empty or the slice is invalid in key cache, @@ -7549,7 +7049,7 @@ class TemplateCcMap : public CcMap key = curr_slice->EndKey(); curr_slice = nullptr; } - else if (!curr_slice->IsValidInKeyCache(shard_->core_id_) && + else if (!curr_slice->IsValidInKeyCache() && end_it == req_end_it) { // Reach to the last slice, and the slice is invalid in key @@ -7577,7 +7077,7 @@ class TemplateCcMap : public CcMap { assert(cce->PayloadStatus() == RecordStatus::Normal || cce->PayloadStatus() == RecordStatus::Deleted); - range->DeleteKey(*cce_key, shard_->core_id_); + range->DeleteKey(*cce_key); } // Forward the iterator. 
@@ -7593,12 +7093,13 @@ class TemplateCcMap : public CcMap if (key_it == slice_end_it) { - req.paused_pos_[shard_->core_id_] = TxKey(); - return req.SetFinish(); + req.paused_pos_ = TxKey(); + req.SetFinish(); + return true; } else { - req.paused_pos_[shard_->core_id_] = key_it->first->CloneTxKey(); + req.paused_pos_ = key_it->first->CloneTxKey(); shard_->Enqueue(&req); return false; } @@ -7621,6 +7122,7 @@ class TemplateCcMap : public CcMap auto entry_tuples = req.EntryTuple(); size_t batch_size = req.BatchSize(); size_t start_key_index = req.StartKeyIndex(); + const int32_t partition_id = req.PartitionId(); const TxRecord *req_rec = nullptr; @@ -7630,6 +7132,7 @@ class TemplateCcMap : public CcMap ValueT decoded_rec; uint64_t commit_ts = 0; RecordStatus rec_status = RecordStatus::Normal; + uint8_t range_size_flags = 0; auto &resume_pos = req.GetPausedPosition(shard_->core_id_); size_t key_pos = std::get<0>(resume_pos); @@ -7637,6 +7140,7 @@ class TemplateCcMap : public CcMap size_t rec_offset = std::get<2>(resume_pos); size_t ts_offset = std::get<3>(resume_pos); size_t status_offset = std::get<4>(resume_pos); + size_t flags_offset = std::get<5>(resume_pos); size_t hash = 0; Iterator it; @@ -7649,6 +7153,7 @@ class TemplateCcMap : public CcMap size_t next_rec_offset = 0; size_t next_ts_offset = 0; size_t next_status_offset = 0; + size_t next_flags_offset = 0; for (size_t cnt = 0; key_pos < batch_size && cnt < UploadBatchCc::UploadBatchBatchSize; ++key_pos, ++cnt) @@ -7657,13 +7162,16 @@ class TemplateCcMap : public CcMap next_rec_offset = rec_offset; next_ts_offset = ts_offset; next_status_offset = status_offset; + next_flags_offset = flags_offset; + if (entry_vec != nullptr) { key_idx = start_key_index + key_pos; - // get key - key = entry_vec->at(key_idx)->key_.GetKey(); - // get record - req_rec = entry_vec->at(key_idx)->rec_.get(); + const auto &pair = entry_vec->at(key_idx); + range_size_flags = pair.first; + const WriteEntry *we = pair.second; + key = 
we->key_.GetKey(); + req_rec = we->rec_.get(); if (req_rec) { rec_status = RecordStatus::Normal; @@ -7675,11 +7183,12 @@ class TemplateCcMap : public CcMap commit_val = nullptr; } // get commit ts - commit_ts = entry_vec->at(key_idx)->commit_ts_; + commit_ts = we->commit_ts_; } else { - auto [key_str, rec_str, ts_str, status_str] = *entry_tuples; + auto [key_str, rec_str, ts_str, status_str, flags_str] = + *entry_tuples; // deserialize key decoded_key.Deserialize( key_str.data(), next_key_offset, KeySchema()); @@ -7702,21 +7211,43 @@ class TemplateCcMap : public CcMap // deserialize commit ts commit_ts = *((uint64_t *) (ts_str.data() + next_ts_offset)); next_ts_offset += sizeof(uint64_t); + if (RangePartitioned) + { + range_size_flags = + static_cast(flags_str[next_flags_offset]); + next_flags_offset += sizeof(uint8_t); + } } - hash = key->Hash(); - size_t core_idx = (hash & 0x3FF) % shard_->core_cnt_; - if (!(core_idx == shard_->core_id_) || commit_ts <= 1) + if (commit_ts <= 1) { - // Skip the key that does not belong to this core or - // commit ts does not greater than 1. Move to next key. + // skip the key that commit ts does not greater than 1. key_offset = next_key_offset; rec_offset = next_rec_offset; ts_offset = next_ts_offset; status_offset = next_status_offset; + if constexpr (RangePartitioned) + { + flags_offset = next_flags_offset; + } continue; } + if constexpr (!RangePartitioned) + { + hash = key->Hash(); + size_t core_idx = (hash & 0x3FF) % shard_->core_cnt_; + if (core_idx != shard_->core_id_) + { + // skip the key that does not belong to this core. 
+ key_offset = next_key_offset; + rec_offset = next_rec_offset; + ts_offset = next_ts_offset; + status_offset = next_status_offset; + continue; + } + } + it = FindEmplace(*key); cce = it->second; cc_page = it.GetPage(); @@ -7748,9 +7279,14 @@ class TemplateCcMap : public CcMap rec_offset = next_rec_offset; ts_offset = next_ts_offset; status_offset = next_status_offset; + if constexpr (RangePartitioned) + { + flags_offset = next_flags_offset; + } continue; } + [[maybe_unused]] const size_t old_payload_size = cce->PayloadSize(); // Now, all versions of non-unique SecondaryIndex key shared // the unpack info in current version's payload, though the // unpack info will not be used for deleted key, we must not @@ -7770,6 +7306,8 @@ class TemplateCcMap : public CcMap } bool was_dirty = cce->IsDirty(); + [[maybe_unused]] const RecordStatus cce_old_status = + cce->PayloadStatus(); cce->SetCommitTsPayloadStatus(commit_ts, rec_status); if (req.Kind() == UploadBatchType::DirtyBucketData) { @@ -7783,6 +7321,43 @@ class TemplateCcMap : public CcMap } cce->SetCkptTs(commit_ts); } + + if constexpr (RangePartitioned) + { + if ((range_size_flags >> 4) != 0) + { + int32_t delta = + (rec_status == RecordStatus::Deleted) + ? -(static_cast(write_key->Size() + + old_payload_size)) + : (cce_old_status != RecordStatus::Normal + ? static_cast(write_key->Size() + + cce->PayloadSize()) + : static_cast(cce->PayloadSize() - + old_payload_size)); + bool need_split = + UpdateRangeSize(static_cast(partition_id), + delta, + (range_size_flags & 0x0F) != 0); + if (need_split) + { + // Create a data sync task for the range. 
+ uint64_t data_sync_ts = + std::chrono::duration_cast< + std::chrono::microseconds>( + std::chrono::high_resolution_clock::now() + .time_since_epoch()) + .count(); + shard_->CreateSplitRangeDataSyncTask( + table_name_, + cc_ng_id_, + req.CcNgTerm(), + static_cast(partition_id), + data_sync_ts); + } + } + } + OnCommittedUpdate(cce, was_dirty); OnFlushed(cce, was_dirty); DLOG_IF(INFO, TRACE_OCC_ERR) @@ -7809,6 +7384,10 @@ class TemplateCcMap : public CcMap rec_offset = next_rec_offset; ts_offset = next_ts_offset; status_offset = next_status_offset; + if constexpr (RangePartitioned) + { + flags_offset = next_flags_offset; + } } if (key_pos < batch_size) { @@ -7820,7 +7399,8 @@ class TemplateCcMap : public CcMap key_offset, rec_offset, ts_offset, - status_offset); + status_offset, + flags_offset); shard_->Enqueue(shard_->LocalCoreId(), &req); return false; } @@ -7902,22 +7482,12 @@ class TemplateCcMap : public CcMap { // Parsed all records req.SetParsed(); - - // Emplace key on all cores - for (size_t core = 0; core < shard_->core_cnt_; ++core) - { - if (core != shard_->core_id_) - { - shard_->Enqueue(shard_->core_id_, core, &req); - } - } } - } // end-parsed - std::deque &slice_vec = req.SliceData(shard_->core_id_); + std::deque &slice_vec = req.SliceData(); - size_t index = req.NextIndex(shard_->core_id_); + size_t index = req.NextIndex(); size_t last_index = std::min( index + UploadBatchSlicesCc::MaxEmplaceBatchSize, slice_vec.size()); @@ -7953,7 +7523,7 @@ class TemplateCcMap : public CcMap else { index = last_index; - req.SetNextIndex(shard_->core_id_, index); + req.SetNextIndex(index); shard_->Enqueue(shard_->LocalCoreId(), &req); } return false; @@ -8050,7 +7620,7 @@ class TemplateCcMap : public CcMap const KeyT *const req_start_key = req.StartTxKey().GetKey(); const KeyT *const req_end_key = req.EndTxKey().GetKey(); - auto &paused_position = req.PausedPos(shard_->core_id_); + auto &paused_position = req.PausedPos(); bool is_dirty = req.IsDirty(); @@ -8121,8 
+7691,7 @@ class TemplateCcMap : public CcMap slice_end_next_page_it = next_page_it(slice_end_it); - curr_slice_delta_size = - &(req.SliceDeltaSize(shard_->core_id_).back().second); + curr_slice_delta_size = &(req.SliceDeltaSize().back().second); } bool has_dml_since_ddl = false; @@ -8324,8 +7893,7 @@ class TemplateCcMap : public CcMap slice_end_next_page_it = next_page_it(slice_end_it); - auto &slice_delta_size = - req.SliceDeltaSize(shard_->core_id_); + auto &slice_delta_size = req.SliceDeltaSize(); slice_delta_size.emplace_back(slice->StartTxKey(), 0); curr_slice_delta_size = &slice_delta_size.back().second; } @@ -8771,6 +8339,10 @@ class TemplateCcMap : public CcMap } normal_obj_sz_ = 0; + if constexpr (RangePartitioned) + { + range_sizes_.clear(); + } ccmp_.clear(); } @@ -10483,10 +10055,7 @@ class TemplateCcMap : public CcMap // status, it should already be in the key cache. Only add it if // it's in DELETED. auto res = shard_->local_shards_.AddKeyToKeyCache( - table_name_, - cc_ng_id_, - shard_->core_id_, - *ccp->KeyOfEntry(cce)); + table_name_, cc_ng_id_, *ccp->KeyOfEntry(cce)); if (res == RangeSliceOpStatus::Retry) { // Retry if the slice key cache is being loaded. @@ -11914,6 +11483,74 @@ class TemplateCcMap : public CcMap return &pos_inf_page_; } + bool UpdateRangeSize(uint32_t partition_id, + int32_t delta_size, + bool is_dirty) + { + if constexpr (RangePartitioned) + { + auto it = range_sizes_.find(partition_id); + if (it == range_sizes_.end()) + { + it = range_sizes_ + .emplace(partition_id, + std::make_tuple( + static_cast( + RangeSizeStatus::kNotInitialized), + 0, + false)) + .first; + } + if (std::get<0>(it->second) == + static_cast(RangeSizeStatus::kNotInitialized) && + !is_dirty) + { + std::get<1>(it->second) += delta_size; + // Init the range size of this range. 
+ std::get<0>(it->second) = + static_cast(RangeSizeStatus::kLoading); + + int64_t ng_term = Sharder::Instance().LeaderTerm(cc_ng_id_); + shard_->FetchTableRangeSize(table_name_, + static_cast(partition_id), + cc_ng_id_, + ng_term); + return false; + } + + if (std::get<0>(it->second) == + static_cast(RangeSizeStatus::kLoading) || + is_dirty) + { + // Loading or split: record delta in delta part (.second). + std::get<1>(it->second) += delta_size; + } + else + { + int32_t new_range_size = std::get<0>(it->second) + delta_size; + std::get<0>(it->second) = + new_range_size > 0 ? new_range_size : 0; + + bool trigger_split = + !is_dirty && !std::get<2>(it->second) && + std::get<0>(it->second) >= + static_cast(StoreRange::range_max_size); + + DLOG_IF(INFO, trigger_split) + << "Range size is too large, need to split. table: " + << table_name_.StringView() + << " partition: " << partition_id + << " range size: " << std::get<0>(it->second) + << " range max size: " << StoreRange::range_max_size; + std::get<2>(it->second) = + trigger_split == true ? 
true : std::get<2>(it->second); + return trigger_split; + } + } // RangePartitioned + + return false; + } + absl::btree_map< KeyT, std::unique_ptr< @@ -11941,7 +11578,7 @@ void BackfillSnapshotForScanSlice(FetchSnapshotCc *fetch_cc, { TemplateScanCache *scan_cache = static_cast *>( - req->GetLocalScanCache(core_id)); + req->GetLocalScanCache()); assert(scan_cache != nullptr); auto *scan_tuple = const_cast *>( scan_cache->At(tuple_idx)); @@ -11960,8 +11597,7 @@ void BackfillSnapshotForScanSlice(FetchSnapshotCc *fetch_cc, } else { - RemoteScanSliceCache *remote_scan_cache = - req->GetRemoteScanCache(core_id); + RemoteScanSliceCache *remote_scan_cache = req->GetRemoteScanCache(); assert(remote_scan_cache != nullptr); assert(remote_scan_cache->archive_records_.size() >= tuple_idx); auto &tmp_pair = remote_scan_cache->archive_positions_[tuple_idx]; @@ -11977,9 +11613,8 @@ void BackfillSnapshotForScanSlice(FetchSnapshotCc *fetch_cc, } // trigger request - req->DecreaseWaitForSnapshotCnt(core_id); - if (req->IsWaitForSnapshot(core_id) && - req->WaitForSnapshotCnt(core_id) == 0) + req->DecreaseWaitForSnapshotCnt(); + if (req->IsWaitForSnapshot() && req->WaitForSnapshotCnt() == 0) { shard.Enqueue(core_id, req); } diff --git a/tx_service/include/data_sync_task.h b/tx_service/include/data_sync_task.h index 06aa8d01..1c640f7b 100644 --- a/tx_service/include/data_sync_task.h +++ b/tx_service/include/data_sync_task.h @@ -138,7 +138,8 @@ struct DataSyncTask CcHandlerResult *hres, std::function filter_lambda = nullptr, bool forward_cache = false, - bool is_standby_node_ckpt = false) + bool is_standby_node_ckpt = false, + bool high_priority = false) : table_name_(table_name), id_(id), range_version_(range_version), @@ -152,7 +153,8 @@ struct DataSyncTask is_dirty_(is_dirty), sync_ts_adjustable_(need_adjust_ts), task_res_(hres), - need_update_ckpt_ts_(true) + need_update_ckpt_ts_(true), + high_priority_(high_priority) { } @@ -180,6 +182,12 @@ struct DataSyncTask // flush data 
buffer. void SetScanTaskFinished(); + // Once the range size reaches the threshold, a DataSyncTask is created to + // trigger the split range operation, and a flag is set indicating that the + // range has been split. This flag needs to be reset after the DataSyncTask + // completes. + void ResetRangeSplittingStatus(); + void SetErrorCode(CcErrorCode err_code) { std::unique_lock lk(status_->mux_); @@ -252,6 +260,7 @@ struct DataSyncTask cce_entries_; bool need_update_ckpt_ts_{true}; + bool high_priority_{false}; }; struct FlushTaskEntry diff --git a/tx_service/include/fault/log_replay_service.h b/tx_service/include/fault/log_replay_service.h index e9fa2fc2..eb308a58 100644 --- a/tx_service/include/fault/log_replay_service.h +++ b/tx_service/include/fault/log_replay_service.h @@ -35,6 +35,7 @@ #include #include "txlog.h" +#include "type.h" namespace txservice { @@ -174,6 +175,17 @@ class RecoveryService : public brpc::StreamInputHandler, void ProcessRecoverTxTask(RecoverTxTask &task); + // Range split info management. 
+ void SetSplitRangeInfo(uint32_t ng_id, + TableName table_name, + int32_t range_id, + uint64_t commit_ts); + + const std::unordered_map> * + GetSplitRangeInfo(uint32_t ng_id) const; + + void CleanSplitRangeInfo(uint32_t ng_id); + struct ConnectionInfo { ConnectionInfo() = default; @@ -237,6 +249,13 @@ class RecoveryService : public brpc::StreamInputHandler, uint16_t port_; void ClearTx(uint64_t tx_number); + + // Range split info for each node group: + // ng_id -> split range commit ts>> + std::unordered_map< + uint32_t, + std::unordered_map>> + split_range_info_; }; } // namespace fault } // namespace txservice diff --git a/tx_service/include/proto/cc_request.proto b/tx_service/include/proto/cc_request.proto index 889d8259..d9d722ec 100644 --- a/tx_service/include/proto/cc_request.proto +++ b/tx_service/include/proto/cc_request.proto @@ -176,6 +176,10 @@ message UploadBatchRequest bytes commit_ts = 9; bytes rec_status = 10; UploadBatchKind kind = 11; + // Target range partition; + int32 partition_id = 12; + // Per-key one byte: [uint8_t, ...] 
+ bytes range_size_flags = 13; } message UploadBatchSlicesRequest @@ -920,6 +924,8 @@ message PostCommitRequest { bytes record = 5; uint32 operation_type = 6; uint32 key_shard_code = 7; + int32 partition_id = 8; + bool on_dirty_range = 9; } message ForwardPostCommitRequest { @@ -1088,7 +1094,7 @@ message ScanSliceRequest { bool end_inclusive = 11; bool is_forward = 12; uint64 ts = 13; - repeated uint64 prior_cce_lock_vec = 14; + uint64 prior_cce_lock = 14; IsolationType iso_level = 15; CcProtocolType protocol = 16; bool is_for_write = 17; @@ -1105,6 +1111,7 @@ message ScanSliceResponse { int64 tx_term = 3; uint32 command_id=4; int32 error_code = 5; + uint32 core_id = 16; bytes tuple_cnt = 6; bytes last_key = 7; SlicePosition slice_position = 8; @@ -1115,9 +1122,6 @@ message ScanSliceResponse { bytes gap_ts = 13; bytes cce_lock_ptr = 14; bytes term = 15; - bytes key_start_offsets = 16; - bytes record_start_offsets = 17; - bytes trailing_cnts = 18; uint64 txm_addr = 19; } diff --git a/tx_service/include/read_write_entry.h b/tx_service/include/read_write_entry.h index 4d86c34c..36463be1 100644 --- a/tx_service/include/read_write_entry.h +++ b/tx_service/include/read_write_entry.h @@ -49,17 +49,25 @@ struct WriteSetEntry op_(other.op_), cce_addr_(other.cce_addr_), key_shard_code_(other.key_shard_code_), - forward_addr_(std::move(other.forward_addr_)) + partition_id_(other.partition_id_), + forward_addr_(std::move(other.forward_addr_)), + on_dirty_range_(other.on_dirty_range_) { } WriteSetEntry &operator=(WriteSetEntry &&other) noexcept { + if (this == &other) + { + return *this; + } rec_ = std::move(other.rec_); op_ = other.op_; cce_addr_ = other.cce_addr_; key_shard_code_ = other.key_shard_code_; + partition_id_ = other.partition_id_; forward_addr_ = std::move(other.forward_addr_); + on_dirty_range_ = other.on_dirty_range_; return *this; } @@ -68,8 +76,11 @@ struct WriteSetEntry OperationType op_; CcEntryAddr cce_addr_; uint32_t key_shard_code_{}; + int32_t 
partition_id_{-1}; // Used in double write scenarios during online DDL. - std::unordered_map forward_addr_; + // key shard code -> (partition id, cce addr) + std::unordered_map> forward_addr_; + bool on_dirty_range_{false}; }; /** diff --git a/tx_service/include/remote/remote_cc_handler.h b/tx_service/include/remote/remote_cc_handler.h index 83695f21..b7c43cdd 100644 --- a/tx_service/include/remote/remote_cc_handler.h +++ b/tx_service/include/remote/remote_cc_handler.h @@ -84,7 +84,9 @@ class RemoteCcHandler const TxRecord *record, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult &hres); + CcHandlerResult &hres, + int32_t partition_id = -1, + bool on_dirty_range = false); void PostWriteAll(uint32_t src_node_id, const TableName &table_name, diff --git a/tx_service/include/remote/remote_cc_request.h b/tx_service/include/remote/remote_cc_request.h index b59af76b..1c0c2604 100644 --- a/tx_service/include/remote/remote_cc_request.h +++ b/tx_service/include/remote/remote_cc_request.h @@ -763,7 +763,7 @@ struct RemoteScanSlice : public ScanSliceCc { public: RemoteScanSlice(); - void Reset(std::unique_ptr input_msg, uint16_t core_cnt); + void Reset(std::unique_ptr input_msg); private: ScanSliceResponse output_msg_; @@ -773,7 +773,7 @@ struct RemoteScanSlice : public ScanSliceCc TableName remote_tbl_name_{ empty_sv, TableType::Primary, txservice::TableEngine::None}; CcHandlerResult cc_res_{nullptr}; - std::vector scan_cache_vec_; + RemoteScanSliceCache scan_cache_; }; struct RemoteReloadCacheCc : public ReloadCacheCc diff --git a/tx_service/include/sk_generator.h b/tx_service/include/sk_generator.h index 050d6b27..b33941e8 100644 --- a/tx_service/include/sk_generator.h +++ b/tx_service/include/sk_generator.h @@ -40,8 +40,11 @@ class UploadIndexContext public: using TableIndexSet = std::unordered_map>; - using NGIndexSet = - std::unordered_map>; + // ng_id -> (range_id -> vector of (range_size_flags, WriteEntry*)) + using NGIndexSet = 
std::unordered_map< + NodeGroupId, + std::unordered_map>>>; private: enum struct UploadTaskStatus @@ -101,16 +104,18 @@ class UploadIndexContext CcErrorCode UploadEncodedIndex(UploadIndexTask &upload_task); CcErrorCode UploadIndexInternal( std::unordered_map &ng_index_set); - void SendIndexes(const TableName &table_name, - NodeGroupId dest_ng_id, - int64_t &ng_term, - const std::vector &write_entry_vec, - size_t batch_size, - size_t start_key_idx, - bthread::Mutex &req_mux, - bthread::ConditionVariable &req_cv, - size_t &finished_req_cnt, - CcErrorCode &res_code); + void SendIndexes( + const TableName &table_name, + NodeGroupId dest_ng_id, + int64_t &ng_term, + int32_t partition_id, + const std::vector> &write_entry_vec, + size_t batch_size, + size_t start_key_idx, + bthread::Mutex &req_mux, + bthread::ConditionVariable &req_cv, + size_t &finished_req_cnt, + CcErrorCode &res_code); // Acquire and release range read lock. CcErrorCode AcquireRangeReadLocks( TransactionExecution *acq_lock_txm, diff --git a/tx_service/include/store/data_store_handler.h b/tx_service/include/store/data_store_handler.h index d0ca96d8..4059431a 100644 --- a/tx_service/include/store/data_store_handler.h +++ b/tx_service/include/store/data_store_handler.h @@ -135,6 +135,8 @@ class DataStoreHandler virtual void FetchRangeSlices(FetchRangeSlicesReq *fetch_cc) = 0; + virtual void FetchTableRangeSize(FetchTableRangeSizeCc *fetch_cc) = 0; + /** * @brief Read a row from base table or skindex table in datastore with * specified key. Caller should pass in complete primary key or skindex key. 
diff --git a/tx_service/include/tx_operation_result.h b/tx_service/include/tx_operation_result.h index d31492dc..b03417fa 100644 --- a/tx_service/include/tx_operation_result.h +++ b/tx_service/include/tx_operation_result.h @@ -447,11 +447,8 @@ struct RemoteScanSliceCache static constexpr size_t MetaDataSize = 8; static constexpr size_t DefaultCacheMaxBytes = 10 * 1024 * 1024; - RemoteScanSliceCache(uint16_t shard_cnt) - : cache_mem_size_(0), - mem_max_bytes_(DefaultCacheMaxBytes), - shard_cnt_(shard_cnt), - trailing_cnt_(0) + RemoteScanSliceCache() + : cache_mem_size_(0), mem_max_bytes_(DefaultCacheMaxBytes) { } @@ -465,7 +462,7 @@ struct RemoteScanSliceCache mem_max_bytes_ = max_bytes; } - void Reset(uint16_t shard_cnt) + void Reset() { key_ts_.clear(); gap_ts_.clear(); @@ -476,26 +473,19 @@ struct RemoteScanSliceCache keys_.clear(); records_.clear(); cache_mem_size_ = 0; - trailing_cnt_ = 0; mem_max_bytes_ = DefaultCacheMaxBytes; - shard_cnt_ = shard_cnt; archive_positions_.clear(); archive_records_.clear(); } - void RemoveLast() - { - trailing_cnt_++; - } - uint64_t LastCce() { - return cce_ptr_.at(cce_ptr_.size() - 1 - trailing_cnt_); + return cce_ptr_.at(cce_ptr_.size() - 1); } size_t Size() const { - return cce_ptr_.size() - trailing_cnt_; + return cce_ptr_.size(); } void SetLastCceLock(uint64_t lock_ptr) @@ -514,8 +504,6 @@ struct RemoteScanSliceCache std::string records_; uint32_t cache_mem_size_; uint32_t mem_max_bytes_; - uint16_t shard_cnt_; - size_t trailing_cnt_; // The first element of archive_positions_ is the index of key_ts_ to // backfill and the second element is the position in records_ to be @@ -531,8 +519,7 @@ struct RangeScanSliceResult slice_position_(SlicePosition::FirstSlice), cc_ng_id_(0), ccm_scanner_(nullptr), - is_local_(true), - last_key_status_(LastKeySetStatus::Unset) + is_local_(true) { } @@ -541,8 +528,7 @@ struct RangeScanSliceResult slice_position_(status), cc_ng_id_(0), ccm_scanner_(nullptr), - is_local_(true), - 
last_key_status_(LastKeySetStatus::Setup) + is_local_(true) { } @@ -550,8 +536,7 @@ struct RangeScanSliceResult : last_key_(std::move(rhs.last_key_)), slice_position_(rhs.slice_position_), cc_ng_id_(rhs.cc_ng_id_), - is_local_(rhs.is_local_), - last_key_status_(rhs.last_key_status_.load(std::memory_order_acquire)) + is_local_(rhs.is_local_) { if (rhs.is_local_) { @@ -576,9 +561,6 @@ struct RangeScanSliceResult slice_position_ = rhs.slice_position_; is_local_ = rhs.is_local_; cc_ng_id_ = rhs.cc_ng_id_; - last_key_status_.store( - rhs.last_key_status_.load(std::memory_order_acquire), - std::memory_order_release); if (rhs.is_local_) { @@ -594,85 +576,47 @@ struct RangeScanSliceResult void Reset() { - last_key_status_.store(LastKeySetStatus::Unset, - std::memory_order_release); last_key_ = TxKey(); } const TxKey *SetLastKey(TxKey key) { - assert(last_key_status_.load(std::memory_order_acquire) == - LastKeySetStatus::Unset); last_key_ = std::move(key); - last_key_status_.store(LastKeySetStatus::Setup, - std::memory_order_release); - return &last_key_; } template - std::pair UpdateLastKey(const KeyT *key, - SlicePosition slice_pos) + void SetLastKey(const KeyT *key, SlicePosition slice_pos) { - bool success = false; + slice_position_ = slice_pos; - LastKeySetStatus actual = LastKeySetStatus::Unset; - if (last_key_status_.compare_exchange_strong( - actual, LastKeySetStatus::Setting, std::memory_order_acq_rel)) + // If the slice position is the last or the first, this is the last + // scan batch, which must end with positive/negative infinity or the + // request's end key. In both cases, the input key is a valid + // reference throughout the lifetime of RangeScanSliceResult. So, + // the tx key does not own a new copy of the input key. 
+ if (slice_pos == SlicePosition::FirstSlice || + slice_pos == SlicePosition::LastSlice) { - slice_position_ = slice_pos; - - // If the slice position is the last or the first, this is the last - // scan batch, which must end with positive/negative infinity or the - // request's end key. In both cases, the input key is a valid - // reference throughout the lifetime of RangeScanSliceResult. So, - // the tx key does not own a new copy of the input key. - if (slice_pos == SlicePosition::FirstSlice || - slice_pos == SlicePosition::LastSlice) - { - last_key_ = TxKey(key); - } - else - { - last_key_ = key->CloneTxKey(); - } - - last_key_status_.store(LastKeySetStatus::Setup, - std::memory_order_release); - success = true; + last_key_ = TxKey(key); } else { - if (actual != LastKeySetStatus::Setup) - { - while (last_key_status_.load(std::memory_order_acquire) != - LastKeySetStatus::Setup) - { - // Busy poll. - } - } + last_key_ = key->CloneTxKey(); } - - return {last_key_.GetKey(), success}; } - std::pair PeekLastKey() const + const TxKey *LastKey() const { - if (last_key_status_.load(std::memory_order_acquire) == - LastKeySetStatus::Setup) - { - return {&last_key_, true}; - } - else + if (last_key_.KeyPtr() != nullptr) { - return {nullptr, false}; + return &last_key_; } + return nullptr; } TxKey MoveLastKey() { - last_key_status_.store(LastKeySetStatus::Unset, - std::memory_order_release); return std::move(last_key_); } @@ -691,23 +635,9 @@ struct RangeScanSliceResult union { CcScanner *ccm_scanner_; - std::vector *remote_scan_caches_; + RemoteScanSliceCache *remote_scan_caches_; }; bool is_local_{true}; - - /** - * For scene like: (1-write, n-read), atomic variable has obvious - * performance advantage over mutex/shared_mutex. For readers, mutex needs - * to modify a flag, and shared_mutex needs to modify a counter. However, - * atomic variable merely load a variable. 
- */ - enum struct LastKeySetStatus : uint8_t - { - Unset, - Setting, - Setup, - }; - std::atomic last_key_status_; }; struct BucketScanProgress diff --git a/tx_service/include/type.h b/tx_service/include/type.h index 2fe288c5..566e4171 100644 --- a/tx_service/include/type.h +++ b/tx_service/include/type.h @@ -167,6 +167,13 @@ enum class TableEngine : uint8_t InternalHash = 5, // eg. Sequence table is a kind of internal hash table. }; +// Status values for range_sizes_.first (range size not yet known). +enum RangeSizeStatus : int32_t +{ + kNotInitialized = -2, // Range size not yet initialized; need to fetch. + kLoading = -1, // Range size is being loaded; delta goes to .second. +}; + inline std::string KvTablePrefixOf(TableEngine engine) { switch (engine) diff --git a/tx_service/src/cc/cc_map.cpp b/tx_service/src/cc/cc_map.cpp index 52443b45..ede1962c 100644 --- a/tx_service/src/cc/cc_map.cpp +++ b/tx_service/src/cc/cc_map.cpp @@ -27,6 +27,7 @@ #include "cc/local_cc_shards.h" #include "cc_entry.h" #include "tx_trace.h" +#include "type.h" namespace txservice { @@ -461,4 +462,57 @@ void CcMap::DecrReadIntent(NonBlockingLock *lock, } } +bool CcMap::InitRangeSize(uint32_t partition_id, + int32_t persisted_size, + bool succeed, + bool emplace) +{ + auto it = range_sizes_.find(partition_id); + if (it == range_sizes_.end()) + { + if (!emplace) + { + return false; + } + it = range_sizes_.emplace(partition_id, std::make_tuple(0, 0, false)) + .first; + } + + if (succeed) + { + int32_t final_size = persisted_size + std::get<1>(it->second); + std::get<0>(it->second) = final_size < 0 ? 0 : final_size; + std::get<1>(it->second) = 0; + + bool trigger_split = + !std::get<2>(it->second) && + std::get<0>(it->second) >= + static_cast(StoreRange::range_max_size); + std::get<2>(it->second) = + trigger_split == true ? true : std::get<2>(it->second); + return trigger_split; + } + else + { + // Load range size failed; reset to not-initialized for retry. 
+ std::get<0>(it->second) = + static_cast(RangeSizeStatus::kNotInitialized); + } + return false; +} + +void CcMap::ResetRangeStatus(uint32_t partition_id) +{ + auto it = range_sizes_.find(partition_id); + if (it == range_sizes_.end()) + { + return; + } + std::get<2>(it->second) = false; + + DLOG(INFO) << "ResetRangeStatus: table: " << table_name_.StringView() + << " partition: " << partition_id + << " status: " << std::boolalpha << std::get<2>(it->second); +} + } // namespace txservice diff --git a/tx_service/src/cc/cc_req_misc.cpp b/tx_service/src/cc/cc_req_misc.cpp index eae335c7..2d6dbf31 100644 --- a/tx_service/src/cc/cc_req_misc.cpp +++ b/tx_service/src/cc/cc_req_misc.cpp @@ -509,27 +509,20 @@ bool ClearCcNodeGroup::Execute(CcShard &ccs) return false; } -bool InitKeyCacheCc::SetFinish(uint16_t core, bool succ) +void InitKeyCacheCc::SetFinish(bool succ) { if (succ) { - slice_->SetKeyCacheValidity(core, succ); + slice_->SetKeyCacheValidity(succ); } - slice_->SetLoadingKeyCache(core, false); + slice_->SetLoadingKeyCache(false); - if (unfinished_cnt_.fetch_sub(1, std::memory_order_relaxed) == 1) - { - pause_pos_.clear(); - - // Unpin the slice. - range_->UnpinSlice(slice_, true); - std::unique_lock slice_lk(slice_->slice_mux_); - slice_->init_key_cache_cc_ = nullptr; - - return true; - } + pause_pos_ = TxKey(); - return false; + // Unpin the slice. + range_->UnpinSlice(slice_, true); + std::unique_lock slice_lk(slice_->slice_mux_); + slice_->init_key_cache_cc_ = nullptr; } bool InitKeyCacheCc::Execute(CcShard &ccs) @@ -538,15 +531,15 @@ bool InitKeyCacheCc::Execute(CcShard &ccs) int64_t cc_ng_term = Sharder::Instance().LeaderTerm(ng_id_); if (std::max(cc_ng_candid_term, cc_ng_term) != term_) { - return SetFinish(ccs.core_id_, false); + SetFinish(false); + return true; } CcMap *ccm = ccs.GetCcm(tbl_name_, ng_id_); if (ccm == nullptr) { - // ccm is empty when slice is fully cached. That means this slice is - // empty on this core. 
- return SetFinish(ccs.core_id_, true); + SetFinish(true); + return true; } return ccm->Execute(*this); @@ -561,14 +554,14 @@ StoreSlice &InitKeyCacheCc::Slice() return *slice_; } -void InitKeyCacheCc::SetPauseKey(TxKey &key, uint16_t core_id) +void InitKeyCacheCc::SetPauseKey(TxKey &key) { - pause_pos_[core_id] = key.Clone(); + pause_pos_ = key.Clone(); } -TxKey &InitKeyCacheCc::PauseKey(uint16_t core_id) +TxKey &InitKeyCacheCc::PauseKey() { - return pause_pos_[core_id]; + return pause_pos_; } void FillStoreSliceCc::Reset(const TableName &table_name, @@ -590,14 +583,9 @@ void FillStoreSliceCc::Reset(const TableName &table_name, cc_ng_id_ = cc_ng_id; cc_ng_term_ = cc_ng_term; force_load_ = force_load; - finish_cnt_ = 0; - core_cnt_ = cc_shards.Count(); - next_idxs_.clear(); - next_idxs_.resize(cc_shards.Count(), 0); - - partitioned_slice_data_.clear(); - partitioned_slice_data_.resize(cc_shards.Count()); + next_idx_ = 0; + slice_data_.clear(); range_slice_ = slice; range_ = range; @@ -619,7 +607,7 @@ void FillStoreSliceCc::SetKvFinish(bool success) { CODE_FAULT_INJECTOR("LoadRangeSliceRequest_SetFinish_Error", { success = false; - partitioned_slice_data_.clear(); + slice_data_.clear(); slice_size_ = 0; snapshot_ts_ = 0; }); @@ -656,7 +644,8 @@ bool FillStoreSliceCc::Execute(CcShard &ccs) int64_t cc_ng_term = Sharder::Instance().LeaderTerm(cc_ng_id_); if (std::max(cc_ng_candid_term, cc_ng_term) != cc_ng_term_) { - return SetError(CcErrorCode::NG_TERM_CHANGED); + SetError(CcErrorCode::NG_TERM_CHANGED); + return true; } CcMap *ccm = ccs.GetCcm(*table_name_, cc_ng_id_); @@ -705,106 +694,65 @@ void FillStoreSliceCc::AddDataItem( rec_cnt_++; } - size_t hash = key.Hash(); - // Uses the lower 10 bits of the hash code to shard the key across - // CPU cores at this node. 
- uint16_t core_code = hash & 0x3FF; - uint16_t core_id = core_code % core_cnt_; - - partitioned_slice_data_[core_id].emplace_back( + slice_data_.emplace_back( std::move(key), std::move(record), version_ts, is_deleted); } -bool FillStoreSliceCc::SetFinish(CcShard *cc_shard) +void FillStoreSliceCc::SetFinish(CcShard *cc_shard) { - bool finish_all = false; - CcErrorCode err_code; + if (err_code_ == CcErrorCode::NO_ERROR) { - std::lock_guard lk(mux_); - ++finish_cnt_; - - if (finish_cnt_ == core_cnt_) + bool init_key_cache = + txservice_enable_key_cache && table_name_->IsBase(); + // Cache the pointer since FillStoreSliceCc will be freed after + // CommitLoading. + + const TableName *tbl_name = table_name_; + auto cc_ng_id = cc_ng_id_; + auto cc_ng_term = cc_ng_term_; + if (init_key_cache && rec_cnt_ > 0) { - finish_all = true; - err_code = err_code_; - } - } + LocalCcShards *shards = Sharder::Instance().GetLocalCcShards(); + size_t estimate_rec_size = UINT64_MAX; - if (finish_all) - { - if (err_code == CcErrorCode::NO_ERROR) - { - bool init_key_cache = - txservice_enable_key_cache && table_name_->IsBase(); - // Cache the pointer since FillStoreSliceCc will be freed after - // CommitLoading. - - const TableName *tbl_name = table_name_; - auto cc_ng_id = cc_ng_id_; - auto cc_ng_term = cc_ng_term_; - if (init_key_cache && rec_cnt_ > 0) - { - LocalCcShards *shards = Sharder::Instance().GetLocalCcShards(); - size_t estimate_rec_size = UINT64_MAX; - - // Get estiamte record size for key cache - auto schema = shards->GetSharedTableSchema( - TableName(table_name_->GetBaseTableNameSV(), - TableType::Primary, - table_name_->Engine()), - cc_ng_id_); - auto stats = schema->StatisticsObject(); - assert(slice_size_ > 0); - estimate_rec_size = slice_size_ / rec_cnt_; - if (stats) - { - // Update estimate size in table stats with the loaded - // slice. 
- stats->SetEstimateRecordSize(estimate_rec_size); - } - } - range_slice_->CommitLoading(*range_, slice_size_); - if (init_key_cache) + // Get estiamte record size for key cache + auto schema = shards->GetSharedTableSchema( + TableName(table_name_->GetBaseTableNameSV(), + TableType::Primary, + table_name_->Engine()), + cc_ng_id_); + auto stats = schema->StatisticsObject(); + assert(slice_size_ > 0); + estimate_rec_size = slice_size_ / rec_cnt_; + if (stats) { - range_slice_->InitKeyCache( - cc_shard, range_, tbl_name, cc_ng_id, cc_ng_term); + // Update estimate size in table stats with the loaded + // slice. + stats->SetEstimateRecordSize(estimate_rec_size); } } - else - { - range_slice_->SetLoadingError(*range_, err_code); - } - - next_idxs_.clear(); - partitioned_slice_data_.clear(); - } - - return finish_all; -} - -bool FillStoreSliceCc::SetError(CcErrorCode err_code) -{ - bool finish_all = false; - { - std::lock_guard lk(mux_); - ++finish_cnt_; - err_code_ = err_code; - - if (finish_cnt_ == core_cnt_) + range_slice_->CommitLoading(*range_, slice_size_); + if (init_key_cache) { - finish_all = true; + range_slice_->InitKeyCache( + cc_shard, range_, tbl_name, cc_ng_id, cc_ng_term); } } - - if (finish_all) + else { range_slice_->SetLoadingError(*range_, err_code_); - - next_idxs_.clear(); - partitioned_slice_data_.clear(); } - return finish_all; + next_idx_ = 0; + slice_data_.clear(); +} + +void FillStoreSliceCc::SetError(CcErrorCode err_code) +{ + err_code_ = err_code; + range_slice_->SetLoadingError(*range_, err_code_); + next_idx_ = 0; + slice_data_.clear(); } void FillStoreSliceCc::StartFilling() @@ -818,8 +766,14 @@ void FillStoreSliceCc::TerminateFilling() // The slice has not been filled into memory. So, the out-of-memory flag is // false. 
range_slice_->SetLoadingError(*range_, CcErrorCode::DATA_STORE_ERR); - next_idxs_.clear(); - partitioned_slice_data_.clear(); + next_idx_ = 0; + slice_data_.clear(); +} + +int32_t FillStoreSliceCc::PartitionId() const +{ + assert(range_ != nullptr); + return range_->PartitionId(); } FetchRecordCc::FetchRecordCc(const TableName *tbl_name, @@ -1535,4 +1489,63 @@ bool ShardCleanCc::Execute(CcShard &ccs) } } +void FetchTableRangeSizeCc::Reset(const TableName &table_name, + int32_t partition_id, + const TxKey &start_key, + CcShard *ccs, + NodeGroupId ng_id, + int64_t ng_term) +{ + table_name_ = &table_name; + partition_id_ = partition_id; + start_key_ = start_key.GetShallowCopy(); + node_group_id_ = ng_id; + node_group_term_ = ng_term; + ccs_ = ccs; + error_code_ = 0; + store_range_size_ = 0; +} + +bool FetchTableRangeSizeCc::ValidTermCheck() +{ + int64_t ng_leader_term = Sharder::Instance().LeaderTerm(node_group_id_); + return ng_leader_term == node_group_term_; +} + +bool FetchTableRangeSizeCc::Execute(CcShard &ccs) +{ + if (!ValidTermCheck()) + { + error_code_ = static_cast(CcErrorCode::NG_TERM_CHANGED); + } + + bool succ = (error_code_ == 0); + CcMap *ccm = ccs.GetCcm(*table_name_, node_group_id_); + if (ccm == nullptr) + { + assert(error_code_ != 0); + return true; + } + bool need_split = ccm->InitRangeSize( + static_cast(partition_id_), store_range_size_, succ); + + if (need_split) + { + uint64_t data_sync_ts = ccs.local_shards_.ClockTs(); + ccs.CreateSplitRangeDataSyncTask(*table_name_, + node_group_id_, + node_group_term_, + partition_id_, + data_sync_ts); + } + + return true; +} + +void FetchTableRangeSizeCc::SetFinish(uint32_t error) +{ + error_code_ = error; + ccs_->Enqueue(this); +} + } // namespace txservice diff --git a/tx_service/src/cc/cc_shard.cpp b/tx_service/src/cc/cc_shard.cpp index 2036d569..d3c009ef 100644 --- a/tx_service/src/cc/cc_shard.cpp +++ b/tx_service/src/cc/cc_shard.cpp @@ -398,6 +398,26 @@ CcMap *CcShard::GetCcm(const TableName 
&table_name, uint32_t node_group) } } +void CcShard::FetchTableRangeSize(const TableName &table_name, + int32_t partition_id, + NodeGroupId cc_ng_id, + int64_t cc_ng_term) +{ + FetchTableRangeSizeCc *fetch_cc = fetch_range_size_cc_pool_.NextRequest(); + + const TableName range_table_name(table_name.StringView(), + TableType::RangePartition, + table_name.Engine()); + const TableRangeEntry *range_entry = + GetTableRangeEntry(range_table_name, cc_ng_id, partition_id); + assert(range_entry != nullptr); + TxKey start_key = range_entry->GetRangeInfo()->StartTxKey(); + + fetch_cc->Reset( + table_name, partition_id, start_key, this, cc_ng_id, cc_ng_term); + local_shards_.store_hd_->FetchTableRangeSize(fetch_cc); +} + void CcShard::AdjustDataKeyStats(const TableName &table_name, int64_t size_delta, int64_t dirty_delta) @@ -3560,6 +3580,29 @@ void CcShard::RecycleTxLockInfo(TxLockInfo::uptr lock_info) tx_lock_info_head_.next_ = std::move(lock_info); } +void CcShard::ResetRangeSplittingStatus(const TableName &table_name, + uint32_t ng_id, + uint32_t range_id) +{ + CcMap *ccm = GetCcm(table_name, ng_id); + if (ccm == nullptr) + { + return; + } + + ccm->ResetRangeStatus(range_id); +} + +void CcShard::CreateSplitRangeDataSyncTask(const TableName &table_name, + uint32_t ng_id, + int64_t ng_term, + int32_t range_id, + uint64_t data_sync_ts) +{ + local_shards_.CreateSplitRangeDataSyncTask( + table_name, ng_id, ng_term, range_id, data_sync_ts); +} + void CcShard::CollectCacheHit() { assert(metrics::enable_cache_hit_rate); diff --git a/tx_service/src/cc/local_cc_handler.cpp b/tx_service/src/cc/local_cc_handler.cpp index 9dd7962d..60c5a33e 100644 --- a/tx_service/src/cc/local_cc_handler.cpp +++ b/tx_service/src/cc/local_cc_handler.cpp @@ -274,7 +274,9 @@ txservice::CcReqStatus txservice::LocalCcHandler::PostWrite( const TxRecord *record, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult &hres) + CcHandlerResult &hres, + int32_t partition_id, + bool 
on_dirty_range) { uint32_t ng_id = cce_addr.NodeGroupId(); uint32_t dest_node_id = Sharder::Instance().LeaderNodeId(ng_id); @@ -293,7 +295,9 @@ txservice::CcReqStatus txservice::LocalCcHandler::PostWrite( record, operation_type, key_shard_code, - &hres); + &hres, + partition_id, + on_dirty_range); TX_TRACE_ACTION(this, req); TX_TRACE_DUMP(req); cc_shards_.EnqueueCcRequest(thd_id_, cce_addr.CoreId(), req); @@ -312,7 +316,9 @@ txservice::CcReqStatus txservice::LocalCcHandler::PostWrite( record, operation_type, key_shard_code, - hres); + hres, + partition_id, + on_dirty_range); } return req_status; } @@ -1283,34 +1289,22 @@ void txservice::LocalCcHandler::ScanNextBatch( scanner.is_require_recs_, prefetch_size); - uint32_t core_cnt = cc_shards_.Count(); - req->SetShardCount(core_cnt); - // When the cc ng term is less than 0, this is the first scan of the // specified range. - if (cc_ng_term < 0) + if (cc_ng_term >= 0) { - scanner.ResetShards(core_cnt); - } - - for (uint32_t core_id = 0; core_id < core_cnt; ++core_id) - { - ScanCache *cache = scanner.Cache(core_id); + ScanCache *cache = scanner.Cache(0); const ScanTuple *last_tuple = cache->LastTuple(); req->SetPriorCceLockAddr( - last_tuple != nullptr ? last_tuple->cce_addr_.CceLockPtr() : 0, - core_id); + last_tuple != nullptr ? last_tuple->cce_addr_.CceLockPtr() : 0); } scanner.ResetCaches(); - uint32_t core_rand = butil::fast_rand(); + uint16_t dest_core = (range_id & 0x3FF) % cc_shards_.Count(); - // The scan slice request is dispatched to the first core. The first - // core tries to pin the slice in memory and if succeeds, further - // dispatches the request to remaining cores for parallel scans. 
- cc_shards_.EnqueueCcRequest(thd_id_, core_rand % core_cnt, req); + cc_shards_.EnqueueCcRequest(thd_id_, dest_core, req); } else { @@ -1907,7 +1901,8 @@ void txservice::LocalCcHandler::KickoutData(const TableName &table_name, KickoutCcEntryCc *req = kickout_ccentry_pool_.NextRequest(); // For hash partition, all data in a single bucket should be hashed to // the same core. - uint16_t core_cnt = clean_type == CleanType::CleanBucketData + uint16_t core_cnt = (clean_type == CleanType::CleanBucketData || + clean_type == CleanType::CleanRangeData) ? 1 : Sharder::Instance().GetLocalCcShardsCount(); req->Reset(table_name, @@ -1934,6 +1929,14 @@ void txservice::LocalCcHandler::KickoutData(const TableName &table_name, Sharder::Instance().ShardBucketIdToCoreIdx((*bucket_id)[0]), req); } + else if (clean_type == CleanType::CleanRangeData) + { + assert(range_id != INT32_MAX); + uint16_t dest_core = static_cast( + (range_id & 0x3FF) % + Sharder::Instance().GetLocalCcShardsCount()); + cc_shards_.EnqueueToCcShard(dest_core, req); + } else { // Dispatch the request to all cores and run in parallel @@ -2013,20 +2016,13 @@ void txservice::LocalCcHandler::UpdateKeyCache(const TableName &table_name, hres.SetToBlock(); #endif - size_t core_cnt = cc_shards_.Count(); UpdateKeyCacheCc *req = update_key_cache_pool_.NextRequest(); - req->Reset(table_name, - ng_id, - tx_term, - core_cnt, - start_key, - end_key, - store_range, - &hres); - for (size_t idx = 0; idx < core_cnt; ++idx) - { - cc_shards_.EnqueueCcRequest(idx, req); - } + req->Reset( + table_name, ng_id, tx_term, start_key, end_key, store_range, &hres); + + uint16_t dest_core = static_cast( + (store_range->PartitionId() & 0x3FF) % cc_shards_.Count()); + cc_shards_.EnqueueCcRequest(dest_core, req); } /* diff --git a/tx_service/src/cc/local_cc_shards.cpp b/tx_service/src/cc/local_cc_shards.cpp index 810b5607..fec2f065 100644 --- a/tx_service/src/cc/local_cc_shards.cpp +++ b/tx_service/src/cc/local_cc_shards.cpp @@ -2337,7 +2337,8 @@ 
bool LocalCcShards::EnqueueRangeDataSyncTask( bool can_be_skipped, uint64_t &last_sync_ts, std::shared_ptr status, - CcHandlerResult *hres) + CcHandlerResult *hres, + bool high_priority) { const RangeInfo *range_info = range_entry->GetRangeInfo(); NodeGroupId range_ng = @@ -2371,19 +2372,33 @@ bool LocalCcShards::EnqueueRangeDataSyncTask( // Push task to worker task queue. std::lock_guard task_worker_lk( data_sync_worker_ctx_.mux_); - data_sync_task_queue_[range_info->PartitionId() % - data_sync_task_queue_.size()] - .emplace_back( - std::make_shared(table_name, - range_info->PartitionId(), - range_info->VersionTs(), - ng_id, - ng_term, - data_sync_ts, - status, - is_dirty, - can_be_skipped, - hres)); + std::deque> &task_queue = + data_sync_task_queue_[range_info->PartitionId() % + data_sync_task_queue_.size()]; + + auto task = + std::make_shared(table_name, + range_info->PartitionId(), + range_info->VersionTs(), + ng_id, + ng_term, + data_sync_ts, + status, + is_dirty, + can_be_skipped, + hres, + nullptr, + false, + false, + high_priority); + if (high_priority) + { + task_queue.push_front(std::move(task)); + } + else + { + task_queue.push_back(std::move(task)); + } return true; } else @@ -2391,11 +2406,12 @@ bool LocalCcShards::EnqueueRangeDataSyncTask( if (can_be_skipped) { assert(hres == nullptr); + assert(!high_priority); // '0' means have no pending task on queue. if (iter->second->latest_pending_task_ts_ == 0) { iter->second->latest_pending_task_ts_ = data_sync_ts; - iter->second->pending_tasks_.push( + iter->second->pending_tasks_.push_back( std::make_shared( table_name, range_info->PartitionId(), @@ -2424,7 +2440,7 @@ bool LocalCcShards::EnqueueRangeDataSyncTask( // This task can't be skipped(DataMigration, CraeteIndex, // LastCheckpoint). 
So we push this task to the pending task // queue of `Limiter` - iter->second->pending_tasks_.push( + auto task = std::make_shared(table_name, range_info->PartitionId(), range_info->VersionTs(), @@ -2434,7 +2450,19 @@ bool LocalCcShards::EnqueueRangeDataSyncTask( status, is_dirty, can_be_skipped, - hres)); + hres, + nullptr, + false, + false, + high_priority); + if (high_priority) + { + iter->second->pending_tasks_.push_front(std::move(task)); + } + else + { + iter->second->pending_tasks_.push_back(std::move(task)); + } return true; } } @@ -2509,22 +2537,24 @@ void LocalCcShards::EnqueueDataSyncTaskForSplittingRange( TxKey old_start_key = range_entry->GetRangeInfo()->StartTxKey(); TxKey old_end_key = range_entry->GetRangeInfo()->EndTxKey(); // The old range - data_sync_task_queue_[range_entry->GetRangeInfo()->PartitionId() % - data_sync_task_queue_.size()] - .emplace_back(std::make_shared( - table_name, - ng_id, - ng_term, - table_schema, - range_entry, - range_entry->GetRangeInfo()->StartTxKey(), - *new_keys->begin(), - data_sync_ts, - is_dirty, - false, - txn, - status, - hres)); + auto &task_queue = + data_sync_task_queue_[range_entry->GetRangeInfo()->PartitionId() % + data_sync_task_queue_.size()]; + auto old_range_task = std::make_shared( + table_name, + ng_id, + ng_term, + table_schema, + range_entry, + range_entry->GetRangeInfo()->StartTxKey(), + *new_keys->begin(), + data_sync_ts, + is_dirty, + false, + txn, + status, + hres); + task_queue.push_front(std::move(old_range_task)); bool need_copy_range = store_hd_->NeedCopyRange(); @@ -2534,20 +2564,22 @@ void LocalCcShards::EnqueueDataSyncTaskForSplittingRange( TxKey end_key = (i == new_keys->size() - 1 ? 
range_entry->GetRangeInfo()->EndTxKey() : (*new_keys)[i + 1].GetShallowCopy()); - data_sync_task_queue_[new_range_id % data_sync_task_queue_.size()] - .emplace_back(std::make_shared(table_name, - ng_id, - ng_term, - table_schema, - range_entry, - (*new_keys)[i], - end_key, - data_sync_ts, - is_dirty, - need_copy_range, - txn, - status, - hres)); + auto &task_queue = + data_sync_task_queue_[new_range_id % data_sync_task_queue_.size()]; + auto new_range_task = std::make_shared(table_name, + ng_id, + ng_term, + table_schema, + range_entry, + (*new_keys)[i], + end_key, + data_sync_ts, + is_dirty, + need_copy_range, + txn, + status, + hres); + task_queue.push_front(std::move(new_range_task)); } data_sync_worker_ctx_.cv_.notify_all(); @@ -2641,7 +2673,7 @@ bool LocalCcShards::EnqueueDataSyncTaskToCore( if (iter->second->latest_pending_task_ts_ == 0) { iter->second->latest_pending_task_ts_ = data_sync_ts; - iter->second->pending_tasks_.push( + iter->second->pending_tasks_.push_back( std::make_shared(table_name, core_idx, 0, @@ -2672,7 +2704,7 @@ bool LocalCcShards::EnqueueDataSyncTaskToCore( // LastCheckpoint). Because these operations need to explicitly // flush data into storage, rather than relying on other // checkpoint tasks. 
- iter->second->pending_tasks_.push( + iter->second->pending_tasks_.push_back( std::make_shared(table_name, core_idx, 0, @@ -2913,6 +2945,37 @@ void LocalCcShards::EnqueueDataSyncTaskForBucket( data_sync_worker_ctx_.cv_.notify_all(); } +void LocalCcShards::CreateSplitRangeDataSyncTask(const TableName &table_name, + uint32_t ng_id, + int64_t ng_term, + int32_t range_id, + uint64_t data_sync_ts) +{ + std::shared_lock meta_lk(meta_data_mux_); + std::shared_ptr status = + std::make_shared(ng_id, ng_term, false); + TableName range_table_name(table_name.StringView(), + TableType::RangePartition, + table_name.Engine()); + TableRangeEntry *range_entry = const_cast( + GetTableRangeEntryInternal(range_table_name, ng_id, range_id)); + assert(range_entry != nullptr); + uint64_t last_sync_ts = 0; + EnqueueRangeDataSyncTask(table_name, + ng_id, + ng_term, + range_entry, + data_sync_ts, + false, + false, + last_sync_ts, + status, + nullptr, + true); + + data_sync_worker_ctx_.cv_.notify_all(); +} + void LocalCcShards::Terminate() { // Terminate the data sync task worker thds. 
@@ -3158,6 +3221,7 @@ void LocalCcShards::PostProcessFlushTaskEntries( task->id_); } task->SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + task->ResetRangeSplittingStatus(); continue; } @@ -3217,6 +3281,7 @@ void LocalCcShards::PostProcessFlushTaskEntries( } task->SetError(err_code); + task->ResetRangeSplittingStatus(); } else { @@ -3228,6 +3293,7 @@ void LocalCcShards::PostProcessFlushTaskEntries( txservice::AbortTx(entry->data_sync_txm_); } task->SetError(CcErrorCode::DATA_STORE_ERR); + task->ResetRangeSplittingStatus(); } } } @@ -3272,6 +3338,7 @@ void LocalCcShards::PostProcessFlushTaskEntries( } task->SetFinish(); + task->ResetRangeSplittingStatus(); } else { @@ -3303,6 +3370,7 @@ void LocalCcShards::PostProcessFlushTaskEntries( } task->SetError(err_code); + task->ResetRangeSplittingStatus(); } } } @@ -3359,6 +3427,7 @@ void LocalCcShards::PostProcessRangePartitionDataSyncTask( task->id_); } task->SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); + task->ResetRangeSplittingStatus(); if (ng_term >= 0) { Sharder::Instance().UnpinNodeGroupData(task->node_group_id_); @@ -3427,6 +3496,7 @@ void LocalCcShards::PostProcessRangePartitionDataSyncTask( } task->SetFinish(); + task->ResetRangeSplittingStatus(); } else if (task_ckpt_err == DataSyncTask::CkptErrorCode::SCAN_ERROR) { @@ -3478,6 +3548,7 @@ void LocalCcShards::PostProcessRangePartitionDataSyncTask( } task->SetError(err_code); + task->ResetRangeSplittingStatus(); } else { @@ -3489,6 +3560,7 @@ void LocalCcShards::PostProcessRangePartitionDataSyncTask( txservice::AbortTx(data_sync_txm); } task->SetError(CcErrorCode::DATA_STORE_ERR); + task->ResetRangeSplittingStatus(); } } @@ -3541,6 +3613,7 @@ void LocalCcShards::DataSyncForRangePartition( // table dropped data_sync_task->SetError(CcErrorCode::REQUESTED_TABLE_NOT_EXISTS); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); ClearAllPendingTasks(ng_id, expected_ng_term, table_name, range_id); } else @@ -3578,6 +3651,7 @@ void 
LocalCcShards::DataSyncForRangePartition( { data_sync_task->SetFinish(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask( ng_id, expected_ng_term, table_name, range_id); assert(need_process == false); @@ -3593,6 +3667,7 @@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->SetError( CcErrorCode::REQUESTED_NODE_NOT_LEADER); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask(ng_id, expected_ng_term, table_name, range_id); } } @@ -3618,6 +3693,7 @@ void LocalCcShards::DataSyncForRangePartition( // Finish this task and notify the caller. data_sync_task->SetError(CcErrorCode::REQUESTED_NODE_NOT_LEADER); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask(ng_id, expected_ng_term, table_name, range_id); if (ng_term >= 0) @@ -3702,6 +3778,7 @@ void LocalCcShards::DataSyncForRangePartition( // directly. data_sync_task->SetError(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); ClearAllPendingTasks( ng_id, expected_ng_term, table_name, range_id); @@ -3761,6 +3838,7 @@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->SetFinish(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask(ng_id, expected_ng_term, table_name, range_id); return; @@ -3815,6 +3893,7 @@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->SetError(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask(ng_id, expected_ng_term, table_name, range_id); return; @@ -3830,6 +3909,7 @@ void LocalCcShards::DataSyncForRangePartition( txservice::AbortTx(data_sync_txm); data_sync_task->SetError(CcErrorCode::GET_RANGE_ID_ERR); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); PopPendingTask(ng_id, expected_ng_term, table_name, range_id); return; @@ -3871,7 +3951,6 
@@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->data_sync_ts_, ng_id, ng_term, - cc_shards_.size(), tx_number, start_tx_key, end_tx_key, @@ -3879,10 +3958,10 @@ void LocalCcShards::DataSyncForRangePartition( is_dirty, schema_version); - for (size_t i = 0; i < cc_shards_.size(); i++) - { - EnqueueLowPriorityCcRequestToShard(i, &scan_delta_size_cc); - } + uint16_t dest_core = static_cast( + (range_entry->GetRangeInfo()->PartitionId() & 0x3FF) % Count()); + EnqueueLowPriorityCcRequestToShard(dest_core, &scan_delta_size_cc); + scan_delta_size_cc.Wait(); if (scan_delta_size_cc.IsError()) @@ -3905,14 +3984,10 @@ void LocalCcShards::DataSyncForRangePartition( return; } - for (size_t i = 0; i < cc_shards_.size(); ++i) + auto &delta_size = scan_delta_size_cc.SliceDeltaSize(); + for (auto &delta : delta_size) { - auto &delta_size = scan_delta_size_cc.SliceDeltaSize(i); - for (size_t j = 0; j < delta_size.size(); ++j) - { - slices_delta_size[std::move(delta_size[j].first)] += - delta_size[j].second; - } + slices_delta_size[std::move(delta.first)] += delta.second; } if (!export_base_table_items && slices_delta_size.size() == 0) @@ -3947,6 +4022,7 @@ void LocalCcShards::DataSyncForRangePartition( } data_sync_task->SetFinish(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); return; } assert(slices_delta_size.size() > 0 || export_base_table_items); @@ -3979,6 +4055,7 @@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->SetError(); data_sync_task->SetScanTaskFinished(); + data_sync_task->ResetRangeSplittingStatus(); // Handle the pending tasks for the same range PopPendingTask(ng_id, expected_ng_term, table_name, range_id); @@ -4007,40 +4084,6 @@ void LocalCcShards::DataSyncForRangePartition( } // 3. Scan records. - // The data sync worker thread is the owner of those vectors. - - // Sort output vectors in key sorting order. 
- auto key_greater = [](const std::pair &r1, - const std::pair &r2) -> bool - { return r2.first < r1.first; }; - auto rec_greater = [](const FlushRecord &r1, const FlushRecord &r2) -> bool - { return r2.Key() < r1.Key(); }; - - std::vector> data_sync_vecs; - std::vector> archive_vecs; - std::vector>> mv_base_vecs; - - // Add an extra vector as a remaining vector to store the remaining keys - // of the current batch of FlushRecords. - // DataSyncScanCc request is executed in parallel on all cores. For a - // batch of scan results, the end keys among the cores are different. - // In order to ensure the accuracy of the calculated subslice keys, for - // this batch of FlushRecords, the minimum end key of all cores's scan - // result is obtained, and the FlushRecords after this key is placed in - // this remaining vector, which will be merged with the next batch of - // FlushRecords. For example: core1[10,15,20], core2[8,16,24,32], only - // [8,10,15,16,20] will be flushed into data store in this round,and - // the remaining vector stores [24,32] - for (size_t i = 0; i < (cc_shards_.size() + 1); ++i) - { - data_sync_vecs.emplace_back(); - data_sync_vecs.back().reserve(DATA_SYNC_SCAN_BATCH_SIZE); - archive_vecs.emplace_back(); - archive_vecs.back().reserve(DATA_SYNC_SCAN_BATCH_SIZE); - mv_base_vecs.emplace_back(); - mv_base_vecs.back().reserve(DATA_SYNC_SCAN_BATCH_SIZE); - } - // Scan the FlushRecords. 
// Paused position UpdateSliceStatus update_slice_status; @@ -4053,8 +4096,11 @@ void LocalCcShards::DataSyncForRangePartition( GetRangeOwner(old_range_id, ng_id)->BucketOwner(); NodeGroupId new_range_owner = GetRangeOwner(range_id, ng_id)->BucketOwner(); + uint16_t old_range_owner_shard = (old_range_id & 0x3FF) % Count(); + uint16_t new_range_owner_shard = (range_id & 0x3FF) % Count(); - need_send_range_cache = new_range_owner != old_range_owner; + need_send_range_cache = new_range_owner != old_range_owner || + new_range_owner_shard != old_range_owner_shard; if (need_send_range_cache) { range_cache_sender = std::make_unique( @@ -4073,7 +4119,6 @@ void LocalCcShards::DataSyncForRangePartition( data_sync_task->data_sync_ts_, ng_id, ng_term, - cc_shards_.size(), DATA_SYNC_SCAN_BATCH_SIZE, tx_number, &start_tx_key, @@ -4095,12 +4140,7 @@ void LocalCcShards::DataSyncForRangePartition( while (!scan_data_drained) { - uint32_t core_rand = butil::fast_rand(); - // The scan slice request is dispatched to the first core. The first - // core tries to pin the slice if necessary and if succeeds, further - // dispatches the request to remaining cores for parallel scans. - EnqueueLowPriorityCcRequestToShard(core_rand % cc_shards_.size(), - &scan_cc); + EnqueueLowPriorityCcRequestToShard(dest_core, &scan_cc); scan_cc.Wait(); if (scan_cc.IsError()) @@ -4119,61 +4159,51 @@ void LocalCcShards::DataSyncForRangePartition( else { scan_data_drained = true; - assert(scan_cc.accumulated_flush_data_size_.size() == - cc_shards_.size()); - uint64_t flush_data_size = 0; - for (size_t flush_data_size_per_core : - scan_cc.accumulated_flush_data_size_) - { - flush_data_size += flush_data_size_per_core; - } + uint64_t flush_data_size = scan_cc.accumulated_flush_data_size_; // The cost of FlushRecord also needs to be considered. 
- for (size_t i = 0; i < cc_shards_.size(); ++i) - { #ifdef WITH_JEMALLOC - flush_data_size += - (scan_cc.DataSyncVec(i).size() * sizeof(FlushRecord) + - scan_cc.ArchiveVec(i).size() * sizeof(FlushRecord) + - scan_cc.MoveBaseIdxVec(i).size() * - sizeof(std::pair)); + flush_data_size += + (scan_cc.DataSyncVec().size() * sizeof(FlushRecord) + + scan_cc.ArchiveVec().size() * sizeof(FlushRecord) + + scan_cc.MoveBaseIdxVec().size() * + sizeof(std::pair)); #else - // Check if vectors are empty before calling malloc_usable_size - // to avoid SEGV on nullptr or invalid pointers. - // Use malloc_usable_size when ASan is enabled (vectors may be - // allocated by ASan's allocator), otherwise use - // mi_malloc_usable_size for mimalloc-allocated memory. - auto &data_sync_vec_ref = scan_cc.DataSyncVec(i); - auto &archive_vec_ref = scan_cc.ArchiveVec(i); - auto &move_base_idx_vec_ref = scan_cc.MoveBaseIdxVec(i); + // Check if vectors are empty before calling malloc_usable_size + // to avoid SEGV on nullptr or invalid pointers. + // Use malloc_usable_size when ASan is enabled (vectors may be + // allocated by ASan's allocator), otherwise use + // mi_malloc_usable_size for mimalloc-allocated memory. + auto &data_sync_vec_ref = scan_cc.DataSyncVec(); + auto &archive_vec_ref = scan_cc.ArchiveVec(); + auto &move_base_idx_vec_ref = scan_cc.MoveBaseIdxVec(); #ifdef __SANITIZE_ADDRESS__ - // When ASan is enabled, use standard malloc_usable_size - flush_data_size += - (data_sync_vec_ref.empty() - ? 0 - : malloc_usable_size(data_sync_vec_ref.data())) + - (archive_vec_ref.empty() - ? 0 - : malloc_usable_size(archive_vec_ref.data())) + - (move_base_idx_vec_ref.empty() - ? 0 - : malloc_usable_size(move_base_idx_vec_ref.data())); + // When ASan is enabled, use standard malloc_usable_size + flush_data_size += + (data_sync_vec_ref.empty() + ? 0 + : malloc_usable_size(data_sync_vec_ref.data())) + + (archive_vec_ref.empty() + ? 
0 + : malloc_usable_size(archive_vec_ref.data())) + + (move_base_idx_vec_ref.empty() + ? 0 + : malloc_usable_size(move_base_idx_vec_ref.data())); #else - // When ASan is not enabled, use mimalloc's API - flush_data_size += - (data_sync_vec_ref.empty() - ? 0 - : mi_malloc_usable_size(data_sync_vec_ref.data())) + - (archive_vec_ref.empty() - ? 0 - : mi_malloc_usable_size(archive_vec_ref.data())) + - (move_base_idx_vec_ref.empty() - ? 0 - : mi_malloc_usable_size(move_base_idx_vec_ref.data())); + // When ASan is not enabled, use mimalloc's API + flush_data_size += + (data_sync_vec_ref.empty() + ? 0 + : mi_malloc_usable_size(data_sync_vec_ref.data())) + + (archive_vec_ref.empty() + ? 0 + : mi_malloc_usable_size(archive_vec_ref.data())) + + (move_base_idx_vec_ref.empty() + ? 0 + : mi_malloc_usable_size(move_base_idx_vec_ref.data())); #endif #endif - } // This thread will wait in AllocatePendingFlushDataMemQuota if // quota is not available @@ -4189,53 +4219,6 @@ void LocalCcShards::DataSyncForRangePartition( << " of range: " << range_id << " for table: " << table_name.StringView(); - // The minimum end key of this batch data between all the cores. - TxKey min_scanned_end_key = - GetCatalogFactory(table_name.Engine())->PositiveInfKey(); - for (size_t i = 0; i < cc_shards_.size(); ++i) - { - for (size_t j = 0; j < scan_cc.accumulated_scan_cnt_[i]; ++j) - { - auto &rec = scan_cc.DataSyncVec(i)[j]; - // Clone key - data_sync_vecs[i].emplace_back( - rec.Key().Clone(), - rec.ReleaseVersionedPayload(), - rec.payload_status_, - rec.commit_ts_, - rec.cce_, - rec.post_flush_size_, - range_id); - } - - // Get the minimum end key. 
- if (!data_sync_vecs[i].empty() && - data_sync_vecs[i].back().Key() < min_scanned_end_key) - { - min_scanned_end_key = data_sync_vecs[i].back().Key(); - } - - for (size_t j = 0; j < scan_cc.ArchiveVec(i).size(); ++j) - { - auto &rec = scan_cc.ArchiveVec(i)[j]; - rec.SetKey(data_sync_vecs[i][rec.GetKeyIndex()].Key()); - } - - for (size_t j = 0; j < scan_cc.MoveBaseIdxVec(i).size(); ++j) - { - size_t key_idx = scan_cc.MoveBaseIdxVec(i)[j]; - TxKey key_raw = data_sync_vecs[i][key_idx].Key(); - mv_base_vecs[i].emplace_back(std::move(key_raw), range_id); - } - - // Move the bucket into the tank - std::move(scan_cc.ArchiveVec(i).begin(), - scan_cc.ArchiveVec(i).end(), - std::back_inserter(archive_vecs.at(i))); - - scan_data_drained = scan_cc.IsDrained(i) && scan_data_drained; - } - std::unique_ptr> data_sync_vec = std::make_unique>(); std::unique_ptr> archive_vec = @@ -4244,90 +4227,46 @@ void LocalCcShards::DataSyncForRangePartition( mv_base_vec = std::make_unique>>(); - MergeSortedVectors( - std::move(mv_base_vecs), *mv_base_vec, key_greater, false); - - // Set the ckpt_ts_ of a cc entry repeatedly, which might cause the - // ccentry become invalid in between. But, there should be no - // duplication here. we don't need to remove duplicate record. - MergeSortedVectors( - std::move(data_sync_vecs), *data_sync_vec, rec_greater, false); - - // For archive vec we don't need to worry about duplicate causing - // issue since we're not visiting their cc entry. Also we cannot - // rely on key compare to dedup archive vec since a key could have - // multiple version of archive versions. 
- MergeSortedVectors( - std::move(archive_vecs), *archive_vec, rec_greater, false); - - data_sync_vecs.resize(cc_shards_.size() + 1); - archive_vecs.resize(cc_shards_.size() + 1); - mv_base_vecs.resize(cc_shards_.size() + 1); - for (size_t i = 0; i <= cc_shards_.size(); ++i) + data_sync_vec->reserve(DATA_SYNC_SCAN_BATCH_SIZE); + archive_vec->reserve(DATA_SYNC_SCAN_BATCH_SIZE); + mv_base_vec->reserve(DATA_SYNC_SCAN_BATCH_SIZE); + + for (size_t j = 0; j < scan_cc.accumulated_scan_cnt_; ++j) + { + auto &rec = scan_cc.DataSyncVec()[j]; + // Clone key + data_sync_vec->emplace_back(rec.Key().Clone(), + rec.ReleaseVersionedPayload(), + rec.payload_status_, + rec.commit_ts_, + rec.cce_, + rec.post_flush_size_, + range_id); + } + + for (size_t j = 0; j < scan_cc.ArchiveVec().size(); ++j) { - data_sync_vecs.at(i).clear(); - archive_vecs.at(i).clear(); - mv_base_vecs.at(i).clear(); + auto &rec = scan_cc.ArchiveVec()[j]; + rec.SetKey(data_sync_vec->at(rec.GetKeyIndex()).Key()); } - size_t data_sync_vec_size = data_sync_vec->size(); - // Fix the vector of FlushRecords. 
- if (!scan_data_drained) + for (size_t j = 0; j < scan_cc.MoveBaseIdxVec().size(); ++j) { - // Only flush the keys that are not greater than the - // min_scanned_end_key - auto iter = std::upper_bound( - data_sync_vec->begin(), - data_sync_vec->end(), - min_scanned_end_key, - [](const TxKey &key, const FlushRecord &rec) - { return key < rec.Key(); }); - - auto &remaining_vec = data_sync_vecs[cc_shards_.size()]; - remaining_vec.clear(); - remaining_vec.insert( - remaining_vec.begin(), - std::make_move_iterator(iter), - std::make_move_iterator(data_sync_vec->end())); - data_sync_vec->erase(iter, data_sync_vec->end()); - - // archive vector - auto archive_iter = std::upper_bound( - archive_vec->begin(), - archive_vec->end(), - min_scanned_end_key, - [](const TxKey &key, const FlushRecord &rec) - { return key < rec.Key(); }); - auto &archive_remaining_vec = archive_vecs[cc_shards_.size()]; - archive_remaining_vec.clear(); - archive_remaining_vec.insert( - archive_remaining_vec.begin(), - std::make_move_iterator(archive_iter), - std::make_move_iterator(archive_vec->end())); - archive_vec->erase(archive_iter, archive_vec->end()); - - // mv base vector - auto mv_base_iter = std::upper_bound( - mv_base_vec->begin(), - mv_base_vec->end(), - min_scanned_end_key, - [](const TxKey &t_key, - const std::pair &key_and_partition_id) - { return t_key < key_and_partition_id.first; }); - auto &mv_base_remaining_vec = mv_base_vecs[cc_shards_.size()]; - mv_base_remaining_vec.clear(); - mv_base_remaining_vec.insert( - mv_base_remaining_vec.begin(), - std::make_move_iterator(mv_base_iter), - std::make_move_iterator(mv_base_vec->end())); - mv_base_vec->erase(mv_base_iter, mv_base_vec->end()); + size_t key_idx = scan_cc.MoveBaseIdxVec()[j]; + TxKey key_raw = data_sync_vec->at(key_idx).Key(); + mv_base_vec->emplace_back(std::move(key_raw), range_id); } + // Move the bucket into the tank + std::move(scan_cc.ArchiveVec().begin(), + scan_cc.ArchiveVec().end(), + 
std::back_inserter(*archive_vec)); + + scan_data_drained = scan_cc.IsDrained(); + if (data_sync_vec->empty()) { - LOG(WARNING) << "data_sync_vec becomes empty after erase, old " - "size of data_sync_vec_size: " - << data_sync_vec_size; + LOG(WARNING) << "data_sync_vec is empty."; // Reset scan_cc.Reset(); // Return the quota to flush data memory usage pool since the @@ -4403,20 +4342,17 @@ void LocalCcShards::DataSyncForRangePartition( table_schema, flush_data_size)); - for (size_t i = 0; i < cc_shards_.size(); ++i) + if (scan_cc.scan_heap_is_full_ == 1) { - if (scan_cc.scan_heap_is_full_[i] == 1) - { - // Clear the FlushRecords' memory of scan cc since the - // DataSyncScan heap is full. - auto &data_sync_vec_ref = scan_cc.DataSyncVec(i); - auto &archive_vec_ref = scan_cc.ArchiveVec(i); - ReleaseDataSyncScanHeapCc release_scan_heap_cc( - &data_sync_vec_ref, &archive_vec_ref); - EnqueueLowPriorityCcRequestToShard(i, - &release_scan_heap_cc); - release_scan_heap_cc.Wait(); - } + // Clear the FlushRecords' memory of scan cc since the + // DataSyncScan heap is full. + auto &data_sync_vec_ref = scan_cc.DataSyncVec(); + auto &archive_vec_ref = scan_cc.ArchiveVec(); + ReleaseDataSyncScanHeapCc release_scan_heap_cc( + &data_sync_vec_ref, &archive_vec_ref); + EnqueueLowPriorityCcRequestToShard(dest_core, + &release_scan_heap_cc); + release_scan_heap_cc.Wait(); } // Reset scan_cc.Reset(); @@ -4431,19 +4367,12 @@ void LocalCcShards::DataSyncForRangePartition( } // Release scan heap memory after scan finish. 
- std::list req_vec; - for (size_t core_idx = 0; core_idx < Count(); ++core_idx) - { - auto &data_sync_vec_ref = scan_cc.DataSyncVec(core_idx); - auto &archive_vec_ref = scan_cc.ArchiveVec(core_idx); - req_vec.emplace_back(&data_sync_vec_ref, &archive_vec_ref); - EnqueueLowPriorityCcRequestToShard(core_idx, &req_vec.back()); - } - while (req_vec.size() > 0) - { - req_vec.back().Wait(); - req_vec.pop_back(); - } + auto &data_sync_vec_ref = scan_cc.DataSyncVec(); + auto &archive_vec_ref = scan_cc.ArchiveVec(); + ReleaseDataSyncScanHeapCc release_scan_heap_cc(&data_sync_vec_ref, + &archive_vec_ref); + EnqueueLowPriorityCcRequestToShard(dest_core, &release_scan_heap_cc); + release_scan_heap_cc.Wait(); PostProcessRangePartitionDataSyncTask(std::move(data_sync_task), data_sync_txm, @@ -4939,6 +4868,7 @@ void LocalCcShards::DataSyncForHashPartition( req_ptr = upload_batch_closure->UploadBatchRequest(); req_ptr->set_node_group_id(dest_ng); req_ptr->set_node_group_term(-1); + req_ptr->set_partition_id(-1); req_ptr->set_table_name_str(table_name.String()); req_ptr->set_table_type( remote::ToRemoteType::ConvertTableType( @@ -5281,12 +5211,20 @@ void LocalCcShards::PopPendingTask(NodeGroupId ng_id, { std::shared_ptr task = iter->second->pending_tasks_.front(); - iter->second->pending_tasks_.pop(); + iter->second->pending_tasks_.pop_front(); task_limiter_lk.unlock(); std::lock_guard task_worker_lk(data_sync_worker_ctx_.mux_); - data_sync_task_queue_[id % data_sync_task_queue_.size()].push_back( - std::move(task)); + auto &task_queue = + data_sync_task_queue_[id % data_sync_task_queue_.size()]; + if (task->high_priority_) + { + task_queue.push_front(std::move(task)); + } + else + { + task_queue.push_back(std::move(task)); + } data_sync_worker_ctx_.cv_.notify_all(); } else @@ -5318,7 +5256,7 @@ void LocalCcShards::ClearAllPendingTasks(NodeGroupId ng_id, auto &task = iter->second->pending_tasks_.front(); task->SetError(CcErrorCode::REQUESTED_TABLE_NOT_EXISTS); 
task->SetScanTaskFinished(); - iter->second->pending_tasks_.pop(); + iter->second->pending_tasks_.pop_front(); } task_limiters_.erase(iter); @@ -5899,7 +5837,9 @@ void LocalCcShards::FlushData(std::unique_lock &flush_worker_lk) size_t key_core_idx = 0; if (!table_name.IsHashPartitioned()) { - key_core_idx = (rec.Key().Hash() & 0x3FF) % Count(); + int32_t range_id = entry->data_sync_task_->id_; + key_core_idx = static_cast( + (range_id & 0x3FF) % Count()); } else { @@ -6992,79 +6932,84 @@ void LocalCcShards::RangeCacheSender::SendRangeCacheRequest( // 1- upload dirty range slices info (with PartiallyCached) int64_t ng_term = INIT_TERM; - remote::CcRpcService_Stub stub(channel_.get()); - - brpc::Controller cntl; - cntl.set_timeout_ms(10000); - cntl.set_write_to_socket_in_background(true); - // cntl.ignore_eovercrowded(true); - remote::UploadRangeSlicesRequest req; - remote::UploadRangeSlicesResponse resp; - - req.set_node_group_id(new_range_owner_); - req.set_ng_term(ng_term); - req.set_table_name_str(table_name_.String()); - req.set_table_engine( - remote::ToRemoteType::ConvertTableEngine(table_name_.Engine())); - req.set_old_partition_id(old_range_id_); - req.set_version_ts(version_ts_); - req.set_new_partition_id(new_range_id_); - req.set_new_slices_num(slices_vec_.size()); - std::string *keys_str = req.mutable_new_slices_keys(); - std::string *sizes_str = req.mutable_new_slices_sizes(); - std::string *status_str = req.mutable_new_slices_status(); - for (const StoreSlice *slice : slices_vec_) - { - // key - TxKey slice_key = slice->StartTxKey(); - slice_key.Serialize(*keys_str); - // size - // If post ckpt size of the slice is UINT64_MAX, it means that there is - // no item need to be ckpt in this slice, so should use the current size - // of the slice. - uint32_t slice_size = - (slice->PostCkptSize() == UINT64_MAX ? 
slice->Size() - : slice->PostCkptSize()); - const char *slice_size_ptr = - reinterpret_cast(&slice_size); - sizes_str->append(slice_size_ptr, sizeof(slice_size)); - // status - int8_t slice_status = static_cast(SliceStatus::PartiallyCached); - const char *slice_status_ptr = - reinterpret_cast(&slice_status); - status_str->append(slice_status_ptr, sizeof(slice_status)); - } - req.set_has_dml_since_ddl(store_range_->HasDmlSinceDdl()); - stub.UploadRangeSlices(&cntl, &req, &resp, nullptr); - - if (cntl.Failed()) - { - LOG(WARNING) << "SendRangeCacheRequest: Fail to upload dirty range " - "slices RPC ng#" - << new_range_owner_ << ". Error code: " << cntl.ErrorCode() - << ". Msg: " << cntl.ErrorText(); - return; - } + if (new_range_owner_ != ng_id_) + { + remote::CcRpcService_Stub stub(channel_.get()); + + brpc::Controller cntl; + cntl.set_timeout_ms(10000); + cntl.set_write_to_socket_in_background(true); + // cntl.ignore_eovercrowded(true); + remote::UploadRangeSlicesRequest req; + remote::UploadRangeSlicesResponse resp; + + req.set_node_group_id(new_range_owner_); + req.set_ng_term(ng_term); + req.set_table_name_str(table_name_.String()); + req.set_table_engine( + remote::ToRemoteType::ConvertTableEngine(table_name_.Engine())); + req.set_old_partition_id(old_range_id_); + req.set_version_ts(version_ts_); + req.set_new_partition_id(new_range_id_); + req.set_new_slices_num(slices_vec_.size()); + std::string *keys_str = req.mutable_new_slices_keys(); + std::string *sizes_str = req.mutable_new_slices_sizes(); + std::string *status_str = req.mutable_new_slices_status(); + for (const StoreSlice *slice : slices_vec_) + { + // key + TxKey slice_key = slice->StartTxKey(); + slice_key.Serialize(*keys_str); + // size + // If post ckpt size of the slice is UINT64_MAX, it means that there + // is no item need to be ckpt in this slice, so should use the + // current size of the slice. + uint32_t slice_size = + (slice->PostCkptSize() == UINT64_MAX ? 
slice->Size() + : slice->PostCkptSize()); + const char *slice_size_ptr = + reinterpret_cast(&slice_size); + sizes_str->append(slice_size_ptr, sizeof(slice_size)); + // status + int8_t slice_status = + static_cast(SliceStatus::PartiallyCached); + const char *slice_status_ptr = + reinterpret_cast(&slice_status); + status_str->append(slice_status_ptr, sizeof(slice_status)); + } + req.set_has_dml_since_ddl(store_range_->HasDmlSinceDdl()); + stub.UploadRangeSlices(&cntl, &req, &resp, nullptr); + + if (cntl.Failed()) + { + LOG(WARNING) << "SendRangeCacheRequest: Fail to upload dirty range " + "slices RPC ng#" + << new_range_owner_ + << ". Error code: " << cntl.ErrorCode() + << ". Msg: " << cntl.ErrorText(); + return; + } - if (remote::ToLocalType::ConvertCcErrorCode(resp.error_code()) != - CcErrorCode::NO_ERROR) - { - LOG(WARNING) << "SendRangeCacheRequest: New owner ng#" - << new_range_owner_ - << " reject to receive dirty range data"; - return; - } + if (remote::ToLocalType::ConvertCcErrorCode(resp.error_code()) != + CcErrorCode::NO_ERROR) + { + LOG(WARNING) << "SendRangeCacheRequest: New owner ng#" + << new_range_owner_ + << " reject to receive dirty range data"; + return; + } - ng_term = resp.ng_term(); - LOG(INFO) << "SendRangeCacheRequest: Uploaded new range slices info to " - "future owner, range#" - << old_range_id_ << ", new_range#" << new_range_id_; + ng_term = resp.ng_term(); + LOG(INFO) << "SendRangeCacheRequest: Uploaded new range slices info to " + "future owner, range#" + << old_range_id_ << ", new_range#" << new_range_id_; + } // 2- upload records belongs to dirty range assert(closure_vec_->size() > 0); LOG(INFO) << "SendRangeCacheRequest: Sending range data, old_range_id: " << old_range_id_ << ", to upload " << closure_vec_->size() - << " batches to ng#" << new_range_owner_; + << " batches to ng#" << new_range_owner_ << " from ng#" << ng_id_; uint32_t sender_cnt = 5; auto closures_idx = std::make_shared(sender_cnt); @@ -7084,6 +7029,8 @@ void 
LocalCcShards::RangeCacheSender::SendRangeCacheRequest( size_t vec_size = vec.size(); size_t end_idx = std::min(begin_idx + 5, vec_size); bool rejected = false; + int64_t term = + ng_term == INIT_TERM ? dest_ng_term : ng_term; while (begin_idx < end_idx) { std::unique_ptr closure( @@ -7096,6 +7043,7 @@ void LocalCcShards::RangeCacheSender::SendRangeCacheRequest( end_idx = std::min(begin_idx + 5, vec_size); } + rejected = rejected || term != dest_ng_term; if (rejected) { // Must continue to delete left closures in @@ -7110,7 +7058,7 @@ void LocalCcShards::RangeCacheSender::SendRangeCacheRequest( cntl_ptr->set_timeout_ms(closure->TimeoutValue()); // Fix the term closure->UploadBatchRequest()->set_node_group_term( - ng_term); + term); stub.UploadBatchSlices(cntl_ptr, closure->UploadBatchRequest(), closure->UploadBatchResponse(), @@ -7131,6 +7079,7 @@ void LocalCcShards::RangeCacheSender::SendRangeCacheRequest( << closure->NodeId() << " is reject for no free memory"; } + term = resp->ng_term(); } LOG(INFO) << "Old_Range#" << range_id diff --git a/tx_service/src/cc/range_slice.cpp b/tx_service/src/cc/range_slice.cpp index 91b1973b..baa051ee 100644 --- a/tx_service/src/cc/range_slice.cpp +++ b/tx_service/src/cc/range_slice.cpp @@ -70,10 +70,9 @@ void StoreSlice::StartLoading(FillStoreSliceCc *fill_req, assert(pins_ == 0); status_ = SliceStatus::BeingLoaded; - for (uint16_t core_id = 0; core_id < cc_shards.Count(); ++core_id) - { - cc_shards.EnqueueCcRequest(core_id, fill_req); - } + uint16_t dest_core = static_cast( + (fill_req->PartitionId() & 0x3FF) % cc_shards.Count()); + cc_shards.EnqueueToCcShard(dest_core, fill_req); } void StoreSlice::CommitLoading(StoreRange &range, uint32_t slice_size) @@ -173,19 +172,9 @@ void StoreSlice::InitKeyCache(CcShard *cc_shard, pins_++; init_key_cache_cc_ = cc_shard->NewInitKeyCacheCc(); - init_key_cache_cc_->Reset(range, - this, - range->local_cc_shards_.Count(), - *tbl_name, - term, - ng_id); - - uint16_t core_cnt = 
range->local_cc_shards_.Count(); - for (uint16_t core_id = 0; core_id < core_cnt; core_id++) - { - Sharder::Instance().GetLocalCcShards()->EnqueueToCcShard( - core_id, init_key_cache_cc_); - } + init_key_cache_cc_->Reset(range, this, *tbl_name, term, ng_id); + + cc_shard->Enqueue(init_key_cache_cc_); } } @@ -254,17 +243,12 @@ StoreRange::StoreRange(uint32_t partition_id, estimate_rec_size)); } - uint16_t core_cnt = Sharder::Instance().GetLocalCcShardsCount(); - for (uint16_t id = 0; id < core_cnt; id++) - { - key_cache_.push_back( - std::make_unique>( - key_cache_size / core_cnt)); - } + key_cache_ = std::make_unique>( + key_cache_size); } else { - key_cache_.resize(0); + key_cache_ = nullptr; } } @@ -449,12 +433,11 @@ bool StoreRange::SampleSubRangeKeys(StoreSlice *slice, &end_key, key_cnt); - // Send the request to one shard randomly. - uint64_t core_rand = butil::fast_rand(); - local_cc_shards_.EnqueueLowPriorityCcRequestToShard( - core_rand % local_cc_shards_.Count(), &sample_keys_cc); - DLOG(INFO) << "Send the sample range keys request to shard#" - << core_rand % local_cc_shards_.Count(); + uint16_t dest_core = static_cast((partition_id_ & 0x3FF) % + local_cc_shards_.Count()); + local_cc_shards_.EnqueueLowPriorityCcRequestToShard(dest_core, + &sample_keys_cc); + DLOG(INFO) << "Send the sample range keys request to shard#" << dest_core; sample_keys_cc.Wait(); CcErrorCode res = sample_keys_cc.ErrorCode(); diff --git a/tx_service/src/data_sync_task.cpp b/tx_service/src/data_sync_task.cpp index ec1e4815..d12d8c30 100644 --- a/tx_service/src/data_sync_task.cpp +++ b/tx_service/src/data_sync_task.cpp @@ -79,7 +79,8 @@ DataSyncTask::DataSyncTask(const TableName &table_name, range_entry_(range_entry), during_split_range_(true), export_base_table_items_(export_base_table_items), - tx_number_(txn) + tx_number_(txn), + high_priority_(true) { assert(!table_name_.IsHashPartitioned()); if (start_key_.KeyPtr() == @@ -98,7 +99,15 @@ DataSyncTask::DataSyncTask(const TableName 
&table_name, .GetLocalCcShards() ->GetRangeOwner(id_, ng_id) ->BucketOwner(); - need_update_ckpt_ts_ = range_owner == ng_id; + + size_t local_shard_count = Sharder::Instance().GetLocalCcShardsCount(); + int32_t old_range_id = range_entry_->GetRangeInfo()->PartitionId(); + uint16_t old_range_owner_shard = + static_cast((old_range_id & 0x3FF) % local_shard_count); + uint16_t new_range_owner_shard = + static_cast((id_ & 0x3FF) % local_shard_count); + need_update_ckpt_ts_ = + range_owner == ng_id && old_range_owner_shard == new_range_owner_shard; } void DataSyncTask::SetFinish() @@ -227,4 +236,24 @@ void DataSyncTask::SetScanTaskFinished() } } +void DataSyncTask::ResetRangeSplittingStatus() +{ + if (!high_priority_ || during_split_range_) + { + return; + } + + WaitableCc reset_cc( + [&](CcShard &ccs) + { + ccs.ResetRangeSplittingStatus(table_name_, node_group_id_, id_); + return true; + }); + + LocalCcShards *local_cc_shards = Sharder::Instance().GetLocalCcShards(); + uint16_t dest_core = (id_ & 0x3FF) % local_cc_shards->Count(); + local_cc_shards->EnqueueToCcShard(dest_core, &reset_cc); + reset_cc.Wait(); +} + } // namespace txservice diff --git a/tx_service/src/fault/log_replay_service.cpp b/tx_service/src/fault/log_replay_service.cpp index 739caa70..8c65958f 100644 --- a/tx_service/src/fault/log_replay_service.cpp +++ b/tx_service/src/fault/log_replay_service.cpp @@ -584,6 +584,21 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, auto res_pair = table_range_split_cnt.try_emplace( base_table_name, std::make_shared(0)); + // Record split range commit ts for data log replay. 
+ ::txlog::SplitRangeOpMessage ds_split_range_op_msg; + if (!ds_split_range_op_msg.ParseFromArray( + split_range_op_blob.data() + blob_offset, + split_range_op_blob.length() - blob_offset)) + { + recovery_error = true; + CleanSplitRangeInfo(cc_ng_id); + return 0; + } + int32_t range_id = ds_split_range_op_msg.partition_id(); + uint64_t split_commit_ts = split_range_msg.commit_ts(); + SetSplitRangeInfo( + cc_ng_id, base_table_name, range_id, split_commit_ts); + // Replay Split ReplayLogCc *cc_req = replay_cc_pool_.NextRequest(); cc_req->Reset( @@ -611,6 +626,7 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, stream_id, mux, on_fly_cnt, status, recovery_error); if (recovery_error) { + CleanSplitRangeInfo(cc_ng_id); return 0; } } @@ -618,6 +634,7 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, // parse and process log records if (!msg.has_finish()) { + const auto *split_range_info = GetSplitRangeInfo(cc_ng_id); ParseDataLogCc *cc_req = parse_datalog_cc_pool_.NextRequest(); cc_req->Reset(std::move(msg), cc_ng_id, @@ -626,7 +643,8 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, status, on_fly_cnt, recovery_error, - is_lock_recovery); + is_lock_recovery, + split_range_info); on_fly_cnt.fetch_add(1, std::memory_order_release); local_shards_.EnqueueCcRequest(next_core, cc_req); next_core = (next_core + 1) % local_shards_.Count(); @@ -634,6 +652,7 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, else // has finish message { const std::string &log_records = msg.binary_log_records(); + const auto *split_range_info = GetSplitRangeInfo(cc_ng_id); ParseDataLogCc *cc_req = parse_datalog_cc_pool_.NextRequest(); cc_req->Reset(log_records, cc_ng_id, @@ -642,7 +661,8 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, status, on_fly_cnt, recovery_error, - is_lock_recovery); + is_lock_recovery, + split_range_info); on_fly_cnt.fetch_add(1, std::memory_order_release); 
local_shards_.EnqueueCcRequest(next_core, cc_req); next_core = (next_core + 1) % local_shards_.Count(); @@ -687,6 +707,8 @@ int RecoveryService::on_received_messages(brpc::StreamId stream_id, << ", log group: " << info->log_group_id_ << ", set recovering status to finished"; } + // Clean up split range info for this node group. + CleanSplitRangeInfo(cc_ng_id); brpc::StreamClose(stream_id); // assumption: finish message must be the last message so return return 0; @@ -1060,5 +1082,40 @@ void RecoveryService::ProcessRecoverTxTask(RecoverTxTask &task) } } +void RecoveryService::SetSplitRangeInfo(uint32_t ng_id, + TableName table_name, + int32_t range_id, + uint64_t commit_ts) +{ + auto ng_it = split_range_info_.try_emplace(ng_id).first; + auto &table_map = ng_it->second; + auto table_it = + table_map + .try_emplace(table_name, std::unordered_map{}) + .first; + auto &range_map = table_it->second; + auto [it, inserted] = range_map.try_emplace(range_id, commit_ts); + if (!inserted) + { + it->second = commit_ts; + } +} + +const std::unordered_map> * +RecoveryService::GetSplitRangeInfo(uint32_t ng_id) const +{ + auto ng_it = split_range_info_.find(ng_id); + if (ng_it == split_range_info_.end()) + { + return nullptr; + } + return &ng_it->second; +} + +void RecoveryService::CleanSplitRangeInfo(uint32_t ng_id) +{ + split_range_info_.erase(ng_id); +} + } // namespace fault } // namespace txservice diff --git a/tx_service/src/remote/cc_node_service.cpp b/tx_service/src/remote/cc_node_service.cpp index 123cd440..37a2e7c7 100644 --- a/tx_service/src/remote/cc_node_service.cpp +++ b/tx_service/src/remote/cc_node_service.cpp @@ -1172,6 +1172,7 @@ void CcNodeService::UploadBatch( NodeGroupId ng_id = request->node_group_id(); int64_t ng_term = request->node_group_term(); + int32_t partition_id = request->partition_id(); std::string_view table_name_sv{request->table_name_str()}; TableType table_type = @@ -1199,14 +1200,15 @@ void CcNodeService::UploadBatch( << " for table:" << 
table_name.Trace(); LocalCcShards *cc_shards = Sharder::Instance().GetLocalCcShards(); - size_t core_cnt = cc_shards->Count(); + size_t core_cnt = (partition_id >= 0) ? 1 : cc_shards->Count(); uint32_t batch_size = request->batch_size(); auto write_entry_tuple = UploadBatchCc::WriteEntryTuple(request->keys(), request->records(), request->commit_ts(), - request->rec_status()); + request->rec_status(), + request->range_size_flags()); size_t finished_req = 0; bthread::Mutex req_mux; @@ -1217,6 +1219,7 @@ void CcNodeService::UploadBatch( req.Reset(table_name, ng_id, ng_term, + partition_id, core_cnt, batch_size, write_entry_tuple, @@ -1224,9 +1227,18 @@ void CcNodeService::UploadBatch( req_cv, finished_req, data_type); - for (size_t core = 0; core < core_cnt; ++core) + if (partition_id >= 0) + { + uint16_t dest_core = + static_cast((partition_id & 0x3FF) % cc_shards->Count()); + cc_shards->EnqueueToCcShard(dest_core, &req); + } + else { - cc_shards->EnqueueToCcShard(core, &req); + for (size_t core = 0; core < cc_shards->Count(); ++core) + { + cc_shards->EnqueueToCcShard(core, &req); + } } { @@ -1383,30 +1395,32 @@ void CcNodeService::UploadBatchSlices( } UploadBatchSlicesCc req; - req.Reset( - table_name, ng_id, ng_term, core_cnt, write_entry_tuple, slices_info); + req.Reset(table_name, ng_id, ng_term, write_entry_tuple, slices_info); - // Select a core randomly to parse items. After parsed, this core will push - // the request to other cores to emplace keys. 
- uint16_t rand_core = std::rand() % core_cnt; - cc_shards->EnqueueToCcShard(rand_core, &req); + uint16_t dest_core = + static_cast((slices_info->new_range_ & 0x3FF) % core_cnt); + cc_shards->EnqueueToCcShard(dest_core, &req); req.Wait(); CcErrorCode err = CcErrorCode::NO_ERROR; if (req.ErrorCode() != CcErrorCode::NO_ERROR) { - LOG(INFO) << "CcNodeService UploadBatch RPC of #ng" << ng_id + LOG(INFO) << "CcNodeService UploadBatchRecordCache RPC of #ng" << ng_id + << " for range#" << slices_info->range_ << ", new_range#" + << slices_info->new_range_ << " finished with error: " << static_cast(err); err = req.ErrorCode(); } else { - DLOG(INFO) << "CcNodeService UploadBatch RPC of #ng" << ng_id + DLOG(INFO) << "CcNodeService UploadBatchRecordCache RPC of #ng" << ng_id + << " for range#" << slices_info->range_ << ", new_range#" + << slices_info->new_range_ << " finished with error: " << static_cast(err); } response->set_error_code(ToRemoteType::ConvertCcErrorCode(err)); - response->set_ng_term(ng_term); + response->set_ng_term(req.CcNgTerm()); } void CcNodeService::FetchPayload( diff --git a/tx_service/src/remote/cc_stream_receiver.cpp b/tx_service/src/remote/cc_stream_receiver.cpp index 3a0166e7..e015881e 100644 --- a/tx_service/src/remote/cc_stream_receiver.cpp +++ b/tx_service/src/remote/cc_stream_receiver.cpp @@ -377,44 +377,14 @@ void CcStreamReceiver::PreProcessScanResp( ToLocalType::ConvertSlicePosition(msg->slice_position()); const char *tuple_cnt_info = msg->tuple_cnt().data(); - uint16_t remote_core_cnt = *((const uint16_t *) tuple_cnt_info); - tuple_cnt_info += sizeof(uint16_t); - range_scanner.ResetShards(remote_core_cnt); + size_t tuple_cnt = *((const size_t *) tuple_cnt_info); - const uint64_t *term_ptr = (const uint64_t *) msg->term().data(); - - // The offset_table stores the start postition of meta data like `key_ts` - // for all remote cores - std::vector offset_table; - size_t meta_offset = 0; - - range_scanner.SetPartitionNgTerm(-1); - - bool 
all_remote_core_no_more_data = true; - - for (uint16_t core_id = 0; core_id < remote_core_cnt; ++core_id) - { - size_t tuple_cnt = *((const size_t *) tuple_cnt_info); - tuple_cnt_info += sizeof(size_t); - - all_remote_core_no_more_data = - all_remote_core_no_more_data && (tuple_cnt == 0); - - // All term value are same. We only set `partition_ng_term` once. - if (range_scanner.PartitionNgTerm() == -1 && tuple_cnt != 0) - { - range_scanner.SetPartitionNgTerm(term_ptr[0]); - } + bool remote_no_more_data = tuple_cnt == 0; - offset_table.push_back(meta_offset); - meta_offset += tuple_cnt; - term_ptr += tuple_cnt; - } - - assert(offset_table.size() == remote_core_cnt); + const uint64_t *term_ptr = (const uint64_t *) msg->term().data(); // No more data. - if (all_remote_core_no_more_data) + if (remote_no_more_data) { if (msg->error_code() != 0) { @@ -430,21 +400,18 @@ void CcStreamReceiver::PreProcessScanResp( RecycleScanSliceResp(std::move(msg)); return; } - - // Worker count means how many tx processer to parallel deserialize msg. 
- // remote core count is not always equal to local core count - size_t worker_cnt = std::min((size_t) remote_core_cnt, - Sharder::Instance().GetLocalCcShardsCount()); + else + { + range_scanner.SetPartitionNgTerm(term_ptr[0]); + } ProcessRemoteScanRespCc *request = process_remote_scan_resp_pool_.NextRequest(); - request->Reset( - this, std::move(msg), std::move(offset_table), hd_res, worker_cnt); + request->Reset(this, std::move(msg), hd_res); - for (size_t idx = 0; idx < worker_cnt; ++idx) - { - local_shards_.EnqueueCcRequest(idx, request); - } + uint32_t core_rand = butil::fast_rand(); + uint16_t dest_core = core_rand % local_shards_.Count(); + local_shards_.EnqueueCcRequest(dest_core, request); } void CcStreamReceiver::OnReceiveCcMsg(std::unique_ptr msg) @@ -1283,9 +1250,8 @@ void CcStreamReceiver::OnReceiveCcMsg(std::unique_ptr msg) case CcMessage::MessageType::CcMessage_MessageType_ScanSliceRequest: { RemoteScanSlice *scan_slice_req = scan_slice_pool.NextRequest(); - uint32_t local_core_cnt = (uint32_t) local_shards_.Count(); TX_TRACE_ASSOCIATE(msg.get(), scan_slice_req); - scan_slice_req->Reset(std::move(msg), local_core_cnt); + scan_slice_req->Reset(std::move(msg)); // The scan slice request is enqueued into the first core, where it pins // the slice and sets the scan's end key. The request is then dispatched // to remaining cores to scan the slice in parallel. 
diff --git a/tx_service/src/remote/remote_cc_handler.cpp b/tx_service/src/remote/remote_cc_handler.cpp index 848ae8f7..7b863703 100644 --- a/tx_service/src/remote/remote_cc_handler.cpp +++ b/tx_service/src/remote/remote_cc_handler.cpp @@ -159,7 +159,9 @@ void txservice::remote::RemoteCcHandler::PostWrite( const TxRecord *record, OperationType operation_type, uint32_t key_shard_code, - CcHandlerResult &hres) + CcHandlerResult &hres, + int32_t partition_id, + bool on_dirty_range) { CcMessage send_msg; @@ -194,6 +196,8 @@ void txservice::remote::RemoteCcHandler::PostWrite( post_commit->set_commit_ts(commit_ts); post_commit->set_operation_type(static_cast(operation_type)); post_commit->set_key_shard_code(key_shard_code); + post_commit->set_partition_id(partition_id); + post_commit->set_on_dirty_range(on_dirty_range); stream_sender_.SendMessageToNg(cce_addr.NodeGroupId(), send_msg, &hres); } @@ -720,20 +724,15 @@ void txservice::remote::RemoteCcHandler::ScanNext( CcScanner &scanner = *hd_res.Value().ccm_scanner_; - scan_slice->clear_prior_cce_lock_vec(); + scan_slice->clear_prior_cce_lock(); // When the cc ng term is greater than 0, this scan resumes the last scan in // the range. Sets the cc entry addresses where last scan stops. if (cc_ng_term > 0) { - uint32_t remote_core_cnt = scanner.ShardCount(); - - for (uint32_t core_id = 0; core_id < remote_core_cnt; ++core_id) - { - ScanCache *cache = scanner.Cache(core_id); - const ScanTuple *last_tuple = cache->LastTuple(); - scan_slice->add_prior_cce_lock_vec( - last_tuple != nullptr ? last_tuple->cce_addr_.CceLockPtr() : 0); - } + ScanCache *cache = scanner.Cache(0); + const ScanTuple *last_tuple = cache->LastTuple(); + scan_slice->set_prior_cce_lock( + last_tuple != nullptr ? 
last_tuple->cce_addr_.CceLockPtr() : 0); scanner.ResetCaches(); } diff --git a/tx_service/src/remote/remote_cc_request.cpp b/tx_service/src/remote/remote_cc_request.cpp index 32fbb935..7b24630b 100644 --- a/tx_service/src/remote/remote_cc_request.cpp +++ b/tx_service/src/remote/remote_cc_request.cpp @@ -594,7 +594,9 @@ void txservice::remote::RemotePostWrite::Reset( rec_str, static_cast(post_commit.operation_type()), post_commit.key_shard_code(), - &cc_res_); + &cc_res_, + post_commit.partition_id(), + post_commit.on_dirty_range()); } else { @@ -1317,7 +1319,6 @@ bool txservice::remote::RemoteScanNextBatch::EndKeyInclusive() txservice::remote::RemoteScanSlice::RemoteScanSlice() { - parallel_req_ = true; res_ = &cc_res_; cc_res_.Value().is_local_ = false; @@ -1359,8 +1360,8 @@ txservice::remote::RemoteScanSlice::RemoteScanSlice() const RangeScanSliceResult &slice_result = cc_res_.Value(); output_msg_.clear_last_key(); - auto [last_key, key_set] = slice_result.PeekLastKey(); - assert(key_set || cc_res_.IsError()); + const TxKey *last_key = slice_result.LastKey(); + assert(last_key != nullptr || cc_res_.IsError()); // Only sends back the last key if this scan batch is not the last. The // next scan batch will use this last key as the beginning of the next // batch. @@ -1376,95 +1377,69 @@ txservice::remote::RemoteScanSlice::RemoteScanSlice() output_msg_.set_slice_position( ToRemoteType::ConvertSlicePosition(slice_result.slice_position_)); - uint16_t core_cnt = GetShardCount(); - // Add core cnt first - output_msg_.mutable_tuple_cnt()->append((const char *) &core_cnt, - sizeof(uint16_t)); - // Add tuple count for each core - for (size_t idx = 0; idx < core_cnt; ++idx) - { - size_t tuple_cnt; - if (send_cache) - { - tuple_cnt = scan_cache_vec_[idx].rec_status_.size(); - } - else - { - tuple_cnt = 0; - } - output_msg_.mutable_tuple_cnt()->append((const char *) &tuple_cnt, - sizeof(size_t)); - } + // Add tuple count + size_t tuple_cnt = + send_cache ? 
slice_result.remote_scan_caches_->Size() : 0; + output_msg_.mutable_tuple_cnt()->append((const char *) &tuple_cnt, + sizeof(size_t)); if (send_cache) { - // Merge scan cache info into a single byte array to reduce - // deserialization time on the receiver side. - for (size_t idx = 0; idx < core_cnt; ++idx) - { - RemoteScanSliceCache &cache = scan_cache_vec_[idx]; - - size_t keys_start_offset = output_msg_.keys().size(); - output_msg_.mutable_key_start_offsets()->append( - (const char *) &keys_start_offset, sizeof(size_t)); - size_t record_start_offset = output_msg_.records().size(); - output_msg_.mutable_record_start_offsets()->append( - (const char *) &record_start_offset, sizeof(size_t)); + output_msg_.mutable_keys()->append(scan_cache_.keys_); - output_msg_.mutable_keys()->append(cache.keys_); - - if (cache.archive_positions_.size() > 0) + if (scan_cache_.archive_positions_.size() > 0) + { + // Merge the backfilled archive records. + size_t rec_offset = 0; + for (size_t j = 0; j < scan_cache_.archive_positions_.size(); + j++) { - // Merge the backfilled archive records. 
- size_t rec_offset = 0; - for (size_t j = 0; j < cache.archive_positions_.size(); j++) - { - output_msg_.mutable_records()->append( - cache.records_.data() + rec_offset, - cache.records_.data() + - cache.archive_positions_[j].second); - rec_offset = cache.archive_positions_[j].second; - assert(cache.archive_records_[j].size() > 0); - output_msg_.mutable_records()->append( - cache.archive_records_[j]); - } output_msg_.mutable_records()->append( - cache.records_.data() + rec_offset, - cache.records_.data() + cache.records_.size()); - } - else - { - output_msg_.mutable_records()->append(cache.records_); + scan_cache_.records_.data() + rec_offset, + scan_cache_.records_.data() + + scan_cache_.archive_positions_[j].second); + rec_offset = scan_cache_.archive_positions_[j].second; + assert(scan_cache_.archive_records_[j].size() > 0); + output_msg_.mutable_records()->append( + scan_cache_.archive_records_[j]); } - - output_msg_.mutable_key_ts()->append( - (const char *) cache.key_ts_.data(), - cache.key_ts_.size() * sizeof(uint64_t)); - output_msg_.mutable_gap_ts()->append( - (const char *) cache.gap_ts_.data(), - cache.gap_ts_.size() * sizeof(uint64_t)); - output_msg_.mutable_term()->append( - (const char *) cache.term_.data(), - cache.term_.size() * sizeof(uint64_t)); - output_msg_.mutable_cce_lock_ptr()->append( - (const char *) cache.cce_lock_ptr_.data(), - cache.cce_lock_ptr_.size() * sizeof(uint64_t)); - output_msg_.mutable_rec_status()->append( - (const char *) cache.rec_status_.data(), - cache.rec_status_.size() * sizeof(RecordStatusType)); - - output_msg_.mutable_trailing_cnts()->append( - (const char *) &cache.trailing_cnt_, sizeof(size_t)); + output_msg_.mutable_records()->append( + scan_cache_.records_.data() + rec_offset, + scan_cache_.records_.data() + scan_cache_.records_.size()); } + else + { + output_msg_.mutable_records()->append(scan_cache_.records_); + } + + output_msg_.mutable_key_ts()->append( + (const char *) scan_cache_.key_ts_.data(), + 
scan_cache_.key_ts_.size() * sizeof(uint64_t)); + output_msg_.mutable_gap_ts()->append( + (const char *) scan_cache_.gap_ts_.data(), + scan_cache_.gap_ts_.size() * sizeof(uint64_t)); + output_msg_.mutable_term()->append( + (const char *) scan_cache_.term_.data(), + scan_cache_.term_.size() * sizeof(uint64_t)); + output_msg_.mutable_cce_lock_ptr()->append( + (const char *) scan_cache_.cce_lock_ptr_.data(), + scan_cache_.cce_lock_ptr_.size() * sizeof(uint64_t)); + output_msg_.mutable_rec_status()->append( + (const char *) scan_cache_.rec_status_.data(), + scan_cache_.rec_status_.size() * sizeof(RecordStatusType)); } const ScanSliceRequest &req = input_msg_->scan_slice_req(); + uint32_t range_id = req.range_id(); + uint32_t core_id = + (range_id & 0x3FF) % Sharder::Instance().GetLocalCcShardsCount(); + output_msg_.set_core_id(core_id); hd_->SendScanRespToNode(req.src_node_id(), output_msg_, false); hd_->RecycleCcMsg(std::move(input_msg_)); }; } void txservice::remote::RemoteScanSlice::Reset( - std::unique_ptr input_msg, uint16_t core_cnt) + std::unique_ptr input_msg) { assert(input_msg->has_scan_slice_req()); @@ -1508,30 +1483,13 @@ void txservice::remote::RemoteScanSlice::Reset( output_msg_.set_tx_term(input_msg->tx_term()); output_msg_.set_command_id(input_msg->command_id()); - SetShardCount(core_cnt); - - size_t vec_size = scan_slice_req.prior_cce_lock_vec_size(); - for (size_t core_id = 0; core_id < core_cnt; ++core_id) - { - uint64_t cce_lock_addr = - core_id < vec_size ? 
scan_slice_req.prior_cce_lock_vec(core_id) : 0; - SetPriorCceLockAddr(cce_lock_addr, core_id); - } + uint64_t cce_lock_addr = scan_slice_req.prior_cce_lock(); + SetPriorCceLockAddr(cce_lock_addr); RangeScanSliceResult &slice_result = cc_res_.Value(); - for (uint16_t core_id = 0; core_id < core_cnt; ++core_id) - { - if (core_id == scan_cache_vec_.size()) - { - scan_cache_vec_.emplace_back(core_cnt); - } - else - { - scan_cache_vec_[core_id].Reset(core_cnt); - } - } - slice_result.remote_scan_caches_ = &scan_cache_vec_; + scan_cache_.Reset(); + slice_result.remote_scan_caches_ = &scan_cache_; input_msg_ = std::move(input_msg); diff --git a/tx_service/src/sk_generator.cpp b/tx_service/src/sk_generator.cpp index e3fc928e..01ff589e 100644 --- a/tx_service/src/sk_generator.cpp +++ b/tx_service/src/sk_generator.cpp @@ -324,7 +324,6 @@ void SkGenerator::ScanAndEncodeIndex(const TxKey *start_key, scan_ts_, node_group_id_, ng_term, - core_cnt, scan_batch_size_, tx_number, start_key, @@ -336,12 +335,7 @@ void SkGenerator::ScanAndEncodeIndex(const TxKey *start_key, CcErrorCode scan_res = CcErrorCode::NO_ERROR; bool scan_data_drained = false; bool scan_pk_finished = false; - std::vector last_finished_pos; - last_finished_pos.reserve(core_cnt); - for (size_t i = 0; i < core_cnt; ++i) - { - last_finished_pos.emplace_back(start_key->Clone()); - } + TxKey last_finished_pos = start_key->Clone(); TxKey target_key; const TxRecord *target_rec = nullptr; @@ -355,11 +349,8 @@ void SkGenerator::ScanAndEncodeIndex(const TxKey *start_key, { batch_tuples = 0; - uint32_t core_rand = butil::fast_rand(); - // The scan slice request is dispatched to the first core. The first - // core tries to pin the slice if necessary and if succeeds, further - // dispatches the request to remaining cores for parallel scans. 
- cc_shards->EnqueueToCcShard(core_rand % core_cnt, &scan_req); + uint16_t dest_core = (partition_id_ & 0x3FF) % cc_shards->Count(); + cc_shards->EnqueueToCcShard(dest_core, &scan_req); scan_req.Wait(); if (scan_req.IsError()) @@ -381,17 +372,14 @@ void SkGenerator::ScanAndEncodeIndex(const TxKey *start_key, { std::this_thread::sleep_for(std::chrono::seconds(30)); // Reset the paused key. - for (size_t i = 0; i < core_cnt; ++i) + const TxKey &paused_key = scan_req.PausePos().first; + if (!scan_req.IsDrained()) { - const TxKey &paused_key = scan_req.PausePos(i).first; - if (!scan_req.IsDrained(i)) - { - // Should use one copy of the key, instead of move the - // ownership of the key, because this round of scan may - // failed again. - assert(paused_key.IsOwner()); - paused_key.Copy(last_finished_pos[i]); - } + // Should use one copy of the key, instead of move the + // ownership of the key, because this round of scan may + // failed again. + assert(paused_key.IsOwner()); + paused_key.Copy(last_finished_pos); } scan_req.Reset(); scan_pk_finished = false; @@ -431,71 +419,63 @@ void SkGenerator::ScanAndEncodeIndex(const TxKey *start_key, } sk_encoder = sk_encoder_vec_[vec_idx].get(); - for (size_t core_idx = 0; core_idx < core_cnt; ++core_idx) + for (size_t key_idx = 0; key_idx < scan_req.accumulated_scan_cnt_; + ++key_idx) { - for (size_t key_idx = 0; - key_idx < scan_req.accumulated_scan_cnt_.at(core_idx); - ++key_idx) + auto &tuple = scan_req.DataSyncVec().at(key_idx); + target_key = tuple.Key(); + target_rec = tuple.Payload(); + version_ts = tuple.commit_ts_; + if (tuple.payload_status_ == RecordStatus::Deleted) { - auto &tuple = scan_req.DataSyncVec(core_idx).at(key_idx); - target_key = tuple.Key(); - target_rec = tuple.Payload(); - version_ts = tuple.commit_ts_; - if (tuple.payload_status_ == RecordStatus::Deleted) - { - // Skip the deleted record. - continue; - } - assert(target_key.KeyPtr() != nullptr && - target_rec != nullptr); + // Skip the deleted record. 
+ continue; + } + assert(target_key.KeyPtr() != nullptr && target_rec != nullptr); - int32_t appended_sk_size = sk_encoder->AppendPackedSk( - &target_key, target_rec, version_ts, index_set); - if (appended_sk_size < 0) - { - LOG(ERROR) - << "ScanAndEncodeIndex: Failed to encode " - << "key for index: " << tbl_name_it->StringView() - << "of ng#" << node_group_id_; - // Finish the pack sk operation - task_result_ = CcErrorCode::PACK_SK_ERR; - pack_sk_err_ = std::move(sk_encoder->GetError()); - return; - } - } /* End of each key */ + int32_t appended_sk_size = sk_encoder->AppendPackedSk( + &target_key, target_rec, version_ts, index_set); + if (appended_sk_size < 0) + { + LOG(ERROR) << "ScanAndEncodeIndex: Failed to encode " + << "key for index: " << tbl_name_it->StringView() + << "of ng#" << node_group_id_; + // Finish the pack sk operation + task_result_ = CcErrorCode::PACK_SK_ERR; + pack_sk_err_ = std::move(sk_encoder->GetError()); + return; + } + } /* End of each key */ - if (tbl_name_it == new_indexes_name_->cbegin()) + if (tbl_name_it == new_indexes_name_->cbegin()) + { + batch_tuples += scan_req.accumulated_scan_cnt_; + if (batch_tuples % 10240 == 0 && + !task_status_->CheckTxTermStatus()) + { + LOG(WARNING) + << "ScanAndEncodeIndex: Terminate this task cause " + << "the tx leader transferred of ng#" << node_group_id_; + task_status_->TerminateGenerateSk(); + task_result_ = CcErrorCode::TX_NODE_NOT_LEADER; + return; + } + // Update the last finished key. 
+ auto &paused_key = scan_req.PausePos().first; + if (!scan_req.IsDrained()) { - batch_tuples += scan_req.accumulated_scan_cnt_.at(core_idx); - if (batch_tuples % 10240 == 0 && - !task_status_->CheckTxTermStatus()) + if (last_finished_pos.IsOwner()) { - LOG(WARNING) - << "ScanAndEncodeIndex: Terminate this task cause " - << "the tx leader transferred of ng#" - << node_group_id_; - task_status_->TerminateGenerateSk(); - task_result_ = CcErrorCode::TX_NODE_NOT_LEADER; - return; + last_finished_pos.Copy(paused_key); } - // Update the last finished key. - auto &paused_key = scan_req.PausePos(core_idx).first; - if (!scan_req.IsDrained(core_idx)) + else { - if (last_finished_pos[core_idx].IsOwner()) - { - last_finished_pos[core_idx].Copy(paused_key); - } - else - { - last_finished_pos[core_idx] = paused_key.Clone(); - } + last_finished_pos = paused_key.Clone(); } - // If the data is drained - scan_data_drained = - scan_req.IsDrained(core_idx) && scan_data_drained; } - } /* End of each core */ + // If the data is drained + scan_data_drained = scan_req.IsDrained(); + } } /* End of foreach new_indexes_name */ scan_pk_finished = scan_data_drained; @@ -680,37 +660,41 @@ CcErrorCode UploadIndexContext::UploadIndexInternal( size_t finished_upload_count = 0; CcErrorCode upload_res_code = CcErrorCode::NO_ERROR; size_t upload_req_count = 0; + for (auto &[table_name, ng_entries] : ng_index_set) { - for (auto &[ng_id, entry_vec] : ng_entries) + for (auto &[ng_id, range_entries] : ng_entries) { - entry_vec_size = entry_vec.size(); - batch_req_cnt = (entry_vec_size / upload_batch_size_ + - (entry_vec_size % upload_batch_size_ ? 1 : 0)); - int64_t &expected_term = leader_terms_.at(ng_id); - size_t start_idx = 0; - size_t end_idx = - (batch_req_cnt > 1 ? 
upload_batch_size_ : entry_vec_size); - for (size_t idx = 0; idx < batch_req_cnt; ++idx) + for (auto &[range_id, entry_vec] : range_entries) { - SendIndexes(table_name, - ng_id, - expected_term, - entry_vec, - (end_idx - start_idx), - start_idx, - req_mux, - req_cv, - finished_upload_count, - upload_res_code); - ++upload_req_count; - // Next batch - start_idx = end_idx; - end_idx = ((start_idx + upload_batch_size_) > entry_vec_size - ? entry_vec_size - : (start_idx + upload_batch_size_)); + entry_vec_size = entry_vec.size(); + batch_req_cnt = (entry_vec_size / upload_batch_size_ + + (entry_vec_size % upload_batch_size_ ? 1 : 0)); + + size_t start_idx = 0; + size_t end_idx = + (batch_req_cnt > 1 ? upload_batch_size_ : entry_vec_size); + for (size_t idx = 0; idx < batch_req_cnt; ++idx) + { + SendIndexes(table_name, + ng_id, + expected_term, + range_id, + entry_vec, + (end_idx - start_idx), + start_idx, + req_mux, + req_cv, + finished_upload_count, + upload_res_code); + ++upload_req_count; + start_idx = end_idx; + end_idx = ((start_idx + upload_batch_size_) > entry_vec_size + ? 
entry_vec_size + : (start_idx + upload_batch_size_)); + } } } } @@ -730,7 +714,8 @@ void UploadIndexContext::SendIndexes( const TableName &table_name, NodeGroupId dest_ng_id, int64_t &ng_term, - const std::vector &write_entry_vec, + int32_t partition_id, + const std::vector> &write_entry_vec, size_t batch_size, size_t start_key_idx, bthread::Mutex &req_mux, @@ -740,14 +725,13 @@ void UploadIndexContext::SendIndexes( { uint32_t dest_node_id = Sharder::Instance().LeaderNodeId(dest_ng_id); LocalCcShards *cc_shards = Sharder::Instance().GetLocalCcShards(); - size_t core_cnt = cc_shards->Count(); if (dest_node_id == cc_shards->NodeId()) { UploadBatchCc *req_ptr = NextRequest(); req_ptr->Reset(table_name, dest_ng_id, ng_term, - core_cnt, + partition_id, batch_size, start_key_idx, write_entry_vec, @@ -757,10 +741,9 @@ void UploadIndexContext::SendIndexes( res_code, UploadBatchType::SkIndexData); - for (size_t core = 0; core < core_cnt; ++core) - { - cc_shards->EnqueueToCcShard(core, req_ptr); - } + uint16_t dest_core = + static_cast((partition_id & 0x3FF) % cc_shards->Count()); + cc_shards->EnqueueToCcShard(dest_core, req_ptr); } else { @@ -834,6 +817,7 @@ void UploadIndexContext::SendIndexes( remote::ToRemoteType::ConvertTableType(table_name.Type())); req_ptr->set_table_engine( remote::ToRemoteType::ConvertTableEngine(table_name.Engine())); + req_ptr->set_partition_id(partition_id); size_t end_key_idx = start_key_idx + batch_size; req_ptr->set_kind(remote::UploadBatchKind::SK_DATA); req_ptr->set_batch_size(batch_size); @@ -853,15 +837,24 @@ void UploadIndexContext::SendIndexes( std::string *rec_status_str = req_ptr->mutable_rec_status(); // All generated sk should be normal status. 
const RecordStatus rec_status = RecordStatus::Normal; + // range_size_flags + req_ptr->clear_range_size_flags(); + std::string *range_size_flags_str = req_ptr->mutable_range_size_flags(); + for (size_t idx = start_key_idx; idx < end_key_idx; ++idx) { - write_entry_vec.at(idx)->key_.Serialize(*keys_str); - write_entry_vec.at(idx)->rec_->Serialize(*recs_str); - val_ptr = reinterpret_cast( - &(write_entry_vec.at(idx)->commit_ts_)); + uint8_t range_size_flags = write_entry_vec.at(idx).first; + WriteEntry *write_entry = write_entry_vec.at(idx).second; + write_entry->key_.Serialize(*keys_str); + write_entry->rec_->Serialize(*recs_str); + val_ptr = + reinterpret_cast(&(write_entry->commit_ts_)); commit_ts_str->append(val_ptr, len_sizeof); rec_status_str->append(reinterpret_cast(&rec_status), sizeof(rec_status)); + range_size_flags_str->append( + reinterpret_cast(&range_size_flags), + sizeof(range_size_flags)); } brpc::Controller *cntl_ptr = upload_batch_closure->Controller(); @@ -989,17 +982,24 @@ void UploadIndexContext::AdvanceWriteEntryForRangeInfo( size_t new_range_idx = 0; auto *range_info = range_record.GetRangeInfo(); + const int32_t range_id = range_info->PartitionId(); + const uint8_t default_flags = + 0x10 | static_cast(range_info->IsDirty()); while (cur_write_entry_it != next_range_start) { WriteEntry &write_entry = *cur_write_entry_it; - auto ng_it = ng_write_entrys.try_emplace(range_ng); - ng_it.first->second.push_back(&write_entry); + auto &range_vec = ng_write_entrys[range_ng][range_id]; + range_vec.emplace_back(default_flags, &write_entry); + uint8_t *old_range_flags_ptr = &range_vec.back().first; + + uint8_t *new_bucket_flags_ptr = nullptr; // If current range is migrating, forward to new range owner. 
if (new_bucket_ng != UINT32_MAX) { - ng_write_entrys.try_emplace(new_bucket_ng) - .first->second.push_back(&write_entry); + auto &new_bucket_vec = ng_write_entrys[new_bucket_ng][range_id]; + new_bucket_vec.emplace_back(default_flags, &write_entry); + new_bucket_flags_ptr = &new_bucket_vec.back().first; } // If range is splitting and the key will fall on a new range after @@ -1016,18 +1016,25 @@ void UploadIndexContext::AdvanceWriteEntryForRangeInfo( } if (new_range_ng != UINT32_MAX) { - if (new_range_ng != range_ng) - { - ng_write_entrys.try_emplace(new_range_ng) - .first->second.push_back(&write_entry); - } + const int32_t new_range_id = + range_info->NewPartitionId()->at(new_range_idx - 1); + + ng_write_entrys[new_range_ng][new_range_id].emplace_back( + default_flags, &write_entry); + // Only update range size on the new range + *old_range_flags_ptr &= 0x0F; + // If the new range is migrating, forward to the new owner of new // range. if (new_range_new_bucket_ng != UINT32_MAX && new_range_new_bucket_ng != range_ng) { - ng_write_entrys.try_emplace(new_range_new_bucket_ng) - .first->second.push_back(&write_entry); + ng_write_entrys[new_range_new_bucket_ng][new_range_id] + .emplace_back(default_flags, &write_entry); + if (new_bucket_flags_ptr) + { + *new_bucket_flags_ptr &= 0x0F; + } } } diff --git a/tx_service/src/tx_execution.cpp b/tx_service/src/tx_execution.cpp index 6e80dbba..f46fb46a 100644 --- a/tx_service/src/tx_execution.cpp +++ b/tx_service/src/tx_execution.cpp @@ -1963,13 +1963,14 @@ void TransactionExecution::Process(ReadOperation &read) // error to the tx read request. assert(!lock_range_bucket_result_.IsError()); - // Uses the lower 10 bits of the key's hash code to shard - // the key across CPU cores in a cc node. - uint32_t residual = key.Hash() & 0x3FF; + // Uses the partition id to shard the key across CPU cores + // in a cc node. 
+ partition_id = range_rec_.GetRangeInfo()->PartitionId(); + uint32_t residual = + static_cast((partition_id & 0x3FF)); NodeGroupId range_ng = range_rec_.GetRangeOwnerNg()->BucketOwner(); key_shard_code = range_ng << 10 | residual; - partition_id = range_rec_.GetRangeInfo()->PartitionId(); } } else @@ -4611,12 +4612,17 @@ bool TransactionExecution::FillDataLogRequest(WriteToLogOp &write_log) // ngs, write log for both ngs. uint32_t forward_ng_id = Sharder::Instance().ShardToCcNodeGroup(forward_shard_code); - auto table_rec_it = ng_table_set.try_emplace(forward_ng_id); + auto [table_rec_it, inserted] = + ng_table_set.try_emplace(forward_ng_id); + if (!inserted) + { + continue; + } std::unordered_map< TableName, std::vector< std::pair>> - &table_rec_set = table_rec_it.first->second.second; + &table_rec_set = table_rec_it->second.second; auto rec_vec_it = table_rec_set.emplace( std::piecewise_construct, @@ -5288,6 +5294,7 @@ void TransactionExecution::Process(PostProcessOp &post_process) { for (const auto &[key, write_entry] : pair.second) { + bool on_dirty_range = write_entry.on_dirty_range_; CcReqStatus ret = cc_handler_->PostWrite(tx_number, tx_term_, @@ -5297,10 +5304,12 @@ void TransactionExecution::Process(PostProcessOp &post_process) write_entry.rec_.get(), write_entry.op_, write_entry.key_shard_code_, - post_process.hd_result_); + post_process.hd_result_, + write_entry.partition_id_, + on_dirty_range); update_post_cnt(ret); - for (auto &[forward_shard_code, cce_addr] : + for (auto &[forward_shard_code, forward_pair] : write_entry.forward_addr_) { CcReqStatus ret = @@ -5308,11 +5317,13 @@ void TransactionExecution::Process(PostProcessOp &post_process) tx_term_, command_id, commit_ts_, - cce_addr, + forward_pair.second, write_entry.rec_.get(), write_entry.op_, forward_shard_code, - post_process.hd_result_); + post_process.hd_result_, + forward_pair.first, + on_dirty_range); update_post_cnt(ret); } } @@ -5394,9 +5405,10 @@ void 
TransactionExecution::Process(PostProcessOp &post_process) // Keys that were not successfully locked in the cc // map do not need post-processing. - for (const auto &[forward_shard_code, cce_addr] : + for (const auto &[forward_shard_code, forward_pair] : write_entry.forward_addr_) { + const CcEntryAddr &cce_addr = forward_pair.second; if (cce_addr.Term() >= 0) { assert(!cce_addr.Empty()); @@ -7763,17 +7775,19 @@ void TransactionExecution::Process(BatchReadOperation &batch_read_op) TxRecord &rec = *read_batch[idx].record_; uint32_t sharding_code = 0; - size_t key_hash = key.Hash(); - sharding_code = - read_batch[idx].cce_addr_.NodeGroupId() << 10 | (key_hash & 0x3FF); int32_t partition_id = -1; if (table_name.IsHashPartitioned()) { + size_t key_hash = key.Hash(); + sharding_code = read_batch[idx].cce_addr_.NodeGroupId() << 10 | + (key_hash & 0x3FF); partition_id = Sharder::MapKeyHashToHashPartitionId(key_hash); } else { partition_id = batch_read_op.range_ids_[idx]; + sharding_code = read_batch[idx].cce_addr_.NodeGroupId() << 10 | + (partition_id & 0x3FF); } cc_handler_->Read( table_name, diff --git a/tx_service/src/tx_operation.cpp b/tx_service/src/tx_operation.cpp index 926ff090..275309ae 100644 --- a/tx_service/src/tx_operation.cpp +++ b/tx_service/src/tx_operation.cpp @@ -464,19 +464,20 @@ void AcquireWriteOperation::AggregateAcquiredKeys(TransactionExecution *txm) } } - for (auto &[forward_shard_code, cce_addr] : write_entry->forward_addr_) + for (auto &[forward_shard_code, forward_pair] : + write_entry->forward_addr_) { AcquireKeyResult &acquire_key_res = acquire_key_vec[res_idx++]; CcEntryAddr &addr = acquire_key_res.cce_addr_; term = addr.Term(); if (term < 0) { - cce_addr.SetCceLock(0, -1, 0); + forward_pair.second.SetCceLock(0, -1, 0); } else if (acquire_key_res.commit_ts_ == 0) { // acqurie write failed on forward addr. 
- cce_addr.SetCceLock(0, -1, 0); + forward_pair.second.SetCceLock(0, -1, 0); // Set term to -1 so that post write will not be sent to this // addr. addr.SetTerm(-1); @@ -485,7 +486,7 @@ void AcquireWriteOperation::AggregateAcquiredKeys(TransactionExecution *txm) { // Assigns to the write entry the cc entry address obtained // in the acquire phase. - cce_addr = addr; + forward_pair.second = addr; } // No need to dedup forwarded req since they are not visible to read @@ -720,17 +721,23 @@ void LockWriteRangeBucketsOp::Advance(TransactionExecution *txm) size_t new_range_idx = 0; auto *range_info = txm->range_rec_.GetRangeInfo(); + int32_t range_id = range_info->PartitionId(); + uint32_t residual = static_cast(range_id & 0x3FF); + bool on_dirty_range = range_info->IsDirty(); while (write_key_it_ != next_range_start) { const TxKey &write_tx_key = write_key_it_->first; WriteSetEntry &write_entry = write_key_it_->second; - size_t hash = write_tx_key.Hash(); - write_entry.key_shard_code_ = (range_ng << 10) | (hash & 0x3FF); + write_entry.key_shard_code_ = (range_ng << 10) | residual; + write_entry.partition_id_ = range_id; + write_entry.on_dirty_range_ = on_dirty_range; // If current range is migrating, forward to new range owner. 
if (new_bucket_ng != UINT32_MAX) { - write_entry.forward_addr_.try_emplace((new_bucket_ng << 10) | - (hash & 0x3FF)); + assert(new_bucket_ng != range_ng); + write_entry.forward_addr_.try_emplace( + ((new_bucket_ng << 10) | residual), + std::make_pair(range_id, CcEntryAddr())); } // If range is splitting and the key will fall on a new range after @@ -748,18 +755,47 @@ void LockWriteRangeBucketsOp::Advance(TransactionExecution *txm) } if (new_range_ng != UINT32_MAX) { - if (new_range_ng != range_ng) + int32_t new_range_id = + range_info->NewPartitionId()->at(new_range_idx - 1); + uint32_t new_residual = + static_cast(new_range_id & 0x3FF); + uint16_t core_cnt = + Sharder::Instance().GetLocalCcShards()->Count(); + uint16_t new_range_shard = + static_cast(new_residual % core_cnt); + uint16_t range_shard = + static_cast(residual % core_cnt); + if (new_range_ng != range_ng || new_range_shard != range_shard) + { + write_entry.forward_addr_.try_emplace( + ((new_range_ng << 10) | new_residual), + std::make_pair(new_range_id, CcEntryAddr())); + // There is no need to update the range size of the old + // range. + write_entry.partition_id_ = -1; + } + else if (new_range_ng == range_ng && + new_range_shard == range_shard) { - write_entry.forward_addr_.try_emplace((new_range_ng << 10) | - (hash & 0x3FF)); + // Only update the range size on the new range id in case of + // the new range and the old range are located on the same + // shard. + write_entry.partition_id_ = new_range_id; } + // If the new range is migrating, forward to the new owner of // new range. - if (new_range_new_bucket_ng != UINT32_MAX && - new_range_new_bucket_ng != range_ng) + // TODO(ysw): double check the logic here. 
+ if (new_range_new_bucket_ng != UINT32_MAX) { - write_entry.forward_addr_.try_emplace( - (new_range_new_bucket_ng << 10) | (hash & 0x3FF)); + assert(new_range_new_bucket_ng != new_range_ng); + if (new_range_new_bucket_ng != range_ng || + new_range_shard != range_shard) + { + write_entry.forward_addr_.try_emplace( + ((new_range_new_bucket_ng << 10) | new_residual), + std::make_pair(new_range_id, CcEntryAddr())); + } } } @@ -4605,14 +4641,21 @@ void SplitFlushRangeOp::Forward(TransactionExecution *txm) int64_t tx_term = txm->TxTerm(); LocalCcShards *local_shards = Sharder::Instance().GetLocalCcShards(); - // The new ranges that still lands to the same ng after split. + // The new ranges that still lands to the same core of same ng + // after split. std::vector> ranges; ranges.reserve(new_ranges.size()); + uint16_t range_shard_id = + static_cast((range_info_->PartitionId() & 0x3FF) % + local_shards->Count()); for (auto iter = new_ranges.begin(); iter != new_ranges.end(); ++iter) { + uint16_t new_range_shard_id = static_cast( + (iter->second & 0x3FF) % local_shards->Count()); if (local_shards->GetRangeOwner(iter->second, node_group) - ->BucketOwner() == node_group) + ->BucketOwner() == node_group && + (new_range_shard_id == range_shard_id)) { const TxKey *start_key = &(iter->first); const TxKey *end_key = @@ -5132,8 +5175,13 @@ bool SplitFlushRangeOp::ForwardKickoutIterator(TransactionExecution *txm) NodeGroupId new_owner = new_range_bucket_info->BucketOwner(); NodeGroupId dirty_new_owner = new_range_bucket_info->DirtyBucketOwner(); - if (new_owner != txm->TxCcNodeId() && - dirty_new_owner != txm->TxCcNodeId()) + uint16_t range_shard_id = static_cast( + (range_info_->PartitionId() & 0x3FF) % local_shards->Count()); + uint16_t new_range_shard_id = static_cast( + (kickout_data_it_->second & 0x3FF) % local_shards->Count()); + if ((new_owner != txm->TxCcNodeId() && + dirty_new_owner != txm->TxCcNodeId()) || + (range_shard_id != new_range_shard_id)) { // Note that even if 
the new node group falls on the same node, // we still need to clean the cc entry from native ccmap since @@ -5152,11 +5200,14 @@ bool SplitFlushRangeOp::ForwardKickoutIterator(TransactionExecution *txm) } kickout_old_range_data_op_.clean_type_ = CleanType::CleanRangeData; + kickout_old_range_data_op_.range_id_ = + range_info_->PartitionId(); kickout_old_range_data_op_.node_group_ = txm->TxCcNodeId(); LOG(INFO) << "Split Flush transaction kickout old data in range " << kickout_data_it_->second << ", original range id " << range_info_->PartitionId() + << ", new range id: " << kickout_data_it_->second << ", txn: " << txm->TxNumber(); kickout_data_it_++; return false;