From bf722edf7b7e90170fe8503526cc162d923a977b Mon Sep 17 00:00:00 2001 From: lixin <1037997956@qq.com> Date: Mon, 12 Jan 2026 11:46:49 +0800 Subject: [PATCH] mem: add strict mechanism in mdp Change-Id: Ic8ecdc370e8ff3eb7d99065c7b389b062a133f5e --- src/cpu/o3/BaseO3CPU.py | 1 + src/cpu/o3/mem_dep_unit.cc | 2 ++ src/cpu/o3/store_set.cc | 39 ++++++++++++++++++++++++++++++++++---- src/cpu/o3/store_set.hh | 13 +++++++++++++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index b233e62af7..e072cce65b 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -197,6 +197,7 @@ def support_take_over(cls): LFSTEntrySize = Param.Unsigned(4,"The number of store table inst in every entry of LFST can contain") SSITSize = Param.Unsigned(1024, "Store set ID table size") enable_storeSet_train = Param.Bool(True, "Training store set predictor") + enable_storeSet_strict_wait = Param.Bool(True, "Enable StoreSet strict wait for loads") BankConflictCheck = Param.Bool(True, "open Bank conflict check") sbufferBankWriteAccurately = Param.Bool(False, "Sbuffer write to memory with bank conflict check") diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 2fe1279a4d..ac7ce551d2 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -63,6 +63,7 @@ MemDepUnit::MemDepUnit(const BaseO3CPUParams &params) stats(nullptr) { DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); + depPred.setStrictWaitEnabled(params.enable_storeSet_strict_wait); } MemDepUnit::~MemDepUnit() @@ -99,6 +100,7 @@ MemDepUnit::init(const BaseO3CPUParams &params, ThreadID tid, CPU *cpu) depPred.init(params.store_set_clear_period, params.store_set_clear_thres, params.SSITSize, params.LFSTSize, params.LFSTEntrySize); + depPred.setStrictWaitEnabled(params.enable_storeSet_strict_wait); std::string stats_group_name = csprintf("MemDepUnit__%i", tid); cpu->addStatGroup(stats_group_name.c_str(), &stats); diff --git 
a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc index cce1e2061a..77989b9e16 100644 --- a/src/cpu/o3/store_set.cc +++ b/src/cpu/o3/store_set.cc @@ -54,9 +54,12 @@ StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size,int _st SSIT.resize(SSITSize); validSSIT.resize(SSITSize); + SSITStrict.resize(SSITSize); - for (int i = 0; i < SSITSize; ++i) + for (int i = 0; i < SSITSize; ++i) { validSSIT[i] = false; + SSITStrict[i] = false; + } if (!isPowerOf2(LFSTSize)) { fatal("Invalid LFST size!\n"); @@ -68,6 +71,7 @@ StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size,int _st validLFSTLarge.resize(LFSTSize); //validLFST.resize(LFSTSize); VictimEntryID.resize(LFSTSize); + pendingStores.clear(); for (int i = 0; i < LFSTSize; ++i) { // validLFST[i] = false; @@ -120,6 +124,7 @@ StoreSet::init(uint64_t clear_period, int clear_period_thres, int _SSIT_size, in LFSTLargePC.resize(LFSTSize); validLFSTLarge.resize(LFSTSize); VictimEntryID.resize(LFSTSize); + pendingStores.clear(); // LFST.resize(LFSTSize); @@ -170,10 +175,12 @@ StoreSet::violation(Addr store_PC, Addr load_PC) validSSIT[load_index] = true; SSIT[load_index] = ld_new_set; + SSITStrict[load_index] = false; validSSIT[store_index] = true; SSIT[store_index] = sd_new_set; + SSITStrict[store_index] = false; assert(ld_new_set < LFSTSize); assert(sd_new_set < LFSTSize); @@ -187,6 +194,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC) validSSIT[store_index] = true; SSIT[store_index] = sd_new_set; + SSITStrict[store_index] = false; assert(sd_new_set < LFSTSize); @@ -200,6 +208,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC) validSSIT[load_index] = true; SSIT[load_index] = ld_new_set; + SSITStrict[load_index] = false; DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " "load %#x, store %#x\n", @@ -213,6 +222,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC) // The store set with the lower number wins if (store_SSID > load_SSID) { SSIT[store_index] = load_SSID; 
+ SSITStrict[store_index] = false; DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; " "for load %#x, store %#x\n", @@ -220,9 +230,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC) } else { SSIT[load_index] = store_SSID; - if (store_SSID == load_SSID) { - SSITStrict[load_index] = true; - } + SSITStrict[load_index] = (store_SSID == load_SSID); DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; " "for load %#x, store %#x\n", @@ -264,6 +272,7 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid, Cyc // checkClear(); int victim_inst; checkClear(curCycle); + pendingStores.insert(store_seq_num); assert(index < SSITSize); if (!validSSIT[index]) { @@ -327,6 +336,16 @@ StoreSet::checkInst(Addr PC) assert(inst_SSID < LFSTSize); + if (enableStrictWait && checkInstStrict(PC)) { + vec.insert(vec.end(), + pendingStores.begin(), + pendingStores.end()); + DPRINTF(StoreSet, + "Strict inst %#x with index=%i, ssid=%i, had %lu outstanding stores\n", + PC, index, inst_SSID, vec.size()); + return vec; + } + // if (!validLFST[inst_SSID]) { // DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " @@ -358,6 +377,8 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) return; } + pendingStores.erase(issued_seq_num); + int index = calcIndexSSIT(issued_PC); int store_SSID; @@ -398,6 +419,14 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) void StoreSet::squash(InstSeqNum squashed_num, ThreadID tid) { + for (auto it = pendingStores.begin(); it != pendingStores.end();) { + if (*it > squashed_num) { + it = pendingStores.erase(it); + } else { + ++it; + } + } + for (int i=0;i squashed_num) { @@ -418,6 +447,7 @@ StoreSet::clear() { for (int i = 0; i < SSITSize; ++i) { validSSIT[i] = false; + SSITStrict[i] = false; } for (int i = 0; i < LFSTSize; ++i) { @@ -425,6 +455,7 @@ StoreSet::clear() validLFSTLarge[i][j] = false; } } + pendingStores.clear(); } diff --git a/src/cpu/o3/store_set.hh 
b/src/cpu/o3/store_set.hh index 573d4eea2e..203cdc697c 100644 --- a/src/cpu/o3/store_set.hh +++ b/src/cpu/o3/store_set.hh @@ -32,6 +32,7 @@ #include #include #include +#include <unordered_set> #include #include @@ -103,6 +104,8 @@ class StoreSet */ std::vector checkInst(Addr PC); + void setStrictWaitEnabled(bool enable) { enableStrictWait = enable; } + /** Records this PC/sequence number as issued. */ void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); @@ -146,6 +149,16 @@ class StoreSet /** Bit vector to tell if the LFST has a valid entry. */ std::vector> validLFSTLarge; + /** + * Stores inserted but not yet issued/squashed/cleared. + * + * For strict loads, we conservatively wait on all outstanding stores + * tracked here. + */ + std::unordered_set<InstSeqNum> pendingStores; + + bool enableStrictWait = true; + /** Map of stores that have been inserted into the store set, but * not yet issued or squashed. */