diff --git a/src/cpu/o3/issue_queue.cc b/src/cpu/o3/issue_queue.cc index ad6fa21c5b..fa2e440e6b 100644 --- a/src/cpu/o3/issue_queue.cc +++ b/src/cpu/o3/issue_queue.cc @@ -36,14 +36,6 @@ selector->deallocate(x); \ } while (0) -#define READYQ_PUSH(x) \ - do { \ - (x)->setInReadyQ(); \ - auto& readyQ = readyQclassify[(x)->opClass()]; \ - auto it = std::lower_bound(readyQ->begin(), readyQ->end(), (x), select_policy()); \ - readyQ->insert(it, (x)); \ - } while (0) - // must be consistent with FUScheduler.py // rfTypePortId = regfile typeid + portid #define MAXVAL_TYPEPORTID (1 << (2 + 4)) // [5:4] is typeid, [3:0] is portid @@ -191,21 +183,18 @@ IssueQue::IssueQue(const IssueQueParams& params) iqsize(params.size), scheduleToExecDelay(params.scheduleToExecDelay), iqname(params.name), - inflightIssues(scheduleToExecDelay, 0), selector(params.sel) { - toIssue = inflightIssues.getWire(0); - toFu = inflightIssues.getWire(-scheduleToExecDelay); if (outports > 8) { panic("%s: outports > 8 is not supported\n", iqname); } + toIssue.resize(outports); + toFu.resize(outports); portBusy.resize(outports, 0); - intRdRfTPI.resize(outports); fpRdRfTPI.resize(outports); intWrRfTPI.resize(outports); - readyQs.resize(outports, nullptr); readyQclassify.resize(Num_OpClasses, nullptr); @@ -355,7 +344,6 @@ IssueQue::addToFu(const DynInstPtr& inst) void IssueQue::issueToFu() { - int size = toFu->size; int replayed = 0; int issued = 0; @@ -393,8 +381,8 @@ IssueQue::issueToFu() issued++; } - for (int i = 0; i < size; i++) { - auto inst = toFu->pop(); + for (int i = 0; i < outports; i++) { + auto& inst = *toFu[i]; if (!inst) { continue; } @@ -409,7 +397,7 @@ IssueQue::issueToFu() (inst->isStore() && (issuedStore >= numStorePipe)) || blockLoad) { inst->clearScheduled(); // only for load/store - READYQ_PUSH(inst); + readyQInsert(inst); DPRINTF(Schedule, "[sn:%llu] issue failed due to being occupied\n", inst->seqNum); continue; } @@ -526,7 +514,7 @@ IssueQue::addIfReady(const DynInstPtr& inst) DPRINTF(Schedule, "[sn:%llu] add to readyInstsQue\n", inst->seqNum); inst->clearCancel(); if (!inst->inReadyQ()) { - READYQ_PUSH(inst); + readyQInsert(inst); } } } @@ -629,12 +617,12 @@ IssueQue::scheduleInst() iqstats->arbFailed++; assert(inst->readyToIssue()); - READYQ_PUSH(inst); + readyQInsert(inst); } else [[likely]] { DPRINTF(Schedule, "[sn:%llu] no conflict, scheduled\n", inst->seqNum); iqstats->portissued[pi]++; inst->setScheduled(); - toIssue->push(inst); + *toIssue[pi] = inst; inst->issueportid = pi; if (!opPipelined[inst->opClass()]) { @@ -661,7 +649,6 @@ IssueQue::tick() instNumInsert = 0; scheduleInst(); - inflightIssues.advance(); for (auto& t : portBusy) { t = t >> 1; @@ -774,16 +761,6 @@ IssueQue::doSquash(const InstSeqNum seqNum) } } - for (int i = 0; i <= getIssueStages(); i++) { - int size = inflightIssues[-i].size; - for (int j = 0; j < size; j++) { - auto& inst = inflightIssues[-i].insts[j]; - if (inst && inst->isSquashed()) { - inst = nullptr; - } - } - } - // clear in depGraph for (auto& entrys : subDepGraph) { for (auto it = entrys.begin(); it != entrys.end();) { @@ -853,10 +830,13 @@ Scheduler::Scheduler(const SchedulerParams& params) int maxRdTypePortId = 0; int maxWrTypePortId = 0; for (int i = 0; i < issueQues.size(); i++) { + auto iq = issueQues[i]; issueQues[i]->setIQID(i); issueQues[i]->scheduler = this; - combinedFus += issueQues[i]->outports; panic_if(issueQues[i]->fuDescs.size() == 0, "Empty config IssueQue: " + issueQues[i]->getName()); + for (int j = 0; j < iq->outports; j++) { + inflightIssues.push_back(TimeBuffer(0, iq->scheduleToExecDelay)); + } for (auto fu : issueQues[i]->fuDescs) { for (auto op : fu->opDescList) { opExecTimeTable[op->opClass] = op->opLat; @@ -899,6 +879,13 @@ Scheduler::Scheduler(const SchedulerParams& params) } wrRfPortOccupancy.resize(maxWrTypePortId, {nullptr, 0, 0}); + int portid = 0; + for (auto iq : issueQues) { + for (int i = 0; i < iq->outports; i++) { + iq->setIssuePipe(inflightIssues[portid], i); + portid++; + } + } // dispatch distance counter allocate dispOpdist.resize(Num_OpClasses, nullptr); @@ -1027,6 +1014,10 @@ Scheduler::tick() for (auto it : issueQues) { it->tick(); } + + for (auto& it : inflightIssues) { + it.advance(); + } } void @@ -1392,14 +1383,11 @@ Scheduler::loadCancel(const DynInstPtr& inst) } } - for (auto iq : issueQues) { - for (int i = 0; i <= iq->getIssueStages(); i++) { - int size = iq->inflightIssues[-i].size; - for (int j = 0; j < size; j++) { - auto& inst = iq->inflightIssues[-i].insts[j]; - if (inst && inst->canceled()) { - inst = nullptr; - } + for (auto& it : inflightIssues) { + for (int i = 0; i < it.getSize(); i++) { + auto& inst = it[i]; + if (inst && inst->canceled()) { + inst = nullptr; } } } @@ -1500,6 +1488,15 @@ Scheduler::doSquash(const InstSeqNum seqNum) for (auto it : issueQues) { it->doSquash(seqNum); } + + for (auto& it : inflightIssues) { + for (int i = 0; i < it.getSize(); i++) { + auto& inst = it[i]; + if (inst && inst->isSquashed()) { + inst = nullptr; + } + } + } } uint32_t diff --git a/src/cpu/o3/issue_queue.hh b/src/cpu/o3/issue_queue.hh index 997815e1d7..a90dbae703 100644 --- a/src/cpu/o3/issue_queue.hh +++ b/src/cpu/o3/issue_queue.hh @@ -106,12 +106,15 @@ class IssueQue : public SimObject friend class PAgeSelector; std::string _name; + public: const int inports; const int outports; const int iqsize; const int replayQsize = 32; const int scheduleToExecDelay; const std::string iqname; + + private: std::vector> portFuDescs; std::vector fuDescs; std::vector opPipelined; @@ -133,10 +136,8 @@ class IssueQue : public SimObject DynInstPtr pop(); }; - std::vector skidBuffer; - TimeBuffer inflightIssues; - TimeBuffer::wire toIssue; - TimeBuffer::wire toFu; + std::vector::wire> toIssue; + std::vector::wire> toFu; std::list instList; uint64_t instNumInsert = 0; @@ -196,9 +197,20 @@ class IssueQue : public SimObject void scheduleInst(); void addIfReady(const DynInstPtr& inst); void cancel(const DynInstPtr& inst); + inline void readyQInsert(const DynInstPtr& x) { + x->setInReadyQ(); + auto& readyQ = readyQclassify[x->opClass()]; + auto it = std::lower_bound(readyQ->begin(), readyQ->end(), x, select_policy()); + readyQ->insert(it, x); + } + public: inline void clearBusy(uint32_t pi) { portBusy.at(pi) = 0; } + inline void setIssuePipe(TimeBuffer& issuepipe, int pi) { + toIssue[pi] = issuepipe.getWire(scheduleToExecDelay); + toFu[pi] = issuepipe.getWire(0); + } IssueQue(const IssueQueParams& params); void setIQID(int id) { IQID = id; } @@ -288,11 +300,12 @@ class Scheduler : public SimObject std::vector dispTable; std::vector issueQues; std::vector> wakeMatrix; - uint32_t combinedFus; std::vector totalDispCounter; std::vector dispOpdist; + // Centralized management + std::vector> inflightIssues; std::vector instsToFu; std::vector earlyScoreboard; diff --git a/src/cpu/timebuf.hh b/src/cpu/timebuf.hh index f35f5688dd..8a6954390f 100644 --- a/src/cpu/timebuf.hh +++ b/src/cpu/timebuf.hh @@ -40,14 +40,12 @@ template class TimeBuffer { protected: - int past; - int future; - unsigned size; - int _id; - - char *data; - std::vector index; - unsigned base; + int _id = -1; + int past = 0; + int future = 0; + unsigned size = 0; + unsigned base = 0; + T* datas = nullptr; void valid(int idx) const { @@ -138,31 +136,38 @@ class TimeBuffer public: TimeBuffer(int p, int f) - : past(p), future(f), size(past + future + 1), - data(new char[size * sizeof(T)]), index(size), base(0) + : past(p), future(f), size(past + future + 1) { assert(past >= 0 && future >= 0); - char *ptr = data; + datas = (T*)new char[sizeof(T) * size]; + std::memset((void*)datas, 0, sizeof(T) * size); for (unsigned i = 0; i < size; i++) { - index[i] = ptr; - std::memset(ptr, 0, sizeof(T)); - new (ptr) T; - ptr += sizeof(T); + new (datas + i) T; } - _id = -1; } - TimeBuffer() - : data(NULL) + TimeBuffer() {} + + TimeBuffer(const TimeBuffer &other) + : _id(other._id), past(other.past), future(other.future), size(other.size), base(other.base) + { + datas = new T[size]; + for (unsigned i = 0; i < size; i++) { + datas[i] = other.datas[i]; // must use explicit copy to handle non-POD types + } + } + + TimeBuffer(TimeBuffer &&other) noexcept + : _id(other._id), past(other.past), future(other.future), size(other.size), base(other.base), datas(other.datas) { + // Null out the other datas pointer to avoid double deletion + other.datas = nullptr; } ~TimeBuffer() { - for (unsigned i = 0; i < size; ++i) - (reinterpret_cast(index[i]))->~T(); - delete [] data; + delete [] datas; } void id(int id) @@ -184,9 +189,9 @@ class TimeBuffer int ptr = base + future; if (ptr >= (int)size) ptr -= size; - (reinterpret_cast(index[ptr]))->~T(); - std::memset(index[ptr], 0, sizeof(T)); - new (index[ptr]) T; + datas[ptr].~T(); + std::memset((void*)(datas + ptr), 0, sizeof(T)); + new (datas + ptr) T; } protected: @@ -212,21 +217,21 @@ class TimeBuffer { int vector_index = calculateVectorIndex(idx); - return reinterpret_cast(index[vector_index]); + return datas + vector_index; } T &operator[](int idx) { int vector_index = calculateVectorIndex(idx); - return reinterpret_cast(*index[vector_index]); + return datas[vector_index]; } const T &operator[] (int idx) const { int vector_index = calculateVectorIndex(idx); - return reinterpret_cast(*index[vector_index]); + return datas[vector_index]; } wire getWire(int idx)