diff --git a/src/AddressMapping.cpp b/src/AddressMapping.cpp index ecf24b9..10bc426 100644 --- a/src/AddressMapping.cpp +++ b/src/AddressMapping.cpp @@ -28,11 +28,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *********************************************************************************/ -#include "AddressMapping.h" - #include #include +#include "AddressMapping.h" #include "SystemConfiguration.h" #include "Utils.h" @@ -49,6 +48,7 @@ AddrMapping::AddrMapping() rankBitWidth = uLog2(getConfigParam(UINT, "NUM_RANKS")); bankBitWidth = uLog2(getConfigParam(UINT, "NUM_BANKS")); bankgroupBitWidth = uLog2(getConfigParam(UINT, "NUM_BANK_GROUPS")); + //subarrayBitWidth = uLog2(getConfigParam(UINT, "NUM_SUBARRAYS")); //how we make, how about we use ... rowBitWidth = uLog2(getConfigParam(UINT, "NUM_ROWS")); colBitWidth = uLog2(getConfigParam(UINT, "NUM_COLS")); byteOffsetWidth = uLog2(getConfigParam(UINT, "JEDEC_DATA_BUS_BITS") / 8); @@ -70,6 +70,18 @@ bool AddrMapping::isSameBankgroup(int bank0, int bank1) { return bankgroupId(bank0) == bankgroupId(bank1); } +unsigned AddrMapping::findsubarray(unsigned row) +{ + if(row < 0x2000) return 0; + else if(row<0x4000) return 1; + else if(row<0x6000) return 2; + else return 3; +} + +bool AddrMapping::isSameSubarray(int row, int sub) +{ + return findsubarray(row) == sub; +} void AddrMapping::addressMapping(uint64_t physicalAddress, unsigned& newTransactionChan, unsigned& newTransactionRank, unsigned& newTransactionBank, @@ -172,7 +184,7 @@ void AddrMapping::addressMapping(uint64_t physicalAddress, unsigned& newTransact newTransactionChan = diffBitWidth(&physicalAddress, channelBitWidth); } // clone of scheme 5, but channel moved to lower bits - else if (addressMappingScheme == Scheme7) + else if (addressMappingScheme == Scheme7) //how about using this logic? 
{ // row:col:rank:bank:chan newTransactionChan = diffBitWidth(&physicalAddress, channelBitWidth); diff --git a/src/AddressMapping.h b/src/AddressMapping.h index bb8fe81..c0a6f1b 100644 --- a/src/AddressMapping.h +++ b/src/AddressMapping.h @@ -55,14 +55,16 @@ class AddrMapping } unsigned bankgroupId(int bank); + static unsigned findsubarray(unsigned row); bool isSameBankgroup(int bank0, int bank1); - + bool isSameSubarray(int row, int sub); private: uint64_t transactionSize; uint64_t transactionMask; uint64_t channelBitWidth; uint64_t rankBitWidth; uint64_t bankBitWidth; + uint64_t subarrayBitWidth; uint64_t bankgroupBitWidth; uint64_t rowBitWidth; uint64_t colBitWidth; diff --git a/src/Bank.cpp b/src/Bank.cpp index 63644e1..0585b64 100644 --- a/src/Bank.cpp +++ b/src/Bank.cpp @@ -28,10 +28,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *********************************************************************************/ -#include "Bank.h" - #include +#include "Bank.h" #include "BusPacket.h" using namespace std; @@ -73,11 +72,12 @@ shared_ptr Bank::searchForRow(unsigned row, shared_ptrnext; } // if we get here, didn't find it - return NULL; + return NULL; } void Bank::read(BusPacket* busPacket) { + //cout<<"Bank::read() and busPacket->bank is "<bank<<" and row is "<row<<" and col is " <column<<" and bank's entry is "< rowHeadNode = rowEntries[busPacket->column]; shared_ptr foundNode = NULL; if ((foundNode = Bank::searchForRow(busPacket->row, rowHeadNode)) == NULL) @@ -88,46 +88,52 @@ void Bank::read(BusPacket* busPacket) *(busPacket->data) = foundNode->data; } } - +//i'd like to use this logic same as subarray.... 
void Bank::write(const BusPacket* busPacket) { // TODO: move all the error checking to BusPacket so once we have a bus // packet, // we know the fields are all legal - if (busPacket->column >= numCols) { - ERROR("== Error - Bus Packet column " << busPacket->column << " out of bounds"); - exit(-1); + //cout<<"== Error - Bus Packet column " << busPacket->column << " out of bounds and num_col is " <column << " out of bounds"); + //exit(-1); + return; } - // head of the list we need to search shared_ptr rowHeadNode = rowEntries[busPacket->column]; shared_ptr foundNode = NULL; - if ((foundNode = Bank::searchForRow(busPacket->row, rowHeadNode)) == NULL) - { - // not found - shared_ptr newRowNode = make_shared(); - // DataStruct* newRowNode = (DataStruct*)malloc(sizeof(DataStruct)); + uintptr_t const_addr = 0x55ffffff; + uintptr_t addr = reinterpret_cast(busPacket->data); + //cout<<"busPacket->row: "<row<<" and bank is "<bank<<" and col is "<column<const_addr) + { + if ((foundNode = Bank::searchForRow(busPacket->row, rowHeadNode)) == NULL) + { + // not found + shared_ptr newRowNode = make_shared(); + // DataStruct* newRowNode = (DataStruct*)malloc(sizeof(DataStruct)); - // insert at the head for speed - // TODO: Optimize this data structure for speedier lookups? - newRowNode->row = busPacket->row; - if (busPacket->data) - newRowNode->data = *(busPacket->data); - newRowNode->next = rowHeadNode; - rowEntries[busPacket->column] = newRowNode; - } - else - { - // found it, just plaster in the new data - foundNode->data = *(busPacket->data); - if (DEBUG_BANKS) + // insert at the head for speed + // TODO: Optimize this data structure for speedier lookups? 
+ newRowNode->row = busPacket->row; + if(busPacket->data) + newRowNode->data = *(busPacket->data); + newRowNode->next = rowHeadNode; + rowEntries[busPacket->column] = newRowNode; + } + else { - PRINTN(" -- Bank " << busPacket->bank << " writing to physical address 0x" << hex - << busPacket->physicalAddress << dec << ":"); - busPacket->printData(); - PRINT(""); + // found it, just plaster in the new data + foundNode->data = *(busPacket->data); + if (DEBUG_BANKS) + { + PRINTN(" -- Bank " << busPacket->bank << " writing to physical address 0x" << hex + << busPacket->physicalAddress << dec << ":"); + busPacket->printData(); + PRINT(""); + } } } } diff --git a/src/Bank.h b/src/Bank.h index db8ef23..ce9c073 100644 --- a/src/Bank.h +++ b/src/Bank.h @@ -40,6 +40,7 @@ #include "BusPacket.h" #include "SimulatorObject.h" #include "SystemConfiguration.h" +#include "SubArray.h" namespace DRAMSim { @@ -51,7 +52,7 @@ class Bank BurstType data; std::shared_ptr next; } DataStruct; - + //how about use this in subarray level logic? public: // functions Bank(ostream& simLog); @@ -59,13 +60,13 @@ class Bank void read(BusPacket* busPacket); void write(const BusPacket* busPacket); BankState currentState; + unsigned numCols; private: // private member std::vector> rowEntries; ostream& dramsimLog; static std::shared_ptr searchForRow(unsigned row, std::shared_ptr head); - unsigned numCols; }; } // namespace DRAMSim diff --git a/src/BankState.h b/src/BankState.h index b850e79..d327092 100644 --- a/src/BankState.h +++ b/src/BankState.h @@ -31,6 +31,9 @@ #ifndef BANKSTATE_H #define BANKSTATE_H +#include +#include + #include "BusPacket.h" #include "SystemConfiguration.h" @@ -42,7 +45,7 @@ enum CurrentBankState RowActive, Precharging, Refreshing, - PowerDown + PowerDown, }; class BankState @@ -52,10 +55,13 @@ class BankState public: // Fields CurrentBankState currentBankState; + //vector currentBankStates; unsigned openRowAddress; + //vector openRowAddresses; //exactly subsel.... 
uint64_t nextRead; uint64_t nextWrite; uint64_t nextActivate; + uint64_t nextSubSel; //subsel logic with activation.... uint64_t nextPrecharge; uint64_t nextPowerUp; BusPacketType lastCommand; diff --git a/src/Burst.h b/src/Burst.h index 8009bf0..439f3d7 100644 --- a/src/Burst.h +++ b/src/Burst.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "FP16.h" #include "npy.h" @@ -169,7 +170,6 @@ union BurstType fp16Data_[i] = (fp16)dis(gen); } } - string binToStr() const { stringstream ss; @@ -300,7 +300,15 @@ union BurstType return sum[0]; } - + fp16 fp16max() + { + float maxValue = convertH2F(fp16Data_[0]); + for(int i = 1; i < 16; i++) + { + maxValue = std::max(maxValue, convertH2F(fp16Data_[i])); + } + return convertF2H(maxValue); + } float fp32ReduceSum() { float sum = 0.0; @@ -340,13 +348,15 @@ union BurstType return ret; } - fp16 fp16Data_[16]; uint8_t u8Data_[32]; float fp32Data_[8]; uint32_t u32Data_[8]; uint16_t u16Data_[16]; + fp16 fp16Data_[16]; }; + + struct NumpyBurstType { vector shape; diff --git a/src/BusPacket.cpp b/src/BusPacket.cpp index fa29e89..0c64cea 100644 --- a/src/BusPacket.cpp +++ b/src/BusPacket.cpp @@ -59,6 +59,19 @@ BusPacket::BusPacket(BusPacketType packtype, uint64_t physicalAddr, unsigned col { } +/*BusPacket::BusPacket(BusPacketType packtype, uint64_t physicalAddr, unsigned col, unsigned rw, unsigned r, + unsigned b, unsigned s, BurstType* dat, ostream& simLog, std::string tg) + : dramsimLog(simLog), + BusPacketType(packtype), + column(col), + row(rw), + subarray(s), + bank(b), + physicalAddress(physicalAddr), + data(dat), + tag(tg) +{ +} //nothing help b*/ void BusPacket::print(uint64_t currentClockCycle, bool dataStart) { if (VERIFICATION_OUTPUT) diff --git a/src/BusPacket.h b/src/BusPacket.h index 15fc201..a5c65e6 100644 --- a/src/BusPacket.h +++ b/src/BusPacket.h @@ -46,7 +46,8 @@ enum BusPacketType PRECHARGE, // 3 REF, // 4 DATA, // 5 - RFCSB + RFCSB, + SUBSEL }; class BusPacket @@ -57,10 +58,12 @@ class BusPacket 
public: // Fields BusPacketType busPacketType; - unsigned column; + unsigned column; //important problem: buspacket has 256bit wide -->need cell logic unsigned row; + unsigned subarray; //or psub; designate psub area by unsigned bank; unsigned rank; + unsigned chan; uint64_t physicalAddress; BurstType* data; std::string tag; diff --git a/src/CSVWriter.h b/src/CSVWriter.h index df07bda..a0c9d74 100644 --- a/src/CSVWriter.h +++ b/src/CSVWriter.h @@ -34,7 +34,6 @@ #include #include #include - #include #include #include diff --git a/src/C_ALU.cpp b/src/C_ALU.cpp new file mode 100644 index 0000000..5d9e4c6 --- /dev/null +++ b/src/C_ALU.cpp @@ -0,0 +1,91 @@ +#include + +#include "C_ALU.h" +//#include "tests/KernelAddrGen.h" +#include "Burst.h" + +using namespace std; +using namespace DRAMSim; + +C_ALU::~C_ALU() +{ + delete C_REG; + delete S_REG; +} +/*void C_ALU::addertree(BurstType* result, int output_dim, int num_tile, int step, fp16* temp) //we got 16 types of things and measure +//prototype: output_dim = 1, num_tile = 16, step = 0 +{ + if (num_tile == 1) + return; + + int iter = num_tile / 2; //example we got num_tile for + if (step == 0) + { + for (int i = 0; i < iter; i++) + { + + temp[i] = result[2 * i * output_dim].fp16AdderTree() + + result[(2 * i + 1) * output_dim].fp16AdderTree(); + } + } + else + { + for (int i = 0; i < iter; i++) temp[i] = temp[i * 2] + temp[i * 2 + 1]; + + if (num_tile % 2 == 1) + temp[iter] = temp[num_tile]; + } + + adderTree(result, output_dim, ceil(double(num_tile) / (double)2), step + 1, temp); + return; +}*/ +void C_ALU::adderTree() +{ + for(int i = 0; i < 16; i++) + { + if(S_REG != nullptr) + *S_REG = *S_REG + C_REG->fp16Data_[i]; + } + return; +} +void C_ALU::accum(BurstType& burst, bool is_neg){ + if(pim_precision_==FP16){ + for(int fp = 0; fp < 16; fp++) + { + if(is_neg) C_REG->fp16Data_[fp] -= burst.fp16Data_[fp]; + else C_REG->fp16Data_[fp] += burst.fp16Data_[fp]; + } + } + else if(pim_precision_ == FP32){ + for(int fp = 0; fp < 
8; fp++) + { + if(is_neg) C_REG->fp32Data_[fp] -= burst.fp32Data_[fp]; + else C_REG->fp32Data_[fp] += burst.fp32Data_[fp]; /* FP32 lanes must accumulate from the FP32 view of the burst, matching the subtract branch */ + } + + } +} +void C_ALU::zeroize() +{ + if(S_REG != nullptr) *S_REG = fp16(); /* reset the scalar value in place; the pointer keeps owning the allocation that ~C_ALU() deletes */ + return; +} + +void C_ALU::C_max(BurstType& src0Bst) +{ + if(pim_precision_ == FP16) + { + for(int fp = 0; fp < 16; fp++) + { + C_REG->fp16Data_[fp] = (C_REG->fp16Data_[fp] > src0Bst.fp16Data_[fp])?C_REG->fp16Data_[fp]:src0Bst.fp16Data_[fp]; /* element-wise max of C_REG and src0Bst, kept in C_REG */ + } + } +} +void C_ALU::setmax() +{ + for(int fp = 0; fp < 16; fp++) + { + if(S_REG != nullptr && C_REG->fp16Data_[fp] > *S_REG) *S_REG = C_REG->fp16Data_[fp]; /* copy the larger value; never re-point S_REG into C_REG, it is deleted separately */ + } +} +//how about just put c_alu outside the bank... \ No newline at end of file diff --git a/src/C_ALU.h b/src/C_ALU.h new file mode 100644 index 0000000..f9d3c66 --- /dev/null +++ b/src/C_ALU.h @@ -0,0 +1,45 @@ +#ifndef C_ALU_H +#define C_ALU_H + +#include + +#include "Burst.h" +#include "tests/KernelAddrGen.h" +#include "SystemConfiguration.h" + +using namespace DRAMSim; +using namespace std; +class C_ALU +{ +public: + + C_ALU(){ + pim_precision_ = PIMConfiguration::getPIMPrecision(); + C_REG = new BurstType(); + S_REG = new fp16(); + }; + C_ALU(PIMPrecision pimprecision) + :pim_precision_(pimprecision) + { + C_REG = new BurstType(); + S_REG = new fp16(); + }; + ~C_ALU(); + void AdderTree(fp16 C_REG); //use from kernel.cpp + void load(); //how? 
+ void accum(BurstType& burst, bool is_neg); + void adderTree(); + void zeroize(); + void C_max(BurstType& src0Bst); + void setmax(); + + BurstType* C_REG; + PIMPrecision pim_precision_; + fp16* S_REG; +private: + int chan_id; + int rank_id; + +//main problem, how to add transaction to this channel level logic +}; +#endif diff --git a/src/ClockDomain.h b/src/ClockDomain.h index de873eb..66537dd 100644 --- a/src/ClockDomain.h +++ b/src/ClockDomain.h @@ -2,7 +2,6 @@ #define __CLOCKDOMAIN__ #include - #include #include diff --git a/src/CommandQueue.cpp b/src/CommandQueue.cpp index 52f5e00..def61ac 100644 --- a/src/CommandQueue.cpp +++ b/src/CommandQueue.cpp @@ -28,11 +28,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *********************************************************************************/ -#include "CommandQueue.h" - #include #include "AddressMapping.h" +#include "CommandQueue.h" #include "MemoryController.h" using namespace DRAMSim; @@ -40,6 +39,7 @@ using namespace DRAMSim; CommandQueue::CommandQueue(vector>& states, ostream& simLog) : dramsimLog(simLog), bankStates(states), + bankStates_sub(emptyBankStatesSub), nextBank(0), nextRank(0), nextBankPRE(0), @@ -51,10 +51,12 @@ CommandQueue::CommandQueue(vector>& states, ostream& simLog) { // set here to avoid compile errors currentClockCycle = 0; - + ranks = nullptr; // set system parameters num_ranks_ = getConfigParam(UINT, "NUM_RANKS"); num_banks_ = getConfigParam(UINT, "NUM_BANKS"); + num_subarrays_ = 1; + cmd_queue_depth_ = getConfigParam(UINT, "CMD_QUEUE_DEPTH"); xaw_ = getConfigParam(UINT, "XAW"); total_row_accesses_ = getConfigParam(UINT, "TOTAL_ROW_ACCESSES"); @@ -70,6 +72,7 @@ CommandQueue::CommandQueue(vector>& states, ostream& simLog) else if (queuingStructure_ == PerRankPerBank) { numBankQueues = num_banks_; + //how about make subqueues? 
} else { @@ -79,6 +82,10 @@ CommandQueue::CommandQueue(vector>& states, ostream& simLog) // vector of counters used to ensure rows don't stay open too long rowAccessCounters = vector>(num_ranks_, vector(num_banks_, 0)); + commandCounters.reserve(cmd_queue_depth_); + processedCommands.reserve(cmd_queue_depth_); + commandCounters.clear(); + processedCommands.clear(); // create queue based on the structure we want BusPacket1D actualQueue; @@ -109,13 +116,106 @@ CommandQueue::CommandQueue(vector>& states, ostream& simLog) } } +CommandQueue::CommandQueue(vector>& states, ostream& simLog, bool is_salp) + : dramsimLog(simLog), + bankStates(emptyBankStates), + bankStates_sub(states), + nextBank(0), + nextRank(0), + nextSub(0), + nextBankPRE(0), + nextRankPRE(0), + nextSubPRE(0), + refreshRank(0), + refreshBank(0), + refreshSub(0), + refreshWaiting(false), + sendAct(true) +{ + // set here to avoid compile errors + currentClockCycle = 0; + ranks = nullptr; + // set system parameters + num_ranks_ = getConfigParam(UINT, "NUM_RANKS"); + num_banks_ = getConfigParam(UINT, "NUM_BANKS"); + + cmd_queue_depth_ = getConfigParam(UINT, "CMD_QUEUE_DEPTH"); + xaw_ = getConfigParam(UINT, "XAW"); + total_row_accesses_ = getConfigParam(UINT, "TOTAL_ROW_ACCESSES"); + schedulingPolicy_ = PIMConfiguration::getSchedulingPolicy(); + queuingStructure_ = PIMConfiguration::getQueueingStructure(); + + // use numBankQueus below to create queue structure + size_t numBankQueues; + if (queuingStructure_ == PerRank) + { + numBankQueues = 1; //we use this logic bugt... + num_subarrays_ = 1; + } + else if (queuingStructure_ == PerRankPerBank) + { + numBankQueues = num_banks_; + //how about make subqueues? 
+ num_subarrays_ = 1; + } + else if (queuingStructure_ == PerRankPerBankPerSubarray) + { + numBankQueues = num_banks_; + num_subarrays_ = 4; + } + else + { + ERROR("== Error - Unknown queuing structure"); + exit(0); + } + + // vector of counters used to ensure rows don't stay open too long + rowAccessCounters_sub = vector>>(num_ranks_, vector>(num_banks_, vector(4, 0))); + commandCounters.reserve(cmd_queue_depth_); + processedCommands.reserve(cmd_queue_depth_); + commandCounters.clear(); + processedCommands.clear(); + // create queue based on the structure we want + BusPacket1D actualQueue_sub, actualQueue; + BusPacket2D perSubQueue = BusPacket2D(); + BusPacket2D perbankqueue = BusPacket2D(); + BusPacket3D queues_bank = BusPacket3D(); + queues = BusPacket3D(); + queues_sub = BusPacket4D(); + for (size_t rank = 0; rank < num_ranks_; rank++) + { + // this loop will run only once for per-rank and NUM_BANKS times for + // per-rank-per-bank + for (size_t bank = 0; bank < numBankQueues; bank++) + { + for(size_t sub = 0; sub < num_subarrays_; sub++) + { + actualQueue_sub = BusPacket1D(); + perSubQueue.push_back(actualQueue_sub); + } + actualQueue = BusPacket1D(); + perbankqueue.push_back(actualQueue); + queues_bank.push_back(perSubQueue); + } + queues.push_back(perbankqueue); + queues_sub.push_back(queues_bank); + + } + tXAWCountdown.reserve(num_ranks_); + for (size_t i = 0; i < num_ranks_; i++) + { + tXAWCountdown.push_back(vector()); + } +} CommandQueue::~CommandQueue() { // ERROR("COMMAND QUEUE destructor"); size_t bankMax = num_ranks_; + size_t subarrayMax = 4; if (queuingStructure_ == PerRank) { bankMax = 1; + subarrayMax = 1; } for (size_t r = 0; r < num_ranks_; r++) { @@ -123,9 +223,21 @@ CommandQueue::~CommandQueue() { for (size_t i = 0; i < queues[r][b].size(); i++) { + queues[r][b][i] = nullptr; delete (queues[r][b][i]); - } + + } + for (size_t s = 0; s< subarrayMax; s++) + { + for(size_t j = 0; j < queues_sub[r][b][s].size(); j++) + { + queues_sub[r][b][s][j] = 
nullptr; + delete (queues_sub[r][b][s][j]); //per rank logic... + } + queues_sub[r][b][s].clear(); + } queues[r][b].clear(); + queues_sub[r][b].clear(); } } } @@ -133,11 +245,24 @@ CommandQueue::~CommandQueue() // Adds a command to appropriate queue void CommandQueue::enqueue(BusPacket* newBusPacket) { + //cout<<"[commandqueue] enqueue: cycle is "<rank<<" and bank is "<bank<< + //" and queue size is "<rank; unsigned bank = newBusPacket->bank; if (queuingStructure_ == PerRank) { - queues[rank][0].push_back(newBusPacket); + if(newBusPacket!=nullptr) + { + if(queues[rank][0].capacity() == queues[rank][0].size()) + { + queues[rank][0].reserve(queues[rank][0].size() + 64); + } + queues[rank][0].push_back(newBusPacket); + //if(newBusPacket->chan!=0) cout<<"[commandqueue] enqueue: cycle is "<chan<row< cmd_queue_depth_) { ERROR("== Error - Enqueued more than allowed in command queue"); @@ -166,7 +291,41 @@ void CommandQueue::enqueue(BusPacket* newBusPacket) } } -bool CommandQueue::process_refresh(BusPacket** busPacket) +void CommandQueue::enqueue_sub(BusPacket* newBusPacket) +{ + //cout<<"[commandqueue] enqueue_sub: cycle is "<rank<<" and bank is "<bank<<" and size is "<rank; + unsigned bank = newBusPacket->bank; + unsigned subarray = AddrMapping::findsubarray(newBusPacket->row); + if(queuingStructure_ == PerRank) + { + //if(newBusPacket!=nullptr) + //{ + /*if(queues_sub[rank][0][0].capacity() == queues_sub[rank][0][0].size()) + { + queues_sub[rank][0][0].reserve(queues_sub[rank][0][0].size() + 10); + }*/ + queues_sub[rank][0][0].push_back(newBusPacket); + //commandCounters.push_back(0); + //processedCommands.push_back(false); + //} + } + else if(queuingStructure_ == PerRankPerBank) + { + queues_sub[rank][bank][0].push_back(newBusPacket); + } + else if (queuingStructure_ == PerRankPerBankPerSubarray) + { + queues_sub[rank][bank][subarray].push_back(newBusPacket); + } + else + { + ERROR("== Error - Unknown queuing structure"); + exit(0); + } +} + +bool 
CommandQueue::process_refresh(BusPacket** busPacket) //it's buspacket for command, not transaction { if (refreshWaiting) { @@ -186,6 +345,7 @@ bool CommandQueue::process_refresh(BusPacket** busPacket) else { delete *busPacket; + //*busPacket = nullptr; } } } @@ -200,12 +360,61 @@ bool CommandQueue::process_refresh(BusPacket** busPacket) else { delete *busPacket; + //*busPacket = nullptr; } } } return false; } - +//7800마다 오류가 생기는건 아마도 refresh의 문제일 가능성이 높아보이긴 함 +bool CommandQueue::process_refresh_sub(BusPacket** busPacket) +{ + if (refreshWaiting) + { + bool sendREF = true; + for (int b = 0; b < 16; b++) + { + for(size_t s = 0; s < 4; s++) + { + if(bankStates_sub[refreshRank][b*4+s].currentBankState == RowActive) + { + sendREF = false; + *busPacket = new BusPacket(PRECHARGE, 0, 0, bankStates_sub[refreshRank][b*4+s].openRowAddress, + refreshRank, b, nullptr, dramsimLog); + if(isIssuable_sub(*busPacket)) + { + //cout<<"[commandqueue] process_refresh_sub: cycle is "<busPacketType<busPacketType<<" and sendref is "<busPacketType<busPacketType<& queue = getCommandQueue(nextRank, nextBank); for (size_t i = 0; i < queue.size(); i++) { - BusPacket* packet = queue[i]; - - if (isIssuable(packet)) + if(queue[i]!=nullptr) { - if (i != 0 && queue[i]->tag.find("BAR", 0) != std::string::npos) - { - break; - } - else + BusPacket* packet = queue[i]; + if (isIssuable(packet)) { - bool depend = false; - for (size_t j = 0; j < i; j++) + if (i != 0 && queue[i]->tag.find("BAR", 0) != std::string::npos) { - if (queue[i]->bank == queue[j]->bank && queue[i]->row == queue[j]->row && - queue[i]->column == queue[j]->column) + break; + } + else + { + bool depend = false; + for (size_t j = 0; j < i; j++) { - depend = true; - break; + if(queue[j]!=nullptr) + { + if (queue[i]->bank == queue[j]->bank && queue[i]->row == queue[j]->row && + queue[i]->column == queue[j]->column) + { + depend = true; + break; + } + if (queue[j]->tag.find("BAR", 0) != std::string::npos) + { + depend = true; + break; + } + 
} } - if (queue[j]->tag.find("BAR", 0) != std::string::npos) + if (!depend) { - depend = true; - break; + *busPacket = queue[i]; + queues[0][0].erase(queues[0][0].begin() + i); + return true; } } - if (!depend) - { - *busPacket = packet; - queue.erase(queue.begin() + i); - return true; - } } } + else + { + //queue.erase(queue.begin() + i); + } } for (size_t i = 0; i < queue.size(); i++) { - if (i != 0 && queue[i]->tag.find("BAR", 0) != std::string::npos) - { - break; - } - - BusPacket* packet = queue[i]; - if (bankStates[packet->rank][packet->bank].currentBankState == Idle) + if(queue[i]!=nullptr) { - *busPacket = - new BusPacket(ACTIVATE, packet->physicalAddress, packet->column, packet->row, - packet->rank, packet->bank, nullptr, dramsimLog, packet->tag); - if (isIssuable(*busPacket)) - { - return true; - } - else + if (i != 0 && queue[i]->tag.find("BAR", 0) != std::string::npos) { - delete *busPacket; + break; } + + BusPacket* packet = queue[i]; + //if(packet->rank <= num_ranks_ && packet->bank <= num_banks_) + //{ + if (bankStates[packet->rank][packet->bank].currentBankState == Idle) //activate command! 
+ { + *busPacket = + new BusPacket(ACTIVATE, packet->physicalAddress, packet->column, packet->row, + packet->rank, packet->bank, nullptr, dramsimLog, packet->tag); + if (isIssuable(*busPacket)) + { + return true; + } + else + { + //*busPacket=nullptr; + delete *busPacket; + } + } + //} + } + else + { + //queue.erase(queue.begin() + i); } } if (queuingStructure_ == PerRank) nextRank = (nextRank + 1) % num_ranks_; - else + else if(queuingStructure_ == PerRankPerBank) nextRankAndBank(nextRank, nextBank); } while (!(startingRank == nextRank && startingBank == nextBank)); return false; } +bool CommandQueue::process_command_sub(BusPacket** busPacket) +{ + unsigned startingRank = nextRank; + unsigned startingBank = nextBank; + unsigned startingSub = nextSub; + int sub = 0; + // if(refreshWaiting) + // return false; + do + { + vector& queue = getCommandQueue(nextRank, nextBank, nextSub); + //cout<<"[commandQueue] process_command_sub: cycle is "<getChanId()<<" and size is "<row!=NULL) + //{ + //cout<<"[commandqueue] process_command_sub: cycle is "<getChanId()<<" and row is "<physicalAddress<row<0x2000)?0:(packet->row<0x4000)?1:(packet->row<0x6000)?2:3; + if (isIssuable_sub(packet)) + { + if (i != 0 && queue[i]->tag.find("BAR", 0) != std::string::npos) + { + //cout<<"[commandqueue] error: cycle is "<bank<<" and row is "<row<tag.find("BAR", 0) != std::string::npos) + { + depend = true; + break; + } + if (queue[i]->bank == queue[j]->bank && queue[i]->row == queue[j]->row && + queue[i]->column == queue[j]->column) + { + //cout<<"[commandqueue] error: cycle is "<bank<<" and row is "<row<bank<< " and row is " + //<row<<" and col is "<column<busPacketType<<" and openrow is "<bank][sub].openRowAddress<tag.find("BAR", 0) != std::string::npos) + { + break; + } + BusPacket* packet = queue[i]; + //if(queue[i]->physicalAddress!=NULL) + //{ + sub = (packet->row < 0x2000)?0:(packet->row < 0x4000)?1:(packet->row < 0x6000)?2:3; + if (bankStates_sub[packet->rank][4*packet->bank + 
sub].currentBankState == Idle) + { + //cout<<"[commandqueue] process_command_sub: cycle is "<rank<<" and bank is "<bank<<" and row is "<row<data<<" and tag is "<tag<physicalAddress, packet->column, packet->row, + 0, packet->bank, nullptr, dramsimLog, "activate"); + if (isIssuable_sub(*busPacket)) + { + return true; + } + else + { + delete *busPacket; + } + } + //} + //else + //{ + //queue.erase(queue.begin() + i); + //} + } + //} + if (queuingStructure_ == PerRank) + nextRank = (nextRank + 1) % num_ranks_; //we got only one rank, which means... + else if(queuingStructure_ == PerRankPerBank) + nextRankAndBank(nextRank, nextBank); + else if(queuingStructure_ == PerRankPerBankPerSubarray) + nextRankAndBankandSubarray(nextRank, nextBank, nextSub); + } while (!(startingRank == nextRank && startingBank == nextBank && startingSub == nextSub)); + return false; +} bool CommandQueue::process_precharge(BusPacket** busPacket) { unsigned startingRank = nextRankPRE; unsigned startingBank = nextBankPRE; - + //for this logic nextbankpre is constrained to 0.. 
do { bool found = false; vector& queue = getCommandQueue(nextRankPRE, nextBankPRE); - for (size_t i = 0; i < queue.size(); i++) + for (auto it = queues[0][0].begin(); it != queues[0][0].end(); it++) { - BusPacket* packet = queue[i]; - if (nextRankPRE == packet->rank && nextBankPRE == packet->bank && - bankStates[packet->rank][packet->bank].currentBankState == RowActive && - packet->row == bankStates[packet->rank][packet->bank].openRowAddress) - found = true; - if (packet->tag.find("BAR", 0) != std::string::npos) - break; + BusPacket* packet = *it; + auto index = it - queues[0][0].begin(); + if(packet != nullptr) + { + if(nextRankPRE == packet->rank && nextBankPRE == packet->bank && + bankStates[packet->rank][packet->bank].currentBankState == RowActive && + packet->row == bankStates[packet->rank][packet->bank].openRowAddress) + { + found = true; + break; + } + if (packet->tag.find("BAR", 0) != std::string::npos) + break; + } + else + { + //*it = nullptr; + //queues[0][0].erase(queues[0][0].begin() + index); + } } if (!found) { @@ -310,9 +659,12 @@ bool CommandQueue::process_precharge(BusPacket** busPacket) new BusPacket(PRECHARGE, 0, 0, bankStates[nextRankPRE][nextBankPRE].openRowAddress, nextRankPRE, nextBankPRE, nullptr, dramsimLog); if (isIssuable(*busPacket)) + { return true; + } else delete *busPacket; + *busPacket = nullptr; } nextRankAndBank(nextRankPRE, nextBankPRE); } while (!(startingRank == nextRankPRE && startingBank == nextBankPRE)); @@ -320,11 +672,61 @@ bool CommandQueue::process_precharge(BusPacket** busPacket) return false; } +bool CommandQueue::process_precharge_sub(BusPacket** busPacket) +{ + unsigned startingRank = nextRankPRE; + unsigned startingBank = nextBankPRE; + unsigned startingSub = nextSubPRE; + //if(currentClockCycle > 610) cout<<"[commandqueue] process_precharge_sub: cycle is "<& queue_sub = getCommandQueue(nextRankPRE, nextBankPRE, nextSubPRE); + //if(&queue_sub != nullptr) + //{ + for (size_t j = 0; j < queue_sub.size(); j++) + { + 
BusPacket* packet = queue_sub[j]; + unsigned sub = (packet->row < 0x2000)?0:(packet->row < 0x4000)?1:(packet->row < 0x6000)?2:3; + if (nextRankPRE == packet->rank && nextBankPRE == packet->bank && nextSubPRE == sub && + bankStates_sub[packet->rank][4*packet->bank + sub].currentBankState == RowActive && + packet->row == bankStates_sub[packet->rank][4*packet->bank + sub].openRowAddress) + { + found = true; + //cout<<"[commandqueue] cannot precharge: cycle is "<tag.find("BAR", 0) != std::string::npos) + break; + } + //} + if (!found) + { + *busPacket = + new BusPacket(PRECHARGE, 0, 0, bankStates_sub[nextRankPRE][nextBankPRE*4+nextSubPRE].openRowAddress, + nextRankPRE, nextBankPRE, nullptr, dramsimLog); + unsigned sub = (bankStates_sub[nextRankPRE][nextBankPRE*4+nextSubPRE].openRowAddress < 0x2000)?0: + (bankStates_sub[nextRankPRE][nextBankPRE*4+nextSubPRE].openRowAddress < 0x4000)?1: + (bankStates_sub[nextRankPRE][nextBankPRE*4+nextSubPRE].openRowAddress < 0x6000)?2:3; + if (isIssuable_sub(*busPacket) && sub == nextSubPRE) + { + return true; + } + else + { + delete *busPacket; + //*busPacket = nullptr; + } + } + nextRankAndBankandSubarray(nextRankPRE, nextBankPRE, nextSubPRE); + }while (!(startingRank == nextRankPRE && startingBank == nextBankPRE && startingSub == nextSubPRE)); + + return false; +} bool CommandQueue::pop(BusPacket** busPacket) { if (queuingStructure_ == PerRankPerBank) { - ERROR("== Error - queuingStructure_ PerRankPerBank is not allowed"); + ERROR("== Error - queuingStructure_ PerRankPerBank and queueingStructure_ PerRankPerBankPerSubarray is not allowed"); exit(0); } for (size_t i = 0; i < num_ranks_; i++) @@ -337,16 +739,116 @@ bool CommandQueue::pop(BusPacket** busPacket) } if (process_refresh(busPacket)) + { return true; + } else if (process_command(busPacket)) + { return true; + } else if (process_precharge(busPacket)) + { return true; + } else + { + //*(busPacket) = new BusPacket(RFCSB, 0, 0, 0, 0, 0, nullptr, dramsimLog); + //*busPacket = 
nullptr; return false; - return false; + } } +bool CommandQueue::pop_sub(BusPacket** busPacket) +{ + //if((*busPacket)!=nullptr) cout<<"[commandqueue] pop_sub: cycle is "<rank<<" and bank is "<<(*busPacket)->bank<<" and type is "<<(*busPacket)->busPacketType< 0 && tXAWCountdown[i][0] == 0) + tXAWCountdown[i].erase(tXAWCountdown[i].begin()); + } + //in first time, poppedBusPacket is null! + if (process_refresh_sub(busPacket)) + { + //if((*busPacket)!=nullptr) cout<<"[commandqueue] pop_sub_refresh: cycle is "<rank<<" and bank is "<<(*busPacket)->bank<<" and type is "<<(*busPacket)->busPacketType<rank<<" and bank is "<<(*busPacket)->bank<<" and type is "<<(*busPacket)->busPacketType<rank<<" and bank is "<<(*busPacket)->bank<<" and type is "<<(*busPacket)->busPacketType<rank<<" and bank is "<<(*busPacket)->bank<<" and type is "<<(*busPacket)->busPacketType<= cmd_queue_depth_) + { + for(int i = 0; i < queues[0][0].size(); i++) + { + if(commandCounters[i] >= 250) + { + queues[0][0].erase(queues[0][0].begin() + i); + commandCounters.erase(commandCounters.begin() + i); + } + } + } + } +} +void CommandQueue::process_queue_sub() +{ + if (queuingStructure_ == PerRankPerBankPerSubarray || queuingStructure_ == PerRankPerBank) + { + ERROR("== Error - queuingStructure_ PerRank and queueingStructure_ PerRankPerBankPerSubarray is not allowed"); + exit(0); + } + else + { + for(int i = 0; i < queues_sub[0][0][0].size(); i++)//if(cmd_queue_depth_ <= queues_sub[0][0][0].size()) + { + //commandCounters[i]++; + //cout<= cmd_queue_depth_) + { + for(int i = 0; i < queues_sub[0][0][0].size(); i++) + { + if(commandCounters[i] >= 300 && processedCommands[i] == true) + { + queues_sub[0][0][0].erase(queues_sub[0][0][0].begin() + i); + //commandCounters.erase(commandCounters.begin() + i); + } + } + } + } +} // check if a rank/bank queue has room for a certain number of bus packets bool CommandQueue::hasRoomFor(unsigned numberToEnqueue, unsigned rank, unsigned bank) { @@ -354,6 +856,12 @@ bool 
CommandQueue::hasRoomFor(unsigned numberToEnqueue, unsigned rank, unsigned return ((cmd_queue_depth_ - queue.size()) >= numberToEnqueue); } +bool CommandQueue::hasRoomFor(unsigned numberToEnqueue, unsigned rank, unsigned bank, unsigned sub) +{ + vector& queue = getCommandQueue(rank, bank, sub); + return ((cmd_queue_depth_ - queue.size()) >= numberToEnqueue); +} + // prints the contents of the command queue void CommandQueue::print() { @@ -409,6 +917,23 @@ vector& CommandQueue::getCommandQueue(unsigned rank, unsigned bank) } } +vector& CommandQueue::getCommandQueue(unsigned rank, unsigned bank, unsigned sub) +{ + if(queuingStructure_ == PerRankPerBankPerSubarray) + return queues_sub[rank][bank][sub]; + else if(queuingStructure_ == PerRankPerBank) + return queues_sub[rank][bank][0]; + else if(queuingStructure_ == PerRank) + { + //cout<<"[commandqueue] error: cycle is "<rank]->mode_ != dramMode::SB && busPacket->bank >= 2) { return false; } if ((bankStates[busPacket->rank][busPacket->bank].currentBankState == Idle || - bankStates[busPacket->rank][busPacket->bank].currentBankState == Refreshing) && + bankStates[busPacket->rank][busPacket->bank].currentBankState == Refreshing) && currentClockCycle >= bankStates[busPacket->rank][busPacket->bank].nextActivate && tXAWCountdown[busPacket->rank].size() < xaw_) { @@ -452,11 +976,95 @@ bool CommandQueue::isIssuable(BusPacket* busPacket) } break; case READ: + if(bankStates[busPacket->rank].size() == num_banks_) + { + if (bankStates[busPacket->rank][busPacket->bank].currentBankState == RowActive && + currentClockCycle >= bankStates[busPacket->rank][busPacket->bank].nextRead && + busPacket->row == bankStates[busPacket->rank][busPacket->bank].openRowAddress && + rowAccessCounters[busPacket->rank][busPacket->bank] < total_row_accesses_) + { + return true; + } + else + { + return false; + } + } + break; + case PRECHARGE: if (bankStates[busPacket->rank][busPacket->bank].currentBankState == RowActive && - currentClockCycle >= 
bankStates[busPacket->rank][busPacket->bank].nextRead && - busPacket->row == bankStates[busPacket->rank][busPacket->bank].openRowAddress && - rowAccessCounters[busPacket->rank][busPacket->bank] < total_row_accesses_) + currentClockCycle >= bankStates[busPacket->rank][busPacket->bank].nextPrecharge) + { + return true; + } + else + { + return false; + } + break; + + default: + cout<<"[commandqueue] error: cycle is "<busPacketType<print(); + exit(0); + } + return false; +} +bool CommandQueue::isIssuable_sub(BusPacket* packet) +{ + int sub = (packet->row<0X2000)?0:(packet->row<0x4000)?1:(packet->row<0x6000)?2:3; + switch (packet->busPacketType) + { + case REF: + case RFCSB: + return true; + break; + case ACTIVATE: + if ((*ranks)[packet->rank]->mode_ != dramMode::SB && packet->bank >= 2) + { + return false; + } + if ((bankStates_sub[packet->rank][4*packet->bank + sub].currentBankState == Idle || + bankStates_sub[packet->rank][4*packet->bank + sub].currentBankState == Refreshing) && + currentClockCycle >= bankStates_sub[packet->rank][4*packet->bank + sub].nextActivate && + tXAWCountdown[packet->rank].size() < xaw_) + { + //cout<<"[commandqueue]: activate issue and clock is"<bank<<" and sub is "<rank][4*packet->bank + sub].currentBankState<<" and nextact is "<rank][4*packet->bank + sub].nextActivate + //<<" and openrow is "<rank][4*packet->bank + sub].openRowAddress<<" and type is "<busPacketType<rank][4*packet->bank + sub].currentBankState == RowActive && + currentClockCycle >= bankStates_sub[packet->rank][4*packet->bank + sub].nextWrite && + packet->row == bankStates_sub[packet->rank][4*packet->bank + sub].openRowAddress && + rowAccessCounters_sub[packet->rank][packet->bank][sub] < total_row_accesses_) { + //cout<<"[commandqueue]: write issue and clock is "<bank<<" and sub is "<rank][4*packet->bank + sub].currentBankState<<" and nextpre is "<rank][4*packet->bank + sub].nextPrecharge + //<<" and openrow is "<rank][4*packet->bank + sub].openRowAddress<<" and type is 
"<busPacketType<rank][4*packet->bank + sub].currentBankState == RowActive && + currentClockCycle >= bankStates_sub[packet->rank][4*packet->bank + sub].nextRead && + packet->row == bankStates_sub[packet->rank][4*packet->bank + sub].openRowAddress && + rowAccessCounters_sub[packet->rank][packet->bank][sub] < total_row_accesses_) + { + //cout<<"[commandqueue]: read issue and clock is "<bank<<" and sub is "<rank][4*packet->bank + sub].currentBankState<<" and nextact is "<rank][4*packet->bank + sub].nextActivate + //<<" and openrow is "<rank][4*packet->bank + sub].openRowAddress<<" and type is "<busPacketType<rank][busPacket->bank].currentBankState == RowActive && - currentClockCycle >= bankStates[busPacket->rank][busPacket->bank].nextPrecharge) + //if(currentClockCycle > 615) cout<<"[commandqueue]: precharge issue and clock is "<bank<<" and sub is "<rank][4*packet->bank + sub].currentBankState<<" and nextact is "<rank][4*packet->bank + sub].nextActivate + //<<" and openrow is "<rank][4*packet->bank + sub].openRowAddress<<" and type is "<busPacketType<rank][4*packet->bank + sub].currentBankState == RowActive && + currentClockCycle >= bankStates_sub[packet->rank][4*packet->bank + sub].nextPrecharge) { + /*for(size_t j = 0; j < queues_sub[0][0][0].size(); j++) + { + BusPacket* index = queues_sub[0][0][0][j]; + if(index->rank == packet->rank && index->bank == packet->bank && index->rank == packet->rank && + processedCommands[j] == true &&(index->busPacketType == READ || index->busPacketType == WRITE)) + { + queues_sub[0][0][0].erase(queues_sub[0][0][0].begin() + j); + commandCounters.erase(commandCounters.begin() + j); + processedCommands.erase(processedCommands.begin() + j); + } + }*/ + //cout<<"[commandqueue]: precharge issue and clock is "<bank<<" and sub is "<rank][4*packet->bank + sub].currentBankState<<" and nextact is "<rank][4*packet->bank + sub].nextActivate + //<<" and openrow is "<rank][4*packet->bank + sub].openRowAddress<<" and type is "<busPacketType<print(); 
+ packet->print(); exit(0); } return false; } - // figures out if a rank's queue is empty bool CommandQueue::isEmpty(unsigned rank) { @@ -506,10 +1129,44 @@ bool CommandQueue::isEmpty(unsigned rank) abort(); } } +bool CommandQueue::isEmpty_sub(unsigned rank) +{ + if(queuingStructure_ == PerRank) + { + return queues_sub[rank][0][0].empty(); + } + else if(queuingStructure_ == PerRankPerBank) + { + for(size_t i = 0; i < num_banks_; i++) + { + if(!queues_sub[rank][i][0].empty()) + return false; + } + return true; + } + else if(queuingStructure_== PerRankPerBankPerSubarray) + { + for(size_t i = 0; i < num_banks_; i++) + { + for(size_t j = 0; j < num_subarrays_; j++) + { + if(!queues_sub[rank][i][j].empty()) + return false; + } + } + return true; + } + else + { + DEBUG("Invalid Queueing Structure"); + abort(); + } +} // tells the command queue that a particular rank is in need of a refresh void CommandQueue::needRefresh(unsigned rank) { + cout<<"[commandqueue] needRefresh: cycle is "< BusPacket1D; typedef vector BusPacket2D; typedef vector BusPacket3D; + typedef vector BusPacket4D; // functions CommandQueue(vector>& states, ostream& dramsimLog); + CommandQueue(vector>& states, ostream& dramsimLog, bool is_salp); virtual ~CommandQueue(); void enqueue(BusPacket* newBusPacket); - bool pop(BusPacket** busPacket); + void enqueue_sub(BusPacket* newBusPacket); + void process_queue(); + void process_queue_sub(); + bool pop(BusPacket **busPacket); + bool pop_sub(BusPacket** busPacket); // TODO: rename this... 
bool process_refresh(BusPacket** busPacket); + bool process_refresh_sub(BusPacket** busPacket); bool process_command(BusPacket** busPacket); + bool process_command_sub(BusPacket** busPacket); bool process_precharge(BusPacket** busPacket); + bool process_precharge_sub(BusPacket** busPacket); bool hasRoomFor(unsigned numberToEnqueue, unsigned rank, unsigned bank); + bool hasRoomFor(unsigned numberToEnqueue, unsigned rank, unsigned bank, unsigned subarray); bool isIssuable(BusPacket* busPacket); + bool isIssuable_sub(BusPacket* busPacket); bool isEmpty(unsigned rank); + bool isEmpty_sub(unsigned rank); void needRefresh(unsigned rank); void print(); void update(); // SimulatorObject requirement vector& getCommandQueue(unsigned rank, unsigned bank); + vector& getCommandQueue(unsigned rank, unsigned bank, unsigned sub); // fields BusPacket3D queues; // 3D array of BusPacket pointers + BusPacket4D queues_sub; vector>& bankStates; + vector>& bankStates_sub; vector* ranks; + vector> emptyBankStates; + vector> emptyBankStatesSub; + private: void nextRankAndBank(unsigned& rank, unsigned& bank); + void nextRankAndBankandSubarray(unsigned& rank, unsigned& bank, unsigned& sub); // fields unsigned nextBank; unsigned nextRank; + unsigned nextSub; //clearly need cause.. 
in unsigned nextBankPRE; unsigned nextRankPRE; + unsigned nextSubPRE; unsigned refreshRank; unsigned refreshBank; + unsigned refreshSub; bool refreshWaiting; - + + vector commandCounters; + vector processedCommands; vector> tXAWCountdown; vector> rowAccessCounters; + vector>> rowAccessCounters_sub; bool sendAct; + bool is_salp_; // preloaded system configuration parameters unsigned num_ranks_; unsigned num_banks_; + unsigned num_subarrays_; unsigned cmd_queue_depth_; unsigned xaw_; unsigned total_row_accesses_; diff --git a/src/Configuration.h b/src/Configuration.h index 11f1713..83461d6 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -37,6 +37,7 @@ class Configuration NUM_COLS = getConfigParam(UINT, "NUM_COLS"); NUM_CHANS = getConfigParam(UINT, "NUM_CHANS"); NUM_PIM_BLOCKS = getConfigParam(UINT, "NUM_PIM_BLOCKS"); + NUM_S_BLOCKS = getConfigParam(UINT, "NUM_S_BLOCKS"); NUM_RANKS = getConfigParam(UINT, "NUM_RANKS"); NUM_ROWS = getConfigParam(UINT, "NUM_ROWS"); RL = getConfigParam(UINT, "RL"); @@ -45,6 +46,7 @@ class Configuration tCK = getConfigParam(FLOAT, "tCK"); tCMD = getConfigParam(UINT, "tCMD"); tCKE = getConfigParam(UINT, "tCKE"); + tRA = getConfigParam(UINT, "tRA"); tRAS = getConfigParam(UINT, "tRAS"); tRC = getConfigParam(UINT, "tRC"); tRCDRD = getConfigParam(UINT, "tRCDRD"); @@ -59,9 +61,14 @@ class Configuration tRTPL = getConfigParam(UINT, "tRTPL"); tRTPS = getConfigParam(UINT, "tRTPS"); tRTRS = getConfigParam(UINT, "tRTRS"); + tCWL = getConfigParam(UINT, "tCWL"); + tRTW = getConfigParam(UINT, "tRTW"); + tWA = getConfigParam(UINT, "tWA"); tWR = getConfigParam(UINT, "tWR"); tWTRL = getConfigParam(UINT, "tWTRL"); tWTRS = getConfigParam(UINT, "tWTRS"); + tWTR = getConfigParam(UINT, "tWTR"); + tWTP = getConfigParam(UINT, "tWTP"); tXP = getConfigParam(UINT, "tXP"); TOTAL_ROW_ACCESSES = getConfigParam(UINT, "TOTAL_ROW_ACCESSES"); TRANS_QUEUE_DEPTH = getConfigParam(UINT, "TRANS_QUEUE_DEPTH"); @@ -91,10 +98,6 @@ class Configuration throw 
invalid_argument("Not allowed zero channel"); } - PIM_REG_RA = 0x3fff; - PIM_ABMR_RA = 0x27ff; - PIM_SBMR_RA = 0x2fff; - setDebugConfiguration(); setOutputConfiguration(); } @@ -132,8 +135,10 @@ class Configuration unsigned JEDEC_DATA_BUS_BITS; unsigned NUM_BANKS; unsigned NUM_COLS; + unsigned NUM_SUBARRAYS; unsigned NUM_CHANS; unsigned NUM_PIM_BLOCKS; + unsigned NUM_S_BLOCKS; unsigned NUM_RANKS; unsigned NUM_ROWS; unsigned RL; @@ -142,6 +147,8 @@ class Configuration float tCK; unsigned tCMD; unsigned tCKE; + unsigned tCWL; + unsigned tRA; unsigned tRAS; unsigned tRC; unsigned tRCDRD; @@ -153,12 +160,16 @@ class Configuration unsigned tRRDL; unsigned tRRDS; unsigned tRTP; + unsigned tRTW; unsigned tRTPL; unsigned tRTPS; unsigned tRTRS; + unsigned tWA; unsigned tWR; + unsigned tWTR; unsigned tWTRL; unsigned tWTRS; + unsigned tWTP; unsigned tXP; unsigned TOTAL_ROW_ACCESSES; unsigned TRANS_QUEUE_DEPTH; @@ -183,10 +194,6 @@ class Configuration unsigned WRITE_TO_READ_DELAY_B_SHORT; unsigned WRITE_TO_READ_DELAY_R; - uint32_t PIM_REG_RA; - uint32_t PIM_ABMR_RA; - uint32_t PIM_SBMR_RA; - AddrMapping& addrMapping; }; diff --git a/src/ConfigurationData.h b/src/ConfigurationData.h index c8d2100..eb1d270 100644 --- a/src/ConfigurationData.h +++ b/src/ConfigurationData.h @@ -77,6 +77,8 @@ typedef struct _ConfigurationData const static ConfigurationData defaultConfiguration[] = { DEFINE_UINT_CONFIG(NUM_BANKS, DEV_PARAM), DEFINE_UINT_CONFIG(NUM_BANK_GROUPS, DEV_PARAM), + DEFINE_UINT_CONFIG(NUM_SUBARRAYS, DEV_PARAM), + DEFINE_UINT_CONFIG(NUM_S_BLOCKS, DEV_PARAM), DEFINE_UINT_CONFIG(NUM_ROWS, DEV_PARAM), DEFINE_UINT_CONFIG(NUM_COLS, DEV_PARAM), DEFINE_UINT_CONFIG(NUM_PIM_BLOCKS, DEV_PARAM), @@ -100,6 +102,7 @@ const static ConfigurationData defaultConfiguration[] = { DEFINE_UINT_CONFIG(XAW, DEV_PARAM), DEFINE_UINT_CONFIG(tXAW, DEV_PARAM), DEFINE_UINT_CONFIG(tCKE, DEV_PARAM), + DEFINE_UINT_CONFIG(tCWL, DEV_PARAM), DEFINE_UINT_CONFIG(tXP, DEV_PARAM), DEFINE_UINT_CONFIG(tCMD, 
DEV_PARAM), DEFINE_UINT_CONFIG(IDD0, DEV_PARAM), @@ -175,10 +178,13 @@ const static ConfigurationData defaultConfiguration[] = { // DDR4 support DEFINE_UINT_CONFIG(tCCDL, DEV_PARAM), DEFINE_UINT_CONFIG(tCCDS, DEV_PARAM), + DEFINE_UINT_CONFIG(tRTW, DEV_PARAM), DEFINE_UINT_CONFIG(tRRDL, DEV_PARAM), DEFINE_UINT_CONFIG(tRRDS, DEV_PARAM), + DEFINE_UINT_CONFIG(tWTR, DEV_PARAM), DEFINE_UINT_CONFIG(tWTRL, DEV_PARAM), DEFINE_UINT_CONFIG(tWTRS, DEV_PARAM), + DEFINE_UINT_CONFIG(tWTP, DEV_PARAM), DEFINE_UINT_CONFIG(tRTPL, DEV_PARAM), DEFINE_UINT_CONFIG(tRTPS, DEV_PARAM), DEFINE_STRING_CONFIG(PIM_PRECISION, SYS_PARAM), @@ -190,7 +196,7 @@ const static ConfigurationData defaultConfiguration[] = { DEFINE_DEFAULT_CONFIG(PIM_PRECISION, STRING, SYS_PARAM, "FP16"), // Controller related DEFINE_DEFAULT_CONFIG(ROW_BUFFER_POLICY, STRING, SYS_PARAM, "open_page"), - DEFINE_DEFAULT_CONFIG(SCHEDULING_POLICY, STRING, SYS_PARAM, "rank_then_bank_round_robin"), + //DEFINE_DEFAULT_CONFIG(SCHEDULING_POLICY, STRING, SYS_PARAM, "rank_then_bank_round_robin"), DEFINE_DEFAULT_CONFIG(QUEUING_STRUCTURE, STRING, SYS_PARAM, "per_rank"), DEFINE_DEFAULT_CONFIG(ADDRESS_MAPPING_SCHEME, STRING, SYS_PARAM, "Scheme8"), // shcha // WARNING, do not remove end of config macro diff --git a/src/MemoryController.cpp b/src/MemoryController.cpp index 82cfeb8..73b7c85 100644 --- a/src/MemoryController.cpp +++ b/src/MemoryController.cpp @@ -28,28 +28,30 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*********************************************************************************/ -#include "MemoryController.h" - #include #include "AddressMapping.h" +#include "MemoryController.h" #include "MemorySystem.h" #define SEQUENTIAL(rank, bank) (rank * config.NUM_BANKS) + bank +#define SEQUENTIAL_SUB(rank, bank, sub) (rank * config.NUM_BANKS * 4) + (bank * 4) + sub using namespace DRAMSim; - MemoryController::MemoryController(MemorySystem* parent, CSVWriter& csvOut_, ostream& simLog, Configuration& configuration) : dramsimLog(simLog), config(configuration), bankStates(getConfigParam(UINT, "NUM_RANKS"), vector(getConfigParam(UINT, "NUM_BANKS"), dramsimLog)), + bankStates_SUB(getConfigParam(UINT, "NUM_RANKS"), + vector(getConfigParam(UINT, "NUM_BANKS")*4, dramsimLog)), outgoingCmdPacket(NULL), outgoingDataPacket(NULL), dataCyclesLeft(0), cmdCyclesLeft(0), commandQueue(bankStates, simLog), + commandQueue_SUB(bankStates_SUB, simLog, true), poppedBusPacket(NULL), csvOut(csvOut_), totalTransactions(0), @@ -68,6 +70,68 @@ MemoryController::MemoryController(MemorySystem* parent, CSVWriter& csvOut_, ost transactionQueue.reserve(config.TRANS_QUEUE_DEPTH); powerDown = vector(config.NUM_RANKS, false); + grandTotalBankAccesses = totalReadsPerBank = totalWritesPerBank = totalActivatesPerBank = + vector(config.NUM_RANKS * config.NUM_BANKS, 0); + totalReadsPerRank = totalWritesPerRank = totalActivatesPerRank = + vector(config.NUM_RANKS, 0); + writeDataCountdown.reserve(config.NUM_RANKS); + writeDataToSend.reserve(config.NUM_RANKS); + refreshCountdown.reserve(config.NUM_RANKS); + refreshCountdownBank.reserve(config.NUM_BANKS); + + // Power related packets + backgroundEnergy = burstEnergy = actpreEnergy = vector(config.NUM_RANKS, 0); + refreshEnergy = aluPIMEnergy = vector(config.NUM_RANKS, 0); //logic of pimrank... 
+ readPIMEnergy = vector(config.NUM_BANKS, 0); + totalBandwidth = 0.0; + + totalEpochLatency = vector(config.NUM_RANKS * config.NUM_BANKS * 4, 0); + + // staggers when each rank is due for a refresh + for (size_t i = 0; i < config.NUM_RANKS; i++) + refreshCountdown.push_back((int)((config.tREFI / config.tCK) / config.NUM_RANKS) * (i + 1)); + for (size_t i = 0; i < config.NUM_BANKS; i++) + refreshCountdownBank.push_back((int)((config.tREFISB / config.tCK)) * (i + 1)); + + memoryContStats = new MemoryControllerStats( + parentMemorySystem, csvOut, dramsimLog, config, totalTransactions, grandTotalBankAccesses, + totalReadsPerRank, totalWritesPerRank, totalReadsPerBank, totalWritesPerBank, + totalActivatesPerRank, totalActivatesPerBank, totalRefreshes, backgroundEnergy, burstEnergy, + actpreEnergy, refreshEnergy, aluPIMEnergy, refreshEnergy, pendingReadTransactions, false); +} + +MemoryController::MemoryController(MemorySystem* parent, CSVWriter& csvOut_, ostream& simLog, + Configuration& configuration, bool is_salp) + : dramsimLog(simLog), + config(configuration), + bankStates(getConfigParam(UINT, "NUM_RANKS"), + vector(getConfigParam(UINT, "NUM_BANKS"), dramsimLog)), + bankStates_SUB(getConfigParam(UINT, "NUM_RANKS"), + vector(getConfigParam(UINT, "NUM_BANKS")*4, dramsimLog)), + outgoingCmdPacket(nullptr), + outgoingDataPacket(nullptr), + dataCyclesLeft(0), + cmdCyclesLeft(0), + commandQueue(bankStates, simLog), + commandQueue_SUB(bankStates_SUB, simLog, true), + poppedBusPacket(nullptr), + csvOut(csvOut_), + totalTransactions(0), + totalRefreshes(0), + refreshRank(0), + refreshBank(0), + totalReads(0), + totalWrites(0), + is_salp_(is_salp) +{ + // get handle on parent + parentMemorySystem = parent; + currentClockCycle = 0; + + // reserve memory for vectors + transactionQueue.reserve(config.TRANS_QUEUE_DEPTH); + powerDown = vector(config.NUM_RANKS, false); + grandTotalBankAccesses = totalReadsPerBank = totalWritesPerBank = totalActivatesPerBank = 
vector(config.NUM_RANKS * config.NUM_BANKS, 0); totalReadsPerRank = totalWritesPerRank = totalActivatesPerRank = @@ -80,25 +144,30 @@ MemoryController::MemoryController(MemorySystem* parent, CSVWriter& csvOut_, ost // Power related packets backgroundEnergy = burstEnergy = actpreEnergy = vector(config.NUM_RANKS, 0); - refreshEnergy = aluPIMEnergy = vector(config.NUM_RANKS, 0); + refreshEnergy = aluPIMEnergy = vector(config.NUM_RANKS, 0); //logic of pimrank... readPIMEnergy = vector(config.NUM_BANKS, 0); totalBandwidth = 0.0; - totalEpochLatency = vector(config.NUM_RANKS * config.NUM_BANKS, 0); + totalEpochLatency = vector(config.NUM_RANKS * config.NUM_BANKS * 4, 0); // staggers when each rank is due for a refresh for (size_t i = 0; i < config.NUM_RANKS; i++) refreshCountdown.push_back((int)((config.tREFI / config.tCK) / config.NUM_RANKS) * (i + 1)); for (size_t i = 0; i < config.NUM_BANKS; i++) refreshCountdownBank.push_back((int)((config.tREFISB / config.tCK)) * (i + 1)); - + pendingReadTransactions.reserve(32); + pendingReadTransactions.clear(); + returnTransaction.reserve(32); + returnTransaction.clear(); memoryContStats = new MemoryControllerStats( parentMemorySystem, csvOut, dramsimLog, config, totalTransactions, grandTotalBankAccesses, totalReadsPerRank, totalWritesPerRank, totalReadsPerBank, totalWritesPerBank, totalActivatesPerRank, totalActivatesPerBank, totalRefreshes, backgroundEnergy, burstEnergy, - actpreEnergy, refreshEnergy, aluPIMEnergy, refreshEnergy, pendingReadTransactions); + actpreEnergy, refreshEnergy, aluPIMEnergy, refreshEnergy, pendingReadTransactions, is_salp_); } +//do we need subarray controller? 
+ // get a bus packet from either data or cmd bus void MemoryController::receiveFromBus(BusPacket* bpacket) { @@ -116,8 +185,9 @@ void MemoryController::receiveFromBus(BusPacket* bpacket) } // add to return read data queue - returnTransaction.push_back( - new Transaction(RETURN_DATA, bpacket->physicalAddress, bpacket->data)); + if(bpacket->busPacketType == DATA) + returnTransaction.push_back( + new Transaction(RETURN_DATA, bpacket->physicalAddress, bpacket->data)); // this delete statement saves a mindboggling amount of memory delete (bpacket); @@ -146,7 +216,8 @@ bool MemoryController::addBarrier() void MemoryController::attachRanks(vector* ranks) { this->ranks = ranks; - commandQueue.ranks = ranks; + if(!is_salp_) commandQueue.ranks = ranks; + else commandQueue_SUB.ranks = ranks; } void MemoryController::setBankStatesRW(size_t ra, size_t ba, uint64_t RdCycle, uint64_t WrCycle) @@ -155,6 +226,12 @@ void MemoryController::setBankStatesRW(size_t ra, size_t ba, uint64_t RdCycle, u bankStates[ra][ba].nextWrite = max(bankStates[ra][ba].nextWrite, currentClockCycle + WrCycle); } +void MemoryController::setBankStatesRW(size_t ra, size_t ba, size_t sa, uint64_t RdCycle, uint64_t WrCycle) +{ + bankStates_SUB[ra][ba*4+sa].nextRead = max(bankStates_SUB[ra][ba*4+sa].nextRead, currentClockCycle+RdCycle); + bankStates_SUB[ra][ba*4+sa].nextWrite = max(bankStates_SUB[ra][ba*4+sa].nextWrite, currentClockCycle + WrCycle); +} + void MemoryController::setBankStates(size_t rank, size_t bank, CurrentBankState currentBankState, BusPacketType lastCommand, uint64_t stateChangeCountdown, uint64_t nextActivate) @@ -166,28 +243,53 @@ void MemoryController::setBankStates(size_t rank, size_t bank, CurrentBankState bankStates[rank][bank].nextActivate = nextActivate; } +void MemoryController::setBankStates(size_t rank, size_t bank, size_t sub, CurrentBankState currentBankState, + BusPacketType lastCommand, uint64_t stateChangeCountdown, + uint64_t nextActivate) +{ + 
bankStates_SUB[rank][bank*4+sub].currentBankState = currentBankState; + bankStates_SUB[rank][bank*4+sub].lastCommand = lastCommand; + if (stateChangeCountdown != 0) + bankStates_SUB[rank][bank*4+sub].stateChangeCountdown = stateChangeCountdown; + bankStates_SUB[rank][bank*4+sub].nextActivate = nextActivate; +} + void MemoryController::updateCommandQueue(BusPacket* poppedBusPacket) { - if (poppedBusPacket->busPacketType == WRITE) - { + if (poppedBusPacket!=nullptr && poppedBusPacket->busPacketType == WRITE) + { + if(writeDataToSend.capacity() == writeDataToSend.size()) + { + writeDataToSend.reserve(writeDataToSend.size() + 32); + writeDataCountdown.reserve(writeDataCountdown.size() + 32); + } writeDataToSend.push_back(new BusPacket( DATA, poppedBusPacket->physicalAddress, poppedBusPacket->column, poppedBusPacket->row, poppedBusPacket->rank, poppedBusPacket->bank, poppedBusPacket->data, dramsimLog)); writeDataCountdown.push_back(config.WL); } - + // update each bank's state based on the command that was just popped // out of the command queue for readability's sake + //if(poppedBusPacket == nullptr) return; unsigned rank = poppedBusPacket->rank; unsigned bank = poppedBusPacket->bank; + unsigned sub = (poppedBusPacket->row < 0x2000)? 0 : (poppedBusPacket->row < 0x4000)? 1 : (poppedBusPacket->row < 0x6000)? 
2 : 3; auto am = config.addrMapping; - switch (poppedBusPacket->busPacketType) { case READ: - bankStates[rank][bank].nextPrecharge = max(currentClockCycle + config.READ_TO_PRE_DELAY, + if(!is_salp_) + { + bankStates[rank][bank].nextPrecharge = max(currentClockCycle + config.READ_TO_PRE_DELAY, bankStates[rank][bank].nextPrecharge); - bankStates[rank][bank].lastCommand = READ; + bankStates[rank][bank].lastCommand = READ; + } + else{ + bankStates_SUB[rank][4*bank + sub].lastCommand = READ; + bankStates_SUB[rank][4*bank + sub].nextPrecharge = max(currentClockCycle + config.READ_TO_PRE_DELAY, + bankStates_SUB[rank][4*bank + sub].nextPrecharge); + } for (size_t i = 0; i < config.NUM_RANKS; i++) { for (size_t j = 0; j < config.NUM_BANKS; j++) @@ -196,8 +298,15 @@ void MemoryController::updateCommandQueue(BusPacket* poppedBusPacket) { if (bankStates[i][j].currentBankState == RowActive) { - setBankStatesRW(i, j, config.BL / 2 + config.tRTRS, - config.READ_TO_WRITE_DELAY); + if(!is_salp_) setBankStatesRW(i, j, config.BL / 2 + config.tRTRS, + config.READ_TO_WRITE_DELAY); //MAYBE ROWMISS FUNCTION + else{ + for(int s = 0; s<4; s++) + { + setBankStatesRW(i, j, s, config.BL / 2 + config.tRTRS, + config.READ_TO_WRITE_DELAY); + } + } } } else @@ -205,83 +314,187 @@ void MemoryController::updateCommandQueue(BusPacket* poppedBusPacket) uint64_t RdCycle = max((am.isSameBankgroup(j, bank) ? 
config.tCCDL : config.tCCDS), config.BL / 2); - setBankStatesRW(i, j, RdCycle, config.READ_TO_WRITE_DELAY); + if(!is_salp_) setBankStatesRW(i, j, RdCycle, config.READ_TO_WRITE_DELAY); + else{ + if(j == bank) + { + for(int s = 0; s<4; s++){ + setBankStatesRW(i, j, s, config.tCCDL, config.tRTW); + //bankStates_SUB[i][j][s].nextPrecharge = max(currentClockCycle + config.READ_TO_PRE_DELAY, + // bankStates_SUB[i][j][s].nextPrecharge); + } + } + else{ + for(int s = 0; s<4; s++) setBankStatesRW(i, j, s, RdCycle, config.READ_TO_WRITE_DELAY); + } + } } } } totalReads++; - break; case WRITE: - bankStates[rank][bank].nextPrecharge = + if(!is_salp_) + { + bankStates[rank][bank].nextPrecharge = max(currentClockCycle + config.WRITE_TO_PRE_DELAY, bankStates[rank][bank].nextPrecharge); - bankStates[rank][bank].lastCommand = WRITE; + bankStates[rank][bank].lastCommand = WRITE; + } + else{ + bankStates_SUB[rank][4*bank + sub].nextPrecharge = + max(currentClockCycle + config.tWTP, + bankStates_SUB[rank][4*bank + sub].nextPrecharge); + bankStates_SUB[rank][4*bank + sub].lastCommand = WRITE; + } for (size_t i = 0; i < config.NUM_RANKS; i++) { for (size_t j = 0; j < config.NUM_BANKS; j++) { if (i != poppedBusPacket->rank) { - if (bankStates[i][j].currentBankState == RowActive) + if(!is_salp_) + { + if (bankStates[i][j].currentBankState == RowActive) //different rank est + { + setBankStatesRW(i, j, config.WRITE_TO_READ_DELAY_R, + config.BL / 2 + config.tRTRS); + } + } + else { - setBankStatesRW(i, j, config.WRITE_TO_READ_DELAY_R, - config.BL / 2 + config.tRTRS); + for(int s = 0; s<4; s++) + { + if(bankStates_SUB[i][j*4+s].currentBankState == RowActive) + setBankStatesRW(i, j, s, config.WRITE_TO_READ_DELAY_R, + config.BL / 2 + config.tRTRS); + } } } else { - uint64_t WrCycle = - max((am.isSameBankgroup(j, bank) ? 
config.tCCDL : config.tCCDS), - config.BL / 2); - setBankStatesRW(i, j, config.WRITE_TO_READ_DELAY_B_LONG, WrCycle); + if(!is_salp_) + { + uint64_t WrCycle = + max((am.isSameBankgroup(j, bank) ? config.tCCDL : config.tCCDS), + config.BL / 2); //burst + setBankStatesRW(i, j, config.WRITE_TO_READ_DELAY_B_LONG, WrCycle); + } + else + { + for(int s = 0; s < 4; s++) + { + uint64_t WrCycle = + max((am.isSameBankgroup(j, bank) ? config.tCCDL : config.tCCDS), + config.BL / 2); //burst + uint64_t RCycle = max((am.isSameBankgroup(j, bank) ? config.WRITE_TO_READ_DELAY_B_LONG : config.WRITE_TO_READ_DELAY_B_SHORT), + config.BL / 2); //burst + if(j == bank) + { + setBankStatesRW(i, j, s, config.tWTR, WrCycle); + //bankStates_SUB[i][j][s].nextPrecharge = max(currentClockCycle + config.READ_TO_PRE_DELAY, + // bankStates_SUB[i][j][s].nextPrecharge); + } + else setBankStatesRW(i, j, s, RCycle, WrCycle); + } + } } } } totalWrites++; - + //cout<<"[MC] updatecommand for write and type is "<busPacketType<<" and clock is "<rank][poppedBusPacket->bank][AddrMapping::findsubarray(poppedBusPacket->row)].openRowAddress<< + //" and bank is "<bank<<" and sub is "<row)<<" and nextpre is "<< + //bankStates_SUB[poppedBusPacket->rank][poppedBusPacket->bank][AddrMapping::findsubarray(poppedBusPacket->row)].nextPrecharge<row; - bankStates[rank][bank].nextPrecharge = - max(currentClockCycle + config.tRAS, bankStates[rank][bank].nextPrecharge); - - // if we are using posted-CAS, the next column access can be sooner than normal - // operation - setBankStatesRW(rank, bank, (config.tRCDRD - config.AL), (config.tRCDWR - config.AL)); - + bankStates[rank][bank].openRowAddress = poppedBusPacket->row; + bankStates[rank][bank].nextPrecharge = + max(currentClockCycle + config.tRAS, bankStates[rank][bank].nextPrecharge); + setBankStatesRW(rank, bank, (config.tRCDRD - config.AL), (config.tRCDWR - config.AL)); + } + else{ + //bool cond = (poppedBusPacket->row!=bankStates_SUB[rank][4*bank + sub].openRowAddress); + 
setBankStates(rank, bank, sub, RowActive, ACTIVATE, 0, + max(currentClockCycle+config.tRC, bankStates_SUB[rank][4*bank + sub].nextActivate)); + bankStates_SUB[rank][4*bank + sub].openRowAddress = poppedBusPacket->row; + bankStates_SUB[rank][4*bank + sub].nextPrecharge = + max(currentClockCycle + config.tRAS, bankStates_SUB[rank][4*bank + sub].nextPrecharge); + setBankStatesRW(rank, bank, sub, (config.tRCDRD - config.AL), (config.tRCDWR - config.AL)); + } for (size_t i = 0; i < config.NUM_BANKS; i++) { - if (i != poppedBusPacket->bank) + if(!is_salp_) { - bankStates[rank][i].nextActivate = - max(currentClockCycle + - (am.isSameBankgroup(i, bank) ? config.tRRDL : config.tRRDS), - bankStates[rank][i].nextActivate); + if (i != poppedBusPacket->bank) + { + bankStates[rank][i].nextActivate = + max(currentClockCycle + + (am.isSameBankgroup(i, bank) ? config.tRRDL : config.tRRDS), + bankStates[rank][i].nextActivate); + } //latency is ready but not salp logic indeed + } + else{ + for(int s = 0; s <4; s++) + { + if(i != poppedBusPacket->bank || s != sub) + { + //bankStates_SUB[rank][i][s].currentBankState = Idle; + bankStates_SUB[rank][i*4+s].nextActivate = max(currentClockCycle + (am.isSameBankgroup(i, bank) ? 
config.tRRDL : config.tRRDS), + bankStates_SUB[rank][i*4+s].nextActivate); + } + } } } - + //cout<<"[MC] updatecommand and type is "<busPacketType<<" and clock is "<rank][poppedBusPacket->bank][AddrMapping::findsubarray(poppedBusPacket->row)].openRowAddress<< + //" and bank is "<bank<<" and sub is "<row)<busPacketType<<" and clock is "<busPacketType); + << poppedBusPacket->busPacketType <<" at cycle "<address, newTransactionChan, newTransactionRank, newTransactionBank, newTransactionRow, newTransactionColumn); - - // if we have room, break up the transaction into the appropriate commands - // and add them to the command queue - if (commandQueue.hasRoomFor(1, newTransactionRank, newTransactionBank)) + //if(transaction->tag!="" && currentClockCycle == 77091) cout<<"[MC] tag is "<tag<<" and cycle is "<getBusPacketType(); - command = new BusPacket(bpType, transaction->address, newTransactionColumn, - newTransactionRow, newTransactionRank, newTransactionBank, - transaction->data, dramsimLog); - command->tag = transaction->tag; - commandQueue.enqueue(command); - - // If we have a read, save the transaction so when the data comes back - // in a bus packet, we can staple it back into a transaction and return it - if (transaction->transactionType == DATA_READ) - pendingReadTransactions.push_back(transaction); + // create read or write command and enqueue it + if((transaction->transactionType==DATA_READ || transaction->transactionType==DATA_WRITE)) + { + BusPacket* command; + BusPacketType bpType = transaction->getBusPacketType(); + //string tg = ""; + //cout<<"[MC] transaction type is "<transactionType<<" and bpType is "<data<address, newTransactionColumn, + newTransactionRow, newTransactionRank, newTransactionBank, + transaction->data, dramsimLog);//, tg); + //if(!transaction->tag.empty() && transaction != nullptr) + //{ + /*if(currentClockCycle > 67700 && (*ranks)[0]->mode_ == dramMode::SB) + { + cout<<"[MC] currentcycle is "<address<tag = transaction->tag; + //} + 
if(!is_salp_) + { + commandQueue.enqueue(command); + } + else + { + commandQueue_SUB.enqueue_sub(command); + } + // If we have a read, save the transaction so when the data comes back + // in a bus packet, we can staple it back into a transaction and return it + if (transaction->transactionType == DATA_READ && transaction!= nullptr) + { + if(pendingReadTransactions.capacity() == pendingReadTransactions.size()) + { + pendingReadTransactions.reserve(pendingReadTransactions.size() + 32); + } + pendingReadTransactions.push_back(transaction); + } + else + { + //transaction = nullptr; + //delete transaction; + } + /* only allow one transaction to be scheduled per cycle -- this + * should + * be a reasonable assumption considering how much logic would be + * required to schedule multiple entries per cycle (parallel data + * lines, switching logic, decision logic) + */ + break; + } + else - // just delete the transaction now that it's a buspacket + { + transaction = nullptr; delete transaction; - /* only allow one transaction to be scheduled per cycle -- this - * should - * be a reasonable assumption considering how much logic would be - * required to schedule multiple entries per cycle (parallel data - * lines, switching logic, decision logic) - */ - break; + break; + } } - /* else // no room, do nothing this cycle { // PRINT( "== Warning - No room in command queue" << endl; + //transaction = nullptr; + //delete transaction; } - */ } } - +//need print for subarray logic void MemoryController::printDebugOnUpate() { if (DEBUG_TRANS_Q) @@ -401,37 +645,69 @@ void MemoryController::printDebugOnUpate() else if (bankStates[i][j].currentBankState == PowerDown) PRINTN("[lowp] "); } - PRINT(""); // effectively just cout< 0) + if(!is_salp_) { - // decrement counters - bankStates[i][j].stateChangeCountdown--; + if (bankStates[i][j].stateChangeCountdown > 0) + { + // decrement counters + bankStates[i][j].stateChangeCountdown--; - // if counter has reached 0, change state - if 
(bankStates[i][j].stateChangeCountdown == 0) + // if counter has reached 0, change state + if (bankStates[i][j].stateChangeCountdown == 0) + { + switch (bankStates[i][j].lastCommand) + { + case REF: + case RFCSB: + case PRECHARGE: + bankStates[i][j].currentBankState = Idle; + break; + default: + break; + } + } + } + } + else + { + for(int s = 0; s<4; s++) { - switch (bankStates[i][j].lastCommand) + if (bankStates_SUB[i][j*4+s].stateChangeCountdown > 0) { - case REF: - case RFCSB: - case PRECHARGE: - bankStates[i][j].currentBankState = Idle; - break; - default: - break; + // decrement counters + bankStates_SUB[i][j*4+s].stateChangeCountdown--; + + // if counter has reached 0, change state + if (bankStates_SUB[i][j*4+s].stateChangeCountdown == 0) + { + switch (bankStates_SUB[i][j*4+s].lastCommand) + { + case REF: + case RFCSB: + case PRECHARGE: + bankStates_SUB[i][j*4+s].currentBankState = Idle; + break; + default: + break; + } + } } } } @@ -441,9 +717,11 @@ void MemoryController::updateBankState() void MemoryController::updateRefresh() { + //cout<<"[MC] update refresh and clock is "<refreshWaiting = true; // PRINT("REF request rank" << refreshRank << " @" << currentClockCycle); refreshCountdown[refreshRank] = config.tREFI / config.tCK; @@ -452,25 +730,29 @@ void MemoryController::updateRefresh() refreshRank = 0; } // if a rank is powered down, make sure we power it up in time for a refresh - else if (powerDown[refreshRank] && refreshCountdown[refreshRank] <= config.tXP) - (*ranks)[refreshRank]->refreshWaiting = true; + else if (refreshCountdown[refreshRank] <= config.tXP) + { + //if(powerDown[refreshRank]) (*ranks)[refreshRank]->refreshWaiting = true; + } } void MemoryController::update() { + //if((*ranks)[0]->getChanId() == 1) cout<<"[MC] update and clock is "<bankStates_SUB[4*4+3].currentBankState<getChanId() == 1) cout<<"[MC] update and clock is "<bankStates_SUB[4*4+3].currentBankState<rank]->receiveFromBus(outgoingCmdPacket); + //cout<<"[MC] cmdCyclesLeft is 0 
and clock is "<bank<<" and row is "<row<rank]->receiveFromBus(outgoingCmdPacket); outgoingCmdPacket = NULL; } } - - // check for outgoing data packets and handle countdowns + //if((*ranks)[0]->getChanId() == 1) cout<<"[MC] update and clock is "<bankStates_SUB[4*4+3].currentBankState<getChanId() == 1) cout<<"[MC] update and clock is "<bankStates_SUB[4*4+3].currentBankState< 0) { for (size_t i = 0; i < writeDataCountdown.size(); i++) writeDataCountdown[i]--; @@ -515,29 +797,36 @@ void MemoryController::update() } outgoingDataPacket = writeDataToSend[0]; - dataCyclesLeft = config.BL / 2; + dataCyclesLeft = config.BL / 2; //which is the bitline totalTransactions++; writeDataCountdown.erase(writeDataCountdown.begin()); writeDataToSend.erase(writeDataToSend.begin()); + //cout<<"[MC] writeDataCountdown is 0 and clock is "<bank<<" and row is " + //<row<busPacketType<<" and bank is "<bank<<" and sub is "<row)< 0) { if (DEBUG_BUS) @@ -548,22 +837,33 @@ void MemoryController::update() // find the pending read transaction to calculate latency for (size_t i = 0; i < pendingReadTransactions.size(); i++) { - if (pendingReadTransactions[i]->address == returnTransaction[0]->address) + if(pendingReadTransactions[i] != nullptr && returnTransaction[0] != nullptr) { - unsigned chan, rank, bank, row, col; - config.addrMapping.addressMapping(returnTransaction[0]->address, chan, rank, bank, - row, col); - memoryContStats->insertHistogram( - currentClockCycle - pendingReadTransactions[i]->timeAdded, rank, bank); - // FIXME. Is it correct? 
- // memcpy(pendingReadTransactions[i]->data, - // returnTransaction[0]->data, config.BL * (JEDEC_DATA_BUS_BITS / 8)); - returnReadData(pendingReadTransactions[i]); - - delete pendingReadTransactions[i]; - pendingReadTransactions.erase(pendingReadTransactions.begin() + i); - foundMatch = true; - break; + if (pendingReadTransactions[i]->address == returnTransaction[0]->address) + { + unsigned chan, rank, bank, row, col, sub; + config.addrMapping.addressMapping(returnTransaction[0]->address, chan, rank, bank, + row, col); + sub = (row < 0x2000)? 0 : (row < 0x4000)? 1 : (row < 0x6000)? 2 : 3; + //if(!is_salp_) memoryContStats->insertHistogram(currentClockCycle - pendingReadTransactions[i]->timeAdded, rank, bank); + //else memoryContStats->insertHistogram(currentClockCycle - pendingReadTransactions[i]->timeAdded, rank, bank, sub); + // FIXME. Is it correct? + // memcpy(pendingReadTransactions[i]->data, + // returnTransaction[0]->data, config.BL * (JEDEC_DATA_BUS_BITS / 8)); + returnReadData(pendingReadTransactions[i]); + pendingReadTransactions[i] = nullptr; + delete pendingReadTransactions[i]; + pendingReadTransactions.erase(pendingReadTransactions.begin() + i); + foundMatch = true; + break; + } + } + else + { + //delete pendingReadTransactions[i]; + //pendingReadTransactions.erase(pendingReadTransactions.begin() + i); + //foundMatch = true; + //break; } } if (!foundMatch) @@ -575,15 +875,14 @@ void MemoryController::update() delete returnTransaction[0]; returnTransaction.erase(returnTransaction.begin()); } - // decrement refresh counters for (size_t i = 0; i < config.NUM_RANKS; i++) refreshCountdown[i]--; for (size_t i = 0; i < config.NUM_BANKS; i++) refreshCountdownBank[i]--; // print debug printDebugOnUpate(); - - commandQueue.step(); + if(is_salp_) commandQueue_SUB.step(); + else commandQueue.step(); } bool MemoryController::WillAcceptTransaction() @@ -594,11 +893,19 @@ bool MemoryController::WillAcceptTransaction() // allows outside source to make request of 
memory system bool MemoryController::addTransaction(Transaction* trans) { + auto am = config.addrMapping; + unsigned newTransactionChan, newTransactionRank, newTransactionBank, newTransactionRow, + newTransactionColumn; + + // pass these in as references so they get set by the addressMapping function + am.addressMapping(trans->address, newTransactionChan, newTransactionRank, + newTransactionBank, newTransactionRow, newTransactionColumn); if (WillAcceptTransaction()) { parentMemorySystem->numOnTheFlyTransactions++; trans->timeAdded = currentClockCycle; transactionQueue.push_back(trans); + //cout<<" [MC] addTransaction and clock is "<address< aluPIMPower = vector(config.NUM_RANKS, 0.0); // per bank variables - vector averageLatency = vector(config.NUM_RANKS * config.NUM_BANKS, 0.0); - vector bandwidth = vector(config.NUM_RANKS * config.NUM_BANKS, 0.0); + vector averageLatency = vector(config.NUM_RANKS * config.NUM_BANKS*4, 0.0); + vector bandwidth = vector(config.NUM_RANKS * config.NUM_BANKS*4, 0.0); for (size_t i = 0; i < config.NUM_RANKS; i++) { for (size_t j = 0; j < config.NUM_BANKS; j++) { - bandwidth[SEQUENTIAL(i, j)] = (((double)(totalReadsPerBank[SEQUENTIAL(i, j)] + - totalWritesPerBank[SEQUENTIAL(i, j)]) * - (double)bytesPerTransaction) / - (1024.0 * 1024.0 * 1024.0)) / - secondsThisEpoch; - averageLatency[SEQUENTIAL(i, j)] = ((float)totalEpochLatency[SEQUENTIAL(i, j)] / - (float)(totalReadsPerBank[SEQUENTIAL(i, j)])) * - config.tCK; - totalBandwidth += bandwidth[SEQUENTIAL(i, j)]; - totalReadsPerRank[i] += totalReadsPerBank[SEQUENTIAL(i, j)]; - totalWritesPerRank[i] += totalWritesPerBank[SEQUENTIAL(i, j)]; - totalActivatesPerRank[i] += totalActivatesPerBank[SEQUENTIAL(i, j)]; + if(is_salp_) + { + for(size_t s = 0; s < 4; s++) + { + bandwidth[SEQUENTIAL_SUB(i, j, s)] = (((double)(totalReadsPerBank[SEQUENTIAL_SUB(i, j, s)] + + totalWritesPerBank[SEQUENTIAL_SUB(i, j, s)]) * + (double)bytesPerTransaction) / + (1024.0 * 1024.0 * 1024.0)) / + secondsThisEpoch; + 
averageLatency[SEQUENTIAL_SUB(i, j, s)] = ((float)totalEpochLatency[SEQUENTIAL_SUB(i, j, s)] / + (float)(totalReadsPerBank[SEQUENTIAL_SUB(i, j, s)])) * + config.tCK; + totalBandwidth += bandwidth[SEQUENTIAL_SUB(i, j, s)]; + totalReadsPerRank[i] += totalReadsPerBank[SEQUENTIAL_SUB(i, j, s)]; + totalWritesPerRank[i] += totalWritesPerBank[SEQUENTIAL_SUB(i, j, s)]; + totalActivatesPerRank[i] += totalActivatesPerBank[SEQUENTIAL_SUB(i, j, s)]; + + } + } + else{ + bandwidth[SEQUENTIAL(i, j)] = (((double)(totalReadsPerBank[SEQUENTIAL(i, j)] + + totalWritesPerBank[SEQUENTIAL(i, j)]) * + (double)bytesPerTransaction) / + (1024.0 * 1024.0 * 1024.0)) / + secondsThisEpoch; + averageLatency[SEQUENTIAL(i, j)] = ((float)totalEpochLatency[SEQUENTIAL(i, j)] / + (float)(totalReadsPerBank[SEQUENTIAL(i, j)])) * + config.tCK; + totalBandwidth += bandwidth[SEQUENTIAL(i, j)]; + totalReadsPerRank[i] += totalReadsPerBank[SEQUENTIAL(i, j)]; + totalWritesPerRank[i] += totalWritesPerBank[SEQUENTIAL(i, j)]; + totalActivatesPerRank[i] += totalActivatesPerBank[SEQUENTIAL(i, j)]; + } } } LOG_OUTPUT ? 
dramsimLog.setf(ios::fixed, ios::floatfield) @@ -779,8 +1115,16 @@ void MemoryControllerStats::printStats(bool finalStats, unsigned myChannel, PRINTC(PRINT_CHAN_STAT, "Rank " << i << ":"); for (size_t j = 0; j < config.NUM_BANKS; j++) { - PRINTC(PRINT_CHAN_STAT, + if(!is_salp_) PRINTC(PRINT_CHAN_STAT, " b" << j << ": " << grandTotalBankAccesses[SEQUENTIAL(i, j)]); + else + { + for(size_t s = 0; s < 4; s++) + { + PRINTC(PRINT_CHAN_STAT, + " b" << j << ": " << grandTotalBankAccesses[SEQUENTIAL_SUB(i, j, s)]); + } + } } } } @@ -805,12 +1149,28 @@ void MemoryControllerStats::resetStats() for (size_t j = 0; j < config.NUM_BANKS; j++) { // XXX: this means the bank list won't be printed for partial epochs - grandTotalBankAccesses[SEQUENTIAL(i, j)] += - (totalReadsPerBank[SEQUENTIAL(i, j)] + totalWritesPerBank[SEQUENTIAL(i, j)]); - totalReadsPerBank[SEQUENTIAL(i, j)] = 0; - totalWritesPerBank[SEQUENTIAL(i, j)] = 0; - totalActivatesPerBank[SEQUENTIAL(i, j)] = 0; - totalEpochLatency[SEQUENTIAL(i, j)] = 0; + if(!is_salp_) + { + grandTotalBankAccesses[SEQUENTIAL(i, j)] += + (totalReadsPerBank[SEQUENTIAL(i, j)] + totalWritesPerBank[SEQUENTIAL(i, j)]); + totalReadsPerBank[SEQUENTIAL(i, j)] = 0; + totalWritesPerBank[SEQUENTIAL(i, j)] = 0; + totalActivatesPerBank[SEQUENTIAL(i, j)] = 0; + totalEpochLatency[SEQUENTIAL(i, j)] = 0; + } + else + { + for(size_t s = 0; s < 4; s++) + { + /*grandTotalBankAccesses[SEQUENTIAL_SUB(i, j, s)] += + (totalReadsPerBank[SEQUENTIAL_SUB(i, j, s)] + totalWritesPerBank[SEQUENTIAL_SUB(i, j, s)]); + totalReadsPerBank[SEQUENTIAL_SUB(i, j, s)] = 0; + totalWritesPerBank[SEQUENTIAL_SUB(i, j, s)] = 0; + totalActivatesPerBank[SEQUENTIAL_SUB(i, j, s)] = 0; + //totalEpochLatency[SEQUENTIAL_SUB(i, j, s)] = 0;*/ + } + + } } burstEnergy[i] = 0; actpreEnergy[i] = 0; diff --git a/src/MemoryController.h b/src/MemoryController.h index 804ec79..245fa0c 100644 --- a/src/MemoryController.h +++ b/src/MemoryController.h @@ -56,13 +56,15 @@ class MemoryController : public 
SimulatorObject public: // functions MemoryController(MemorySystem* ms, CSVWriter& csvOut_, ostream& simLog, Configuration& config); + MemoryController(MemorySystem* ms, CSVWriter& csvOut_, ostream& simLog, Configuration& config, + bool is_salp); virtual ~MemoryController(); bool addTransaction(Transaction* trans); void returnReadData(const Transaction* trans); void receiveFromBus(BusPacket* bpacket); void attachRanks(vector* ranks); - void update(); + void update(); //bool is_salp_? void printDebugOnUpate(); void printStats(bool finalStats = false); void resetStats(); @@ -73,9 +75,11 @@ class MemoryController : public SimulatorObject vector transactionQueue; private: + bool is_salp_; ostream& dramsimLog; vector> bankStates; - + //if subarray mode howabout use bankstates to 3d array? + vector> bankStates_SUB; // functions void insertHistogram(unsigned latencyValue, unsigned rank, unsigned bank); void updateCommandQueue(BusPacket* poppedBusPacket); @@ -83,14 +87,17 @@ class MemoryController : public SimulatorObject void updateBankState(); void updateRefresh(); void setBankStatesRW(size_t rank, size_t bank, uint64_t nextRead, uint64_t nextWrite); + void setBankStatesRW(size_t rank, size_t bank, size_t sub, uint64_t nextRead, uint64_t nextWrite); void setBankStates(size_t rank, size_t bank, CurrentBankState currentBankState, BusPacketType lastCommand, uint64_t stateChangeCountdown, uint64_t nextAct); + void setBankStates(size_t rank, size_t bank, size_t sub, CurrentBankState currentBankState, + BusPacketType lastCommand, uint64_t stateChangeCountdown, uint64_t nextAct); // fields MemorySystem* parentMemorySystem; CommandQueue commandQueue; - BusPacket* poppedBusPacket; + CommandQueue commandQueue_SUB; vector writeDataToSend; vector writeDataCountdown; vector returnTransaction; @@ -110,7 +117,7 @@ class MemoryController : public SimulatorObject vector grandTotalBankAccesses, totalReadsPerBank, totalWritesPerBank; vector totalReadsPerRank, totalWritesPerRank; vector 
totalActivatesPerBank, totalActivatesPerRank, totalEpochLatency; - unsigned refreshRank, refreshBank; + unsigned refreshRank, refreshBank, refreshSubarray; vector refreshCountdown, refreshCountdownBank; Configuration& config; MemoryControllerStats* memoryContStats; @@ -120,6 +127,7 @@ class MemoryController : public SimulatorObject vector backgroundEnergy, burstEnergy, actpreEnergy, refreshEnergy, aluPIMEnergy, readPIMEnergy; double totalBandwidth; + BusPacket* poppedBusPacket; uint64_t totalReads, totalWrites; }; @@ -136,7 +144,7 @@ class MemoryControllerStats vector& backgroundE, vector& burstE, vector& actpreE, vector& refreshE, vector& aluPIME, vector& readPIME, - vector& pendingReadTrans) + vector& pendingReadTrans, bool is_salp_ = false) : csvOut(csvOut_), dramsimLog(simLog), config(configuration), @@ -155,15 +163,17 @@ class MemoryControllerStats refreshEnergy(refreshE), aluPIMEnergy(aluPIME), readPIMEnergy(readPIME), - pendingReadTransactions(pendingReadTrans) + pendingReadTransactions(pendingReadTrans), + is_salp_(is_salp_) { parentMemorySystem = parent; - totalEpochLatency = vector(config.NUM_RANKS * config.NUM_BANKS, 0); + totalEpochLatency = vector(config.NUM_RANKS * config.NUM_BANKS * 4, 0); resetStats(); } void printStats(bool finalStats, unsigned myChannel, uint64_t currentClockCycle); void insertHistogram(unsigned latencyValue, unsigned rank, unsigned bank); + void insertHistogram(unsigned latencyValue, unsigned rank, unsigned bank, unsigned subarray); void resetStats(); private: @@ -193,6 +203,7 @@ class MemoryControllerStats double totalBandwidth; map latencies; vector totalEpochLatency; + bool is_salp_; }; } // namespace DRAMSim diff --git a/src/MemorySystem.cpp b/src/MemorySystem.cpp index 0f16529..5ddce2d 100644 --- a/src/MemorySystem.cpp +++ b/src/MemorySystem.cpp @@ -28,10 +28,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*********************************************************************************/ -#include "MemorySystem.h" - #include +#include "MemorySystem.h" + using namespace std; ofstream cmd_verify_out; // used in Rank.cpp and MemoryController.cpp if @@ -41,17 +41,18 @@ namespace DRAMSim powerCallBack_t MemorySystem::ReportPower = NULL; MemorySystem::MemorySystem(unsigned id, unsigned int megsOfMemory, CSVWriter& csvOut_, - ostream& simLog, Configuration& configuration) + ostream& simLog, Configuration& configuration, bool is_salp) : dramsimLog(simLog), ReturnReadData(NULL), WriteDataDone(NULL), systemID(id), csvOut(csvOut_), numOnTheFlyTransactions(0), - config(configuration) + config(configuration), + is_salp_(is_salp) { currentClockCycle = 0; - + //maybe i need some boolean salp logic to figure whether use subarray level or not DEBUG("===== MemorySystem " << systemID << " ====="); // calculate the total storage based on the devices the user selected and the number of @@ -103,7 +104,7 @@ MemorySystem::MemorySystem(unsigned id, unsigned int megsOfMemory, CSVWriter& cs 8); uint64_t megsOfStoragePerRank = bytePerRank >> 20; - num_ranks_ = (megsOfMemory / Byte2MB(bytePerRank)); + num_ranks_ = 1;//(megsOfMemory / Byte2MB(bytePerRank)); // If this is set, effectively override the number of ranks if (megsOfMemory != 0) @@ -129,14 +130,14 @@ MemorySystem::MemorySystem(unsigned id, unsigned int megsOfMemory, CSVWriter& cs << "MB | " << getConfigParam(UINT, "NUM_RANKS") << " Ranks | " << getConfigParam(UINT, "NUM_DEVICES") << " Devices per rank"); - memoryController = new MemoryController(this, csvOut, dramsimLog, config); + memoryController = new MemoryController(this, csvOut, dramsimLog, config, is_salp_); // TODO: change to other vector constructor? 
- ranks = new vector(); + ranks = new vector(); - for (size_t i = 0; i < num_ranks_; i++) + for (size_t i = 0; i < num_ranks_; i++) //only one rank indeed { - Rank* r = new Rank(dramsimLog, config); + Rank* r = new Rank(dramsimLog, config, is_salp_); r->setChanId(systemID); r->setRankId(i); r->attachMemoryController(memoryController); @@ -229,23 +230,39 @@ void MemorySystem::printStats(bool finalStats) // update the memory systems state void MemorySystem::update() { + if(currentClockCycle % 100 == 0 && systemID == 0) + cout<<"MemorySystem::update() and clock is "<bankStates_SUB[4*4+3].currentBankState<<" and openrow is "<<(*ranks)[0]->bankStates_SUB[4*4+3].openRowAddress<banks_sub[0].size()<<" and bank 1 size is "<<(*ranks)[0]->banks_sub[1].size()<<" and bank 2 size is "<<(*ranks)[0]->banks_sub[2].size()<< + " and bank 3 size is "<<(*ranks)[0]->banks_sub[3].size()<<" and bank 4 size is "<<(*ranks)[0]->banks_sub[4].size()<<" and bank 5 size is "<<(*ranks)[0]->banks_sub[5].size()<<" and bank 6 size is "<<(*ranks)[0]->banks_sub[6].size()<<" and bank 7 size is "<<(*ranks)[0]->banks_sub[7].size()<< + " and bank 8 size is "<<(*ranks)[0]->banks_sub[8].size()<<" and bank 9 size is "<<(*ranks)[0]->banks_sub[9].size()<<" and bank 10 size is "<<(*ranks)[0]->banks_sub[10].size()<<" and bank 11 size is "<<(*ranks)[0]->banks_sub[11].size()<<" and bank 12 size is "<<(*ranks)[0]->banks_sub[12].size()<< + " and bank 13 size is "<<(*ranks)[0]->banks_sub[13].size()<<" and bank 14 size is "<<(*ranks)[0]->banks_sub[14].size()<<" and bank 15 size is "<<(*ranks)[0]->banks_sub[15].size()<update(); } - + //if(systemID==1) cout<<"MemorySystem::update() and clock is "<bankStates_SUB[4*4+3].currentBankState<<" and openrow is "<<(*ranks)[0]->bankStates_SUB[4*4+3].openRowAddress<banks_sub[0].size()<<" and bank 1 size is "<<(*ranks)[0]->banks_sub[1].size()<<" and bank 2 size is "<<(*ranks)[0]->banks_sub[2].size()<< + " and bank 3 size is "<<(*ranks)[0]->banks_sub[3].size()<<" and bank 4 size is 
"<<(*ranks)[0]->banks_sub[4].size()<<" and bank 5 size is "<<(*ranks)[0]->banks_sub[5].size()<<" and bank 6 size is "<<(*ranks)[0]->banks_sub[6].size()<<" and bank 7 size is "<<(*ranks)[0]->banks_sub[7].size()<< + " and bank 8 size is "<<(*ranks)[0]->banks_sub[8].size()<<" and bank 9 size is "<<(*ranks)[0]->banks_sub[9].size()<<" and bank 10 size is "<<(*ranks)[0]->banks_sub[10].size()<<" and bank 11 size is "<<(*ranks)[0]->banks_sub[11].size()<<" and bank 12 size is "<<(*ranks)[0]->banks_sub[12].size()<< + " and bank 13 size is "<<(*ranks)[0]->banks_sub[13].size()<<" and bank 14 size is "<<(*ranks)[0]->banks_sub[14].size()<<" and bank 15 size is "<<(*ranks)[0]->banks_sub[15].size()< 0 && memoryController->WillAcceptTransaction()) { memoryController->addTransaction(pendingTransactions.front()); pendingTransactions.pop_front(); + //if(pendingTransactions.size() < 100) cout<<"pendingTransactions.size() = "<update(); - + //if(systemID==1) cout<<"MemorySystem::update() and clock is "<bankStates_SUB[4*4+3].currentBankState<<" and openrow is "<<(*ranks)[0]->bankStates_SUB[4*4+3].openRowAddress<banks_sub[0].size()<<" and bank 1 size is "<<(*ranks)[0]->banks_sub[1].size()<<" and bank 2 size is "<<(*ranks)[0]->banks_sub[2].size()<< + " and bank 3 size is "<<(*ranks)[0]->banks_sub[3].size()<<" and bank 4 size is "<<(*ranks)[0]->banks_sub[4].size()<<" and bank 5 size is "<<(*ranks)[0]->banks_sub[5].size()<<" and bank 6 size is "<<(*ranks)[0]->banks_sub[6].size()<<" and bank 7 size is "<<(*ranks)[0]->banks_sub[7].size()<< + " and bank 8 size is "<<(*ranks)[0]->banks_sub[8].size()<<" and bank 9 size is "<<(*ranks)[0]->banks_sub[9].size()<<" and bank 10 size is "<<(*ranks)[0]->banks_sub[10].size()<<" and bank 11 size is "<<(*ranks)[0]->banks_sub[11].size()<<" and bank 12 size is "<<(*ranks)[0]->banks_sub[12].size()<< + " and bank 13 size is "<<(*ranks)[0]->banks_sub[13].size()<<" and bank 14 size is "<<(*ranks)[0]->banks_sub[14].size()<<" and bank 15 size is 
"<<(*ranks)[0]->banks_sub[15].size()<step(); this->step(); - + //if(systemID==1) cout<<"MemorySystem::update() and clock is "<bankStates_SUB[4*4+3].currentBankState<<" and openrow is "<<(*ranks)[0]->bankStates_SUB[4*4+3].openRowAddress<banks_sub[0].size()<<" and bank 1 size is "<<(*ranks)[0]->banks_sub[1].size()<<" and bank 2 size is "<<(*ranks)[0]->banks_sub[2].size()<< + " and bank 3 size is "<<(*ranks)[0]->banks_sub[3].size()<<" and bank 4 size is "<<(*ranks)[0]->banks_sub[4].size()<<" and bank 5 size is "<<(*ranks)[0]->banks_sub[5].size()<<" and bank 6 size is "<<(*ranks)[0]->banks_sub[6].size()<<" and bank 7 size is "<<(*ranks)[0]->banks_sub[7].size()<< + " and bank 8 size is "<<(*ranks)[0]->banks_sub[8].size()<<" and bank 9 size is "<<(*ranks)[0]->banks_sub[9].size()<<" and bank 10 size is "<<(*ranks)[0]->banks_sub[10].size()<<" and bank 11 size is "<<(*ranks)[0]->banks_sub[11].size()<<" and bank 12 size is "<<(*ranks)[0]->banks_sub[12].size()<< + " and bank 13 size is "<<(*ranks)[0]->banks_sub[13].size()<<" and bank 14 size is "<<(*ranks)[0]->banks_sub[14].size()<<" and bank 15 size is "<<(*ranks)[0]->banks_sub[15].size()<* ranks; + MemoryController* memoryController; // decide whether salp or not + vector* ranks; //decide whether salp or not deque pendingTransactions; // function pointers @@ -96,6 +98,7 @@ class MemorySystem : public MemoryObject // system and timing parameters unsigned num_ranks_; Configuration& config; + bool is_salp_; }; } // namespace DRAMSim diff --git a/src/MultiChannelMemorySystem.cpp b/src/MultiChannelMemorySystem.cpp index 4b1c936..b574f48 100644 --- a/src/MultiChannelMemorySystem.cpp +++ b/src/MultiChannelMemorySystem.cpp @@ -30,7 +30,6 @@ #include #include // getenv() - #include #include #include @@ -59,7 +58,8 @@ using namespace DRAMSim; MultiChannelMemorySystem::MultiChannelMemorySystem(const string& deviceIniFilename_, const string& systemIniFilename_, const string& pwd_, const string& traceFilename_, - unsigned megsOfMemory_, 
string* visFilename_) + unsigned megsOfMemory_, string* visFilename_, + bool is_salp) : megsOfMemory(megsOfMemory_), deviceIniFilename(deviceIniFilename_), systemIniFilename(systemIniFilename_), @@ -68,7 +68,8 @@ MultiChannelMemorySystem::MultiChannelMemorySystem(const string& deviceIniFilena visFilename(visFilename_), clockDomainCrosser(new ClockDomain::Callback( this, &MultiChannelMemorySystem::actual_update)), - csvOut(new CSVWriter(visDataOut)) + csvOut(new CSVWriter(visDataOut)), + is_salp_(is_salp) { currentClockCycle = 0; if (visFilename) @@ -91,8 +92,9 @@ MultiChannelMemorySystem::MultiChannelMemorySystem(const string& deviceIniFilena for (size_t i = 0; i < configuration->NUM_CHANS; i++) { - MemorySystem* channel = new MemorySystem(i, megsOfMemory / configuration->NUM_CHANS, - (*csvOut), dramsimLog, *configuration); + MemorySystem* channel = new MemorySystem(i, megsOfMemory / 64, + (*csvOut), dramsimLog, *configuration, is_salp_); + //cout<<"channel "<ranks->front()->banks_sub.size()<NUM_CHANS; i++) { @@ -364,6 +364,8 @@ MultiChannelMemorySystem::~MultiChannelMemorySystem() } channels.clear(); + delete configuration; + delete csvOut; // flush our streams and close them up if (LOG_OUTPUT) { @@ -385,15 +387,15 @@ void MultiChannelMemorySystem::update() void MultiChannelMemorySystem::actual_update() { - if (currentClockCycle == 0) + /*if (currentClockCycle == 0) { InitOutputFiles(traceFilename); DEBUG("DRAMSim2 Clock Frequency =" << clockDomainCrosser.clock1 << "Hz, CPU Clock Frequency=" << clockDomainCrosser.clock2 << "Hz"); - } + }*/ - if (currentClockCycle > 0 && currentClockCycle % configuration->EPOCH_LENGTH == 0) + /*if (currentClockCycle > 0 && currentClockCycle % configuration->EPOCH_LENGTH == 0) { (*csvOut) << "ms" << currentClockCycle * configuration->tCK * 1E-6; for (size_t i = 0; i < configuration->NUM_CHANS; i++) @@ -401,7 +403,7 @@ void MultiChannelMemorySystem::actual_update() channels[i]->printStats(false); } csvOut->finalize(); - } + }*/ for 
(size_t i = 0; i < configuration->NUM_CHANS; i++) { @@ -419,13 +421,13 @@ unsigned MultiChannelMemorySystem::findChannelNumber(uint64_t addr) return 0; } - if (!isPowerOfTwo(configuration->NUM_CHANS)) + /*if (!isPowerOfTwo(configuration->NUM_CHANS)) { ERROR("We can only support power of two # of channels.\n" << "I don't know what Intel was thinking, but trying to address map half" " a bit is a neat trick that we're not sure how to do"); abort(); - } + }*/ // only chan is used from this set unsigned channelNumber, rank, bank, row, col; @@ -469,6 +471,8 @@ bool MultiChannelMemorySystem::addTransaction(bool isWrite, uint64_t addr, const return channels[channelNumber]->addTransaction(isWrite, addr, tag, data); } +//using data mode, we can do ... + void MultiChannelMemorySystem::printStats(bool finalStats) { uint64_t cyclesElapsed; diff --git a/src/MultiChannelMemorySystem.h b/src/MultiChannelMemorySystem.h index f2ab86c..8f4cfe4 100644 --- a/src/MultiChannelMemorySystem.h +++ b/src/MultiChannelMemorySystem.h @@ -50,7 +50,7 @@ class MultiChannelMemorySystem : public MemoryObject { public: MultiChannelMemorySystem(const string& dev, const string& sys, const string& pwd, - const string& trc, unsigned megsOfMemory, string* visFilename = NULL); + const string& trc, unsigned megsOfMemory, string* visFilename = NULL, bool is_salp = false); virtual ~MultiChannelMemorySystem(); virtual bool addTransaction(Transaction* trans); @@ -123,6 +123,7 @@ class MultiChannelMemorySystem : public MemoryObject double backgroundPower; unsigned* numFence; + bool is_salp_; Configuration* configuration; }; } // namespace DRAMSim diff --git a/src/PIMBlock.cpp b/src/PIMBlock.cpp index 1357c94..226da82 100644 --- a/src/PIMBlock.cpp +++ b/src/PIMBlock.cpp @@ -10,15 +10,14 @@ * only) **************************************************************************************************/ -#include "PIMBlock.h" - #include #include +#include "PIMBlock.h" #include "PrintMacros.h" #include 
"SystemConfiguration.h" #include "half.h" - +//PIMBLOCK SEEMS LIKE BANK LEVEL LOGIC using namespace DRAMSim; void PIMBlock::add(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) @@ -91,7 +90,7 @@ void PIMBlock::mad(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst, Bu for (int i = 0; i < 16; i++) { dstBst.fp16Data_[i] = - src0Bst.fp16Data_[i] * src1Bst.fp16Data_[i] + src2Bst.fp16Data_[i]; + src0Bst.fp16Data_[i] * src1Bst.fp16Data_[i] + src2Bst.fp16Data_[i]; //put srfA or srfM to src2bst } } else if (pimPrecision_ == FP32) @@ -106,6 +105,34 @@ void PIMBlock::mad(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst, Bu dstBst = src0Bst * src1Bst + src2Bst; } +void PIMBlock::burstmax(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) +{ + if (pimPrecision_ == FP16) + { + for (int i = 0; i < 16; i++) + { + dstBst.fp16Data_[i] = (src0Bst.fp16Data_[i] > src1Bst.fp16Data_[i])?src0Bst.fp16Data_[i]:src1Bst.fp16Data_[i]; + } + } +} +void PIMBlock::reducesum(BurstType& dstBst) +{ + if(pimPrecision_ == FP16) + { + for (int i = 0; i<16; i++) + { + dstBst.fp16Data_[0] += dstBst.fp16Data_[i]; + } + } + if(pimPrecision_ == FP32) + { + for (int i = 0; i<8; i++) + { + dstBst.fp32Data_[0] += dstBst.fp32Data_[i]; + } + } +} + std::string PIMBlock::print() { stringstream ss; diff --git a/src/PIMBlock.h b/src/PIMBlock.h index 42cc228..63f0638 100644 --- a/src/PIMBlock.h +++ b/src/PIMBlock.h @@ -34,9 +34,9 @@ class PIMBlock } PIMBlock(const PIMPrecision& pimPrecision) : pimPrecision_(pimPrecision) {} - BurstType srf; + BurstType srf; // srf has A part and M part each consists of 8 scalas each BurstType grfA[8]; // FIXME: hard coding shcha - BurstType grfB[8]; + BurstType grfB[8]; // use as subarray block, too BurstType mOut; BurstType aOut; @@ -44,7 +44,10 @@ class PIMBlock void mac(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); void mul(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); void mad(BurstType& dstBst, BurstType& src0Bst, 
BurstType& src1Bst, BurstType& src2Bst); - + void div(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + void burstmax(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + void reducesum(BurstType& dstBst); + void ldexpf(BurstType& dstBst, BurstType& src0Bst); std::string print(); private: diff --git a/src/PIMCmd.cpp b/src/PIMCmd.cpp index 06469bd..cd81e8b 100644 --- a/src/PIMCmd.cpp +++ b/src/PIMCmd.cpp @@ -42,6 +42,13 @@ void PIMCmd::fromInt(uint32_t val) break; case PIMCmdType::FILL: + dst_ = PIMOpdType(fromBit(val, 3, 25)); + src0_ = PIMOpdType(fromBit(val, 3, 22)); + dstIdx_ = fromBit(val, 4, 8); + src0Idx_ = fromBit(val, 4, 4); + src1Idx_ = fromBit(val, 4, 0); + isRelu_ = fromBit(val, 1, 12); + break; case PIMCmdType::MOV: dst_ = PIMOpdType(fromBit(val, 3, 25)); src0_ = PIMOpdType(fromBit(val, 3, 22)); @@ -53,8 +60,41 @@ void PIMCmd::fromInt(uint32_t val) case PIMCmdType::MAD: src2_ = PIMOpdType(fromBit(val, 3, 16)); + dst_ = PIMOpdType(fromBit(val, 3, 25)); + src0_ = PIMOpdType(fromBit(val, 3, 22)); + src1_ = PIMOpdType(fromBit(val, 3, 19)); + isAuto_ = fromBit(val, 1, 15); + dstIdx_ = fromBit(val, 4, 8); + src0Idx_ = fromBit(val, 4, 4); + src1Idx_ = fromBit(val, 4, 0); + break; case PIMCmdType::ADD: + dst_ = PIMOpdType(fromBit(val, 3, 25)); + src0_ = PIMOpdType(fromBit(val, 3, 22)); + src1_ = PIMOpdType(fromBit(val, 3, 19)); + isAuto_ = fromBit(val, 1, 15); + dstIdx_ = fromBit(val, 4, 8); + src0Idx_ = fromBit(val, 4, 4); + src1Idx_ = fromBit(val, 4, 0); + break; + case PIMCmdType::MAX: + dst_ = PIMOpdType(fromBit(val, 3, 25)); + src0_ = PIMOpdType(fromBit(val, 3, 22)); + src1_ = PIMOpdType(fromBit(val, 3, 19)); + isAuto_ = fromBit(val, 1, 15); + dstIdx_ = fromBit(val, 4, 8); + src0Idx_ = fromBit(val, 4, 4); + src1Idx_ = fromBit(val, 4, 0); + break; case PIMCmdType::MUL: + dst_ = PIMOpdType(fromBit(val, 3, 25)); + src0_ = PIMOpdType(fromBit(val, 3, 22)); + src1_ = PIMOpdType(fromBit(val, 3, 19)); + isAuto_ = fromBit(val, 1, 15); + dstIdx_ 
= fromBit(val, 4, 8); + src0Idx_ = fromBit(val, 4, 4); + src1Idx_ = fromBit(val, 4, 0); + break; case PIMCmdType::MAC: dst_ = PIMOpdType(fromBit(val, 3, 25)); src0_ = PIMOpdType(fromBit(val, 3, 22)); @@ -72,7 +112,7 @@ void PIMCmd::fromInt(uint32_t val) void PIMCmd::validationCheck() const { - if (type_ == PIMCmdType::MOV || type_ == PIMCmdType::FILL) + /*if (type_ == PIMCmdType::MOV || type_ == PIMCmdType::FILL) { if (dst_ == PIMOpdType::EVEN_BANK || dst_ == PIMOpdType::ODD_BANK) { @@ -93,8 +133,8 @@ void PIMCmd::validationCheck() const exit(-1); } } - */ - } + + } */ } uint32_t PIMCmd::toInt() const @@ -127,8 +167,43 @@ uint32_t PIMCmd::toInt() const case PIMCmdType::MAD: val |= toBit(int(src2_), 3, 16); + val |= toBit(int(dst_), 3, 25); + val |= toBit(int(src0_), 3, 22); + val |= toBit(int(src1_), 3, 19); + val |= toBit(isAuto_, 1, 15); + val |= toBit(dstIdx_, 4, 8); + val |= toBit(src0Idx_, 4, 4); + val |= toBit(src1Idx_, 4, 0); + break; + case PIMCmdType::ADD: + val |= toBit(int(dst_), 3, 25); + val |= toBit(int(src0_), 3, 22); + val |= toBit(int(src1_), 3, 19); + val |= toBit(isAuto_, 1, 15); + val |= toBit(dstIdx_, 4, 8); + val |= toBit(src0Idx_, 4, 4); + val |= toBit(src1Idx_, 4, 0); + break; + case PIMCmdType::MAX: + val |= toBit(int(dst_), 3, 25); + val |= toBit(int(src0_), 3, 22); + val |= toBit(int(src1_), 3, 19); + val |= toBit(isAuto_, 1, 15); + val |= toBit(dstIdx_, 4, 8); + val |= toBit(src0Idx_, 4, 4); + val |= toBit(src1Idx_, 4, 0); + break; case PIMCmdType::MUL: + val |= toBit(int(dst_), 3, 25); + val |= toBit(int(src0_), 3, 22); + val |= toBit(int(src1_), 3, 19); + val |= toBit(isAuto_, 1, 15); + val |= toBit(dstIdx_, 4, 8); + val |= toBit(src0Idx_, 4, 4); + val |= toBit(src1Idx_, 4, 0); + break; + case PIMCmdType::MAC: val |= toBit(int(dst_), 3, 25); val |= toBit(int(src0_), 3, 22); diff --git a/src/PIMCmd.h b/src/PIMCmd.h index 3af4839..4bc2551 100644 --- a/src/PIMCmd.h +++ b/src/PIMCmd.h @@ -18,6 +18,7 @@ #include #include + using namespace 
std; namespace DRAMSim @@ -39,7 +40,10 @@ enum class PIMCmdType REV5, REV6, JUMP, - EXIT + EXIT, + LDEXPF, + COPY, + MAX }; enum class PIMOpdType @@ -48,10 +52,13 @@ enum class PIMOpdType M_OUT, EVEN_BANK, ODD_BANK, + BANK, GRF_A, GRF_B, SRF_M, - SRF_A + SRF_A, + GRF, + BLF, }; class PIMCmd @@ -130,12 +137,11 @@ class PIMCmd dstIdx_(dst_idx), src0Idx_(src0_idx), src1Idx_(src1_idx), - isRelu_(is_relu) + isRelu_(is_relu) // we can use this for fill or mov logic { } - - PIMCmd(PIMCmdType type, PIMOpdType dst, PIMOpdType src0, PIMOpdType src1, int is_auto = 0, - int dst_idx = 0, int src0_idx = 0, int src1_idx = 0) +/* PIMCmd(PIMCmdType type, PIMOpdType dst, PIMOpdType src0, PIMOpdType src1, int is_auto = 0, + int dst_idx = 0, int src0_idx = 0, int src1_idx = 0, int is_relu = 0) : type_(type), dst_(dst), src0_(src0), @@ -146,12 +152,29 @@ class PIMCmd isAuto_(is_auto), dstIdx_(dst_idx), src0Idx_(src0_idx), + src1Idx_(src1_idx), + isRelu_(is_relu) // we can use this for fill or mov logic + { + } +*/ + PIMCmd(PIMCmdType type, PIMOpdType dst, PIMOpdType src0, PIMOpdType src1, int is_auto = 0, + int dst_idx = 0, int src0_idx = 0, int src1_idx = 0) + : type_(type), + dst_(dst), + src0_(src0), + src1_(src1), + src2_(PIMOpdType::A_OUT), //WH + loopCounter_(0), + loopOffset_(0), + isAuto_(is_auto), + dstIdx_(dst_idx), + src0Idx_(src0_idx), src1Idx_(src1_idx) { } PIMCmd(PIMCmdType type, PIMOpdType dst, PIMOpdType src0, PIMOpdType src1, PIMOpdType src2, - int is_auto = 0, int dst_idx = 0, int src0_idx = 0, int src1_idx = 0) + int is_auto = 0, int dst_idx = 0, int src0_idx = 0, int src1_idx = 0) //mad : type_(type), dst_(dst), src0_(src0), @@ -166,6 +189,7 @@ class PIMCmd { } + uint32_t bitmask(int bit) const { return (1 << bit) - 1; @@ -192,10 +216,16 @@ class PIMCmd return "EVEN_BANK"; case PIMOpdType::ODD_BANK: return "ODD_BANK"; + case PIMOpdType::BANK: + return "BANK"; + case PIMOpdType::BLF: + return "BLF"; case PIMOpdType::GRF_A: return "GRF_A[" + to_string(idx) + "]"; case 
PIMOpdType::GRF_B: return "GRF_B[" + to_string(idx) + "]"; + case PIMOpdType::GRF: + return "GRF[" + to_string(idx) + "]"; case PIMOpdType::SRF_M: return "SRF_M[" + to_string(idx) + "]"; case PIMOpdType::SRF_A: @@ -223,10 +253,14 @@ class PIMCmd return "ADD"; case PIMCmdType::MUL: return "MUL"; + case PIMCmdType::MAX: + return "MAX"; case PIMCmdType::MAC: return "MAC"; case PIMCmdType::MAD: return "MAD"; + case PIMCmdType::COPY: + return "COPY"; default: return "NOT_DEFINED"; } diff --git a/src/PIMRank.cpp b/src/PIMRank.cpp index 81d1ed8..3d167b8 100644 --- a/src/PIMRank.cpp +++ b/src/PIMRank.cpp @@ -10,18 +10,17 @@ * only) **************************************************************************************************/ -#include "PIMRank.h" - #include #include #include "AddressMapping.h" #include "PIMCmd.h" +#include "PIMRank.h" using namespace std; using namespace DRAMSim; -PIMRank::PIMRank(ostream& simLog, Configuration& configuration) +PIMRank::PIMRank(ostream& simLog, Configuration& configuration, bool is_salp) : chanId(-1), rankId(-1), dramsimLog(simLog), @@ -30,17 +29,26 @@ PIMRank::PIMRank(ostream& simLog, Configuration& configuration) numJumpToBeTaken_(-1), lastRepeatIdx_(-1), numRepeatToBeDone_(-1), + useAllGrf_(true), crfExit_(false), config(configuration), pimBlocks(getConfigParam(UINT, "NUM_PIM_BLOCKS"), - PIMBlock(PIMConfiguration::getPIMPrecision())) + PIMBlock(PIMConfiguration::getPIMPrecision())), + sblocks(getConfigParam(UINT, "NUM_S_BLOCKS"), + SBlock(PIMConfiguration::getPIMPrecision())), + is_salp_(is_salp) { currentClockCycle = 0; + rank = nullptr; } void PIMRank::attachRank(Rank* r) { this->rank = r; + /*cout<<"[pimrank] attach rank and bank0 size is "<banks_sub[0].size()<<" and bank1 size is "<banks_sub[1].size()<<" and bank2 size is "<banks_sub[2].size()<<" and bank3 size is "<banks_sub[3].size()<< + " and bank4 size is "<banks_sub[4].size()<<" and bank5 size is "<banks_sub[5].size()<<" and bank6 size is "<banks_sub[6].size()<<" and bank7 
size is "<banks_sub[7].size()<< + " and bank8 size is "<banks_sub[8].size()<<" and bank9 size is "<banks_sub[9].size()<<" and bank10 size is "<banks_sub[10].size()<<" and bank11 size is "<banks_sub[11].size()<< + " and bank12 size is "<banks_sub[12].size()<<" and bank13 size is "<banks_sub[13].size()<<" and bank14 size is "<banks_sub[14].size()<<" and bank15 size is "<banks_sub[15].size()<how to control each pim block operate solely? not together? +void PIMRank::controlPIM(BusPacket* packet) //make salp-control pim { - uint8_t grf_a_zeroize = packet->data->u8Data_[20]; - if (grf_a_zeroize) + if(!is_salp_) { - if (DEBUG_CMD_TRACE) + uint8_t grf_a_zeroize = packet->data->u8Data_[20]; + if (grf_a_zeroize) { - PRINTC(RED, OUTLOG_CH_RA("GRF_A_ZEROIZE")); + if (DEBUG_CMD_TRACE) + { + PRINTC(RED, OUTLOG_CH_RA("GRF_A_ZEROIZE")); + } + BurstType burst_zero; + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + for (int i = 0; i < 8; i++) pimBlocks[pb].grfA[i] = burst_zero; + } } - BurstType burst_zero; - for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + uint8_t grf_b_zeroize = packet->data->u8Data_[21]; + if (grf_b_zeroize) { - for (int i = 0; i < 8; i++) pimBlocks[pb].grfA[i] = burst_zero; + if (DEBUG_CMD_TRACE) + { + PRINTC(RED, OUTLOG_CH_RA("GRF_B_ZEROIZE")); + } + BurstType burst_zero; + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + for (int i = 0; i < 8; i++) pimBlocks[pb].grfB[i] = burst_zero; //set idx.. 
+ } } } - uint8_t grf_b_zeroize = packet->data->u8Data_[21]; - if (grf_b_zeroize) - { - if (DEBUG_CMD_TRACE) - { - PRINTC(RED, OUTLOG_CH_RA("GRF_B_ZEROIZE")); - } - BurstType burst_zero; - for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + else{ + uint8_t grf_zeroize = packet->data->u8Data_[20]; + if(grf_zeroize) { - for (int i = 0; i < 8; i++) pimBlocks[pb].grfB[i] = burst_zero; + if (DEBUG_CMD_TRACE) + { + PRINTC(RED, OUTLOG_CH_RA("GRF_ZEROIZE")); + } + BurstType burst_zero; + for (int sb = 0; sb < config.NUM_S_BLOCKS; sb++) + { + for (int i = 0; i < 4; i++) sblocks[sb].grf[i] = burst_zero; + sblocks[sb].blf = burst_zero; + } } } pimOpMode_ = packet->data->u8Data_[0] & 1; + //pimOpMode_single_ = packet->data->u8Data_[0] & 2; toggleEvenBank_ = !(packet->data->u8Data_[16] & 1); toggleOddBank_ = !(packet->data->u8Data_[16] & 2); - toggleRa13h_ = (packet->data->u8Data_[16] & 4); - - if (pimOpMode_) + toggleRa12h_ = (packet->data->u8Data_[16] & 4); + useAllGrf_ = packet->data->u8Data_[10] & 1; //how to set? + //cout<<"[pimrank] control pim and clock is "<mode_ = dramMode::HAB_PIM; pimPC_ = 0; lastJumpIdx_ = numJumpToBeTaken_ = lastRepeatIdx_ = numRepeatToBeDone_ = -1; @@ -117,7 +149,7 @@ bool PIMRank::isToggleCond(BusPacket* packet) { if (pimOpMode_ && !crfExit_) { - if (toggleRa13h_) + if (toggleRa12h_) { if (toggleEvenBank_ && ((packet->bank & 1) == 0)) return true; @@ -125,7 +157,7 @@ bool PIMRank::isToggleCond(BusPacket* packet) return true; return false; } - else if (!toggleRa13h_ && !isReservedRA(packet->row)) + else if (!toggleRa12h_ && !(packet->row & (1 << 12))) { if (toggleEvenBank_ && ((packet->bank & 1) == 0)) return true; @@ -141,9 +173,11 @@ bool PIMRank::isToggleCond(BusPacket* packet) } } -void PIMRank::readHab(BusPacket* packet) + +//to control pim block for solely executing --> use pch and bank to do it.. 
+void PIMRank::readHab(BusPacket* packet) { - if (isReservedRA(packet->row)) // ignored + if (packet->row & (1 << 12)) // ignored { PRINTC(GRAY, OUTLOG_ALL("READ")); } @@ -151,24 +185,74 @@ void PIMRank::readHab(BusPacket* packet) { PRINTC(GRAY, OUTLOG_ALL("BANK_TO_PIM")); #ifndef NO_STORAGE - int grf_id = getGrfIdx(packet->column); - for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + int grf_id; + int grf_id_sub; + if (useAllGrf_) //THINK ABOUT THIS... WE WOULD USE HTIS? + { + grf_id = packet->column & 0xf; + grf_id_sub = packet->column & 0x4; + if(!is_salp_) + { + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + { + rank->banks[pb * 2 + packet->bank].read(packet); + if (grf_id < 8) + pimBlocks[pb].grfA[grf_id] = *(packet->data); + else + pimBlocks[pb].grfB[grf_id - 8] = *(packet->data); + } + } + } + else{ + for(int sb = 0; sb < config.NUM_S_BLOCKS; sb++) + { + rank->banks_sub[4*sb+grf_id_sub].read(packet); + if(grf_id_sub < 3) + sblocks[sb].grf[grf_id_sub] = *(packet->data); + else + sblocks[sb].blf = *(packet->data); + } + } + } + else { - rank->banks[pb * 2 + packet->bank].read(packet); - pimBlocks[pb].grfB[grf_id] = *(packet->data); + grf_id = getGrfIdx(packet->column); + grf_id_sub = getGrfIdxsalp(packet->column); + if(!is_salp_) + { + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + rank->banks[pb * 2 + packet->bank].read(packet); + pimBlocks[pb].grfB[grf_id] = *(packet->data); + } + } + else + { + for (int sb = 0; sb row < 0x2000) ? 0 : (packet->row < 0x4000) ? 1 : (packet->row < 0x6000) ? 
2 : 3; + rank->banks_sub[sb*4+sub].read(packet); + sblocks[sb].grf[grf_id_sub] = *(packet->data); + } + } } #endif } } + void PIMRank::writeHab(BusPacket* packet) { - if (packet->row == config.PIM_REG_RA) // WRIO to PIM Broadcasting + //cout<<"[pimrank] control pim and clock is "<row<<" and col is "<column<<" and data is " <data<row == 0x3fff) // WRIO to PIM Broadcasting { if (packet->column == 0x00) - controlPIM(packet); + { + controlPIM(packet); //same for + } if ((0x08 <= packet->column && packet->column <= 0x0f) || - (0x18 <= packet->column && packet->column <= 0x1f)) + (0x18 <= packet->column && packet->column <= 0x1f)) //GRFA = 0X08~0X0F / GRFB = 0X18~0X1F { if (DEBUG_CMD_TRACE) { @@ -178,12 +262,33 @@ void PIMRank::writeHab(BusPacket* packet) PRINTC(GREEN, OUTLOG_B_GRF_B("BWRITE_GRF_B")); } #ifndef NO_STORAGE - for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + if(!is_salp_) + { + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + if (packet->column - 8 < 8) + { + pimBlocks[pb].grfA[packet->column - 0x8] = *(packet->data); + } + else + pimBlocks[pb].grfB[packet->column - 0x18] = *(packet->data); + } + } + else { - if (packet->column - 8 < 8) - pimBlocks[pb].grfA[packet->column - 0x8] = *(packet->data); - else - pimBlocks[pb].grfB[packet->column - 0x18] = *(packet->data); + for(int sb = 0; sbrow<<" and col is "<column<<" and data is "<data<column - 12 == 0) sblocks[sb].blf = *(packet->data); + else if(packet->column >= 0x8 && packet->column <= 0x11) + { + //cout<<"[pimrank] write grf and clock is "<row<<" and col is "<column<<" and data is "<data<column - 0x8] = *(packet->data); + } + } + } } #endif } @@ -191,7 +296,7 @@ void PIMRank::writeHab(BusPacket* packet) { if (DEBUG_CMD_TRACE) PRINTC(GREEN, OUTLOG_B_CRF("BWRITE_CRF")); - crf.bst[packet->column - 0x04] = *(packet->data); + crf.bst[packet->column - 0x04] = *(packet->data); //same for salpim and } else if (packet->column == 0x1) { @@ -200,7 +305,7 @@ void PIMRank::writeHab(BusPacket* packet) for 
(int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) pimBlocks[pb].srf = *(packet->data); } } - else if (isReservedRA(packet->row)) + else if (packet->row & 1 << 12) { PRINTC(GRAY, OUTLOG_ALL("WRITE")); } @@ -209,29 +314,60 @@ void PIMRank::writeHab(BusPacket* packet) PRINTC(GREEN, OUTLOG_ALL("PIM_TO_BANK")); #ifndef NO_STORAGE - int grf_id = getGrfIdx(packet->column); - for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + int grf_id = packet->column & 0xf; + int grf_id_sub = packet->column & 0x4; + if (useAllGrf_) { - if (packet->bank == 0) + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) { - *(packet->data) = pimBlocks[pb].grfA[grf_id]; - rank->banks[pb * 2].write(packet); // basically read from bank; + if (grf_id < 8) + *(packet->data) = pimBlocks[pb].grfA[grf_id]; + else + *(packet->data) = pimBlocks[pb].grfB[grf_id - 8]; + //*(packet->data) = sblocks[pb].grf[grf_id_sub]; + rank->banks[pb * 2 + packet->bank].write(packet); + //rank->banks_sub[pb][grf_id].write(packet); } - else if (packet->bank == 1) + } + else + { + if(!is_salp_) + { + for (int pb = 0; pb < config.NUM_PIM_BLOCKS; pb++) + { + if (packet->bank == 0) + { + *(packet->data) = pimBlocks[pb].grfA[grf_id]; + rank->banks[pb * 2].write(packet); // basically read from bank; + } + else if (packet->bank == 1) + { + *(packet->data) = pimBlocks[pb].grfB[grf_id]; + rank->banks[pb * 2 + 1].write(packet); // basically read from bank. + } + } + } + else { - *(packet->data) = pimBlocks[pb].grfB[grf_id]; - rank->banks[pb * 2 + 1].write(packet); // basically read from bank. + int sub = (packet->row < 0x2000) ? 0 : (packet->row < 0x4000) ? 1 : (packet->row < 0x6000) ? 
2 : 3; + for (int sb = 0; sb < config.NUM_S_BLOCKS; sb++) + { + if(grf_id_sub < 4) *(packet->data) = sblocks[sb].grf[grf_id_sub]; + else *(packet->data) = sblocks[sb].blf; + rank->banks_sub[sb*4+sub].write(packet); + } } } #endif } } +//for subarray logic we need other read/write opds (such as s_reg/c_reg) void PIMRank::readOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packet, int idx, bool is_auto, bool is_mac) { - idx = getGrfIdx(idx); - + idx = (is_salp_)?getGrfIdxsalp(idx):getGrfIdx(idx); + unsigned sub = (packet->row<0x2000)?0:(packet->row<0x4000)?1:(packet->row<0x6000)?2:3; switch (type) { case PIMOpdType::A_OUT: @@ -240,76 +376,124 @@ void PIMRank::readOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packet case PIMOpdType::M_OUT: bst = pimBlocks[pb].mOut; return; - case PIMOpdType::EVEN_BANK: - if (packet->bank % 2 != 0) - PRINT("Warning, CRF bank coding and bank id from packet are inconsistent"); - rank->banks[pb * 2].read(packet); // basically read from bank. - bst = *(packet->data); - return; - case PIMOpdType::ODD_BANK: - if (packet->bank % 2 == 0) - PRINT("Warning, CRF bank coding and bank id from packet are inconsistent"); - rank->banks[pb * 2 + 1].read(packet); // basically read from bank. - bst = *(packet->data); + case PIMOpdType::BANK: + if(is_salp_) + { + //cout<<"row is "<row<<" and sub is "<getChanId()<<" and size is "<banks_sub.size()<banks_sub[pb].size() ==4) + //{ + rank->banks_sub[pb*4+sub].read(packet); + //if(packet->data != nullptr) + bst = *(packet->data); + //} + } + else{} return; case PIMOpdType::GRF_A: - bst = pimBlocks[pb].grfA[(is_auto) ? getGrfIdx(packet->column) : idx]; - return; - case PIMOpdType::GRF_B: if (is_auto) - bst = pimBlocks[pb].grfB[(is_mac) ? getGrfIdxHigh(packet->row, packet->column) + bst = pimBlocks[pb].grfA[(is_mac) ? 
getGrfIdxHigh(packet->row, packet->column) : getGrfIdx(packet->column)]; else - bst = pimBlocks[pb].grfB[idx]; + bst = pimBlocks[pb].grfA[idx]; + return; + case PIMOpdType::GRF_B: //why + bst = pimBlocks[pb].grfB[(is_auto) ? getGrfIdx(packet->column) : idx]; return; + case PIMOpdType::GRF: //no auto mode indeed + //cout<<"[pimrank] read grf and clock is "<bank % 2 != 0) + PRINT("Warning, CRF bank coding and bank id from packet are inconsistent"); + //cout<<"[pimrank] read even bank and clock is "<banks[pb * 2].read(packet); // basically read from bank. + bst = *(packet->data); + } + return; + case PIMOpdType::ODD_BANK: + if(!is_salp_) + { + if (packet->bank % 2 == 0) + PRINT("Warning, CRF bank coding and bank id from packet are inconsistent"); + //cout<<"[pimrank] read odd bank and clock is "<banks[pb * 2 + 1].read(packet); // basically read from bank. + bst = *(packet->data); + } + return; } + return; } void PIMRank::writeOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packet, int idx, - bool is_auto, bool is_mac) + bool is_auto, bool is_mac) //pb-->pb_id { - idx = getGrfIdx(idx); - + idx = (is_salp_)?getGrfIdxsalp(idx):getGrfIdx(idx); + int sub = (packet->row<0x2000)?0:(packet->row<0x4000)?1:(packet->row<0x6000)?2:3; switch (type) { case PIMOpdType::A_OUT: - pimBlocks[pb].aOut = bst; + pimBlocks[pb].aOut = bst; //which means a_out return; case PIMOpdType::M_OUT: pimBlocks[pb].mOut = bst; return; - case PIMOpdType::EVEN_BANK: - if (packet->bank % 2 != 0) + case PIMOpdType::BANK: + //cout<<"[pimrank] write bank and clock is "<banks_sub[pb].size()<data) = bst; + //if(rank->banks_sub[pb].size() == 4) + rank->banks_sub[pb*4+sub].write(packet); } - *(packet->data) = bst; - rank->banks[pb * 2].write(packet); // basically read from bank. 
return; - case PIMOpdType::ODD_BANK: - if (packet->bank % 2 == 0) + case PIMOpdType::GRF_A: + if(!is_salp_) { - PRINT("CRF bank coding and bank id from packet are inconsistent"); - exit(-1); + if (is_auto) + pimBlocks[pb].grfA[(is_mac) ? getGrfIdxHigh(packet->row, packet->column) + : getGrfIdx(packet->column)] = bst; + else + pimBlocks[pb].grfA[idx] = bst; } - *(packet->data) = bst; - rank->banks[pb * 2 + 1].write(packet); // basically read from bank. - return; - case PIMOpdType::GRF_A: - pimBlocks[pb].grfA[(is_auto) ? getGrfIdx(packet->column) : idx] = bst; return; case PIMOpdType::GRF_B: - if (is_auto) - pimBlocks[pb].grfB[(is_mac) ? getGrfIdxHigh(packet->row, packet->column) - : getGrfIdx(packet->column)] = bst; - else - pimBlocks[pb].grfB[idx] = bst; + if(!is_salp_) + { + if (is_auto) + pimBlocks[pb].grfB[getGrfIdx(packet->column)] = bst; + else + pimBlocks[pb].grfB[idx] = bst; + } + return; + case PIMOpdType::GRF: + //cout<<"[pimrank] write grf and clock is "<row, packet->column) + : getGrfIdxsalp(packet->column)] = bst; + else + sblocks[pb].grf[idx] = bst; + } + return; + case PIMOpdType::BLF: + sblocks[pb].blf = bst; return; case PIMOpdType::SRF_M: pimBlocks[pb].srf = bst; @@ -317,13 +501,42 @@ void PIMRank::writeOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packe case PIMOpdType::SRF_A: pimBlocks[pb].srf = bst; return; + case PIMOpdType::EVEN_BANK: + if(!is_salp_) + { + if (packet->bank % 2 != 0) + { + PRINT("CRF bank coding and bank id from packet are inconsistent"); + cout<<"crf bank coding and bank id from packet are inconsistent"<data) = bst; + rank->banks[pb * 2].write(packet); // basically read from bank. + } + return; + case PIMOpdType::ODD_BANK: + if(!is_salp_) + { + if (packet->bank % 2 == 0) + { + PRINT("CRF bank coding and bank id from packet are inconsistent"); //bank data? how to connect crf to bank idx? 
+ exit(-1); + } + //cout<<"[pimrank] read even bank and clock is "<data) = bst; + rank->banks[pb * 2 + 1].write(packet); // basically read from bank. + } + return; } + return; } void PIMRank::doPIM(BusPacket* packet) { PIMCmd cCmd; - packet->row = masked2accessibleRA(packet->row); + //cout<<"[pimrank] do pim and clock is "<row<<" and col is "<column<row = packet->row & ((1 << 16) - 1); //which is 0x7fff + //cout<<"[pimrank] do pim and clock is "<row<<" and col is "<column<<" and cmd type is " < 0) @@ -375,9 +588,9 @@ void PIMRank::doPIM(BusPacket* packet) else lastRepeatIdx_ = -1; } - else if (cCmd.type_ == PIMCmdType::NOP) + else if(cCmd.type_ == PIMCmdType::MOV) { - if (lastRepeatIdx_ != pimPC_) + if (lastRepeatIdx_!=pimPC_) { lastRepeatIdx_ = pimPC_; numRepeatToBeDone_ = cCmd.loopCounter_; @@ -391,22 +604,62 @@ void PIMRank::doPIM(BusPacket* packet) else lastRepeatIdx_ = -1; } - - for (int pimblock_id = 0; pimblock_id < config.NUM_PIM_BLOCKS; pimblock_id++) + else if (cCmd.type_ == PIMCmdType::NOP) { - if (DEBUG_PIM_BLOCK && pimblock_id == 0) + if (lastRepeatIdx_ != pimPC_) { - PRINT(pimBlocks[pimblock_id].print()); - PRINT("[BANK_R]" << packet->data->binToStr()); - PRINT("[CMD]" << bitset<32>(cCmd.toInt()) << "(" << cCmd.toStr() << ")"); + lastRepeatIdx_ = pimPC_; + numRepeatToBeDone_ = cCmd.loopCounter_; } - doPIMBlock(packet, cCmd, pimblock_id); - - if (DEBUG_PIM_BLOCK && pimblock_id == 0) + if (numRepeatToBeDone_ > 0) + { + pimPC_ -= 1; + numRepeatToBeDone_--; + } + else + lastRepeatIdx_ = -1; + } + if(!is_salp_){ + for (int pimblock_id = 0; pimblock_id < config.NUM_PIM_BLOCKS; pimblock_id++) + { + if (DEBUG_PIM_BLOCK && pimblock_id == 0) + { + PRINT(pimBlocks[pimblock_id].print()); + PRINT("[BANK_R]" << packet->data->binToStr()); + PRINT("[CMD]" << bitset<32>(cCmd.toInt()) << "(" << cCmd.toStr() << ")"); + } + + doPIMBlock(packet, cCmd, pimblock_id); + + if (DEBUG_PIM_BLOCK && pimblock_id == 0) + { + PRINT(pimBlocks[pimblock_id].print()); + 
PRINT("----------"); + } + } + } + else + { + for (int sblock_id = 0; sblock_id < config.NUM_S_BLOCKS; sblock_id++) { - PRINT(pimBlocks[pimblock_id].print()); - PRINT("----------"); + //cout<<"[pimrank] do pim and clock is "<row<<" and col is "<column<<" and id is " + // <data->binToStr()); + PRINT("[CMD]" << bitset<32>(cCmd.toInt()) << "(" << cCmd.toStr() << ")"); + } + //cout<<"[pimrank] do pim and clock is "<row<<" and col is "<column<<" and id is " + //<row<<" and col is "<column<<" and id is"<< + //pimblock_id<row<<" and col is "<column<row<<" and col is "<column<busPacketType == WRITE) { int grf_id = getGrfIdx(packet->column); - if (packet->bank == 0) - { - *(packet->data) = pimBlocks[pimblock_id].grfA[grf_id]; - rank->banks[pimblock_id * 2].write(packet); // basically read from bank; + int grf_id_sub = getGrfIdxsalp(packet->column); + //cout<<"[pimrank] do pim block and clock is "<row<<" and col is "<column<bank == 0) + { + *(packet->data) = pimBlocks[pimblock_id].grfA[grf_id]; + rank->banks[pimblock_id * 2].write(packet); + } + else if (packet->bank == 1) + { + *(packet->data) = pimBlocks[pimblock_id].grfB[grf_id]; + rank->banks[pimblock_id * 2 + 1].write(packet); + } } - else if (packet->bank == 1) + else { - *(packet->data) = pimBlocks[pimblock_id].grfB[grf_id]; - rank->banks[pimblock_id * 2 + 1].write(packet); // basically read from bank. 
+ *(packet->data) = sblocks[pimblock_id].grf[grf_id_sub]; + //if(rank->banks_sub[pimblock_id].size() == 4) + rank->banks_sub[pimblock_id*4+grf_id_sub].write(packet); } } } +//we need some store logic, but this risc-v form has only load logic(fill) --> need to prove diff --git a/src/PIMRank.h b/src/PIMRank.h index 70a756b..fe29041 100644 --- a/src/PIMRank.h +++ b/src/PIMRank.h @@ -22,6 +22,7 @@ #include "PIMCmd.h" #include "Rank.h" #include "SimulatorObject.h" +#include "SBlock.h" using namespace std; using namespace DRAMSim; @@ -64,10 +65,10 @@ class PIMRank : public SimulatorObject ostream& dramsimLog; Configuration& config; int pimPC_, lastJumpIdx_, numJumpToBeTaken_, lastRepeatIdx_, numRepeatToBeDone_; - bool pimOpMode_, toggleEvenBank_, toggleOddBank_, toggleRa13h_, crfExit_; + bool pimOpMode_, pimOpMode_single_, toggleEvenBank_, toggleOddBank_, toggleRa12h_, useAllGrf_, crfExit_; public: - PIMRank(ostream& simLog, Configuration& configuration); + PIMRank(ostream& simLog, Configuration& configuration, bool is_salp); ~PIMRank() {} void attachRank(Rank* r); @@ -77,10 +78,13 @@ class PIMRank : public SimulatorObject void setRankId(int id); void update(); void readHab(BusPacket* packet); + //void readSab(BusPacket* packet); //use single level pim block logic to.... after double bank... void writeHab(BusPacket* packet); + //void writeSab(BusPacket* packet); void doPIM(BusPacket* packet); void doPIMBlock(BusPacket* packet, PIMCmd curCmd, int pimblock_id); void controlPIM(BusPacket* packet); + void controlPIMsub(BusPacket* packet); void readOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packet, int idx, bool is_auto, bool is_mac); void writeOpd(int pb, BurstType& bst, PIMOpdType type, BusPacket* packet, int idx, bool is_auto, @@ -89,33 +93,34 @@ class PIMRank : public SimulatorObject union crf_t { - uint32_t data[32]; + uint32_t data[32]; //WHICH IS THE TERM OF .. 
BurstType bst[4]; crf_t() { memset(data, 0, sizeof(uint32_t) * 32); } - } crf; + } crf; //crt is 32x8x4, 4 burst logic unsigned inline getGrfIdx(unsigned idx) { - return idx & 0x7; + return idx & 0x7; //get under low 3 bits } - unsigned inline getGrfIdxHigh(unsigned r, unsigned c) + unsigned inline getGrfIdxsalp(unsigned idx) { - return ((r & 0x1) << 2 | ((c >> 3) & 0x3)); + return idx & 0x3; } - unsigned inline isReservedRA(unsigned row) + unsigned inline getGrfIdxHigh(unsigned r, unsigned c) { - return (row & (1 << 13)); + return ((r & 0x1) << 2 | ((c >> 3) & 0x3)); } - unsigned inline masked2accessibleRA(unsigned row) + /*unsigned inline getGrfIdxHighsalp(unsigned r, unsigned c) { - return (row & ((1 << 13) - 1)); - } - - Rank* rank; - vector pimBlocks; + return; //calculate....((r & 0x1) << 2 | ((c >> 3) & 0x3)) + }*/ + Rank* rank; + vector pimBlocks; + vector sblocks; + bool is_salp_; }; } // namespace DRAMSim #endif diff --git a/src/Rank.cpp b/src/Rank.cpp index 2e5b28c..32e1b89 100644 --- a/src/Rank.cpp +++ b/src/Rank.cpp @@ -28,12 +28,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*********************************************************************************/ -#include "Rank.h" - #include #include "AddressMapping.h" #include "MemoryController.h" +#include "Rank.h" using namespace std; using namespace DRAMSim; @@ -51,13 +50,41 @@ Rank::Rank(ostream& simLog, Configuration& configuration) outgoingDataPacket(NULL), dataCyclesLeft(0), mode_(dramMode::SB) +{ + memoryController = NULL; + currentClockCycle = 0; + abmr1Even_ = abmr1Odd_ = abmr2Even_ = abmr2Odd_ = sbmr1_ = sbmr2_ = false; + pimRank = new PIMRank(dramsimLog, config, false); + pimRank->attachRank(this); +} + +Rank::Rank(ostream& simLog, Configuration& configuration, bool is_salp) + : chanId(-1), + rankId(-1), + dramsimLog(simLog), + isPowerDown(false), + refreshWaiting(false), + readReturnCountdown(0), + banks(getConfigParam(UINT, "NUM_BANKS"), Bank(simLog)), + bankStates(getConfigParam(UINT, "NUM_BANKS"), BankState(simLog)), + banks_sub(getConfigParam(UINT, "NUM_BANKS")*4, Bank(simLog)), + bankStates_SUB(getConfigParam(UINT, "NUM_BANKS")*4, BankState(simLog)), + config(configuration), + outgoingDataPacket(NULL), + dataCyclesLeft(0), + mode_(dramMode::SB), + is_salp_(is_salp) { memoryController = NULL; currentClockCycle = 0; abmr1Even_ = abmr1Odd_ = abmr2Even_ = abmr2Odd_ = sbmr1_ = sbmr2_ = false; - pimRank = make_shared(dramsimLog, config); + readReturnCountdown.clear(); + readReturnCountdown.reserve(32); + //banks_sub = vector>(16*4); + //bankStates_SUB = vector>(16, vector(4, dramsimLog)); + pimRank = new PIMRank(dramsimLog, config, is_salp_); pimRank->attachRank(this); } @@ -95,8 +122,9 @@ Rank::~Rank() delete outgoingDataPacket; } -void Rank::receiveFromBus(BusPacket* packet) +void Rank::receiveFromBus(BusPacket* packet) //outgoingcmdpacket -->comes from poppedbuspacket { + //cout<<"[receiveFromBus] packettype"<busPacketType<<" and currentcycle is "<bank<<" and row is "<row<print(currentClockCycle, false); } - if (!pimRank->isReservedRA(packet->row)) + if (!(packet->row & 1 << 
12)) { check(packet); updateState(packet); } - sendToBank(packet); + execute(packet); } void Rank::checkBank(BusPacketType type, int bank, int row) { + //if(bank==4) cout<<"[rank]:check and mode_ is sb and cycle is "<busPacketType == REF) + if (packet->busPacketType == REF) //referesh mode { for (size_t i = 0; i < config.NUM_BANKS; i++) { @@ -184,48 +285,206 @@ void Rank::check(BusPacket* packet) ERROR("== Error - ch " << getChanId() << " ra" << getRankId() << " received a REF when not allowed"); exit(-1); - } + } //in refresh mode: every bankstate should be in idle. } } else if (mode_ == dramMode::SB) { - checkBank(packet->busPacketType, packet->bank, packet->row); + if(!is_salp_) checkBank(packet->busPacketType, packet->bank, packet->row); + else checkBank(packet->busPacketType, packet->bank, controlsubarray(packet), packet->row); } else { - for (int bank = (packet->bank % 2); bank < config.NUM_BANKS; bank += 2) - checkBank(packet->busPacketType, bank, packet->row); + cout<<"[rank]:check and mode_ is hab_pim and cycle is "<row < 0x2000)?0:(packet->row < 0x4000)?1:(packet->row < 0x6000)?2:3; + checkBank(packet->busPacketType, bank, sub, packet->row); + } + } + else + { + for (int bank = (packet->bank % 2); bank < config.NUM_BANKS; bank += 2) + checkBank(packet->busPacketType, bank, packet->row); + } } } void Rank::updateState(BusPacket* packet) { auto addrMapping = config.addrMapping; + int targetsub = (packet->row < 0x2000)?0:(packet->row < 0x4000)?1:(packet->row < 0x6000)?2:3; if (packet->busPacketType == REF) { refreshWaiting = false; for (size_t i = 0; i < config.NUM_BANKS; i++) { - bankStates[i].nextActivate = currentClockCycle + config.tRFC; + if(is_salp_) + { + for(size_t j = 0; j < 4; j++) + { + bankStates_SUB[i*4 + j].nextActivate = currentClockCycle + config.tRFC; + } + } + else bankStates[i].nextActivate = currentClockCycle + config.tRFC; } } else if (mode_ == dramMode::SB) { - for (int bank = 0; bank < config.NUM_BANKS; bank++) + for (int bank = 0; 
bank < 16; bank++) { - updateBank(packet->busPacketType, bank, packet->row, bank == packet->bank, - addrMapping.isSameBankgroup(bank, packet->bank)); + if(is_salp_) + { + for(int sub = 0; sub < 4; sub++) + { + updateBank(packet->busPacketType, bank, packet->row, sub, bank==packet->bank, + addrMapping.isSameBankgroup(bank, packet->bank), targetsub == sub); + } + } + else updateBank(packet->busPacketType, bank, packet->row, bank == packet->bank, addrMapping.isSameBankgroup(bank, packet->bank)); } } - else + else //drawmode all pim or something { - for (int bank = 0; bank < config.NUM_BANKS; bank++) + for (int bank = 0; bank < 16; bank++) { - updateBank(packet->busPacketType, bank, packet->row, (bank % 2) == packet->bank, true); + if(is_salp_) + { + for(int sub = 0; sub < 4; sub++) + { + updateBank(packet->busPacketType, bank, packet->row, sub, true, + true, targetsub == sub); + } + } + else updateBank(packet->busPacketType, bank, packet->row, (bank % 2) == packet->bank, true); } } } +int Rank::controlsubarray(BusPacket* packet) +{ + int row = packet->row; + if(row < 0x2000) return 0; + else if(row < 0x4000) return 1; + else if(row < 0x6000) return 2; + else return 3; +} +//need tRA, tWA which means next activate, next select logic... +void Rank::updateBank(BusPacketType type, int bank, int row, int sub, bool targetBank, bool targetBankgroup, bool targetSubarray) //just use target subarray to do +{ + switch (type) + { + case READ: + if (targetBank){ + if(targetSubarray) + bankStates_SUB[4*bank + sub].nextPrecharge = max(bankStates_SUB[4*bank + sub].nextPrecharge, + currentClockCycle + config.READ_TO_PRE_DELAY); //precharge keeps later.... 
+ bankStates_SUB[4*bank + sub].nextWrite = max(bankStates_SUB[4*bank + sub].nextWrite, currentClockCycle + config.tRTW); + bankStates_SUB[4*bank + sub].nextRead = + max(bankStates_SUB[4*bank + sub].nextRead, + currentClockCycle + max(config.tCCDL, config.BL / 2)); + } + if (targetBankgroup) + { + bankStates_SUB[4*bank + sub].nextRead = + max(bankStates_SUB[4*bank + sub].nextRead, + currentClockCycle + max(config.tCCDL, config.BL / 2)); + if(!targetBank) bankStates_SUB[4*bank + sub].nextWrite = + max(bankStates_SUB[4*bank + sub].nextWrite, currentClockCycle + config.READ_TO_WRITE_DELAY); + } + else //not targetgroup mode... + { + bankStates_SUB[4*bank + sub].nextRead = + max(bankStates_SUB[4*bank + sub].nextRead, + currentClockCycle + max(config.tCCDS, config.BL / 2)); + bankStates_SUB[4*bank + sub].nextWrite = + max(bankStates_SUB[4*bank + sub].nextWrite, currentClockCycle + config.READ_TO_WRITE_DELAY); + } + //if(bank==4 && sub == 3) cout<<"[updateBank] and cycle is "<row<0x2000)?0:(packet->row<0x4000)?1:(packet->row<0x6000)?2:3; if (DEBUG_CMD_TRACE) { - if (packet->row == config.PIM_REG_RA) + if (packet->row == 0x3fff) { - if (0x08 <= packet->column && packet->column <= 0x0f) + if(!is_salp_) { - PRINT(OUTLOG_GRF_A("READ_GRF_A")); + if (0x08 <= packet->column && packet->column <= 0x0f) + { + PRINT(OUTLOG_GRF_A("READ_GRF_A")); + } + else if (0x18 <= packet->column && packet->column <= 0x1f) + { + PRINT(OUTLOG_GRF_B("READ_GRF_B")); + } } - else if (0x18 <= packet->column && packet->column <= 0x1f) + else { - PRINT(OUTLOG_GRF_B("READ_GRF_B")); + if (0x08 <= packet->column && packet->column <= 0x11) + { + PRINT(OUTLOG_GRF_A("READ_GRF_A")); + } + else if(packet->column == 0x12) + { + PRINT(OUTLOG_B_GRF_B("READ_GRF_B")); + } } } - else if (pimRank->isReservedRA(packet->row)) + else if (packet->row & (1 << 12)) { PRINTC(GRAY, OUTLOG_ALL("READ")); } @@ -339,17 +612,40 @@ void Rank::readSb(BusPacket* packet) } #ifndef NO_STORAGE - if (packet->row == config.PIM_REG_RA) + if 
(packet->row == 0x3fff) { - if (0x08 <= packet->column && packet->column <= 0x0f) + if(!is_salp_) + { + if (0x08 <= packet->column && packet->column <= 0x0f) *(packet->data) = pimRank->pimBlocks[packet->bank / 2].grfA[packet->column - 0x8]; - else if (0x18 <= packet->column && packet->column <= 0x1f) - *(packet->data) = pimRank->pimBlocks[packet->bank / 2].grfB[packet->column - 0x18]; + else if (0x18 <= packet->column && packet->column <= 0x1f) + *(packet->data) = pimRank->pimBlocks[packet->bank / 2].grfB[packet->column - 0x18]; + else + banks[packet->bank].read(packet); + } else - banks[packet->bank].read(packet); + { + if (0x08 <= packet->column && packet->column <= 0x11) + *(packet->data) = pimRank->sblocks[packet->bank].grf[packet->column - 0x8]; + else if (packet->column == 0x12) + *(packet->data) = pimRank->sblocks[packet->bank].blf; + else + banks_sub[packet->bank*4+sub].read(packet); + } } else - banks[packet->bank].read(packet); + { + if(is_salp_) + { + //if(banks_sub[packet->bank].size() == 4) + banks_sub[packet->bank*4+sub].read(packet); + } + else + { + //cout<<"[RANK] readsb and currentClockCycle is "<chan<<" and bank is "<bank<<" and row is "<row<<" and col is "<column<bank].read(packet); + } + } #endif } @@ -357,7 +653,7 @@ void Rank::writeSb(BusPacket* packet) { if (DEBUG_CMD_TRACE) { - if (packet->row == config.PIM_REG_RA || pimRank->isReservedRA(packet->row)) + if (packet->row == 0x3fff || packet->row & (1 << 12)) { PRINTC(GRAY, OUTLOG_ALL("WRITE")); } @@ -368,44 +664,84 @@ void Rank::writeSb(BusPacket* packet) } #ifndef NO_STORAGE - if (!(packet->row == config.PIM_REG_RA) && !pimRank->isReservedRA(packet->row)) - banks[packet->bank].write(packet); + if (!(packet->row == 0x3fff) && !(packet->row & (1 << 12))) + { + if(!is_salp_){ + if(packet!=NULL && packet->data!=NULL) + { + banks[packet->bank].write(packet); + } + } + else + { + //cout<<"[rank]:write and cycle is "<bank<<" and row is "<row + //<<" and col is" <column<row < 0x2000)?0:(packet->row 
< 0x4000)?1:(packet->row < 0x6000)?2:3; + //if(banks_sub[packet->bank].size() == 4) + //{ + banks_sub[packet->bank*4+sub].write(packet); + //} + //else{} + } + } #endif } -void Rank::sendToBank(BusPacket* packet) +void Rank::execute(BusPacket* packet) { + //if(mode_ == dramMode::HAB_PIM) cout<<"[rank]:execute and cycle is "<bank<<" and row is "<row + //<<" and col is" <column<<" and type is "<busPacketType<isToggleCond(packet)<busPacketType) { case READ: if (mode_ == dramMode::SB) - readSb(packet); - else if (mode_ == dramMode::HAB_PIM && pimRank->isToggleCond(packet)) + { + //cout<<"[rank]:execute for read cycle is "<bank<<" and row is "<row + //<<" and col is" <column<isToggleCond(packet)) //hab_pim mode + { + //cout<<"clock is "<row<doPIM(packet); + } + /*else if (mode_ == dramMode::HAB_PIM && is_salp_) + { + //cout<<"[rank]:execute for read cycle is "<bank<<" and row is "<row + //<<" and col is" <column<doPIM(packet); + }*/ else pimRank->readHab(packet); packet->busPacketType = DATA; + if(readReturnPacket.capacity() == readReturnPacket.size()) readReturnPacket.reserve(readReturnPacket.size() + 32); readReturnPacket.push_back(packet); - readReturnCountdown.push_back(config.RL); + if(readReturnCountdown.capacity() == readReturnCountdown.size()) + readReturnCountdown.reserve(readReturnCountdown.size() + 32); + readReturnCountdown.push_back(config.RL); + //delete(packet); break; case WRITE: if (mode_ == dramMode::SB) writeSb(packet); else if (mode_ == dramMode::HAB_PIM && pimRank->isToggleCond(packet)) pimRank->doPIM(packet); + /*else if(mode_ == dramMode::HAB_PIM && is_salp_) + pimRank->doPIM(packet);*/ else pimRank->writeHab(packet); - delete (packet); + //delete (packet); break; case ACTIVATE: if (DEBUG_CMD_TRACE) { PRINTC(getModeColor(), OUTLOG_ALL("ACTIVATE") << " tag : " << packet->tag); } - if (mode_ == dramMode::SB && packet->row == config.PIM_ABMR_RA && - packet->column == 0x1f) - { - abmr1Even_ = (packet->bank == 0) ? 
true : abmr1Even_; + if (mode_ == dramMode::SB && packet->row == 0x17ff && packet->column == 0x1f) //avoid selecting these ones + { //need mode change for sb_pim + abmr1Even_ = (packet->bank == 0) ? true : abmr1Even_; //nothing to bother.... abmr1Odd_ = (packet->bank == 1) ? true : abmr1Odd_; abmr2Even_ = (packet->bank == 8) ? true : abmr2Even_; abmr2Odd_ = (packet->bank == 9) ? true : abmr2Odd_; @@ -413,6 +749,7 @@ void Rank::sendToBank(BusPacket* packet) if ((config.NUM_BANKS <= 2 && abmr1Even_ && abmr1Odd_) || (config.NUM_BANKS > 2 && abmr1Even_ && abmr1Odd_ && abmr2Even_ && abmr2Odd_)) { + //cout<<"[rank]: execute and mode is hab and cycle is "<row == config.PIM_SBMR_RA) + if (mode_ == dramMode::HAB && packet->row == 0x1fff) { - sbmr1_ = (packet->bank == 0) ? true : sbmr1_; + sbmr1_ = (packet->bank == 0) ? true : sbmr1_; //toggle condition? sbmr2_ = (packet->bank == 1) ? true : sbmr2_; if (sbmr1_ && sbmr2_) { + //cout<<"[rank]: execute and mode is sb and cycle is "<busPacketType<<" and clockcycle is "<busPacketType == DATA) { // if the packet is done on the bus, call receiveFromBus and free up // the bus @@ -492,13 +834,20 @@ void Rank::update() } // decrement the counter for all packets waiting to be sent back - for (size_t i = 0; i < readReturnCountdown.size(); i++) readReturnCountdown[i]--; - - if (readReturnCountdown.size() > 0 && readReturnCountdown[0] == 0) + //auto it = readReturnCountdown.begin(); + if (readReturnCountdown.size() > 0){ + for (int i = 0; i < readReturnCountdown.size(); i++) { + if(readReturnCountdown[i]>0){ + /*if(readReturnCountdown.size() > 10000000000) + cout<<"[READ] "<<"readReturnCountdown["< 0 /*&& readReturnCountdown.size() < 100*/ && readReturnCountdown[0] == 0) { // RL time has passed since the read was issued; this packet is // ready to go out on the bus - outgoingDataPacket = readReturnPacket[0]; dataCyclesLeft = config.BL / 2; @@ -515,6 +864,7 @@ void Rank::update() } pimRank->update(); pimRank->step(); + //if(chanId ==1) 
cout<<"[rank]:update and cycle is "< currentClockCycle) + if(!is_salp_) { - ERROR("== Error - Trying to power up rank " << getChanId() - << " before we're allowed to"); - ERROR(bankStates[i].nextPowerUp << " " << currentClockCycle); - exit(0); + if (bankStates[i].nextPowerUp > currentClockCycle) + { + ERROR("== Error - Trying to power up rank " << getChanId() + << " before we're allowed to"); + ERROR(bankStates[i].nextPowerUp << " " << currentClockCycle); + exit(0); + } + bankStates[i].nextActivate = currentClockCycle + config.tXP; + bankStates[i].currentBankState = Idle; + } + else + { + for(size_t s = 0; s < 4; s++) + { + if(bankStates_SUB[4*i+s].nextPowerUp > currentClockCycle) + { + ERROR("== Error - Trying to power up rank" << getChanId() + << " before we're allowed to"); + ERROR(bankStates_SUB[4*i+s].nextPowerUp << " "< pimRank; + PIMRank* pimRank; unsigned dataCyclesLeft; bool refreshWaiting; @@ -92,11 +98,15 @@ class Rank : public SimulatorObject vector readReturnPacket; vector readReturnCountdown; + //vector> banks_sub; //which to modify... vector banks; + vector banks_sub; vector bankStates; + //vector> bankStates_SUB; + vector bankStates_SUB; dramMode mode_; - bool abmr1Even_, abmr1Odd_, abmr2Even_, abmr2Odd_, sbmr1_, sbmr2_; + bool abmr1Even_, abmr1Odd_, abmr2Even_, abmr2Odd_, sbmr1_, sbmr2_; //single bank/all bank what is mr? 
const char* getModeColor() { diff --git a/src/SBlock.cpp b/src/SBlock.cpp new file mode 100644 index 0000000..b2ffbd2 --- /dev/null +++ b/src/SBlock.cpp @@ -0,0 +1,91 @@ +#include +#include + +#include "SBlock.h" +#include "PrintMacros.h" +#include "SystemConfiguration.h" +#include "half.h" + +using namespace DRAMSim; + +void SBlock::add(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) +{ + if (pimPrecision_ == FP16) + { + for (int i = 0; i < 16; i++) + { + dstBst.fp16Data_[i] = src0Bst.fp16Data_[i] + src1Bst.fp16Data_[i]; + } + } + else if (pimPrecision_ == FP32) + { + for (int i = 0; i < 8; i++) + { + dstBst.fp32Data_[i] = src0Bst.fp32Data_[i] + src1Bst.fp32Data_[i]; + } + } + else + dstBst = src0Bst + src1Bst; +} + +void SBlock::mul(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) +{ + if (pimPrecision_ == FP16) + { + for (int i = 0; i < 16; i++) + { + dstBst.fp16Data_[i] = src0Bst.fp16Data_[i] * src1Bst.fp16Data_[i]; + } + } + else if (pimPrecision_ == FP32) + { + for (int i = 0; i < 8; i++) + { + dstBst.fp32Data_[i] = src0Bst.fp32Data_[i] * src1Bst.fp32Data_[i]; + } + } + else + dstBst = src0Bst * src1Bst; +} + +void SBlock::mac(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) +{ + if (pimPrecision_ == FP16) + { + for (int i = 0; i < 16; i++) + { + dstBst.fp16Data_[i] = src0Bst.fp16Data_[i] * src1Bst.fp16Data_[i] + dstBst.fp16Data_[i]; + } + + DEBUG("MAC " << src0Bst.hexToStr2() << "*+" << src1Bst.hexToStr2() << "" + << dstBst.hexToStr2()); + } + else if (pimPrecision_ == FP32) + { + for (int i = 0; i < 8; i++) + { + dstBst.fp32Data_[i] = src0Bst.fp32Data_[i] * src1Bst.fp32Data_[i] + dstBst.fp32Data_[i]; + } + } + else + dstBst = src0Bst * src1Bst + dstBst; +} +void SBlock::burstmax(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst) //don't need solely.. +{ + //maybe using this as expf or gelu or some nonlinear functions to .... 
+ if (pimPrecision_ == FP16) + { + for (int i = 0; i < 16; i++) + { + dstBst.fp16Data_[i] = (src0Bst.fp16Data_[i] > src1Bst.fp16Data_[i])?src0Bst.fp16Data_[i]:src1Bst.fp16Data_[i]; + } + } +} +std::string SBlock::print() +{ + stringstream ss; + ss << "[BLF]" << blf.binToStr(); + for (int i = 0; i < 4; i++) ss << grf[i].binToStr(); + + return ss.str(); +} \ No newline at end of file diff --git a/src/SBlock.h b/src/SBlock.h new file mode 100644 index 0000000..422ce55 --- /dev/null +++ b/src/SBlock.h @@ -0,0 +1,36 @@ +#ifndef __S_BLOCK_HPP__ +#define __S_BLOCK_HPP__ + +#include +#include +#include +#include + +#include "Burst.h" +#include "SystemConfiguration.h" + +using namespace std; + +namespace DRAMSim{ +class SBlock{ + public: + SBlock() + { + pimPrecision_ = PIMConfiguration::getPIMPrecision(); + } + SBlock(const PIMPrecision& pimPrecision) : pimPrecision_(pimPrecision) {} + + BurstType grf[4]; + BurstType blf; + + void add(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + void mac(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + void mul(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + void burstmax(BurstType& dstBst, BurstType& src0Bst, BurstType& src1Bst); + std::string print(); + private: + PIMPrecision pimPrecision_; +}; + +} +#endif \ No newline at end of file diff --git a/src/Subarray.cpp b/src/Subarray.cpp new file mode 100644 index 0000000..cb2219d --- /dev/null +++ b/src/Subarray.cpp @@ -0,0 +1,134 @@ +/********************************************************************************* + * Copyright (c) 2010-2011, Elliott Cooper-Balis + * Paul Rosenfeld + * Bruce Jacob + * University of Maryland + * dramninjas [at] gmail [dot] com + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *********************************************************************************/ + +#include + +#include "Subarray.h" +#include "BusPacket.h" + +using namespace std; +using namespace DRAMSim; + +Subarray::Subarray(ostream& simLog) + : currentState(simLog), rowEntries(getConfigParam(UINT, "NUM_COLS")), dramsimLog(simLog) +{ + numCols = getConfigParam(UINT, "NUM_COLS"); +} + +/* The bank class is just a glorified sparse storage data structure + * that keeps track of written data in case the simulator wants a + * function DRAM model + * + * A vector of size NUM_COLS keeps a linked list of rows and their + * associated values. 
+ * + * write() adds an entry to the proper linked list or replaces the + * value in a row that was already written + * + * read() searches for a node with the right row value, if not found + * returns the tracer value 0xDEADBEEF + * + * TODO: if anyone wants to actually store data, see the 'data_storage' + *branch and perhaps try to merge that into master + */ + +shared_ptr Subarray::searchForRow(unsigned row, shared_ptr head) +{ + while (head != NULL) + { + if (head->row == row) + { + // found it + return head; + } + // keep looking + head = head->next; + } + // if we get here, didn't find it + return NULL; +} +//how about subarray model to array +void Subarray::read(BusPacket* busPacket) +{ + shared_ptr rowHeadNode = rowEntries[busPacket->column]; + shared_ptr foundNode = NULL; + cout<<"subarray read"<<" and row is "<row<<" and col is "<column<row, rowHeadNode)) == NULL) + { + } + else // found it + { + *(busPacket->data) = foundNode->data; + } +} +//i'd like to use this logic same as subarray.... +void Subarray::write(const BusPacket* busPacket) +{ + // TODO: move all the error checking to BusPacket so once we have a bus + // packet, + // we know the fields are all legal + cout<<"subarray write"<<" and row is "<row<<" and col is "<column<column >= numCols) + { + ERROR("== Error - Bus Packet column " << busPacket->column << " out of bounds"); + exit(-1); + } + + // head of the list we need to search + shared_ptr rowHeadNode = rowEntries[busPacket->column]; //such row groups that contains such column... + shared_ptr foundNode = NULL; + if ((foundNode = Subarray::searchForRow(busPacket->row, rowHeadNode)) == NULL) //for certain row, no data + { + // not found + shared_ptr newRowNode = make_shared(); + // DataStruct* newRowNode = (DataStruct*)malloc(sizeof(DataStruct)); + + // insert at the head for speed + // TODO: Optimize this data structure for speedier lookups? 
+ newRowNode->row = busPacket->row; + if (busPacket->data) //not null_bst_ + newRowNode->data = *(busPacket->data); + newRowNode->next = rowHeadNode; + rowEntries[busPacket->column] = newRowNode; + } + else + { + // found it, just plaster in the new data + foundNode->data = *(busPacket->data); + if (DEBUG_BANKS) + { + PRINTN(" -- Subarray " << busPacket->bank << " writing to physical address 0x" << hex + << busPacket->physicalAddress << dec << ":"); + busPacket->printData(); + PRINT(""); + } + } +} +//nothing to change ildan... diff --git a/src/Subarray.h b/src/Subarray.h new file mode 100644 index 0000000..0a33de1 --- /dev/null +++ b/src/Subarray.h @@ -0,0 +1,71 @@ +/********************************************************************************* + * Copyright (c) 2010-2011, Elliott Cooper-Balis + * Paul Rosenfeld + * Bruce Jacob + * University of Maryland + * dramninjas [at] gmail [dot] com + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *********************************************************************************/ +#ifndef SUBARRAY_H +#define SUBARRAY_H + +#include +#include +#include + +#include "BankState.h" +#include "Burst.h" +#include "BusPacket.h" +#include "SimulatorObject.h" +#include "SystemConfiguration.h" + +namespace DRAMSim +{ +class Subarray //x4 +{ + typedef struct _DataStruct + { + unsigned row; + BurstType data; //16x16 + std::shared_ptr next; //points to next datastruct + } DataStruct; + //how about use this in subarray level logic? 
+ public: + // functions + Subarray(ostream& simLog); + + void read(BusPacket* busPacket); + void write(const BusPacket* busPacket); + BankState currentState; + int getRow(); + + private: + // private member + std::vector> rowEntries; + ostream& dramsimLog; + static std::shared_ptr searchForRow(unsigned row, std::shared_ptr head); //memory leak/dealloc easily + unsigned numCols; +}; +} +#endif \ No newline at end of file diff --git a/src/SystemConfiguration.h b/src/SystemConfiguration.h index 2cc989b..36f2a14 100644 --- a/src/SystemConfiguration.h +++ b/src/SystemConfiguration.h @@ -32,7 +32,6 @@ #define SYSCONFIG_H #include - #include #include #include @@ -64,6 +63,7 @@ extern bool PRINT_CHAN_STAT; extern bool DEBUG_PIM_TIME; extern bool DEBUG_CMD_TRACE; extern bool DEBUG_PIM_BLOCK; +extern bool DEBUG_SUBARRAYS; extern std::string SIM_TRACE_FILE; extern bool SHOW_SIM_OUTPUT; @@ -102,12 +102,14 @@ enum RowBufferPolicy enum QueuingStructure { PerRank, - PerRankPerBank + PerRankPerBank, + PerRankPerBankPerSubarray }; enum SchedulingPolicy { RankThenBankRoundRobin, - BankThenRankRoundRobin + BankThenRankRoundRobin, + RankThenBankThenSubarrayRoundRobin }; enum PIMMode @@ -115,11 +117,13 @@ enum PIMMode mac_in_bankgroup, mac_in_bank }; + enum PIMPrecision { + INT4, FP16, INT8, - FP32 + FP32, }; enum class dramMode @@ -133,7 +137,7 @@ enum class pimBankType { EVEN_BANK, ODD_BANK, - ALL_BANK + ALL_BANK//which is two but ...maybe.. 
}; // set by IniReader.cpp @@ -357,11 +361,12 @@ class PIMConfiguration string param = getConfigParam(STRING, "ROW_BUFFER_POLICY"); if (param == "open_page") { - return OpenPage; + return OpenPage; // default } else if (param == "close_page") { - return ClosePage; + //return ClosePage; + throw invalid_argument("Close page policy is not supported"); } throw invalid_argument("Invalid row buffer policy"); } @@ -378,6 +383,10 @@ class PIMConfiguration { return BankThenRankRoundRobin; } + else if (param == "rank_then_bank_then_subarray_round_robin") + { + return RankThenBankThenSubarrayRoundRobin; + } throw invalid_argument("Invalid scheduling policy"); } @@ -407,6 +416,10 @@ class PIMConfiguration { return PerRank; } + else if (param == "per_rank_per_bank_per_subarray") + { + return PerRankPerBankPerSubarray; + } throw invalid_argument("Invalid queueing structure"); } @@ -439,6 +452,10 @@ class PIMConfiguration { return FP32; } + else if (param == "INT4") + { + return INT4; + } throw invalid_argument("Invalid PIM precision"); } diff --git a/src/Transaction.cpp b/src/Transaction.cpp index 064f226..894fdbb 100644 --- a/src/Transaction.cpp +++ b/src/Transaction.cpp @@ -28,11 +28,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*********************************************************************************/ -#include "Transaction.h" - #include #include "PrintMacros.h" +#include "Transaction.h" using std::dec; using std::endl; @@ -43,14 +42,24 @@ namespace DRAMSim Transaction::Transaction(TransactionType transType, uint64_t addr, BurstType* dat) : transactionType(transType), address(addr), data(dat) { - rowBufferPolicy = PIMConfiguration::getRowBufferPolicy(); + if(transactionType != DATA_READ && transactionType != DATA_WRITE && transactionType != RETURN_DATA) + { + ERROR("Transaction type is not read or write\n"); + abort(); + } + rowBufferPolicy = RowBufferPolicy::OpenPage; } Transaction::Transaction(TransactionType transType, uint64_t addr, const std::string& str, BurstType* dat) : transactionType(transType), address(addr), tag(str), data(dat) { - rowBufferPolicy = PIMConfiguration::getRowBufferPolicy(); + if(transactionType != DATA_READ && transactionType != DATA_WRITE && transactionType != RETURN_DATA) + { + ERROR("Transaction type is not read or write\n"); + abort(); + } + rowBufferPolicy = RowBufferPolicy::OpenPage; } Transaction::Transaction(const Transaction& t) @@ -60,7 +69,12 @@ Transaction::Transaction(const Transaction& t) timeAdded(t.timeAdded), timeReturned(t.timeReturned) { - rowBufferPolicy = PIMConfiguration::getRowBufferPolicy(); + if(transactionType != DATA_READ && transactionType != DATA_WRITE && transactionType != RETURN_DATA) + { + ERROR("Transaction type is not read or write\n"); + abort(); + } + rowBufferPolicy = RowBufferPolicy::OpenPage; #ifndef NO_STORAGE ERROR( "Data storage is really outdated and these copies happen in an \n " diff --git a/src/Transaction.h b/src/Transaction.h index 1881e01..bb7703a 100644 --- a/src/Transaction.h +++ b/src/Transaction.h @@ -65,13 +65,13 @@ class Transaction Transaction(TransactionType transType, uint64_t addr, BurstType* dat); Transaction(TransactionType transType, uint64_t addr, const std::string& str, BurstType* dat); 
Transaction(const Transaction& t); - + //are there other transaction type? BusPacketType getBusPacketType() { - if (!isAllowedRowBufferPolicy(rowBufferPolicy)) + /*if (!isAllowedRowBufferPolicy(rowBufferPolicy)) { throw invalid_argument("Unknown row buffer policy"); - } + }*/ switch (transactionType) { case DATA_READ: diff --git a/src/lut_table.cpp b/src/lut_table.cpp new file mode 100644 index 0000000..0f89622 --- /dev/null +++ b/src/lut_table.cpp @@ -0,0 +1,57 @@ +#include "lut_table.h" + +#define GELU(x) 0.5 * x * (1 + tanhf(sqrtf(2/M_PI) * (x + 0.044715*pow(x, 3)))) + +using namespace DRAMSim; +using namespace std; +void lut_table::fill(layerType layertype, PIMPrecision pimprecision){ + int size = sec_num / 16; + fp16 former = convertF2H(float(0)); + fp16 latter = convertF2H(float(0)); + fp16 slope = convertF2H(float(0)); + fp16 intercept = convertF2H(float(0)); + int b_idx = 0; + for(int s = 0; sfp16Data_[0] = convertF2H(float(0.0)); + sets[size]->fp16Data_[0] = convertF2H(float(-65504.0)); + sets[2*size]->fp16Data_[0] = convertF2H(float(0.0)); + sets[3*size]->fp16Data_[0] = convertF2H(float(-65504.0)); + } + else if(b_idx == sec_num){ + sets[s]->fp16Data_[15] = convertF2H(float(0)); + sets[s+ size]->fp16Data_[15] = convertF2H(float(65504.0)); + sets[s+ 2* size]->fp16Data_[15] =convertF2H(float(0.0)); + sets[s + 3*size]->fp16Data_[15] = convertF2H(float(65504.0)); + } + else{ + switch (layertype) + { + case layerType::GELU: //scope as -4, 4 + slope = convertF2H((GELU(convertH2F(latter)) - GELU(convertH2F(former))) / pow((convertH2F(latter)-convertH2F(former)), 2)); + intercept = convertF2H(expf(former)) - slope * former; + sets[s]->fp16Data_[bst] = slope; + sets[s+size]->fp16Data_[bst] = intercept; + break; + case layerType::EXP: //scope as -11.0898, 11.0898 + slope = convertF2H((expf(convertH2F(latter)) - expf(convertH2F(former))) / pow((convertH2F(latter) - convertH2F(former)), 2)); + intercept = convertF2H(expf(former)) - slope * former; + sets[s + 
2*size]->fp16Data_[bst] = slope; + sets[s + 3*size]->fp16Data_[bst] = intercept; + break; + default: + break; + } + } + } + } +} + +//slope/intercept model seems like +//make lut_table as numpyburst how? diff --git a/src/lut_table.h b/src/lut_table.h new file mode 100644 index 0000000..03f5225 --- /dev/null +++ b/src/lut_table.h @@ -0,0 +1,44 @@ +#ifndef LUT_TABLE_H +#define LUT_TABLE_H + +#include +#include +#include + +#include "tests/KernelAddrGen.h" +#include "FP16.h" +#include "half.h" +#include "Burst.h" + +using namespace DRAMSim; +using namespace std; +class lut_table +{ +private: +public: + lut_table() + {}; + lut_table(int secnum, fp16 lowerbound, fp16 lowerbound_, fp16 upperbound, fp16 upperbound_){ + sec_num = secnum; + low_bound_gelu = lowerbound; + up_bound_gelu = upperbound; + low_bound_exp = lowerbound_; + up_bound_exp = upperbound_; + for(int i = 0; i < secnum / 4; i++){ + sets.push_back(new BurstType()); + } + }; + ~lut_table(){ + for(int i = 0; i < sets.size(); i++){ + delete sets[i]; + } + sets.clear(); + }; + void fill(layerType layertype, PIMPrecision pim_cmds); + //void change(vector slopes, vector intercepts); + //vector slopes, intercepts; + vector sets; + int sec_num; + fp16 low_bound_gelu, up_bound_gelu, low_bound_exp, up_bound_exp; +}; +#endif