Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions rtl/vproc_pipeline.sv
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ module vproc_pipeline import vproc_pkg::*; #(
logic alt_last_cycle;
logic init_addr; // initialize address (used by LSU)
logic requires_flush;
logic red_op;
logic [XIF_ID_W -1:0] id;
op_unit unit;
op_mode mode;
Expand Down Expand Up @@ -179,6 +180,7 @@ module vproc_pipeline import vproc_pkg::*; #(
counter_t count_next_inc, alt_count_next_inc;
logic last_cycle_next, alt_last_cycle_next, wait_alt_count_next;
logic [OP_CNT-1:0] op_load_next, op_shift_next;
logic vcompress_flushing;
always_comb begin
state_valid_d = state_valid_q;
state_wait_alt_count_d = state_wait_alt_count_q;
Expand Down Expand Up @@ -241,6 +243,7 @@ module vproc_pipeline import vproc_pkg::*; #(
state_next.first_cycle = 1'b1;
state_next.init_addr = 1'b1;
state_next.requires_flush = pipe_in_state_i.requires_flush;
state_next.red_op = pipe_in_state_i.red_op;
state_next.id = pipe_in_state_i.id;
state_next.unit = pipe_in_state_i.unit;
state_next.mode = pipe_in_state_i.mode;
Expand Down Expand Up @@ -356,6 +359,9 @@ module vproc_pipeline import vproc_pkg::*; #(
end
default: ;
endcase
if (state_q.requires_flush) begin
last_cycle_next &= vcompress_flushing;
end else begin
// clear last cycle based on EMUL (note: the alt_last_cycle signal is not cleared here
// as that is only required to indicate completion of one vreg cycle)
unique case (state_q.emul)
Expand All @@ -364,6 +370,7 @@ module vproc_pipeline import vproc_pkg::*; #(
EMUL_8: last_cycle_next &= count_next_inc.part.mul[2:0] == '1;
default: ;
endcase
end
if ((OP_ALT_COUNTER != '0) & state_q.count.part.sign) begin
last_cycle_next = '0;
end
Expand All @@ -373,6 +380,20 @@ module vproc_pipeline import vproc_pkg::*; #(
end
end

// Extra cycles for flushing output in vcompress.
// Drives vcompress_flushing, which is consumed elsewhere in this module:
//   - the counter logic ANDs it into last_cycle_next when state_q.requires_flush is set
//   - operand loads are suppressed while it is set for instructions that require a flush
//   - the pending-read generation skips instructions that require a flush while it is set
always_comb begin
// Default value, also retained for EMUL encodings not listed in the case below
vcompress_flushing = 1'b0;
// Select one bit of the incremented counter according to EMUL: mul[0], mul[1], mul[2]
// or the sign bit.
// NOTE(review): presumably the selected bit becomes set once the counter has advanced
// past the regular EMUL vreg passes, i.e. during the extra flush cycles - confirm
// against the counter_t layout in vproc_pkg.
unique case (state_q.emul)
EMUL_1: vcompress_flushing = count_next_inc.part.mul[0] == 1'b1;
EMUL_2: vcompress_flushing = count_next_inc.part.mul[1] == 1'b1;
EMUL_4: vcompress_flushing = count_next_inc.part.mul[2] == 1'b1;
EMUL_8: vcompress_flushing = count_next_inc.part.sign == 1'b1;
default: ;
endcase
// The flushing condition is not meaningful when a pipe operation is being started,
// hence the signal is forced low whenever pipe_in_ready_o is set.
vcompress_flushing &= ~pipe_in_ready_o;
end

// Operand load and shift signals
counter_t [OP_CNT-1:0] op_count;
always_comb begin
Expand All @@ -399,7 +420,8 @@ module vproc_pipeline import vproc_pkg::*; #(
else if (~aux_count_used | (state_next.aux_count == '0) | pipe_in_ready_o) begin
if (~OP_MASK[i]) begin
if ((op_count[i].part.low == '0) &
(~OP_NARROW[i] | ~state_next.op_flags[i].narrow | ~op_count[i].part.mul[0])
(~OP_NARROW[i] | ~state_next.op_flags[i].narrow | ~op_count[i].part.mul[0]) &
(~state_next.requires_flush | ~vcompress_flushing) // We don't load ops for vcompress when flushing
) begin
op_load_next[i] = OP_ALWAYS_VREG[i] | state_next.op_flags[i].vreg;

Expand Down Expand Up @@ -594,7 +616,8 @@ module vproc_pipeline import vproc_pkg::*; #(
end
//else if (OP_ALT_COUNTER != '0) begin
//end
else begin
// In the second part of vcompress, we don't generate extra pending reads
else if(~state_q.requires_flush | (~vcompress_flushing & ~state_done)) begin
if (OP_ALWAYS_VREG[i] | state_q.op_flags[i].vreg) begin
op_pend_reads[i] = DONT_CARE_ZERO ? '0 : 'x;
unique case ({state_q.emul, OP_NARROW[i] & state_q.op_flags[i].narrow})
Expand Down Expand Up @@ -679,6 +702,7 @@ module vproc_pipeline import vproc_pkg::*; #(
logic last_cycle;
logic init_addr; // initialize address (used by LSU)
logic requires_flush;
logic red_op;
logic alt_count_valid; // alternative counter value is valid
logic [AUX_COUNTER_W-1:0] aux_count;
logic [XIF_ID_W-1:0] id;
Expand Down Expand Up @@ -711,6 +735,7 @@ module vproc_pipeline import vproc_pkg::*; #(
(~FIELD_COUNT_USED | (state_q.field_count == '0));
unpack_ctrl.init_addr = state_q.init_addr;
unpack_ctrl.requires_flush = state_q.requires_flush;
unpack_ctrl.red_op = state_q.red_op;
unpack_ctrl.alt_count_valid = DONT_CARE_ZERO ? '0 : 'x;
unique case (state_q.emul)
EMUL_1: unpack_ctrl.alt_count_valid = state_q.alt_count.val[COUNTER_W-1 -: 4] == '0;
Expand Down
78 changes: 58 additions & 20 deletions rtl/vproc_pipeline_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,20 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
// by indices loaded by a previous operand. These operands have following indices (negative
// indices must be added to the operand count):
//
// | Idx | Type | Address | Units using it | Comment |
// +-----+------+----------+----------------+-----------------------------------------------+
// | 0 | data | vs2 (vd) | all | Only MUL may change address to vd |
// | 1 | data | vs1 (vd) | all except SLD | Only LSU uses vd as address instead of vs1 |
// | 2 | data | vd/vs2 | MUL | MUL may use either vd or vs2 as address |
// | -3 | data | dynamic | ELEM | Index-based dynamic address within vreg group |
// | -2 | mask | vs2 | ELEM | Mask operand for some ELEM operations |
// | -1 | mask | v0 | all | Mask operand for masked operations |
// | Idx | Type | Address | Units using it | Comment |
// +-----+------+----------+----------------+--------------------------------------------------------------------+
// | 0 | data | vs2 (vd) | all | Only MUL may change address to vd |
// | 1 | data | anything | all except SLD | LSU uses it as vd, vcompress uses it as vs2, all other units use it as vs1 |
// | 2 | data | vd/vs2 | MUL | MUL may use either vd or vs2 as address |
// | -3 | data | dynamic | ELEM | Index-based dynamic address within vreg group |
// | -2 | mask | vs2(vs1) | ELEM | Mask operand for some ELEM operations |
// | -1 | mask | v0 | all | Mask operand for masked operations |

// Operand count:
// - default is 3 (indices 0, 1, and -1 from above table, required by almost all units)
// - MUL unit additionally requires index 2, raising the operand count to a minimum of 4
// - ELEM unit additionally requires indices -3 and -2, hence a minimum of 5 operands
// - For index -2, vs2 is required by vpopc, vfirst and viota, while vs1 is required by vcompress
// - if MUL and ELEM units are both present in same pipeline, then all 6 operands are required
// - in case a pipeline contains only the SLD unit the operand count is 2 (indices 0 and -1)
localparam int unsigned OP_CNT = UNITS[UNIT_MUL] ? (
Expand Down Expand Up @@ -198,6 +199,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
count_inc_e count_inc; // counter increment policy
logic [2:0] field_count_init; // field counter initial value
logic requires_flush; // whether the instr requires flushing
logic red_op; // whether the instr is a reduction
logic [XIF_ID_W -1:0] id;
op_unit unit;
op_mode mode;
Expand All @@ -224,100 +226,132 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
assign unit_elem = UNITS[UNIT_ELEM] & (pipe_in_data_i.unit == UNIT_ELEM);

// identify the type of data that vs2 supplies for ELEM instructions
logic elem_flush, elem_vs2_data, elem_vs2_mask, elem_vs2_dyn_addr;
logic elem_flush, red_op, elem_vs2_data, elem_vs1_mask, elem_vs2_mask, elem_vs2_dyn_addr;
always_comb begin
elem_flush = DONT_CARE_ZERO ? 1'b0 : 1'bx;
red_op = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_data = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs1_mask = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_mask = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_dyn_addr = DONT_CARE_ZERO ? 1'b0 : 1'bx;
unique case (pipe_in_data_i.mode.elem.op)
ELEM_XMV: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VPOPC: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VFIRST: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VID: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VIOTA: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VRGATHER: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b1;
end
ELEM_VCOMPRESS: begin
elem_flush = 1'b1;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs1_mask = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDSUM: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDAND: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDOR: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDXOR: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMINU: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMIN: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMAXU: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMAX: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs1_mask = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
Expand Down Expand Up @@ -403,6 +437,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(

state_init.field_count_init = unit_lsu ? pipe_in_data_i.mode.lsu.nfields : '0;
state_init.requires_flush = unit_elem & elem_flush;
state_init.red_op = red_op;
state_init.id = pipe_in_data_i.id;
state_init.unit = pipe_in_data_i.unit;
state_init.mode = pipe_in_data_i.mode;
Expand Down Expand Up @@ -483,16 +518,19 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
state_init.op_vaddr[(OP_CNT >= 3) ? 2 : 0] = pipe_in_data_i.mode.mul.op2_is_vd ? pipe_in_data_i.rs2.r.vaddr : pipe_in_data_i.rd.addr;
end
if (unit_elem) begin
// elem_vs1_mask is set for vcompress: operand 1 then fetches vs2 (the data) instead of vs1,
// and the mask operand at index OP_CNT-2 fetches vs1 (the mask) instead of vs2
state_init.op_flags[0 ].vreg = pipe_in_data_i.rs2.vreg & elem_vs2_data;
state_init.op_flags[0 ].elemwise = 1'b1;
state_init.op_flags[0 ].sigext = pipe_in_data_i.mode.elem.sigext;
state_init.op_flags[1 ].vreg = pipe_in_data_i.rs1.vreg | (elem_vs1_mask & pipe_in_data_i.rs2.vreg);
state_init.op_flags[1 ].elemwise = 1'b1;
state_init.op_flags[1 ].narrow = 1'b0; // only op 0 can be narrow
state_init.op_vaddr[1 ] = elem_vs1_mask ? pipe_in_data_i.rs2.r.vaddr : pipe_in_data_i.rs1.r.vaddr;
state_init.op_flags[(OP_CNT >= 3) ? OP_CNT-3 : 0].vreg = elem_vs2_dyn_addr;
state_init.op_vaddr[(OP_CNT >= 3) ? OP_CNT-3 : 0] = pipe_in_data_i.rs2.r.vaddr;
state_init.op_flags[ OP_CNT-2 ].vreg = pipe_in_data_i.rs2.vreg & elem_vs2_mask;
state_init.op_flags[ OP_CNT-2 ].vreg = (pipe_in_data_i.rs2.vreg & elem_vs2_mask) | (pipe_in_data_i.rs1.vreg & elem_vs1_mask);
state_init.op_flags[ OP_CNT-2 ].elemwise = 1'b1;
state_init.op_vaddr[ OP_CNT-2 ] = pipe_in_data_i.rs2.r.vaddr;
state_init.op_vaddr[ OP_CNT-2 ] = elem_vs1_mask ? pipe_in_data_i.rs1.r.vaddr : pipe_in_data_i.rs2.r.vaddr;
state_init.op_flags[ OP_CNT-1 ].elemwise = 1'b1;
end
end
Expand Down
1 change: 1 addition & 0 deletions rtl/vproc_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ typedef struct packed {
logic narrow;
logic saturate;
logic sig;
logic red_op;
logic [2:0] mul_idx;
} pack_flags;

Expand Down
2 changes: 1 addition & 1 deletion rtl/vproc_unit_mux.sv
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ module vproc_unit_mux import vproc_pkg::*; #(
pipe_out_res_mask_o = unit_out_res_mask [i];
pipe_out_pend_clear_o = unit_out_pend_clear [i];
pipe_out_pend_clear_cnt_o = unit_out_pend_clear_cnt[i];
pipe_out_instr_done_o = unit_out_instr_done [i];
pipe_out_instr_done_o = unit_out_instr_done [i] & unit_out_valid[i];
end
end
end
Expand Down
Loading