Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions rtl/vproc_pipeline.sv
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ module vproc_pipeline import vproc_pkg::*; #(
logic alt_last_cycle;
logic init_addr; // initialize address (used by LSU)
logic requires_flush;
logic red_op;
logic [XIF_ID_W -1:0] id;
op_unit unit;
op_mode mode;
Expand Down Expand Up @@ -241,6 +242,7 @@ module vproc_pipeline import vproc_pkg::*; #(
state_next.first_cycle = 1'b1;
state_next.init_addr = 1'b1;
state_next.requires_flush = pipe_in_state_i.requires_flush;
state_next.red_op = pipe_in_state_i.red_op;
state_next.id = pipe_in_state_i.id;
state_next.unit = pipe_in_state_i.unit;
state_next.mode = pipe_in_state_i.mode;
Expand Down Expand Up @@ -679,6 +681,7 @@ module vproc_pipeline import vproc_pkg::*; #(
logic last_cycle;
logic init_addr; // initialize address (used by LSU)
logic requires_flush;
logic red_op;
logic alt_count_valid; // alternative counter value is valid
logic [AUX_COUNTER_W-1:0] aux_count;
logic [XIF_ID_W-1:0] id;
Expand Down Expand Up @@ -711,6 +714,7 @@ module vproc_pipeline import vproc_pkg::*; #(
(~FIELD_COUNT_USED | (state_q.field_count == '0));
unpack_ctrl.init_addr = state_q.init_addr;
unpack_ctrl.requires_flush = state_q.requires_flush;
unpack_ctrl.red_op = state_q.red_op;
unpack_ctrl.alt_count_valid = DONT_CARE_ZERO ? '0 : 'x;
unique case (state_q.emul)
EMUL_1: unpack_ctrl.alt_count_valid = state_q.alt_count.val[COUNTER_W-1 -: 4] == '0;
Expand Down
36 changes: 27 additions & 9 deletions rtl/vproc_pipeline_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
count_inc_e count_inc; // counter increment policy
logic [2:0] field_count_init; // field counter initial value
logic requires_flush; // whether the instr requires flushing
logic red_op; // whether the instr is a reduction
logic [XIF_ID_W -1:0] id;
op_unit unit;
op_mode mode;
Expand All @@ -224,99 +225,115 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(
assign unit_elem = UNITS[UNIT_ELEM] & (pipe_in_data_i.unit == UNIT_ELEM);

// identify the type of data that vs2 supplies for ELEM instructions
logic elem_flush, elem_vs2_data, elem_vs2_mask, elem_vs2_dyn_addr;
logic elem_flush, red_op, elem_vs2_data, elem_vs2_mask, elem_vs2_dyn_addr;
always_comb begin
elem_flush = DONT_CARE_ZERO ? 1'b0 : 1'bx;
red_op = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_data = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_mask = DONT_CARE_ZERO ? 1'b0 : 1'bx;
elem_vs2_dyn_addr = DONT_CARE_ZERO ? 1'b0 : 1'bx;
unique case (pipe_in_data_i.mode.elem.op)
ELEM_XMV: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VPOPC: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VFIRST: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VID: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VIOTA: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VRGATHER: begin
elem_flush = 1'b0;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b1;
end
ELEM_VCOMPRESS: begin
elem_flush = 1'b1;
red_op = 1'b0;
elem_vs2_data = 1'b0;
elem_vs2_mask = 1'b1;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDSUM: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDAND: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDOR: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDXOR: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMINU: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMIN: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMAXU: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
end
ELEM_VREDMAX: begin
elem_flush = 1'b1;
elem_flush = 1'b0;
red_op = 1'b1;
elem_vs2_data = 1'b1;
elem_vs2_mask = 1'b0;
elem_vs2_dyn_addr = 1'b0;
Expand Down Expand Up @@ -403,6 +420,7 @@ module vproc_pipeline_wrapper import vproc_pkg::*; #(

state_init.field_count_init = unit_lsu ? pipe_in_data_i.mode.lsu.nfields : '0;
state_init.requires_flush = unit_elem & elem_flush;
state_init.red_op = red_op;
state_init.id = pipe_in_data_i.id;
state_init.unit = pipe_in_data_i.unit;
state_init.mode = pipe_in_data_i.mode;
Expand Down
1 change: 1 addition & 0 deletions rtl/vproc_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ typedef struct packed {
logic narrow;
logic saturate;
logic sig;
logic red_op;
logic [2:0] mul_idx;
} pack_flags;

Expand Down
3 changes: 2 additions & 1 deletion rtl/vproc_unit_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,8 @@ module vproc_unit_wrapper import vproc_pkg::*; #(
default: ;
endcase
pipe_out_res_flags_o[0].elemwise = 1'b1;
pipe_out_res_store_o[0] = ((~unit_out_ctrl.mode.elem.xreg & unit_out_res_valid) | flushing_q) & (vd_count_d.part.low == '1);
pipe_out_res_flags_o[0].red_op = unit_out_ctrl.red_op;
pipe_out_res_store_o[0] = ((~unit_out_ctrl.mode.elem.xreg & unit_out_res_valid) | flushing_q) & (vd_count_d.part.low == '1 | unit_out_ctrl.red_op);
pipe_out_res_valid_o[0] = flushing_q | unit_out_res_valid;
pipe_out_res_data_o [0] = unit_out_res;
pipe_out_res_mask_o [0][3:0] = flushing_q ? '0 : unit_out_mask;
Expand Down
25 changes: 25 additions & 0 deletions rtl/vproc_vregpack.sv
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,31 @@ module vproc_vregpack #(
res_buffer_next[i][VPORT_W -RES_W[i] -1:0] = res_buffer[i][VPORT_W -1:RES_W[i] ];
msk_buffer_next[i][VPORT_W/8-RES_W[i]/8-1:0] = msk_buffer[i][VPORT_W/8-1:RES_W[i]/8];
end
// For reduction operations, the reduction value is written directly into the lowest bits of the
// result buffer and only the lowest bits of the mask buffer are set.
// This works around a bug in the current Vicuna code: when the following instruction is executed
// in the same pipeline, the unit deque is not available for the new instruction because
// it is still processing the flush logic for the reduction/compress operation.
// This extra `if` removes the need for the flush logic for reduction operations, but it does not
// fix the problem for compress instructions.
if((RES_ALLOW_ELEMWISE[i] | RES_ALWAYS_ELEMWISE[i]) & pipe_in_res_flags_i[i].red_op) begin
msk_buffer_next[i] = '0;
unique case (pipe_in_eew_i)
VSEW_8: begin
res_buffer_next[i][7:0] = pipe_in_res_data_i[i][7 :0];
msk_buffer_next[i][0] = pipe_in_res_mask_i[i][0];
end
VSEW_16: begin
res_buffer_next[i][15:0] = pipe_in_res_data_i[i][15:0];
msk_buffer_next[i][1:0] = {2{pipe_in_res_mask_i[i][0]}};
end
VSEW_32: begin
res_buffer_next[i][31:0] = pipe_in_res_data_i[i][31:0];
msk_buffer_next[i][3:0] = {4{pipe_in_res_mask_i[i][0]}};
end
default: ;
endcase
end
end

end
Expand Down