// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: miss controller for serpent dcache. Note that the current assumption
// is that the port with the highest index issues writes instead of reads.
//
// Overview (derived from the code below):
//   - One of the NumPorts miss requests is selected (i_lzc_reqs) and forwarded to the
//     memory interface (mem_data_o / mem_data_req_o / mem_data_ack_i).
//   - A single MSHR (mshr_q / mshr_vld_q) tracks the one outstanding read. While it is
//     valid, read requests are masked out and only write requests are arbitrated.
//   - Returning load data is written into the cache through the wr_cl_* port unless the
//     MSHR entry is marked non-cacheable (nc).
//   - Invalidation requests from memory and cache flushes use the same wr_cl_* port.
//   - AMOs are sent out with the fixed transaction id AmoTxId, and only after the write
//     buffer is empty and the MSHR is free.
//
// FSM states:
//   IDLE       : accept flush / enable / AMO / miss requests
//   DRAIN      : only pass writes, wait until the MSHR is free and the write buffer is empty
//   FLUSH      : sweep the cache with cnt_q and clear all valid bits (also the reset state)
//   STORE_WAIT : hold a store request until memory acks it
//   LOAD_WAIT  : hold a load request until memory acks it, then allocate the MSHR
//   AMO        : issue the atomic request
//   AMO_WAIT   : wait for the atomic response

import ariane_pkg::*;
import serpent_cache_pkg::*;

module serpent_dcache_missunit #(
  parameter logic [DCACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
  parameter int unsigned NumPorts = 3 // number of miss ports
) (
  input logic clk_i, // Clock
  input logic rst_ni, // Asynchronous reset active low
  // cache management, signals from/to core
  input logic enable_i, // from CSR
  input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
  output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
  output logic miss_o, // we missed on a ld/st (asserted when the MSHR is allocated for a cacheable request)
  // local cache management signals
  input logic wbuffer_empty_i,
  output logic cache_en_o, // local cache enable signal
  // AMO interface
  input amo_req_t amo_req_i,
  output amo_resp_t amo_resp_o,
  // miss handling interface (ld, ptw, wbuffer)
  input logic [NumPorts-1:0] miss_req_i,
  output logic [NumPorts-1:0] miss_ack_o,
  input logic [NumPorts-1:0] miss_nc_i,
  input logic [NumPorts-1:0] miss_we_i,
  input logic [NumPorts-1:0][63:0] miss_wdata_i,
  input logic [NumPorts-1:0][63:0] miss_paddr_i,
  input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
  input logic [NumPorts-1:0][2:0] miss_size_i,
  input logic [NumPorts-1:0][DCACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
  // signals that the request collided with a pending read
  output logic [NumPorts-1:0] miss_replay_o,
  // signals response from memory
  output logic [NumPorts-1:0] miss_rtrn_vld_o,
  // NOTE(review): this module forwards mem_rtrn_i.tid unmodified; it is presumably only
  // consumed for writes (write buffer) - confirm against the consumer
  output logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id_o,
  // from writebuffer
  input logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr_i, // used to check for address collisions with read operations
  input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
  // write interface to cache memory
  output logic wr_cl_vld_o, // set on a load ack or whenever any bit of wr_cl_we_o is set
  output logic wr_cl_nc_o, // nc flag of the MSHR entry
  output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // per-way write enable (all ways for flush / invalidate-all)
  output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
  output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
  output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
  output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
  output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
  output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o, // valid bits to be written (zero for flush / invalidate)
  // memory interface
  input logic mem_rtrn_vld_i,
  input dcache_rtrn_t mem_rtrn_i,
  output logic mem_data_req_o,
  input logic mem_data_ack_i,
  output dcache_req_t mem_data_o
);

  // controller FSM
  typedef enum logic[2:0] {IDLE, DRAIN, AMO, FLUSH, STORE_WAIT, LOAD_WAIT, AMO_WAIT} state_t;
  state_t state_d, state_q;

  // MSHR for reads: holds the attributes of the single outstanding read request
  typedef struct packed {
    logic [63:0] paddr ;
    logic [2:0] size ;
    logic [DCACHE_SET_ASSOC-1:0] vld_bits;
    logic [DCACHE_ID_WIDTH-1:0] id ;
    logic nc ;
    logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
    logic [$clog2(NumPorts)-1:0] miss_port_idx;
  } mshr_t;

  mshr_t mshr_d, mshr_q;
  logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
  // mshr_vld_q1 is mshr_vld_q delayed by one cycle (see p_regs)
  logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
  logic mshr_allocate;
  logic update_lfsr, all_ways_valid;
  logic enable_d, enable_q;
  logic flush_ack_d, flush_ack_q;
  logic flush_en, flush_done;
  logic mask_reads, lock_reqs;
  logic amo_sel, miss_is_write;
  logic [63:0] amo_data, tmp_paddr, amo_rtrn_mux;
  logic [$clog2(NumPorts)-1:0] miss_port_idx;
  logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
  logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
  logic inv_vld, inv_vld_all, cl_write_en;
  logic load_ack, store_ack, amo_ack;
  logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
  logic [NumPorts-1:0] mshr_rdrd_collision;
  logic tx_rdwr_collision, mshr_rdwr_collision;

///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////

  assign cache_en_o = enable_q;
  // flush counter: increments while flush_en is set, cleared otherwise
  assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
  assign flush_done = (cnt_q == serpent_cache_pkg::DCACHE_NUM_WORDS-1);

  // request vector seen by the arbiter:
  //   lock_reqs  -> hold the vector registered in the previous cycle (used in the *_WAIT
  //                 states so that the selected port stays stable until the ack arrives)
  //   mask_reads -> only let write requests through
  assign miss_req_masked_d = ( lock_reqs ) ? miss_req_masked_q :
                             ( mask_reads ) ? miss_we_i & miss_req_i : miss_req_i;
  assign miss_is_write = miss_we_i[miss_port_idx];

  // read port arbiter
  // NOTE(review): selection priority is defined by the lzc implementation (not visible
  // here); presumably the lowest set index wins - confirm
  lzc #(
    .WIDTH ( NumPorts )
  ) i_lzc_reqs (
    .in_i ( miss_req_masked_d ),
    .cnt_o ( miss_port_idx ),
    .empty_o ( )
  );

  // the selected port is acked when the memory handshake completes;
  // no port is acked while an AMO owns the memory interface
  always_comb begin : p_ack
    miss_ack_o = '0;
    if (~amo_sel) begin
      miss_ack_o[miss_port_idx] = mem_data_ack_i & mem_data_req_o;
    end
  end

///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////

  // find invalid cache line
  // (the lzc runs over the inverted valid bits of the selected port; its empty_o output is
  // used as "all ways valid")
  lzc #(
    .WIDTH ( ariane_pkg::DCACHE_SET_ASSOC )
  ) i_lzc_inv (
    .in_i ( ~miss_vld_bits_i[miss_port_idx] ),
    .cnt_o ( inv_way ),
    .empty_o ( all_ways_valid )
  );

  // generate random cacheline index (enabled via update_lfsr)
  lfsr_8bit #(
    .WIDTH ( ariane_pkg::DCACHE_SET_ASSOC )
  ) i_lfsr_inv (
    .clk_i ( clk_i ),
    .rst_ni ( rst_ni ),
    .en_i ( update_lfsr ),
    .refill_way_oh ( ),
    .refill_way_bin ( rnd_way )
  );

  // prefer an invalid way; fall back to the random way if all ways are valid
  assign repl_way = (all_ways_valid) ? rnd_way : inv_way;

  // MSHR fields are captured from the selected port on allocation and held otherwise
  assign mshr_d.size = (mshr_allocate) ? miss_size_i [miss_port_idx] : mshr_q.size;
  assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i [miss_port_idx] : mshr_q.paddr;
  assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
  assign mshr_d.id = (mshr_allocate) ? miss_id_i [miss_port_idx] : mshr_q.id;
  assign mshr_d.nc = (mshr_allocate) ? miss_nc_i [miss_port_idx] : mshr_q.nc;
  assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
  assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;

  // currently we only have one outstanding read TX, hence an incoming load clears the MSHR
  // (allocation takes priority over clearing when both happen in the same cycle)
  assign mshr_vld_d = (mshr_allocate) ? 1'b1 :
                      (load_ack) ? 1'b0 :
                      mshr_vld_q;

  // asserted in the cycle the MSHR is allocated, for cacheable requests only
  assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;

  // read/read collision per port: the port's cacheline address matches the MSHR address
  // while the MSHR is valid or was valid in the previous cycle.
  // The _d/_q pair makes the flag sticky for as long as the port keeps its request
  // asserted; it is cleared as soon as miss_req_i[k] drops.
  generate
    for(genvar k=0; k<NumPorts; k++) begin
      assign mshr_rdrd_collision[k] = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
      assign mshr_rdrd_collision_d[k] = (~miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
    end
  endgenerate

  // read/write collision, stalls the corresponding request
  // write collides with MSHR (the write port is port NumPorts-1)
  assign mshr_rdwr_collision = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][63:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;

  // read collides with inflight TX
  // (cacheline address of the selected port is compared against all valid tx_paddr_i entries)
  always_comb begin : p_tx_coll
    tx_rdwr_collision = 1'b0;
    for(int k=0; k<DCACHE_MAX_TX; k++) begin
      tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][63:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
    end
  end

///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////

  // if size = 32bit word, select appropriate offset, replicate for openpiton...
  // (the lower 32 bits of operand_b are placed in both halves of the 64bit data word)
  assign amo_data = (amo_req_i.size==2'b10) ? {amo_req_i.operand_b[0 +: 32], amo_req_i.operand_b[0 +: 32]} : amo_req_i.operand_b;

  // note: openpiton returns a full cacheline!
  // select the addressed 64bit word out of the returned line
  assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64 +: 64];

  // always sign extend 32bit values
  // (operand_a[2] selects the upper or lower 32bit half of the 64bit word)
  assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}}, amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} : amo_rtrn_mux;

  // outgoing memory requests (AMOs are always uncached)
  // amo_sel switches all request fields between the AMO interface and the selected miss port;
  // mem_data_o.rtype is driven separately in p_fsm
  assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
  assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
  assign mem_data_o.way = (amo_sel) ? '0 : repl_way;
  assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
  assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i [miss_port_idx];
  assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;

  assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a : miss_paddr_i[miss_port_idx];
  assign mem_data_o.paddr = serpent_cache_pkg::paddrSizeAlign(tmp_paddr, mem_data_o.size);

///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////

  // incoming responses: decode the return type into ack / invalidation strobes
  always_comb begin : p_rtrn_logic
    load_ack = 1'b0;
    store_ack = 1'b0;
    amo_ack = 1'b0;
    inv_vld = 1'b0;
    inv_vld_all = 1'b0;
    miss_rtrn_vld_o ='0;
    if(mem_rtrn_vld_i) begin
      unique case (mem_rtrn_i.rtype)
        DCACHE_LOAD_ACK: begin
          // load responses go back to the port recorded in the MSHR
          load_ack = 1'b1;
          miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
        end
        DCACHE_STORE_ACK: begin
          // store responses always go to the highest port index (the write port)
          store_ack = 1'b1;
          miss_rtrn_vld_o[NumPorts-1] = 1'b1;
        end
        DCACHE_ATOMIC_ACK: begin
          amo_ack = 1'b1;
        end
        DCACHE_INV_REQ: begin
          // inv_vld is also set for invalidate-all requests
          inv_vld = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
          inv_vld_all = mem_rtrn_i.inv.all;
        end
        // TODO:
        // DCACHE_INT_REQ: begin
        // end
        default : begin
        end
      endcase
    end
  end

  // to write buffer
  assign miss_rtrn_id_o = mem_rtrn_i.tid;

///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////

  // cacheline write port
  assign wr_cl_nc_o = mshr_q.nc;
  assign wr_cl_vld_o = load_ack | |wr_cl_we_o;

  // way write enable, in priority order: flush > invalidate all > invalidate way > refill
  assign wr_cl_we_o = ( flush_en ) ? '1 :
                      ( inv_vld_all ) ? '1 :
                      ( inv_vld ) ? dcache_way_bin2oh(mem_rtrn_i.inv.way) :
                      ( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way) :
                      '0;

  // valid bits to write: cleared for flush and invalidations, set for the refilled way
  assign wr_vld_bits_o = ( flush_en ) ? '0 :
                         ( inv_vld ) ? '0 :
                         ( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way) :
                         '0;

  // index source: flush counter, invalidation index, or MSHR address (refill)
  assign wr_cl_idx_o = ( flush_en ) ? cnt_q :
                       ( inv_vld ) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
                       mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];

  assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
  assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
  assign wr_cl_data_o = mem_rtrn_i.data;
  assign wr_cl_data_be_o = ( cl_write_en ) ? '1 : '0;// we only write complete cachelines into the memory

  // only non-NC (cacheable) load responses write to the cache
  assign cl_write_en = load_ack & ~mshr_q.nc;

///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////

  always_comb begin : p_fsm
    // default assignment
    state_d = state_q;
    flush_ack_o = 1'b0;
    mem_data_o.rtype = DCACHE_LOAD_REQ;
    mem_data_req_o = 1'b0;
    amo_resp_o.ack = 1'b0;
    miss_replay_o = '0;

    // disabling cache is possible anytime, enabling goes via flush
    // (enable_d can only fall here; it is set to enable_i at the end of the FLUSH state)
    enable_d = enable_q & enable_i;
    flush_ack_d = flush_ack_q;
    flush_en = 1'b0;
    amo_sel = 1'b0;
    update_lfsr = 1'b0;
    mshr_allocate = 1'b0;
    lock_reqs = 1'b0;
    // by default, reads are masked while the MSHR is occupied
    mask_reads = mshr_vld_q;

    // interfaces
    unique case (state_q)
      //////////////////////////////////
      // wait for misses / amo ops
      IDLE: begin
        // a flush request or a 0->1 transition of the enable both trigger a flush
        if(flush_i | (enable_i & ~enable_q)) begin
          if(wbuffer_empty_i && ~mshr_vld_q) begin
            // remember whether this flush was requested via flush_i, since only
            // then an acknowledge is sent at the end of the FLUSH state
            flush_ack_d = flush_i;
            state_d = FLUSH;
          end else begin
            // pending transactions must finish first; IDLE is re-evaluated after DRAIN
            state_d = DRAIN;
          end
        end else if(amo_req_i.req) begin
          if(wbuffer_empty_i && ~mshr_vld_q) begin
            state_d = AMO;
          end else begin
            state_d = DRAIN;
          end
        // we've got a miss to handle
        end else if(|miss_req_masked_d) begin
          // this is a write miss, just pass through (but check whether write collides with MSHR)
          if(miss_is_write) begin
            // stall in case this write collides with the MSHR address
            if(~mshr_rdwr_collision) begin
              mem_data_req_o = 1'b1;
              mem_data_o.rtype = DCACHE_STORE_REQ;
              if(~mem_data_ack_i) begin
                state_d = STORE_WAIT;
              end
            end
          // this is a read miss, can only allocate 1 MSHR
          // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
          end else if(~mshr_vld_q | load_ack) begin
            // replay the read request in case the address has collided with MSHR during the time the request was pending
            // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
            if(mshr_rdrd_collision_d[miss_port_idx]) begin
              miss_replay_o[miss_port_idx] = 1'b1;
            // stall in case this CL address overlaps with a write TX that is in flight
            end else if(~tx_rdwr_collision) begin
              mem_data_req_o = 1'b1;
              mem_data_o.rtype = DCACHE_LOAD_REQ;
              update_lfsr = all_ways_valid & mem_data_ack_i;// need to evict a random way
              // the MSHR is only allocated once memory has accepted the request
              mshr_allocate = mem_data_ack_i;
              if(~mem_data_ack_i) begin
                state_d = LOAD_WAIT;
              end
            end
          end
        end
      end
      //////////////////////////////////
      // wait until this request is acked
      STORE_WAIT: begin
        // keep the arbitration result stable while waiting
        lock_reqs = 1'b1;
        mem_data_req_o = 1'b1;
        mem_data_o.rtype = DCACHE_STORE_REQ;
        if(mem_data_ack_i) begin
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      // wait until this request is acked
      LOAD_WAIT: begin
        // keep the arbitration result stable while waiting
        lock_reqs = 1'b1;
        mem_data_req_o = 1'b1;
        mem_data_o.rtype = DCACHE_LOAD_REQ;
        if(mem_data_ack_i) begin
          update_lfsr = all_ways_valid;// need to evict a random way
          mshr_allocate = 1'b1;;
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      // only handle stores, do not accept new read requests
      // wait until MSHR is cleared and wbuffer is empty
      DRAIN: begin
        mask_reads = 1'b1;
        // these are writes, check whether they collide with MSHR
        if(|miss_req_masked_d && ~mshr_rdwr_collision) begin
          mem_data_req_o = 1'b1;
          mem_data_o.rtype = DCACHE_STORE_REQ;
        end
        if(wbuffer_empty_i && ~mshr_vld_q) begin
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      // flush the cache
      FLUSH: begin
        // internal flush signal
        // (enables the flush counter and forces the cache write port to clear all ways)
        flush_en = 1'b1;
        if(flush_done) begin
          state_d = IDLE;
          // only acknowledge if the flush was triggered via flush_i
          flush_ack_o = flush_ack_q;
          flush_ack_d = 1'b0;
          // the cache enable is updated once the flush has completed
          enable_d = enable_i;
        end
      end
      //////////////////////////////////
      // send out amo op request
      AMO: begin
        mem_data_o.rtype = DCACHE_ATOMIC_REQ;
        mem_data_req_o = 1'b1;
        amo_sel = 1'b1;
        if(mem_data_ack_i) begin
          state_d = AMO_WAIT;
        end
      end
      //////////////////////////////////
      // block and wait until AMO OP returns
      AMO_WAIT: begin
        amo_sel = 1'b1;
        if(amo_ack) begin
          amo_resp_o.ack = 1'b1;
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      default: begin
        // we should never get here
        state_d = IDLE;
      end
    endcase // state_q
  end

///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////

  // the FSM comes out of reset in the FLUSH state with the cache disabled (enable_q = 0)
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(~rst_ni) begin
      state_q <= FLUSH;
      cnt_q <= '0;
      enable_q <= '0;
      flush_ack_q <= '0;
      mshr_vld_q <= '0;
      mshr_vld_q1 <= '0;
      mshr_q <= '0;
      mshr_rdrd_collision_q <= '0;
      miss_req_masked_q <= '0;
    end else begin
      state_q <= state_d;
      cnt_q <= cnt_d;
      enable_q <= enable_d;
      flush_ack_q <= flush_ack_d;
      mshr_vld_q <= mshr_vld_d;
      mshr_vld_q1 <= mshr_vld_q;
      mshr_q <= mshr_d;
      mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
      miss_req_masked_q <= miss_req_masked_d;
    end
  end

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
`ifndef VERILATOR

  // a load response for an NC MSHR entry must itself be flagged NC
  nc_response : assert property (
    @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mshr_q.nc |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.nc)
      else $fatal(1,"[l1 dcache missunit] NC load response implies NC load response");

  // the transaction id of a load response must match the id stored in the MSHR
  read_tid : assert property (
    @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
      else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match");

  // ports 0..NumPorts-2 are read-only
  read_ports : assert property (
    @(posedge clk_i) disable iff (~rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
      else $fatal(1,"[l1 dcache missunit] only last port can issue write requests");

  // port NumPorts-1 is write-only
  write_port : assert property (
    @(posedge clk_i) disable iff (~rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
      else $fatal(1,"[l1 dcache missunit] last port can only issue write requests");

  initial begin
    // assert wrong parameterizations
    assert (NumPorts>=2)
      else $fatal(1,"[l1 dcache missunit] at least two ports are required (one read port, one write port)");
  end
`endif
//pragma translate_on

endmodule // serpent_dcache_missunit