// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// File: cache_ctrl.svh
// Author: Florian Zaruba <zarubaf@ethz.ch>
// Date: 14.10.2017
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// Description: Cache controller
//
// Serves a single core request port (req_port_i / req_port_o). For every
// accepted access the FSM requests all ways of the cache array, evaluates
// hit_way_i in the following state and then either
//   - returns the selected 64 bit word on a load hit,
//   - performs a second array access that writes the store data and sets the
//     dirty/valid bits on a store hit (STORE_REQ),
//   - or forwards the access to the miss unit through miss_req_o (miss, bypass
//     mode, or tag below CACHE_START_ADDR).
// The address is checked against the MSHR (mshr_addr_matches_i /
// mshr_index_matches_i); on a conflict the controller parks in WAIT_MSHR and
// repeats the lookup from the saved request (WAIT_TAG_SAVED).

import ariane_pkg::*;
import std_cache_pkg::*;

module cache_ctrl #(
    // Tags below the tag part of this address are treated as non-cacheable
    // and are sent to the miss unit as bypass requests.
    parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
) (
    input  logic                                clk_i,                 // Clock
    input  logic                                rst_ni,                // Asynchronous reset active low
    input  logic                                flush_i,               // blocks acceptance of new requests in IDLE/WAIT_TAG
    input  logic                                bypass_i,              // bypass mode: accepted accesses are marked as bypass
    output logic                                busy_o,                // FSM is not in IDLE
    // Core request ports
    input  dcache_req_i_t                       req_port_i,
    output dcache_req_o_t                       req_port_o,
    // SRAM interface
    output logic [DCACHE_SET_ASSOC-1:0]         req_o,                 // req is valid
    output logic [DCACHE_INDEX_WIDTH-1:0]       addr_o,                // address into cache array
    input  logic                                gnt_i,
    output cache_line_t                         data_o,
    output cl_be_t                              be_o,
    output logic [DCACHE_TAG_WIDTH-1:0]         tag_o,                 // valid one cycle later
    input  cache_line_t [DCACHE_SET_ASSOC-1:0]  data_i,
    output logic                                we_o,
    input  logic [DCACHE_SET_ASSOC-1:0]         hit_way_i,
    // Miss handling
    output miss_req_t                           miss_req_o,
    // return
    input  logic                                miss_gnt_i,
    input  logic                                active_serving_i,      // the miss unit is currently active for this unit, serving the miss
    input  logic [63:0]                         critical_word_i,
    input  logic                                critical_word_valid_i,
    // bypass ports
    input  logic                                bypass_gnt_i,
    input  logic                                bypass_valid_i,
    input  logic [63:0]                         bypass_data_i,
    // check MSHR for aliasing
    output logic [55:0]                         mshr_addr_o,
    input  logic                                mshr_addr_matches_i,
    input  logic                                mshr_index_matches_i
);

    enum logic [3:0] {
        IDLE,               // 0
        WAIT_TAG,           // 1
        WAIT_TAG_BYPASSED,  // 2
        STORE_REQ,          // 3
        WAIT_REFILL_VALID,  // 4
        WAIT_REFILL_GNT,    // 5
        WAIT_TAG_SAVED,     // 6
        WAIT_MSHR,          // 7
        WAIT_CRITICAL_WORD  // 8
    } state_d, state_q;

    // Snapshot of the request currently being processed
    typedef struct packed {
        logic [DCACHE_INDEX_WIDTH-1:0] index;
        logic [DCACHE_TAG_WIDTH-1:0]   tag;
        logic [7:0]                    be;
        logic [1:0]                    size;
        logic                          we;
        logic [63:0]                   wdata;
        logic                          bypass;
    } mem_req_t;

    // way(s) that hit for a store, kept for the write access in STORE_REQ
    logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;

    assign busy_o = (state_q != IDLE);

    mem_req_t mem_req_d, mem_req_q;

    // data of the hitting way, all zero if no way hits
    logic [DCACHE_LINE_WIDTH-1:0] cl_i;

    always_comb begin : way_select
        cl_i = '0;
        // if several bits of hit_way_i were set the highest index would win
        for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++)
            if (hit_way_i[i])
                cl_i = data_i[i].data;

        // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
    end

    // --------------
    // Cache FSM
    // --------------
    always_comb begin : cache_ctrl_fsm
        automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
        // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
        // cache-line offset -> multiple of 64
        cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
        // default assignments
        state_d   = state_q;
        mem_req_d = mem_req_q;
        hit_way_d = hit_way_q;

        // output assignments
        req_port_o.data_gnt    = 1'b0;
        req_port_o.data_rvalid = 1'b0;
        req_port_o.data_rdata  = '0;
        miss_req_o             = '0;
        mshr_addr_o            = '0;
        // Memory array communication
        req_o  = '0;
        addr_o = req_port_i.address_index;
        data_o = '0;
        be_o   = '0;
        tag_o  = '0;
        we_o   = '0;

        case (state_q)

            IDLE: begin
                // a new request arrived
                if (req_port_i.data_req && !flush_i) begin
                    // request the cache line - we can do this speculatively
                    req_o = '1;

                    // save index, be and we
                    mem_req_d.index = req_port_i.address_index;
                    mem_req_d.tag   = req_port_i.address_tag;
                    mem_req_d.be    = req_port_i.data_be;
                    mem_req_d.size  = req_port_i.data_size;
                    mem_req_d.we    = req_port_i.data_we;
                    mem_req_d.wdata = req_port_i.data_wdata;

                    // Bypass mode, check for uncacheable address here as well
                    if (bypass_i) begin
                        state_d = (req_port_i.data_we) ? WAIT_REFILL_GNT : WAIT_TAG_BYPASSED;
                        // grant this access only if it was a load
                        req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
                        mem_req_d.bypass = 1'b1;
                    // ------------------
                    // Cache is enabled
                    // ------------------
                    end else begin
                        // Wait that we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            // only for a read
                            if (!req_port_i.data_we)
                                req_port_o.data_gnt = 1'b1;
                        end
                    end
                end
            end

            // cache enabled and waiting for tag
            WAIT_TAG, WAIT_TAG_SAVED: begin
                // depending on where we come from
                // For the store case the tag comes in the same cycle
                tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag
                                                                    : req_port_i.address_tag;

                // we speculatively request another transfer
                if (req_port_i.data_req && !flush_i) begin
                    req_o = '1;
                end

                // check that the client really wants to do the request
                if (!req_port_i.kill_req) begin
                    // ------------
                    // HIT CASE
                    // ------------
                    if (|hit_way_i) begin
                        // we can request another cache-line if this was a load
                        if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
                            state_d          = WAIT_TAG; // switch back to WAIT_TAG
                            mem_req_d.index  = req_port_i.address_index;
                            mem_req_d.be     = req_port_i.data_be;
                            mem_req_d.size   = req_port_i.data_size;
                            mem_req_d.we     = req_port_i.data_we;
                            mem_req_d.wdata  = req_port_i.data_wdata;
                            mem_req_d.tag    = req_port_i.address_tag;
                            mem_req_d.bypass = 1'b0;

                            req_port_o.data_gnt = gnt_i;

                            // no access to the array this cycle -> the request is
                            // not granted and has to be taken again from IDLE
                            if (!gnt_i) begin
                                state_d = IDLE;
                            end
                        end else begin
                            state_d = IDLE;
                        end

                        // this is timing critical
                        // req_port_o.data_rdata = cl_i[cl_offset +: 64];
                        case (mem_req_q.index[3])
                            1'b0: req_port_o.data_rdata = cl_i[63:0];
                            1'b1: req_port_o.data_rdata = cl_i[127:64];
                        endcase

                        // report data for a read
                        if (!mem_req_q.we) begin
                            req_port_o.data_rvalid = 1'b1;
                        // else this was a store so we need an extra step to handle it
                        end else begin
                            state_d   = STORE_REQ;
                            hit_way_d = hit_way_i;
                        end
                    // ------------
                    // MISS CASE
                    // ------------
                    end else begin
                        // Also save the tag, but only if it has not been saved
                        // before: in WAIT_TAG_SAVED the lookup above was done with
                        // mem_req_q.tag, so the miss request must keep that tag
                        // instead of re-sampling the request port.
                        if (state_q != WAIT_TAG_SAVED) begin
                            mem_req_d.tag = req_port_i.address_tag;
                        end
                        // make a miss request
                        state_d = WAIT_REFILL_GNT;
                    end
                    // ----------------------------------------------
                    // Check MSHR - Miss Status Handling Register
                    // ----------------------------------------------
                    mshr_addr_o = {tag_o, mem_req_q.index};
                    // 1. We've got a match on MSHR while we are going down the
                    //    store path. This means that the miss controller is
                    //    currently evicting our cache-line. As the store is
                    //    non-atomic we need to constantly check whether we are
                    //    matching the address the miss handler is serving.
                    //    Furthermore we need to check for the whole index
                    //    because a completely different memory line could alias
                    //    with the cache-line we are evicting.
                    // 2. The second case is where we are currently loading and
                    //    the address matches the exact CL the miss controller
                    //    is currently serving. That means we need to wait for
                    //    the miss controller to finish its request before we
                    //    can continue to serve this CL. Otherwise we will fetch
                    //    the cache-line again and potentially lose any
                    //    content we've written so far. This as a consequence
                    //    means we can't have a hit on the CL which means the
                    //    req_port_o.data_rvalid will be de-asserted.
                    if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
                        state_d = WAIT_MSHR;
                        // save tag if we didn't already save it e.g.: we are not in the Tag saved state
                        if (state_q != WAIT_TAG_SAVED) begin
                            mem_req_d.tag = req_port_i.address_tag;
                        end
                    end
                    // -------------------------
                    // Check for cache-ability
                    // -------------------------
                    // This check comes last, so it overrides the hit/miss and
                    // MSHR decisions taken above.
                    if (tag_o < CACHE_START_ADDR[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]) begin
                        // same rule as above: never overwrite an already saved tag
                        if (state_q != WAIT_TAG_SAVED) begin
                            mem_req_d.tag = req_port_i.address_tag;
                        end
                        mem_req_d.bypass = 1'b1;
                        state_d = WAIT_REFILL_GNT;
                    end
                end
            end

            // ~> we are here as we need a second round of memory access for a store
            STORE_REQ: begin
                // check if the MSHR still doesn't match
                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

                // We need to re-check for MSHR aliasing here as the store requires at least
                // two memory look-ups on a single-ported SRAM and therefore is non-atomic
                if (!mshr_index_matches_i) begin
                    // store data, write dirty bit
                    req_o  = hit_way_q;
                    addr_o = mem_req_q.index;
                    we_o   = 1'b1;

                    be_o.vldrty = hit_way_q;

                    // set the correct byte enable
                    be_o.data[cl_offset>>3 +: 8]  = mem_req_q.be;
                    data_o.data[cl_offset  +: 64] = mem_req_q.wdata;
                    // ~> change the state
                    data_o.dirty = 1'b1;
                    data_o.valid = 1'b1;

                    // got a grant ~> this is finished now
                    if (gnt_i) begin
                        req_port_o.data_gnt = 1'b1;
                        state_d = IDLE;
                    end
                end else begin
                    state_d = WAIT_MSHR;
                end
            end // case: STORE_REQ

            // we've got a match on MSHR ~> miss unit is currently serving a request
            WAIT_MSHR: begin
                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
                // we can start a new request
                if (!mshr_index_matches_i) begin
                    req_o = '1;

                    // repeat the lookup with the saved index
                    addr_o = mem_req_q.index;

                    if (gnt_i)
                        state_d = WAIT_TAG_SAVED;
                end
            end

            // it's for sure a miss
            WAIT_TAG_BYPASSED: begin
                // continue only if the request was not killed
                if (!req_port_i.kill_req) begin
                    // save tag
                    mem_req_d.tag = req_port_i.address_tag;
                    state_d = WAIT_REFILL_GNT;
                end
            end

            // ~> wait for grant from miss unit
            WAIT_REFILL_GNT: begin

                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

                miss_req_o.valid  = 1'b1;
                miss_req_o.bypass = mem_req_q.bypass;
                miss_req_o.addr   = {mem_req_q.tag, mem_req_q.index};
                miss_req_o.be     = mem_req_q.be;
                miss_req_o.size   = mem_req_q.size;
                miss_req_o.we     = mem_req_q.we;
                miss_req_o.wdata  = mem_req_q.wdata;

                // got a grant so go to valid
                if (bypass_gnt_i) begin
                    state_d = WAIT_REFILL_VALID;
                    // if this was a write we still need to give a grant to the store unit
                    if (mem_req_q.we)
                        req_port_o.data_gnt = 1'b1;
                end

                // load miss granted -> wait for the critical word,
                // store miss granted -> grant the store and finish
                if (miss_gnt_i && !mem_req_q.we)
                    state_d = WAIT_CRITICAL_WORD;
                else if (miss_gnt_i) begin
                    state_d = IDLE;
                    req_port_o.data_gnt = 1'b1;
                end

                // it can be the case that the miss unit is currently serving a
                // request which matches ours
                // so we need to check the MSHR for matching continuously
                // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
                if (mshr_addr_matches_i && !active_serving_i) begin
                    state_d = WAIT_MSHR;
                end
            end

            // ~> wait for critical word to arrive
            WAIT_CRITICAL_WORD: begin
                // speculatively request another word
                if (req_port_i.data_req) begin
                    // request the cache line
                    req_o = '1;
                end

                if (critical_word_valid_i) begin
                    req_port_o.data_rvalid = 1'b1;
                    req_port_o.data_rdata  = critical_word_i;
                    // we can make another request
                    if (req_port_i.data_req) begin
                        // save index, be and we
                        mem_req_d.index = req_port_i.address_index;
                        mem_req_d.be    = req_port_i.data_be;
                        mem_req_d.size  = req_port_i.data_size;
                        mem_req_d.we    = req_port_i.data_we;
                        mem_req_d.wdata = req_port_i.data_wdata;
                        mem_req_d.tag   = req_port_i.address_tag;

                        state_d = IDLE;

                        // Wait until we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            req_port_o.data_gnt = 1'b1;
                        end
                    end else begin
                        state_d = IDLE;
                    end
                end
            end

            // ~> wait until the bypass request is valid
            WAIT_REFILL_VALID: begin
                // got a valid answer
                if (bypass_valid_i) begin
                    req_port_o.data_rdata  = bypass_data_i;
                    req_port_o.data_rvalid = 1'b1;
                    state_d = IDLE;
                end
            end
        endcase

        // A kill is evaluated after the case statement and in every state: it
        // overrides the next state chosen above and answers with rvalid.
        if (req_port_i.kill_req) begin
            state_d = IDLE;
            req_port_o.data_rvalid = 1'b1;
        end
    end

    // --------------
    // Registers
    // --------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            state_q   <= IDLE;
            mem_req_q <= '0;
            hit_way_q <= '0;
        end else begin
            state_q   <= state_d;
            mem_req_q <= mem_req_d;
            hit_way_q <= hit_way_d;
        end
    end

    //pragma translate_off
    `ifndef VERILATOR
    // the read data select in WAIT_TAG/WAIT_TAG_SAVED is hard-coded for two 64 bit words
    initial begin
        assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
    end
    // if the full MSHR address matches so should also match the partial one
    partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) else $fatal (1, "partial mshr index doesn't match");
    // there should never be a valid answer when the MSHR matches and we are not being served
    no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) else $fatal (1, "rvalid_o should not be set on MSHR match");
    `endif
    //pragma translate_on
endmodule