// Copyright 2020 ETH Zurich and University of Bologna. // Solderpad Hardware License, Version 0.51, see LICENSE for details. // SPDX-License-Identifier: SHL-0.51 // Fabian Schuiki <fschuiki@iis.ee.ethz.ch> // Florian Zaruba <zarubaf@iis.ee.ethz.ch> `include "common_cells/registers.svh" module snitch_icache #( /// Number of request (fetch) ports parameter int NR_FETCH_PORTS = -1, /// L0 Cache Line Count parameter int L0_LINE_COUNT = -1, /// Cache Line Width parameter int LINE_WIDTH = -1, /// The number of cache lines per set. Power of two; >= 2. parameter int LINE_COUNT = -1, /// The set associativity of the cache. Power of two; >= 1. parameter int SET_COUNT = 1, /// Fetch interface address width. Same as FILL_AW; >= 1. parameter int FETCH_AW = -1, /// Fetch interface data width. Power of two; >= 8. parameter int FETCH_DW = -1, /// Fill interface address width. Same as FETCH_AW; >= 1. parameter int FILL_AW = -1, /// Fill interface data width. Power of two; >= 8. parameter int FILL_DW = -1, /// Replace the L1 tag banks with latch-based SCM. parameter bit L1_TAG_SCM = 0, /// This reduces area impact at the cost of /// increased hassle of having latches in /// the design. /// i_snitch_icache/gen_prefetcher*i_snitch_icache_l0/data*/Q parameter bit EARLY_LATCH = 0, /// Tag width of the data determining logic, this can reduce the /// the critical path into the L0 cache when small. The trade-off /// is a higher miss-rate in case the smaller tag matches more /// tags. The tag must be smaller than the necessary L0 tag. /// If configured to `-1` the entire tag is used, effectively /// disabling this feature. parameter int L0_EARLY_TAG_WIDTH = -1, /// Operate L0 cache in slower clock-domain parameter bit ISO_CROSSING = 1 ) ( input logic clk_i, input logic clk_d2_i, input logic rst_ni, input logic [NR_FETCH_PORTS-1:0] enable_prefetching_i, output snitch_icache_pkg::icache_events_t [NR_FETCH_PORTS-1:0] icache_events_o, input logic flush_valid_i, output logic flush_ready_o, input logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0] inst_addr_i, output logic [NR_FETCH_PORTS-1:0][FETCH_DW-1:0] inst_data_o, input logic [NR_FETCH_PORTS-1:0] inst_cacheable_i, input logic [NR_FETCH_PORTS-1:0] inst_valid_i, output logic [NR_FETCH_PORTS-1:0] inst_ready_o, output logic [NR_FETCH_PORTS-1:0] inst_error_o, // AXI-like read-only interface output logic [FILL_AW-1:0] refill_qaddr_o, output logic [7:0] refill_qlen_o, output logic refill_qvalid_o, input logic refill_qready_i, input logic [FILL_DW-1:0] refill_pdata_i, input logic refill_perror_i, input logic refill_pvalid_i, input logic refill_plast_i, output logic refill_pready_o ); // Bundle the parameters up into a proper configuration struct that we can // pass to submodules. localparam PENDING_COUNT = 8; localparam snitch_icache_pkg::config_t CFG = '{ NR_FETCH_PORTS: NR_FETCH_PORTS, LINE_WIDTH: LINE_WIDTH, LINE_COUNT: LINE_COUNT, L0_LINE_COUNT: L0_LINE_COUNT, SET_COUNT: SET_COUNT, PENDING_COUNT: PENDING_COUNT, FETCH_AW: FETCH_AW, FETCH_DW: FETCH_DW, FILL_AW: FILL_AW, FILL_DW: FILL_DW, L1_TAG_SCM: L1_TAG_SCM, EARLY_LATCH: EARLY_LATCH, FETCH_ALIGN: $clog2(FETCH_DW/8), FILL_ALIGN: $clog2(FILL_DW/8), LINE_ALIGN: $clog2(LINE_WIDTH/8), COUNT_ALIGN: $clog2(LINE_COUNT), SET_ALIGN: $clog2(SET_COUNT), TAG_WIDTH: FETCH_AW - $clog2(LINE_WIDTH/8) - $clog2(LINE_COUNT) + 1, L0_TAG_WIDTH: FETCH_AW - $clog2(LINE_WIDTH/8), L0_EARLY_TAG_WIDTH: (L0_EARLY_TAG_WIDTH == -1) ? FETCH_AW - $clog2(LINE_WIDTH/8) : L0_EARLY_TAG_WIDTH, ID_WIDTH_REQ: $clog2(NR_FETCH_PORTS) + 1, ID_WIDTH_RESP: 2*NR_FETCH_PORTS, PENDING_IW: $clog2(PENDING_COUNT) }; // pragma translate_off `ifndef VERILATOR // Check invariants. initial begin assert(L0_LINE_COUNT > 0); assert(LINE_WIDTH > 0); assert(LINE_COUNT > 1); assert(SET_COUNT >= 2) else $warning("Only >= 2 sets are supported"); assert(FETCH_AW > 0); assert(FETCH_DW > 0); assert(FILL_AW > 0); assert(FILL_DW > 0); assert(CFG.L0_EARLY_TAG_WIDTH < CFG.L0_TAG_WIDTH); assert(FETCH_AW == FILL_AW); assert(2**$clog2(LINE_WIDTH) == LINE_WIDTH); assert(2**$clog2(LINE_COUNT) == LINE_COUNT); assert(2**$clog2(SET_COUNT) == SET_COUNT); assert(2**$clog2(FETCH_DW) == FETCH_DW); assert(2**$clog2(FILL_DW) == FILL_DW); end `endif // pragma translate_on // Instantiate the optional early cache, or bypass it. logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0] early_addr; logic [NR_FETCH_PORTS-1:0][LINE_WIDTH-1:0] early_data; logic [NR_FETCH_PORTS-1:0] early_valid; logic [NR_FETCH_PORTS-1:0] early_ready; logic [NR_FETCH_PORTS-1:0] early_error; // The prefetch module is responsible for taking the 1-channel valid/ready // transaction from the early cache and translate it into a 2-channel // transaction. Once the actual incoming request has been accepted on the // `req` channel, the prefetcher issues another low-priority request for the // next cache line. typedef struct packed { logic [CFG.FETCH_AW-1:0] addr; logic [CFG.ID_WIDTH_REQ-1:0] id; } prefetch_req_t; typedef struct packed { logic [CFG.LINE_WIDTH-1:0] data; logic error; logic [CFG.ID_WIDTH_RESP-1:0] id; } prefetch_resp_t; prefetch_req_t [NR_FETCH_PORTS-1:0] prefetch_req ; logic [NR_FETCH_PORTS-1:0] prefetch_req_valid ; logic [NR_FETCH_PORTS-1:0] prefetch_req_ready ; prefetch_req_t prefetch_lookup_req ; logic prefetch_lookup_req_valid ; logic prefetch_lookup_req_ready ; prefetch_resp_t [NR_FETCH_PORTS-1:0] prefetch_rsp ; logic [NR_FETCH_PORTS-1:0] prefetch_rsp_valid ; logic [NR_FETCH_PORTS-1:0] prefetch_rsp_ready ; prefetch_resp_t prefetch_lookup_rsp ; logic prefetch_lookup_rsp_valid ; logic prefetch_lookup_rsp_ready ; typedef struct packed { logic [CFG.FETCH_AW-1:0] addr; logic [CFG.PENDING_IW-1:0] id; logic bypass; } miss_refill_req_t; miss_refill_req_t handler_req, bypass_req, bypass_req_q, refill_req; logic handler_req_valid, bypass_req_valid, bypass_req_valid_q, refill_req_valid; logic handler_req_ready, bypass_req_ready, bypass_req_ready_q, refill_req_ready; typedef struct packed { logic [CFG.LINE_WIDTH-1:0] data; logic error; logic [CFG.PENDING_IW-1:0] id; logic bypass; } miss_refill_rsp_t; miss_refill_rsp_t handler_rsp, bypass_rsp, bypass_rsp_q, refill_rsp; logic handler_rsp_valid, bypass_rsp_valid, bypass_rsp_valid_q, refill_rsp_valid; logic handler_rsp_ready, bypass_rsp_ready, bypass_rsp_ready_q, refill_rsp_ready; logic [NR_FETCH_PORTS-1:0][FETCH_DW-1:0] bypass_data; logic [NR_FETCH_PORTS-1:0] bypass_error; logic [NR_FETCH_PORTS-1:0] bypass_valid; logic [NR_FETCH_PORTS-1:0] bypass_ready; logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0] bypass_addr; // logic [NR_FETCH_PORTS-1:0] logic [NR_FETCH_PORTS-1:0] in_cache_valid, in_bypass_valid; logic [NR_FETCH_PORTS-1:0] in_cache_ready, in_bypass_ready; logic [NR_FETCH_PORTS-1:0] [FETCH_DW-1:0] in_cache_data, in_bypass_data; logic [NR_FETCH_PORTS-1:0] in_cache_error, in_bypass_error; for (genvar i = 0; i < NR_FETCH_PORTS; i++) begin : gen_prefetcher prefetch_req_t local_prefetch_req; logic local_prefetch_req_valid; logic local_prefetch_req_ready; prefetch_resp_t local_prefetch_rsp; logic local_prefetch_rsp_valid; logic local_prefetch_rsp_ready; assign in_cache_valid[i] = inst_cacheable_i[i] & inst_valid_i[i]; assign in_bypass_valid[i] = ~inst_cacheable_i[i] & inst_valid_i[i]; assign inst_ready_o[i] = (inst_cacheable_i[i] & in_cache_ready [i]) | (~inst_cacheable_i[i] & in_bypass_ready [i]); // multiplex results assign {inst_error_o[i], inst_data_o[i]} = ({($bits(in_cache_data[i])+1){inst_cacheable_i[i]}} & {in_cache_error [i], in_cache_data[i]}) | (~{($bits(in_cache_data[i])+1){inst_cacheable_i[i]}} & {in_bypass_error[i], in_bypass_data[i]}); snitch_icache_l0 #( .CFG ( CFG ), .L0_ID ( i ) ) i_snitch_icache_l0 ( .clk_i ( clk_d2_i ), .rst_ni, .flush_valid_i, .enable_prefetching_i ( enable_prefetching_i [i] ), .icache_events_o ( icache_events_o [i] ), .in_addr_i ( inst_addr_i [i] ), .in_data_o ( in_cache_data [i] ), .in_error_o ( in_cache_error [i] ), .in_valid_i ( in_cache_valid [i] ), .in_ready_o ( in_cache_ready [i] ), .out_req_addr_o ( local_prefetch_req.addr ), .out_req_id_o ( local_prefetch_req.id ), .out_req_valid_o ( local_prefetch_req_valid ), .out_req_ready_i ( local_prefetch_req_ready ), .out_rsp_data_i ( local_prefetch_rsp.data ), .out_rsp_error_i ( local_prefetch_rsp.error ), .out_rsp_id_i ( local_prefetch_rsp.id ), .out_rsp_valid_i ( local_prefetch_rsp_valid ), .out_rsp_ready_o ( local_prefetch_rsp_ready ) ); isochronous_spill_register #( .T ( prefetch_req_t ), .Bypass ( !ISO_CROSSING ) ) i_spill_register_prefetch_req ( .src_clk_i ( clk_d2_i ), .src_rst_ni ( rst_ni ), .src_valid_i ( local_prefetch_req_valid ), .src_ready_o ( local_prefetch_req_ready ), .src_data_i ( local_prefetch_req ), .dst_clk_i ( clk_i ), .dst_rst_ni ( rst_ni ), .dst_valid_o ( prefetch_req_valid [i] ), .dst_ready_i ( prefetch_req_ready [i] ), .dst_data_o ( prefetch_req [i] ) ); isochronous_spill_register #( .T ( prefetch_resp_t ), .Bypass ( !ISO_CROSSING ) ) i_spill_register_prefetch_resp ( .src_clk_i ( clk_i ), .src_rst_ni ( rst_ni ), .src_valid_i ( prefetch_rsp_valid [i] ), .src_ready_o ( prefetch_rsp_ready [i] ), .src_data_i ( prefetch_rsp [i] ), .dst_clk_i ( clk_d2_i ), .dst_rst_ni ( rst_ni ), .dst_valid_o ( local_prefetch_rsp_valid ), .dst_ready_i ( local_prefetch_rsp_ready ), .dst_data_o ( local_prefetch_rsp ) ); end l0_to_bypass #( .CFG ( CFG ) ) i_l0_to_bypass ( .clk_i ( clk_d2_i ), .rst_ni, .in_valid_i ( in_bypass_valid ), .in_ready_o ( in_bypass_ready ), .in_addr_i ( inst_addr_i ), .in_data_o ( in_bypass_data ), .in_error_o ( in_bypass_error ), .refill_req_addr_o ( bypass_req.addr ), .refill_req_bypass_o ( bypass_req.bypass ), .refill_req_valid_o ( bypass_req_valid ), .refill_req_ready_i ( bypass_req_ready ), .refill_rsp_data_i ( bypass_rsp_q.data ), .refill_rsp_error_i ( bypass_rsp_q.error ), .refill_rsp_valid_i ( bypass_rsp_valid_q ), .refill_rsp_ready_o ( bypass_rsp_ready_q ) ); assign bypass_req.id = '0; isochronous_spill_register #( .T ( miss_refill_req_t ), .Bypass ( !ISO_CROSSING ) ) i_spill_register_bypass_req ( .src_clk_i ( clk_d2_i ), .src_rst_ni ( rst_ni ), .src_valid_i ( bypass_req_valid ), .src_ready_o ( bypass_req_ready ), .src_data_i ( bypass_req ), .dst_clk_i ( clk_i ), .dst_rst_ni ( rst_ni ), .dst_valid_o ( bypass_req_valid_q ), .dst_ready_i ( bypass_req_ready_q ), .dst_data_o ( bypass_req_q ) ); isochronous_spill_register #( .T ( miss_refill_rsp_t ), .Bypass ( !ISO_CROSSING ) ) i_spill_register_bypass_resp ( .src_clk_i ( clk_i ), .src_rst_ni ( rst_ni ), .src_valid_i ( bypass_rsp_valid ), .src_ready_o ( bypass_rsp_ready ), .src_data_i ( bypass_rsp ), .dst_clk_i ( clk_d2_i ), .dst_rst_ni ( rst_ni ), .dst_valid_o ( bypass_rsp_valid_q ), .dst_ready_i ( bypass_rsp_ready_q ), .dst_data_o ( bypass_rsp_q ) ); /// Arbitrate cache port // 1. Request Side stream_arbiter #( .DATA_T ( prefetch_req_t ), .N_INP ( NR_FETCH_PORTS ) ) i_stream_arbiter ( .clk_i, .rst_ni, .inp_data_i ( prefetch_req ), .inp_valid_i ( prefetch_req_valid ), .inp_ready_o ( prefetch_req_ready ), .oup_data_o ( prefetch_lookup_req ), .oup_valid_o ( prefetch_lookup_req_valid ), .oup_ready_i ( prefetch_lookup_req_ready ) ); // 2. Response Side // This breaks if the pre-fetcher would not alway be ready // which is the case for the moment for (genvar i = 0; i < NR_FETCH_PORTS; i++) begin : gen_resp assign prefetch_rsp[i] = prefetch_lookup_rsp; // check if one of the ID bits is set assign prefetch_rsp_valid[i] = ((|((prefetch_rsp[i].id >> 2*i) & 2'b11)) & prefetch_lookup_rsp_valid); end assign prefetch_lookup_rsp_ready = |prefetch_rsp_ready; /// Tag lookup // The lookup module contains the actual cache RAMs and performs lookups. logic [CFG.FETCH_AW-1:0] lookup_addr ; logic [CFG.ID_WIDTH_REQ-1:0] lookup_id ; logic [CFG.SET_ALIGN-1:0] lookup_set ; logic lookup_hit ; logic [CFG.LINE_WIDTH-1:0] lookup_data ; logic lookup_error ; logic lookup_valid ; logic lookup_ready ; logic [CFG.COUNT_ALIGN-1:0] write_addr ; logic [CFG.SET_ALIGN-1:0] write_set ; logic [CFG.LINE_WIDTH-1:0] write_data ; logic [CFG.TAG_WIDTH-1:0] write_tag ; logic write_error ; logic write_valid ; logic write_ready ; logic flush_valid, flush_ready; // We need to propagate the handshake into the other // clock domain in case we operate w/ different clocks. if (ISO_CROSSING) begin : gen_flush_crossing isochronous_spill_register i_isochronous_4phase_handshake ( .src_clk_i ( clk_d2_i ), .src_rst_ni ( rst_ni ), .src_valid_i ( flush_valid_i ), .src_ready_o ( flush_ready_o ), .src_data_i ( '0 ), .dst_clk_i ( clk_i ), .dst_rst_ni ( rst_ni ), .dst_valid_o ( flush_valid ), .dst_ready_i ( flush_ready ), .dst_data_o ( /* Unused */ ) ); end else begin : gen_no_flush_crossing assign flush_valid = flush_valid_i; assign flush_ready_o = flush_ready; end snitch_icache_lookup #(CFG) i_lookup ( .clk_i, .rst_ni, .flush_valid_i (flush_valid), .flush_ready_o (flush_ready), .in_addr_i ( prefetch_lookup_req.addr ), .in_id_i ( prefetch_lookup_req.id ), .in_valid_i ( prefetch_lookup_req_valid ), .in_ready_o ( prefetch_lookup_req_ready ), .out_addr_o ( lookup_addr ), .out_id_o ( lookup_id ), .out_set_o ( lookup_set ), .out_hit_o ( lookup_hit ), .out_data_o ( lookup_data ), .out_error_o ( lookup_error ), .out_valid_o ( lookup_valid ), .out_ready_i ( lookup_ready ), .write_addr_i ( write_addr ), .write_set_i ( write_set ), .write_data_i ( write_data ), .write_tag_i ( write_tag ), .write_error_i ( write_error ), .write_valid_i ( write_valid ), .write_ready_o ( write_ready ) ); // The miss handler module deals with the result of the lookup. It also // keeps track of the pending refills and ensures that no redundant memory // requests are made. Upon refill completion, it sends a new tag/data item // to the lookup module and the received data to the prefetch module. snitch_icache_handler #(CFG) i_handler ( .clk_i, .rst_ni, .in_req_addr_i ( lookup_addr ), .in_req_id_i ( lookup_id ), .in_req_set_i ( lookup_set ), .in_req_hit_i ( lookup_hit ), .in_req_data_i ( lookup_data ), .in_req_error_i ( lookup_error ), .in_req_valid_i ( lookup_valid ), .in_req_ready_o ( lookup_ready ), .in_rsp_data_o ( prefetch_lookup_rsp.data ), .in_rsp_error_o ( prefetch_lookup_rsp.error ), .in_rsp_id_o ( prefetch_lookup_rsp.id ), .in_rsp_valid_o ( prefetch_lookup_rsp_valid ), .in_rsp_ready_i ( prefetch_lookup_rsp_ready ), .write_addr_o ( write_addr ), .write_set_o ( write_set ), .write_data_o ( write_data ), .write_tag_o ( write_tag ), .write_error_o ( write_error ), .write_valid_o ( write_valid ), .write_ready_i ( write_ready ), .out_req_addr_o ( handler_req.addr ), .out_req_id_o ( handler_req.id ), .out_req_valid_o ( handler_req_valid ), .out_req_ready_i ( handler_req_ready ), .out_rsp_data_i ( handler_rsp.data ), .out_rsp_error_i ( handler_rsp.error ), .out_rsp_id_i ( handler_rsp.id ), .out_rsp_valid_i ( handler_rsp_valid ), .out_rsp_ready_o ( handler_rsp_ready ) ); assign handler_req.bypass = 1'b0; // Arbitrate between bypass and cache-refills stream_arbiter #( .DATA_T ( miss_refill_req_t ), .N_INP ( 2 ) ) i_stream_arbiter_miss_refill ( .clk_i, .rst_ni, .inp_data_i ( {bypass_req_q, handler_req} ), .inp_valid_i ( {bypass_req_valid_q, handler_req_valid} ), .inp_ready_o ( {bypass_req_ready_q, handler_req_ready} ), .oup_data_o ( refill_req ), .oup_valid_o ( refill_req_valid ), .oup_ready_i ( refill_req_ready ) ); // Response path muxing stream_demux #( .N_OUP ( 2 ) ) i_stream_demux_miss_refill ( .inp_valid_i ( refill_rsp_valid ), .inp_ready_o ( refill_rsp_ready ), .oup_sel_i ( refill_rsp.bypass ), .oup_valid_o ( {{bypass_rsp_valid, handler_rsp_valid}} ), .oup_ready_i ( {{bypass_rsp_ready, handler_rsp_ready}} ) ); assign handler_rsp = refill_rsp; assign bypass_rsp = refill_rsp; // AXI-like read-only interface typedef struct packed { logic [FILL_AW-1:0] addr; logic [7:0] len; } extern_req_t; typedef struct packed { logic [FILL_DW-1:0] data; logic error; logic last; } extern_rsp_t; extern_req_t extern_req, extern_req_q; logic extern_qvalid; logic extern_qready; extern_rsp_t extern_rsp, extern_rsp_q; logic extern_pvalid_q; logic extern_pready_q; // Instantiate the cache refill module which emits AXI transactions. snitch_icache_refill #(CFG) i_refill ( .clk_i, .rst_ni, .in_req_addr_i ( refill_req.addr ), .in_req_id_i ( refill_req.id ), .in_req_bypass_i ( refill_req.bypass ), .in_req_valid_i ( refill_req_valid ), .in_req_ready_o ( refill_req_ready ), .in_rsp_data_o ( refill_rsp.data ), .in_rsp_error_o ( refill_rsp.error ), .in_rsp_id_o ( refill_rsp.id ), .in_rsp_bypass_o ( refill_rsp.bypass ), .in_rsp_valid_o ( refill_rsp_valid ), .in_rsp_ready_i ( refill_rsp_ready ), .refill_qaddr_o ( extern_req.addr ), .refill_qlen_o ( extern_req.len ), .refill_qvalid_o ( extern_qvalid ), .refill_qready_i ( extern_qready ), .refill_pdata_i ( extern_rsp_q.data ), .refill_perror_i ( extern_rsp_q.error ), .refill_plast_i ( extern_rsp_q.last ), .refill_pvalid_i ( extern_pvalid_q ), .refill_pready_o ( extern_pready_q ) ); // Insert Slices. spill_register #(.T(extern_req_t)) i_spill_register_req ( .clk_i, .rst_ni, .valid_i ( extern_qvalid ), .ready_o ( extern_qready ), .data_i ( extern_req ), // Q Output .valid_o ( refill_qvalid_o ), .ready_i ( refill_qready_i ), .data_o ( extern_req_q ) ); assign refill_qaddr_o = extern_req_q.addr; assign refill_qlen_o = extern_req_q.len; spill_register #(.T(extern_rsp_t)) i_spill_register_resp ( .clk_i, .rst_ni, .valid_i ( refill_pvalid_i ), .ready_o ( refill_pready_o ), .data_i ( extern_rsp ), // Q Output .valid_o ( extern_pvalid_q ), .ready_i ( extern_pready_q ), .data_o ( extern_rsp_q ) ); assign extern_rsp.data = refill_pdata_i; assign extern_rsp.error = refill_perror_i; assign extern_rsp.last = refill_plast_i; endmodule // Translate register interface to refill requests. // Used for bypassable accesses. module l0_to_bypass #( parameter snitch_icache_pkg::config_t CFG = '0 ) ( input logic clk_i, input logic rst_ni, input logic [CFG.NR_FETCH_PORTS-1:0] in_valid_i, output logic [CFG.NR_FETCH_PORTS-1:0] in_ready_o, input logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_i, output logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_DW-1:0] in_data_o, output logic [CFG.NR_FETCH_PORTS-1:0] in_error_o, output logic [CFG.FETCH_AW-1:0] refill_req_addr_o, output logic refill_req_bypass_o, output logic refill_req_valid_o, input logic refill_req_ready_i, input logic [CFG.LINE_WIDTH-1:0] refill_rsp_data_i, input logic refill_rsp_error_i, input logic refill_rsp_valid_i, output logic refill_rsp_ready_o ); assign refill_req_bypass_o = 1'b1; logic [CFG.NR_FETCH_PORTS-1:0] in_valid; logic [CFG.NR_FETCH_PORTS-1:0] in_ready; enum logic [1:0] { Idle, RequestData, WaitResponse, PresentResponse } state_d [CFG.NR_FETCH_PORTS-1:0], state_q [CFG.NR_FETCH_PORTS-1:0]; // Mask address so that it is aligned to the cache-line width. logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_masked; for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin assign in_addr_masked[i] = in_addr_i[i] >> CFG.LINE_ALIGN << CFG.LINE_ALIGN; end stream_arbiter #( .DATA_T ( logic [CFG.FETCH_AW-1:0] ), .N_INP ( CFG.NR_FETCH_PORTS ) ) i_stream_arbiter ( .clk_i, .rst_ni, .inp_data_i ( in_addr_masked ), .inp_valid_i ( in_valid ), .inp_ready_o ( in_ready ), .oup_data_o ( refill_req_addr_o ), .oup_valid_o ( refill_req_valid_o ), .oup_ready_i ( refill_req_ready_i ) ); localparam int unsigned NR_FETCH_PORTS_BIN = CFG.NR_FETCH_PORTS == 1 ? 1 : $clog2(CFG.NR_FETCH_PORTS); logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_mux; logic [NR_FETCH_PORTS_BIN-1:0] onehot_mux; logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_pop; logic rsp_fifo_full; logic [CFG.NR_FETCH_PORTS-1:0] rsp_valid; logic [CFG.NR_FETCH_PORTS-1:0] rsp_ready; fifo_v3 #( .DATA_WIDTH ( CFG.NR_FETCH_PORTS ), .DEPTH ( 4 ) ) rsp_fifo ( .clk_i, .rst_ni, .flush_i ( 1'b0 ), .testmode_i ( 1'b0 ), .full_o ( rsp_fifo_full ), .empty_o ( ), .usage_o ( ), .data_i ( {in_valid & in_ready} ), .push_i ( |{in_valid & in_ready}), .data_o ( rsp_fifo_mux ), .pop_i ( |rsp_fifo_pop ) ); onehot_to_bin #( .ONEHOT_WIDTH (CFG.NR_FETCH_PORTS) ) i_onehot_to_bin ( .onehot (rsp_fifo_mux), .bin (onehot_mux) ); assign rsp_ready = '1; stream_demux #( .N_OUP ( CFG.NR_FETCH_PORTS ) ) i_stream_mux_miss_refill ( .inp_valid_i ( refill_rsp_valid_i ), .inp_ready_o ( refill_rsp_ready_o ), .oup_sel_i ( onehot_mux ), .oup_valid_o ( rsp_valid ), .oup_ready_i ( rsp_ready ) ); for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_bypass_request always_comb begin state_d[i] = state_q[i]; in_ready_o[i] = 1'b0; rsp_fifo_pop[i] = 1'b0; in_valid[i] = 1'b0; unique case (state_q[i]) // latch data when idle Idle: if (in_valid_i[i]) state_d[i] = RequestData; RequestData: begin // check that there is still space for the response to be accepted. if (!rsp_fifo_full) begin in_valid[i] = 1'b1; if (in_ready[i]) state_d[i] = WaitResponse; end end WaitResponse: begin if (rsp_valid[i]) begin rsp_fifo_pop[i] = 1'b1; state_d[i] = PresentResponse; end end // The response will be served from the register and is valid for one cycle. PresentResponse: begin state_d[i] = Idle; in_ready_o[i] = 1'b1; end default:; endcase end logic [CFG.FILL_DW-1:0] fill_rsp_data; assign fill_rsp_data = refill_rsp_data_i >> (in_addr_i[i][CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN] * CFG.FETCH_DW); `FFLNR({in_data_o[i], in_error_o[i]}, {fill_rsp_data[CFG.FETCH_DW-1:0], refill_rsp_error_i}, rsp_valid[i], clk_i) end `FF(state_q, state_d, '{default: Idle}) endmodule