// File: snitch_icache_lookup.sv (11.1 KB), committed by sakundu.
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

// Samuel Riedel  <sriedel@iis.ee.ethz.ch>

`include "common_cells/registers.svh"

/// Instruction cache lookup pipeline.
///
/// Performs the actual cache lookup in two stages:
///   1. Tag stage:  one tag memory per set is accessed in parallel with the line index of the
///                  incoming address; the stored tags are compared against the request's tag to
///                  determine the hit, the hitting set and the error flag.
///   2. Data stage: on a hit, the single data memory is read at `{set, line index}` and the
///                  result is presented on the `out_*` interface together with the metadata.
///
/// The tag and data memories each have a single port that is shared between lookups, refill
/// writes (`write_*` interface) and, for the tags only, the initialization/flush sequence.
/// Priority on the tag port is: initialization > refill write > lookup. Refill writes stall
/// both pipeline stages for the cycle in which they are accepted.
///
/// Each tag memory word is `CFG.TAG_WIDTH + 2` bits wide and laid out as
/// `{valid, error, tag}` (see `tag_wdata` and the hit detection below).
///
/// After reset, and after every accepted flush, all tag entries of all sets are overwritten
/// with zero (i.e. valid bit cleared), one line index per cycle.
module snitch_icache_lookup #(
    // Cache configuration. Must be overridden by the instantiating module; the all-zero default
    // is rejected by the assertion below (simulation only).
    parameter snitch_icache_pkg::config_t CFG = '0
)(
    input  logic                        clk_i,
    // Asynchronous, active-low reset (used as `negedge` in the sensitivity list below).
    input  logic                        rst_ni,

    // Flush interface. `flush_ready_o` is tied high; a flush request that arrives while the
    // initialization counter is still running does not restart the counter (see the FSM below).
    input  logic                        flush_valid_i,
    output logic                        flush_ready_o,

    // Lookup request interface (valid/ready). `in_id_i` is carried through the pipeline
    // unmodified and returned on `out_id_o`.
    input  logic [CFG.FETCH_AW-1:0]     in_addr_i,
    input  logic [CFG.ID_WIDTH_REQ-1:0] in_id_i,
    input  logic                        in_valid_i,
    output logic                        in_ready_o,

    // Lookup response interface (valid/ready). `out_addr_o`/`out_id_o` echo the request,
    // `out_set_o`/`out_hit_o`/`out_error_o` are the tag stage result, `out_data_o` is the data
    // memory read result.
    // NOTE(review): on a miss the data memory is not read, so `out_data_o` presumably carries
    // stale buffer contents and must be ignored by the consumer - confirm against the user of
    // this module.
    output logic [CFG.FETCH_AW-1:0]     out_addr_o,
    output logic [CFG.ID_WIDTH_REQ-1:0] out_id_o,
    output logic [CFG.SET_ALIGN-1:0]    out_set_o,
    output logic                        out_hit_o,
    output logic [CFG.LINE_WIDTH-1:0]   out_data_o,
    output logic                        out_error_o,
    output logic                        out_valid_o,
    input  logic                        out_ready_i,

    // Refill write interface (valid/ready). Writes `write_tag_i`/`write_error_i` (with the valid
    // bit set) into tag set `write_set_i` and `write_data_i` into the data memory, both at line
    // index `write_addr_i`. Accepted in any cycle outside the initialization phase.
    input  logic [CFG.COUNT_ALIGN-1:0]  write_addr_i,
    input  logic [CFG.SET_ALIGN-1:0]    write_set_i,
    input  logic [CFG.LINE_WIDTH-1:0]   write_data_i,
    input  logic [CFG.TAG_WIDTH-1:0]    write_tag_i,
    input  logic                        write_error_i,
    input  logic                        write_valid_i,
    output logic                        write_ready_o
);

    // Address width of the shared data memory: set index (MSBs) concatenated with line index.
    localparam int unsigned DATA_ADDR_WIDTH = $clog2(CFG.SET_COUNT) + CFG.COUNT_ALIGN;

    // Catch instantiations that forgot to override the configuration parameter.
    `ifndef SYNTHESIS
    initial assert(CFG != '0);
    `endif

    // Initialization counter. One bit wider than a line index so that it can hold the terminal
    // value `CFG.LINE_COUNT`, which marks the end of the initialization phase.
    logic [CFG.COUNT_ALIGN:0] init_count_q;
    logic                     init_phase;

    // We are always ready to flush
    assign flush_ready_o = 1'b1;
    // The initialization phase lasts until the counter has visited every line index.
    assign init_phase = init_count_q != $unsigned(CFG.LINE_COUNT);
    // Initialization and flush FSM
    // - Reset: start counting from zero (clears all tags after reset).
    // - While counting: advance by one line index per cycle.
    // - Idle (counter == LINE_COUNT): a flush request restarts the count from zero.
    // Because of the `else if` ordering, `flush_valid_i` is only evaluated once the counter has
    // reached its terminal value.
    always_ff @(posedge clk_i, negedge rst_ni) begin
        if (!rst_ni)
            init_count_q <= '0;
        else if (init_count_q != $unsigned(CFG.LINE_COUNT))
            init_count_q <= init_count_q + 1;
        else if (flush_valid_i)
            init_count_q <= '0;
    end

    // --------------------------------------------------
    // Tag stage
    // --------------------------------------------------
    // Metadata of the request that is buffered while the tag memories are being read.
    typedef struct packed {
        logic [CFG.FETCH_AW-1:0]     addr;
        logic [CFG.ID_WIDTH_REQ-1:0] id;
    } tag_req_t;

    // Result of the tag comparison: set index reported by the `lzc`, hit flag and error flag.
    typedef struct packed {
        logic [CFG.SET_ALIGN-1:0] cset;
        logic                     hit;
        logic                     error;
    } tag_rsp_t;

    // Handshake of a lookup request into the tag stage, and its one-cycle-delayed version.
    logic                       req_valid, req_ready;
    logic                       req_handshake;

    // Shared tag memory port: address, per-set enable, write data / per-set read data, and
    // write-not-read select. Tag words are {valid, error, tag}.
    logic [CFG.COUNT_ALIGN-1:0] tag_addr;
    logic [CFG.SET_COUNT-1:0]   tag_enable;
    logic [CFG.TAG_WIDTH+1:0]   tag_wdata, tag_rdata [CFG.SET_COUNT];
    logic                       tag_write;

    // tag_req_d/q: request metadata before/after the tag stage register.
    // tag_rsp_s:   combinational result computed from the tag memory outputs.
    // tag_rsp_d/q: fall-through buffer for the result.
    // tag_rsp:     result forwarded to the data stage (fresh or buffered).
    tag_req_t                   tag_req_d, tag_req_q;
    tag_rsp_t                   tag_rsp_s, tag_rsp_d, tag_rsp_q, tag_rsp;
    // Handshake between tag stage and data stage, and its registered, hit-qualified version.
    logic                       tag_valid, tag_ready;
    logic                       tag_handshake;

    // Tag bits of the buffered request and the per-set comparison result.
    logic [CFG.TAG_WIDTH-1:0]   required_tag;
    logic [CFG.SET_COUNT-1:0]   line_hit;

    // Data memory addresses of the pending lookup and of the refill write.
    logic [DATA_ADDR_WIDTH-1:0] lookup_addr;
    logic [DATA_ADDR_WIDTH-1:0] write_addr;

    // Connect input requests to tag stage
    assign tag_req_d.addr = in_addr_i;
    assign tag_req_d.id   = in_id_i;

    // Multiplex read and write access to the tag banks onto one port, prioritizing write accesses
    // Priority (highest first): initialization > refill write > lookup. The defaults below
    // describe an idle port; each branch overrides only what it needs, so the statement order
    // inside this block matters.
    always_comb begin
        // Default: address the line selected by the incoming request, no access.
        tag_addr   = in_addr_i >> CFG.LINE_ALIGN;
        tag_enable = '0;
        // Refill write data: valid bit set, followed by the error flag and the tag.
        tag_wdata  = {1'b1, write_error_i, write_tag_i};
        tag_write  = 1'b0;

        write_ready_o = 1'b0;
        in_ready_o    = 1'b0;
        req_valid     = 1'b0;

        if (init_phase) begin
            // Clear the entry at the current counter value in all sets at once. Neither
            // refill writes nor lookups are accepted in this phase.
            // NOTE(review): `init_count_q` is one bit wider than `tag_addr`, so this assignment
            // truncates the MSB; presumably harmless because the counter stays below
            // LINE_COUNT during the init phase - confirm LINE_COUNT == 2**COUNT_ALIGN.
            tag_addr   = init_count_q;
            tag_enable = '1;
            tag_wdata  = '0;
            tag_write  = 1'b1;
        end else if (write_valid_i) begin
            // Write a refill request
            // Only the targeted set is enabled (one-hot decode of `write_set_i`). The write is
            // accepted unconditionally in this branch.
            tag_addr      = write_addr_i;
            tag_enable    = $unsigned(1 << write_set_i);
            tag_write     = 1'b1;
            write_ready_o = 1'b1;
        end else if (in_valid_i) begin
            // Check cache
            // All sets are read in parallel; the request is only accepted if the tag stage
            // register can take it (`req_ready`).
            tag_enable = '1;
            in_ready_o = req_ready;
            // Request to store data in pipeline
            req_valid  = 1'b1;
        end
    end

    // Instantiate the tag sets.
    // One memory per set, either a latch-based SCM (`CFG.L1_TAG_SCM` set) or a technology SRAM
    // macro wrapper. Both share the same address, write data and write select; only the enable
    // is per set.
    for (genvar i = 0; i < CFG.SET_COUNT; i++) begin : g_sets
        if (CFG.L1_TAG_SCM) begin : gen_scm
            // The SCM has separate read and write enables, derived here from the shared
            // enable and the write select.
            latch_scm #(
                .ADDR_WIDTH ($clog2(CFG.LINE_COUNT)),
                .DATA_WIDTH (CFG.TAG_WIDTH+2       )
            ) i_tag (
                .clk         ( clk_i                       ),
                .ReadEnable  ( tag_enable[i] && !tag_write ),
                .ReadAddr    ( tag_addr                    ),
                .ReadData    ( tag_rdata[i]                ),
                .WriteEnable ( tag_enable[i] && tag_write  ),
                .WriteAddr   ( tag_addr                    ),
                .WriteData   ( tag_wdata                   )
            );
        end else begin : gen_sram
            // Single-port SRAM; all byte enables are asserted so a write always replaces the
            // whole tag word.
            tc_sram #(
                .DataWidth ( CFG.TAG_WIDTH+2 ),
                .NumWords  ( CFG.LINE_COUNT  ),
                .NumPorts  ( 1               )
            ) i_tag (
                .clk_i   ( clk_i         ),
                .rst_ni  ( rst_ni        ),
                .req_i   ( tag_enable[i] ),
                .we_i    ( tag_write     ),
                .addr_i  ( tag_addr      ),
                .wdata_i ( tag_wdata     ),
                .be_i    ( '1            ),
                .rdata_o ( tag_rdata[i]  )
            );
        end
    end

    // Determine which set hit
    // A set hits if its entry is valid (bit TAG_WIDTH+1) and its stored tag equals the tag of
    // the buffered request. The error flag (bit TAG_WIDTH) only counts for hitting sets.
    always_comb begin
        automatic logic [CFG.SET_COUNT-1:0] errors;
        // The tag is the part of the address above the line offset and the line index.
        required_tag = tag_req_q.addr >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN);
        for (int i = 0; i < CFG.SET_COUNT; i++) begin
            line_hit[i] = tag_rdata[i][CFG.TAG_WIDTH+1] && tag_rdata[i][CFG.TAG_WIDTH-1:0] == required_tag;
            errors[i] = tag_rdata[i][CFG.TAG_WIDTH] && line_hit[i];
        end
        tag_rsp_s.hit = |line_hit;
        tag_rsp_s.error = |errors;
    end

    // Encode the per-set hit vector into a set index.
    // NOTE(review): `lzc` is defined outside this file; with its default mode it presumably
    // returns the index of the lowest set bit of `line_hit` - confirm. The `empty_o` output is
    // unused because `tag_rsp_s.hit` already covers the no-hit case.
    lzc #(.WIDTH(CFG.SET_COUNT)) i_lzc (
        .in_i     ( line_hit       ),
        .cnt_o    ( tag_rsp_s.cset ),
        .empty_o  (                )
    );

    // NOTE(review): `FF`/`FFL` come from common_cells/registers.svh; the argument order is
    // presumably FF(q, d, reset_value, clk, arst_n) and FFL(q, d, load_enable, reset_value,
    // clk, arst_n) - confirm against the header.

    // Buffer the metadata on a valid handshake. Stall on write (implicit in req_valid/ready)
    `FFL(tag_req_q, tag_req_d, req_valid && req_ready, '0, clk_i, rst_ni)
    // Valid flag of the tag stage: set whenever a lookup is requested, otherwise cleared once
    // the data stage takes the entry, otherwise held.
    `FF(tag_valid, req_valid ? 1'b1 : tag_ready ? 1'b0 : tag_valid, '0, clk_i, rst_ni)
    // Ready if buffer is empty or downstream is reading. Stall on write
    assign req_ready = (!tag_valid || tag_ready) && !tag_write;

    // Register the handshake of the reg stage to buffer the tag output data in the next cycle
    `FF(req_handshake, req_valid && req_ready, 1'b0, clk_i, rst_ni)

    // Fall-through buffer the tag data: Store the tag data if the SRAM bank accepted a request in
    // the previous cycle and if we actually have to buffer them because the receiver is not ready
    `FF(tag_rsp_q, tag_rsp_d, '0, clk_i, rst_ni)
    // Forward the fresh comparison result in the cycle after the request was accepted,
    // otherwise the buffered copy.
    assign tag_rsp = req_handshake ? tag_rsp_s : tag_rsp_q;
    always_comb begin
        // Default: hold the buffered result.
        tag_rsp_d = tag_rsp_q;
        // Load the FF if new data is incoming and downstream is not ready
        if (req_handshake && !tag_ready) begin
            tag_rsp_d = tag_rsp_s;
        end
        // Override the hit if the write that stalled us invalidated the data
        // This must come last so that it also applies to a result that is being loaded in
        // this very cycle. Only the hit flag is cleared; `cset` and `error` are kept.
        if (lookup_addr == write_addr && write_valid_i) begin
            tag_rsp_d.hit = 1'b0;
        end
    end

    // --------------------------------------------------
    // Data stage
    // --------------------------------------------------

    // Metadata that accompanies the data memory read: the original request plus the tag stage
    // result.
    typedef struct packed {
        logic [CFG.FETCH_AW-1:0]     addr;
        logic [CFG.ID_WIDTH_REQ-1:0] id;
        logic [CFG.SET_ALIGN-1:0]    cset;
        logic                        hit;
        logic                        error;
    } data_req_t;

    // One full cache line as stored in the data memory.
    typedef logic [CFG.LINE_WIDTH-1:0] data_rsp_t;

    // Shared data memory port.
    logic [DATA_ADDR_WIDTH-1:0] data_addr;
    logic                       data_enable;
    data_rsp_t                  data_wdata, data_rdata;
    logic                       data_write;

    // Metadata before/after the data stage register, buffered read data, and the handshake
    // towards the output interface.
    data_req_t                  data_req_d, data_req_q;
    data_rsp_t                  data_rsp_q;
    logic                       data_valid, data_ready;

    // Connect tag stage response to data stage request
    assign data_req_d.addr  = tag_req_q.addr;
    assign data_req_d.id    = tag_req_q.id;
    assign data_req_d.cset  = tag_rsp.cset;
    assign data_req_d.hit   = tag_rsp.hit;
    assign data_req_d.error = tag_rsp.error;

    // Data memory addresses are {set index, line index} for both the lookup and the refill.
    assign lookup_addr = {tag_rsp.cset, tag_req_q.addr[CFG.LINE_ALIGN +: CFG.COUNT_ALIGN]};
    assign write_addr  = {write_set_i, write_addr_i};

    // Data bank port mux
    always_comb begin
        // Default read request
        data_addr   = lookup_addr;
        data_enable = tag_valid && tag_rsp.hit; // Only read data on hit
        data_wdata  = write_data_i;
        data_write  = 1'b0;
        // Write takes priority
        // Same condition under which the tag port mux accepts the refill (`write_ready_o`),
        // so tag and data are written in the same cycle.
        if (!init_phase && write_valid_i) begin
            data_addr   = write_addr;
            data_enable = 1'b1;
            data_write  = 1'b1;
        end
    end

    // Single data memory holding the lines of all sets; all byte enables are asserted so a
    // write always replaces the whole line.
    tc_sram #(
        .DataWidth ( CFG.LINE_WIDTH                 ),
        .NumWords  ( CFG.LINE_COUNT * CFG.SET_COUNT ),
        .NumPorts  ( 1                              )
    ) i_data (
        .clk_i   ( clk_i       ),
        .rst_ni  ( rst_ni      ),
        .req_i   ( data_enable ),
        .we_i    ( data_write  ),
        .addr_i  ( data_addr   ),
        .wdata_i ( data_wdata  ),
        .be_i    ( '1          ),
        .rdata_o ( data_rdata  )
    );

    // Buffer the metadata on a valid handshake. Stall on write (implicit in tag_ready)
    `FFL(data_req_q, data_req_d, tag_valid && tag_ready, '0, clk_i, rst_ni)
    // Valid flag of the data stage: set when the tag stage holds an entry and the data port is
    // not taken by a write, otherwise cleared once the output is consumed, otherwise held.
    `FF(data_valid, (tag_valid && !data_write) ? 1'b1 : data_ready ? 1'b0 : data_valid, '0, clk_i, rst_ni)
    // Ready if buffer is empty or downstream is reading. Stall on write
    assign tag_ready = (!data_valid || data_ready) && !data_write;

    // Register the handshake of the tag stage to buffer the data output data in the next cycle
    // but only if it was a hit. Otherwise, the data is not read anyway.
    `FF(tag_handshake, tag_valid && tag_ready && data_req_d.hit, 1'b0, clk_i, rst_ni)

    // Fall-through buffer the read data: Store the read data if the SRAM bank accepted a request in
    // the previous cycle and if we actually have to buffer them because the receiver is not ready
    `FFL(data_rsp_q, data_rdata, tag_handshake && !data_ready, '0, clk_i, rst_ni)
    // Forward the fresh read data in the cycle after a hitting handshake, otherwise the
    // buffered copy.
    assign out_data_o = tag_handshake ? data_rdata : data_rsp_q;

    // Generate the remaining output signals.
    assign out_addr_o  = data_req_q.addr;
    assign out_id_o    = data_req_q.id;
    assign out_set_o   = data_req_q.cset;
    assign out_hit_o   = data_req_q.hit;
    assign out_error_o = data_req_q.error;
    assign out_valid_o = data_valid;
    assign data_ready  = out_ready_i;

endmodule