std_icache.sv 18.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 12.02.2018
// ------------------------------
// Instruction Cache
// ------------------------------
import ariane_pkg::*;
import std_cache_pkg::*;

module std_icache (
    input  logic                     clk_i,
    input  logic                     rst_ni,
    input riscv::priv_lvl_t          priv_lvl_i,

    input  logic                     flush_i,          // flush the icache, flush and kill have to be asserted together
    input  logic                     en_i,             // enable icache
    output logic                     miss_o,           // to performance counter
    // address translation requests
    input  icache_areq_i_t           areq_i,
    output icache_areq_o_t           areq_o,
    // data requests
    input  icache_dreq_i_t           dreq_i,
    output icache_dreq_o_t           dreq_o,
    // AXI refill port
    output ariane_axi::req_t         axi_req_o,
    input  ariane_axi::resp_t        axi_resp_i
);

    localparam int unsigned ICACHE_BYTE_OFFSET = $clog2(ICACHE_LINE_WIDTH/8); // 3
    localparam int unsigned ICACHE_NUM_WORD    = 2**(ICACHE_INDEX_WIDTH - ICACHE_BYTE_OFFSET);
    localparam int unsigned NR_AXI_REFILLS     = ($clog2(ICACHE_LINE_WIDTH/64) == 0) ? 1 : $clog2(ICACHE_LINE_WIDTH/64);
    // registers
    enum logic [3:0] { FLUSH, IDLE, TAG_CMP, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP,
                       REDO_REQ, TAG_CMP_SAVED, REFILL,
                       WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED
                     }                      state_d, state_q;
    logic [$clog2(ICACHE_NUM_WORD)-1:0]     cnt_d, cnt_q;
    logic [NR_AXI_REFILLS-1:0]              burst_cnt_d, burst_cnt_q; // counter for AXI transfers
    logic [63:0]                            vaddr_d, vaddr_q;
    logic [ICACHE_TAG_WIDTH-1:0]            tag_d, tag_q;
    logic [ICACHE_SET_ASSOC-1:0]            evict_way_d, evict_way_q;
    logic                                   flushing_d, flushing_q;

    // signals
    logic [ICACHE_SET_ASSOC-1:0]          req;           // request to data memory
    logic [ICACHE_SET_ASSOC-1:0]          vld_req;       // request to valid/tag memory
    logic [(ICACHE_LINE_WIDTH+7)/8-1:0]   data_be;       // byte enable for data memory
    logic [(2**NR_AXI_REFILLS-1):0][7:0]  be;            // byte enable
    logic [$clog2(ICACHE_NUM_WORD)-1:0]   addr;          // this is a cache-line address, to memory array
    logic                                 we;            // write enable to memory array
    logic [ICACHE_SET_ASSOC-1:0]          hit;           // hit from tag compare
    logic [$clog2(ICACHE_NUM_WORD)-1:0]   idx;           // index in cache line
    logic                                 update_lfsr;   // shift the LFSR
    logic [ICACHE_SET_ASSOC-1:0]          random_way;    // random way select from LFSR
    logic [ICACHE_SET_ASSOC-1:0]          way_valid;     // bit string which contains the zapped valid bits
    logic [$clog2(ICACHE_SET_ASSOC)-1:0]  repl_invalid;  // first non-valid encountered
    logic                                 repl_w_random; // we need to switch repl strategy since all are valid
    logic [ICACHE_TAG_WIDTH-1:0]          tag;           // tag to do comparison with

    // tag + valid bit read/write data
    struct packed {
        logic                 valid;
        logic [ICACHE_TAG_WIDTH-1:0] tag;
    } tag_rdata [ICACHE_SET_ASSOC-1:0], tag_wdata;

    logic [ICACHE_LINE_WIDTH-1:0] data_rdata [ICACHE_SET_ASSOC-1:0], data_wdata;
    logic [(2**NR_AXI_REFILLS-1):0][63:0] wdata;

    for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : sram_block
        // ------------
        // Tag RAM
        // ------------
        sram #(
            // tag + valid bit
            .DATA_WIDTH ( ICACHE_TAG_WIDTH + 1   ),
            .NUM_WORDS  ( ICACHE_NUM_WORD )
        ) tag_sram (
            .clk_i     ( clk_i            ),
            .rst_ni    ( rst_ni           ),
            .req_i     ( vld_req[i]       ),
            .we_i      ( we               ),
            .addr_i    ( addr             ),
            .wdata_i   ( tag_wdata        ),
            .be_i      ( '1               ),
            .rdata_o   ( tag_rdata[i]     )
        );
        // ------------
        // Data RAM
        // ------------
        sram #(
            .DATA_WIDTH ( ICACHE_LINE_WIDTH ),
            .NUM_WORDS  ( ICACHE_NUM_WORD   )
        ) data_sram (
            .clk_i     ( clk_i              ),
            .rst_ni    ( rst_ni             ),
            .req_i     ( req[i]             ),
            .we_i      ( we                 ),
            .addr_i    ( addr               ),
            .wdata_i   ( data_wdata         ),
            .be_i      ( data_be            ),
            .rdata_o   ( data_rdata[i]      )
        );
    end

    // --------------------
    // Tag Comparison and way select
    // --------------------

    // cacheline selected by hit
    logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel;

    assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];

    generate
        for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : g_tag_cmpsel
            assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
            assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx, 5'b0} +: FETCH_WIDTH] : '0;
            assign way_valid[i] = tag_rdata[i].valid;
        end
    endgenerate

    // OR reduction of selected cachelines
    always_comb begin : p_reduction
        dreq_o.data = cl_sel[0];
        for(int i = 1; i < ICACHE_SET_ASSOC; i++)
            dreq_o.data |= cl_sel[i];
    end

    // ------------------
    // AXI Plumbing
    // ------------------
    // instruction cache is read-only
    assign axi_req_o.aw_valid  = '0;
    assign axi_req_o.aw.addr   = '0;
    assign axi_req_o.aw.prot   = '0;
    assign axi_req_o.aw.region = '0;
    assign axi_req_o.aw.len    = '0;
    assign axi_req_o.aw.size   = 3'b000;
    assign axi_req_o.aw.burst  = 2'b00;
    assign axi_req_o.aw.lock   = '0;
    assign axi_req_o.aw.cache  = '0;
    assign axi_req_o.aw.qos    = '0;
    assign axi_req_o.aw.id     = '0;
    assign axi_req_o.aw.atop   = '0;
    assign axi_req_o.w_valid   = '0;
    assign axi_req_o.w.data    = '0;
    assign axi_req_o.w.strb    = '0;
    assign axi_req_o.w.last    = 1'b0;
    assign axi_req_o.b_ready   = 1'b0;

    // set protection flag, MSB -> instruction fetch, LSB -> privileged access or not
    assign axi_req_o.ar.prot   = {1'b1, 1'b0, (priv_lvl_i == riscv::PRIV_LVL_M)};
    assign axi_req_o.ar.region = '0;
    assign axi_req_o.ar.len    = (2**NR_AXI_REFILLS) - 1;
    assign axi_req_o.ar.size   = 3'b011;
    assign axi_req_o.ar.burst  = 2'b01;
    assign axi_req_o.ar.lock   = '0;
    assign axi_req_o.ar.cache  = '0;
    assign axi_req_o.ar.qos    = '0;
    assign axi_req_o.ar.id     = '0;

    assign axi_req_o.r_ready   = 1'b1;

    assign data_be = be;
    assign data_wdata = wdata;

    assign dreq_o.ex = areq_i.fetch_exception;

    assign addr = (state_q==FLUSH) ? cnt_q : vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];

    // ------------------
    // Cache Ctrl
    // ------------------
    // for bypassing we use the existing infrastructure of the cache
    // but on every access we are re-fetching the cache-line
    always_comb begin : cache_ctrl
        // default assignments
        state_d      = state_q;
        cnt_d        = cnt_q;
        vaddr_d      = vaddr_q;
        tag_d        = tag_q;
        evict_way_d  = evict_way_q;
        flushing_d   = flushing_q;
        burst_cnt_d  = burst_cnt_q;

        dreq_o.vaddr = vaddr_q;

        req          = '0;
        vld_req      = '0;
        we           = 1'b0;
        be           = '0;
        wdata        = '0;
        tag_wdata    = '0;
        dreq_o.ready = 1'b0;
        tag          = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
        dreq_o.valid = 1'b0;
        update_lfsr  = 1'b0;
        miss_o       = 1'b0;

        axi_req_o.ar_valid = 1'b0;
        axi_req_o.ar.addr  = '0;

        areq_o.fetch_req = 1'b0;
        areq_o.fetch_vaddr = vaddr_q;

        case (state_q)
            // ~> we are ready to receive a new request
            IDLE: begin
                dreq_o.ready = 1'b1;
                vaddr_d      = dreq_i.vaddr;

                // we are getting a new request
                if (dreq_i.req) begin
                    // request the content of all arrays
                    req     = '1;
                    vld_req = '1;
                    // save the virtual address
                    state_d = TAG_CMP;
                end

                // go to flushing state
                if (flush_i || flushing_q)
                    state_d = FLUSH;

                if (dreq_i.kill_s1)
                    state_d = IDLE;
            end
            // ~> compare the tag
            TAG_CMP, TAG_CMP_SAVED: begin
                areq_o.fetch_req = 1'b1; // request address translation

                // (speculatively) request the content of all arrays
                req     = '1;
                vld_req = '1;

                // use the saved tag
                if (state_q == TAG_CMP_SAVED)
                    tag = tag_q;
                // -------
                // Hit
                // -------
                // disabling the icache just makes it fetch on every request
                if (|hit && areq_i.fetch_valid && (en_i || (state_q != TAG_CMP))) begin
                    dreq_o.ready = 1'b1;
                    dreq_o.valid = 1'b1;
                    vaddr_d      = dreq_i.vaddr;

                    // we've got another request
                    if (dreq_i.req) begin
                        // save the index and stay in compare mode
                        state_d = TAG_CMP;
                    // no new request -> go back to idle
                    end else begin
                        state_d = IDLE;
                    end

                    if (dreq_i.kill_s1)
                        state_d = IDLE;
                // -------
                // Miss
                // -------
                end else begin
                    state_d     = REFILL;
                    // hit gonna be zero in most cases except for when the cache is disabled
                    evict_way_d = hit;
                    // save tag
                    tag_d       = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
                    miss_o      = en_i;
                    // get way which to replace
                    // only if there is no hit we should fall back to real replacement. If there was a hit then
                    // it means we are in bypass mode (!en_i) and should update the cache-line with the most recent
                    // value fetched from memory.
                    if (!(|hit)) begin
                        // all ways are currently full, randomly replace one of them
                        if (repl_w_random) begin
                            evict_way_d = random_way;
                            // shift the lfsr
                            update_lfsr = 1'b1;
                        // there is still one cache-line which is not valid ~> replace that one
                        end else begin
                            evict_way_d[repl_invalid] = 1'b1;
                        end
                    end
                end
                // if we didn't hit on the TLB we need to wait until the request has been completed
                if (!areq_i.fetch_valid) begin
                    state_d = WAIT_ADDRESS_TRANSLATION;
                end
            end
            // ~> wait here for a valid address translation, or on a translation even if the request has been killed
            WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED: begin
                areq_o.fetch_req = 1'b1;
                // retry the request if no exception occurred
                if (areq_i.fetch_valid && (state_q == WAIT_ADDRESS_TRANSLATION)) begin
                    if (areq_i.fetch_exception.valid) begin
                        dreq_o.valid = 1'b1;
                        state_d = IDLE;
                    end else begin
                        state_d = REDO_REQ;
                        tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
                    end
                end else if (areq_i.fetch_valid) begin
                    state_d = IDLE;
                end

                if (dreq_i.kill_s2)
                    state_d = WAIT_ADDRESS_TRANSLATION_KILLED;
            end
            // ~> request a cache-line refill
            REFILL, WAIT_KILLED_REFILL: begin
                axi_req_o.ar_valid  = 1'b1;
                axi_req_o.ar.addr[ICACHE_INDEX_WIDTH+ICACHE_TAG_WIDTH-1:0] = {tag_q, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET], {ICACHE_BYTE_OFFSET{1'b0}}};
                burst_cnt_d = '0;

                if (dreq_i.kill_s2)
                    state_d = WAIT_KILLED_REFILL;

                // we need to finish this AXI transfer
                if (axi_resp_i.ar_ready)
                    state_d = (dreq_i.kill_s2 || (state_q == WAIT_KILLED_REFILL)) ? WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP;
            end
            // ~> wait for the read response
            WAIT_AXI_R_RESP, WAIT_KILLED_AXI_R_RESP: begin

                req     = evict_way_q;
                vld_req = evict_way_q;

                if (axi_resp_i.r_valid) begin
                    we = 1'b1;
                    tag_wdata.tag = tag_q;
                    tag_wdata.valid = 1'b1;
                    wdata[burst_cnt_q] = axi_resp_i.r.data;
                    // enable the right write path
                    be[burst_cnt_q] = '1;
                    // increase burst count
                    burst_cnt_d = burst_cnt_q + 1;
                end

                if (dreq_i.kill_s2)
                    state_d = WAIT_KILLED_AXI_R_RESP;

                if (axi_resp_i.r_valid && axi_resp_i.r.last) begin
                    state_d = (dreq_i.kill_s2) ? IDLE : REDO_REQ;
                end

                if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi_resp_i.r.last && axi_resp_i.r_valid)
                    state_d = IDLE;
            end
            // ~> redo the request,
            REDO_REQ: begin
                req     = '1;
                vld_req = '1;
                tag     = tag_q;
                state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag
            end
            // ~> we are coming here after reset or when a flush was requested
            FLUSH: begin
                cnt_d   = cnt_q + 1;
                vld_req = '1;
                we      = 1;
                // we've finished flushing, go back to idle
                if (cnt_q == ICACHE_NUM_WORD - 1) begin
                    state_d = IDLE;
                    flushing_d = 1'b0;
                end
            end

            default : state_d = IDLE;
        endcase

        // those are the states where we need to wait a little longer until we can safely exit
        if (dreq_i.kill_s2 && !(state_q inside {
                                                    REFILL,
                                                    WAIT_AXI_R_RESP,
                                                    WAIT_KILLED_AXI_R_RESP,
                                                    WAIT_KILLED_REFILL,
                                                    WAIT_ADDRESS_TRANSLATION,
                                                    WAIT_ADDRESS_TRANSLATION_KILLED})
                           && !dreq_o.ready) begin
            state_d = IDLE;
        end

        // if we are killing we can never give a valid response
        if (dreq_i.kill_s2)
            dreq_o.valid = 1'b0;

        if (flush_i) begin
            flushing_d = 1'b1;
            dreq_o.ready = 1'b0; // we are not ready to accept a further request here
        end
        // if we are going to flush -> do not accept any new requests
        if (flushing_q)
            dreq_o.ready = 1'b0;
    end

    lzc #(
        .WIDTH ( ICACHE_SET_ASSOC )
    ) i_lzc (
        .in_i    ( ~way_valid    ),
        .cnt_o   ( repl_invalid  ),
        .empty_o ( repl_w_random )
    );

    // -----------------
    // Replacement LFSR
    // -----------------
    lfsr_8bit #(.WIDTH (ICACHE_SET_ASSOC)) i_lfsr (
        .clk_i          ( clk_i       ),
        .rst_ni         ( rst_ni      ),
        .en_i           ( update_lfsr ),
        .refill_way_oh  ( random_way  ),
        .refill_way_bin (             ) // left open
    );

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            state_q     <= FLUSH;
            cnt_q       <= '0;
            vaddr_q     <= '0;
            tag_q       <= '0;
            evict_way_q <= '0;
            flushing_q  <= 1'b0;
            burst_cnt_q <= '0;;
        end else begin
            state_q     <= state_d;
            cnt_q       <= cnt_d;
            vaddr_q     <= vaddr_d;
            tag_q       <= tag_d;
            evict_way_q <= evict_way_d;
            flushing_q  <= flushing_d;
            burst_cnt_q <= burst_cnt_d;
        end
    end

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
`ifndef VERILATOR
initial begin
    assert ($bits(axi_req_o.aw.addr) == 64)
        else $fatal(1, "[icache] Ariane needs a 64-bit bus");
end

// assert that cache only hits on one way
onehot: assert property (
    @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit))
        else $fatal(1, "[icache] Hit should be one-hot encoded");
`endif
//pragma translate_on
endmodule