// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 12.02.2018
// ------------------------------
// Instruction Cache
// ------------------------------
import ariane_pkg::*;
import std_cache_pkg::*;

// Set-associative instruction cache with an AXI read port for line refills.
// A fetch request is looked up in all ways in parallel; the tag used for the
// comparison comes from the address translation interface. On a miss the
// whole line is fetched with one AXI read burst and the lookup is repeated.
module std_icache (
    input  logic                     clk_i,            // all state is registered on the rising edge
    input  logic                     rst_ni,           // asynchronous, active-low reset; the controller restarts in FLUSH
    input riscv::priv_lvl_t          priv_lvl_i,       // only used to derive the LSB of the AXI read protection field

    input  logic                     flush_i,          // flush the icache, flush and kill have to be asserted together
    input  logic                     en_i,             // enable icache; when low a hit is still re-fetched from memory
    output logic                     miss_o,           // to performance counter, only raised while en_i is set
    // address translation requests
    // areq_o carries fetch_req/fetch_vaddr, areq_i returns fetch_valid,
    // fetch_paddr and fetch_exception
    input  icache_areq_i_t           areq_i,
    output icache_areq_o_t           areq_o,
    // data requests
    // dreq_i carries req/vaddr/kill_s1/kill_s2, dreq_o returns
    // ready/valid/data/vaddr/ex
    input  icache_dreq_i_t           dreq_i,
    output icache_dreq_o_t           dreq_o,
    // AXI refill port
    // only the read address/read data channels are driven actively, the
    // write channels are tied off inside the module
    output ariane_axi::req_t         axi_req_o,
    input  ariane_axi::resp_t        axi_resp_i
);

    // number of address bits that select a byte inside one cache line
    // NOTE(review): the previous comment gave the value 3, which only holds
    // for a 64-bit line - confirm against ICACHE_LINE_WIDTH in the package
    localparam int unsigned ICACHE_BYTE_OFFSET = $clog2(ICACHE_LINE_WIDTH/8);
    // number of cache lines per way, i.e. the depth of every tag/data SRAM
    localparam int unsigned ICACHE_NUM_WORD    = 2**(ICACHE_INDEX_WIDTH - ICACHE_BYTE_OFFSET);
    // log2 of the number of 64-bit AXI beats per line, clamped to at least 1;
    // sizes the burst counter and sets ar.len to 2**NR_AXI_REFILLS - 1
    // NOTE(review): for a 64-bit line the clamp yields a two-beat burst - confirm this is intended
    localparam int unsigned NR_AXI_REFILLS     = ($clog2(ICACHE_LINE_WIDTH/64) == 0) ? 1 : $clog2(ICACHE_LINE_WIDTH/64);
    // registers
    // controller states:
    //   FLUSH                            - walk over all lines and clear them (also the reset state)
    //   IDLE                             - ready to accept a request
    //   TAG_CMP / TAG_CMP_SAVED          - tag comparison against the translated / the saved tag
    //   REFILL / WAIT_AXI_R_RESP         - issue the AXI read, then collect its data beats
    //   WAIT_KILLED_REFILL /
    //   WAIT_KILLED_AXI_R_RESP           - same, but the request was killed and only the AXI transfer is finished
    //   REDO_REQ                         - read the arrays again before comparing with the saved tag
    //   WAIT_ADDRESS_TRANSLATION(_KILLED) - wait for the address translation to answer
    enum logic [3:0] { FLUSH, IDLE, TAG_CMP, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP,
                       REDO_REQ, TAG_CMP_SAVED, REFILL,
                       WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED
                     }                      state_d, state_q;
    logic [$clog2(ICACHE_NUM_WORD)-1:0]     cnt_d, cnt_q;             // line counter used as SRAM address while flushing
    logic [NR_AXI_REFILLS-1:0]              burst_cnt_d, burst_cnt_q; // counter for AXI transfers
    logic [63:0]                            vaddr_d, vaddr_q;         // virtual address of the request being processed
    logic [ICACHE_TAG_WIDTH-1:0]            tag_d, tag_q;             // tag saved for the refill and the repeated lookup
    logic [ICACHE_SET_ASSOC-1:0]            evict_way_d, evict_way_q; // one-hot way that receives the refill
    logic                                   flushing_d, flushing_q;   // a flush was requested and has not finished yet

    // signals
    logic [ICACHE_SET_ASSOC-1:0]          req;           // request to data memory
    logic [ICACHE_SET_ASSOC-1:0]          vld_req;       // request to valid/tag memory
    logic [(ICACHE_LINE_WIDTH+7)/8-1:0]   data_be;       // byte enable for data memory
    logic [(2**NR_AXI_REFILLS-1):0][7:0]  be;            // byte enable, one 8-bit group per AXI beat
    logic [$clog2(ICACHE_NUM_WORD)-1:0]   addr;          // this is a cache-line address, to memory array
    logic                                 we;            // write enable to memory array
    logic [ICACHE_SET_ASSOC-1:0]          hit;           // hit from tag compare
    logic [$clog2(ICACHE_NUM_WORD)-1:0]   idx;           // index in cache line
    logic                                 update_lfsr;   // shift the LFSR
    logic [ICACHE_SET_ASSOC-1:0]          random_way;    // random way select from LFSR
    logic [ICACHE_SET_ASSOC-1:0]          way_valid;     // bit string which contains the zapped valid bits
    logic [$clog2(ICACHE_SET_ASSOC)-1:0]  repl_invalid;  // first non-valid encountered
    logic                                 repl_w_random; // we need to switch repl strategy since all are valid
    logic [ICACHE_TAG_WIDTH-1:0]          tag;           // tag to do comparison with

    // tag + valid bit read/write data
    // one read word per way, a single write word shared by all ways
    struct packed {
        logic                 valid;
        logic [ICACHE_TAG_WIDTH-1:0] tag;
    } tag_rdata [ICACHE_SET_ASSOC-1:0], tag_wdata;

    // cache line read from every way and the line written during a refill
    logic [ICACHE_LINE_WIDTH-1:0] data_rdata [ICACHE_SET_ASSOC-1:0], data_wdata;
    // refill write data organised as one 64-bit slice per AXI beat
    logic [(2**NR_AXI_REFILLS-1):0][63:0] wdata;

    // Storage arrays: every way owns one tag RAM and one data RAM. Address,
    // write enable and write data are shared between the ways; a way is
    // selected through its own request bit.
    generate
        for (genvar way = 0; way < ICACHE_SET_ASSOC; way++) begin : sram_block
            // tag RAM, one entry holds {valid, tag} and is always written as a whole
            sram #(
                .NUM_WORDS  ( ICACHE_NUM_WORD      ),
                .DATA_WIDTH ( ICACHE_TAG_WIDTH + 1 )
            ) tag_sram (
                .clk_i   ( clk_i          ),
                .rst_ni  ( rst_ni         ),
                .req_i   ( vld_req[way]   ),
                .addr_i  ( addr           ),
                .we_i    ( we             ),
                .be_i    ( '1             ),
                .wdata_i ( tag_wdata      ),
                .rdata_o ( tag_rdata[way] )
            );

            // data RAM, one entry holds a full cache line; the byte enables
            // allow a line to be filled beat by beat
            sram #(
                .NUM_WORDS  ( ICACHE_NUM_WORD   ),
                .DATA_WIDTH ( ICACHE_LINE_WIDTH )
            ) data_sram (
                .clk_i   ( clk_i           ),
                .rst_ni  ( rst_ni          ),
                .req_i   ( req[way]        ),
                .addr_i  ( addr            ),
                .we_i    ( we              ),
                .be_i    ( data_be         ),
                .wdata_i ( data_wdata      ),
                .rdata_o ( data_rdata[way] )
            );
        end
    endgenerate

    // --------------------
    // Tag Comparison and way select
    // --------------------

    // cacheline selected by hit
    logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // zero for every way without a hit

    // word position inside the line, taken from the registered virtual address
    assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];

    generate
        for (genvar way = 0; way < ICACHE_SET_ASSOC; way++) begin : g_tag_cmpsel
            // valid bit of the entry read from this way, consumed by the replacement logic
            assign way_valid[way] = tag_rdata[way].valid;
            // hit: the entry is valid and its tag equals the reference tag
            assign hit[way]       = tag_rdata[way].valid && (tag_rdata[way].tag == tag);
            // gate the addressed word with the hit bit; {idx, 5'b0} is the bit offset idx * 32
            assign cl_sel[way]    = {FETCH_WIDTH{hit[way]}} & data_rdata[way][{idx, 5'b0} +: FETCH_WIDTH];
        end
    endgenerate

    // OR reduction of selected cachelines
    always_comb begin : p_reduction
        // ways that did not hit contribute all-zero words, so OR-ing every
        // way together acts as the way multiplexer
        dreq_o.data = '0;
        for (int way = 0; way < ICACHE_SET_ASSOC; way++) begin
            dreq_o.data = dreq_o.data | cl_sel[way];
        end
    end

    // ------------------
    // AXI Plumbing
    // ------------------
    // instruction cache is read-only
    // write address channel (AW): unused, every field is tied to zero
    assign axi_req_o.aw_valid  = 1'b0;
    assign axi_req_o.aw.id     = '0;
    assign axi_req_o.aw.addr   = '0;
    assign axi_req_o.aw.len    = '0;
    assign axi_req_o.aw.size   = '0;
    assign axi_req_o.aw.burst  = '0;
    assign axi_req_o.aw.lock   = '0;
    assign axi_req_o.aw.cache  = '0;
    assign axi_req_o.aw.prot   = '0;
    assign axi_req_o.aw.qos    = '0;
    assign axi_req_o.aw.region = '0;
    assign axi_req_o.aw.atop   = '0;

    // write data (W) and write response (B) channels: unused as well
    assign axi_req_o.w_valid   = 1'b0;
    assign axi_req_o.w.data    = '0;
    assign axi_req_o.w.strb    = '0;
    assign axi_req_o.w.last    = '0;
    assign axi_req_o.b_ready   = '0;

    // read address channel (AR): constant part of every refill request,
    // ar_valid and ar.addr are driven by the cache controller
    assign axi_req_o.ar.id     = '0;
    assign axi_req_o.ar.len    = (2**NR_AXI_REFILLS) - 1; // beats per burst minus one
    assign axi_req_o.ar.size   = 3'b011;                  // 8 bytes per beat (AXI encoding)
    assign axi_req_o.ar.burst  = 2'b01;                   // incrementing burst (AXI encoding)
    assign axi_req_o.ar.lock   = '0;
    assign axi_req_o.ar.cache  = '0;
    // set protection flag, MSB -> instruction fetch, LSB -> privileged access or not
    assign axi_req_o.ar.prot   = {1'b1, 1'b0, (priv_lvl_i == riscv::PRIV_LVL_M)};
    assign axi_req_o.ar.qos    = '0;
    assign axi_req_o.ar.region = '0;

    // read data channel (R): refill data is accepted in every cycle
    assign axi_req_o.r_ready   = 1'b1;

    // flatten the per-beat byte enables and write data onto the data RAM ports
    assign data_be    = be;
    assign data_wdata = wdata;

    // the exception reported by the address translation is passed on unchanged
    assign dreq_o.ex = areq_i.fetch_exception;

    // SRAM address: the flush counter while flushing, otherwise the line
    // index of the next-cycle virtual address
    assign addr = (state_q == FLUSH) ? cnt_q : vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];

    // ------------------
    // Cache Ctrl
    // ------------------
    // for bypassing we use the existing infrastructure of the cache
    // but on every access we are re-fetching the cache-line
    // Combinational next-state and output logic of the cache controller.
    // Every output and every *_d signal first receives a default value; the
    // case statement then overrides them per state, and the statements after
    // the case (kill and flush handling) take priority over everything set
    // before. The order of the assignments is therefore significant.
    always_comb begin : cache_ctrl
        // default assignments
        state_d      = state_q;
        cnt_d        = cnt_q;
        vaddr_d      = vaddr_q;
        tag_d        = tag_q;
        evict_way_d  = evict_way_q;
        flushing_d   = flushing_q;
        burst_cnt_d  = burst_cnt_q;

        dreq_o.vaddr = vaddr_q;

        req          = '0;
        vld_req      = '0;
        we           = 1'b0;
        be           = '0;
        wdata        = '0;
        tag_wdata    = '0;
        dreq_o.ready = 1'b0;
        // by default compare against the tag delivered by the address translation
        tag          = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
        dreq_o.valid = 1'b0;
        update_lfsr  = 1'b0;
        miss_o       = 1'b0;

        axi_req_o.ar_valid = 1'b0;
        axi_req_o.ar.addr  = '0;

        areq_o.fetch_req = 1'b0;
        areq_o.fetch_vaddr = vaddr_q;

        case (state_q)
            // ~> we are ready to receive a new request
            IDLE: begin
                dreq_o.ready = 1'b1;
                // capture the incoming address; it also addresses the SRAMs in this cycle
                vaddr_d      = dreq_i.vaddr;

                // we are getting a new request
                if (dreq_i.req) begin
                    // request the content of all arrays
                    req     = '1;
                    vld_req = '1;
                    // save the virtual address
                    state_d = TAG_CMP;
                end

                // go to flushing state
                if (flush_i || flushing_q)
                    state_d = FLUSH;

                // a kill in the first stage overrides both transitions above;
                // a flush requested in the same cycle is still remembered in
                // flushing_d (set after the case statement)
                if (dreq_i.kill_s1)
                    state_d = IDLE;
            end
            // ~> compare the tag
            TAG_CMP, TAG_CMP_SAVED: begin
                areq_o.fetch_req = 1'b1; // request address translation

                // (speculatively) request the content of all arrays
                req     = '1;
                vld_req = '1;

                // use the saved tag
                if (state_q == TAG_CMP_SAVED)
                    tag = tag_q;
                // -------
                // Hit
                // -------
                // disabling the icache just makes it fetch on every request
                // (with en_i low a hit is only accepted in TAG_CMP_SAVED, i.e. after the line was re-fetched)
                if (|hit && areq_i.fetch_valid && (en_i || (state_q != TAG_CMP))) begin
                    dreq_o.ready = 1'b1;
                    dreq_o.valid = 1'b1;
                    vaddr_d      = dreq_i.vaddr;

                    // we've got another request
                    if (dreq_i.req) begin
                        // save the index and stay in compare mode
                        state_d = TAG_CMP;
                    // no new request -> go back to idle
                    end else begin
                        state_d = IDLE;
                    end

                    if (dreq_i.kill_s1)
                        state_d = IDLE;
                // -------
                // Miss
                // -------
                // (this branch is also taken while the translation is not yet
                // valid; the state chosen here is then overridden below)
                end else begin
                    state_d     = REFILL;
                    // hit gonna be zero in most cases except for when the cache is disabled
                    evict_way_d = hit;
                    // save tag
                    tag_d       = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
                    miss_o      = en_i;
                    // get way which to replace
                    // only if there is no hit we should fall back to real replacement. If there was a hit then
                    // it means we are in bypass mode (!en_i) and should update the cache-line with the most recent
                    // value fetched from memory.
                    if (!(|hit)) begin
                        // all ways are currently full, randomly replace one of them
                        if (repl_w_random) begin
                            evict_way_d = random_way;
                            // shift the lfsr
                            update_lfsr = 1'b1;
                        // there is still one cache-line which is not valid ~> replace that one
                        end else begin
                            evict_way_d[repl_invalid] = 1'b1;
                        end
                    end
                end
                // if we didn't hit on the TLB we need to wait until the request has been completed
                if (!areq_i.fetch_valid) begin
                    state_d = WAIT_ADDRESS_TRANSLATION;
                end
            end
            // ~> wait here for a valid address translation, or on a translation even if the request has been killed
            WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED: begin
                areq_o.fetch_req = 1'b1;
                // retry the request if no exception occurred
                if (areq_i.fetch_valid && (state_q == WAIT_ADDRESS_TRANSLATION)) begin
                    if (areq_i.fetch_exception.valid) begin
                        // report the exception to the requester and finish the request
                        dreq_o.valid = 1'b1;
                        state_d = IDLE;
                    end else begin
                        state_d = REDO_REQ;
                        tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
                    end
                // killed request: drop it as soon as the translation has answered
                end else if (areq_i.fetch_valid) begin
                    state_d = IDLE;
                end

                // a kill takes priority over the transitions chosen above
                if (dreq_i.kill_s2)
                    state_d = WAIT_ADDRESS_TRANSLATION_KILLED;
            end
            // ~> request a cache-line refill
            REFILL, WAIT_KILLED_REFILL: begin
                axi_req_o.ar_valid  = 1'b1;
                // line-aligned physical address: saved tag, line index of the request, zeroed byte offset
                axi_req_o.ar.addr[ICACHE_INDEX_WIDTH+ICACHE_TAG_WIDTH-1:0] = {tag_q, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET], {ICACHE_BYTE_OFFSET{1'b0}}};
                burst_cnt_d = '0;

                if (dreq_i.kill_s2)
                    state_d = WAIT_KILLED_REFILL;

                // we need to finish this AXI transfer
                if (axi_resp_i.ar_ready)
                    state_d = (dreq_i.kill_s2 || (state_q == WAIT_KILLED_REFILL)) ? WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP;
            end
            // ~> wait for the read response
            WAIT_AXI_R_RESP, WAIT_KILLED_AXI_R_RESP: begin

                // only the way selected for eviction is accessed
                req     = evict_way_q;
                vld_req = evict_way_q;

                // every received beat is written immediately; the tag entry is
                // (re)written as valid together with each beat
                if (axi_resp_i.r_valid) begin
                    we = 1'b1;
                    tag_wdata.tag = tag_q;
                    tag_wdata.valid = 1'b1;
                    wdata[burst_cnt_q] = axi_resp_i.r.data;
                    // enable the right write path
                    be[burst_cnt_q] = '1;
                    // increase burst count
                    burst_cnt_d = burst_cnt_q + 1;
                end

                if (dreq_i.kill_s2)
                    state_d = WAIT_KILLED_AXI_R_RESP;

                // last beat: repeat the lookup unless the request is being killed right now
                if (axi_resp_i.r_valid && axi_resp_i.r.last) begin
                    state_d = (dreq_i.kill_s2) ? IDLE : REDO_REQ;
                end

                // a request that was killed earlier ends in IDLE after the last beat
                if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi_resp_i.r.last && axi_resp_i.r_valid)
                    state_d = IDLE;
            end
            // ~> redo the request,
            REDO_REQ: begin
                req     = '1;
                vld_req = '1;
                tag     = tag_q;
                state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag
            end
            // ~> we are coming here after reset or when a flush was requested
            FLUSH: begin
                // write the default tag_wdata (all zero, i.e. valid cleared) to
                // the tag RAM of every way at the line selected by cnt_q
                cnt_d   = cnt_q + 1;
                vld_req = '1;
                we      = 1;
                // we've finished flushing, go back to idle
                if (cnt_q == ICACHE_NUM_WORD - 1) begin
                    state_d = IDLE;
                    flushing_d = 1'b0;
                end
            end

            default : state_d = IDLE;
        endcase

        // those are the states where we need to wait a little longer until we can safely exit
        // (in every other state a kill returns straight to IDLE, unless the
        // cache signals ready in this cycle)
        if (dreq_i.kill_s2 && !(state_q inside {
                                                    REFILL,
                                                    WAIT_AXI_R_RESP,
                                                    WAIT_KILLED_AXI_R_RESP,
                                                    WAIT_KILLED_REFILL,
                                                    WAIT_ADDRESS_TRANSLATION,
                                                    WAIT_ADDRESS_TRANSLATION_KILLED})
                           && !dreq_o.ready) begin
            state_d = IDLE;
        end

        // if we are killing we can never give a valid response
        if (dreq_i.kill_s2)
            dreq_o.valid = 1'b0;

        // remember the flush request until the FLUSH state has completed
        if (flush_i) begin
            flushing_d = 1'b1;
            dreq_o.ready = 1'b0; // we are not ready to accept a further request here
        end
        // if we are going to flush -> do not accept any new requests
        if (flushing_q)
            dreq_o.ready = 1'b0;
    end

    // -----------------
    // Replacement way selection
    // -----------------
    // The inverted valid vector is searched for a way that is still invalid.
    // Going by the signal declarations, cnt_o is the index of that way and
    // empty_o tells the controller that all ways are valid, in which case it
    // replaces the way chosen by the LFSR instead.
    lzc #(
        .WIDTH ( ICACHE_SET_ASSOC )
    ) i_lzc (
        .in_i    ( ~way_valid    ),
        .empty_o ( repl_w_random ),
        .cnt_o   ( repl_invalid  )
    );

    // LFSR delivering the one-hot random way; it only advances when the
    // controller asserts update_lfsr
    lfsr_8bit #(
        .WIDTH ( ICACHE_SET_ASSOC )
    ) i_lfsr (
        .clk_i          ( clk_i       ),
        .rst_ni         ( rst_ni      ),
        .en_i           ( update_lfsr ),
        .refill_way_bin (             ), // left open
        .refill_way_oh  ( random_way  )
    );

    // State registers of the cache controller: every *_q signal takes over
    // its *_d counterpart on the rising clock edge. The reset is asynchronous
    // and active low.
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            // start in FLUSH with the line counter at zero so that all lines
            // are cleared before the first request is accepted
            state_q     <= FLUSH;
            cnt_q       <= '0;
            vaddr_q     <= '0;
            tag_q       <= '0;
            evict_way_q <= '0;
            flushing_q  <= 1'b0;
            burst_cnt_q <= '0;
        end else begin
            state_q     <= state_d;
            cnt_q       <= cnt_d;
            vaddr_q     <= vaddr_d;
            tag_q       <= tag_d;
            evict_way_q <= evict_way_d;
            flushing_q  <= flushing_d;
            burst_cnt_q <= burst_cnt_d;
        end
    end

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
`ifndef VERILATOR
// checked once at the start of simulation: the AXI address field has to be 64 bit wide
initial begin
    assert ($bits(axi_req_o.aw.addr) == 64)
    else $fatal(1, "[icache] Ariane needs a 64-bit bus");
end

// outside of reset at most one way may report a hit in any clock cycle
onehot: assert property (@(posedge clk_i) disable iff (!rst_ni) $onehot0(hit))
    else $fatal(1, "[icache] Hit should be one-hot encoded");
`endif
//pragma translate_on
endmodule