// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Florian Zaruba <zarubaf@iis.ee.ethz.ch>

`include "common_cells/registers.svh"
`include "common_cells/assertions.svh"

/// A small L0 instruction cache private to each fetch port.
///
/// The cache holds `CFG.L0_LINE_COUNT` lines whose tags are all compared in
/// parallel against the fetch address. Lines are replaced round-robin. On a
/// miss a refill request is issued on the `out_req_*` port; when prefetching
/// is enabled, an additional prefetch request is generated on a hit for a
/// predicted follow-up line that is not yet present.
///
/// Parameters:
/// - `CFG`:   Cache configuration (widths, line count, latch/flop selection).
/// - `L0_ID`: Identifier of this L0 instance; concatenated with the prefetch
///            flag to form `out_req_id_o`.
module snitch_icache_l0 import snitch_icache_pkg::*; #(
    parameter config_t CFG = '0,
    parameter int unsigned L0_ID = 0
) (
    /// Clock; the tag array updates on its rising edge.
    input  logic clk_i,
    /// Asynchronous, active-low reset.
    input  logic rst_ni,
    /// Invalidates all lines while asserted.
    input  logic flush_valid_i,

    /// Enables generation of prefetch requests.
    input  logic                      enable_prefetching_i,
    /// Performance events (miss, hit, prefetch, double hit).
    output icache_events_t            icache_events_o,

    /// Fetch port: address and valid of the request.
    input  logic [CFG.FETCH_AW-1:0]   in_addr_i,
    input  logic                      in_valid_i,
    /// Fetch data, selected from the hitting line by the address offset.
    output logic [CFG.FETCH_DW-1:0]   in_data_o,
    /// Asserted on a hit with a unique early tag match.
    output logic                      in_ready_o,
    /// Constantly driven to zero by this module.
    output logic                      in_error_o,

    /// Refill/prefetch request port. The LSB of the ID is the prefetch flag,
    /// the bits above carry `L0_ID`.
    output logic [CFG.FETCH_AW-1:0]      out_req_addr_o,
    output logic [CFG.ID_WIDTH_REQ-1:0]  out_req_id_o,
    output logic                         out_req_valid_o,
    input  logic                         out_req_ready_i,

    /// Refill response port. The data is written into the line selected at
    /// eviction time. Within this module `out_rsp_error_i` and `out_rsp_id_i`
    /// are only referenced by assertions. `out_rsp_ready_o` is always high.
    input  logic [CFG.LINE_WIDTH-1:0]    out_rsp_data_i,
    input  logic                         out_rsp_error_i,
    input  logic [CFG.ID_WIDTH_RESP-1:0] out_rsp_id_i,
    input  logic                         out_rsp_valid_i,
    output logic                         out_rsp_ready_o
);

  // Fetch address type.
  typedef logic [CFG.FETCH_AW-1:0] addr_t;

  // One entry of the tag array: the line tag and its valid bit.
  typedef struct packed {
    logic [CFG.L0_TAG_WIDTH-1:0] tag;
    logic                        vld;
  } tag_t;

  // Tag of the current fetch address and tag of the prefetch candidate.
  logic [CFG.L0_TAG_WIDTH-1:0] addr_tag;
  logic [CFG.L0_TAG_WIDTH-1:0] addr_tag_prefetch;

  // Tag and data storage, one entry per L0 line.
  tag_t [CFG.L0_LINE_COUNT-1:0]                     tag;
  logic [CFG.L0_LINE_COUNT-1:0][CFG.LINE_WIDTH-1:0] data;

  // Per-line comparison results and their reductions.
  logic [CFG.L0_LINE_COUNT-1:0] hit;
  logic [CFG.L0_LINE_COUNT-1:0] hit_early;
  logic [CFG.L0_LINE_COUNT-1:0] hit_prefetch;
  logic                         hit_early_is_onehot;
  logic                         hit_any;
  logic                         hit_prefetch_any;
  logic                         miss;

  // Per-line control strobes of the tag array.
  logic [CFG.L0_LINE_COUNT-1:0] evict_strb;
  logic [CFG.L0_LINE_COUNT-1:0] flush_strb;
  logic [CFG.L0_LINE_COUNT-1:0] validate_strb;

  // A prefetch candidate or latched prefetch request.
  typedef struct packed {
    logic vld;
    logic [CFG.FETCH_AW-1:0] addr;
  } prefetch_req_t;

  logic          latch_prefetch;
  logic          last_cycle_was_prefetch_q;
  prefetch_req_t prefetch_req_q;
  prefetch_req_t prefetch_req_d;
  prefetch_req_t prefetcher_out;

  // One-hot selection of the line which is currently being refilled.
  logic [CFG.L0_LINE_COUNT-1:0] pending_line_refill_q;
  logic                         pending_refill_q;
  logic                         pending_refill_d;

  // Eviction request and its two possible causes.
  logic evict_req;
  logic evict_because_miss;
  logic evict_because_prefetch;
  logic last_cycle_was_miss_q;

  // A request towards the refill port.
  typedef struct packed {
    logic                    is_prefetch;
    logic [CFG.FETCH_AW-1:0] addr;
  } req_t;

  req_t refill;
  req_t prefetch;
  req_t out_req;
  logic refill_valid;
  logic refill_ready;
  logic prefetch_valid;
  logic prefetch_ready;

  // Remember last cycle's miss/prefetch state to detect the first cycle of
  // either event.
  `FF(last_cycle_was_miss_q, miss, '0)
  `FF(last_cycle_was_prefetch_q, latch_prefetch, '0)

  // Request an eviction only in the first cycle of a miss or of a newly
  // latched prefetch.
  assign evict_because_miss     = ~last_cycle_was_miss_q & miss;
  assign evict_because_prefetch = ~last_cycle_was_prefetch_q & latch_prefetch;
  assign evict_req              = evict_because_prefetch | evict_because_miss;

  // Strip the line offset from the fetch address to obtain its tag.
  assign addr_tag = in_addr_i >> CFG.LINE_ALIGN;

  // ------------
  // Tag Compare
  // ------------
  for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_cmp_fetch
    // Early hit: the line is valid and the low `L0_EARLY_TAG_WIDTH` tag bits
    // match the fetch address.
    assign hit_early[i] =
        (tag[i].tag[CFG.L0_EARLY_TAG_WIDTH-1:0] == addr_tag[CFG.L0_EARLY_TAG_WIDTH-1:0])
        & tag[i].vld;
    if (CFG.L0_TAG_WIDTH != CFG.L0_EARLY_TAG_WIDTH) begin : gen_hit
      // Full hit: additionally compare the remaining upper tag bits.
      assign hit[i] =
          (tag[i].tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]
           == addr_tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH])
          & hit_early[i];
    end else begin : gen_hit_assign
      // The early comparison already covers the whole tag.
      assign hit[i] = hit_early[i];
    end
    // Is the prefetch candidate already held by this line?
    assign hit_prefetch[i] = (tag[i].tag == addr_tag_prefetch) & tag[i].vld;
  end

  assign hit_prefetch_any = |hit_prefetch;
  assign hit_any          = |hit;
  // A valid request misses if no line hits and no refill is outstanding.
  assign miss = in_valid_i & ~(hit_any | pending_refill_q);

  // Inverted clock, used as the source clock of the data latches in the
  // `gen_latch` variant below.
  logic clk_inv;
  tc_clk_inverter i_clk_inv (
    .clk_i (clk_i),
    .clk_o (clk_inv)
  );

  // Tag and data storage, one instance per L0 line.
  for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_array
    // Tag Array
    // - Evict:    invalidate the line and store the tag of the line that is
    //             about to be fetched (prefetch tag or fetch tag).
    // - Validate: mark the line valid; only considered if the line is not
    //             evicted in the same cycle.
    // - Flush:    invalidate the line. This is the last assignment and thus
    //             overrides a simultaneous validate.
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        tag[i].vld <= 0;
        tag[i].tag <= 0;
      end else begin
        if (evict_strb[i]) begin
          tag[i].vld <= 1'b0;
          tag[i].tag <= evict_because_prefetch ? addr_tag_prefetch : addr_tag;
        end else if (validate_strb[i]) begin
          tag[i].vld <= 1'b1;
        end
        if (flush_strb[i]) begin
          tag[i].vld <= 1'b0;
        end
      end
    end
    // Latch-based data storage: the latch is transparent while the gated,
    // inverted clock is high, which is enabled by `validate_strb[i]`.
    // NOTE(review): exact gating behaviour depends on `tc_clk_gating` and
    // `tc_clk_inverter`, which are defined outside this file.
    if (CFG.EARLY_LATCH) begin : gen_latch
      logic clk_vld;
      tc_clk_gating i_clk_gate (
        .clk_i     (clk_inv         ),
        .en_i      (validate_strb[i]),
        .test_en_i (1'b0            ),
        .clk_o     (clk_vld         )
      );
      // Data Array
      /* verilator lint_off NOLATCH */
      always_latch begin
        if (clk_vld) begin
          data[i] <= out_rsp_data_i;
        end
      end
      /* verilator lint_on NOLATCH */
    // Register-based data storage, loaded with the response data when
    // `validate_strb[i]` is set.
    // NOTE(review): `FFLNR` is presumably the load-enable, no-reset register
    // macro from common_cells - confirm in registers.svh.
    end else begin : gen_ff
      `FFLNR(data[i], out_rsp_data_i, validate_strb[i], clk_i)
    end
  end

  // ----
  // HIT
  // ----
  // Detect an early multi-hit, i.e., the early portion of the tag matched
  // more than one entry of the tag array. This can only happen if the early
  // tag is narrower than the full tag.
  if (CFG.L0_TAG_WIDTH == CFG.L0_EARLY_TAG_WIDTH) begin : gen_no_multihit_detection
    assign hit_early_is_onehot = 1'b1;
  end else begin : gen_multihit_detection
    onehot #(
      .Width (CFG.L0_LINE_COUNT)
    ) i_onehot_hit_early (
      .d_i         (hit_early),
      .is_onehot_o (hit_early_is_onehot)
    );
  end

  // The request is served if a line hits and the early hit was unique.
  assign in_ready_o = hit_early_is_onehot & hit_any;

  logic [CFG.LINE_WIDTH-1:0] ins_data;
  always_comb begin : data_muxer
    // AND-OR multiplexer: gate every line with its early hit bit and merge.
    ins_data = '0;
    for (int unsigned line = 0; line < CFG.L0_LINE_COUNT; line++) begin
      ins_data = ins_data | (data[line] & {CFG.LINE_WIDTH{hit_early[line]}});
    end
    // Move the addressed fetch word down to the LSBs of the output.
    in_data_o = ins_data >> (CFG.FETCH_DW * in_addr_i[CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN]);
  end

  // -------
  // Evictor
  // -------
  // Round-robin pointer to the next line to evict.
  logic [$clog2(CFG.L0_LINE_COUNT)-1:0] cnt_d, cnt_q;
  // Pointer successor and the one-hot strobes of the two candidate lines.
  logic [$clog2(CFG.L0_LINE_COUNT)-1:0] cnt_next;
  logic [CFG.L0_LINE_COUNT-1:0] evict_first, evict_second;

  always_comb begin : evictor
    cnt_next     = cnt_q + 1;
    evict_first  = 1 << cnt_q;
    evict_second = 1 << cnt_next;

    if (evict_req) begin
      if (evict_first == hit_early) begin
        // The round-robin candidate is the line which currently hits: skip
        // it and take the next one instead.
        evict_strb = evict_second;
        cnt_d      = cnt_q + 2;
      end else begin
        evict_strb = evict_first;
        cnt_d      = cnt_next;
      end
    end else begin
      // No eviction requested: keep the pointer and evict nothing.
      evict_strb = '0;
      cnt_d      = cnt_q;
    end
  end

  always_comb begin : flush
    if (flush_valid_i) begin
      // An external flush invalidates every line.
      flush_strb = '1;
    end else if (hit_any && !hit_early_is_onehot) begin
      // Multi-hit on the early tag: invalidate all entries which matched
      // early but did not hit in the final comparison.
      flush_strb = hit_early & ~hit;
    end else begin
      flush_strb = '0;
    end
  end

  // Round-robin eviction pointer.
  `FF(cnt_q, cnt_d, '0)

  // -------------
  // Miss Handling
  // -------------
  // A miss requests the line-aligned address of the current fetch.
  assign refill_valid       = miss;
  assign refill.is_prefetch = 1'b0;
  assign refill.addr        = addr_tag << CFG.LINE_ALIGN;

  // `pending_refill_q` flags an outstanding refill or prefetch;
  // `pending_line_refill_q` captures the evicted line it will be written to.
  `FF(pending_refill_q, pending_refill_d, '0)
  `FFLNR(pending_line_refill_q, evict_strb, evict_req, clk_i)

  always_comb begin
    pending_refill_d = pending_refill_q;
    if (pending_refill_q) begin
      // Clear once the response has been accepted.
      if (out_rsp_valid_i & out_rsp_ready_o) begin
        pending_refill_d = 1'b0;
      end
    end else if ((refill_valid && refill_ready) || latch_prefetch) begin
      // Set when a refill request is handed over or a prefetch is latched.
      pending_refill_d = 1'b1;
    end
  end

  // Responses are always accepted and validate the pending line.
  assign out_rsp_ready_o = 1'b1;
  assign validate_strb   = pending_line_refill_q & {CFG.L0_LINE_COUNT{out_rsp_valid_i}};

  assign in_error_o = '0;

  // Request ID: the L0 identifier with the prefetch flag in the LSB.
  assign out_req_id_o   = {L0_ID, out_req.is_prefetch};
  assign out_req_addr_o = out_req.addr;

  // Priority arbitrate requests: a refill always wins over a prefetch.
  always_comb begin
    if (refill_valid) begin
      out_req         = refill;
      out_req_valid_o = refill_valid;
      refill_ready    = out_req_ready_i;
      prefetch_ready  = 1'b0;
    end else begin
      out_req         = prefetch;
      out_req_valid_o = prefetch_valid;
      refill_ready    = 1'b0;
      prefetch_ready  = out_req_ready_i;
    end
  end

  // -------------
  // Pre-fetching
  // -------------
  // Generate a prefetch request if prefetching is enabled, the cache hits,
  // the candidate line is not yet present (no tag matches
  // `addr_tag_prefetch`), and there is no other refill in progress.
  assign prefetcher_out.vld = enable_prefetching_i & hit_any & ~hit_prefetch_any & ~pending_refill_q;

  // Number of 32-bit instruction slots per cache line.
  localparam FETCH_PKTS = CFG.LINE_WIDTH/32;
  // Per-slot pre-decode results: branch predicted taken, and JAL.
  logic [FETCH_PKTS-1:0] is_branch_taken;
  logic [FETCH_PKTS-1:0] is_jal;
  logic [FETCH_PKTS-1:0] mask;
  // Make sure that we only look at the packets which are of interest: clear
  // the mask bits of all slots that lie before the currently fetched word.
  assign mask = '1 << in_addr_i[CFG.LINE_ALIGN-1:2];

  // Instruction aware pre-fetching
  for (genvar i = 0; i < FETCH_PKTS; i++) begin : gen_pre_decode
    // iterate over the fetch packets (32 bits per instruction)
    always_comb begin
      is_branch_taken[i] = 1'b0;
      is_jal[i] = 1'b0;
      // Only decode if the early hit was unique; otherwise `ins_data` is the
      // OR of several lines.
      if (hit_early_is_onehot) begin
        unique casez (ins_data[i*32+:32])
          // static prediction
          riscv_instr::BEQ,
          riscv_instr::BNE,
          riscv_instr::BLT,
          riscv_instr::BGE,
          riscv_instr::BLTU,
          riscv_instr::BGEU: begin
            // look at the sign bit of the immediate field
            // backward branches (immediate negative) taken
            // forward branches not taken
            is_branch_taken[i] = ins_data[i*32+31];
          end
          riscv_instr::JAL: begin
            is_jal[i] = 1'b1;
          end
          // we can't do anything about the JALR case as we don't
          // know the destination.
          default:;
        endcase
      end
    end
  end

  // Slot index of the selected branch/JAL and a flag signalling that no
  // candidate was found in the line.
  logic [$clog2(FETCH_PKTS)-1:0] taken_idx;
  logic no_prefetch;
  // Bit position of the selected instruction within `ins_data`.
  logic [$clog2(CFG.LINE_WIDTH)-1:0] ins_idx;
  assign ins_idx = 32*taken_idx;
  // Find first taken branch
  // NOTE(review): `MODE(0)` presumably selects counting from the LSB, i.e.,
  // the lowest-addressed candidate - confirm against the `lzc` module.
  lzc #(
    .WIDTH(FETCH_PKTS),
    .MODE(0)
  ) i_lzc_branch (
    // look at branches and jals
    .in_i (mask & (is_branch_taken | is_jal)),
    .cnt_o (taken_idx),
    .empty_o (no_prefetch)
  );

  addr_t base_addr, offset, uj_imm, sb_imm;
  // Byte offset of the selected instruction within the cache line.
  logic [CFG.LINE_ALIGN-1:0] base_offset;
  assign base_offset = taken_idx << 2;
  // Reassemble the immediates of the selected instruction from bits 31,
  // 19:12, 20 and 30:21 (JAL) respectively 31, 7, 30:25 and 11:8 (branch),
  // append a zero LSB, and sign-extend to the address width.
  assign uj_imm = $signed({ins_data[ins_idx+31], ins_data[ins_idx+12+:8], ins_data[ins_idx+20], ins_data[ins_idx+21+:10], 1'b0});
  assign sb_imm = $signed({ins_data[ins_idx+31], ins_data[ins_idx+7], ins_data[ins_idx+25+:6], ins_data[ins_idx+8+:4], 1'b0});

  // next address calculation
  always_comb begin
    // default is next line predictor: current fetch address plus one line
    // If a candidate was found, the base is the address of that instruction.
    base_addr = no_prefetch ? in_addr_i : {in_addr_i >> CFG.LINE_ALIGN, base_offset};
    offset = (1 << CFG.LINE_ALIGN);
    // If the cache-line contains a taken branch, compute the pre-fetch address with the jump's offset.
    unique case ({is_branch_taken[taken_idx] & ~no_prefetch, is_jal[taken_idx] & ~no_prefetch})
      // JAL: UJ immediate
      2'b01: offset = uj_imm;
      // Branch: SB immediate
      2'b10: offset = sb_imm;
      default:;
    endcase
  end

  // Align the predicted target down to the start of its cache line.
  assign prefetcher_out.addr = ($signed(base_addr) + offset) >> CFG.LINE_ALIGN << CFG.LINE_ALIGN;

  // Tag of the prefetch candidate; used by the tag compare to check whether
  // the cache-line we want to pre-fetch is already present.
  assign addr_tag_prefetch = prefetcher_out.addr >> CFG.LINE_ALIGN;

  // Capture a new prefetch candidate only while no request is latched.
  assign latch_prefetch = ~prefetch_req_q.vld & prefetcher_out.vld;

  always_comb begin
    // Hold the latched request by default.
    prefetch_req_d.vld  = prefetch_req_q.vld;
    prefetch_req_d.addr = prefetch_req_q.addr;

    if (latch_prefetch) begin
      // Latching a new candidate takes precedence over the handshake.
      prefetch_req_d.vld  = 1'b1;
      prefetch_req_d.addr = prefetcher_out.addr;
    end else if (prefetch_ready) begin
      // The request has been accepted on the output port.
      prefetch_req_d.vld = 1'b0;
    end
  end

  // The latched request drives the prefetch input of the arbiter.
  assign prefetch_valid       = prefetch_req_q.vld;
  assign prefetch.addr        = prefetch_req_q.addr;
  assign prefetch.is_prefetch = 1'b1;

  `FF(prefetch_req_q.vld, prefetch_req_d.vld, '0)
  `FF(prefetch_req_q.addr, prefetch_req_d.addr, '0)

  // ------------------
  // Performance Events
  // ------------------
  always_comb begin
    // Events which are not driven below stay at zero.
    icache_events_o = '0;
    icache_events_o.l0_prefetch   = prefetcher_out.vld;
    icache_events_o.l0_miss       = miss;
    // Hits are only counted for valid requests.
    icache_events_o.l0_hit        = in_valid_i & hit_any;
    icache_events_o.l0_double_hit = in_valid_i & hit_any & ~hit_early_is_onehot;
  end

  // ----------
  // Assertions
  // ----------
  // At most one line may hit on the full tag comparison.
  `ASSERT(HitOnehot, $onehot0(hit))
  // make sure only one signal is high and the conditions are mutual exclusive
  `ASSERT(ExclusiveEvict, $onehot0({evict_because_miss, evict_because_prefetch}))
  // The fetch request (valid and address) must be held stable until it is
  // served.
  `ASSERT(InstReqStable, in_valid_i && !in_ready_o |=> in_valid_i)
  `ASSERT(InstReqDataStable, in_valid_i && !in_ready_o |=> $stable(in_addr_i))

  // The refill request (valid, address, and ID) must be held stable until it
  // is accepted.
  `ASSERT(RefillReqStable, out_req_valid_o && !out_req_ready_i |=> out_req_valid_o)
  `ASSERT(RefillReqDataStable, out_req_valid_o && !out_req_ready_i |=> $stable(out_req_addr_o) && $stable(out_req_id_o))

  // The refill response must be held stable until it is accepted.
  `ASSERT(RefillRspStable, out_rsp_valid_i && !out_rsp_ready_o |=> out_rsp_valid_i)
  `ASSERT(RefillRspDataStable, out_rsp_valid_i && !out_rsp_ready_o |=> $stable(out_rsp_data_i) && $stable(out_rsp_error_i) && $stable(out_rsp_id_i))
  // Make sure we observe a double hit condition: a line hits on the full tag
  // while more than one line matches on the early tag. The cover is written
  // as a plain conjunction; an implication would be covered vacuously
  // whenever there is no hit, and the consequent must check for the early
  // hit vector NOT being one-hot.
  `COVER(HitEarlyNotOnehot, hit_any && !$onehot(hit_early))

endmodule