// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend


import ariane_pkg::*;

module frontend #(
    parameter logic [63:0]     DmBaseAddress = 64'h0 // debug module base address
) (
    input  logic               clk_i,              // Clock
    input  logic               rst_ni,             // Asynchronous reset active low
    input  logic               flush_i,            // flush request for PCGEN
    input  logic               flush_bp_i,         // flush branch prediction
    input  logic               debug_mode_i,
    // global input
    input  logic [63:0]        boot_addr_i,
    // Set a new PC
    // mispredict
    input  branchpredict_t     resolved_branch_i,  // from controller signaling a branch_predict -> update BTB
    // from commit, when flushing the whole pipeline
    input  logic               set_pc_commit_i,    // Take the PC from commit stage
    input  logic [63:0]        pc_commit_i,        // PC of instruction in commit stage
    // CSR input
    input  logic [63:0]        epc_i,              // exception PC which we need to return to
    input  logic               eret_i,             // return from exception
    input  logic [63:0]        trap_vector_base_i, // base of trap vector
    input  logic               ex_valid_i,         // exception is valid - from commit
    input  logic               set_debug_pc_i,     // jump to debug address
    // Instruction Fetch
    input  icache_dreq_o_t     icache_dreq_i,
    output icache_dreq_i_t     icache_dreq_o,
    // instruction output port -> to processor back-end
    output frontend_fetch_t    fetch_entry_o,       // fetch entry containing all relevant data for the ID stage
    output logic               fetch_entry_valid_o, // instruction in IF is valid
    input  logic               fetch_ack_i          // ID acknowledged this instruction
);
    // Registers
    logic [31:0] icache_data_q;
    logic        icache_valid_q;
    logic        icache_ex_valid_q;
    logic        instruction_valid;
    logic [INSTR_PER_FETCH-1:0] instr_is_compressed;

    logic [63:0] icache_vaddr_q;
    // BHT, BTB and RAS prediction
    bht_prediction_t bht_prediction;
    btb_prediction_t btb_prediction;
    ras_t            ras_predict;
    bht_update_t     bht_update;
    btb_update_t     btb_update;
    logic            ras_push, ras_pop;
    logic [63:0]     ras_update;

    // instruction fetch is ready
    logic          if_ready;
    logic [63:0]   npc_d, npc_q; // next PC
    logic npc_rst_load_q; //indicates whether we come out of reset (then we need to load boot_addr_i)
    // -----------------------
    // Ctrl Flow Speculation
    // -----------------------
    // RVI ctrl flow prediction
    logic [INSTR_PER_FETCH-1:0]       rvi_return, rvi_call, rvi_branch,
                                      rvi_jalr, rvi_jump;
    logic [INSTR_PER_FETCH-1:0][63:0] rvi_imm;
    // RVC branching
    logic [INSTR_PER_FETCH-1:0]       is_rvc;
    logic [INSTR_PER_FETCH-1:0]       rvc_branch, rvc_jump, rvc_jr, rvc_return,
                                      rvc_jalr, rvc_call;
    logic [INSTR_PER_FETCH-1:0][63:0] rvc_imm;
    // re-aligned instruction and address (coming from cache - combinationally)
    logic [INSTR_PER_FETCH-1:0][31:0] instr;
    logic [INSTR_PER_FETCH-1:0][63:0] addr;

    logic [63:0]   bp_vaddr;
    logic          bp_valid; // we have a valid branch-prediction
    logic          is_mispredict;
    // branch-prediction which we inject into the pipeline
    branchpredict_sbe_t  bp_sbe;
    // fetch fifo credit system
    logic fifo_valid, fifo_ready, fifo_empty, fifo_pop;
    logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q;
    logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_d;
    logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_q;

    // save the unaligned part of the instruction to this ff
    logic [15:0] unaligned_instr_d,   unaligned_instr_q;
    // the last instruction was unaligned
    logic        unaligned_d,         unaligned_q;
    // register to save the unaligned address
    logic [63:0] unaligned_address_d, unaligned_address_q;

    for (genvar i = 0; i < INSTR_PER_FETCH; i ++) begin
        // LSB != 2'b11
        assign instr_is_compressed[i] = ~&icache_data_q[i * 16 +: 2];
    end

    // Soft-realignment to do branch-prediction
    always_comb begin : re_align
        unaligned_d = unaligned_q;
        unaligned_address_d = unaligned_address_q;
        unaligned_instr_d = unaligned_instr_q;
        instruction_valid = icache_valid_q;

        // 32-bit can contain 2 instructions
        instr[0] = icache_data_q;
        addr[0]  = icache_vaddr_q;

        instr[1] = '0;
        addr[1]  = {icache_vaddr_q[63:2], 2'b10};

        if (icache_valid_q) begin
            // last instruction was unaligned
            if (unaligned_q) begin
                instr[0] = {icache_data_q[15:0], unaligned_instr_q};
                addr[0] = unaligned_address_q;

                unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
                unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle

                // check if this is instruction is still unaligned e.g.: it is not compressed
                // if its compressed re-set unaligned flag
                // for 32 bit we can simply check the next instruction and whether it is compressed or not
                // if it is compressed the next fetch will contain an aligned instruction
                if (instr_is_compressed[1]) begin
                    unaligned_d = 1'b0;
                    instr[1] = {16'b0, icache_data_q[31:16]};
                end
            end else if (instr_is_compressed[0]) begin // instruction zero is RVC
                // is instruction 1 also compressed
                // yes? -> no problem, no -> we've got an unaligned instruction
                if (instr_is_compressed[1]) begin
                    instr[1] = {16'b0, icache_data_q[31:16]};
                end else begin
                    unaligned_instr_d = icache_data_q[31:16];
                    unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
                    unaligned_d = 1'b1;
                end
            end // else -> normal fetch
        end

        // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
        // received the next instruction
        if (icache_valid_q && icache_vaddr_q[1] && !instr_is_compressed[1]) begin
            instruction_valid = 1'b0;
            unaligned_d = 1'b1;
            unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
            unaligned_instr_d = icache_data_q[31:16];
        end

        // if we killed the consecutive fetch we are starting on a clean slate
        if (icache_dreq_o.kill_s2) begin
            unaligned_d = 1'b0;
        end
    end


    logic [INSTR_PER_FETCH:0] taken;
    // control front-end + branch-prediction
    always_comb begin : frontend_ctrl
        automatic logic take_rvi_cf; // take the control flow change (non-compressed)
        automatic logic take_rvc_cf; // take the control flow change (compressed)

        take_rvi_cf       = 1'b0;
        take_rvc_cf       = 1'b0;
        ras_pop           = 1'b0;
        ras_push          = 1'b0;
        ras_update        = '0;
        taken             = '0;
        take_rvi_cf       = 1'b0;

        bp_vaddr          = '0;    // predicted address
        bp_valid          = 1'b0;  // prediction is valid

        bp_sbe.cf_type    = RAS;

        // only predict if the response is valid
        if (instruction_valid) begin
            // look at instruction 0, 1, 2, ...
            for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
                // only speculate if the previous instruction was not taken
                if (!taken[i]) begin
                    // function call
                    ras_push = rvi_call[i] | rvc_call[i];
                    ras_update = addr[i] + (rvc_call[i] ? 2 : 4);

                    // Branch Prediction - **speculative**
                    if (rvi_branch[i] || rvc_branch[i]) begin
                        bp_sbe.cf_type = BHT;
                        // dynamic prediction valid?
                        if (bht_prediction.valid) begin
                            take_rvi_cf = rvi_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
                            take_rvc_cf = rvc_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
                        // default to static prediction
                        end else begin
                            // set if immediate is negative - static prediction
                            take_rvi_cf = rvi_branch[i] & rvi_imm[i][63];
                            take_rvc_cf = rvc_branch[i] & rvc_imm[i][63];
                        end
                    end

                    // unconditional jumps
                    if (rvi_jump[i] || rvc_jump[i]) begin
                        take_rvi_cf = rvi_jump[i];
                        take_rvc_cf = rvc_jump[i];
                    end

                    // to take this jump we need a valid prediction target **speculative**
                    if ((rvi_jalr[i] || rvc_jalr[i]) && ~(rvi_call[i] || rvc_call[i])) begin
                        bp_sbe.cf_type = BTB;
                        if (btb_prediction.valid) begin
                            bp_vaddr = btb_prediction.target_address;
                            taken[i+1] = 1'b1;
                        end
                    end

                    // is it a return and the RAS contains a valid prediction? **speculative**
                    if ((rvi_return[i] || rvc_return[i]) && ras_predict.valid) begin
                        bp_vaddr = ras_predict.ra;
                        ras_pop = 1'b1;
                        taken[i+1] = 1'b1;
                        bp_sbe.cf_type = RAS;
                    end

                    if (take_rvi_cf) begin
                        taken[i+1] = 1'b1;
                        bp_vaddr = addr[i] + rvi_imm[i];
                    end

                    if (take_rvc_cf) begin
                        taken[i+1] = 1'b1;
                        bp_vaddr = addr[i] + rvc_imm[i];
                    end

                    // we are not interested in the lower instruction
                    if (icache_vaddr_q[1]) begin
                        taken[1] = 1'b0;
                        // TODO(zarubaf): that seems to be overly pessimistic
                        ras_pop = 1'b0;
                        ras_push = 1'b0;
                    end
                end
            end
        end

        bp_valid = |taken;
        // assemble scoreboard entry
        bp_sbe.valid = bp_valid;
        bp_sbe.predict_address = bp_vaddr;
        bp_sbe.predict_taken = bp_valid;
    end

    assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
    // we mis-predicted so kill the icache request and the fetch queue
    assign icache_dreq_o.kill_s1 = is_mispredict | flush_i;
    // if we have a valid branch-prediction we need to kill the last cache request
    assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
    assign fifo_valid = icache_valid_q;

    // ----------------------------------------
    // Update Control Flow Predictions
    // ----------------------------------------
    // BHT
    assign bht_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BHT);
    assign bht_update.pc    = resolved_branch_i.pc;
    assign bht_update.mispredict = resolved_branch_i.is_mispredict;
    assign bht_update.taken = resolved_branch_i.is_taken;
    // BTB
    assign btb_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BTB);
    assign btb_update.pc    = resolved_branch_i.pc;
    assign btb_update.target_address = resolved_branch_i.target_address;
    assign btb_update.clear = resolved_branch_i.clear;

    // -------------------
    // Next PC
    // -------------------
    // next PC (NPC) can come from (in order of precedence):
    // 0. Default assignment
    // 1. Branch Predict taken
    // 2. Control flow change request (misprediction)
    // 3. Return from environment call
    // 4. Exception/Interrupt
    // 5. Pipeline Flush because of CSR side effects
    // Mis-predict handling is a little bit different
    // select PC a.k.a PC Gen
    always_comb begin : npc_select
        automatic logic [63:0] fetch_address;

        // check whether we come out of reset
        // this is a workaround. some tools have issues
        // having boot_addr_i in the asynchronous
        // reset assignment to npc_q, even though
        // boot_addr_i will be assigned a constant
        // on the top-level.
        if (npc_rst_load_q) begin
            npc_d         = boot_addr_i;
            fetch_address = boot_addr_i;
        end else begin
            fetch_address    = npc_q;
            // keep stable by default
            npc_d            = npc_q;
        end

        // -------------------------------
        // 1. Branch Prediction
        // -------------------------------
        if (bp_valid) begin
            fetch_address = bp_vaddr;
            npc_d = bp_vaddr;
        end
        // -------------------------------
        // 0. Default assignment
        // -------------------------------
        if (if_ready) begin
            npc_d = {fetch_address[63:2], 2'b0}  + 'h4;
        end
        // -------------------------------
        // 2. Control flow change request
        // -------------------------------
        if (is_mispredict) begin
            npc_d = resolved_branch_i.target_address;
        end
        // -------------------------------
        // 3. Return from environment call
        // -------------------------------
        if (eret_i) begin
            npc_d = epc_i;
        end
        // -------------------------------
        // 4. Exception/Interrupt
        // -------------------------------
        if (ex_valid_i) begin
            npc_d    = trap_vector_base_i;
        end
        // -----------------------------------------------
        // 5. Pipeline Flush because of CSR side effects
        // -----------------------------------------------
        // On a pipeline flush start fetching from the next address
        // of the instruction in the commit stage
        if (set_pc_commit_i) begin
            // we came here from a flush request of a CSR instruction or AMO,
            // as CSR or AMO instructions do not exist in a compressed form
            // we can unconditionally do PC + 4 here
            // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
            npc_d    = pc_commit_i + 64'h4;
        end
        // -------------------------------
        // 6. Debug
        // -------------------------------
        // enter debug on a hard-coded base-address
        if (set_debug_pc_i) begin
            npc_d = DmBaseAddress + dm::HaltAddress;
        end

        icache_dreq_o.vaddr = fetch_address;
    end

    // -------------------
    // Credit-based fetch FIFO flow ctrl
    // -------------------
    assign fifo_credits_d       =  (flush_i) ? FETCH_FIFO_DEPTH :
                                               fifo_credits_q + fifo_pop + s2_eff_kill - issue_req;

    // check whether there is a request in flight that is being killed now
    // if this is the case, we need to increment the credit by 1
    assign s2_eff_kill         = s2_in_flight_q & icache_dreq_o.kill_s2;
    assign s2_in_flight_d      = (flush_i)             ? 1'b0 :
                                 (issue_req)           ? 1'b1 :
                                 (icache_dreq_i.valid) ? 1'b0 :
                                                         s2_in_flight_q;

    // only enable counter if current request is not being killed
    assign issue_req           = if_ready & (~icache_dreq_o.kill_s1);
    assign fifo_pop            = fetch_ack_i & fetch_entry_valid_o;
    assign fifo_ready          = (|fifo_credits_q);
    assign if_ready            =  icache_dreq_i.ready & fifo_ready;
    assign icache_dreq_o.req   =  fifo_ready;
    assign fetch_entry_valid_o = ~fifo_empty;


//pragma translate_off
`ifndef VERILATOR
  fetch_fifo_credits0 : assert property (
      @(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
         else $fatal(1,"[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
    initial begin
        assert (FETCH_FIFO_DEPTH <= 8) else $fatal(1,"[frontend] fetch fifo deeper than 8 not supported");
        assert (FETCH_WIDTH == 32) else $fatal(1,"[frontend] fetch width != not supported");
    end
`endif
//pragma translate_on

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            npc_q                <= '0;
            npc_rst_load_q       <= 1'b1;
            icache_data_q        <= '0;
            icache_valid_q       <= 1'b0;
            icache_vaddr_q       <= 'b0;
            icache_ex_valid_q    <= 1'b0;
            unaligned_q          <= 1'b0;
            unaligned_address_q  <= '0;
            unaligned_instr_q    <= '0;
            fifo_credits_q       <= FETCH_FIFO_DEPTH;
            s2_in_flight_q       <= 1'b0;
        end else begin
            npc_rst_load_q       <= 1'b0;
            npc_q                <= npc_d;
            icache_data_q        <= icache_dreq_i.data;
            icache_valid_q       <= icache_dreq_i.valid;
            icache_vaddr_q       <= icache_dreq_i.vaddr;
            icache_ex_valid_q    <= icache_dreq_i.ex.valid;
            unaligned_q          <= unaligned_d;
            unaligned_address_q  <= unaligned_address_d;
            unaligned_instr_q    <= unaligned_instr_d;
            fifo_credits_q       <= fifo_credits_d;
            s2_in_flight_q       <= s2_in_flight_d;
        end
    end

    ras #(
        .DEPTH  ( RAS_DEPTH   )
    ) i_ras (
        .push_i ( ras_push    ),
        .pop_i  ( ras_pop     ),
        .data_i ( ras_update  ),
        .data_o ( ras_predict ),
        .*
    );

    btb #(
        .NR_ENTRIES       ( BTB_ENTRIES      )
    ) i_btb (
        .clk_i,
        .rst_ni,
        .flush_i          ( flush_bp_i       ),
        .debug_mode_i,
        .vpc_i            ( icache_vaddr_q   ),
        .btb_update_i     ( btb_update       ),
        .btb_prediction_o ( btb_prediction   )
    );

    bht #(
        .NR_ENTRIES       ( BHT_ENTRIES      )
    ) i_bht (
        .clk_i,
        .rst_ni,
        .flush_i          ( flush_bp_i       ),
        .debug_mode_i,
        .vpc_i            ( icache_vaddr_q   ),
        .bht_update_i     ( bht_update       ),
        .bht_prediction_o ( bht_prediction   )
    );

    for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
        instr_scan i_instr_scan (
            .instr_i      ( instr[i]      ),
            .is_rvc_o     ( is_rvc[i]     ),
            .rvi_return_o ( rvi_return[i] ),
            .rvi_call_o   ( rvi_call[i]   ),
            .rvi_branch_o ( rvi_branch[i] ),
            .rvi_jalr_o   ( rvi_jalr[i]   ),
            .rvi_jump_o   ( rvi_jump[i]   ),
            .rvi_imm_o    ( rvi_imm[i]    ),
            .rvc_branch_o ( rvc_branch[i] ),
            .rvc_jump_o   ( rvc_jump[i]   ),
            .rvc_jr_o     ( rvc_jr[i]     ),
            .rvc_return_o ( rvc_return[i] ),
            .rvc_jalr_o   ( rvc_jalr[i]   ),
            .rvc_call_o   ( rvc_call[i]   ),
            .rvc_imm_o    ( rvc_imm[i]    )
        );
    end


    fifo_v2 #(
        .DEPTH        (  8                   ),
        .dtype        ( frontend_fetch_t     )
    ) i_fetch_fifo (
        .clk_i       ( clk_i                 ),
        .rst_ni      ( rst_ni                ),
        .flush_i     ( flush_i               ),
        .testmode_i  ( 1'b0                  ),
        .full_o      (                       ),
        .empty_o     ( fifo_empty            ),
        .alm_full_o  (                       ),
        .alm_empty_o (                       ),
        .data_i      ( {icache_vaddr_q, icache_data_q, bp_sbe, taken[INSTR_PER_FETCH:1], icache_ex_valid_q} ),
        .push_i      ( fifo_valid            ),
        .data_o      ( fetch_entry_o         ),
        .pop_i       ( fifo_pop              )
    );

endmodule