// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.04.2017
// Description: Issues instruction from the scoreboard and fetches the operands
//              This also includes all the forwarding logic

import ariane_pkg::*;

// Issue / read-operands stage.
//
// Takes the instruction presented on issue_instr_i, decides whether it can be
// issued this cycle (operands available, functional unit free, no WAW hazard),
// selects the operands (register file, forwarded scoreboard value, PC, zimm or
// immediate) and registers them together with a one-hot-style "valid" strobe
// for the targeted functional unit (ID <-> EX pipeline register).
//
// Parameters:
//   NR_COMMIT_PORTS - number of register file write ports driven by commit.
//                     An elaboration-time assertion at the bottom of the module
//                     still restricts this to 2.
module issue_read_operands #(
    parameter int unsigned NR_COMMIT_PORTS = 2
)(
    input  logic                                   clk_i,    // Clock
    input  logic                                   rst_ni,   // Asynchronous reset active low
    // flush
    input  logic                                   flush_i,
    // coming from rename
    input  scoreboard_entry_t                      issue_instr_i,
    input  logic                                   issue_instr_valid_i,
    output logic                                   issue_ack_o,
    // lookup of the source operands in the scoreboard (forwarding path)
    output logic [REG_ADDR_SIZE-1:0]               rs1_o,
    input  logic [63:0]                            rs1_i,
    input  logic                                   rs1_valid_i,
    output logic [REG_ADDR_SIZE-1:0]               rs2_o,
    input  logic [63:0]                            rs2_i,
    input  logic                                   rs2_valid_i,
    output logic [REG_ADDR_SIZE-1:0]               rs3_o,
    input  logic [FLEN-1:0]                        rs3_i,
    input  logic                                   rs3_valid_i,
    // get clobber input
    // NOTE(review): the range [2**REG_ADDR_SIZE:0] has one more entry than there
    // are addressable registers; kept as is because it is part of the interface.
    input  fu_t [2**REG_ADDR_SIZE:0]               rd_clobber_gpr_i,
    input  fu_t [2**REG_ADDR_SIZE:0]               rd_clobber_fpr_i,
    // To FU, just single issue for now
    output fu_data_t                               fu_data_o,
    output logic [63:0]                            pc_o,
    output logic                                   is_compressed_instr_o,
    // ALU 1
    input  logic                                   flu_ready_i,      // Fixed latency unit ready to accept a new request
    output logic                                   alu_valid_o,      // Output is valid
    // Branches and Jumps
    output logic                                   branch_valid_o,   // this is a valid branch instruction
    output branchpredict_sbe_t                     branch_predict_o,
    // LSU
    input  logic                                   lsu_ready_i,      // FU is ready
    output logic                                   lsu_valid_o,      // Output is valid
    // MULT
    output logic                                   mult_valid_o,     // Output is valid
    // FPU
    input  logic                                   fpu_ready_i,      // FU is ready
    output logic                                   fpu_valid_o,      // Output is valid
    output logic [1:0]                             fpu_fmt_o,        // FP fmt field from instr.
    output logic [2:0]                             fpu_rm_o,         // FP rm field from instr.
    // CSR
    output logic                                   csr_valid_o,      // Output is valid
    // commit port
    input  logic [NR_COMMIT_PORTS-1:0][4:0]        waddr_i,
    input  logic [NR_COMMIT_PORTS-1:0][63:0]       wdata_i,
    input  logic [NR_COMMIT_PORTS-1:0]             we_gpr_i,
    input  logic [NR_COMMIT_PORTS-1:0]             we_fpr_i
    // committing instruction
    // from scoreboard
    // input  scoreboard_entry     commit_instr_i,
    // output logic                commit_ack_o
);
    logic stall;   // stall signal, we do not want to fetch any more entries
    logic fu_busy; // functional unit is busy
    logic [63:0] operand_a_regfile, operand_b_regfile;  // operands coming from regfile
    logic [FLEN-1:0] operand_c_regfile; // third operand only from fp regfile
    // output flipflop (ID <-> EX)
    logic [63:0] operand_a_n, operand_a_q,
                 operand_b_n, operand_b_q,
                 imm_n, imm_q;

    logic       alu_valid_n,    alu_valid_q;
    logic       mult_valid_n,   mult_valid_q;
    logic       fpu_valid_n,    fpu_valid_q;
    logic [1:0] fpu_fmt_n,      fpu_fmt_q;
    logic [2:0] fpu_rm_n,       fpu_rm_q;
    logic       lsu_valid_n,    lsu_valid_q;
    logic       csr_valid_n,    csr_valid_q;
    logic       branch_valid_n, branch_valid_q;

    logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
    fu_op operator_n, operator_q; // operation to perform
    fu_t  fu_n,       fu_q; // functional unit to use

    // forwarding signals
    logic forward_rs1, forward_rs2, forward_rs3;

    // original instruction stored in tval
    riscv::instruction_t orig_instr;
    assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);

    // ID <-> EX registers
    assign fu_data_o.operand_a = operand_a_q;
    assign fu_data_o.operand_b = operand_b_q;
    assign fu_data_o.fu        = fu_q;
    assign fu_data_o.operator  = operator_q;
    assign fu_data_o.trans_id  = trans_id_q;
    assign fu_data_o.imm       = imm_q;
    assign alu_valid_o         = alu_valid_q;
    assign branch_valid_o      = branch_valid_q;
    assign lsu_valid_o         = lsu_valid_q;
    assign csr_valid_o         = csr_valid_q;
    assign mult_valid_o        = mult_valid_q;
    assign fpu_valid_o         = fpu_valid_q;
    assign fpu_fmt_o           = fpu_fmt_q;
    assign fpu_rm_o            = fpu_rm_q;
    // ---------------
    // Issue Stage
    // ---------------

    // select the right busy signal
    // this obviously depends on the functional unit we need
    always_comb begin : unit_busy
        unique case (issue_instr_i.fu)
            NONE:
                fu_busy = 1'b0;
            // ALU, branch unit, CSR and multiplier all share the fixed latency unit handshake
            ALU, CTRL_FLOW, CSR, MULT:
                fu_busy = ~flu_ready_i;
            FPU, FPU_VEC:
                fu_busy = ~fpu_ready_i;
            LOAD, STORE:
                fu_busy = ~lsu_ready_i;
            default:
                fu_busy = 1'b0;
        endcase
    end

    // ---------------
    // Register stage
    // ---------------
    // check that all operands are available, otherwise stall
    // forward corresponding register
    always_comb begin : operands_available
        stall = 1'b0;
        // operand forwarding signals
        forward_rs1 = 1'b0;
        forward_rs2 = 1'b0;
        forward_rs3 = 1'b0; // FPR only
        // poll the scoreboard for those values
        rs1_o = issue_instr_i.rs1;
        rs2_o = issue_instr_i.rs2;
        rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field

        // 0. check that we are not using the zimm type in RS1
        //    as this is an immediate we do not have to wait on anything here
        // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
        // 2. poll the scoreboard
        if (~issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE
                                                                     : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin
            // check if the clobbering instruction is not a CSR instruction, CSR instructions can only
            // be fetched through the register file since they can't be forwarded
            // if the operand is available, forward it. CSRs don't write to/from FPR
            if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs1] != CSR)) begin
                forward_rs1 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end

        if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE
                                         : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin
            // if the operand is available, forward it. CSRs don't write to/from FPR
            if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs2] != CSR)) begin
                forward_rs2 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end

        if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin
            // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
            if (rs3_valid_i) begin
                forward_rs3 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end
    end

    // Forwarding/Output MUX
    // Later assignments override earlier ones, so the effective priority for operand A is
    // zimm > PC > forwarded value > register file, and for operand B it is
    // immediate > forwarded value > register file.
    always_comb begin : forwarding_operand_select
        // default is regfiles (gpr or fpr)
        operand_a_n = operand_a_regfile;
        operand_b_n = operand_b_regfile;
        // immediates are the third operands in the store case
        // for FP operations, the imm field can also be the third operand from the regfile
        imm_n      = is_imm_fpr(issue_instr_i.op) ? operand_c_regfile : issue_instr_i.result;
        trans_id_n = issue_instr_i.trans_id;
        fu_n       = issue_instr_i.fu;
        operator_n = issue_instr_i.op;
        // or should we forward
        if (forward_rs1) begin
            operand_a_n  = rs1_i;
        end

        if (forward_rs2) begin
            operand_b_n  = rs2_i;
        end

        if (forward_rs3) begin
            imm_n  = rs3_i;
        end

        // use the PC as operand a
        if (issue_instr_i.use_pc) begin
            operand_a_n = issue_instr_i.pc;
        end

        // use the zimm as operand a
        if (issue_instr_i.use_zimm) begin
            // zero extend the 5 bit zimm to the full 64 bit operand width
            // (59 + 5 = 64, no reliance on implicit extension)
            operand_a_n = {59'b0, issue_instr_i.rs1[4:0]};
        end
        // or is it an immediate (including PC), this is not the case for a store and control flow instructions
        // also make sure operand B is not already used as an FP operand
        if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin
            operand_b_n = issue_instr_i.result;
        end
    end

    // FU select, assert the correct valid out signal (in the next cycle)
    always_comb begin : unit_valid
        alu_valid_n    = 1'b0;
        lsu_valid_n    = 1'b0;
        mult_valid_n   = 1'b0;
        fpu_valid_n    = 1'b0;
        fpu_fmt_n      = 2'b0;
        fpu_rm_n       = 3'b0;
        csr_valid_n    = 1'b0;
        branch_valid_n = 1'b0;
        // Exception pass through:
        // If an exception has occurred simply pass it through
        // we do not want to issue this instruction
        if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
            case (issue_instr_i.fu)
                ALU:
                    alu_valid_n    = 1'b1;
                CTRL_FLOW:
                    branch_valid_n = 1'b1;
                MULT:
                    mult_valid_n   = 1'b1;
                FPU : begin
                    fpu_valid_n    = 1'b1;
                    fpu_fmt_n      = orig_instr.rftype.fmt; // fmt bits from instruction
                    fpu_rm_n       = orig_instr.rftype.rm;  // rm bits from instruction
                end
                FPU_VEC : begin
                    fpu_valid_n    = 1'b1;
                    fpu_fmt_n      = orig_instr.rvftype.vfmt;         // vfmt bits from instruction
                    fpu_rm_n       = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction
                end
                LOAD, STORE:
                    lsu_valid_n    = 1'b1;
                CSR:
                    csr_valid_n    = 1'b1;
                default:;
            endcase
        end
        // if we got a flush request, de-assert the valid flag, otherwise we will start this
        // functional unit with the wrong inputs
        if (flush_i) begin
            alu_valid_n    = 1'b0;
            lsu_valid_n    = 1'b0;
            mult_valid_n   = 1'b0;
            fpu_valid_n    = 1'b0;
            csr_valid_n    = 1'b0;
            branch_valid_n = 1'b0;
        end
    end

    // We can issue an instruction if we do not detect that any other instruction is writing the same
    // destination register.
    // NOTE(review): no unresolved-branch check is performed in this module; if one is required it
    // has to live elsewhere.
    always_comb begin : issue_scoreboard
        // default assignment
        issue_ack_o = 1'b0;
        // check that we didn't stall, that the instruction we got is valid
        // and that the functional unit we need is not busy
        if (issue_instr_valid_i) begin
            // check that the corresponding functional unit is not busy
            if (~stall && ~fu_busy) begin
                // -----------------------------------------
                // WAW - Write After Write Dependency Check
                // -----------------------------------------
                // no other instruction has the same destination register -> issue the instruction
                if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE)
                                                : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
                    issue_ack_o = 1'b1;
                end
                // or check that the target destination register will be written in this cycle by the
                // commit stage
                for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) begin
                    if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd)
                                                    : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin
                        issue_ack_o = 1'b1;
                    end
                end
            end
            // we can also issue the instruction under the following two circumstances:
            // we can do this even if we are stalled or no functional unit is ready (as we don't need one)
            // the decoder needs to make sure that the instruction is marked as valid when it does not
            // need any functional unit or if an exception occurred previous to the execute stage.
            // 1. we already got an exception
            if (issue_instr_i.ex.valid) begin
                issue_ack_o = 1'b1;
            end
            // 2. it is an instruction which does not need any functional unit
            if (issue_instr_i.fu == NONE) begin
                issue_ack_o = 1'b1;
            end
        end
        // after a multiplication was issued we can only issue another multiplication
        // otherwise we will get contentions on the fixed latency bus
        // (this override comes last and therefore also blocks the exception / NONE cases above)
        if (mult_valid_q && issue_instr_i.fu != MULT) begin
            issue_ack_o = 1'b0;
        end
    end

    // ----------------------
    // Integer Register File
    // ----------------------
    logic [1:0][63:0] rdata;
    logic [1:0][4:0]  raddr_pack;

    // pack signals
    logic [NR_COMMIT_PORTS-1:0][4:0]  waddr_pack;
    logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack;
    logic [NR_COMMIT_PORTS-1:0]       we_pack;
    assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
    // the commit port vectors already have the packed layout the register file expects,
    // so they are passed through as a whole instead of hard-coding indices 1 and 0
    assign waddr_pack = waddr_i;
    assign wdata_pack = wdata_i;
    assign we_pack    = we_gpr_i;

    ariane_regfile #(
        .DATA_WIDTH     ( 64              ),
        .NR_READ_PORTS  ( 2               ),
        .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
        .ZERO_REG_ZERO  ( 1               )
    ) i_ariane_regfile (
        .test_en_i ( 1'b0       ),
        .raddr_i   ( raddr_pack ),
        .rdata_o   ( rdata      ),
        .waddr_i   ( waddr_pack ),
        .wdata_i   ( wdata_pack ),
        .we_i      ( we_pack    ),
        .*
    );

    // -----------------------------
    // Floating-Point Register File
    // -----------------------------
    logic [2:0][FLEN-1:0] fprdata;

    // pack signals
    logic [2:0][4:0]  fp_raddr_pack;
    // write data is FLEN wide per port so that it matches DATA_WIDTH of the FP register file
    logic [NR_COMMIT_PORTS-1:0][FLEN-1:0] fp_wdata_pack;

    generate
        if (FP_PRESENT) begin : float_regfile_gen
            assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
            // take the lower FLEN bits of every commit port individually; connecting the 64 bit
            // wide wdata_pack directly would mis-align the ports whenever FLEN != 64
            for (genvar i = 0; i < NR_COMMIT_PORTS; i++) begin : fp_wdata_pack_gen
                assign fp_wdata_pack[i] = wdata_i[i][FLEN-1:0];
            end

            ariane_regfile #(
                .DATA_WIDTH     ( FLEN            ),
                .NR_READ_PORTS  ( 3               ),
                .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
                .ZERO_REG_ZERO  ( 0               )
            ) i_ariane_fp_regfile (
                .test_en_i ( 1'b0          ),
                .raddr_i   ( fp_raddr_pack ),
                .rdata_o   ( fprdata       ),
                .waddr_i   ( waddr_pack    ),
                .wdata_i   ( fp_wdata_pack ),
                .we_i      ( we_fpr_i      ),
                .*
            );
        end else begin : no_fpr_gen
            // no FP register file: all FP read data is tied to zero
            assign fprdata = '{default: '0};
        end
    endgenerate

    assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? fprdata[0] : rdata[0];
    assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? fprdata[1] : rdata[1];
    assign operand_c_regfile = fprdata[2];

    // ----------------------
    // Registers (ID <-> EX)
    // ----------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            operand_a_q           <= '0;
            operand_b_q           <= '0;
            imm_q                 <= 64'b0;
            alu_valid_q           <= 1'b0;
            branch_valid_q        <= 1'b0;
            mult_valid_q          <= 1'b0;
            fpu_valid_q           <= 1'b0;
            fpu_fmt_q             <= 2'b0;
            fpu_rm_q              <= 3'b0;
            lsu_valid_q           <= 1'b0;
            csr_valid_q           <= 1'b0;
            fu_q                  <= NONE;
            operator_q            <= ADD;
            trans_id_q            <= '0; // width follows TRANS_ID_BITS
            pc_o                  <= 64'b0;
            is_compressed_instr_o <= 1'b0;
            branch_predict_o      <= '{default: 0};
        end else begin
            operand_a_q           <= operand_a_n;
            operand_b_q           <= operand_b_n;
            imm_q                 <= imm_n;
            alu_valid_q           <= alu_valid_n;
            branch_valid_q        <= branch_valid_n;
            mult_valid_q          <= mult_valid_n;
            fpu_valid_q           <= fpu_valid_n;
            fpu_fmt_q             <= fpu_fmt_n;
            fpu_rm_q              <= fpu_rm_n;
            lsu_valid_q           <= lsu_valid_n;
            csr_valid_q           <= csr_valid_n;
            fu_q                  <= fu_n;
            operator_q            <= operator_n;
            trans_id_q            <= trans_id_n;
            pc_o                  <= issue_instr_i.pc;
            is_compressed_instr_o <= issue_instr_i.is_compressed;
            branch_predict_o      <= issue_instr_i.bp;
        end
    end

    //pragma translate_off
    `ifndef VERILATOR
     // operands handed to the branch unit must never contain X/Z
     assert property (
        @(posedge clk_i) (branch_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
        else $warning ("Got unknown value in one of the operands");

    // The packing logic above is written for any number of commit ports, but the check is kept
    // because other parts of the design (not visible here) may still assume exactly two.
    initial begin
        assert (NR_COMMIT_PORTS == 2) else $error("Only two commit ports are supported at the moment!");
    end
    `endif
    //pragma translate_on
endmodule