// File: issue_read_operands.sv
// (Repository web-viewer residue -- size banner "19.4 KB", "Newer/Older" navigation, the
//  "sakundu committed" line and the 1..452 line-number gutter -- folded into this comment
//  so that the file is valid SystemVerilog.)
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.04.2017
// Description: Issues instruction from the scoreboard and fetches the operands
//              This also includes all the forwarding logic

import ariane_pkg::*;

// issue_read_operands
//
// Issue / operand-read stage. For the instruction presented on issue_instr_i this module
//   * selects the ready signal of the functional unit named in issue_instr_i.fu,
//   * checks the rd clobber lists to decide whether each source operand can be read from
//     the register file, must be forwarded (rsX_i / rsX_valid_i) or forces a stall,
//   * performs the write-after-write check on the destination register,
//   * reads the integer (and, if FP_PRESENT, floating-point) register file, and
//   * registers operands, operator, transaction id and the per-unit valid flags in the
//     ID <-> EX pipeline register that drives the outputs.
//
// Parameters:
//   NR_COMMIT_PORTS - number of register-file write (commit) ports. An elaboration-time
//                     assertion at the end of the module still requires this to be 2.
module issue_read_operands #(
    parameter int unsigned NR_COMMIT_PORTS = 2
)(
    input  logic                                   clk_i,    // Clock
    input  logic                                   rst_ni,   // Asynchronous reset active low
    // flush
    input  logic                                   flush_i,
    // coming from rename
    input  scoreboard_entry_t                      issue_instr_i,
    input  logic                                   issue_instr_valid_i,
    output logic                                   issue_ack_o,
    // look up the source operands (rs1/rs2/rs3) in the scoreboard for forwarding
    output logic [REG_ADDR_SIZE-1:0]               rs1_o,
    input  logic [63:0]                            rs1_i,
    input  logic                                   rs1_valid_i,
    output logic [REG_ADDR_SIZE-1:0]               rs2_o,
    input  logic [63:0]                            rs2_i,
    input  logic                                   rs2_valid_i,
    output logic [REG_ADDR_SIZE-1:0]               rs3_o,
    input  logic [FLEN-1:0]                        rs3_i,
    input  logic                                   rs3_valid_i,
    // get clobber input
    input  fu_t [2**REG_ADDR_SIZE:0]               rd_clobber_gpr_i,
    input  fu_t [2**REG_ADDR_SIZE:0]               rd_clobber_fpr_i,
    // To FU, just single issue for now
    output fu_data_t                               fu_data_o,
    output logic [63:0]                            pc_o,
    output logic                                   is_compressed_instr_o,
    // ALU 1
    input  logic                                   flu_ready_i,      // Fixed latency unit ready to accept a new request
    output logic                                   alu_valid_o,      // Output is valid
    // Branches and Jumps
    output logic                                   branch_valid_o,   // this is a valid branch instruction
    output branchpredict_sbe_t                     branch_predict_o,
    // LSU
    input  logic                                   lsu_ready_i,      // FU is ready
    output logic                                   lsu_valid_o,      // Output is valid
    // MULT
    output logic                                   mult_valid_o,     // Output is valid
    // FPU
    input  logic                                   fpu_ready_i,      // FU is ready
    output logic                                   fpu_valid_o,      // Output is valid
    output logic [1:0]                             fpu_fmt_o,        // FP fmt field from instr.
    output logic [2:0]                             fpu_rm_o,         // FP rm field from instr.
    // CSR
    output logic                                   csr_valid_o,      // Output is valid
    // commit port
    input  logic [NR_COMMIT_PORTS-1:0][4:0]        waddr_i,
    input  logic [NR_COMMIT_PORTS-1:0][63:0]       wdata_i,
    input  logic [NR_COMMIT_PORTS-1:0]             we_gpr_i,
    input  logic [NR_COMMIT_PORTS-1:0]             we_fpr_i
    // committing instruction
    // from scoreboard
    // input  scoreboard_entry     commit_instr_i,
    // output logic                commit_ack_o
);
    logic stall;   // stall signal, we do not want to fetch any more entries
    logic fu_busy; // functional unit is busy
    logic [63:0] operand_a_regfile, operand_b_regfile;  // operands coming from regfile
    logic [FLEN-1:0] operand_c_regfile; // third operand only from fp regfile
    // output flipflop (ID <-> EX)
    logic [63:0] operand_a_n, operand_a_q,
                 operand_b_n, operand_b_q,
                 imm_n, imm_q;

    logic       alu_valid_n,    alu_valid_q;
    logic       mult_valid_n,   mult_valid_q;
    logic       fpu_valid_n,    fpu_valid_q;
    logic [1:0] fpu_fmt_n,      fpu_fmt_q;
    logic [2:0] fpu_rm_n,       fpu_rm_q;
    logic       lsu_valid_n,    lsu_valid_q;
    logic       csr_valid_n,    csr_valid_q;
    logic       branch_valid_n, branch_valid_q;

    logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
    fu_op operator_n, operator_q; // operation to perform
    fu_t  fu_n,       fu_q; // functional unit to use

    // forwarding signals
    logic forward_rs1, forward_rs2, forward_rs3;

    // original instruction stored in tval
    riscv::instruction_t orig_instr;
    assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);

    // ID <-> EX registers
    assign fu_data_o.operand_a = operand_a_q;
    assign fu_data_o.operand_b = operand_b_q;
    assign fu_data_o.fu        = fu_q;
    assign fu_data_o.operator  = operator_q;
    assign fu_data_o.trans_id  = trans_id_q;
    assign fu_data_o.imm       = imm_q;
    assign alu_valid_o         = alu_valid_q;
    assign branch_valid_o      = branch_valid_q;
    assign lsu_valid_o         = lsu_valid_q;
    assign csr_valid_o         = csr_valid_q;
    assign mult_valid_o        = mult_valid_q;
    assign fpu_valid_o         = fpu_valid_q;
    assign fpu_fmt_o           = fpu_fmt_q;
    assign fpu_rm_o            = fpu_rm_q;
    // ---------------
    // Issue Stage
    // ---------------

    // select the right busy signal
    // this obviously depends on the functional unit we need
    // (ALU, CTRL_FLOW, CSR and MULT all share flu_ready_i)
    always_comb begin : unit_busy
        unique case (issue_instr_i.fu)
            NONE:
                fu_busy = 1'b0;
            ALU, CTRL_FLOW, CSR, MULT:
                fu_busy = ~flu_ready_i;
            FPU, FPU_VEC:
                fu_busy = ~fpu_ready_i;
            LOAD, STORE:
                fu_busy = ~lsu_ready_i;
            default:
                fu_busy = 1'b0;
        endcase
    end

    // ---------------
    // Register stage
    // ---------------
    // check that all operands are available, otherwise stall
    // forward corresponding register
    always_comb begin : operands_available
        stall = 1'b0;
        // operand forwarding signals
        forward_rs1 = 1'b0;
        forward_rs2 = 1'b0;
        forward_rs3 = 1'b0; // FPR only
        // poll the scoreboard for those values
        rs1_o = issue_instr_i.rs1;
        rs2_o = issue_instr_i.rs2;
        rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field

        // 0. check that we are not using the zimm type in RS1
        //    as this is an immediate we do not have to wait on anything here
        // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
        // 2. poll the scoreboard
        if (~issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE
                                                                     : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin
            // check if the clobbering instruction is not a CSR instruction, CSR instructions can only
            // be fetched through the register file since they can't be forwarded
            // if the operand is available, forward it. CSRs don't write to/from FPR
            if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs1] != CSR)) begin
                forward_rs1 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end

        if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE
                                         : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin
            // if the operand is available, forward it. CSRs don't write to/from FPR
            if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs2] != CSR)) begin
                forward_rs2 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end

        if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin
            // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
            if (rs3_valid_i) begin
                forward_rs3 = 1'b1;
            end else begin // the operand is not available -> stall
                stall = 1'b1;
            end
        end
    end

    // Forwarding/Output MUX
    // Later assignments win: regfile default -> forwarded value -> PC -> zimm (operand a),
    // and regfile default -> forwarded value -> immediate (operand b).
    always_comb begin : forwarding_operand_select
        // default is regfiles (gpr or fpr)
        operand_a_n = operand_a_regfile;
        operand_b_n = operand_b_regfile;
        // immediates are the third operands in the store case
        // for FP operations, the imm field can also be the third operand from the regfile
        imm_n      = is_imm_fpr(issue_instr_i.op) ? operand_c_regfile : issue_instr_i.result;
        trans_id_n = issue_instr_i.trans_id;
        fu_n       = issue_instr_i.fu;
        operator_n = issue_instr_i.op;
        // or should we forward
        if (forward_rs1) begin
            operand_a_n  = rs1_i;
        end

        if (forward_rs2) begin
            operand_b_n  = rs2_i;
        end

        if (forward_rs3) begin
            imm_n  = rs3_i;
        end

        // use the PC as operand a
        if (issue_instr_i.use_pc) begin
            operand_a_n = issue_instr_i.pc;
        end

        // use the zimm as operand a
        if (issue_instr_i.use_zimm) begin
            // zero extend the 5-bit zimm to the full 64-bit operand (59 + 5 = 64 bits)
            operand_a_n = {59'b0, issue_instr_i.rs1[4:0]};
        end
        // or is it an immediate (including PC), this is not the case for a store and control flow instructions
        // also make sure operand B is not already used as an FP operand
        if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin
            operand_b_n = issue_instr_i.result;
        end
    end

    // FU select, assert the correct valid out signal (in the next cycle)
    always_comb begin : unit_valid
        alu_valid_n    = 1'b0;
        lsu_valid_n    = 1'b0;
        mult_valid_n   = 1'b0;
        fpu_valid_n    = 1'b0;
        fpu_fmt_n      = 2'b0;
        fpu_rm_n       = 3'b0;
        csr_valid_n    = 1'b0;
        branch_valid_n = 1'b0;
        // Exception pass through:
        // If an exception has occurred simply pass it through
        // we do not want to issue this instruction
        if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
            case (issue_instr_i.fu)
                ALU:
                    alu_valid_n    = 1'b1;
                CTRL_FLOW:
                    branch_valid_n = 1'b1;
                MULT:
                    mult_valid_n   = 1'b1;
                FPU : begin
                    fpu_valid_n    = 1'b1;
                    fpu_fmt_n      = orig_instr.rftype.fmt; // fmt bits from instruction
                    fpu_rm_n       = orig_instr.rftype.rm;  // rm bits from instruction
                end
                FPU_VEC : begin
                    fpu_valid_n    = 1'b1;
                    fpu_fmt_n      = orig_instr.rvftype.vfmt;         // vfmt bits from instruction
                    fpu_rm_n       = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction
                end
                LOAD, STORE:
                    lsu_valid_n    = 1'b1;
                CSR:
                    csr_valid_n    = 1'b1;
                default:;
            endcase
        end
        // if we got a flush request, de-assert the valid flag, otherwise we will start this
        // functional unit with the wrong inputs
        if (flush_i) begin
            alu_valid_n    = 1'b0;
            lsu_valid_n    = 1'b0;
            mult_valid_n   = 1'b0;
            fpu_valid_n    = 1'b0;
            csr_valid_n    = 1'b0;
            branch_valid_n = 1'b0;
        end
    end

    // We can issue an instruction if we do not detect that any other instruction is writing the same
    // destination register.
    // We also need to check if there is an unresolved branch in the scoreboard.
    always_comb begin : issue_scoreboard
        // default assignment
        issue_ack_o = 1'b0;
        // check that we didn't stall, that the instruction we got is valid
        // and that the functional unit we need is not busy
        if (issue_instr_valid_i) begin
            // check that the corresponding functional unit is not busy
            if (~stall && ~fu_busy) begin
                // -----------------------------------------
                // WAW - Write After Write Dependency Check
                // -----------------------------------------
                // no other instruction has the same destination register -> issue the instruction
                if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE)
                                                : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
                    issue_ack_o = 1'b1;
                end
                // or check that the target destination register will be written in this cycle by the
                // commit stage
                for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) begin
                    if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd)
                                                    : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin
                        issue_ack_o = 1'b1;
                    end
                end
            end
            // we can also issue the instruction under the following two circumstances:
            // we can do this even if we are stalled or no functional unit is ready (as we don't need one)
            // the decoder needs to make sure that the instruction is marked as valid when it does not
            // need any functional unit or if an exception occurred previous to the execute stage.
            // 1. we already got an exception
            if (issue_instr_i.ex.valid) begin
                issue_ack_o = 1'b1;
            end
            // 2. it is an instruction which does not need any functional unit
            if (issue_instr_i.fu == NONE) begin
                issue_ack_o = 1'b1;
            end
        end
        // after a multiplication was issued we can only issue another multiplication
        // otherwise we will get contentions on the fixed latency bus
        // (this is the last assignment, so it also overrides the two cases above)
        if (mult_valid_q && issue_instr_i.fu != MULT) begin
            issue_ack_o = 1'b0;
        end
    end

    // ----------------------
    // Integer Register File
    // ----------------------
    logic [1:0][63:0] rdata;
    logic [1:0][4:0]  raddr_pack;

    // pack signals
    logic [NR_COMMIT_PORTS-1:0][4:0]  waddr_pack;
    logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack;
    logic [NR_COMMIT_PORTS-1:0]       we_pack;
    assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
    // the commit-port inputs already have the packed shape the regfile expects, so pass them
    // through as a whole instead of indexing ports [1] and [0] by hand
    assign waddr_pack = waddr_i;
    assign wdata_pack = wdata_i;
    assign we_pack    = we_gpr_i;

    ariane_regfile #(
        .DATA_WIDTH     ( 64              ),
        .NR_READ_PORTS  ( 2               ),
        .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
        .ZERO_REG_ZERO  ( 1               )
    ) i_ariane_regfile (
        .test_en_i ( 1'b0       ),
        .raddr_i   ( raddr_pack ),
        .rdata_o   ( rdata      ),
        .waddr_i   ( waddr_pack ),
        .wdata_i   ( wdata_pack ),
        .we_i      ( we_pack    ),
        .*
    );

    // -----------------------------
    // Floating-Point Register File
    // -----------------------------
    logic [2:0][FLEN-1:0] fprdata;

    // pack signals
    logic [2:0][4:0]                      fp_raddr_pack;
    // one FLEN-wide write word per commit port; must match DATA_WIDTH of the FP regfile
    logic [NR_COMMIT_PORTS-1:0][FLEN-1:0] fp_wdata_pack;

    generate
        if (FP_PRESENT) begin : float_regfile_gen
            assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
            // take the low FLEN bits of every commit port's write data
            for (genvar i = 0; i < NR_COMMIT_PORTS; i++) begin : gen_fp_wdata_pack
                assign fp_wdata_pack[i] = wdata_i[i][FLEN-1:0];
            end

            ariane_regfile #(
                .DATA_WIDTH     ( FLEN            ),
                .NR_READ_PORTS  ( 3               ),
                .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
                .ZERO_REG_ZERO  ( 0               )
            ) i_ariane_fp_regfile (
                .test_en_i ( 1'b0          ),
                .raddr_i   ( fp_raddr_pack ),
                .rdata_o   ( fprdata       ),
                .waddr_i   ( waddr_pack    ),
                // FLEN-wide data: connecting the 64-bit integer wdata_pack here would
                // mis-align the per-port words whenever FLEN < 64
                .wdata_i   ( fp_wdata_pack ),
                .we_i      ( we_fpr_i      ),
                .*
            );
        end else begin : no_fpr_gen
            assign fprdata = '{default: '0};
        end
    endgenerate

    assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? fprdata[0] : rdata[0];
    assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? fprdata[1] : rdata[1];
    assign operand_c_regfile = fprdata[2];

    // ----------------------
    // Registers (ID <-> EX)
    // ----------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            operand_a_q           <= '0;
            operand_b_q           <= '0;
            imm_q                 <= 64'b0;
            alu_valid_q           <= 1'b0;
            branch_valid_q        <= 1'b0;
            mult_valid_q          <= 1'b0;
            fpu_valid_q           <= 1'b0;
            fpu_fmt_q             <= 2'b0;
            fpu_rm_q              <= 3'b0;
            lsu_valid_q           <= 1'b0;
            csr_valid_q           <= 1'b0;
            fu_q                  <= NONE;
            operator_q            <= ADD;
            trans_id_q            <= '0; // width follows TRANS_ID_BITS
            pc_o                  <= 64'b0;
            is_compressed_instr_o <= 1'b0;
            branch_predict_o      <= '{default: 0};
        end else begin
            operand_a_q           <= operand_a_n;
            operand_b_q           <= operand_b_n;
            imm_q                 <= imm_n;
            alu_valid_q           <= alu_valid_n;
            branch_valid_q        <= branch_valid_n;
            mult_valid_q          <= mult_valid_n;
            fpu_valid_q           <= fpu_valid_n;
            fpu_fmt_q             <= fpu_fmt_n;
            fpu_rm_q              <= fpu_rm_n;
            lsu_valid_q           <= lsu_valid_n;
            csr_valid_q           <= csr_valid_n;
            fu_q                  <= fu_n;
            operator_q            <= operator_n;
            trans_id_q            <= trans_id_n;
            pc_o                  <= issue_instr_i.pc;
            is_compressed_instr_o <= issue_instr_i.is_compressed;
            branch_predict_o      <= issue_instr_i.bp;
        end
    end

    //pragma translate_off
    `ifndef VERILATOR
     // simulation-only check: a valid branch must never see X/Z operands
     assert property (
        @(posedge clk_i) (branch_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
        else $warning ("Got unknown value in one of the operands");

    initial begin
        assert (NR_COMMIT_PORTS == 2) else $error("Only two commit ports are supported at the moment!");
    end
    `endif
    //pragma translate_on
endmodule