// File: frontend.sv
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend


import ariane_pkg::*;

// Instruction fetch frontend.
// Generates the fetch address for the instruction cache, re-aligns the returned
// 32-bit fetch word, predicts control flow with a BHT, a BTB and a RAS, and
// queues the fetched data together with the prediction in a FIFO that is read
// through the fetch_entry_o / fetch_entry_valid_o / fetch_ack_i handshake.
module frontend #(
    parameter logic [63:0]     DmBaseAddress = 64'h0 // debug module base address
) (
    input  logic               clk_i,              // Clock
    input  logic               rst_ni,             // Asynchronous reset active low
    input  logic               flush_i,            // flush request for PCGEN; also kills the icache request, refills the credits and flushes the fetch FIFO
    input  logic               flush_bp_i,         // flush branch prediction (drives flush_i of the BTB and BHT)
    input  logic               debug_mode_i,       // forwarded unchanged to the BTB and BHT
    // global input
    input  logic [63:0]        boot_addr_i,        // fetch address used in the first cycle after reset
    // Set a new PC
    // mispredict
    input  branchpredict_t     resolved_branch_i,  // from controller signaling a branch_predict -> update BTB/BHT, redirect on mispredict
    // from commit, when flushing the whole pipeline
    input  logic               set_pc_commit_i,    // Take the PC from commit stage
    input  logic [63:0]        pc_commit_i,        // PC of instruction in commit stage
    // CSR input
    input  logic [63:0]        epc_i,              // exception PC which we need to return to
    input  logic               eret_i,             // return from exception
    input  logic [63:0]        trap_vector_base_i, // base of trap vector
    input  logic               ex_valid_i,         // exception is valid - from commit
    input  logic               set_debug_pc_i,     // jump to debug address
    // Instruction Fetch
    input  icache_dreq_o_t     icache_dreq_i,      // from the icache; this module reads ready, valid, data, vaddr and ex.valid
    output icache_dreq_i_t     icache_dreq_o,      // to the icache; this module drives req, vaddr, kill_s1 and kill_s2
    // instruction output port -> to processor back-end
    output frontend_fetch_t    fetch_entry_o,       // fetch entry containing all relevant data for the ID stage
    output logic               fetch_entry_valid_o, // instruction in IF is valid (fetch FIFO is not empty)
    input  logic               fetch_ack_i          // ID acknowledged this instruction
);
    // Registered icache response (the *_q signals are written in the always_ff block)
    logic [31:0] icache_data_q;
    logic        icache_valid_q;
    logic        icache_ex_valid_q;
    // combinational: the current fetch word may be used for prediction (driven in re_align)
    logic        instruction_valid;
    // per 16-bit slot of icache_data_q: the two LSBs of the slot are not 2'b11
    logic [INSTR_PER_FETCH-1:0] instr_is_compressed;

    // registered virtual address that belongs to icache_data_q
    logic [63:0] icache_vaddr_q;
    // BHT, BTB and RAS prediction
    bht_prediction_t bht_prediction;
    btb_prediction_t btb_prediction;
    ras_t            ras_predict;
    bht_update_t     bht_update;
    btb_update_t     btb_update;
    logic            ras_push, ras_pop;
    // value pushed onto the RAS (address following a call instruction)
    logic [63:0]     ras_update;

    // instruction fetch is ready
    logic          if_ready;
    logic [63:0]   npc_d, npc_q; // next PC
    logic npc_rst_load_q; //indicates whether we come out of reset (then we need to load boot_addr_i)
    // -----------------------
    // Ctrl Flow Speculation
    // -----------------------
    // RVI ctrl flow prediction (outputs of the instr_scan instances, one bit per instruction)
    logic [INSTR_PER_FETCH-1:0]       rvi_return, rvi_call, rvi_branch,
                                      rvi_jalr, rvi_jump;
    logic [INSTR_PER_FETCH-1:0][63:0] rvi_imm;
    // RVC branching (outputs of the instr_scan instances, one bit per instruction)
    // is_rvc and rvc_jr are driven by instr_scan but not read anywhere else in this module
    logic [INSTR_PER_FETCH-1:0]       is_rvc;
    logic [INSTR_PER_FETCH-1:0]       rvc_branch, rvc_jump, rvc_jr, rvc_return,
                                      rvc_jalr, rvc_call;
    logic [INSTR_PER_FETCH-1:0][63:0] rvc_imm;
    // re-aligned instruction and address (coming from cache - combinationally)
    logic [INSTR_PER_FETCH-1:0][31:0] instr;
    logic [INSTR_PER_FETCH-1:0][63:0] addr;

    // predicted target address, only meaningful while bp_valid is set
    logic [63:0]   bp_vaddr;
    logic          bp_valid; // we have a valid branch-prediction
    // a resolved branch reported a misprediction in this cycle
    logic          is_mispredict;
    // branch-prediction which we inject into the pipeline
    branchpredict_sbe_t  bp_sbe;
    // fetch fifo credit system
    // fifo_valid: push into the FIFO, fifo_ready: at least one credit left,
    // fifo_empty: FIFO status, fifo_pop: entry consumed by the back-end
    logic fifo_valid, fifo_ready, fifo_empty, fifo_pop;
    // s2_in_flight_*: a request has been issued and its response has not been seen yet
    // s2_eff_kill:    that outstanding request is being killed in this cycle
    // issue_req:      a new request is issued in this cycle
    logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q;
    // one bit wider than $clog2(FETCH_FIFO_DEPTH) so that the value FETCH_FIFO_DEPTH itself fits
    logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_d;
    logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_q;

    // save the unaligned part of the instruction to this ff
    logic [15:0] unaligned_instr_d,   unaligned_instr_q;
    // the last instruction was unaligned
    logic        unaligned_d,         unaligned_q;
    // register to save the unaligned address
    logic [63:0] unaligned_address_d, unaligned_address_q;

    // For every 16-bit slot of the registered fetch word, flag the slot when its
    // two least significant bits are not both set (i.e. they differ from 2'b11).
    for (genvar slot = 0; slot < INSTR_PER_FETCH; slot++) begin
        assign instr_is_compressed[slot] = ~(icache_data_q[16 * slot] & icache_data_q[16 * slot + 1]);
    end

    // Soft-realignment to do branch-prediction
    // Splits the registered 32-bit fetch word into up to two instructions
    // (instr[0]/addr[0] and instr[1]/addr[1]) and tracks a 32-bit instruction
    // that straddles two fetch words in the unaligned_* registers.
    // The statements below rely on "last assignment wins": the later if-blocks
    // deliberately override what the earlier ones assigned.
    always_comb begin : re_align
        // by default keep the saved half-instruction state
        unaligned_d = unaligned_q;
        unaligned_address_d = unaligned_address_q;
        unaligned_instr_d = unaligned_instr_q;
        // the fetch word is usable whenever the cache response is valid, unless overridden below
        instruction_valid = icache_valid_q;

        // 32-bit can contain 2 instructions
        // default: instruction 0 is the whole fetch word at the fetch address
        instr[0] = icache_data_q;
        addr[0]  = icache_vaddr_q;

        // default: no second instruction; its address is the upper half-word of the fetch word
        instr[1] = '0;
        addr[1]  = {icache_vaddr_q[63:2], 2'b10};

        if (icache_valid_q) begin
            // last instruction was unaligned
            if (unaligned_q) begin
                // instruction 0 = saved lower half + lower half of this fetch word,
                // located at the saved address
                instr[0] = {icache_data_q[15:0], unaligned_instr_q};
                addr[0] = unaligned_address_q;

                unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
                unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle

                // check whether the upper half-word starts yet another unaligned
                // instruction, i.e. it is not compressed
                // if it is compressed, reset the unaligned flag
                // for 32 bit we can simply check the next instruction and whether it is compressed or not
                // if it is compressed the next fetch will contain an aligned instruction
                if (instr_is_compressed[1]) begin
                    unaligned_d = 1'b0;
                    instr[1] = {16'b0, icache_data_q[31:16]};
                end
            end else if (instr_is_compressed[0]) begin // instruction zero is RVC
                // is instruction 1 also compressed
                // yes? -> no problem, no -> we've got an unaligned instruction
                if (instr_is_compressed[1]) begin
                    instr[1] = {16'b0, icache_data_q[31:16]};
                end else begin
                    // upper half-word is the first half of a 32-bit instruction: save it
                    unaligned_instr_d = icache_data_q[31:16];
                    unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
                    unaligned_d = 1'b1;
                end
            end // else -> normal fetch
        end

        // we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
        // received the next instruction
        // (fetch address points at the upper half-word and that half-word is not compressed)
        if (icache_valid_q && icache_vaddr_q[1] && !instr_is_compressed[1]) begin
            instruction_valid = 1'b0;
            unaligned_d = 1'b1;
            unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
            unaligned_instr_d = icache_data_q[31:16];
        end

        // if we killed the consecutive fetch we are starting on a clean slate
        // (this has the final say over every assignment to unaligned_d above)
        if (icache_dreq_o.kill_s2) begin
            unaligned_d = 1'b0;
        end
    end


    // taken[i+1] is set when instruction i of the current fetch word is predicted
    // to change the control flow. taken[0] is never set; it only provides the
    // "previous instruction was taken" term for instruction 0.
    logic [INSTR_PER_FETCH:0] taken;
    // control front-end + branch-prediction
    // Walks over the re-aligned instructions of the current fetch word and derives
    //   - ras_push / ras_pop / ras_update : return address stack control
    //   - taken, bp_valid, bp_vaddr       : predicted control flow change and its target
    //   - bp_sbe                          : prediction entry that is queued with the fetch word
    always_comb begin : frontend_ctrl
        automatic logic take_rvi_cf; // take the control flow change (non-compressed)
        automatic logic take_rvc_cf; // take the control flow change (compressed)

        take_rvi_cf       = 1'b0;
        take_rvc_cf       = 1'b0;
        ras_pop           = 1'b0;
        ras_push          = 1'b0;
        ras_update        = '0;
        taken             = '0;

        bp_vaddr          = '0;    // predicted address
        bp_valid          = 1'b0;  // prediction is valid

        bp_sbe.cf_type    = RAS;

        // only predict if the response is valid
        if (instruction_valid) begin
            // look at instruction 0, 1, 2, ...
            for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
                // only speculate if the previous instruction was not taken
                if (!taken[i]) begin
                    // Every instruction starts with a clean decision. taken[1] is
                    // cleared further down when the fetch started on the upper
                    // half-word, so instruction 1 is still examined even if
                    // instruction 0 was "taken"; without this reset it would
                    // inherit instruction 0's take_* flags and raise a bogus
                    // prediction with its own address and immediate.
                    take_rvi_cf = 1'b0;
                    take_rvc_cf = 1'b0;

                    // function call: push the address of the instruction following the call
                    // (both signals are assigned unconditionally, so the values of a
                    // later examined instruction replace those of an earlier one)
                    ras_push = rvi_call[i] | rvc_call[i];
                    ras_update = addr[i] + (rvc_call[i] ? 2 : 4);

                    // Branch Prediction - **speculative**
                    if (rvi_branch[i] || rvc_branch[i]) begin
                        bp_sbe.cf_type = BHT;
                        // dynamic prediction valid?
                        if (bht_prediction.valid) begin
                            take_rvi_cf = rvi_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
                            take_rvc_cf = rvc_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
                        // default to static prediction
                        end else begin
                            // set if immediate is negative - static prediction
                            take_rvi_cf = rvi_branch[i] & rvi_imm[i][63];
                            take_rvc_cf = rvc_branch[i] & rvc_imm[i][63];
                        end
                    end

                    // unconditional jumps
                    if (rvi_jump[i] || rvc_jump[i]) begin
                        take_rvi_cf = rvi_jump[i];
                        take_rvc_cf = rvc_jump[i];
                    end

                    // to take this jump we need a valid prediction target **speculative**
                    // (indirect jumps that are flagged as calls are not predicted here)
                    if ((rvi_jalr[i] || rvc_jalr[i]) && ~(rvi_call[i] || rvc_call[i])) begin
                        bp_sbe.cf_type = BTB;
                        if (btb_prediction.valid) begin
                            bp_vaddr = btb_prediction.target_address;
                            taken[i+1] = 1'b1;
                        end
                    end

                    // is it a return and the RAS contains a valid prediction? **speculative**
                    if ((rvi_return[i] || rvc_return[i]) && ras_predict.valid) begin
                        bp_vaddr = ras_predict.ra;
                        ras_pop = 1'b1;
                        taken[i+1] = 1'b1;
                        bp_sbe.cf_type = RAS;
                    end

                    // PC-relative targets: instruction address + decoded immediate
                    if (take_rvi_cf) begin
                        taken[i+1] = 1'b1;
                        bp_vaddr = addr[i] + rvi_imm[i];
                    end

                    if (take_rvc_cf) begin
                        taken[i+1] = 1'b1;
                        bp_vaddr = addr[i] + rvc_imm[i];
                    end

                    // we are not interested in the lower instruction
                    // (the fetch address points at the upper half-word)
                    if (icache_vaddr_q[1]) begin
                        taken[1] = 1'b0;
                        // TODO(zarubaf): that seems to be overly pessimistic
                        ras_pop = 1'b0;
                        ras_push = 1'b0;
                    end
                end
            end
        end

        // a prediction is valid as soon as any instruction is predicted taken
        bp_valid = |taken;
        // assemble scoreboard entry
        bp_sbe.valid = bp_valid;
        bp_sbe.predict_address = bp_vaddr;
        bp_sbe.predict_taken = bp_valid;
    end

    // a resolved branch is only a misprediction while its valid flag is set
    assign is_mispredict = resolved_branch_i.is_mispredict & resolved_branch_i.valid;
    // kill the first cache stage on a pipeline flush or a misprediction
    assign icache_dreq_o.kill_s1 = flush_i | is_mispredict;
    // the second cache stage is additionally killed by our own valid prediction
    assign icache_dreq_o.kill_s2 = bp_valid | icache_dreq_o.kill_s1;
    // every valid cache response is pushed into the fetch FIFO
    assign fifo_valid = icache_valid_q;

    // ----------------------------------------
    // Update Control Flow Predictions
    // ----------------------------------------
    // Both predictors observe the same resolved branch; the control flow type
    // selects which of them is actually updated.
    // BTB
    assign btb_update.valid          = (resolved_branch_i.cf_type == BTB) & resolved_branch_i.valid;
    assign btb_update.pc             = resolved_branch_i.pc;
    assign btb_update.target_address = resolved_branch_i.target_address;
    assign btb_update.clear          = resolved_branch_i.clear;
    // BHT
    assign bht_update.valid          = (resolved_branch_i.cf_type == BHT) & resolved_branch_i.valid;
    assign bht_update.pc             = resolved_branch_i.pc;
    assign bht_update.taken          = resolved_branch_i.is_taken;
    assign bht_update.mispredict     = resolved_branch_i.is_mispredict;

    // -------------------
    // Next PC
    // -------------------
    // The address sent to the cache in this cycle is, in this order of preference:
    //   a) the predicted target while a branch prediction is valid
    //   b) boot_addr_i in the first cycle after reset
    //   c) the registered next PC
    //
    // The next PC (NPC) is taken from the first matching source below
    // (highest priority first):
    //   6. Debug entry
    //   5. Pipeline flush because of CSR side effects
    //   4. Exception/Interrupt
    //   3. Return from environment call
    //   2. Control flow change request (misprediction)
    //   0. Word after the current fetch address once the fetch is accepted
    //   -. otherwise hold the current fetch address
    // select PC a.k.a PC Gen
    always_comb begin : npc_select
        automatic logic [63:0] fetch_vaddr;

        // boot_addr_i is muxed in here instead of being used as the reset value
        // of npc_q: this is a workaround, some tools have issues with boot_addr_i
        // in the asynchronous reset assignment even though it is tied to a
        // constant on the top-level.
        if (bp_valid) begin
            fetch_vaddr = bp_vaddr;
        end else if (npc_rst_load_q) begin
            fetch_vaddr = boot_addr_i;
        end else begin
            fetch_vaddr = npc_q;
        end

        icache_dreq_o.vaddr = fetch_vaddr;

        if (set_debug_pc_i) begin
            // enter debug on a hard-coded base-address
            npc_d = DmBaseAddress + dm::HaltAddress;
        end else if (set_pc_commit_i) begin
            // flush request of a CSR instruction or AMO: restart behind the
            // instruction in the commit stage. Neither exists in a compressed
            // form, so PC + 4 can be used unconditionally.
            // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
            npc_d = pc_commit_i + 64'h4;
        end else if (ex_valid_i) begin
            npc_d = trap_vector_base_i;
        end else if (eret_i) begin
            npc_d = epc_i;
        end else if (is_mispredict) begin
            npc_d = resolved_branch_i.target_address;
        end else if (if_ready) begin
            // fetch accepted: continue with the next 32-bit aligned word
            npc_d = {fetch_vaddr[63:2], 2'b0}  + 'h4;
        end else begin
            // nothing happened: keep requesting the same address
            npc_d = fetch_vaddr;
        end
    end

    // -------------------
    // Credit-based fetch FIFO flow ctrl
    // -------------------
    // Requests may only be sent while at least one credit is left.
    assign fifo_ready          = (|fifo_credits_q);
    assign icache_dreq_o.req   =  fifo_ready;
    assign if_ready            =  fifo_ready & icache_dreq_i.ready;

    // a request is issued (and a credit consumed) unless it is killed right away
    assign issue_req           = (~icache_dreq_o.kill_s1) & if_ready;

    // the back-end sees an entry whenever the FIFO holds one and consumes it
    // by acknowledging
    assign fetch_entry_valid_o = ~fifo_empty;
    assign fifo_pop            = fetch_entry_valid_o & fetch_ack_i;

    // track whether an issued request is still waiting for its response;
    // a flush clears the flag, a newly issued request sets it, a valid
    // response clears it
    assign s2_in_flight_d      = (flush_i)             ? 1'b0 :
                                 (issue_req)           ? 1'b1 :
                                 (icache_dreq_i.valid) ? 1'b0 :
                                                         s2_in_flight_q;

    // killing a request that is still in flight hands its credit back
    assign s2_eff_kill         = icache_dreq_o.kill_s2 & s2_in_flight_q;

    // credits: refilled completely on a flush, otherwise +1 per popped entry,
    // +1 per killed in-flight request and -1 per issued request
    assign fifo_credits_d      = (flush_i) ? FETCH_FIFO_DEPTH :
                                             fifo_credits_q + fifo_pop + s2_eff_kill - issue_req;


//pragma translate_off
`ifndef VERILATOR
    // Simulation-only sanity checks (excluded for synthesis and for Verilator).
    // The credit counter must never exceed the number of credits it starts with.
    fetch_fifo_credits0 : assert property (
        @(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
        else $fatal(1,"[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
    // Configuration checks, evaluated once at the start of simulation.
    initial begin
        // the fetch FIFO below is instantiated with a fixed depth of 8
        assert (FETCH_FIFO_DEPTH <= 8) else $fatal(1,"[frontend] fetch fifo deeper than 8 not supported");
        // the re-alignment logic is written for exactly one 32-bit word per fetch
        assert (FETCH_WIDTH == 32) else $fatal(1,"[frontend] fetch width != 32 not supported");
    end
`endif
//pragma translate_on

    // State registers. The asynchronous active-low reset clears everything
    // except npc_rst_load_q, which starts set (so the first fetch uses
    // boot_addr_i), and the credit counter, which starts with all credits.
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            // PC generation
            npc_rst_load_q       <= 1'b1;
            npc_q                <= '0;
            // registered icache response
            icache_valid_q       <= 1'b0;
            icache_ex_valid_q    <= 1'b0;
            icache_vaddr_q       <= '0;
            icache_data_q        <= '0;
            // half of an instruction that spans two fetch words
            unaligned_q          <= 1'b0;
            unaligned_instr_q    <= '0;
            unaligned_address_q  <= '0;
            // credit-based flow control
            s2_in_flight_q       <= 1'b0;
            fifo_credits_q       <= FETCH_FIFO_DEPTH;
        end else begin
            // PC generation: the reset marker is only set in the very first cycle
            npc_rst_load_q       <= 1'b0;
            npc_q                <= npc_d;
            // sample the icache response
            icache_valid_q       <= icache_dreq_i.valid;
            icache_ex_valid_q    <= icache_dreq_i.ex.valid;
            icache_vaddr_q       <= icache_dreq_i.vaddr;
            icache_data_q        <= icache_dreq_i.data;
            // half of an instruction that spans two fetch words
            unaligned_q          <= unaligned_d;
            unaligned_instr_q    <= unaligned_instr_d;
            unaligned_address_q  <= unaligned_address_d;
            // credit-based flow control
            s2_in_flight_q       <= s2_in_flight_d;
            fifo_credits_q       <= fifo_credits_d;
        end
    end

    // Return address stack: ras_update is pushed on ras_push, ras_pop removes
    // the top entry and ras_predict is the prediction used for returns.
    ras #(
        .DEPTH  ( RAS_DEPTH   )
    ) i_ras (
        .push_i ( ras_push    ),
        .pop_i  ( ras_pop     ),
        .data_i ( ras_update  ),
        .data_o ( ras_predict ),
        // wildcard: every remaining port of ras is tied to the signal with the
        // same name in this module
        // NOTE(review): presumably only clk_i and rst_ni; if ras also has a
        // flush_i port it is connected to flush_i here, not to flush_bp_i as
        // for the BTB/BHT - confirm against the ras module
        .*
    );

    // Branch target buffer: looked up with the address of the registered fetch
    // word, updated from resolved branches and flushed by flush_bp_i.
    btb #(
        .NR_ENTRIES       ( BTB_ENTRIES    )
    ) i_btb (
        .clk_i            ( clk_i          ),
        .rst_ni           ( rst_ni         ),
        .flush_i          ( flush_bp_i     ),
        .debug_mode_i     ( debug_mode_i   ),
        .vpc_i            ( icache_vaddr_q ),
        .btb_update_i     ( btb_update     ),
        .btb_prediction_o ( btb_prediction )
    );

    // Branch history table: looked up with the address of the registered fetch
    // word, updated from resolved branches and flushed by flush_bp_i.
    bht #(
        .NR_ENTRIES       ( BHT_ENTRIES    )
    ) i_bht (
        .clk_i            ( clk_i          ),
        .rst_ni           ( rst_ni         ),
        .flush_i          ( flush_bp_i     ),
        .debug_mode_i     ( debug_mode_i   ),
        .vpc_i            ( icache_vaddr_q ),
        .bht_update_i     ( bht_update     ),
        .bht_prediction_o ( bht_prediction )
    );

    // One scanner per re-aligned instruction: each one classifies instr[slot]
    // and delivers the control flow flags and immediates used by frontend_ctrl.
    for (genvar slot = 0; slot < INSTR_PER_FETCH; slot++) begin
        instr_scan i_instr_scan (
            .instr_i      ( instr[slot]      ),
            .is_rvc_o     ( is_rvc[slot]     ),
            // uncompressed (RVI) control flow
            .rvi_branch_o ( rvi_branch[slot] ),
            .rvi_jump_o   ( rvi_jump[slot]   ),
            .rvi_jalr_o   ( rvi_jalr[slot]   ),
            .rvi_call_o   ( rvi_call[slot]   ),
            .rvi_return_o ( rvi_return[slot] ),
            .rvi_imm_o    ( rvi_imm[slot]    ),
            // compressed (RVC) control flow
            .rvc_branch_o ( rvc_branch[slot] ),
            .rvc_jump_o   ( rvc_jump[slot]   ),
            .rvc_jr_o     ( rvc_jr[slot]     ),
            .rvc_jalr_o   ( rvc_jalr[slot]   ),
            .rvc_call_o   ( rvc_call[slot]   ),
            .rvc_return_o ( rvc_return[slot] ),
            .rvc_imm_o    ( rvc_imm[slot]    )
        );
    end


    // Fetch FIFO between the frontend and the back-end. Every valid cache
    // response is pushed (fifo_valid); an entry is popped once the back-end
    // acknowledges it (fifo_pop). flush_i empties the FIFO.
    fifo_v2 #(
        // fixed depth, independent of FETCH_FIFO_DEPTH; the elaboration check in
        // this module requires FETCH_FIFO_DEPTH <= 8
        .DEPTH        (  8                   ),
        .dtype        ( frontend_fetch_t     )
    ) i_fetch_fifo (
        .clk_i       ( clk_i                 ),
        .rst_ni      ( rst_ni                ),
        .flush_i     ( flush_i               ),
        .testmode_i  ( 1'b0                  ),
        // the full / almost-full / almost-empty outputs are left unconnected
        .full_o      (                       ),
        .empty_o     ( fifo_empty            ),
        .alm_full_o  (                       ),
        .alm_empty_o (                       ),
        // NOTE(review): the concatenation presumably follows the field order of
        // frontend_fetch_t, which is declared outside this file - confirm
        .data_i      ( {icache_vaddr_q, icache_data_q, bp_sbe, taken[INSTR_PER_FETCH:1], icache_ex_valid_q} ),
        .push_i      ( fifo_valid            ),
        .data_o      ( fetch_entry_o         ),
        .pop_i       ( fifo_pop              )
    );

endmodule