snitch_pkg.sv 4.85 KB
Newer Older
sakundu committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

/// Snitch configuration package.
///
/// Collects the data-path widths, the request/response payload types, the SSR
/// constants, the cluster address map and two small instruction/register
/// classification helpers.
package snitch_pkg;

  import cf_math_pkg::idx_width;

  // Data-path width in bits and the matching number of byte strobes.
  localparam DataWidth                  = 32;
  localparam StrbWidth                  = DataWidth/8;
  localparam int NumFPOutstandingLoads  = 4;
  // Use a high number of outstanding loads, if running a latency-throughput analysis
`ifdef TRAFFIC_GEN
  localparam int NumIntOutstandingLoads = 2048;
`else
  localparam int NumIntOutstandingLoads = 8;
`endif
  // Width of the ID that tags an outstanding integer load.
  localparam MetaIdWidth                = idx_width(NumIntOutstandingLoads);
  // Xpulpimg extension enabled?
  // Takes the value of the `XPULPIMG macro when it is defined. The fallback is
  // written as an explicit 0: the parameter is a 2-state `bit`, so an X
  // literal would be converted to 0 anyway.
`ifdef XPULPIMG
  localparam bit XPULPIMG = `XPULPIMG;
`else
  localparam bit XPULPIMG = 1'b0;
`endif

  typedef logic [31:0]               addr_t;
  typedef logic [DataWidth-1:0]      data_t;
  typedef logic [StrbWidth-1:0]      strb_t;
  typedef logic [MetaIdWidth-1:0]    meta_id_t;

  // Static core configuration.
  typedef struct packed {
    addr_t       BootAddress;
    int unsigned NrCores;
  } SnitchCfg;

  // Data request payload.
  typedef struct packed {
    addr_t addr;
    meta_id_t id;
    logic [3:0] amo;
    logic write;
    data_t data;
    strb_t strb;
  } dreq_t;

  // Data response payload; `id` has the same type as the request `id`.
  typedef struct packed {
    data_t data;
    meta_id_t id;
    logic error;
  } dresp_t;

  // Accelerator request payload: an opcode word plus three data arguments.
  typedef struct packed {
    addr_t addr;
    logic [4:0] id;
    logic [31:0] data_op;
    data_t data_arga;
    data_t data_argb;
    data_t data_argc;
  } acc_req_t;

  // Accelerator response payload.
  typedef struct packed {
    logic [4:0] id;
    logic error;
    data_t data;
  } acc_resp_t;

  // Number of instructions the sequencer can hold
  localparam int FPUSequencerInstr      = 16;
  // SSRs
  localparam logic [31:0] SSR_ADDR_BASE = 32'h20_4800;
  localparam logic [31:0] SSR_ADDR_MASK = 32'hffff_fe00;
  localparam logic [11:0] CSR_SSR       = 12'h7C0;
  localparam int SSRNrCredits           = 4;
  // Registers which are used as SSRs (register indices 0 and 1)
  localparam [4:0] FT0                  = 5'd0;
  localparam [4:0] FT1                  = 5'd1;
  localparam [1:0][4:0] SSRRegs         = {FT1, FT0};

  // Returns 1 if `register` is one of the SSR registers (FT0 or FT1), else 0.
  function automatic logic is_ssr(logic [4:0] register);
    unique case (register)
      FT0, FT1: return 1'b1;
      default : return 1'b0;
    endcase
  endfunction

  // Number of address bits which should be used for accesses from the SoC side.
  // This effectively determines the Address Space of a Snitch Cluster.
  localparam logic [31:0] SoCRequestAddrBits = 32;

  // Address Map
  // TCDM, everything below 0x1000_0000 (see TCDMMask and EndAddress[TCDM])
  localparam logic [31:0] TCDMStartAddress = 32'h0000_0000;
  // '1 << 28 evaluates to 32'hF000_0000 in this 32-bit context.
  localparam logic [31:0] TCDMMask         = '1 << 28;

  // Slaves on Cluster AXI Bus
  typedef enum integer {
    TCDM               = 0,
    ClusterPeripherals = 1,
    SoC                = 2
  } cluster_slave_e;

  typedef enum integer {
    CoreReq = 0,
    ICache  = 1,
    AXISoC  = 2
  } cluster_master_e;

  localparam int unsigned NrSlaves  = 3;
  localparam int unsigned NrMasters = 3;

  localparam int IdWidth      = 2;
  localparam int IdWidthSlave = $clog2(NrMasters) + IdWidth;

  // Per-slave address ranges, indexed by cluster_slave_e. The leftmost element
  // of each packed-array literal is the highest index.
  //                                                    2. SoC         1. Cluster Peripherals  0. TCDM
  localparam logic [NrSlaves-1:0][31:0] StartAddress = {32'h8000_0000, 32'h4000_0000, TCDMStartAddress};
  localparam logic [NrSlaves-1:0][31:0] EndAddress   = {32'hFFFF_FFFF, 32'h5000_0000, TCDMStartAddress + 32'h1000_0000};
  // One bit per slave, all set.
  localparam logic [NrSlaves-1:0] ValidRule          = {NrSlaves{1'b1}};

  // Cluster Peripheral Registers
  typedef enum logic [31:0] {
    TCDMStartAddressReg = 32'h4000_0000,
    TCDMEndAddressReg   = 32'h4000_0008,
    NrCoresReg          = 32'h4000_0010,
    FetchEnableReg      = 32'h4000_0018,
    ScratchReg          = 32'h4000_0020,
    WakeUpReg           = 32'h4000_0028,
    CycleCountReg       = 32'h4000_0030,
    BarrierReg          = 32'h4000_0038,
    TcdmAccessedReg     = 32'h4000_FFF0,
    TcdmCongestedReg    = 32'h4000_FFF8,
    PerfCounterBase     = 32'h4001_0000
  } cluster_peripheral_addr_e;

  // Offload to shared accelerator
  // Returns 1 if `instr` matches one of the multiply/divide/remainder
  // encodings listed below (taken from riscv_instr), else 0.
  function automatic logic shared_offload (logic [31:0] instr);
    unique casez (instr)
      riscv_instr::MUL,
      riscv_instr::MULH,
      riscv_instr::MULHSU,
      riscv_instr::MULHU,
      riscv_instr::DIV,
      riscv_instr::DIVU,
      riscv_instr::REM,
      riscv_instr::REMU,
      riscv_instr::MULW,
      riscv_instr::DIVW,
      riscv_instr::DIVUW,
      riscv_instr::REMW,
      riscv_instr::REMUW: return 1'b1;
      default:            return 1'b0;
    endcase
  endfunction

  // Event strobes per core, counted by the performance counters in the cluster
  // peripherals.
  typedef struct packed {
    logic issue_fpu;         // core operations performed in the FPU
    logic issue_fpu_seq;     // includes load/store operations
    logic issue_core_to_fpu; // instructions issued from core to FPU
    logic retired_insts;     // number of instructions retired by the core
  } core_events_t;

endpackage