Commit 9c77b941 by Ravi Varadarajan

remove redundant rtl dir from Flows/<Enablement>/<Testcase>

Signed-off-by: Ravi Varadarajan <rvaradarajan@ucsd.edu>
parent 217e2d99
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Description: Scrambles the address in such a way, that part of the memory is accessed
// sequentially and part is interleaved.
// Current constraints:
// Author: Samuel Riedel <sriedel@iis.ee.ethz.ch>
// Purely combinational address remapper.
//
// For addresses below `NumTiles * SeqMemSizePerTile` the tile-index field
// (`address_i[SeqTotalBits-1:SeqPerTileBits]`) and the per-tile line field
// (`address_i[SeqPerTileBits-1:ConstantBitsLSB]`) swap places. All other address bits, and all
// addresses outside that region, are forwarded unchanged. With `Bypass` set or fewer than two
// tiles the module is a plain wire.
//
// NOTE(review): the bit-field arithmetic uses `$clog2`, so it presumably expects `NumTiles`,
// `NumBanksPerTile` and `SeqMemSizePerTile` to be powers of two - confirm against the integrators.
module address_scrambler #(
  parameter int unsigned AddrWidth         = 32,     // Width of the address ports in bits
  parameter int unsigned ByteOffset        = 2,      // Number of byte-offset LSBs (2**ByteOffset = bank width)
  parameter int unsigned NumTiles          = 2,      // Number of tiles sharing the sequential region
  parameter int unsigned NumBanksPerTile   = 2,      // Number of banks in each tile (at least 2)
  parameter bit          Bypass            = 0,      // 1: forward the address unmodified
  parameter int unsigned SeqMemSizePerTile = 4*1024  // Size of the sequential region of one tile
) (
  input  logic [AddrWidth-1:0] address_i,
  output logic [AddrWidth-1:0] address_o
);
  // Widths of the individual address fields
  localparam int unsigned BankOffsetBits  = $clog2(NumBanksPerTile);
  localparam int unsigned TileIdBits      = $clog2(NumTiles);
  localparam int unsigned SeqPerTileBits  = $clog2(SeqMemSizePerTile);
  localparam int unsigned SeqTotalBits    = SeqPerTileBits+TileIdBits;
  localparam int unsigned ConstantBitsLSB = ByteOffset + BankOffsetBits;
  localparam int unsigned ScrambleBits    = SeqPerTileBits-ConstantBitsLSB;

  if (Bypass || NumTiles < 2) begin
    // Nothing to scramble: bypass requested or there is only a single tile
    assign address_o = address_i;
  end else begin
    logic [ScrambleBits-1:0] scramble; // Address bits that have to be shuffled around
    logic [TileIdBits-1:0]   tile_id;  // Which tile does this address region belong to

    // Leave this part of the address unchanged
    // The LSBs that correspond to the offset inside a tile. These are the byte offset (bank width)
    // and the Bank offset (Number of Banks in tile)
    assign address_o[ConstantBitsLSB-1:0] = address_i[ConstantBitsLSB-1:0];
    // The MSBs that are outside of the sequential memory size. Currently the sequential memory size
    // always starts at 0. These are all the MSBs up to SeqMemSizePerTile*NumTiles
    assign address_o[AddrWidth-1:SeqTotalBits] = address_i[AddrWidth-1:SeqTotalBits];

    // Scramble the middle part
    // Bits that would have gone to different tiles but now go to increasing lines in the same tile
    assign scramble = address_i[SeqPerTileBits-1:ConstantBitsLSB];
    // Bits that would have gone to increasing lines in the same tile but now go to different tiles
    assign tile_id  = address_i[SeqTotalBits-1:SeqPerTileBits];

    always_comb begin
      // Default: Unscrambled (fields stay in their original order)
      address_o[SeqTotalBits-1:ConstantBitsLSB] = {tile_id, scramble};
      // Addresses inside the sequential region get the two fields swapped
      if (address_i < (NumTiles * SeqMemSizePerTile)) begin
        address_o[SeqTotalBits-1:ConstantBitsLSB] = {scramble, tile_id};
      end
    end
  end

  // Check for unsupported configurations
  if (NumBanksPerTile < 2)
    $fatal(1, "NumBanksPerTile must be at least 2. The special case '1' is currently not supported!");
  if (SeqMemSizePerTile % (2**ByteOffset*NumBanksPerTile) != 0)
    $fatal(1, "SeqMemSizePerTile must be a multiple of BankWidth*NumBanksPerTile!");
endmodule : address_scrambler
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Andreas Kurth <akurth@iis.ee.ethz.ch>
// AXI to banked-memory adapter.
//
// Data flow as implemented below:
//   1. AR and AW/W are each unrolled into one `meta_t` item per beat (`rd_meta` / `wr_meta`).
//   2. `i_ax_mux` picks either the read or the write item according to `meta_sel_d`.
//   3. `i_fork` distributes the selected item to three consumers: the memory request stream
//      (`i_mem2stream`), the meta data buffer (`i_meta_buf`) and the B/R selection buffer
//      (`i_sel_buf`).
//   4. `i_mem2banks` splits every memory request over `NumBanks` banks.
//   5. `i_join` pairs each memory response with its buffered meta data and `i_fork_dynamic`
//      routes the pair to the B and/or R channel.
//
// NOTE: the AXI ATOP to AMO translation is commented out, so `mem_atop_o` is always driven
// from a zero `m2s_req.atop`.
module axi2mem #(
parameter type axi_req_t = logic, // AXI request type
parameter type axi_resp_t = logic, // AXI response type
parameter int unsigned AddrWidth = 0, // address width
parameter int unsigned DataWidth = 0, // AXI data width
parameter int unsigned IdWidth = 0, // AXI ID width
parameter int unsigned NumBanks = 0, // number of banks at output
parameter int unsigned BufDepth = 1, // depth of memory response buffer
// Dependent parameters, do not override.
localparam type addr_t = logic [AddrWidth-1:0],
localparam type mem_atop_t = logic [5:0],
localparam type mem_data_t = logic [DataWidth/NumBanks-1:0],
localparam type mem_strb_t = logic [DataWidth/NumBanks/8-1:0]
) (
input logic clk_i,
input logic rst_ni,
// High while any AXI channel is valid or a read/write burst is still being unrolled.
output logic busy_o,
input axi_req_t axi_req_i,
output axi_resp_t axi_resp_o,
// Per-bank memory interface
output logic [NumBanks-1:0] mem_req_o,
input logic [NumBanks-1:0] mem_gnt_i,
output addr_t [NumBanks-1:0] mem_addr_o, // byte address
output mem_data_t [NumBanks-1:0] mem_wdata_o, // write data
output mem_strb_t [NumBanks-1:0] mem_strb_o, // byte-wise strobe
output mem_atop_t [NumBanks-1:0] mem_atop_o, // atomic operation
output logic [NumBanks-1:0] mem_we_o, // write enable
input logic [NumBanks-1:0] mem_rvalid_i, // response valid
input mem_data_t [NumBanks-1:0] mem_rdata_i // read data
);
typedef logic [DataWidth-1:0] axi_data_t;
typedef logic [DataWidth/8-1:0] axi_strb_t;
typedef logic [IdWidth-1:0] axi_id_t;
// Full-width (not yet bank-split) memory request.
typedef struct packed {
addr_t addr;
mem_atop_t atop;
axi_strb_t strb;
axi_data_t wdata;
logic we;
} mem_req_t;
// Per-beat meta data travelling alongside each memory request.
typedef struct packed {
addr_t addr;
axi_pkg::atop_t atop;
axi_id_t id;
logic last;
axi_pkg::qos_t qos;
axi_pkg::size_t size;
logic write;
} meta_t;
axi_data_t mem_rdata,
m2s_resp;
// Remaining beats of the read / write burst currently being unrolled.
axi_pkg::len_t r_cnt_d, r_cnt_q,
w_cnt_d, w_cnt_q;
logic arb_valid, arb_ready,
rd_valid, rd_ready,
wr_valid, wr_ready,
sel_b, sel_buf_b,
sel_r, sel_buf_r,
sel_valid, sel_ready,
sel_buf_valid, sel_buf_ready,
sel_lock_d, sel_lock_q,
meta_valid, meta_ready,
meta_buf_valid, meta_buf_ready,
meta_sel_d, meta_sel_q,
m2s_req_valid, m2s_req_ready,
m2s_resp_valid, m2s_resp_ready,
mem_req_valid, mem_req_ready,
mem_rvalid;
mem_req_t m2s_req,
mem_req;
meta_t rd_meta,
rd_meta_d, rd_meta_q,
wr_meta,
wr_meta_d, wr_meta_q,
meta, meta_buf;
assign busy_o = axi_req_i.aw_valid | axi_req_i.ar_valid | axi_req_i.w_valid |
axi_resp_o.b_valid | axi_resp_o.r_valid |
(r_cnt_q > 0) | (w_cnt_q > 0);
// Handle reads.
// `rd_meta` is the item offered to the arbiter in this cycle; `rd_meta_d` is the state stored for
// the next beat. The stored address only advances once the beat has been accepted (`rd_ready`).
always_comb begin
// Default assignments
axi_resp_o.ar_ready = 1'b0;
rd_meta_d = rd_meta_q;
rd_meta = 'x;
rd_valid = 1'b0;
r_cnt_d = r_cnt_q;
// Handle R burst in progress.
if (r_cnt_q > '0) begin
rd_meta_d.last = (r_cnt_q == 8'd1);
rd_meta = rd_meta_d;
// Next beat address: stored address plus the number of bytes per beat.
rd_meta.addr = rd_meta_q.addr + axi_pkg::num_bytes(rd_meta_q.size);
rd_valid = 1'b1;
if (rd_ready) begin
r_cnt_d--;
rd_meta_d.addr = rd_meta.addr;
end
// Handle new AR if there is one.
end else if (axi_req_i.ar_valid) begin
// The stored address is size-aligned so that the per-beat increment above stays aligned ...
rd_meta_d = '{
addr: axi_pkg::aligned_addr(axi_req_i.ar.addr, axi_req_i.ar.size),
atop: '0,
id: axi_req_i.ar.id,
last: (axi_req_i.ar.len == '0),
qos: axi_req_i.ar.qos,
size: axi_req_i.ar.size,
write: 1'b0
};
rd_meta = rd_meta_d;
// ... while the first beat is issued with the original AR address.
rd_meta.addr = axi_req_i.ar.addr;
rd_valid = 1'b1;
if (rd_ready) begin
r_cnt_d = axi_req_i.ar.len;
axi_resp_o.ar_ready = 1'b1;
end
end
end
// Handle writes.
// Mirrors the read handling, except that a beat is only offered while W data is valid and that a
// new AW is only started together with its first W beat.
always_comb begin
// Default assignments
axi_resp_o.aw_ready = 1'b0;
axi_resp_o.w_ready = 1'b0;
wr_meta_d = wr_meta_q;
wr_meta = 'x;
wr_valid = 1'b0;
w_cnt_d = w_cnt_q;
// Handle W bursts in progress.
if (w_cnt_q > '0) begin
wr_meta_d.last = (w_cnt_q == 8'd1);
wr_meta = wr_meta_d;
wr_meta.addr = wr_meta_q.addr + axi_pkg::num_bytes(wr_meta_q.size);
if (axi_req_i.w_valid) begin
wr_valid = 1'b1;
if (wr_ready) begin
axi_resp_o.w_ready = 1'b1;
w_cnt_d--;
wr_meta_d.addr = wr_meta.addr;
end
end
// Handle new AW if there is one.
end else if (axi_req_i.aw_valid && axi_req_i.w_valid) begin
wr_meta_d = '{
addr: axi_pkg::aligned_addr(axi_req_i.aw.addr, axi_req_i.aw.size),
atop: axi_req_i.aw.atop,
id: axi_req_i.aw.id,
last: (axi_req_i.aw.len == '0),
qos: axi_req_i.aw.qos,
size: axi_req_i.aw.size,
write: 1'b1
};
wr_meta = wr_meta_d;
wr_meta.addr = axi_req_i.aw.addr;
wr_valid = 1'b1;
if (wr_ready) begin
w_cnt_d = axi_req_i.aw.len;
axi_resp_o.aw_ready = 1'b1;
axi_resp_o.w_ready = 1'b1;
end
end
end
// Arbitrate between reads and writes.
// Input 0 is the read stream, input 1 the write stream, so `meta_sel_d == 1` selects the write.
stream_mux #(
.DATA_T (meta_t),
.N_INP (2)
) i_ax_mux (
.inp_data_i ({wr_meta, rd_meta}),
.inp_valid_i ({wr_valid, rd_valid}),
.inp_ready_o ({wr_ready, rd_ready}),
.inp_sel_i (meta_sel_d),
.oup_data_o (meta),
.oup_valid_o (arb_valid),
.oup_ready_i (arb_ready)
);
// Arbitration decision. While `sel_lock_q` is set the previous selection is held until the
// selected item has been handed over.
always_comb begin
meta_sel_d = meta_sel_q;
sel_lock_d = sel_lock_q;
if (sel_lock_q) begin
meta_sel_d = meta_sel_q;
if (arb_valid && arb_ready) begin
sel_lock_d = 1'b0;
end
end else begin
if (wr_valid ^ rd_valid) begin
// If either write or read is valid but not both, select the valid one.
meta_sel_d = wr_valid;
end else if (wr_valid && rd_valid) begin
// If both write and read are valid, decide according to QoS then burst properties.
// Prioritize higher QoS.
if (wr_meta.qos > rd_meta.qos) begin
meta_sel_d = 1'b1;
end else if (rd_meta.qos > wr_meta.qos) begin
meta_sel_d = 1'b0;
// Decide requests with identical QoS.
end else if (wr_meta.qos == rd_meta.qos) begin
// 1. Prioritize individual writes over read bursts.
// Rationale: Read bursts can be interleaved on AXI but write bursts cannot.
if (wr_meta.last && !rd_meta.last) begin
meta_sel_d = 1'b1;
// 2. Prioritize ongoing burst.
// Rationale: Stalled bursts create backpressure or require costly buffers.
end else if (w_cnt_q > '0) begin
meta_sel_d = 1'b1;
end else if (r_cnt_q > '0) begin
meta_sel_d = 1'b0;
// 3. Otherwise arbitrate round robin to prevent starvation.
end else begin
meta_sel_d = ~meta_sel_q;
end
end
end
// Lock arbitration if valid but not yet ready.
if (arb_valid && !arb_ready) begin
sel_lock_d = 1'b1;
end
end
end
// Fork arbitrated stream to meta data, memory requests, and R/B channel selection.
stream_fork #(
.N_OUP (3)
) i_fork (
.clk_i,
.rst_ni,
.valid_i (arb_valid),
.ready_o (arb_ready),
.valid_o ({sel_valid, meta_valid, m2s_req_valid}),
.ready_i ({sel_ready, meta_ready, m2s_req_ready})
);
// A B response is generated for the last beat of a write; an R response for every read beat
// and for writes with `atop[5]` set.
assign sel_b = meta.write & meta.last;
assign sel_r = ~meta.write | meta.atop[5];
// Buffers the B/R selection until the matching memory response arrives.
stream_fifo #(
.FALL_THROUGH (1'b1),
.DEPTH (1 + BufDepth),
.T (logic[1:0])
) i_sel_buf (
.clk_i,
.rst_ni,
.flush_i (1'b0),
.testmode_i (1'b0),
.data_i ({sel_b, sel_r}),
.valid_i (sel_valid),
.ready_o (sel_ready),
.data_o ({sel_buf_b, sel_buf_r}),
.valid_o (sel_buf_valid),
.ready_i (sel_buf_ready),
.usage_o (/* unused */)
);
// Buffers the meta data (ID, last, ...) used to compose the B/R responses.
stream_fifo #(
.FALL_THROUGH (1'b1),
.DEPTH (1 + BufDepth),
.T (meta_t)
) i_meta_buf (
.clk_i,
.rst_ni,
.flush_i (1'b0),
.testmode_i (1'b0),
.data_i (meta),
.valid_i (meta_valid),
.ready_o (meta_ready),
.data_o (meta_buf),
.valid_o (meta_buf_valid),
.ready_i (meta_buf_ready),
.usage_o (/* unused */)
);
// Map AXI ATOPs to RI5CY AMOs.
// NOTE: the mapping itself is commented out below; as written, `m2s_req.atop` is always zero and
// the W data is forwarded unmodified.
always_comb begin
m2s_req.atop = '0;
m2s_req.wdata = axi_req_i.w.data;
// if (meta_valid && meta.atop[5:4] != axi_pkg::ATOP_NONE) begin
// m2s_req.atop[5] = 1'b1;
// if (meta.atop == axi_pkg::ATOP_ATOMICSWAP) begin
// m2s_req.atop[4:0] = riscv_defines::AMO_SWAP;
// end else begin
// case (meta.atop[2:0])
// axi_pkg::ATOP_ADD: m2s_req.atop[4:0] = riscv_defines::AMO_ADD;
// axi_pkg::ATOP_CLR: begin
// m2s_req.atop[4:0] = riscv_defines::AMO_AND;
// m2s_req.wdata = ~axi_req_i.w.data;
// end
// axi_pkg::ATOP_EOR: m2s_req.atop[4:0] = riscv_defines::AMO_XOR;
// axi_pkg::ATOP_SET: m2s_req.atop[4:0] = riscv_defines::AMO_OR;
// axi_pkg::ATOP_SMAX: m2s_req.atop[4:0] = riscv_defines::AMO_MAX;
// axi_pkg::ATOP_SMIN: m2s_req.atop[4:0] = riscv_defines::AMO_MIN;
// axi_pkg::ATOP_UMAX: m2s_req.atop[4:0] = riscv_defines::AMO_MAXU;
// axi_pkg::ATOP_UMIN: m2s_req.atop[4:0] = riscv_defines::AMO_MINU;
// endcase
// end
// end
end
// Remaining request fields come straight from the arbitrated meta data and the W channel.
assign m2s_req.addr = meta.addr;
assign m2s_req.strb = axi_req_i.w.strb;
assign m2s_req.we = meta.write;
// Interface memory as stream.
stream_to_mem #(
.mem_req_t (mem_req_t),
.mem_resp_t (axi_data_t),
.BufDepth (BufDepth)
) i_mem2stream (
.clk_i,
.rst_ni,
.req_i (m2s_req),
.req_valid_i (m2s_req_valid),
.req_ready_o (m2s_req_ready),
.resp_o (m2s_resp),
.resp_valid_o (m2s_resp_valid),
.resp_ready_i (m2s_resp_ready),
.mem_req_o (mem_req),
.mem_req_valid_o (mem_req_valid),
.mem_req_ready_i (mem_req_ready),
.mem_resp_i (mem_rdata),
.mem_resp_valid_i (mem_rvalid)
);
// Split single memory request to desired number of banks.
mem2banks #(
.AddrWidth (AddrWidth),
.DataWidth (DataWidth),
.NumBanks (NumBanks)
) i_mem2banks (
.clk_i,
.rst_ni,
.req_i (mem_req_valid),
.gnt_o (mem_req_ready),
.addr_i (mem_req.addr),
.wdata_i (mem_req.wdata),
.strb_i (mem_req.strb),
.atop_i (mem_req.atop),
.we_i (mem_req.we),
.rvalid_o (mem_rvalid),
.rdata_o (mem_rdata),
.bank_req_o (mem_req_o),
.bank_gnt_i (mem_gnt_i),
.bank_addr_o (mem_addr_o),
.bank_wdata_o (mem_wdata_o),
.bank_strb_o (mem_strb_o),
.bank_atop_o (mem_atop_o),
.bank_we_o (mem_we_o),
.bank_rvalid_i (mem_rvalid_i),
.bank_rdata_i (mem_rdata_i)
);
// Join memory read data and meta data stream.
logic mem_join_valid, mem_join_ready;
stream_join #(
.N_INP (2)
) i_join (
.inp_valid_i ({m2s_resp_valid, meta_buf_valid}),
.inp_ready_o ({m2s_resp_ready, meta_buf_ready}),
.oup_valid_o (mem_join_valid),
.oup_ready_i (mem_join_ready)
);
// Dynamically fork the joined stream to B and R channels.
stream_fork_dynamic #(
.N_OUP (2)
) i_fork_dynamic (
.clk_i,
.rst_ni,
.valid_i (mem_join_valid),
.ready_o (mem_join_ready),
.sel_i ({sel_buf_b, sel_buf_r}),
.sel_valid_i (sel_buf_valid),
.sel_ready_o (sel_buf_ready),
.valid_o ({axi_resp_o.b_valid, axi_resp_o.r_valid}),
.ready_i ({axi_req_i.b_ready, axi_req_i.r_ready})
);
// Compose B responses.
// The response code is always OKAY; no error is ever signalled by this module.
assign axi_resp_o.b = '{
id: meta_buf.id,
resp: axi_pkg::RESP_OKAY,
user: '0
};
// Compose R responses.
assign axi_resp_o.r = '{
data: m2s_resp,
id: meta_buf.id,
last: meta_buf.last,
resp: axi_pkg::RESP_OKAY,
user: '0
};
// Registers
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni) begin
meta_sel_q <= 1'b0;
sel_lock_q <= 1'b0;
rd_meta_q <= '{default: '0};
wr_meta_q <= '{default: '0};
r_cnt_q <= '0;
w_cnt_q <= '0;
end else begin
meta_sel_q <= meta_sel_d;
sel_lock_q <= sel_lock_d;
rd_meta_q <= rd_meta_d;
wr_meta_q <= wr_meta_d;
r_cnt_q <= r_cnt_d;
w_cnt_q <= w_cnt_d;
end
end
// Assertions
// `assume` properties constrain the AXI master; `assert` properties check this module's outputs
// and the supported feature set (incrementing bursts only, atomics only on writes).
`ifndef VERILATOR
`ifndef TARGET_SYNTHESIS
default disable iff (!rst_ni);
assume property (@(posedge clk_i)
axi_req_i.ar_valid && !axi_resp_o.ar_ready |=> $stable(axi_req_i.ar))
else $error("AR must remain stable until handshake has happened!");
assert property (@(posedge clk_i)
axi_resp_o.r_valid && !axi_req_i.r_ready |=> $stable(axi_resp_o.r))
else $error("R must remain stable until handshake has happened!");
assume property (@(posedge clk_i)
axi_req_i.aw_valid && !axi_resp_o.aw_ready |=> $stable(axi_req_i.aw))
else $error("AW must remain stable until handshake has happened!");
assume property (@(posedge clk_i)
axi_req_i.w_valid && !axi_resp_o.w_ready |=> $stable(axi_req_i.w))
else $error("W must remain stable until handshake has happened!");
assert property (@(posedge clk_i)
axi_resp_o.b_valid && !axi_req_i.b_ready |=> $stable(axi_resp_o.b))
else $error("B must remain stable until handshake has happened!");
assert property (@(posedge clk_i) axi_req_i.ar_valid && axi_req_i.ar.len > 0 |->
axi_req_i.ar.burst == axi_pkg::BURST_INCR)
else $error("Non-incrementing bursts are not supported!");
assert property (@(posedge clk_i) axi_req_i.aw_valid && axi_req_i.aw.len > 0 |->
axi_req_i.aw.burst == axi_pkg::BURST_INCR)
else $error("Non-incrementing bursts are not supported!");
assert property (@(posedge clk_i) meta_valid && meta.atop != '0 |-> meta.write)
else $warning("Unexpected atomic operation on read.");
`endif
`endif
endmodule
/*verilator lint_off DECLFILENAME*/
`include "axi/assign.svh"
`include "axi/typedef.svh"
// Interface wrapper for axi2mem
// SystemVerilog-interface flavour of `axi2mem`.
//
// Builds the AXI channel/request/response structs from the width parameters, converts the
// `AXI_BUS.Slave` interface to and from those structs, and hands everything else straight through
// to the struct-based `axi2mem` instance.
module axi2mem_wrap #(
  parameter int unsigned AddrWidth = 0, // AXI address width
  parameter int unsigned DataWidth = 0, // AXI data width
  parameter int unsigned IdWidth   = 0, // AXI ID width
  parameter int unsigned UserWidth = 0, // AXI user width
  parameter int unsigned NumBanks  = 0, // number of banks at output
  parameter int unsigned BufDepth  = 1, // depth of memory response buffer
  // Dependent parameters, do not override.
  localparam type addr_t     = logic [AddrWidth-1:0],
  localparam type mem_atop_t = logic [5:0],
  localparam type mem_data_t = logic [DataWidth/NumBanks-1:0],
  localparam type mem_strb_t = logic [DataWidth/NumBanks/8-1:0]
) (
  input  logic                     clk_i,
  input  logic                     rst_ni,
  output logic                     busy_o,
  AXI_BUS.Slave                    slv,
  output logic      [NumBanks-1:0] mem_req_o,
  input  logic      [NumBanks-1:0] mem_gnt_i,
  output addr_t     [NumBanks-1:0] mem_addr_o,   // byte address
  output mem_data_t [NumBanks-1:0] mem_wdata_o,  // write data
  output mem_strb_t [NumBanks-1:0] mem_strb_o,   // byte-wise strobe
  output mem_atop_t [NumBanks-1:0] mem_atop_o,   // atomic operation
  output logic      [NumBanks-1:0] mem_we_o,     // write enable
  input  logic      [NumBanks-1:0] mem_rvalid_i, // response valid
  input  mem_data_t [NumBanks-1:0] mem_rdata_i   // read data
);

  // Field types of the AXI structs
  typedef logic [IdWidth-1:0]     id_t;
  typedef logic [DataWidth-1:0]   data_t;
  typedef logic [DataWidth/8-1:0] strb_t;
  typedef logic [UserWidth-1:0]   user_t;

  // Channel, request and response structs
  `AXI_TYPEDEF_AW_CHAN_T (aw_chan_t, addr_t, id_t, user_t);
  `AXI_TYPEDEF_W_CHAN_T  (w_chan_t, data_t, strb_t, user_t);
  `AXI_TYPEDEF_B_CHAN_T  (b_chan_t, id_t, user_t);
  `AXI_TYPEDEF_AR_CHAN_T (ar_chan_t, addr_t, id_t, user_t);
  `AXI_TYPEDEF_R_CHAN_T  (r_chan_t, data_t, id_t, user_t);
  `AXI_TYPEDEF_REQ_T     (req_t, aw_chan_t, w_chan_t, ar_chan_t);
  `AXI_TYPEDEF_RESP_T    (resp_t, b_chan_t, r_chan_t);

  // Struct view of the slave interface
  req_t  req;
  resp_t resp;

  `AXI_ASSIGN_TO_REQ    (req, slv);
  `AXI_ASSIGN_FROM_RESP (slv, resp);

  // The actual converter
  axi2mem #(
    .axi_req_t  ( req_t     ),
    .axi_resp_t ( resp_t    ),
    .AddrWidth  ( AddrWidth ),
    .DataWidth  ( DataWidth ),
    .IdWidth    ( IdWidth   ),
    .NumBanks   ( NumBanks  ),
    .BufDepth   ( BufDepth  )
  ) i_axi2mem (
    .clk_i        ( clk_i        ),
    .rst_ni       ( rst_ni       ),
    .busy_o       ( busy_o       ),
    .axi_req_i    ( req          ),
    .axi_resp_o   ( resp         ),
    .mem_req_o    ( mem_req_o    ),
    .mem_gnt_i    ( mem_gnt_i    ),
    .mem_addr_o   ( mem_addr_o   ),
    .mem_wdata_o  ( mem_wdata_o  ),
    .mem_strb_o   ( mem_strb_o   ),
    .mem_atop_o   ( mem_atop_o   ),
    .mem_we_o     ( mem_we_o     ),
    .mem_rvalid_i ( mem_rvalid_i ),
    .mem_rdata_i  ( mem_rdata_i  )
  );

endmodule
// Split memory access over multiple parallel banks, where each bank has its own req/gnt request and
// valid response direction.
// Fans one wide memory port out to `NumBanks` narrower bank ports.
//
// Request side: the input address is aligned down to the full data width and bank `i` receives
// that aligned address plus `i * BytesPerBank`, together with its own slice of the write data and
// strobe. Every bank request passes through its own `fall_through_register`.
// Response side: every bank response passes through a `fall_through_register`; the combined
// response is valid once all banks hold valid data.
module mem2banks #(
  parameter int unsigned AddrWidth = 0, // input address width
  parameter int unsigned DataWidth = 0, // input data width, must be a power of two
  parameter int unsigned NumBanks  = 0, // number of banks at output, must evenly divide the data
                                        // width
  // Dependent parameters, do not override.
  localparam type addr_t     = logic [AddrWidth-1:0],
  localparam type atop_t     = logic [5:0],
  localparam type inp_data_t = logic [DataWidth-1:0],
  localparam type inp_strb_t = logic [DataWidth/8-1:0],
  localparam type oup_data_t = logic [DataWidth/NumBanks-1:0],
  localparam type oup_strb_t = logic [DataWidth/NumBanks/8-1:0]
) (
  input  logic                     clk_i,
  input  logic                     rst_ni,
  // Wide port
  input  logic                     req_i,
  output logic                     gnt_o,
  input  addr_t                    addr_i,
  input  inp_data_t                wdata_i,
  input  inp_strb_t                strb_i,
  input  atop_t                    atop_i,
  input  logic                     we_i,
  output logic                     rvalid_o,
  output inp_data_t                rdata_o,
  // Bank ports
  output logic      [NumBanks-1:0] bank_req_o,
  input  logic      [NumBanks-1:0] bank_gnt_i,
  output addr_t     [NumBanks-1:0] bank_addr_o,
  output oup_data_t [NumBanks-1:0] bank_wdata_o,
  output oup_strb_t [NumBanks-1:0] bank_strb_o,
  output atop_t     [NumBanks-1:0] bank_atop_o,
  output logic      [NumBanks-1:0] bank_we_o,
  input  logic      [NumBanks-1:0] bank_rvalid_i,
  input  oup_data_t [NumBanks-1:0] bank_rdata_i
);

  localparam DataBytes    = $bits(inp_strb_t);
  localparam BitsPerBank  = $bits(oup_data_t);
  localparam BytesPerBank = $bits(oup_strb_t);

  // Everything a single bank needs for one access
  typedef struct packed {
    addr_t     addr;
    oup_data_t wdata;
    oup_strb_t strb;
    atop_t     atop;
    logic      we;
  } req_t;

  logic                req_valid;
  logic [NumBanks-1:0] req_ready,
                       resp_valid, resp_ready;
  req_t [NumBanks-1:0] bank_req,
                       bank_oup;

  // Clears the address bits below the full data width (DataBytes).
  function automatic addr_t align_addr(input addr_t addr);
    addr_t word_idx;
    word_idx = addr >> $clog2(DataBytes);
    return word_idx << $clog2(DataBytes);
  endfunction

  // Handle requests.
  // A request is only pushed into the bank registers in the cycle it is granted.
  assign req_valid = req_i & gnt_o;

  for (genvar i = 0; i < NumBanks; i++) begin : gen_reqs
    // Slice the wide request for bank i
    assign bank_req[i].we    = we_i;
    assign bank_req[i].atop  = atop_i;
    assign bank_req[i].strb  = strb_i[i*BytesPerBank+:BytesPerBank];
    assign bank_req[i].wdata = wdata_i[i*BitsPerBank+:BitsPerBank];
    assign bank_req[i].addr  = align_addr(addr_i) + i * BytesPerBank;

    fall_through_register #(
      .T ( req_t )
    ) i_ft_reg (
      .clk_i      ( clk_i         ),
      .rst_ni     ( rst_ni        ),
      .clr_i      ( 1'b0          ),
      .testmode_i ( 1'b0          ),
      .valid_i    ( req_valid     ),
      .ready_o    ( req_ready[i]  ),
      .data_i     ( bank_req[i]   ),
      .valid_o    ( bank_req_o[i] ),
      .ready_i    ( bank_gnt_i[i] ),
      .data_o     ( bank_oup[i]   )
    );

    // Unpack the registered request onto the bank port
    assign bank_we_o[i]    = bank_oup[i].we;
    assign bank_atop_o[i]  = bank_oup[i].atop;
    assign bank_strb_o[i]  = bank_oup[i].strb;
    assign bank_wdata_o[i] = bank_oup[i].wdata;
    assign bank_addr_o[i]  = bank_oup[i].addr;
  end

  // Grant output if all our requests have been granted.
  // (all request registers and all response registers must be ready)
  assign gnt_o = &{req_ready, resp_ready};

  // Handle responses.
  for (genvar i = 0; i < NumBanks; i++) begin : gen_resp_regs
    fall_through_register #(
      .T ( oup_data_t )
    ) i_ft_reg (
      .clk_i      ( clk_i                               ),
      .rst_ni     ( rst_ni                              ),
      .clr_i      ( 1'b0                                ),
      .testmode_i ( 1'b0                                ),
      .valid_i    ( bank_rvalid_i[i]                    ),
      .ready_o    ( resp_ready[i]                       ),
      .data_i     ( bank_rdata_i[i]                     ),
      .valid_o    ( resp_valid[i]                       ),
      .ready_i    ( rvalid_o                            ),
      .data_o     ( rdata_o[i*BitsPerBank+:BitsPerBank] )
    );
  end

  // The wide response is valid (and popped) once every bank has delivered its part.
  assign rvalid_o = &resp_valid;

  // Assertions
  // Elaboration-time sanity checks of the parameterization.
  `ifndef VERILATOR
  `ifndef TARGET_SYNTHESIS
  initial begin
    assume (DataWidth != 0 && (DataWidth & (DataWidth - 1)) == 0)
      else $fatal(1, "Data width must be a power of two!");
    assume (DataWidth % NumBanks == 0)
      else $fatal(1, "Data width must be evenly divisible over banks!");
    assume ((DataWidth / NumBanks) % 8 == 0)
      else $fatal(1, "Data width of each bank must be divisible into 8-bit bytes!");
  end
  `endif
  `endif

endmodule
// Copyright (c) 2014-2018 ETH Zurich, University of Bologna
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors:
// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// - Andreas Kurth <akurth@iis.ee.ethz.ch>
/// An AXI4 cut.
///
/// Breaks all combinatorial paths between its input and output.
/// Struct-based AXI4 cut.
///
/// Inserts one `spill_register` into each of the five AXI channels. AW, W and AR travel from the
/// slave port to the master port; B and R travel from the master port back to the slave port.
/// `Bypass` is forwarded unchanged to every `spill_register`.
module axi_cut #(
  // bypass enable
  parameter bit  Bypass    = 1'b0,
  // AXI channel structs
  parameter type aw_chan_t = logic,
  parameter type w_chan_t  = logic,
  parameter type b_chan_t  = logic,
  parameter type ar_chan_t = logic,
  parameter type r_chan_t  = logic,
  // AXI request & response structs
  parameter type req_t     = logic,
  parameter type resp_t    = logic
) (
  input  logic  clk_i,
  input  logic  rst_ni,
  // slave port
  input  req_t  slv_req_i,
  output resp_t slv_resp_o,
  // master port
  output req_t  mst_req_o,
  input  resp_t mst_resp_i
);

  ////////////////////////////////////////////////////
  //  Request channels: slave port -> master port   //
  ////////////////////////////////////////////////////

  // AW channel
  spill_register #(
    .Bypass ( Bypass    ),
    .T      ( aw_chan_t )
  ) i_reg_aw (
    .clk_i   ( clk_i               ),
    .rst_ni  ( rst_ni              ),
    .data_i  ( slv_req_i.aw        ),
    .valid_i ( slv_req_i.aw_valid  ),
    .ready_o ( slv_resp_o.aw_ready ),
    .data_o  ( mst_req_o.aw        ),
    .valid_o ( mst_req_o.aw_valid  ),
    .ready_i ( mst_resp_i.aw_ready )
  );

  // W channel
  spill_register #(
    .Bypass ( Bypass   ),
    .T      ( w_chan_t )
  ) i_reg_w (
    .clk_i   ( clk_i              ),
    .rst_ni  ( rst_ni             ),
    .data_i  ( slv_req_i.w        ),
    .valid_i ( slv_req_i.w_valid  ),
    .ready_o ( slv_resp_o.w_ready ),
    .data_o  ( mst_req_o.w        ),
    .valid_o ( mst_req_o.w_valid  ),
    .ready_i ( mst_resp_i.w_ready )
  );

  // AR channel
  spill_register #(
    .Bypass ( Bypass    ),
    .T      ( ar_chan_t )
  ) i_reg_ar (
    .clk_i   ( clk_i               ),
    .rst_ni  ( rst_ni              ),
    .data_i  ( slv_req_i.ar        ),
    .valid_i ( slv_req_i.ar_valid  ),
    .ready_o ( slv_resp_o.ar_ready ),
    .data_o  ( mst_req_o.ar        ),
    .valid_o ( mst_req_o.ar_valid  ),
    .ready_i ( mst_resp_i.ar_ready )
  );

  ////////////////////////////////////////////////////
  //  Response channels: master port -> slave port  //
  ////////////////////////////////////////////////////

  // B channel
  spill_register #(
    .Bypass ( Bypass   ),
    .T      ( b_chan_t )
  ) i_reg_b (
    .clk_i   ( clk_i              ),
    .rst_ni  ( rst_ni             ),
    .data_i  ( mst_resp_i.b       ),
    .valid_i ( mst_resp_i.b_valid ),
    .ready_o ( mst_req_o.b_ready  ),
    .data_o  ( slv_resp_o.b       ),
    .valid_o ( slv_resp_o.b_valid ),
    .ready_i ( slv_req_i.b_ready  )
  );

  // R channel
  spill_register #(
    .Bypass ( Bypass   ),
    .T      ( r_chan_t )
  ) i_reg_r (
    .clk_i   ( clk_i              ),
    .rst_ni  ( rst_ni             ),
    .data_i  ( mst_resp_i.r       ),
    .valid_i ( mst_resp_i.r_valid ),
    .ready_o ( mst_req_o.r_ready  ),
    .data_o  ( slv_resp_o.r       ),
    .valid_o ( slv_resp_o.r_valid ),
    .ready_i ( slv_req_i.r_ready  )
  );

endmodule
`include "axi/assign.svh"
`include "axi/typedef.svh"
// interface wrapper
// `AXI_BUS` interface flavour of `axi_cut`.
//
// Derives the AXI structs from the width parameters, maps the `in` (slave) and `out` (master)
// interfaces onto request/response structs and places a struct-based `axi_cut` in between.
module axi_cut_intf #(
  // Bypass enable
  parameter bit          BYPASS     = 1'b0,
  // The address width.
  parameter int unsigned ADDR_WIDTH = 0,
  // The data width.
  parameter int unsigned DATA_WIDTH = 0,
  // The ID width.
  parameter int unsigned ID_WIDTH   = 0,
  // The user data width.
  parameter int unsigned USER_WIDTH = 0
) (
  input logic    clk_i,
  input logic    rst_ni,
  AXI_BUS.Slave  in,
  AXI_BUS.Master out
);

  // Field types
  typedef logic [ADDR_WIDTH-1:0]   addr_t;
  typedef logic [DATA_WIDTH-1:0]   data_t;
  typedef logic [DATA_WIDTH/8-1:0] strb_t;
  typedef logic [ID_WIDTH-1:0]     id_t;
  typedef logic [USER_WIDTH-1:0]   user_t;

  // Channel, request and response structs
  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)

  // Struct views of the two interfaces
  req_t  slv_req,  mst_req;
  resp_t slv_resp, mst_resp;

  // Slave side: interface -> request struct, response struct -> interface
  `AXI_ASSIGN_TO_REQ(slv_req, in)
  `AXI_ASSIGN_FROM_RESP(in, slv_resp)

  // Master side: request struct -> interface, interface -> response struct
  `AXI_ASSIGN_FROM_REQ(out, mst_req)
  `AXI_ASSIGN_TO_RESP(mst_resp, out)

  axi_cut #(
    .Bypass    ( BYPASS    ),
    .aw_chan_t ( aw_chan_t ),
    .w_chan_t  ( w_chan_t  ),
    .b_chan_t  ( b_chan_t  ),
    .ar_chan_t ( ar_chan_t ),
    .r_chan_t  ( r_chan_t  ),
    .req_t     ( req_t     ),
    .resp_t    ( resp_t    )
  ) i_axi_cut (
    .clk_i      ( clk_i    ),
    .rst_ni     ( rst_ni   ),
    .slv_req_i  ( slv_req  ),
    .slv_resp_o ( slv_resp ),
    .mst_req_o  ( mst_req  ),
    .mst_resp_i ( mst_resp )
  );

  // Check the invariants.
  // All widths must be non-zero and must match the parameters of both attached interfaces.
  // pragma translate_off
  `ifndef VERILATOR
  initial begin
    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
    assert (ID_WIDTH   > 0) else $fatal(1, "Wrong id   width parameter");
    assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter");
    assert (in.AXI_ADDR_WIDTH  == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (in.AXI_DATA_WIDTH  == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (in.AXI_ID_WIDTH    == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
    assert (in.AXI_USER_WIDTH  == USER_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (out.AXI_ID_WIDTH   == ID_WIDTH)   else $fatal(1, "Wrong interface definition");
    assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition");
  end
  `endif
  // pragma translate_on

endmodule
// `AXI_LITE` interface flavour of `axi_cut`.
//
// Derives the AXI4-Lite structs from the width parameters, maps the `in` (slave) and `out`
// (master) interfaces onto request/response structs and places a struct-based `axi_cut` in
// between.
module axi_lite_cut_intf #(
  // bypass enable
  parameter bit          BYPASS     = 1'b0,
  /// The address width.
  parameter int unsigned ADDR_WIDTH = 0,
  /// The data width.
  parameter int unsigned DATA_WIDTH = 0
) (
  input logic     clk_i,
  input logic     rst_ni,
  AXI_LITE.Slave  in,
  AXI_LITE.Master out
);

  // Field types
  typedef logic [ADDR_WIDTH-1:0]   addr_t;
  typedef logic [DATA_WIDTH-1:0]   data_t;
  typedef logic [DATA_WIDTH/8-1:0] strb_t;

  // Channel, request and response structs
  `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t)
  `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t)
  `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t)
  `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t)
  `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t)
  `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
  `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)

  // Struct views of the two interfaces
  req_t  slv_req,  mst_req;
  resp_t slv_resp, mst_resp;

  // Slave side: interface -> request struct, response struct -> interface
  `AXI_LITE_ASSIGN_TO_REQ(slv_req, in)
  `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp)

  // Master side: request struct -> interface, interface -> response struct
  `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req)
  `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out)

  axi_cut #(
    .Bypass    ( BYPASS    ),
    .aw_chan_t ( aw_chan_t ),
    .w_chan_t  ( w_chan_t  ),
    .b_chan_t  ( b_chan_t  ),
    .ar_chan_t ( ar_chan_t ),
    .r_chan_t  ( r_chan_t  ),
    .req_t     ( req_t     ),
    .resp_t    ( resp_t    )
  ) i_axi_cut (
    .clk_i      ( clk_i    ),
    .rst_ni     ( rst_ni   ),
    .slv_req_i  ( slv_req  ),
    .slv_resp_o ( slv_resp ),
    .mst_req_o  ( mst_req  ),
    .mst_resp_i ( mst_resp )
  );

  // Check the invariants.
  // Both widths must be non-zero and must match the parameters of both attached interfaces.
  // pragma translate_off
  `ifndef VERILATOR
  initial begin
    assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter");
    assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter");
    assert (in.AXI_ADDR_WIDTH  == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (in.AXI_DATA_WIDTH  == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition");
    assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition");
  end
  `endif
  // pragma translate_on

endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Implement a hierarchical AXI interconnect. Below shows one level of the interconnect. This module
// recursively instantiates itself and creates a tree of interconnects, each with `NumPortsPerMux`
// slave ports.
//
// TODO: Add a configurable cache per level
//
//                AXI Mux            ID Width
//                                   Converter
//               |‾╲
//     +-------->|  ╲
//               |   +             +-------+
//     +-------->| M |             |       |
//               | U |------------>|   >   |--------->
//               | X |             |       |
//               |   +             +-------+
//     +-------->|  ╱
//               |_╱
//                       Internal
//     Slave type          type               Master type
module axi_hier_interco #(
parameter int unsigned NumSlvPorts = 0,
parameter int unsigned NumPortsPerMux = NumSlvPorts,
parameter int unsigned EnableCache = 1'b0,
parameter int unsigned AddrWidth = 0,
parameter int unsigned DataWidth = 0,
parameter int unsigned SlvIdWidth = 0,
parameter int unsigned MstIdWidth = 0,
parameter int unsigned UserWidth = 0,
parameter type slv_req_t = logic,
parameter type slv_resp_t = logic,
parameter type mst_req_t = logic,
parameter type mst_resp_t = logic
) (
input logic clk_i,
input logic rst_ni,
input logic test_i,
input slv_req_t [NumSlvPorts-1:0] slv_req_i,
output slv_resp_t [NumSlvPorts-1:0] slv_resp_o,
output mst_req_t mst_req_o,
input mst_resp_t mst_resp_i
);
////////////////
// Typedefs //
////////////////
localparam int unsigned IntIdWidth = SlvIdWidth + $clog2(NumSlvPorts);
typedef logic [AddrWidth-1:0] addr_t;
typedef logic [DataWidth-1:0] data_t;
typedef logic [DataWidth/8-1:0] strb_t;
typedef logic [SlvIdWidth-1:0] slv_id_t;
typedef logic [MstIdWidth-1:0] mst_id_t;
typedef logic [IntIdWidth-1:0] int_id_t;
typedef logic [UserWidth-1:0] user_t;
`include "axi/typedef.svh"
// Common AXI types
`AXI_TYPEDEF_W_CHAN_T(w_t, data_t, strb_t, user_t);
// Slave AXI types
`AXI_TYPEDEF_AW_CHAN_T(slv_aw_t, addr_t, slv_id_t, user_t);
`AXI_TYPEDEF_B_CHAN_T(slv_b_t, slv_id_t, user_t);
`AXI_TYPEDEF_AR_CHAN_T(slv_ar_t, addr_t, slv_id_t, user_t);
`AXI_TYPEDEF_R_CHAN_T(slv_r_t, data_t, slv_id_t, user_t);
// Intermediate AXI types
`AXI_TYPEDEF_AW_CHAN_T(int_aw_t, addr_t, int_id_t, user_t);
`AXI_TYPEDEF_B_CHAN_T(int_b_t, int_id_t, user_t);
`AXI_TYPEDEF_AR_CHAN_T(int_ar_t, addr_t, int_id_t, user_t);
`AXI_TYPEDEF_R_CHAN_T(int_r_t, data_t, int_id_t, user_t);
`AXI_TYPEDEF_REQ_T(int_req_t, int_aw_t, w_t, int_ar_t);
`AXI_TYPEDEF_RESP_T(int_resp_t, int_b_t, int_r_t );
///////////////
// Interco //
///////////////
// Recursive module to implement multiple hierarchy levels at once
if (NumSlvPorts <= NumPortsPerMux) begin : gen_axi_level_final
// Intermediate AXI channel
int_req_t int_req;
int_resp_t int_resp;
axi_mux #(
// AXI parameter and channel types
.SlvAxiIDWidth (SlvIdWidth ), // AXI ID width, slave ports
.slv_aw_chan_t (slv_aw_t ), // AW Channel Type, slave ports
.mst_aw_chan_t (int_aw_t ), // AW Channel Type, master port
.w_chan_t (w_t ), // W Channel Type, all ports
.slv_b_chan_t (slv_b_t ), // B Channel Type, slave ports
.mst_b_chan_t (int_b_t ), // B Channel Type, master port
.slv_ar_chan_t (slv_ar_t ), // AR Channel Type, slave ports
.mst_ar_chan_t (int_ar_t ), // AR Channel Type, master port
.slv_r_chan_t (slv_r_t ), // R Channel Type, slave ports
.mst_r_chan_t (int_r_t ), // R Channel Type, master port
.slv_req_t (slv_req_t ), // Slave port request type
.slv_resp_t (slv_resp_t ), // Slave port response type
.mst_req_t (int_req_t ), // Master ports request type
.mst_resp_t (int_resp_t ), // Master ports response type
.NoSlvPorts (NumSlvPorts), // Number of slave ports
// Maximum number of outstanding transactions per write
.MaxWTrans (8 ),
// If enabled, this multiplexer is purely combinatorial
.FallThrough (1'b0 ),
// add spill register on write master ports, adds a cycle latency on write channels
.SpillAw (1'b1 ),
.SpillW (1'b1 ),
.SpillB (1'b1 ),
// add spill register on read master ports, adds a cycle latency on read channels
.SpillAr (1'b1 ),
.SpillR (1'b1 )
) i_axi_mux (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.test_i (test_i ),
.slv_reqs_i (slv_req_i ),
.slv_resps_o (slv_resp_o),
.mst_req_o (int_req ),
.mst_resp_i (int_resp )
);
axi_id_remap #(
.AxiSlvPortIdWidth (IntIdWidth),
.AxiSlvPortMaxUniqIds (IntIdWidth),
.AxiMaxTxnsPerId (4 ),
.AxiMstPortIdWidth (MstIdWidth),
.slv_req_t (int_req_t ),
.slv_resp_t (int_resp_t),
.mst_req_t (mst_req_t ),
.mst_resp_t (mst_resp_t)
) i_axi_id_remap (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.slv_req_i (int_req ),
.slv_resp_o (int_resp ),
.mst_req_o (mst_req_o ),
.mst_resp_i (mst_resp_i)
);
// TODO: Implement cache
if (EnableCache[0])
$error("[axi_hier_interco] `EnableCache` not yet supported.");
// Check all the AXI widths
if ($bits(slv_req_i[0].aw.addr) != AddrWidth)
$error("[axi_hier_interco] `slv_req_i.aw.addr` does not match AddrWidth.");
if ($bits(slv_req_i[0].w.data) != DataWidth)
$error("[axi_hier_interco] `slv_req_i.w.data` does not match DataWidth.");
if ($bits(slv_req_i[0].aw.id) != SlvIdWidth)
$error("[axi_hier_interco] `slv_req_i.aw.id` does not match SlvIdWidth.");
if ($bits(slv_req_i[0].aw.user) != UserWidth)
$error("[axi_hier_interco] `slv_req_i.aw.user` does not match UserWidth.");
if ($bits(mst_req_o.aw.addr) != AddrWidth)
$error("[axi_hier_interco] `mst_req_o.aw.addr` does not match AddrWidth.");
if ($bits(mst_req_o.w.data) != DataWidth)
$error("[axi_hier_interco] `mst_req_o.w.data` does not match DataWidth.");
if ($bits(mst_req_o.aw.id) != MstIdWidth)
$error("[axi_hier_interco] `mst_req_o.aw.id` does not match MstIdWidth.");
if ($bits(mst_req_o.aw.user) != UserWidth)
$error("[axi_hier_interco] `mst_req_o.aw.user` does not match UserWidth.");
if ($bits(int_req.aw.addr) != AddrWidth)
$error("[axi_hier_interco] `int_req.aw.addr` does not match AddrWidth.");
if ($bits(int_req.w.data) != DataWidth)
$error("[axi_hier_interco] `int_req.w.data` does not match DataWidth.");
if ($bits(int_req.aw.id) != IntIdWidth)
$error("[axi_hier_interco] `int_req.aw.id` does not match IntIdWidth.");
if ($bits(int_req.aw.user) != UserWidth)
$error("[axi_hier_interco] `int_req.aw.user` does not match UserWidth.");
end else begin : gen_axi_level_recursive
// More than one level missing. --> Recursively call this module
// This level will contain `NumMuxes` interconnects
localparam int unsigned NumMuxes = NumSlvPorts / NumPortsPerMux;
slv_req_t [NumMuxes-1:0] int_req;
slv_resp_t [NumMuxes-1:0] int_resp;
for (genvar i = 0; i < NumMuxes; i++) begin : gen_axi_intercos
axi_hier_interco #(
.NumSlvPorts (NumPortsPerMux),
.NumPortsPerMux (NumPortsPerMux),
.EnableCache (EnableCache[0]),
.AddrWidth (AddrWidth ),
.DataWidth (DataWidth ),
.SlvIdWidth (SlvIdWidth ),
.MstIdWidth (SlvIdWidth ),
.UserWidth (UserWidth ),
.slv_req_t (slv_req_t ),
.slv_resp_t (slv_resp_t ),
.mst_req_t (slv_req_t ),
.mst_resp_t (slv_resp_t )
) i_axi_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.test_i (test_i ),
.slv_req_i (slv_req_i[i*NumPortsPerMux +: NumPortsPerMux] ),
.slv_resp_o (slv_resp_o[i*NumPortsPerMux +: NumPortsPerMux]),
.mst_req_o (int_req[i] ),
.mst_resp_i (int_resp[i] )
);
end
axi_hier_interco #(
.NumSlvPorts (NumMuxes ),
.NumPortsPerMux (NumPortsPerMux),
.EnableCache (EnableCache>>1),
.AddrWidth (AddrWidth ),
.DataWidth (DataWidth ),
.SlvIdWidth (SlvIdWidth ),
.MstIdWidth (MstIdWidth ),
.UserWidth (UserWidth ),
.slv_req_t (slv_req_t ),
.slv_resp_t (slv_resp_t ),
.mst_req_t (mst_req_t ),
.mst_resp_t (mst_resp_t )
) i_axi_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.test_i (test_i ),
.slv_req_i (int_req ),
.slv_resp_o (int_resp ),
.mst_req_o (mst_req_o ),
.mst_resp_i (mst_resp_i)
);
if (NumMuxes * NumPortsPerMux != NumSlvPorts)
$error("[axi_hier_interco] `NumSlvPorts mod NumPortsPerMux` must be 0.");
end
if (NumPortsPerMux <= 1)
$error("[axi_hier_interco] `NumPortsPerMux` must be bigger than 1.");
endmodule
// Copyright 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors:
// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
// - Andreas Kurth <akurth@iis.ee.ethz.ch>
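// AXI ID Prepend: This module prepends `pre_id_i` as the most significant bits of the AXI ID on
// the AW and AR channels and strips those bits again from the IDs on the B and R channels.
// Constraints enforced through assertions: ID width of slave and master port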
module axi_id_prepend #(
  // Number of AXI buses handled in parallel; all of them receive the same `pre_id_i`
  parameter int unsigned NoBus             = 1,
  // ID width on the slave-port side (narrow IDs)
  parameter int unsigned AxiIdWidthSlvPort = 4,
  // ID width on the master-port side (wide IDs)
  parameter int unsigned AxiIdWidthMstPort = 6,
  // Channel types of the slave ports
  parameter type slv_aw_chan_t = logic,
  parameter type slv_w_chan_t  = logic,
  parameter type slv_b_chan_t  = logic,
  parameter type slv_ar_chan_t = logic,
  parameter type slv_r_chan_t  = logic,
  // Channel types of the master ports
  parameter type mst_aw_chan_t = logic,
  parameter type mst_w_chan_t  = logic,
  parameter type mst_b_chan_t  = logic,
  parameter type mst_ar_chan_t = logic,
  parameter type mst_r_chan_t  = logic,
  // DEPENDENT PARAMETER DO NOT OVERWRITE!
  // Number of ID bits added on the way from the slave ports to the master ports.
  parameter int unsigned PreIdWidth = AxiIdWidthMstPort - AxiIdWidthSlvPort
) (
  // Value written into the upper `PreIdWidth` ID bits of every forwarded AW and AR
  input  logic [PreIdWidth-1:0]     pre_id_i,
  // ---------------------------------------------------------------------------
  // Slave ports (narrow IDs): connect master modules here
  // ---------------------------------------------------------------------------
  // AW
  input  slv_aw_chan_t [NoBus-1:0]  slv_aw_chans_i,
  input  logic         [NoBus-1:0]  slv_aw_valids_i,
  output logic         [NoBus-1:0]  slv_aw_readies_o,
  // W
  input  slv_w_chan_t  [NoBus-1:0]  slv_w_chans_i,
  input  logic         [NoBus-1:0]  slv_w_valids_i,
  output logic         [NoBus-1:0]  slv_w_readies_o,
  // B
  output slv_b_chan_t  [NoBus-1:0]  slv_b_chans_o,
  output logic         [NoBus-1:0]  slv_b_valids_o,
  input  logic         [NoBus-1:0]  slv_b_readies_i,
  // AR
  input  slv_ar_chan_t [NoBus-1:0]  slv_ar_chans_i,
  input  logic         [NoBus-1:0]  slv_ar_valids_i,
  output logic         [NoBus-1:0]  slv_ar_readies_o,
  // R
  output slv_r_chan_t  [NoBus-1:0]  slv_r_chans_o,
  output logic         [NoBus-1:0]  slv_r_valids_o,
  input  logic         [NoBus-1:0]  slv_r_readies_i,
  // ---------------------------------------------------------------------------
  // Master ports (wide IDs): connect slave modules here
  // ---------------------------------------------------------------------------
  // AW
  output mst_aw_chan_t [NoBus-1:0]  mst_aw_chans_o,
  output logic         [NoBus-1:0]  mst_aw_valids_o,
  input  logic         [NoBus-1:0]  mst_aw_readies_i,
  // W
  output mst_w_chan_t  [NoBus-1:0]  mst_w_chans_o,
  output logic         [NoBus-1:0]  mst_w_valids_o,
  input  logic         [NoBus-1:0]  mst_w_readies_i,
  // B
  input  mst_b_chan_t  [NoBus-1:0]  mst_b_chans_i,
  input  logic         [NoBus-1:0]  mst_b_valids_i,
  output logic         [NoBus-1:0]  mst_b_readies_o,
  // AR
  output mst_ar_chan_t [NoBus-1:0]  mst_ar_chans_o,
  output logic         [NoBus-1:0]  mst_ar_valids_o,
  input  logic         [NoBus-1:0]  mst_ar_readies_i,
  // R
  input  mst_r_chan_t  [NoBus-1:0]  mst_r_chans_i,
  input  logic         [NoBus-1:0]  mst_r_valids_i,
  output logic         [NoBus-1:0]  mst_r_readies_o
);

  // ---------------------------------------------------------------------------
  // Channel payloads, one iteration per bus
  // ---------------------------------------------------------------------------
  for (genvar bus_idx = 0; bus_idx < NoBus; bus_idx++) begin : gen_id_prepend
    if (PreIdWidth != 0) begin : gen_prepend
      // AW: copy every field, then overwrite the ID with {pre_id_i, narrow ID}.
      always_comb begin
        mst_aw_chans_o[bus_idx]    = slv_aw_chans_i[bus_idx];
        mst_aw_chans_o[bus_idx].id = {pre_id_i,
                                      slv_aw_chans_i[bus_idx].id[AxiIdWidthSlvPort-1:0]};
      end
      // AR: same treatment as AW.
      always_comb begin
        mst_ar_chans_o[bus_idx]    = slv_ar_chans_i[bus_idx];
        mst_ar_chans_o[bus_idx].id = {pre_id_i,
                                      slv_ar_chans_i[bus_idx].id[AxiIdWidthSlvPort-1:0]};
      end
    end else begin : gen_no_prepend
      // Nothing to prepend: request channels are forwarded unchanged.
      // NOTE(review): the `p_assert` block below requires the master ID to be strictly wider
      // than the slave ID, so this branch looks unreachable in simulation -- confirm intent.
      assign mst_aw_chans_o[bus_idx] = slv_aw_chans_i[bus_idx];
      assign mst_ar_chans_o[bus_idx] = slv_ar_chans_i[bus_idx];
    end

    // Responses are assigned directly from the wide to the narrow channel type. This relies on
    // `id` occupying the most significant bits of the channel struct, so that the truncation of
    // the assignment drops exactly the prepended ID bits.
    assign slv_b_chans_o[bus_idx] = mst_b_chans_i[bus_idx];
    assign slv_r_chans_o[bus_idx] = mst_r_chans_i[bus_idx];
  end

  // ---------------------------------------------------------------------------
  // W payload and all handshakes are passed through untouched
  // ---------------------------------------------------------------------------
  // Request direction (slave ports -> master ports)
  assign mst_w_chans_o    = slv_w_chans_i;
  assign mst_aw_valids_o  = slv_aw_valids_i;
  assign mst_w_valids_o   = slv_w_valids_i;
  assign mst_ar_valids_o  = slv_ar_valids_i;
  assign mst_b_readies_o  = slv_b_readies_i;
  assign mst_r_readies_o  = slv_r_readies_i;
  // Response direction (master ports -> slave ports)
  assign slv_aw_readies_o = mst_aw_readies_i;
  assign slv_w_readies_o  = mst_w_readies_i;
  assign slv_ar_readies_o = mst_ar_readies_i;
  assign slv_b_valids_o   = mst_b_valids_i;
  assign slv_r_valids_o   = mst_r_valids_i;

  // ---------------------------------------------------------------------------
  // Simulation-only checks (only bus 0 is inspected)
  // ---------------------------------------------------------------------------
  // pragma translate_off
  `ifndef VERILATOR
  // Parameter sanity, evaluated once at time zero.
  initial begin : p_assert
    assert(NoBus > 0)
      else $fatal(1, "Input must be at least one element wide.");
    assert(PreIdWidth == ($bits(mst_aw_chans_o[0].id) - $bits(slv_aw_chans_i[0].id)))
      else $fatal(1, "Prepend ID Width must be: $bits(mst_aw_chans_o.id)-$bits(slv_aw_chans_i.id)");
    assert ($bits(mst_aw_chans_o[0].id) > $bits(slv_aw_chans_i[0].id))
      else $fatal(1, "The master AXI port has to have a wider ID than the slave port.");
  end

  // AW: the lower ID bits and the listed fields must pass through unmodified.
  aw_id   : assert final(
      mst_aw_chans_o[0].id[$bits(slv_aw_chans_i[0].id)-1:0] === slv_aw_chans_i[0].id)
        else $fatal (1, "Something with the AW channel ID prepending went wrong.");
  aw_addr : assert final(mst_aw_chans_o[0].addr === slv_aw_chans_i[0].addr)
        else $fatal (1, "Something with the AW channel ID prepending went wrong.");
  aw_len  : assert final(mst_aw_chans_o[0].len === slv_aw_chans_i[0].len)
        else $fatal (1, "Something with the AW channel ID prepending went wrong.");
  aw_size : assert final(mst_aw_chans_o[0].size === slv_aw_chans_i[0].size)
        else $fatal (1, "Something with the AW channel ID prepending went wrong.");
  aw_qos  : assert final(mst_aw_chans_o[0].qos === slv_aw_chans_i[0].qos)
        else $fatal (1, "Something with the AW channel ID prepending went wrong.");

  // B: the narrow ID must equal the lower bits of the wide ID; `resp` is unchanged.
  b_id    : assert final(
      mst_b_chans_i[0].id[$bits(slv_b_chans_o[0].id)-1:0] === slv_b_chans_o[0].id)
        else $fatal (1, "Something with the B channel ID stripping went wrong.");
  b_resp  : assert final(mst_b_chans_i[0].resp === slv_b_chans_o[0].resp)
        else $fatal (1, "Something with the B channel ID stripping went wrong.");

  // AR: same checks as for AW.
  ar_id   : assert final(
      mst_ar_chans_o[0].id[$bits(slv_ar_chans_i[0].id)-1:0] === slv_ar_chans_i[0].id)
        else $fatal (1, "Something with the AR channel ID prepending went wrong.");
  ar_addr : assert final(mst_ar_chans_o[0].addr === slv_ar_chans_i[0].addr)
        else $fatal (1, "Something with the AR channel ID prepending went wrong.");
  ar_len  : assert final(mst_ar_chans_o[0].len === slv_ar_chans_i[0].len)
        else $fatal (1, "Something with the AR channel ID prepending went wrong.");
  ar_size : assert final(mst_ar_chans_o[0].size === slv_ar_chans_i[0].size)
        else $fatal (1, "Something with the AR channel ID prepending went wrong.");
  ar_qos  : assert final(mst_ar_chans_o[0].qos === slv_ar_chans_i[0].qos)
        else $fatal (1, "Something with the AR channel ID prepending went wrong.");

  // R: the narrow ID must equal the lower bits of the wide ID; `data` and `resp` are unchanged.
  r_id    : assert final(mst_r_chans_i[0].id[$bits(slv_r_chans_o[0].id)-1:0] === slv_r_chans_o[0].id)
        else $fatal (1, "Something with the R channel ID stripping went wrong.");
  r_data  : assert final(mst_r_chans_i[0].data === slv_r_chans_o[0].data)
        else $fatal (1, "Something with the R channel ID stripping went wrong.");
  r_resp  : assert final(mst_r_chans_i[0].resp === slv_r_chans_o[0].resp)
        else $fatal (1, "Something with the R channel ID stripping went wrong.");
  `endif
  // pragma translate_on
endmodule
// Copyright (c) 2019 ETH Zurich, University of Bologna
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors:
// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
// - Andreas Kurth <akurth@iis.ee.ethz.ch>
// AXI Multiplexer: This module multiplexes the AXI4 slave ports down to one master port.
// The AXI IDs from the slave ports get extended with the respective slave port index.
// The extension width can be calculated with `$clog2(NoSlvPorts)`. This means the AXI
// ID for the master port has to be this `$clog2(NoSlvPorts)` wider than the ID for the
// slave ports.
// Responses are switched based on these bits. For example, with 4 slave ports
// a response with ID `6'b100110` will be forwarded to slave port 2 (`2'b10`).
// register macros
`include "common_cells/registers.svh"
module axi_mux #(
// AXI parameter and channel types
parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports
parameter type slv_aw_chan_t = logic, // AW Channel Type, slave ports
parameter type mst_aw_chan_t = logic, // AW Channel Type, master port
parameter type w_chan_t = logic, // W Channel Type, all ports
parameter type slv_b_chan_t = logic, // B Channel Type, slave ports
parameter type mst_b_chan_t = logic, // B Channel Type, master port
parameter type slv_ar_chan_t = logic, // AR Channel Type, slave ports
parameter type mst_ar_chan_t = logic, // AR Channel Type, master port
parameter type slv_r_chan_t = logic, // R Channel Type, slave ports
parameter type mst_r_chan_t = logic, // R Channel Type, master port
parameter type slv_req_t = logic, // Slave port request type
parameter type slv_resp_t = logic, // Slave port response type
parameter type mst_req_t = logic, // Master ports request type
parameter type mst_resp_t = logic, // Master ports response type
parameter int unsigned NoSlvPorts = 32'd0, // Number of slave ports
// Depth of the FIFO that stores, per forwarded AW, which slave port the W beats must come from.
// New AWs are held back while this FIFO is full, so this bounds the number of write
// transactions whose W burst has not yet completed.
parameter int unsigned MaxWTrans = 32'd8,
// Forwarded as `FALL_THROUGH` to the W-routing FIFO only. The spill registers on the master
// port are controlled independently through the `Spill*` parameters below.
parameter bit FallThrough = 1'b0,
// add spill register on write master ports, adds a cycle latency on write channels
parameter bit SpillAw = 1'b1,
parameter bit SpillW = 1'b0,
parameter bit SpillB = 1'b0,
// add spill register on read master ports, adds a cycle latency on read channels
parameter bit SpillAr = 1'b1,
parameter bit SpillR = 1'b0
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic test_i, // Test Mode enable
// slave ports (AXI inputs), connect master modules here
input slv_req_t [NoSlvPorts-1:0] slv_reqs_i,
output slv_resp_t [NoSlvPorts-1:0] slv_resps_o,
// master port (AXI outputs), connect slave modules here
output mst_req_t mst_req_o,
input mst_resp_t mst_resp_i
);
// Number of ID bits needed to encode the index of a slave port
localparam int unsigned MstIdxBits = $clog2(NoSlvPorts);
// ID width at the master port: slave ID plus the prepended slave-port index
localparam int unsigned MstAxiIDWidth = SlvAxiIDWidth + MstIdxBits;
// pass through if only one slave port
if (NoSlvPorts == 32'h1) begin : gen_no_mux
assign mst_req_o = slv_reqs_i[0];
assign slv_resps_o[0] = mst_resp_i;
// other non degenerate cases
end else begin : gen_mux
// Index of a slave port; this is the value stored in the upper `MstIdxBits` bits of an ID
typedef logic [MstIdxBits-1:0] switch_id_t;
// AXI channels between the ID prepend unit and the rest of the multiplexer
// (these already use the master-port channel types, i.e. the wide IDs)
mst_aw_chan_t [NoSlvPorts-1:0] slv_aw_chans;
logic [NoSlvPorts-1:0] slv_aw_valids, slv_aw_readies;
w_chan_t [NoSlvPorts-1:0] slv_w_chans;
logic [NoSlvPorts-1:0] slv_w_valids, slv_w_readies;
mst_b_chan_t [NoSlvPorts-1:0] slv_b_chans;
logic [NoSlvPorts-1:0] slv_b_valids, slv_b_readies;
mst_ar_chan_t [NoSlvPorts-1:0] slv_ar_chans;
logic [NoSlvPorts-1:0] slv_ar_valids, slv_ar_readies;
mst_r_chan_t [NoSlvPorts-1:0] slv_r_chans;
logic [NoSlvPorts-1:0] slv_r_valids, slv_r_readies;
// These signals are all ID prepended
// AW channel
mst_aw_chan_t mst_aw_chan;
logic mst_aw_valid, mst_aw_ready;
// AW master handshake internal, so that we are able to stall, if w_fifo is full
logic aw_valid, aw_ready;
// FF to lock the AW valid signal, when a new arbitration decision is made the decision
// gets pushed into the W FIFO, when it now stalls prevent subsequent pushing
// This FF removes AW to W dependency
logic lock_aw_valid_d, lock_aw_valid_q;
logic load_aw_lock;
// signals for the FIFO that holds the last switching decision of the AW channel
logic w_fifo_full, w_fifo_empty;
logic w_fifo_push, w_fifo_pop;
switch_id_t w_fifo_data;
// W channel spill reg
w_chan_t mst_w_chan;
logic mst_w_valid, mst_w_ready;
// master ID in the b_id
switch_id_t switch_b_id;
// B channel spill reg
mst_b_chan_t mst_b_chan;
logic mst_b_valid;
// AR channel for when spill is enabled
mst_ar_chan_t mst_ar_chan;
logic ar_valid, ar_ready;
// master ID in the r_id
switch_id_t switch_r_id;
// R channel spill reg
mst_r_chan_t mst_r_chan;
logic mst_r_valid;
//--------------------------------------
// ID prepend for all slave ports
//--------------------------------------
// One prepend unit per slave port. Port `i` gets its own index `i` prepended to the IDs of its
// AW and AR requests; the B and R responses handed back to the port use the slave-port channel
// types again.
for (genvar i = 0; i < NoSlvPorts; i++) begin : gen_id_prepend
axi_id_prepend #(
.NoBus ( 32'd1 ), // one AXI bus per slave port
.AxiIdWidthSlvPort( SlvAxiIDWidth ),
.AxiIdWidthMstPort( MstAxiIDWidth ),
.slv_aw_chan_t ( slv_aw_chan_t ),
.slv_w_chan_t ( w_chan_t ),
.slv_b_chan_t ( slv_b_chan_t ),
.slv_ar_chan_t ( slv_ar_chan_t ),
.slv_r_chan_t ( slv_r_chan_t ),
.mst_aw_chan_t ( mst_aw_chan_t ),
.mst_w_chan_t ( w_chan_t ),
.mst_b_chan_t ( mst_b_chan_t ),
.mst_ar_chan_t ( mst_ar_chan_t ),
.mst_r_chan_t ( mst_r_chan_t )
) i_id_prepend (
.pre_id_i ( switch_id_t'(i) ),
.slv_aw_chans_i ( slv_reqs_i[i].aw ),
.slv_aw_valids_i ( slv_reqs_i[i].aw_valid ),
.slv_aw_readies_o ( slv_resps_o[i].aw_ready ),
.slv_w_chans_i ( slv_reqs_i[i].w ),
.slv_w_valids_i ( slv_reqs_i[i].w_valid ),
.slv_w_readies_o ( slv_resps_o[i].w_ready ),
.slv_b_chans_o ( slv_resps_o[i].b ),
.slv_b_valids_o ( slv_resps_o[i].b_valid ),
.slv_b_readies_i ( slv_reqs_i[i].b_ready ),
.slv_ar_chans_i ( slv_reqs_i[i].ar ),
.slv_ar_valids_i ( slv_reqs_i[i].ar_valid ),
.slv_ar_readies_o ( slv_resps_o[i].ar_ready ),
.slv_r_chans_o ( slv_resps_o[i].r ),
.slv_r_valids_o ( slv_resps_o[i].r_valid ),
.slv_r_readies_i ( slv_reqs_i[i].r_ready ),
.mst_aw_chans_o ( slv_aw_chans[i] ),
.mst_aw_valids_o ( slv_aw_valids[i] ),
.mst_aw_readies_i ( slv_aw_readies[i] ),
.mst_w_chans_o ( slv_w_chans[i] ),
.mst_w_valids_o ( slv_w_valids[i] ),
.mst_w_readies_i ( slv_w_readies[i] ),
.mst_b_chans_i ( slv_b_chans[i] ),
.mst_b_valids_i ( slv_b_valids[i] ),
.mst_b_readies_o ( slv_b_readies[i] ),
.mst_ar_chans_o ( slv_ar_chans[i] ),
.mst_ar_valids_o ( slv_ar_valids[i] ),
.mst_ar_readies_i ( slv_ar_readies[i] ),
.mst_r_chans_i ( slv_r_chans[i] ),
.mst_r_valids_i ( slv_r_valids[i] ),
.mst_r_readies_o ( slv_r_readies[i] )
);
end
//--------------------------------------
// AW Channel
//--------------------------------------
// Arbitrates between the AW requests of all slave ports. The winner is presented on
// `aw_valid`/`mst_aw_chan` and is acknowledged through `aw_ready` by the control logic below.
// The selected index output is left unconnected: the slave-port index is recovered from the
// upper bits of the prepended ID instead.
rr_arb_tree #(
.NumIn ( NoSlvPorts ),
.DataType ( mst_aw_chan_t ),
.AxiVldRdy( 1'b1 ),
.LockIn ( 1'b1 )
) i_aw_arbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( 1'b0 ),
.rr_i ( '0 ),
.req_i ( slv_aw_valids ),
.gnt_o ( slv_aw_readies ),
.data_i ( slv_aw_chans ),
.gnt_i ( aw_ready ),
.req_o ( aw_valid ),
.data_o ( mst_aw_chan ),
.idx_o ( )
);
// control of the AW channel
// Sits between the arbiter (`aw_valid`/`aw_ready`) and the AW spill register
// (`mst_aw_valid`/`mst_aw_ready`):
// - Not locked: an arbitrated AW is only offered downstream while the W FIFO has space. Its
// slave-port index is pushed into the W FIFO in the cycle it is first offered. If downstream
// does not accept it in that same cycle, the lock flag is set.
// - Locked: the AW is kept valid without pushing again, so a stalled AW does not create a
// second FIFO entry. The lock is cleared in the cycle downstream accepts the AW.
always_comb begin
// default assignments
lock_aw_valid_d = lock_aw_valid_q;
load_aw_lock = 1'b0;
w_fifo_push = 1'b0;
mst_aw_valid = 1'b0;
aw_ready = 1'b0;
// had a downstream stall, be valid and send the AW along
if (lock_aw_valid_q) begin
mst_aw_valid = 1'b1;
// transaction
if (mst_aw_ready) begin
aw_ready = 1'b1;
lock_aw_valid_d = 1'b0;
load_aw_lock = 1'b1;
end
end else begin
if (!w_fifo_full && aw_valid) begin
mst_aw_valid = 1'b1;
w_fifo_push = 1'b1;
if (mst_aw_ready) begin
aw_ready = 1'b1;
end else begin
// go to lock if transaction not in this cycle
lock_aw_valid_d = 1'b1;
load_aw_lock = 1'b1;
end
end
end
end
// Lock flag register, updated only when `load_aw_lock` is set.
// NOTE(review): `FFLARN` comes from common_cells/registers.svh; presumably a flip-flop with load
// enable and asynchronous active-low reset (reset value '0 here) -- confirm against the macro.
`FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni)
// FIFO of slave-port indices, one entry per AW offered downstream. The pushed value is the
// prepended index, i.e. the ID bits above the original slave ID. Its output selects the slave
// port whose W beats are forwarded next.
fifo_v3 #(
.FALL_THROUGH ( FallThrough ),
.DEPTH ( MaxWTrans ),
.dtype ( switch_id_t )
) i_w_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i( test_i ),
.full_o ( w_fifo_full ),
.empty_o ( w_fifo_empty ),
.usage_o ( ),
.data_i ( mst_aw_chan.id[SlvAxiIDWidth+:MstIdxBits] ),
.push_i ( w_fifo_push ),
.data_o ( w_fifo_data ),
.pop_i ( w_fifo_pop )
);
// AW output stage towards the master port; bypassed when `SpillAw` is 0.
spill_register #(
.T ( mst_aw_chan_t ),
.Bypass ( ~SpillAw ) // Param indicated that we want a spill reg
) i_aw_spill_reg (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.valid_i ( mst_aw_valid ),
.ready_o ( mst_aw_ready ),
.data_i ( mst_aw_chan ),
.valid_o ( mst_req_o.aw_valid ),
.ready_i ( mst_resp_i.aw_ready ),
.data_o ( mst_req_o.aw )
);
//--------------------------------------
// W Channel
//--------------------------------------
// multiplexer
// The W payload is taken from the slave port named by the entry at the head of the W FIFO.
assign mst_w_chan = slv_w_chans[w_fifo_data];
// W handshake: nothing is forwarded while the FIFO is empty. Otherwise only the selected
// slave port is connected; all other ports keep `w_ready` low.
always_comb begin
// default assignments
mst_w_valid = 1'b0;
slv_w_readies = '0;
w_fifo_pop = 1'b0;
// control
if (!w_fifo_empty) begin
// connect the handshake
mst_w_valid = slv_w_valids[w_fifo_data];
slv_w_readies[w_fifo_data] = mst_w_ready;
// pop FIFO on a last transaction
w_fifo_pop = slv_w_valids[w_fifo_data] & mst_w_ready & mst_w_chan.last;
end
end
// W output stage towards the master port; bypassed when `SpillW` is 0.
spill_register #(
.T ( w_chan_t ),
.Bypass ( ~SpillW )
) i_w_spill_reg (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.valid_i ( mst_w_valid ),
.ready_o ( mst_w_ready ),
.data_i ( mst_w_chan ),
.valid_o ( mst_req_o.w_valid ),
.ready_i ( mst_resp_i.w_ready ),
.data_o ( mst_req_o.w )
);
//--------------------------------------
// B Channel
//--------------------------------------
// replicate B channels
// Every slave port sees the same B payload; only the valid bit selects the receiver.
assign slv_b_chans = {NoSlvPorts{mst_b_chan}};
// control B channel handshake
// The destination port is the index stored in the ID bits above the original slave ID.
assign switch_b_id = mst_b_chan.id[SlvAxiIDWidth+:MstIdxBits];
// One-hot valid for the destination port, all zero when there is no response.
assign slv_b_valids = (mst_b_valid) ? (1 << switch_b_id) : '0;
// B input stage from the master port; bypassed when `SpillB` is 0. Ready is taken from the
// destination slave port.
spill_register #(
.T ( mst_b_chan_t ),
.Bypass ( ~SpillB )
) i_b_spill_reg (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.valid_i ( mst_resp_i.b_valid ),
.ready_o ( mst_req_o.b_ready ),
.data_i ( mst_resp_i.b ),
.valid_o ( mst_b_valid ),
.ready_i ( slv_b_readies[switch_b_id] ),
.data_o ( mst_b_chan )
);
//--------------------------------------
// AR Channel
//--------------------------------------
// Arbitrates between the AR requests of all slave ports. Unlike AW, no extra control logic is
// needed: the arbiter output handshakes directly with the AR spill register.
rr_arb_tree #(
.NumIn ( NoSlvPorts ),
.DataType ( mst_ar_chan_t ),
.AxiVldRdy( 1'b1 ),
.LockIn ( 1'b1 )
) i_ar_arbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( 1'b0 ),
.rr_i ( '0 ),
.req_i ( slv_ar_valids ),
.gnt_o ( slv_ar_readies ),
.data_i ( slv_ar_chans ),
.gnt_i ( ar_ready ),
.req_o ( ar_valid ),
.data_o ( mst_ar_chan ),
.idx_o ( )
);
// AR output stage towards the master port; bypassed when `SpillAr` is 0.
spill_register #(
.T ( mst_ar_chan_t ),
.Bypass ( ~SpillAr )
) i_ar_spill_reg (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.valid_i ( ar_valid ),
.ready_o ( ar_ready ),
.data_i ( mst_ar_chan ),
.valid_o ( mst_req_o.ar_valid ),
.ready_i ( mst_resp_i.ar_ready ),
.data_o ( mst_req_o.ar )
);
//--------------------------------------
// R Channel
//--------------------------------------
// replicate R channels
// Every slave port sees the same R payload; only the valid bit selects the receiver.
assign slv_r_chans = {NoSlvPorts{mst_r_chan}};
// R channel handshake control
// The destination port is the index stored in the ID bits above the original slave ID.
assign switch_r_id = mst_r_chan.id[SlvAxiIDWidth+:MstIdxBits];
// One-hot valid for the destination port, all zero when there is no response.
assign slv_r_valids = (mst_r_valid) ? (1 << switch_r_id) : '0;
// R input stage from the master port; bypassed when `SpillR` is 0. Ready is taken from the
// destination slave port.
spill_register #(
.T ( mst_r_chan_t ),
.Bypass ( ~SpillR )
) i_r_spill_reg (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.valid_i ( mst_resp_i.r_valid ),
.ready_o ( mst_req_o.r_ready ),
.data_i ( mst_resp_i.r ),
.valid_o ( mst_r_valid ),
.ready_i ( slv_r_readies[switch_r_id] ),
.data_o ( mst_r_chan )
);
end
// Simulation-only parameter and type checks, evaluated once at time zero.
// pragma translate_off
`ifndef VERILATOR
initial begin
assert (SlvAxiIDWidth > 0) else $fatal(1, "AXI ID width of slave ports must be non-zero!");
assert (NoSlvPorts > 0) else $fatal(1, "Number of slave ports must be non-zero!");
assert (MaxWTrans > 0)
else $fatal(1, "Maximum number of outstanding writes must be non-zero!");
// Holds by construction, because `MstAxiIDWidth` is derived from exactly this sum above.
assert (MstAxiIDWidth >= SlvAxiIDWidth + $clog2(NoSlvPorts))
else $fatal(1, "AXI ID width of master ports must be wide enough to identify slave ports!");
// Assert ID widths (one slave is sufficient since they all have the same type).
assert ($unsigned($bits(slv_reqs_i[0].aw.id)) == SlvAxiIDWidth)
else $fatal(1, "ID width of AW channel of slave ports does not match parameter!");
assert ($unsigned($bits(slv_reqs_i[0].ar.id)) == SlvAxiIDWidth)
else $fatal(1, "ID width of AR channel of slave ports does not match parameter!");
assert ($unsigned($bits(slv_resps_o[0].b.id)) == SlvAxiIDWidth)
else $fatal(1, "ID width of B channel of slave ports does not match parameter!");
assert ($unsigned($bits(slv_resps_o[0].r.id)) == SlvAxiIDWidth)
else $fatal(1, "ID width of R channel of slave ports does not match parameter!");
// The master-port types must carry the slave ID plus the prepended port index.
assert ($unsigned($bits(mst_req_o.aw.id)) == MstAxiIDWidth)
else $fatal(1, "ID width of AW channel of master port is wrong!");
assert ($unsigned($bits(mst_req_o.ar.id)) == MstAxiIDWidth)
else $fatal(1, "ID width of AR channel of master port is wrong!");
assert ($unsigned($bits(mst_resp_i.b.id)) == MstAxiIDWidth)
else $fatal(1, "ID width of B channel of master port is wrong!");
assert ($unsigned($bits(mst_resp_i.r.id)) == MstAxiIDWidth)
else $fatal(1, "ID width of R channel of master port is wrong!");
end
`endif
// pragma translate_on
endmodule
// interface wrap
`include "axi/assign.svh"
`include "axi/typedef.svh"
module axi_mux_intf #(
  // Synopsys DC requires a default value for every parameter, hence the zero defaults.
  parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // ID width of the slave ports
  parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, // ID width of the master port
  parameter int unsigned AXI_ADDR_WIDTH   = 32'd0, // Address width
  parameter int unsigned AXI_DATA_WIDTH   = 32'd0, // Data width
  parameter int unsigned AXI_USER_WIDTH   = 32'd0, // User signal width
  parameter int unsigned NO_SLV_PORTS     = 32'd0, // Number of slave ports
  // Forwarded to `axi_mux` as `MaxWTrans`
  parameter int unsigned MAX_W_TRANS      = 32'd8,
  // Forwarded to `axi_mux` as `FallThrough`
  parameter bit          FALL_THROUGH     = 1'b0,
  // Spill registers on the write channels of the master port (forwarded to `axi_mux`)
  parameter bit          SPILL_AW         = 1'b1,
  parameter bit          SPILL_W          = 1'b0,
  parameter bit          SPILL_B          = 1'b0,
  // Spill registers on the read channels of the master port (forwarded to `axi_mux`)
  parameter bit          SPILL_AR         = 1'b1,
  parameter bit          SPILL_R          = 1'b0
) (
  input  logic   clk_i,                  // Clock
  input  logic   rst_ni,                 // Asynchronous reset active low
  input  logic   test_i,                 // Testmode enable
  AXI_BUS.Slave  slv [NO_SLV_PORTS-1:0], // slave ports
  AXI_BUS.Master mst                     // master port
);

  // ---------------------------------------------------------------------------
  // Field types
  // ---------------------------------------------------------------------------
  typedef logic [AXI_ADDR_WIDTH -1:0]  addr_t;
  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
  typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t;
  typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t;

  // ---------------------------------------------------------------------------
  // Channel and request/response struct types
  // ---------------------------------------------------------------------------
  // W carries no ID and is therefore shared by both sides.
  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
  // Slave side (narrow IDs)
  `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t)
  `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t)
  `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, slv_id_t, user_t)
  `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t)
  `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t)
  `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t)
  // Master side (wide IDs)
  `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t)
  `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t)
  `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t)
  `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t)
  `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t)
  `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t)

  // ---------------------------------------------------------------------------
  // Struct-typed signals between the interface ports and the multiplexer
  // ---------------------------------------------------------------------------
  mst_req_t                     mst_req;
  mst_resp_t                    mst_resp;
  slv_req_t  [NO_SLV_PORTS-1:0] slv_reqs;
  slv_resp_t [NO_SLV_PORTS-1:0] slv_resps;

  // Master interface <-> request/response structs
  `AXI_ASSIGN_FROM_REQ(mst, mst_req)
  `AXI_ASSIGN_TO_RESP(mst_resp, mst)

  // Slave interfaces <-> request/response structs, one pair per port
  for (genvar port = 0; port < NO_SLV_PORTS; port++) begin : gen_assign_slv_ports
    `AXI_ASSIGN_TO_REQ(slv_reqs[port], slv[port])
    `AXI_ASSIGN_FROM_RESP(slv[port], slv_resps[port])
  end

  // ---------------------------------------------------------------------------
  // Struct-based multiplexer
  // ---------------------------------------------------------------------------
  axi_mux #(
    .SlvAxiIDWidth ( SLV_AXI_ID_WIDTH ),
    .NoSlvPorts    ( NO_SLV_PORTS     ),
    // channel types
    .w_chan_t      ( w_chan_t         ),
    .slv_aw_chan_t ( slv_aw_chan_t    ),
    .slv_b_chan_t  ( slv_b_chan_t     ),
    .slv_ar_chan_t ( slv_ar_chan_t    ),
    .slv_r_chan_t  ( slv_r_chan_t     ),
    .mst_aw_chan_t ( mst_aw_chan_t    ),
    .mst_b_chan_t  ( mst_b_chan_t     ),
    .mst_ar_chan_t ( mst_ar_chan_t    ),
    .mst_r_chan_t  ( mst_r_chan_t     ),
    // request/response types
    .slv_req_t     ( slv_req_t        ),
    .slv_resp_t    ( slv_resp_t       ),
    .mst_req_t     ( mst_req_t        ),
    .mst_resp_t    ( mst_resp_t       ),
    // behaviour
    .MaxWTrans     ( MAX_W_TRANS      ),
    .FallThrough   ( FALL_THROUGH     ),
    .SpillAw       ( SPILL_AW         ),
    .SpillW        ( SPILL_W          ),
    .SpillB        ( SPILL_B          ),
    .SpillAr       ( SPILL_AR         ),
    .SpillR        ( SPILL_R          )
  ) i_axi_mux (
    .clk_i       ( clk_i     ),
    .rst_ni      ( rst_ni    ),
    .test_i      ( test_i    ),
    .slv_reqs_i  ( slv_reqs  ),
    .slv_resps_o ( slv_resps ),
    .mst_req_o   ( mst_req   ),
    .mst_resp_i  ( mst_resp  )
  );
endmodule
// Copyright (c) 2014-2020 ETH Zurich, University of Bologna
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Authors:
// - Andreas Kurth <akurth@iis.ee.ethz.ch>
// - Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// - Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>
// - Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// - Matheus Cavalcante <matheusd@iis.ee.ethz.ch>
//! AXI Package
/// Contains all necessary type definitions, constants, and generally useful functions.
package axi_pkg;

  ///////////////////////////
  //  Channel field types  //
  ///////////////////////////

  /// Burst type of an AXI transaction.
  typedef logic [1:0] burst_t;
  /// Response code of an AXI transaction.
  typedef logic [1:0] resp_t;
  /// Cacheability attributes of an AXI transaction.
  typedef logic [3:0] cache_t;
  /// Protection attributes of an AXI transaction.
  typedef logic [2:0] prot_t;
  /// Quality-of-service value of an AXI transaction.
  typedef logic [3:0] qos_t;
  /// Region identifier of an AXI transaction.
  typedef logic [3:0] region_t;
  /// Length field of an AXI transaction.
  typedef logic [7:0] len_t;
  /// Size field of an AXI transaction.
  typedef logic [2:0] size_t;
  /// AXI5 atomic operation field.
  typedef logic [5:0] atop_t;
  /// AXI5 non-secure address identifier.
  typedef logic [3:0] nsaid_t;

  ///////////////////
  //  Burst types  //
  ///////////////////

  /// Fixed burst:
  /// - Every transfer of the burst uses the same address.
  /// - The set of valid byte lanes is the same for all beats. Within those lanes, the bytes that
  ///   actually have `wstrb` asserted may still differ from beat to beat.
  /// Intended for repeated accesses to one location, e.g., when filling or draining a FIFO.
  localparam BURST_FIXED = 2'b00;
  /// Incrementing burst: the address of each transfer is the address of the previous transfer
  /// plus an increment given by the transfer size. With a size of 4 bytes, for example, every
  /// transfer address is the previous address plus four.
  /// Intended for accesses to normal sequential memory.
  localparam BURST_INCR  = 2'b01;
  /// Wrapping burst: behaves like an incrementing burst, but the address wraps around to a lower
  /// address once an upper address limit is reached.
  /// Restrictions on wrapping bursts:
  /// - The start address must be aligned to the size of each transfer.
  /// - The burst must consist of 2, 4, 8, or 16 transfers.
  localparam BURST_WRAP  = 2'b10;

  //////////////////////
  //  Response codes  //
  //////////////////////

  /// Normal access success. Signals that a normal access succeeded; may also signal that an
  /// exclusive access failed.
  localparam RESP_OKAY   = 2'b00;
  /// Exclusive access okay. Signals that the read or the write portion of an exclusive access
  /// succeeded.
  localparam RESP_EXOKAY = 2'b01;
  /// Slave error. The access reached the slave, but the slave wants to report an error condition
  /// to the originating master.
  localparam RESP_SLVERR = 2'b10;
  /// Decode error. Typically generated by an interconnect component to signal that no slave
  /// exists at the transaction address.
  localparam RESP_DECERR = 2'b11;

  //////////////////
  //  Cache bits  //
  //////////////////

  /// When set, the interconnect (or any other component) may delay the transaction on its way to
  /// the final destination by any number of cycles.
  localparam CACHE_BUFFERABLE = 4'b0001;
  /// When HIGH, the characteristics of the transaction may be modified. When LOW, the
  /// transaction is Non-modifiable.
  localparam CACHE_MODIFIABLE = 4'b0010;
  /// When set, read allocation of the transaction is recommended but not mandatory.
  localparam CACHE_RD_ALLOC   = 4'b0100;
  /// When set, write allocation of the transaction is recommended but not mandatory.
  localparam CACHE_WR_ALLOC   = 4'b1000;

  ///////////////////////
  //  Address helpers  //
  ///////////////////////

  /// Maximum number of bytes per beat, as encoded by `size` (see Table A3-2).
  function automatic shortint unsigned num_bytes(size_t size);
    shortint unsigned n_bytes;
    n_bytes = 1 << size;
    return n_bytes;
  endfunction

  /// An overly long address type.
  /// Functions written against this type work generically for any shorter address. The unused
  /// upper bits are expected to be optimized away by the synthesizer.
  typedef logic [127:0] largest_addr_t;

  /// Aligned address of a burst (see A3-51): `addr` with its `size` least significant bits
  /// cleared.
  function automatic largest_addr_t aligned_addr(largest_addr_t addr, size_t size);
    largest_addr_t beat_index;
    beat_index = addr >> size;
    return beat_index << size;
  endfunction

  /// Wrap boundary of a `BURST_WRAP` transfer (see A3-51).
  /// This is the lowest address accessed within a wrapping burst. It is aligned to the size and
  /// length of the burst. The length of a `BURST_WRAP` has to be 2, 4, 8, or 16 transfers; for
  /// any other `len` the function returns zero.
  function automatic largest_addr_t wrap_boundary (largest_addr_t addr, size_t size, len_t len);
    int unsigned log2_beats;
    logic [31:0] container_shift;
    // pragma translate_off
    `ifndef VERILATOR
    assume (len == len_t'(4'b1) || len == len_t'(4'b11) || len == len_t'(4'b111) ||
        len == len_t'(4'b1111)) else
      $error("AXI BURST_WRAP with not allowed len of: %0h", len);
    `endif
    // pragma translate_on
    // A3-51 defines the wrap boundary as
    //   `Wrap_Boundary = (INT(Start_Address / (Number_Bytes × Burst_Length))) ×
    //                    (Number_Bytes × Burst_Length)`
    // and the aligned address as
    //   `Aligned_Address = (INT(Start_Address / Number_Bytes)) × Number_Bytes`.
    // Both are the same truncation; the wrap boundary merely truncates to the larger container
    // `Number_Bytes × Burst_Length`. As the only legal burst lengths are powers of two, the
    // multiplication by `Burst_Length` becomes an addition of `log2(Burst_Length)` to the shift
    // amount used by `aligned_addr`.
    unique case (len)
      4'b1    : log2_beats = 1; // `Number_Bytes` times  2
      4'b11   : log2_beats = 2; // `Number_Bytes` times  4
      4'b111  : log2_beats = 3; // `Number_Bytes` times  8
      4'b1111 : log2_beats = 4; // `Number_Bytes` times 16
      default : log2_beats = 0; // illegal length
    endcase
    if (log2_beats == 0) begin
      return '0;
    end
    container_shift = unsigned'(size) + log2_beats;
    return (addr >> container_shift) << container_shift;
  endfunction

  /// Address of beat `i_beat` (counting from 0) of a burst (see A3-51).
  function automatic largest_addr_t
      beat_addr(largest_addr_t addr, size_t size, len_t len, burst_t burst, shortint unsigned i_beat);
    largest_addr_t beat;
    largest_addr_t wrap_low;
    largest_addr_t container_bytes;
    beat     = addr;
    wrap_low = '0;
    // Only evaluate the wrap boundary for wrapping bursts, so that the assumption on `len` inside
    // `wrap_boundary` cannot fire for other burst types.
    if (burst == BURST_WRAP) begin
      wrap_low = wrap_boundary(addr, size, len);
    end
    if (i_beat != 0 && burst != BURST_FIXED) begin
      // From A3-51:
      // For an INCR burst, and for a WRAP burst for which the address has not wrapped, this
      // equation determines the address of any transfer after the first transfer in a burst:
      //   `Address_N = Aligned_Address + (N – 1) × Number_Bytes` (N counts from 1 to len!)
      beat = aligned_addr(addr, size) + i_beat * num_bytes(size);
      // From A3-51:
      // For a WRAP burst, if Address_N = Wrap_Boundary + (Number_Bytes × Burst_Length), then:
      //   * Use this equation for the current transfer:
      //       `Address_N = Wrap_Boundary`
      //   * Use this equation for any subsequent transfers:
      //       `Address_N = Start_Address + ((N – 1) × Number_Bytes) – (Number_Bytes × Burst_Length)`
      // In other words, a `BURST_WRAP` address is computed like a `BURST_INCR` address; once the
      // result reaches the upper wrap threshold, the size of the accessed container
      // (`num_bytes(size) * (len + 1)`) is subtracted. The lower wrap boundary equals the wrap
      // threshold minus that container size.
      container_bytes = num_bytes(size) * (len + 1);
      if (burst == BURST_WRAP && beat >= wrap_low + container_bytes) begin
        beat = beat - container_bytes;
      end
    end
    return beat;
  endfunction

  /// Index of lowest byte in beat (see A3-51).
  function automatic shortint unsigned
      beat_lower_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst,
      shortint unsigned strobe_width, shortint unsigned i_beat);
    largest_addr_t this_beat;
    this_beat = beat_addr(addr, size, len, burst, i_beat);
    // Offset of the beat address within one `strobe_width`-byte data word.
    return this_beat % strobe_width;
  endfunction

  /// Index of highest byte in beat (see A3-51).
  function automatic shortint unsigned
      beat_upper_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst,
      shortint unsigned strobe_width, shortint unsigned i_beat);
    largest_addr_t word_base;
    if (i_beat != 0) begin
      return beat_lower_byte(addr, size, len, burst, strobe_width, i_beat) + num_bytes(size) - 1;
    end
    // The first beat may start unaligned; its last byte is still bounded by the aligned address.
    word_base = (addr / strobe_width) * strobe_width;
    return aligned_addr(addr, size) + (num_bytes(size) - 1) - word_base;
  endfunction

  /// Is the bufferable bit set?
  function automatic logic bufferable(cache_t cache);
    return (cache & CACHE_BUFFERABLE) != 4'b0000;
  endfunction

  /// Is the modifiable bit set?
  function automatic logic modifiable(cache_t cache);
    return (cache & CACHE_MODIFIABLE) != 4'b0000;
  endfunction

  ////////////////////
  //  Memory types  //
  ////////////////////

  /// Memory Type.
  typedef enum logic [3:0] {
    DEVICE_NONBUFFERABLE,
    DEVICE_BUFFERABLE,
    NORMAL_NONCACHEABLE_NONBUFFERABLE,
    NORMAL_NONCACHEABLE_BUFFERABLE,
    WTHRU_NOALLOCATE,
    WTHRU_RALLOCATE,
    WTHRU_WALLOCATE,
    WTHRU_RWALLOCATE,
    WBACK_NOALLOCATE,
    WBACK_RALLOCATE,
    WBACK_WALLOCATE,
    WBACK_RWALLOCATE
  } mem_type_t;

  /// Create an `AR_CACHE` field from a `mem_type_t` type.
  function automatic logic [3:0] get_arcache(mem_type_t mtype);
    logic [3:0] arcache;
    unique case (mtype)
      DEVICE_NONBUFFERABLE              : arcache = 4'b0000;
      DEVICE_BUFFERABLE                 : arcache = 4'b0001;
      NORMAL_NONCACHEABLE_NONBUFFERABLE : arcache = 4'b0010;
      NORMAL_NONCACHEABLE_BUFFERABLE    : arcache = 4'b0011;
      WTHRU_NOALLOCATE, WTHRU_WALLOCATE : arcache = 4'b1010;
      WTHRU_RALLOCATE, WTHRU_RWALLOCATE : arcache = 4'b1110;
      WBACK_NOALLOCATE, WBACK_WALLOCATE : arcache = 4'b1011;
      WBACK_RALLOCATE, WBACK_RWALLOCATE : arcache = 4'b1111;
    endcase // mtype
    return arcache;
  endfunction

  /// Create an `AW_CACHE` field from a `mem_type_t` type.
  function automatic logic [3:0] get_awcache(mem_type_t mtype);
    logic [3:0] awcache;
    unique case (mtype)
      DEVICE_NONBUFFERABLE              : awcache = 4'b0000;
      DEVICE_BUFFERABLE                 : awcache = 4'b0001;
      NORMAL_NONCACHEABLE_NONBUFFERABLE : awcache = 4'b0010;
      NORMAL_NONCACHEABLE_BUFFERABLE    : awcache = 4'b0011;
      WTHRU_NOALLOCATE, WTHRU_RALLOCATE : awcache = 4'b0110;
      WTHRU_WALLOCATE, WTHRU_RWALLOCATE : awcache = 4'b1110;
      WBACK_NOALLOCATE, WBACK_RALLOCATE : awcache = 4'b0111;
      WBACK_WALLOCATE, WBACK_RWALLOCATE : awcache = 4'b1111;
    endcase // mtype
    return awcache;
  endfunction

  /// RESP precedence: DECERR > SLVERR > OKAY > EXOKAY. The AXI standard does not define this; it
  /// is an implementation choice that is applied consistently here. Rationale:
  /// - EXOKAY means an exclusive access was successful, whereas OKAY means it was not. When OKAY
  ///   and EXOKAY are merged, OKAY therefore wins because the exclusive access was not fully
  ///   successful.
  /// - Both DECERR and SLVERR mean that (part of) a transaction was unsuccessful, whereas OKAY
  ///   means an entire transaction was successful. Both errors therefore precede OKAY.
  /// - DECERR means (part of) a transaction could not be routed to a slave component, whereas
  ///   SLVERR means the transaction reached a slave component but led to an error condition
  ///   there. DECERR precedes SLVERR because it happens earlier in the handling of a transaction.
  function automatic resp_t resp_precedence(resp_t resp_a, resp_t resp_b);
    unique case (resp_a)
      RESP_OKAY: begin
        // Everything but EXOKAY outranks OKAY.
        if (resp_b == RESP_EXOKAY) begin
          return resp_a;
        end
        return resp_b;
      end
      RESP_EXOKAY: begin
        // EXOKAY is outranked by every response.
        return resp_b;
      end
      RESP_SLVERR: begin
        // SLVERR is outranked by DECERR only.
        if (resp_b == RESP_DECERR) begin
          return resp_b;
        end
        return resp_a;
      end
      RESP_DECERR: begin
        // Nothing outranks DECERR.
        return resp_a;
      end
    endcase
  endfunction

  /////////////////////////
  //  Atomic operations  //
  /////////////////////////

  // ATOP[5:0]
  /// - Sends a single data value with an address.
  /// - The target swaps the value at the addressed location with the data value that is supplied in
  ///   the transaction.
  /// - The original data value at the addressed location is returned.
  /// - Outbound data size is 1, 2, 4, or 8 bytes.
  /// - Inbound data size is the same as the outbound data size.
  localparam ATOP_ATOMICSWAP = 6'b110000;
  /// - Sends two data values, the compare value and the swap value, to the addressed location.
  ///   The compare and swap values are of equal size.
  /// - The data value at the addressed location is checked against the compare value:
  ///   - If the values match, the swap value is written to the addressed location.
  ///   - If the values do not match, the swap value is not written to the addressed location.
  /// - The original data value at the addressed location is returned.
  /// - Outbound data size is 2, 4, 8, 16, or 32 bytes.
  /// - Inbound data size is half of the outbound data size because the outbound data contains both
  ///   compare and swap values, whereas the inbound data has only the original data value.
  localparam ATOP_ATOMICCMP = 6'b110001;

  // ATOP[5:4]
  /// Perform no atomic operation.
  localparam ATOP_NONE = 2'b00;
  /// - Sends a single data value with an address and the atomic operation to be performed.
  /// - The target performs the operation using the sent data and value at the addressed location as
  ///   operands.
  /// - The result is stored in the address location.
  /// - A single response is given without data.
  /// - Outbound data size is 1, 2, 4, or 8 bytes.
  localparam ATOP_ATOMICSTORE = 2'b01;
  /// - Sends a single data value with an address and the atomic operation to be performed.
  /// - The original data value at the addressed location is returned.
  /// - The target performs the operation using the sent data and value at the addressed location as
  ///   operands.
  /// - The result is stored in the address location.
  /// - Outbound data size is 1, 2, 4, or 8 bytes.
  /// - Inbound data size is the same as the outbound data size.
  localparam ATOP_ATOMICLOAD = 2'b10;

  // ATOP[3]
  /// For AtomicStore and AtomicLoad transactions `AWATOP[3]` indicates the endianness that is
  /// required for the atomic operation. The value of `AWATOP[3]` applies to arithmetic operations
  /// only and is ignored for bitwise logical operations.
  /// When deasserted, this bit indicates that the operation is little-endian.
  localparam ATOP_LITTLE_END = 1'b0;
  /// When asserted, this bit indicates that the operation is big-endian.
  localparam ATOP_BIG_END = 1'b1;

  // ATOP[2:0]
  /// The value in memory is added to the sent data and the result stored in memory.
  localparam ATOP_ADD = 3'b000;
  /// Every set bit in the sent data clears the corresponding bit of the data in memory.
  localparam ATOP_CLR = 3'b001;
  /// Bitwise exclusive OR of the sent data and value in memory.
  localparam ATOP_EOR = 3'b010;
  /// Every set bit in the sent data sets the corresponding bit of the data in memory.
  localparam ATOP_SET = 3'b011;
  /// The value stored in memory is the maximum of the existing value and sent data. This operation
  /// assumes signed data.
  localparam ATOP_SMAX = 3'b100;
  /// The value stored in memory is the minimum of the existing value and sent data. This operation
  /// assumes signed data.
  localparam ATOP_SMIN = 3'b101;
  /// The value stored in memory is the maximum of the existing value and sent data. This operation
  /// assumes unsigned data.
  localparam ATOP_UMAX = 3'b110;
  /// The value stored in memory is the minimum of the existing value and sent data. This operation
  /// assumes unsigned data.
  localparam ATOP_UMIN = 3'b111;

  // ATOP[5] == 1'b1 indicates that an atomic transaction has a read response.
  // Usage, e.g.: if (req_i.aw.atop[axi_pkg::ATOP_R_RESP]) begin
  localparam ATOP_R_RESP = 32'd5;

  //////////////////////////////
  //  Crossbar configuration  //
  //////////////////////////////

  // `xbar_latency_e` and `xbar_cfg_t` are documented in `doc/axi_xbar.md`.
  /// Slice on Demux AW channel.
  localparam logic [9:0] DemuxAw = (1 << 9);
  /// Slice on Demux W channel.
  localparam logic [9:0] DemuxW  = (1 << 8);
  /// Slice on Demux B channel.
  localparam logic [9:0] DemuxB  = (1 << 7);
  /// Slice on Demux AR channel.
  localparam logic [9:0] DemuxAr = (1 << 6);
  /// Slice on Demux R channel.
  localparam logic [9:0] DemuxR  = (1 << 5);
  /// Slice on Mux AW channel.
  localparam logic [9:0] MuxAw   = (1 << 4);
  /// Slice on Mux W channel.
  localparam logic [9:0] MuxW    = (1 << 3);
  /// Slice on Mux B channel.
  localparam logic [9:0] MuxB    = (1 << 2);
  /// Slice on Mux AR channel.
  localparam logic [9:0] MuxAr   = (1 << 1);
  /// Slice on Mux R channel.
  localparam logic [9:0] MuxR    = (1 << 0);

  /// Latency configuration for `axi_xbar`. Each value is the set of channel slices to insert.
  typedef enum logic [9:0] {
    NO_LATENCY    = 10'b000_00_000_00,
    CUT_SLV_AX    = DemuxAw | DemuxAr,
    CUT_MST_AX    = MuxAw | MuxAr,
    CUT_ALL_AX    = DemuxAw | DemuxAr | MuxAw | MuxAr,
    CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR,
    CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR,
    CUT_ALL_PORTS = 10'b111_11_111_11
  } xbar_latency_e;

  /// Configuration for `axi_xbar`.
  typedef struct packed {
    int unsigned   NoSlvPorts;
    int unsigned   NoMstPorts;
    int unsigned   MaxMstTrans;
    int unsigned   MaxSlvTrans;
    bit            FallThrough;
    xbar_latency_e LatencyMode;
    int unsigned   AxiIdWidthSlvPorts;
    int unsigned   AxiIdUsedSlvPorts;
    bit            UniqueIds;
    int unsigned   AxiAddrWidth;
    int unsigned   AxiDataWidth;
    int unsigned   NoAddrRules;
  } xbar_cfg_t;

  /// Commonly used rule types for `axi_xbar` (64-bit addresses).
  typedef struct packed {
    int unsigned idx;
    logic [63:0] start_addr;
    logic [63:0] end_addr;
  } xbar_rule_64_t;

  /// Commonly used rule types for `axi_xbar` (32-bit addresses).
  typedef struct packed {
    int unsigned idx;
    logic [31:0] start_addr;
    logic [31:0] end_addr;
  } xbar_rule_32_t;

endpackage
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Wrapper around `axi_rab_top` that exposes two struct-based AXI slave ports (one for traffic
// coming from MemPool, one for traffic coming from the host), the two corresponding master
// ports, per-direction interrupt outputs, and an AXI-Lite configuration port.
//
// In every concatenation connected to `i_rab`, the upper element belongs to the
// MemPool-to-host direction and the lower element to the host-to-MemPool direction.
module axi_rab_wrap #(
  // L1 Configuration
  parameter int unsigned L1NumSlicesMemPool = 0,
  parameter int unsigned L1NumSlicesHost    = 0,
  // L2 Configuration
  parameter bit          L2Enable           = 1'b0,
  parameter int unsigned L2NumSets          = 0,
  parameter int unsigned L2NumSetEntries    = 0,
  parameter int unsigned L2NumParVaRams     = 0,
  // Miss Handler FIFO Configuration
  parameter int unsigned MhFifoDepth        = 0,
  // AXI Configuration
  parameter int unsigned AxiAddrWidth       = 0,
  parameter int unsigned AxiLiteDataWidth   = 0,
  parameter int unsigned AxiDataWidth       = 0,
  parameter int unsigned AxiIdWidth         = 0,
  parameter int unsigned AxiUserWidth       = 0,
  // AXI types
  parameter type axi_req_t                  = logic,
  parameter type axi_resp_t                 = logic,
  parameter type axi_lite_req_t             = logic,
  parameter type axi_lite_resp_t            = logic
) (
  input  logic           clk_i,
  input  logic           rst_ni,
  // Transactions coming from Mempool and going to Host
  input  axi_req_t       from_mempool_req_i,
  output axi_resp_t      from_mempool_resp_o,
  output logic           from_mempool_miss_irq_o,
  output logic           from_mempool_multi_irq_o,
  output logic           from_mempool_prot_irq_o,
  output axi_req_t       to_host_req_o,
  input  axi_resp_t      to_host_resp_i,
  // Transactions coming from Host and going to Mempool
  input  axi_req_t       from_host_req_i,
  output axi_resp_t      from_host_resp_o,
  output logic           from_host_miss_irq_o,
  output logic           from_host_multi_irq_o,
  output logic           from_host_prot_irq_o,
  output axi_req_t       to_mempool_req_o,
  input  axi_resp_t      to_mempool_resp_i,
  // Miss handler FIFO interrupt
  output logic           mh_fifo_full_irq_o,
  // Configuration interface
  input  axi_lite_req_t  conf_req_i,
  output axi_lite_resp_t conf_resp_o
);

  // The RAB rejects addresses outside its configuration space, which spans at most 2^16 bytes.
  // Only the low `ConfAddrBits` bits of the configuration addresses are therefore forwarded;
  // all upper address bits are forced to zero.
  localparam int unsigned ConfAddrBits = 16;

  logic [AxiAddrWidth-1:0] conf_aw_addr;
  logic [AxiAddrWidth-1:0] conf_ar_addr;

  assign conf_aw_addr = {{(AxiAddrWidth-ConfAddrBits){1'b0}}, conf_req_i.aw.addr[ConfAddrBits-1:0]};
  assign conf_ar_addr = {{(AxiAddrWidth-ConfAddrBits){1'b0}}, conf_req_i.ar.addr[ConfAddrBits-1:0]};

  axi_rab_top #(
    .N_PORTS             (2                                                   ),
    .N_L1_SLICES         ('{0, 0, L1NumSlicesMemPool, L1NumSlicesHost}        ),
    .N_L1_SLICES_MAX     (L1NumSlicesMemPool > L1NumSlicesHost ? L1NumSlicesMemPool : L1NumSlicesHost),
    .EN_ACP              (1'b0                                                ),
    .ENABLE_L2TLB        ('{1'b0, 1'b0, L2Enable, 1'b0}                       ),
    .N_L2_SETS           (L2NumSets                                           ),
    .N_L2_SET_ENTRIES    (L2NumSetEntries                                     ),
    .N_L2_PAR_VA_RAMS    (L2NumParVaRams                                      ),
    .AXI_DATA_WIDTH      (AxiDataWidth                                        ),
    .AXI_S_ADDR_WIDTH    (AxiAddrWidth                                        ),
    .AXI_M_ADDR_WIDTH    (AxiAddrWidth                                        ),
    .AXI_LITE_DATA_WIDTH (AxiLiteDataWidth                                    ),
    .AXI_LITE_ADDR_WIDTH (AxiAddrWidth                                        ),
    .AXI_ID_WIDTH        (AxiIdWidth                                          ),
    .AXI_USER_WIDTH      (AxiUserWidth                                        ),
    .MH_FIFO_DEPTH       (MhFifoDepth                                         )
  ) i_rab (
    // Clock and reset; the gated and the non-gated clock are both driven by `clk_i`.
    .Clk_CI              (clk_i                                               ),
    .NonGatedClk_CI      (clk_i                                               ),
    .Rst_RBI             (rst_ni                                              ),

    // AXI4 Slave: write address channel
    .s_axi4_awid         ({from_mempool_req_i.aw.id,      from_host_req_i.aw.id     }),
    .s_axi4_awaddr       ({from_mempool_req_i.aw.addr,    from_host_req_i.aw.addr   }),
    .s_axi4_awvalid      ({from_mempool_req_i.aw_valid,   from_host_req_i.aw_valid  }),
    .s_axi4_awready      ({from_mempool_resp_o.aw_ready,  from_host_resp_o.aw_ready }),
    .s_axi4_awlen        ({from_mempool_req_i.aw.len,     from_host_req_i.aw.len    }),
    .s_axi4_awsize       ({from_mempool_req_i.aw.size,    from_host_req_i.aw.size   }),
    .s_axi4_awburst      ({from_mempool_req_i.aw.burst,   from_host_req_i.aw.burst  }),
    .s_axi4_awlock       ({from_mempool_req_i.aw.lock,    from_host_req_i.aw.lock   }),
    .s_axi4_awprot       ({from_mempool_req_i.aw.prot,    from_host_req_i.aw.prot   }),
    .s_axi4_awatop       ({from_mempool_req_i.aw.atop,    from_host_req_i.aw.atop   }),
    .s_axi4_awcache      ({from_mempool_req_i.aw.cache,   from_host_req_i.aw.cache  }),
    .s_axi4_awregion     ({from_mempool_req_i.aw.region,  from_host_req_i.aw.region }),
    .s_axi4_awqos        ({from_mempool_req_i.aw.qos,     from_host_req_i.aw.qos    }),
    .s_axi4_awuser       ({from_mempool_req_i.aw.user,    from_host_req_i.aw.user   }),
    // AXI4 Slave: write data channel
    .s_axi4_wdata        ({from_mempool_req_i.w.data,     from_host_req_i.w.data    }),
    .s_axi4_wvalid       ({from_mempool_req_i.w_valid,    from_host_req_i.w_valid   }),
    .s_axi4_wready       ({from_mempool_resp_o.w_ready,   from_host_resp_o.w_ready  }),
    .s_axi4_wstrb        ({from_mempool_req_i.w.strb,     from_host_req_i.w.strb    }),
    .s_axi4_wlast        ({from_mempool_req_i.w.last,     from_host_req_i.w.last    }),
    .s_axi4_wuser        ({from_mempool_req_i.w.user,     from_host_req_i.w.user    }),
    // AXI4 Slave: write response channel
    .s_axi4_bid          ({from_mempool_resp_o.b.id,      from_host_resp_o.b.id     }),
    .s_axi4_bresp        ({from_mempool_resp_o.b.resp,    from_host_resp_o.b.resp   }),
    .s_axi4_bvalid       ({from_mempool_resp_o.b_valid,   from_host_resp_o.b_valid  }),
    .s_axi4_buser        ({from_mempool_resp_o.b.user,    from_host_resp_o.b.user   }),
    .s_axi4_bready       ({from_mempool_req_i.b_ready,    from_host_req_i.b_ready   }),
    // AXI4 Slave: read address channel
    .s_axi4_arid         ({from_mempool_req_i.ar.id,      from_host_req_i.ar.id     }),
    .s_axi4_araddr       ({from_mempool_req_i.ar.addr,    from_host_req_i.ar.addr   }),
    .s_axi4_arvalid      ({from_mempool_req_i.ar_valid,   from_host_req_i.ar_valid  }),
    .s_axi4_arready      ({from_mempool_resp_o.ar_ready,  from_host_resp_o.ar_ready }),
    .s_axi4_arlen        ({from_mempool_req_i.ar.len,     from_host_req_i.ar.len    }),
    .s_axi4_arsize       ({from_mempool_req_i.ar.size,    from_host_req_i.ar.size   }),
    .s_axi4_arburst      ({from_mempool_req_i.ar.burst,   from_host_req_i.ar.burst  }),
    .s_axi4_arlock       ({from_mempool_req_i.ar.lock,    from_host_req_i.ar.lock   }),
    .s_axi4_arprot       ({from_mempool_req_i.ar.prot,    from_host_req_i.ar.prot   }),
    .s_axi4_arcache      ({from_mempool_req_i.ar.cache,   from_host_req_i.ar.cache  }),
    .s_axi4_arregion     ({from_mempool_req_i.ar.region,  from_host_req_i.ar.region }),
    .s_axi4_arqos        ({from_mempool_req_i.ar.qos,     from_host_req_i.ar.qos    }),
    .s_axi4_aruser       ({from_mempool_req_i.ar.user,    from_host_req_i.ar.user   }),
    // AXI4 Slave: read data channel
    .s_axi4_rid          ({from_mempool_resp_o.r.id,      from_host_resp_o.r.id     }),
    .s_axi4_rdata        ({from_mempool_resp_o.r.data,    from_host_resp_o.r.data   }),
    .s_axi4_rresp        ({from_mempool_resp_o.r.resp,    from_host_resp_o.r.resp   }),
    .s_axi4_rvalid       ({from_mempool_resp_o.r_valid,   from_host_resp_o.r_valid  }),
    .s_axi4_rready       ({from_mempool_req_i.r_ready,    from_host_req_i.r_ready   }),
    .s_axi4_rlast        ({from_mempool_resp_o.r.last,    from_host_resp_o.r.last   }),
    .s_axi4_ruser        ({from_mempool_resp_o.r.user,    from_host_resp_o.r.user   }),

    // AXI4 Master 0: write address channel
    .m0_axi4_awid        ({to_host_req_o.aw.id,           to_mempool_req_o.aw.id     }),
    .m0_axi4_awaddr      ({to_host_req_o.aw.addr,         to_mempool_req_o.aw.addr   }),
    .m0_axi4_awvalid     ({to_host_req_o.aw_valid,        to_mempool_req_o.aw_valid  }),
    .m0_axi4_awready     ({to_host_resp_i.aw_ready,       to_mempool_resp_i.aw_ready }),
    .m0_axi4_awlen       ({to_host_req_o.aw.len,          to_mempool_req_o.aw.len    }),
    .m0_axi4_awsize      ({to_host_req_o.aw.size,         to_mempool_req_o.aw.size   }),
    .m0_axi4_awburst     ({to_host_req_o.aw.burst,        to_mempool_req_o.aw.burst  }),
    .m0_axi4_awlock      ({to_host_req_o.aw.lock,         to_mempool_req_o.aw.lock   }),
    .m0_axi4_awprot      ({to_host_req_o.aw.prot,         to_mempool_req_o.aw.prot   }),
    .m0_axi4_awatop      ({to_host_req_o.aw.atop,         to_mempool_req_o.aw.atop   }),
    .m0_axi4_awcache     ({to_host_req_o.aw.cache,        to_mempool_req_o.aw.cache  }),
    .m0_axi4_awregion    ({to_host_req_o.aw.region,       to_mempool_req_o.aw.region }),
    .m0_axi4_awqos       ({to_host_req_o.aw.qos,          to_mempool_req_o.aw.qos    }),
    .m0_axi4_awuser      ({to_host_req_o.aw.user,         to_mempool_req_o.aw.user   }),
    // AXI4 Master 0: write data channel
    .m0_axi4_wdata       ({to_host_req_o.w.data,          to_mempool_req_o.w.data    }),
    .m0_axi4_wvalid      ({to_host_req_o.w_valid,         to_mempool_req_o.w_valid   }),
    .m0_axi4_wready      ({to_host_resp_i.w_ready,        to_mempool_resp_i.w_ready  }),
    .m0_axi4_wstrb       ({to_host_req_o.w.strb,          to_mempool_req_o.w.strb    }),
    .m0_axi4_wlast       ({to_host_req_o.w.last,          to_mempool_req_o.w.last    }),
    .m0_axi4_wuser       ({to_host_req_o.w.user,          to_mempool_req_o.w.user    }),
    // AXI4 Master 0: write response channel
    .m0_axi4_bid         ({to_host_resp_i.b.id,           to_mempool_resp_i.b.id     }),
    .m0_axi4_bresp       ({to_host_resp_i.b.resp,         to_mempool_resp_i.b.resp   }),
    .m0_axi4_bvalid      ({to_host_resp_i.b_valid,        to_mempool_resp_i.b_valid  }),
    .m0_axi4_buser       ({to_host_resp_i.b.user,         to_mempool_resp_i.b.user   }),
    .m0_axi4_bready      ({to_host_req_o.b_ready,         to_mempool_req_o.b_ready   }),
    // AXI4 Master 0: read address channel
    .m0_axi4_arid        ({to_host_req_o.ar.id,           to_mempool_req_o.ar.id     }),
    .m0_axi4_araddr      ({to_host_req_o.ar.addr,         to_mempool_req_o.ar.addr   }),
    .m0_axi4_arvalid     ({to_host_req_o.ar_valid,        to_mempool_req_o.ar_valid  }),
    .m0_axi4_arready     ({to_host_resp_i.ar_ready,       to_mempool_resp_i.ar_ready }),
    .m0_axi4_arlen       ({to_host_req_o.ar.len,          to_mempool_req_o.ar.len    }),
    .m0_axi4_arsize      ({to_host_req_o.ar.size,         to_mempool_req_o.ar.size   }),
    .m0_axi4_arburst     ({to_host_req_o.ar.burst,        to_mempool_req_o.ar.burst  }),
    .m0_axi4_arlock      ({to_host_req_o.ar.lock,         to_mempool_req_o.ar.lock   }),
    .m0_axi4_arprot      ({to_host_req_o.ar.prot,         to_mempool_req_o.ar.prot   }),
    .m0_axi4_arcache     ({to_host_req_o.ar.cache,        to_mempool_req_o.ar.cache  }),
    .m0_axi4_arregion    ({to_host_req_o.ar.region,       to_mempool_req_o.ar.region }),
    .m0_axi4_arqos       ({to_host_req_o.ar.qos,          to_mempool_req_o.ar.qos    }),
    .m0_axi4_aruser      ({to_host_req_o.ar.user,         to_mempool_req_o.ar.user   }),
    // AXI4 Master 0: read data channel
    .m0_axi4_rid         ({to_host_resp_i.r.id,           to_mempool_resp_i.r.id     }),
    .m0_axi4_rdata       ({to_host_resp_i.r.data,         to_mempool_resp_i.r.data   }),
    .m0_axi4_rresp       ({to_host_resp_i.r.resp,         to_mempool_resp_i.r.resp   }),
    .m0_axi4_rvalid      ({to_host_resp_i.r_valid,        to_mempool_resp_i.r_valid  }),
    .m0_axi4_rready      ({to_host_req_o.r_ready,         to_mempool_req_o.r_ready   }),
    .m0_axi4_rlast       ({to_host_resp_i.r.last,         to_mempool_resp_i.r.last   }),
    .m0_axi4_ruser       ({to_host_resp_i.r.user,         to_mempool_resp_i.r.user   }),

    // AXI4 Master 1 is not used: its outputs are left open and its inputs are tied to zero.
    .m1_axi4_awid        (/* unused */),
    .m1_axi4_awaddr      (/* unused */),
    .m1_axi4_awvalid     (/* unused */),
    .m1_axi4_awready     ('0          ),
    .m1_axi4_awlen       (/* unused */),
    .m1_axi4_awsize      (/* unused */),
    .m1_axi4_awburst     (/* unused */),
    .m1_axi4_awlock      (/* unused */),
    .m1_axi4_awprot      (/* unused */),
    .m1_axi4_awatop      (/* unused */),
    .m1_axi4_awcache     (/* unused */),
    .m1_axi4_awregion    (/* unused */),
    .m1_axi4_awqos       (/* unused */),
    .m1_axi4_awuser      (/* unused */),
    .m1_axi4_wdata       (/* unused */),
    .m1_axi4_wvalid      (/* unused */),
    .m1_axi4_wready      ('0          ),
    .m1_axi4_wstrb       (/* unused */),
    .m1_axi4_wlast       (/* unused */),
    .m1_axi4_wuser       (/* unused */),
    .m1_axi4_bid         ('0          ),
    .m1_axi4_bresp       ('0          ),
    .m1_axi4_bvalid      ('0          ),
    .m1_axi4_buser       ('0          ),
    .m1_axi4_bready      (/* unused */),
    .m1_axi4_arid        (/* unused */),
    .m1_axi4_araddr      (/* unused */),
    .m1_axi4_arvalid     (/* unused */),
    .m1_axi4_arready     ('0          ),
    .m1_axi4_arlen       (/* unused */),
    .m1_axi4_arsize      (/* unused */),
    .m1_axi4_arburst     (/* unused */),
    .m1_axi4_arlock      (/* unused */),
    .m1_axi4_arprot      (/* unused */),
    .m1_axi4_arcache     (/* unused */),
    .m1_axi4_arregion    (/* unused */),
    .m1_axi4_arqos       (/* unused */),
    .m1_axi4_aruser      (/* unused */),
    .m1_axi4_rid         ('0          ),
    .m1_axi4_rdata       ('0          ),
    .m1_axi4_rresp       ('0          ),
    .m1_axi4_rvalid      ('0          ),
    .m1_axi4_rready      (/* unused */),
    .m1_axi4_rlast       ('0          ),
    .m1_axi4_ruser       ('0          ),

    // AXI4 Lite Slave (Configuration Interface)
    .s_axi4lite_awaddr   (conf_aw_addr        ),
    .s_axi4lite_awvalid  (conf_req_i.aw_valid ),
    .s_axi4lite_awready  (conf_resp_o.aw_ready),
    .s_axi4lite_wdata    (conf_req_i.w.data   ),
    .s_axi4lite_wvalid   (conf_req_i.w_valid  ),
    .s_axi4lite_wready   (conf_resp_o.w_ready ),
    .s_axi4lite_wstrb    (conf_req_i.w.strb   ),
    .s_axi4lite_bresp    (conf_resp_o.b.resp  ),
    .s_axi4lite_bvalid   (conf_resp_o.b_valid ),
    .s_axi4lite_bready   (conf_req_i.b_ready  ),
    .s_axi4lite_araddr   (conf_ar_addr        ),
    .s_axi4lite_arvalid  (conf_req_i.ar_valid ),
    .s_axi4lite_arready  (conf_resp_o.ar_ready),
    .s_axi4lite_rdata    (conf_resp_o.r.data  ),
    .s_axi4lite_rresp    (conf_resp_o.r.resp  ),
    .s_axi4lite_rvalid   (conf_resp_o.r_valid ),
    .s_axi4lite_rready   (conf_req_i.r_ready  ),

    // Interrupts
    .int_miss            ({from_mempool_miss_irq_o,  from_host_miss_irq_o }),
    .int_multi           ({from_mempool_multi_irq_o, from_host_multi_irq_o}),
    .int_prot            ({from_mempool_prot_irq_o,  from_host_prot_irq_o }),
    .int_mhf_full        (mh_fifo_full_irq_o                               )
  );

endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Description: Automatically generated bootrom
//
// Generated by hardware/scripts/generate_bootrom.py
// Boot ROM with a registered read address.
//
// The ROM content is the constant `mem`, which holds `RomSize` words of `DataWidth` bits each
// (a single word in this generated instance). A read takes one cycle: the word index taken from
// `addr_i` is captured on the rising edge of `clk_i` while `req_i` is high, and `rdata_o` is
// then driven combinationally from the captured index.
//
// NOTE: this module is emitted by a generator (see the file header); edit the generator rather
// than this code.
module bootrom #(
/* Automatically generated. DO NOT CHANGE! */
parameter int unsigned DataWidth = 128,
parameter int unsigned AddrWidth = 32
) (
input logic clk_i,
input logic req_i,
input logic [AddrWidth-1:0] addr_i,
output logic [DataWidth-1:0] rdata_o
);
// Number of words stored in the ROM.
localparam int RomSize = 1;
// Width of the word index; kept at a minimum of one bit so `addr_q` never has zero width.
localparam int AddrBits = RomSize > 1 ? $clog2(RomSize) : 1;
// ROM content, one `DataWidth`-bit word per entry.
const logic [RomSize-1:0][DataWidth-1:0] mem = {
128'h00050067_10500073_00050513_e0000517
};
// Word index captured from the most recent request.
logic [AddrBits-1:0] addr_q;
always_ff @(posedge clk_i) begin
// Capture the index only when a request is present; otherwise hold the previous index.
if (req_i) begin
// The four least significant address bits are skipped; presumably they are the byte offset
// inside one 16-byte (128-bit) word -- this matches the default `DataWidth` only.
addr_q <= addr_i[AddrBits-1+4:4];
end
end
// this prevents spurious Xes from propagating into
// the speculative fetch stage of the core
// Indices at or beyond `RomSize` read as all-zero instead of indexing past the ROM content.
assign rdata_o = (addr_q < RomSize) ? mem[addr_q] : '0;
endmodule
// Copyright 2016 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration
///
/// This package contains a collection of mathematical functions that are commonly used when defining
/// the value of constants in HDL code. These functions are implemented as Verilog constant
/// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a
/// function whose value can be evaluated at compile time or during elaboration. A constant function
/// must be called with arguments that are constants.
package cf_math_pkg;

  /// Ceiled Division of Two Natural Numbers
  ///
  /// Returns the quotient of two natural numbers, rounded towards plus infinity.
  ///
  /// - `dividend`: number to be divided; must not be negative.
  /// - `divisor`:  number to divide by; must be strictly positive.
  ///
  /// In simulation (outside Verilator), a negative operand or a zero divisor ends the run with
  /// `$fatal`. Where these checks are compiled out, the function returns `0` for a non-positive
  /// `dividend` or `divisor`.
  ///
  /// The result is computed in closed form rather than by repeated subtraction, so its
  /// evaluation cost does not grow with the size of the quotient and it always terminates.
  function automatic integer ceil_div (input longint dividend, input longint divisor);
    automatic longint quotient;
    // pragma translate_off
    `ifndef VERILATOR
    if (dividend < 0) begin
      $fatal(1, "Dividend %0d is not a natural number!", dividend);
    end
    if (divisor < 0) begin
      $fatal(1, "Divisor %0d is not a natural number!", divisor);
    end
    if (divisor == 0) begin
      $fatal(1, "Division by zero!");
    end
    `endif
    // pragma translate_on
    // Guard against operands outside the natural numbers when the checks above are not active;
    // in particular, never divide by zero.
    if (dividend <= 0 || divisor <= 0) begin
      return 0;
    end
    // Truncating division rounds down; add one whenever a remainder is left over.
    quotient = dividend / divisor;
    if ((dividend % divisor) != 0) begin
      quotient = quotient + 1;
    end
    return int'(quotient);
  endfunction

  /// Index width required to be able to represent up to `num_idx` indices as a binary
  /// encoded signal.
  /// Ensures that the minimum width of an index signal is `1`, regardless of parametrization.
  ///
  /// Sample usage in type definition:
  /// As parameter:
  ///   `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]`
  /// As typedef:
  ///   `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t`
  function automatic integer unsigned idx_width (input integer unsigned num_idx);
    return (num_idx > 32'd1) ? unsigned'($clog2(num_idx)) : 32'd1;
  endfunction

endpackage
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
`include "common_cells/registers.svh"
// Control registers exposed over an AXI-Lite slave port.
//
// The register file consists of five `DataWidth`-bit registers (lowest address first):
//   0: eoc                (read/write)
//   1: wake_up            (read/write)
//   2: tcdm_start_address (read-only, resets to `TCDMBaseAddr`)
//   3: tcdm_end_address   (read-only, resets to `TCDMBaseAddr + TCDMSize`)
//   4: num_cores          (read-only, resets to `NumCores`)
//
// `NumRegs` has to be set to 5 by the instantiating module, since the reset-value and
// read-only tables below have exactly five entries.
//
// `AddrWidth` and `AxiLiteDataWidth` are not parameters of this module; they are expected to be
// provided by the wildcard import of `mempool_pkg`.
module ctrl_registers #(
  parameter int DataWidth                      = 32,
  parameter int NumRegs                        = 0,
  // Parameters
  parameter logic [DataWidth-1:0] TCDMBaseAddr = 0,
  parameter logic [DataWidth-1:0] TCDMSize     = 0,
  parameter logic [DataWidth-1:0] NumCores     = 0,
  // AXI Structs
  parameter type axi_lite_req_t                = logic,
  parameter type axi_lite_resp_t               = logic
) (
  input  logic                 clk_i,
  input  logic                 rst_ni,
  // AXI Bus
  input  axi_lite_req_t        axi_lite_slave_req_i,
  output axi_lite_resp_t       axi_lite_slave_resp_o,
  // Control registers
  output logic [DataWidth-1:0] eoc_o,
  output logic                 eoc_valid_o,
  output logic [NumCores-1:0]  wake_up_o,
  output logic [DataWidth-1:0] tcdm_start_address_o,
  output logic [DataWidth-1:0] tcdm_end_address_o,
  output logic [DataWidth-1:0] num_cores_o
);

  import mempool_pkg::*;

  /*****************
   *  Definitions  *
   *****************/

  localparam int unsigned DataWidthInBytes = (DataWidth + 7) / 8;
  localparam int unsigned RegNumBytes      = NumRegs * DataWidthInBytes;
  localparam int unsigned RegDataWidth     = NumRegs * DataWidth;

  // Per-byte access attribute of one register: all ones marks every byte as read-only,
  // all zeros marks every byte as writable.
  localparam logic [DataWidthInBytes-1:0] ReadOnlyReg  = {DataWidthInBytes{1'b1}};
  localparam logic [DataWidthInBytes-1:0] ReadWriteReg = {DataWidthInBytes{1'b0}};

  // Position of the wake-up register in the register file, and the index of its first byte in
  // the per-byte write-strobe vector.
  localparam int unsigned WakeUpRegIdx  = 1;
  localparam int unsigned WakeUpByteLsb = WakeUpRegIdx * DataWidthInBytes;

  // Memory map (byte offsets for the default `DataWidth` of 32)
  // [3:0]:  eoc_reg                (rw)
  // [7:4]:  wake_up_reg            (rw)
  // [11:8]: tcdm_start_address_reg (ro)
  // [15:12]:tcdm_end_address_reg   (ro)
  // [19:16]:nr_cores_address_reg   (ro)
  // Both tables list the highest register first.
  localparam logic [NumRegs-1:0][DataWidth-1:0] RegRstVal = '{
    NumCores,
    TCDMBaseAddr + TCDMSize,
    TCDMBaseAddr,
    {DataWidth{1'b0}},
    {DataWidth{1'b0}}
  };
  localparam logic [NumRegs-1:0][DataWidthInBytes-1:0] AxiReadOnly = '{
    ReadOnlyReg,
    ReadOnlyReg,
    ReadOnlyReg,
    ReadWriteReg,
    ReadWriteReg
  };

  /***************
   *  Registers  *
   ***************/

  logic [DataWidth-1:0]   eoc;
  logic [DataWidth-1:0]   wake_up;
  logic [DataWidth-1:0]   tcdm_start_address;
  logic [DataWidth-1:0]   tcdm_end_address;
  logic [DataWidth-1:0]   num_cores;

  // Per-byte write-active flags reported by the register file, and a one-cycle delayed copy.
  logic [RegNumBytes-1:0] wr_active_d;
  logic [RegNumBytes-1:0] wr_active_q;

  axi_lite_regs #(
    .RegNumBytes (RegNumBytes           ),
    .AxiAddrWidth(AddrWidth             ),
    .AxiDataWidth(AxiLiteDataWidth      ),
    .AxiReadOnly (AxiReadOnly           ),
    .RegRstVal   (RegRstVal             ),
    .req_lite_t  (axi_lite_req_t        ),
    .resp_lite_t (axi_lite_resp_t       )
  ) i_axi_lite_regs (
    .clk_i      (clk_i                  ),
    .rst_ni     (rst_ni                 ),
    .axi_req_i  (axi_lite_slave_req_i   ),
    .axi_resp_o (axi_lite_slave_resp_o  ),
    .wr_active_o(wr_active_d            ),
    .rd_active_o(/* Unused */           ),
    // The registers are never loaded from the hardware side.
    .reg_d_i    ('0                     ),
    .reg_load_i ('0                     ),
    .reg_q_o    ({num_cores, tcdm_end_address, tcdm_start_address, wake_up, eoc})
  );

  /***************
   *   Signals   *
   ***************/

  // Bit 0 of the eoc register is the valid flag; the remaining bits are the reported value.
  assign eoc_o                = eoc >> 1;
  assign eoc_valid_o          = eoc[0];
  assign tcdm_start_address_o = tcdm_start_address;
  assign tcdm_end_address_o   = tcdm_end_address;
  assign num_cores_o          = num_cores;

  // Convert the `DataWidth`-bit wake-up register into a one-hot (or all-ones) vector with one
  // bit per core. The vector is only driven while the delayed write-active flag of any byte of
  // the wake-up register is set, and is zero otherwise.
  always_comb begin
    wake_up_o = '0;
    if (|wr_active_q[WakeUpByteLsb +: DataWidthInBytes]) begin
      if (wake_up < NumCores) begin
        // Wake up the single core selected by the written index.
        wake_up_o = 1 << wake_up;
      end else if (wake_up == {DataWidth{1'b1}}) begin
        // An all-ones value wakes up every core.
        wake_up_o = {NumCores{1'b1}};
      end
    end
  end

  // register to add +1 latency to the wr_active signal
  `FF(wr_active_q, wr_active_d, '0, clk_i, rst_ni)

endmodule : ctrl_registers
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Fall-through register with a simple stream-like ready/valid handshake.
// This register does not cut combinatorial paths on any signals: in case the module at its output
// is ready to accept data within the same clock cycle, they are forwarded. Use this module to get a
// 'default ready' behavior towards the input.
// Single-entry fall-through FIFO wrapped into a ready/valid stream interface.
module fall_through_register #(
  parameter type T = logic  // Vivado requires a default value for type parameters.
) (
  input  logic clk_i,       // Clock
  input  logic rst_ni,      // Asynchronous active-low reset
  input  logic clr_i,       // Synchronous clear
  input  logic testmode_i,  // Test mode to bypass clock gating
  // Input port
  input  logic valid_i,
  output logic ready_o,
  input  T     data_i,
  // Output port
  output logic valid_o,
  input  logic ready_i,
  output T     data_o
);

  // Occupancy flags reported by the FIFO.
  logic fifo_empty;
  logic fifo_full;

  // Qualified handshakes: only push while there is room and only pop while
  // there is something to hand out.
  logic fifo_push;
  logic fifo_pop;

  assign fifo_push = valid_i & ~fifo_full;
  assign fifo_pop  = ready_i & ~fifo_empty;

  fifo_v2 #(
    .FALL_THROUGH ( 1'b1     ),
    .DATA_WIDTH   ( $size(T) ),
    .DEPTH        ( 1        ),
    .dtype        ( T        )
  ) i_fifo (
    .clk_i       ( clk_i      ),
    .rst_ni      ( rst_ni     ),
    .flush_i     ( clr_i      ),
    .testmode_i  ( testmode_i ),
    .full_o      ( fifo_full  ),
    .empty_o     ( fifo_empty ),
    .alm_full_o  (            ),
    .alm_empty_o (            ),
    .data_i      ( data_i     ),
    .push_i      ( fifo_push  ),
    .data_o      ( data_o     ),
    .pop_i       ( fifo_pop   )
  );

  // Stream handshake outputs are the inverted occupancy flags.
  assign valid_o = ~fifo_empty;
  assign ready_o = ~fifo_full;

endmodule
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Wrapper around `fifo_v3` that replaces the fill-level output with
// almost-full / almost-empty threshold flags.
module fifo_v2 #(
  parameter bit          FALL_THROUGH = 1'b0, // fifo is in fall-through mode
  parameter int unsigned DATA_WIDTH   = 32,   // default data width if the fifo is of type logic
  parameter int unsigned DEPTH        = 8,    // depth can be arbitrary from 0 to 2**32
  parameter int unsigned ALM_EMPTY_TH = 1,    // almost empty threshold (when to assert alm_empty_o)
  parameter int unsigned ALM_FULL_TH  = 1,    // almost full threshold (when to assert alm_full_o)
  parameter type dtype                = logic [DATA_WIDTH-1:0],
  // DO NOT OVERWRITE THIS PARAMETER
  parameter int unsigned ADDR_DEPTH   = (DEPTH > 1) ? $clog2(DEPTH) : 1
)(
  input  logic clk_i,       // Clock
  input  logic rst_ni,      // Asynchronous reset active low
  input  logic flush_i,     // flush the queue
  input  logic testmode_i,  // test_mode to bypass clock gating
  // status flags
  output logic full_o,      // queue is full
  output logic empty_o,     // queue is empty
  output logic alm_full_o,  // FIFO fillstate >= the specified threshold
  output logic alm_empty_o, // FIFO fillstate <= the specified threshold
  // as long as the queue is not full we can push new data
  input  dtype data_i,      // data to push into the queue
  input  logic push_i,      // data is valid and can be pushed to the queue
  // as long as the queue is not empty we can pop new elements
  output dtype data_o,      // output data
  input  logic pop_i        // pop head from queue
);

  // Fill level reported by the wrapped FIFO.
  logic [ADDR_DEPTH-1:0] usage;

  // Threshold flags. They carry no meaning for a FIFO of depth 0 and are tied
  // low in that configuration; otherwise the fill level is compared against the
  // thresholds truncated to the width of the fill level.
  assign alm_full_o  = (DEPTH == 0) ? 1'b0 : (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]);
  assign alm_empty_o = (DEPTH == 0) ? 1'b0 : (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]);

  fifo_v3 #(
    .FALL_THROUGH ( FALL_THROUGH ),
    .DATA_WIDTH   ( DATA_WIDTH   ),
    .DEPTH        ( DEPTH        ),
    .dtype        ( dtype        )
  ) i_fifo_v3 (
    .clk_i      ( clk_i      ),
    .rst_ni     ( rst_ni     ),
    .flush_i    ( flush_i    ),
    .testmode_i ( testmode_i ),
    .full_o     ( full_o     ),
    .empty_o    ( empty_o    ),
    .usage_o    ( usage      ),
    .data_i     ( data_i     ),
    .push_i     ( push_i     ),
    .data_o     ( data_o     ),
    .pop_i      ( pop_i      )
  );

  // Elaboration-time sanity checks on the thresholds (simulation only).
  // pragma translate_off
  `ifndef VERILATOR
  initial begin
    assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH.");
    assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH.");
  end
  `endif
  // pragma translate_on

endmodule // fifo_v2
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Synchronous FIFO with a circular buffer of `dtype` entries, separate read and
// write pointers and a fill counter.
//
// With FALL_THROUGH set, data pushed into an empty FIFO is forwarded
// combinationally to `data_o` in the same cycle, and `empty_o` is deasserted
// for that cycle.
module fifo_v3 #(
parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode
parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic
parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32
parameter type dtype = logic [DATA_WIDTH-1:0],
// DO NOT OVERWRITE THIS PARAMETER
// Pointer width; at least one bit even for DEPTH <= 1.
parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush the queue
// NOTE: testmode_i is not referenced anywhere inside this module.
input logic testmode_i, // test_mode to bypass clock gating
// status flags
output logic full_o, // queue is full
output logic empty_o, // queue is empty
output logic [ADDR_DEPTH-1:0] usage_o, // fill pointer
// as long as the queue is not full we can push new data
input dtype data_i, // data to push into the queue
input logic push_i, // data is valid and can be pushed to the queue
// as long as the queue is not empty we can pop new elements
output dtype data_o, // output data
input logic pop_i // pop head from queue
);
// local parameter
// FIFO depth - handle the case of pass-through, synthesizer will do constant propagation
localparam int unsigned FifoDepth = (DEPTH > 0) ? DEPTH : 1;
// clock gating control
// Acts as an active-low write enable for the storage: `mem_q` is only updated
// in cycles where gate_clock is 0.
logic gate_clock;
// pointer to the read and write section of the queue
logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
// keep a counter to keep track of the current queue status
// this integer will be truncated by the synthesis tool
// The counter is one bit wider than the pointers so that it can hold the value
// FifoDepth itself (the full state).
logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q;
// actual memory
dtype [FifoDepth - 1:0] mem_n, mem_q;
// Only the lower ADDR_DEPTH bits of the counter are exported. For a
// power-of-two DEPTH greater than one, a completely full FIFO therefore reads
// as usage_o == 0; use full_o to tell the two states apart.
assign usage_o = status_cnt_q[ADDR_DEPTH-1:0];
// status flags
if (DEPTH == 0) begin : gen_pass_through
// Without storage the flags mirror the handshake of the opposite side.
assign empty_o = ~push_i;
assign full_o = ~pop_i;
end else begin : gen_fifo
assign full_o = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]);
// In fall-through mode a push into an empty FIFO makes data available in the
// same cycle, hence the FIFO does not report empty while push_i is high.
assign empty_o = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i);
end
// read and write queue logic
// The statements below are order dependent: later sections deliberately
// override the next-state values computed by earlier ones.
always_comb begin : read_write_comb
// default assignment
read_pointer_n = read_pointer_q;
write_pointer_n = write_pointer_q;
status_cnt_n = status_cnt_q;
data_o = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
mem_n = mem_q;
gate_clock = 1'b1;
// push a new element to the queue
if (push_i && ~full_o) begin
// push the data onto the queue
mem_n[write_pointer_q] = data_i;
// un-gate the clock, we want to write something
gate_clock = 1'b0;
// increment the write counter
// wrap around after the last entry (supports depths that are not a power of two)
if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1)
write_pointer_n = '0;
else
write_pointer_n = write_pointer_q + 1;
// increment the overall counter
status_cnt_n = status_cnt_q + 1;
end
if (pop_i && ~empty_o) begin
// read from the queue is a default assignment
// but increment the read pointer...
// read_pointer_n still holds its default value read_pointer_q at this point,
// so this comparison is equivalent to testing read_pointer_q.
if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1)
read_pointer_n = '0;
else
read_pointer_n = read_pointer_q + 1;
// ... and decrement the overall count
status_cnt_n = status_cnt_q - 1;
end
// keep the count pointer stable if we push and pop at the same time
if (push_i && pop_i && ~full_o && ~empty_o)
status_cnt_n = status_cnt_q;
// FIFO is in pass through mode -> do not change the pointers
if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
// forward the incoming data directly to the output
data_o = data_i;
// If the element is consumed in the same cycle it never occupies the queue:
// undo the pointer and counter updates made above.
if (pop_i) begin
status_cnt_n = status_cnt_q;
read_pointer_n = read_pointer_q;
write_pointer_n = write_pointer_q;
end
end
end
// sequential process
// Pointers and fill counter: asynchronous reset, synchronous flush.
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
read_pointer_q <= '0;
write_pointer_q <= '0;
status_cnt_q <= '0;
end else begin
if (flush_i) begin
read_pointer_q <= '0;
write_pointer_q <= '0;
status_cnt_q <= '0;
end else begin
read_pointer_q <= read_pointer_n;
write_pointer_q <= write_pointer_n;
status_cnt_q <= status_cnt_n;
end
end
end
// Storage: cleared by the asynchronous reset only and written when gate_clock
// is low. flush_i does not clear the stored data; it only resets the pointers
// and the counter above.
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
mem_q <= '0;
end else if (!gate_clock) begin
mem_q <= mem_n;
end
end
// pragma translate_off
`ifndef VERILATOR
// NOTE(review): this check reports an error for DEPTH == 0 although the module
// contains a dedicated gen_pass_through branch for that value -- confirm which
// of the two is intended.
initial begin
assert (DEPTH > 0) else $error("DEPTH must be greater than 0.");
end
// Protocol checks: the user must not push into a full FIFO or pop from an
// empty one.
full_write : assert property(
@(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i))
else $fatal (1, "Trying to push new data although the FIFO is full.");
empty_read : assert property(
@(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i))
else $fatal (1, "Trying to pop data although the FIFO is empty.");
`endif
// pragma translate_on
endmodule // fifo_v3
// Copyright 2018 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Deprecated, use lzc unit instead.
/// A leading-one finder / leading zero counter.
/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
// Implementation: a binary reduction tree with NUM_LEVELS levels. Each node
// carries a "contains a one" flag (sel_nodes) and the index of the first one
// found below it (index_nodes). The node at position `l` of level `level` is
// stored at array index 2**level-1+l, so the root is element 0 and the children
// of a node sit at 2**(level+1)-1+2*l and 2**(level+1)-1+2*l+1.
module find_first_one #(
/// The width of the input vector.
parameter int WIDTH = -1,
parameter int FLIP = 0
)(
input logic [WIDTH-1:0] in_i,
output logic [$clog2(WIDTH)-1:0] first_one_o,
output logic no_ones_o
);
localparam int NUM_LEVELS = $clog2(WIDTH);
// pragma translate_off
// The default WIDTH of -1 fails this check, so the parameter has to be
// overridden by the instantiating module.
initial begin
assert(WIDTH >= 0);
end
// pragma translate_on
// Constant table mapping each bit position to its binary-encoded index.
logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut;
// Tree storage, see the layout description at the top of the module.
logic [2**NUM_LEVELS-1:0] sel_nodes;
logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes;
// Input vector, bit-reversed when FLIP is set.
logic [WIDTH-1:0] in_tmp;
// Reverse the input if requested, so that the search below always starts at
// bit 0 of in_tmp.
for (genvar i = 0; i < WIDTH; i++) begin
assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
end
for (genvar j = 0; j < WIDTH; j++) begin
assign index_lut[j] = j;
end
for (genvar level = 0; level < NUM_LEVELS; level++) begin
// Inner levels: combine the two child nodes. The first child (covering the
// lower indices) takes priority whenever it contains a one.
if (level < NUM_LEVELS-1) begin
for (genvar l = 0; l < 2**level; l++) begin
assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
end
end
// Leaf level: each node looks at up to two adjacent bits of in_tmp.
if (level == NUM_LEVELS-1) begin
for (genvar k = 0; k < 2**level; k++) begin
// if two successive indices are still in the vector...
if (k * 2 < WIDTH-1) begin
assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1];
assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
end
// if only the first index is still in the vector...
if (k * 2 == WIDTH-1) begin
assign sel_nodes[2**level-1+k] = in_tmp[k*2];
assign index_nodes[2**level-1+k] = index_lut[k*2];
end
// if index is out of range
// (padding nodes when WIDTH is not a power of two; they never report a one)
if (k * 2 > WIDTH-1) begin
assign sel_nodes[2**level-1+k] = 1'b0;
assign index_nodes[2**level-1+k] = '0;
end
end
end
end
// The root node holds the overall result. Without any tree levels the outputs
// are tied to constants: index 0 and "no ones".
assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1;
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
/// A register with handshakes that completely cuts any combinatorial paths
/// between the input and output in isochronous clock domains.
///
/// > Definition of isochronous: In telecommunication, an isochronous signal is a signal
/// > in which the time interval separating any two significant instants is equal to the
/// > unit interval or a multiple of the unit interval.
///
/// The source and destination clock domains must be derived from the same clock
/// but can vary in frequency by a constant factor (e.g., double the frequency).
///
/// The module is basically a two deep dual-clock fifo with read and write pointers
/// in different clock domains. As we know the static timing relationship between the
/// clock domains we can rely on static timing analysis (STA) to get the sampling windows
/// right and therefore don't need any synchronization.
///
/// # Restrictions
///
/// Source and destination clock domains must be an integer multiple of each other and
/// all timing-paths need to be covered by STA. For example a recommended SDC would be:
///
/// `create_generated_clock dst_clk_i -name dst_clk -source src_clk_i -divide_by 2
///
/// There are _no_ restrictions on which clock domain should be the faster, any integer
/// ratio will work.
module isochronous_spill_register #(
  /// Data type of spill register.
  parameter type T      = logic,
  /// Make this spill register transparent.
  parameter bit  Bypass = 1'b0
) (
  /// Clock of source clock domain.
  input  logic src_clk_i,
  /// Active low async reset in source domain.
  input  logic src_rst_ni,
  /// Source input data is valid.
  input  logic src_valid_i,
  /// Source is ready to accept.
  output logic src_ready_o,
  /// Source input data.
  input  T     src_data_i,
  /// Clock of destination clock domain.
  input  logic dst_clk_i,
  /// Active low async reset in destination domain.
  input  logic dst_rst_ni,
  /// Destination output data is valid.
  output logic dst_valid_o,
  /// Destination is ready to accept.
  input  logic dst_ready_i,
  /// Destination output data.
  output T     dst_data_o
);

  // Don't generate the spill register.
  if (Bypass) begin : gen_bypass
    // Purely combinational feed-through of the handshake and the data.
    assign dst_valid_o = src_valid_i;
    assign src_ready_o = dst_ready_i;
    assign dst_data_o  = src_data_i;
  // Generate the spill register
  end else begin : gen_isochronous_spill_register
    /// Read/write pointer are one bit wider than necessary.
    /// We implicitly capture the full and empty state with the second bit:
    /// If all but the topmost bit of `rd_pointer_q` and `wr_pointer_q` agree, the
    /// FIFO is in a critical state. If the topmost bit is equal, the FIFO is
    /// empty, otherwise it is full.
    logic [1:0] rd_pointer_q, wr_pointer_q;
    // Advance write pointer if we pushed a new item into the FIFO. (Source clock domain)
    `FFLARN(wr_pointer_q, wr_pointer_q+1, (src_valid_i && src_ready_o), '0, src_clk_i, src_rst_ni)
    // Advance read pointer if downstream consumed an item. (Destination clock domain)
    `FFLARN(rd_pointer_q, rd_pointer_q+1, (dst_valid_o && dst_ready_i), '0, dst_clk_i, dst_rst_ni)

    // Two-entry storage, written in the source clock domain on an accepted
    // handshake. Only the entry addressed by the write pointer's LSB changes.
    // NOTE(review): `FFLNR` takes no reset argument here, so the storage
    // presumably has no reset value -- confirm in common_cells/registers.svh.
    T [1:0] mem_d, mem_q;
    `FFLNR(mem_q, mem_d, (src_valid_i && src_ready_o), src_clk_i)
    always_comb begin
      mem_d = mem_q;
      mem_d[wr_pointer_q[0]] = src_data_i;
    end

    // Full: the pointers differ in the topmost bit only.
    assign src_ready_o = (rd_pointer_q ^ wr_pointer_q) != 2'b10;
    // Empty: the pointers are identical.
    assign dst_valid_o = (rd_pointer_q ^ wr_pointer_q) != '0;
    assign dst_data_o  = mem_q[rd_pointer_q[0]];
  end

  // pragma translate_off
  // stability guarantees
  // While a transfer is stalled (valid high, ready low), valid and data must not
  // change. The resets are active low, so the checks have to be disabled while
  // the reset is asserted, i.e. while `*_rst_ni` is low. Disabling on the
  // non-inverted signal would switch the checks off during normal operation.
  `ifndef VERILATOR
  assert property (@(posedge src_clk_i) disable iff (!src_rst_ni)
    (src_valid_i && !src_ready_o |=> $stable(src_valid_i))) else $error("src_valid_i is unstable");
  assert property (@(posedge src_clk_i) disable iff (!src_rst_ni)
    (src_valid_i && !src_ready_o |=> $stable(src_data_i))) else $error("src_data_i is unstable");
  assert property (@(posedge dst_clk_i) disable iff (!dst_rst_ni)
    (dst_valid_o && !dst_ready_i |=> $stable(dst_valid_o))) else $error("dst_valid_o is unstable");
  assert property (@(posedge dst_clk_i) disable iff (!dst_rst_ni)
    (dst_valid_o && !dst_ready_i |=> $stable(dst_data_o))) else $error("dst_data_o is unstable");
  `endif
  // pragma translate_on

endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Latch-based memory with one read port and one write port.
//
// Each word is an array of level-sensitive latches behind its own clock-gating
// cell. The read address is registered and the addressed word is then driven
// combinationally onto ReadData.
module latch_scm #(
  parameter ADDR_WIDTH = 5,
  parameter DATA_WIDTH = 32
) (
  input  logic                  clk,
  // Read port
  input  logic                  ReadEnable,
  input  logic [ADDR_WIDTH-1:0] ReadAddr,
  output logic [DATA_WIDTH-1:0] ReadData,
  // Write port
  input  logic                  WriteEnable,
  input  logic [ADDR_WIDTH-1:0] WriteAddr,
  input  logic [DATA_WIDTH-1:0] WriteData
);

  localparam NUM_WORDS = 2**ADDR_WIDTH;

  // Read address register, located at the input of the address decoder
  logic [ADDR_WIDTH-1:0] RAddrRegxDP;
  logic [NUM_WORDS-1:0]  RAddrOneHotxD;

  // Storage array, one-hot write select and the per-word gated clocks
  logic [DATA_WIDTH-1:0] MemContentxDP[NUM_WORDS];
  logic [NUM_WORDS-1:0]  WAddrOneHotxD;
  logic [NUM_WORDS-1:0]  ClocksxC;

  // Registered copy of the write data, and the globally gated write clock
  logic [DATA_WIDTH-1:0] WDataIntxD;
  logic                  clk_int;

  //-----------------------------------------------------------------------------
  //-- WRITE : Global clock gate, the write clock only toggles for write accesses
  //-----------------------------------------------------------------------------
  tc_clk_gating CG_WE_GLOBAL (
    .clk_i     ( clk         ),
    .en_i      ( WriteEnable ),
    .test_en_i ( 1'b0        ),
    .clk_o     ( clk_int     )
  );

  //-----------------------------------------------------------------------------
  //-- READ : Read address register
  //-----------------------------------------------------------------------------
  always_ff @(posedge clk) begin : p_RAddrReg
    if (ReadEnable) begin
      RAddrRegxDP <= ReadAddr;
    end
  end

  //-----------------------------------------------------------------------------
  //-- READ : Read address decoder RAD
  //-----------------------------------------------------------------------------
  always_comb begin : p_RAD
    RAddrOneHotxD              = '0;
    RAddrOneHotxD[RAddrRegxDP] = 1'b1;
  end

  // The read data is taken directly from the word selected by the registered address.
  assign ReadData = MemContentxDP[RAddrRegxDP];

  //-----------------------------------------------------------------------------
  //-- WRITE : Write Address Decoder (WAD), combinatorial process
  //-----------------------------------------------------------------------------
  always_comb begin : p_WAD
    for (int unsigned word = 0; word < NUM_WORDS; word++) begin : p_WordIter
      if ((WriteEnable == 1'b1) && (WriteAddr == word)) begin
        WAddrOneHotxD[word] = 1'b1;
      end else begin
        WAddrOneHotxD[word] = 1'b0;
      end
    end
  end

  //-----------------------------------------------------------------------------
  //-- WRITE : Clock gating (if integrated clock-gating cells are available)
  //-----------------------------------------------------------------------------
  // One gating cell per word, enabled by that word's one-hot select and fed by
  // the globally gated write clock.
  for (genvar x = 0; x < NUM_WORDS; x++) begin : CG_CELL_WORD_ITER
    tc_clk_gating CG_Inst (
      .clk_i     ( clk_int          ),
      .en_i      ( WAddrOneHotxD[x] ),
      .test_en_i ( 1'b0             ),
      .clk_o     ( ClocksxC[x]      )
    );
  end

  //-----------------------------------------------------------------------------
  //-- WRITE : Sample input data
  //-----------------------------------------------------------------------------
  // The write data is registered on the rising edge of the ungated clock.
  always_ff @(posedge clk) begin : sample_waddr
    if (WriteEnable) begin
      WDataIntxD <= WriteData;
    end
  end

  //-----------------------------------------------------------------------------
  //-- WRITE : Write operation
  //-----------------------------------------------------------------------------
  //-- One latch word per memory entry. Word k follows the registered write data
  //-- while its gated clock ClocksxC[k] is high and holds its value otherwise.
  /* verilator lint_off NOLATCH */
  always_latch begin : latch_wdata
    for (int unsigned word = 0; word < NUM_WORDS; word++) begin : w_WordIter
      if (ClocksxC[word] == 1'b1) begin
        MemContentxDP[word] <= WDataIntxD;
      end
    end
  end
  /* verilator lint_on NOLATCH */

endmodule
// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
/// A trailing zero counter / leading zero counter.
/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB)
/// If the input does not contain a one (i.e., all bits are zero), `empty_o` is asserted.
/// Additionally `cnt_o` contains the maximum number of zeros - 1. For example:
/// in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
/// in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
/// in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
/// This speeds up simulation significantly.
// Implementation: a binary reduction tree with NumLevels levels. Each node
// carries a "contains a one" flag (sel_nodes) and the index of the first one
// found below it (index_nodes). The node at position `l` of level `level` is
// stored at array index 2**level-1+l, so the root is element 0 and the children
// of a node sit at 2**(level+1)-1+2*l and 2**(level+1)-1+2*l+1.
module lzc #(
/// The width of the input vector.
parameter int unsigned WIDTH = 2,
/// Mode selection: 0 -> trailing zero, 1 -> leading zero
parameter bit MODE = 1'b0,
/// Dependent parameter. Do **not** change!
///
/// Width of the output signal with the zero count.
parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
) (
/// Input vector to be counted.
input logic [WIDTH-1:0] in_i,
/// Count of the leading / trailing zeros.
output logic [CNT_WIDTH-1:0] cnt_o,
/// Counter is empty: Asserted if all bits in in_i are zero.
output logic empty_o
);
// A single-bit input needs no tree: the count is 1 exactly when the bit is
// zero, which is also the empty condition.
if (WIDTH == 1) begin : gen_degenerate_lzc
assign cnt_o[0] = !in_i[0];
assign empty_o = !in_i[0];
end else begin : gen_lzc
localparam int unsigned NumLevels = $clog2(WIDTH);
// pragma translate_off
initial begin
assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
end
// pragma translate_on
// Constant table mapping each bit position to its binary-encoded index.
logic [WIDTH-1:0][NumLevels-1:0] index_lut;
// Tree storage, see the layout description at the top of the module.
logic [2**NumLevels-1:0] sel_nodes;
logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes;
// Input vector, bit-reversed when MODE is set.
logic [WIDTH-1:0] in_tmp;
// reverse vector if required
// After the reversal the search below always starts at bit 0 of in_tmp, so the
// position of the first one equals the number of zeros preceding it.
always_comb begin : flip_vector
for (int unsigned i = 0; i < WIDTH; i++) begin
in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
end
end
for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
assign index_lut[j] = (NumLevels)'(unsigned'(j));
end
for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
// Leaf level: each node looks at up to two adjacent bits of in_tmp.
if (unsigned'(level) == NumLevels - 1) begin : g_last_level
for (genvar k = 0; k < 2 ** level; k++) begin : g_level
// if two successive indices are still in the vector...
if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
? index_lut[k * 2] :
index_lut[k * 2 + 1];
end
// if only the first index is still in the vector...
if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
end
// if index is out of range
// (padding nodes when WIDTH is not a power of two; they never report a one)
if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
assign sel_nodes[2 ** level - 1 + k] = 1'b0;
assign index_nodes[2 ** level - 1 + k] = '0;
end
end
// Inner levels: combine the two child nodes. The first child (covering the
// lower indices) takes priority whenever it contains a one.
end else begin : g_not_last_level
for (genvar l = 0; l < 2 ** level; l++) begin : g_level
assign sel_nodes[2 ** level - 1 + l] =
sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
? index_nodes[2 ** (level + 1) - 1 + l * 2] :
index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
end
end
end
// The root node holds the overall result. The fallback operands are only
// selected when there are no tree levels at all.
assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}};
assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
end : gen_lzc
endmodule : lzc
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
module mempool_cc
import snitch_pkg::meta_id_t;
#(
parameter logic [31:0] BootAddr = 32'h0000_1000,
parameter logic [31:0] MTVEC = BootAddr,
parameter bit RVE = 0, // Reduced-register extension
parameter bit RVM = 1, // Enable IntegerMmultiplication & Division Extension
parameter bit RegisterOffloadReq = 1,
parameter bit RegisterOffloadResp = 1,
parameter bit RegisterTCDMReq = 0,
parameter bit RegisterTCDMResp = 0
) (
input logic clk_i,
input logic rst_i,
input logic [31:0] hart_id_i,
// Instruction Port
output logic [31:0] inst_addr_o,
input logic [31:0] inst_data_i,
output logic inst_valid_o,
input logic inst_ready_i,
// TCDM Ports
output logic [31:0] data_qaddr_o,
output logic data_qwrite_o,
output logic [3:0] data_qamo_o,
output logic [31:0] data_qdata_o,
output logic [3:0] data_qstrb_o,
output meta_id_t data_qid_o,
output logic data_qvalid_o,
input logic data_qready_i,
input logic [31:0] data_pdata_i,
input logic data_perror_i,
input meta_id_t data_pid_i,
input logic data_pvalid_i,
output logic data_pready_o,
input logic wake_up_sync_i,
// Core event strobes
output snitch_pkg::core_events_t core_events_o
);
// Data port signals
snitch_pkg::dreq_t data_req_d, data_req_q;
snitch_pkg::dresp_t data_resp_d, data_resp_q;
logic data_req_d_valid, data_req_d_ready, data_resp_d_valid, data_resp_d_ready;
logic data_req_q_valid, data_req_q_ready, data_resp_q_valid, data_resp_q_ready;
// Accelerator signals
snitch_pkg::acc_req_t acc_req_d, acc_req_q;
snitch_pkg::acc_resp_t acc_resp_d, acc_resp_q;
logic acc_req_d_valid, acc_req_d_ready, acc_resp_d_valid, acc_resp_d_ready;
logic acc_req_q_valid, acc_req_q_ready, acc_resp_q_valid, acc_resp_q_ready;
// Snitch Integer Core
snitch #(
.BootAddr ( BootAddr ),
.MTVEC ( MTVEC ),
.RVE ( RVE ),
.RVM ( RVM )
) i_snitch (
.clk_i ,
.rst_i ,
.hart_id_i ,
.inst_addr_o ,
.inst_data_i ,
.inst_valid_o ,
.inst_ready_i ,
.acc_qaddr_o ( acc_req_d.addr ),
.acc_qid_o ( acc_req_d.id ),
.acc_qdata_op_o ( acc_req_d.data_op ),
.acc_qdata_arga_o ( acc_req_d.data_arga ),
.acc_qdata_argb_o ( acc_req_d.data_argb ),
.acc_qdata_argc_o ( acc_req_d.data_argc ),
.acc_qvalid_o ( acc_req_d_valid ),
.acc_qready_i ( acc_req_d_ready ),
.acc_pdata_i ( acc_resp_q.data ),
.acc_pid_i ( acc_resp_q.id ),
.acc_perror_i ( acc_resp_q.error ),
.acc_pvalid_i ( acc_resp_q_valid ),
.acc_pready_o ( acc_resp_q_ready ),
.data_qaddr_o ( data_req_d.addr ),
.data_qwrite_o ( data_req_d.write ),
.data_qamo_o ( data_req_d.amo ),
.data_qdata_o ( data_req_d.data ),
.data_qstrb_o ( data_req_d.strb ),
.data_qid_o ( data_req_d.id ),
.data_qvalid_o ( data_req_d_valid ),
.data_qready_i ( data_req_d_ready ),
.data_pdata_i ( data_resp_q.data ),
.data_perror_i ( data_resp_q.error ),
.data_pid_i ( data_resp_q.id ),
.data_pvalid_i ( data_resp_q_valid ),
.data_pready_o ( data_resp_q_ready ),
.wake_up_sync_i ( wake_up_sync_i ),
.core_events_o ( core_events_o )
);
// Cut off-loading request path
spill_register #(
.T ( snitch_pkg::acc_req_t ),
.Bypass ( !RegisterOffloadReq )
) i_spill_register_acc_req (
.clk_i ,
.rst_ni ( ~rst_i ),
.valid_i ( acc_req_d_valid ),
.ready_o ( acc_req_d_ready ),
.data_i ( acc_req_d ),
.valid_o ( acc_req_q_valid ),
.ready_i ( acc_req_q_ready ),
.data_o ( acc_req_q )
);
// Cut off-loading response path
spill_register #(
.T ( snitch_pkg::acc_resp_t ),
.Bypass ( !RegisterOffloadResp )
) i_spill_register_acc_resp (
.clk_i ,
.rst_ni ( ~rst_i ),
.valid_i ( acc_resp_d_valid ),
.ready_o ( acc_resp_d_ready ),
.data_i ( acc_resp_d ),
.valid_o ( acc_resp_q_valid ),
.ready_i ( acc_resp_q_ready ),
.data_o ( acc_resp_q )
);
// Snitch IPU accelerator
snitch_ipu #(
.IdWidth ( 5 )
) i_snitch_ipu (
.clk_i ,
.rst_i ,
.acc_qaddr_i ( acc_req_q.addr ),
.acc_qid_i ( acc_req_q.id ),
.acc_qdata_op_i ( acc_req_q.data_op ),
.acc_qdata_arga_i ( acc_req_q.data_arga ),
.acc_qdata_argb_i ( acc_req_q.data_argb ),
.acc_qdata_argc_i ( acc_req_q.data_argc ),
.acc_qvalid_i ( acc_req_q_valid ),
.acc_qready_o ( acc_req_q_ready ),
.acc_pdata_o ( acc_resp_d.data ),
.acc_pid_o ( acc_resp_d.id ),
.acc_perror_o ( acc_resp_d.error ),
.acc_pvalid_o ( acc_resp_d_valid ),
.acc_pready_i ( acc_resp_d_ready )
);
// Cut TCDM data request path
spill_register #(
.T ( snitch_pkg::dreq_t ),
.Bypass ( !RegisterTCDMReq )
) i_spill_register_tcdm_req (
.clk_i ,
.rst_ni ( ~rst_i ),
.valid_i ( data_req_d_valid ),
.ready_o ( data_req_d_ready ),
.data_i ( data_req_d ),
.valid_o ( data_req_q_valid ),
.ready_i ( data_req_q_ready ),
.data_o ( data_req_q )
);
// Cut TCDM data response path
spill_register #(
.T ( snitch_pkg::dresp_t ),
.Bypass ( !RegisterTCDMResp )
) i_spill_register_tcdm_resp (
.clk_i ,
.rst_ni ( ~rst_i ),
.valid_i ( data_resp_d_valid ),
.ready_o ( data_resp_d_ready ),
.data_i ( data_resp_d ),
.valid_o ( data_resp_q_valid ),
.ready_i ( data_resp_q_ready ),
.data_o ( data_resp_q )
);
// Assign TCDM data interface
assign data_qaddr_o = data_req_q.addr;
assign data_qwrite_o = data_req_q.write;
assign data_qamo_o = data_req_q.amo;
assign data_qdata_o = data_req_q.data;
assign data_qstrb_o = data_req_q.strb;
assign data_qid_o = data_req_q.id;
assign data_qvalid_o = data_req_q_valid;
assign data_req_q_ready = data_qready_i;
assign data_resp_d.data = data_pdata_i;
assign data_resp_d.id = data_pid_i;
assign data_resp_d.error = data_perror_i;
assign data_resp_d_valid = data_pvalid_i;
assign data_pready_o = data_resp_d_ready;
// --------------------------
// Tracer
// --------------------------
// pragma translate_off
int f;
string fn;
logic [63:0] cycle;
int unsigned stall, stall_ins, stall_raw, stall_lsu, stall_acc;
always_ff @(posedge rst_i) begin
if(rst_i) begin
$sformat(fn, "trace_hart_%04x.dasm", hart_id_i);
f = $fopen(fn, "w");
$display("[Tracer] Logging Hart %d to %s", hart_id_i, fn);
end
end
typedef enum logic [1:0] {SrcSnitch = 0, SrcFpu = 1, SrcFpuSeq = 2} trace_src_e;
localparam int SnitchTrace = `ifdef SNITCH_TRACE `SNITCH_TRACE `else 0 `endif;
always_ff @(posedge clk_i or posedge rst_i) begin
automatic string trace_entry;
automatic string extras_str;
if (!rst_i) begin
cycle <= cycle + 1;
// Trace snitch iff:
// Tracing enabled by CSR register
// we are not stalled <==> we have issued and processed an instruction (including offloads)
// OR we are retiring (issuing a writeback from) a load or accelerator instruction
if ((i_snitch.csr_trace_q || SnitchTrace) && (!i_snitch.stall || i_snitch.retire_load || i_snitch.retire_acc)) begin
// Manual loop unrolling for Verilator
// Data type keys for arrays are currently not supported in Verilator
extras_str = "{";
// State
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "source", SrcSnitch);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall", i_snitch.stall);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_tot", stall);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_ins", stall_ins);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_raw", stall_raw);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_lsu", stall_lsu);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_acc", stall_acc);
// Decoding
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rs1", i_snitch.rs1);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rs2", i_snitch.rs2);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rd", i_snitch.rd);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_load", i_snitch.is_load);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_store", i_snitch.is_store);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_branch", i_snitch.is_branch);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "pc_d", i_snitch.pc_d);
// Operands
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opa", i_snitch.opa);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opb", i_snitch.opb);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opa_select", i_snitch.opa_select);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opb_select", i_snitch.opb_select);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "write_rd", i_snitch.write_rd);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "csr_addr", i_snitch.inst_data_i[31:20]);
// Pipeline writeback
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "writeback", i_snitch.alu_writeback);
// Load/Store
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "gpr_rdata_1", i_snitch.gpr_rdata[1]);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "ls_size", i_snitch.ls_size);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "ld_result_32",i_snitch.ld_result[31:0]);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "lsu_rd", i_snitch.lsu_rd);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "retire_load", i_snitch.retire_load);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "alu_result", i_snitch.alu_result);
// Atomics
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "ls_amo", i_snitch.ls_amo);
// Accelerator
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "retire_acc", i_snitch.retire_acc);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pid", i_snitch.acc_pid_i);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pdata_32",i_snitch.acc_pdata_i[31:0]);
// FPU offload
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "fpu_offload", 1'b0);
extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "is_seq_insn", 1'b0);
extras_str = $sformatf("%s}", extras_str);
$sformat(trace_entry, "%t %8d 0x%h DASM(%h) #; %s\n",
$time, cycle, i_snitch.pc_q, i_snitch.inst_data_i, extras_str);
$fwrite(f, trace_entry);
end
// Reset all stalls when we execute an instruction
if (!i_snitch.stall) begin
stall <= 0;
stall_ins <= 0;
stall_raw <= 0;
stall_lsu <= 0;
stall_acc <= 0;
end else begin
// We are currently stalled, let's count the stall causes
if (i_snitch.stall) begin
stall <= stall + 1;
end
if ((!i_snitch.inst_ready_i) && (i_snitch.inst_valid_o)) begin
stall_ins <= stall_ins + 1;
end
if ((!i_snitch.operands_ready) || (!i_snitch.dst_ready)) begin
stall_raw <= stall_raw + 1;
end
if (i_snitch.lsu_stall) begin
stall_lsu <= stall_lsu + 1;
end
if (i_snitch.acc_stall) begin
stall_acc <= stall_acc + 1;
end
end
end else begin
cycle <= '0;
stall <= 0;
stall_ins <= 0;
stall_raw <= 0;
stall_lsu <= 0;
stall_acc <= 0;
end
end
final begin
$fclose(f);
end
// pragma translate_on
endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// MemPool cluster: instantiates NumGroups `mempool_group`s and wires their
// inter-group TCDM request/response channels to each other.
//
// Parameters:
//   TCDMBaseAddr  - forwarded unchanged to every group.
//   BootAddr      - forwarded unchanged to every group.
//   NumAXIMasters - number of AXI master ports (one per group). Derived, do not override.
//
// Ports:
//   clk_i, rst_ni, testmode_i, scan_enable_i - forwarded to every group.
//   scan_data_i / scan_data_o - not used inside this module: `scan_data_i` is
//                               never read and `scan_data_o` is never driven
//                               here; the groups' scan data ports are left
//                               unconnected.
//   wake_up_i                 - one bit per core; group g receives the slice
//                               [g*NumCoresPerGroup +: NumCoresPerGroup].
//   axi_mst_req_o / axi_mst_resp_i - one AXI master port per group.
module mempool_cluster
  import mempool_pkg::*;
  import cf_math_pkg::idx_width;
#(
  // TCDM
  parameter addr_t       TCDMBaseAddr  = 32'b0,
  // Boot address
  parameter logic [31:0] BootAddr      = 32'h0000_0000,
  // Dependent parameters. DO NOT CHANGE!
  parameter int unsigned NumAXIMasters = NumGroups
) (
  // Clock and reset
  input  logic                               clk_i,
  input  logic                               rst_ni,
  input  logic                               testmode_i,
  // Scan chain
  input  logic                               scan_enable_i,
  input  logic                               scan_data_i,
  output logic                               scan_data_o,
  // Wake up signal
  input  logic           [NumCores-1:0]      wake_up_i,
  // AXI Interface
  output axi_tile_req_t  [NumAXIMasters-1:0] axi_mst_req_o,
  input  axi_tile_resp_t [NumAXIMasters-1:0] axi_mst_resp_i
);

  /************
   *  Groups  *
   ************/

  // TCDM interfaces, indexed [group][tile within group].
  // "master" signals belong to the requesting side of a group, "slave" signals
  // to the side that receives requests and returns responses.

  // North
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_north_resp_ready;
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_north_resp_ready;
  // East
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_east_resp_ready;
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_east_resp_ready;
  // Northeast
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_northeast_resp_ready;
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_ready;
  // Bypass
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_master_bypass_resp_ready;
  tcdm_slave_req_t   [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_req;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_req_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_req_ready;
  tcdm_master_resp_t [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_resp;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_resp_valid;
  logic              [NumGroups-1:0][NumTilesPerGroup-1:0] tcdm_slave_bypass_resp_ready;

  for (genvar g = 0; unsigned'(g) < NumGroups; g++) begin: gen_groups
    mempool_group #(
      .TCDMBaseAddr(TCDMBaseAddr),
      .BootAddr    (BootAddr    )
    ) i_group (
      .clk_i                             (clk_i                                            ),
      .rst_ni                            (rst_ni                                           ),
      .testmode_i                        (testmode_i                                       ),
      .scan_enable_i                     (scan_enable_i                                    ),
      .scan_data_i                       (/* Unconnected */                                ),
      .scan_data_o                       (/* Unconnected */                                ),
      // The group ID is the generate index truncated to the group index width
      .group_id_i                        (g[idx_width(NumGroups)-1:0]                      ),
      // TCDM Master interfaces
      .tcdm_master_north_req_o           (tcdm_master_north_req[g]                         ),
      .tcdm_master_north_req_valid_o     (tcdm_master_north_req_valid[g]                   ),
      .tcdm_master_north_req_ready_i     (tcdm_master_north_req_ready[g]                   ),
      .tcdm_master_north_resp_i          (tcdm_master_north_resp[g]                        ),
      .tcdm_master_north_resp_valid_i    (tcdm_master_north_resp_valid[g]                  ),
      .tcdm_master_north_resp_ready_o    (tcdm_master_north_resp_ready[g]                  ),
      .tcdm_master_east_req_o            (tcdm_master_east_req[g]                          ),
      .tcdm_master_east_req_valid_o      (tcdm_master_east_req_valid[g]                    ),
      .tcdm_master_east_req_ready_i      (tcdm_master_east_req_ready[g]                    ),
      .tcdm_master_east_resp_i           (tcdm_master_east_resp[g]                         ),
      .tcdm_master_east_resp_valid_i     (tcdm_master_east_resp_valid[g]                   ),
      .tcdm_master_east_resp_ready_o     (tcdm_master_east_resp_ready[g]                   ),
      .tcdm_master_northeast_req_o       (tcdm_master_northeast_req[g]                     ),
      .tcdm_master_northeast_req_valid_o (tcdm_master_northeast_req_valid[g]               ),
      .tcdm_master_northeast_req_ready_i (tcdm_master_northeast_req_ready[g]               ),
      .tcdm_master_northeast_resp_i      (tcdm_master_northeast_resp[g]                    ),
      .tcdm_master_northeast_resp_valid_i(tcdm_master_northeast_resp_valid[g]              ),
      .tcdm_master_northeast_resp_ready_o(tcdm_master_northeast_resp_ready[g]              ),
      .tcdm_master_bypass_req_o          (tcdm_master_bypass_req[g]                        ),
      .tcdm_master_bypass_req_valid_o    (tcdm_master_bypass_req_valid[g]                  ),
      .tcdm_master_bypass_req_ready_i    (tcdm_master_bypass_req_ready[g]                  ),
      .tcdm_master_bypass_resp_i         (tcdm_master_bypass_resp[g]                       ),
      .tcdm_master_bypass_resp_valid_i   (tcdm_master_bypass_resp_valid[g]                 ),
      .tcdm_master_bypass_resp_ready_o   (tcdm_master_bypass_resp_ready[g]                 ),
      // TCDM banks interface
      .tcdm_slave_north_req_i            (tcdm_slave_north_req[g]                          ),
      .tcdm_slave_north_req_valid_i      (tcdm_slave_north_req_valid[g]                    ),
      .tcdm_slave_north_req_ready_o      (tcdm_slave_north_req_ready[g]                    ),
      .tcdm_slave_north_resp_o           (tcdm_slave_north_resp[g]                         ),
      .tcdm_slave_north_resp_valid_o     (tcdm_slave_north_resp_valid[g]                   ),
      .tcdm_slave_north_resp_ready_i     (tcdm_slave_north_resp_ready[g]                   ),
      .tcdm_slave_east_req_i             (tcdm_slave_east_req[g]                           ),
      .tcdm_slave_east_req_valid_i       (tcdm_slave_east_req_valid[g]                     ),
      .tcdm_slave_east_req_ready_o       (tcdm_slave_east_req_ready[g]                     ),
      .tcdm_slave_east_resp_o            (tcdm_slave_east_resp[g]                          ),
      .tcdm_slave_east_resp_valid_o      (tcdm_slave_east_resp_valid[g]                    ),
      .tcdm_slave_east_resp_ready_i      (tcdm_slave_east_resp_ready[g]                    ),
      .tcdm_slave_northeast_req_i        (tcdm_slave_northeast_req[g]                      ),
      .tcdm_slave_northeast_req_valid_i  (tcdm_slave_northeast_req_valid[g]                ),
      .tcdm_slave_northeast_req_ready_o  (tcdm_slave_northeast_req_ready[g]                ),
      .tcdm_slave_northeast_resp_o       (tcdm_slave_northeast_resp[g]                     ),
      .tcdm_slave_northeast_resp_valid_o (tcdm_slave_northeast_resp_valid[g]               ),
      .tcdm_slave_northeast_resp_ready_i (tcdm_slave_northeast_resp_ready[g]               ),
      .tcdm_slave_bypass_req_i           (tcdm_slave_bypass_req[g]                         ),
      .tcdm_slave_bypass_req_valid_i     (tcdm_slave_bypass_req_valid[g]                   ),
      .tcdm_slave_bypass_req_ready_o     (tcdm_slave_bypass_req_ready[g]                   ),
      .tcdm_slave_bypass_resp_o          (tcdm_slave_bypass_resp[g]                        ),
      .tcdm_slave_bypass_resp_valid_o    (tcdm_slave_bypass_resp_valid[g]                  ),
      .tcdm_slave_bypass_resp_ready_i    (tcdm_slave_bypass_resp_ready[g]                  ),
      // Each group gets its own contiguous slice of the wake-up vector
      .wake_up_i                         (wake_up_i[g*NumCoresPerGroup +: NumCoresPerGroup]),
      // AXI interface
      .axi_mst_req_o                     (axi_mst_req_o[g]                                 ),
      .axi_mst_resp_i                    (axi_mst_resp_i[g]                                )
    );
  end : gen_groups

  /*******************
   *  Interconnects  *
   *******************/

  // The group index is used as a 2-bit coordinate: flipping bit 0 selects the
  // east neighbour, flipping bit 1 the north neighbour, and flipping both the
  // northeast neighbour. This hard-wired scheme is why the assertions below
  // require exactly four groups.
  for (genvar ini = 0; unsigned'(ini) < NumGroups; ini++) begin: gen_interconnections
    // East
    assign tcdm_slave_east_req[ini ^ 2'b01]         = tcdm_master_east_req[ini];
    assign tcdm_slave_east_req_valid[ini ^ 2'b01]   = tcdm_master_east_req_valid[ini];
    assign tcdm_master_east_req_ready[ini]          = tcdm_slave_east_req_ready[ini ^ 2'b01];
    assign tcdm_master_east_resp[ini ^ 2'b01]       = tcdm_slave_east_resp[ini];
    assign tcdm_master_east_resp_valid[ini ^ 2'b01] = tcdm_slave_east_resp_valid[ini];
    assign tcdm_slave_east_resp_ready[ini]          = tcdm_master_east_resp_ready[ini ^ 2'b01];

    // North
    assign tcdm_slave_north_req[ini ^ 2'b10]         = tcdm_master_north_req[ini];
    assign tcdm_slave_north_req_valid[ini ^ 2'b10]   = tcdm_master_north_req_valid[ini];
    assign tcdm_master_north_req_ready[ini]          = tcdm_slave_north_req_ready[ini ^ 2'b10];
    assign tcdm_master_north_resp[ini ^ 2'b10]       = tcdm_slave_north_resp[ini];
    assign tcdm_master_north_resp_valid[ini ^ 2'b10] = tcdm_slave_north_resp_valid[ini];
    assign tcdm_slave_north_resp_ready[ini]          = tcdm_master_north_resp_ready[ini ^ 2'b10];

    // Northeast
    // Requests take two hops. First north: the northeast request of group
    // `ini` enters the bypass slave port of its north neighbour. Then east:
    // that neighbour's bypass master port drives the northeast slave port of
    // the final target group.
    assign tcdm_slave_bypass_req[ini ^ 2'b10]          = tcdm_master_northeast_req[ini];
    assign tcdm_slave_northeast_req[ini ^ 2'b11]       = tcdm_master_bypass_req[ini ^ 2'b10];
    assign tcdm_slave_bypass_req_valid[ini ^ 2'b10]    = tcdm_master_northeast_req_valid[ini];
    assign tcdm_slave_northeast_req_valid[ini ^ 2'b11] = tcdm_master_bypass_req_valid[ini ^ 2'b10];
    assign tcdm_master_bypass_req_ready[ini ^ 2'b10]   = tcdm_slave_northeast_req_ready[ini ^ 2'b11];
    assign tcdm_master_northeast_req_ready[ini]        = tcdm_slave_bypass_req_ready[ini ^ 2'b10];
    // Responses travel the same two hops in reverse: first east (target group
    // to the bypassing group), then north (bypassing group to the requester).
    assign tcdm_master_northeast_resp[ini]              = tcdm_slave_bypass_resp[ini ^ 2'b10];
    assign tcdm_master_bypass_resp[ini ^ 2'b10]         = tcdm_slave_northeast_resp[ini ^ 2'b11];
    assign tcdm_master_northeast_resp_valid[ini]        = tcdm_slave_bypass_resp_valid[ini ^ 2'b10];
    assign tcdm_master_bypass_resp_valid[ini ^ 2'b10]   = tcdm_slave_northeast_resp_valid[ini ^ 2'b11];
    assign tcdm_slave_bypass_resp_ready[ini ^ 2'b10]    = tcdm_master_northeast_resp_ready[ini];
    assign tcdm_slave_northeast_resp_ready[ini ^ 2'b11] = tcdm_master_bypass_resp_ready[ini ^ 2'b10];
  end: gen_interconnections

  /****************
   *  Assertions  *
   ****************/

  // Elaboration-time checks on the package configuration.
  if (NumCores > 1024)
    $fatal(1, "[mempool] MemPool is currently limited to 1024 cores.");

  // Equality is accepted, so the requirement is "at least as many", not "more".
  if (NumTiles < NumGroups)
    $fatal(1, "[mempool] MemPool requires at least as many tiles as groups.");

  if (NumCores != NumTiles * NumCoresPerTile)
    $fatal(1, "[mempool] The number of cores is not divisible by the number of cores per tile.");

  if (BankingFactor < 1)
    $fatal(1, "[mempool] The banking factor must be a positive integer.");

  if (BankingFactor != 2**$clog2(BankingFactor))
    $fatal(1, "[mempool] The banking factor must be a power of two.");

  if (NumGroups != 4)
    $fatal(1, "[mempool] This version of the MemPool cluster only works with four groups.");

endmodule : mempool_cluster
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Thin wrapper around `mempool_cluster`: exposes the same parameters and ports
// and forwards every one of them to the single cluster instance.
//
// Parameters:
//   TCDMBaseAddr  - forwarded to the cluster.
//   BootAddr      - forwarded to the cluster.
//   NumAXIMasters - number of AXI master ports. Derived from NumGroups, do not override.
//
// Ports:
//   clk_i, rst_ni, testmode_i      - clock, active-low reset and test mode.
//   scan_enable_i, scan_data_i/o   - scan chain, forwarded to the cluster.
//   wake_up_i                      - one wake-up bit per core, forwarded to the cluster.
//   axi_mst_req_o / axi_mst_resp_i - AXI master ports, forwarded to the cluster.
module mempool_cluster_wrap
  import mempool_pkg::*;
#(
  // TCDM
  parameter addr_t       TCDMBaseAddr  = 32'b0000_0000,
  // Boot address
  parameter logic [31:0] BootAddr      = 32'h0000_0000,
  // Dependent parameters. DO NOT CHANGE!
  parameter int unsigned NumAXIMasters = NumGroups
) (
  // Clock and reset
  input  logic                               clk_i,
  input  logic                               rst_ni,
  input  logic                               testmode_i,
  // Scan chain
  input  logic                               scan_enable_i,
  input  logic                               scan_data_i,
  output logic                               scan_data_o,
  // Wake up signal
  input  logic           [NumCores-1:0]      wake_up_i,
  // AXI Interface
  output axi_tile_req_t  [NumAXIMasters-1:0] axi_mst_req_o,
  input  axi_tile_resp_t [NumAXIMasters-1:0] axi_mst_resp_i
);

  /*********************
   *  MemPool Cluster  *
   *********************/

  mempool_cluster #(
    .TCDMBaseAddr(TCDMBaseAddr),
    .BootAddr    (BootAddr    )
  ) i_mempool_cluster (
    .clk_i         (clk_i         ),
    .rst_ni        (rst_ni        ),
    .testmode_i    (testmode_i    ),
    .scan_enable_i (scan_enable_i ),
    .scan_data_i   (scan_data_i   ),
    .scan_data_o   (scan_data_o   ),
    // Forward the wrapper's wake-up input. It was previously tied to '0, which
    // left the `wake_up_i` port of this module without any effect.
    .wake_up_i     (wake_up_i     ),
    .axi_mst_req_o (axi_mst_req_o ),
    .axi_mst_resp_i(axi_mst_resp_i)
  );

endmodule : mempool_cluster_wrap
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
`include "mempool/mempool.svh"
module mempool_group
import mempool_pkg::*;
import cf_math_pkg::idx_width;
#(
// TCDM
parameter addr_t TCDMBaseAddr = 32'b0,
// Boot address
parameter logic [31:0] BootAddr = 32'h0000_1000,
// Dependant parameters. DO NOT CHANGE!
parameter int unsigned NumAXIMasters = NumTilesPerGroup
) (
// Clock and reset
input logic clk_i,
input logic rst_ni,
input logic testmode_i,
// Scan chain
input logic scan_enable_i,
input logic scan_data_i,
output logic scan_data_o,
// Group ID
input logic [idx_width(NumGroups)-1:0] group_id_i,
// TCDM Master interfaces
output `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_master_north_req_o,
output logic [NumTilesPerGroup-1:0] tcdm_master_north_req_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_master_north_req_ready_i,
input `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_master_north_resp_i,
input logic [NumTilesPerGroup-1:0] tcdm_master_north_resp_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_master_north_resp_ready_o,
output `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_master_northeast_req_o,
output logic [NumTilesPerGroup-1:0] tcdm_master_northeast_req_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_master_northeast_req_ready_i,
input `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_master_northeast_resp_i,
input logic [NumTilesPerGroup-1:0] tcdm_master_northeast_resp_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_master_northeast_resp_ready_o,
output `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_master_bypass_req_o,
output logic [NumTilesPerGroup-1:0] tcdm_master_bypass_req_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_master_bypass_req_ready_i,
input `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_master_bypass_resp_i,
input logic [NumTilesPerGroup-1:0] tcdm_master_bypass_resp_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_master_bypass_resp_ready_o,
output `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_master_east_req_o,
output logic [NumTilesPerGroup-1:0] tcdm_master_east_req_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_master_east_req_ready_i,
input `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_master_east_resp_i,
input logic [NumTilesPerGroup-1:0] tcdm_master_east_resp_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_master_east_resp_ready_o,
// TCDM Slave interfaces
input `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_slave_north_req_i,
input logic [NumTilesPerGroup-1:0] tcdm_slave_north_req_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_slave_north_req_ready_o,
output `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_slave_north_resp_o,
output logic [NumTilesPerGroup-1:0] tcdm_slave_north_resp_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_slave_north_resp_ready_i,
input `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_slave_northeast_req_i,
input logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_req_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_req_ready_o,
output `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_slave_northeast_resp_o,
output logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_ready_i,
input `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_slave_bypass_req_i,
input logic [NumTilesPerGroup-1:0] tcdm_slave_bypass_req_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_slave_bypass_req_ready_o,
output `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_slave_bypass_resp_o,
output logic [NumTilesPerGroup-1:0] tcdm_slave_bypass_resp_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_slave_bypass_resp_ready_i,
input `STRUCT_VECT(tcdm_slave_req_t, [NumTilesPerGroup-1:0]) tcdm_slave_east_req_i,
input logic [NumTilesPerGroup-1:0] tcdm_slave_east_req_valid_i,
output logic [NumTilesPerGroup-1:0] tcdm_slave_east_req_ready_o,
output `STRUCT_VECT(tcdm_master_resp_t, [NumTilesPerGroup-1:0]) tcdm_slave_east_resp_o,
output logic [NumTilesPerGroup-1:0] tcdm_slave_east_resp_valid_o,
input logic [NumTilesPerGroup-1:0] tcdm_slave_east_resp_ready_i,
// Wake up interface
input logic [NumCoresPerGroup-1:0] wake_up_i,
// AXI Interface
output `STRUCT_PORT(axi_tile_req_t) axi_mst_req_o,
input `STRUCT_PORT(axi_tile_resp_t) axi_mst_resp_i
);
/*****************
* Definitions *
*****************/
typedef logic [idx_width(NumTiles)-1:0] tile_id_t;
/**********************
* Ports to structs *
**********************/
// The ports might be structs flattened to vectors. To access the structs'
// internal signals, assign the flattened vectors back to structs.
tcdm_slave_req_t [NumTilesPerGroup-1:0] tcdm_master_north_req_s;
tcdm_slave_req_t [NumTilesPerGroup-1:0] tcdm_master_northeast_req_s;
tcdm_slave_req_t [NumTilesPerGroup-1:0] tcdm_master_east_req_s;
tcdm_master_resp_t [NumTilesPerGroup-1:0] tcdm_slave_north_resp_s;
tcdm_master_resp_t [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_s;
tcdm_master_resp_t [NumTilesPerGroup-1:0] tcdm_slave_east_resp_s;
assign tcdm_master_north_req_o = tcdm_master_north_req_s;
assign tcdm_master_northeast_req_o = tcdm_master_northeast_req_s;
assign tcdm_master_east_req_o = tcdm_master_east_req_s;
assign tcdm_slave_north_resp_o = tcdm_slave_north_resp_s;
assign tcdm_slave_northeast_resp_o = tcdm_slave_northeast_resp_s;
assign tcdm_slave_east_resp_o = tcdm_slave_east_resp_s;
/***********
* Tiles *
***********/
// TCDM interfaces
// North
tcdm_master_req_t [NumTilesPerGroup-1:0] tcdm_master_north_req;
logic [NumTilesPerGroup-1:0] tcdm_master_north_req_valid;
logic [NumTilesPerGroup-1:0] tcdm_master_north_req_ready;
tcdm_slave_resp_t [NumTilesPerGroup-1:0] tcdm_slave_north_resp;
logic [NumTilesPerGroup-1:0] tcdm_slave_north_resp_valid;
logic [NumTilesPerGroup-1:0] tcdm_slave_north_resp_ready;
// East
tcdm_master_req_t [NumTilesPerGroup-1:0] tcdm_master_east_req;
logic [NumTilesPerGroup-1:0] tcdm_master_east_req_valid;
logic [NumTilesPerGroup-1:0] tcdm_master_east_req_ready;
tcdm_slave_resp_t [NumTilesPerGroup-1:0] tcdm_slave_east_resp;
logic [NumTilesPerGroup-1:0] tcdm_slave_east_resp_valid;
logic [NumTilesPerGroup-1:0] tcdm_slave_east_resp_ready;
// Northeast
tcdm_master_req_t [NumTilesPerGroup-1:0] tcdm_master_northeast_req;
logic [NumTilesPerGroup-1:0] tcdm_master_northeast_req_valid;
logic [NumTilesPerGroup-1:0] tcdm_master_northeast_req_ready;
tcdm_slave_resp_t [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp;
logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_valid;
logic [NumTilesPerGroup-1:0] tcdm_slave_northeast_resp_ready;
// Center
tcdm_master_req_t [NumTilesPerGroup-1:0] tcdm_master_local_req;
logic [NumTilesPerGroup-1:0] tcdm_master_local_req_valid;
logic [NumTilesPerGroup-1:0] tcdm_master_local_req_ready;
tcdm_master_resp_t [NumTilesPerGroup-1:0] tcdm_master_local_resp;
logic [NumTilesPerGroup-1:0] tcdm_master_local_resp_valid;
logic [NumTilesPerGroup-1:0] tcdm_master_local_resp_ready;
tcdm_slave_req_t [NumTilesPerGroup-1:0] tcdm_slave_local_req;
logic [NumTilesPerGroup-1:0] tcdm_slave_local_req_valid;
logic [NumTilesPerGroup-1:0] tcdm_slave_local_req_ready;
tcdm_slave_resp_t [NumTilesPerGroup-1:0] tcdm_slave_local_resp;
logic [NumTilesPerGroup-1:0] tcdm_slave_local_resp_valid;
logic [NumTilesPerGroup-1:0] tcdm_slave_local_resp_ready;
// AXI interfaces
axi_tile_req_t [NumTilesPerGroup-1:0] axi_tile_req;
axi_tile_resp_t [NumTilesPerGroup-1:0] axi_tile_resp;
for (genvar t = 0; unsigned'(t) < NumTilesPerGroup; t++) begin: gen_tiles
tile_id_t id;
assign id = (group_id_i << $clog2(NumTilesPerGroup)) | t[idx_width(NumTilesPerGroup)-1:0];
mempool_tile_wrap #(
.TCDMBaseAddr(TCDMBaseAddr),
.BootAddr (BootAddr )
) i_tile (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.scan_enable_i (scan_enable_i ),
.scan_data_i (/* Unconnected */ ),
.scan_data_o (/* Unconnected */ ),
.tile_id_i (id ),
// TCDM Master interfaces
.tcdm_master_north_req_o (tcdm_master_north_req[t] ),
.tcdm_master_north_req_valid_o (tcdm_master_north_req_valid[t] ),
.tcdm_master_north_req_ready_i (tcdm_master_north_req_ready[t] ),
.tcdm_master_north_resp_i (tcdm_master_north_resp_i[t] ),
.tcdm_master_north_resp_valid_i (tcdm_master_north_resp_valid_i[t] ),
.tcdm_master_north_resp_ready_o (tcdm_master_north_resp_ready_o[t] ),
.tcdm_master_east_req_o (tcdm_master_east_req[t] ),
.tcdm_master_east_req_valid_o (tcdm_master_east_req_valid[t] ),
.tcdm_master_east_req_ready_i (tcdm_master_east_req_ready[t] ),
.tcdm_master_east_resp_i (tcdm_master_east_resp_i[t] ),
.tcdm_master_east_resp_valid_i (tcdm_master_east_resp_valid_i[t] ),
.tcdm_master_east_resp_ready_o (tcdm_master_east_resp_ready_o[t] ),
.tcdm_master_northeast_req_o (tcdm_master_northeast_req[t] ),
.tcdm_master_northeast_req_valid_o (tcdm_master_northeast_req_valid[t] ),
.tcdm_master_northeast_req_ready_i (tcdm_master_northeast_req_ready[t] ),
.tcdm_master_northeast_resp_i (tcdm_master_northeast_resp_i[t] ),
.tcdm_master_northeast_resp_valid_i(tcdm_master_northeast_resp_valid_i[t] ),
.tcdm_master_northeast_resp_ready_o(tcdm_master_northeast_resp_ready_o[t] ),
.tcdm_master_local_req_o (tcdm_master_local_req[t] ),
.tcdm_master_local_req_valid_o (tcdm_master_local_req_valid[t] ),
.tcdm_master_local_req_ready_i (tcdm_master_local_req_ready[t] ),
.tcdm_master_local_resp_i (tcdm_master_local_resp[t] ),
.tcdm_master_local_resp_valid_i (tcdm_master_local_resp_valid[t] ),
.tcdm_master_local_resp_ready_o (tcdm_master_local_resp_ready[t] ),
// TCDM banks interface
.tcdm_slave_north_req_i (tcdm_slave_north_req_i[t] ),
.tcdm_slave_north_req_valid_i (tcdm_slave_north_req_valid_i[t] ),
.tcdm_slave_north_req_ready_o (tcdm_slave_north_req_ready_o[t] ),
.tcdm_slave_north_resp_o (tcdm_slave_north_resp[t] ),
.tcdm_slave_north_resp_valid_o (tcdm_slave_north_resp_valid[t] ),
.tcdm_slave_north_resp_ready_i (tcdm_slave_north_resp_ready[t] ),
.tcdm_slave_east_req_i (tcdm_slave_east_req_i[t] ),
.tcdm_slave_east_req_valid_i (tcdm_slave_east_req_valid_i[t] ),
.tcdm_slave_east_req_ready_o (tcdm_slave_east_req_ready_o[t] ),
.tcdm_slave_east_resp_o (tcdm_slave_east_resp[t] ),
.tcdm_slave_east_resp_valid_o (tcdm_slave_east_resp_valid[t] ),
.tcdm_slave_east_resp_ready_i (tcdm_slave_east_resp_ready[t] ),
.tcdm_slave_northeast_req_i (tcdm_slave_northeast_req_i[t] ),
.tcdm_slave_northeast_req_valid_i (tcdm_slave_northeast_req_valid_i[t] ),
.tcdm_slave_northeast_req_ready_o (tcdm_slave_northeast_req_ready_o[t] ),
.tcdm_slave_northeast_resp_o (tcdm_slave_northeast_resp[t] ),
.tcdm_slave_northeast_resp_valid_o (tcdm_slave_northeast_resp_valid[t] ),
.tcdm_slave_northeast_resp_ready_i (tcdm_slave_northeast_resp_ready[t] ),
.tcdm_slave_local_req_i (tcdm_slave_local_req[t] ),
.tcdm_slave_local_req_valid_i (tcdm_slave_local_req_valid[t] ),
.tcdm_slave_local_req_ready_o (tcdm_slave_local_req_ready[t] ),
.tcdm_slave_local_resp_o (tcdm_slave_local_resp[t] ),
.tcdm_slave_local_resp_valid_o (tcdm_slave_local_resp_valid[t] ),
.tcdm_slave_local_resp_ready_i (tcdm_slave_local_resp_ready[t] ),
// AXI interface
.axi_mst_req_o (axi_tile_req[t] ),
.axi_mst_resp_i (axi_tile_resp[t] ),
// Wake up interface
.wake_up_i (wake_up_i[t*NumCoresPerTile +: NumCoresPerTile])
);
end : gen_tiles
/*************************
* Local Interconnect *
*************************/
logic [NumTilesPerGroup-1:0] master_local_req_valid;
logic [NumTilesPerGroup-1:0] master_local_req_ready;
tcdm_addr_t [NumTilesPerGroup-1:0] master_local_req_tgt_addr;
logic [NumTilesPerGroup-1:0] master_local_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] master_local_req_wdata;
strb_t [NumTilesPerGroup-1:0] master_local_req_be;
logic [NumTilesPerGroup-1:0] master_local_resp_valid;
logic [NumTilesPerGroup-1:0] master_local_resp_ready;
tcdm_payload_t [NumTilesPerGroup-1:0] master_local_resp_rdata;
logic [NumTilesPerGroup-1:0] slave_local_req_valid;
logic [NumTilesPerGroup-1:0] slave_local_req_ready;
tile_addr_t [NumTilesPerGroup-1:0] slave_local_req_tgt_addr;
tile_group_id_t [NumTilesPerGroup-1:0] slave_local_req_ini_addr;
logic [NumTilesPerGroup-1:0] slave_local_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_local_req_wdata;
strb_t [NumTilesPerGroup-1:0] slave_local_req_be;
logic [NumTilesPerGroup-1:0] slave_local_resp_valid;
logic [NumTilesPerGroup-1:0] slave_local_resp_ready;
tile_group_id_t [NumTilesPerGroup-1:0] slave_local_resp_ini_addr;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_local_resp_rdata;
for (genvar t = 0; t < NumTilesPerGroup; t++) begin: gen_local_connections
assign master_local_req_valid[t] = tcdm_master_local_req_valid[t];
assign master_local_req_tgt_addr[t] = tcdm_master_local_req[t].tgt_addr;
assign master_local_req_wen[t] = tcdm_master_local_req[t].wen;
assign master_local_req_wdata[t] = tcdm_master_local_req[t].wdata;
assign master_local_req_be[t] = tcdm_master_local_req[t].be;
assign tcdm_master_local_req_ready[t] = master_local_req_ready[t];
assign slave_local_resp_valid[t] = tcdm_slave_local_resp_valid[t];
assign slave_local_resp_ini_addr[t] = tcdm_slave_local_resp[t].ini_addr;
assign slave_local_resp_rdata[t] = tcdm_slave_local_resp[t].rdata;
assign tcdm_slave_local_resp_ready[t] = slave_local_resp_ready[t];
assign tcdm_master_local_resp_valid[t] = master_local_resp_valid[t];
assign tcdm_master_local_resp[t].rdata = master_local_resp_rdata[t];
assign master_local_resp_ready[t] = tcdm_master_local_resp_ready[t];
assign tcdm_slave_local_req_valid[t] = slave_local_req_valid[t];
assign tcdm_slave_local_req[t].tgt_addr = slave_local_req_tgt_addr[t];
assign tcdm_slave_local_req[t].ini_addr = slave_local_req_ini_addr[t];
assign tcdm_slave_local_req[t].wen = slave_local_req_wen[t];
assign tcdm_slave_local_req[t].wdata = slave_local_req_wdata[t];
assign tcdm_slave_local_req[t].be = slave_local_req_be[t];
assign slave_local_req_ready[t] = tcdm_slave_local_req_ready[t];
end
variable_latency_interconnect #(
.NumIn (NumTilesPerGroup ),
.NumOut (NumTilesPerGroup ),
.AddrWidth (TCDMAddrWidth ),
.DataWidth ($bits(tcdm_payload_t) ),
.BeWidth (DataWidth/8 ),
.ByteOffWidth(0 ),
.AddrMemWidth(TCDMAddrMemWidth + idx_width(NumBanksPerTile)),
.Topology (tcdm_interconnect_pkg::LIC ),
.AxiVldRdy (1'b1 )
) i_local_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.req_valid_i (master_local_req_valid ),
.req_ready_o (master_local_req_ready ),
.req_tgt_addr_i (master_local_req_tgt_addr),
.req_wen_i (master_local_req_wen ),
.req_wdata_i (master_local_req_wdata ),
.req_be_i (master_local_req_be ),
.resp_valid_o (master_local_resp_valid ),
.resp_ready_i (master_local_resp_ready ),
.resp_rdata_o (master_local_resp_rdata ),
.resp_ini_addr_i(slave_local_resp_ini_addr),
.resp_rdata_i (slave_local_resp_rdata ),
.resp_valid_i (slave_local_resp_valid ),
.resp_ready_o (slave_local_resp_ready ),
.req_valid_o (slave_local_req_valid ),
.req_ready_i (slave_local_req_ready ),
.req_be_o (slave_local_req_be ),
.req_wdata_o (slave_local_req_wdata ),
.req_wen_o (slave_local_req_wen ),
.req_ini_addr_o (slave_local_req_ini_addr ),
.req_tgt_addr_o (slave_local_req_tgt_addr )
);
/***********************
* East Interconnect *
***********************/
logic [NumTilesPerGroup-1:0] master_east_req_valid;
logic [NumTilesPerGroup-1:0] master_east_req_ready;
tcdm_addr_t [NumTilesPerGroup-1:0] master_east_req_tgt_addr;
logic [NumTilesPerGroup-1:0] master_east_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] master_east_req_wdata;
strb_t [NumTilesPerGroup-1:0] master_east_req_be;
logic [NumTilesPerGroup-1:0] master_east_resp_valid;
logic [NumTilesPerGroup-1:0] master_east_resp_ready;
tcdm_payload_t [NumTilesPerGroup-1:0] master_east_resp_rdata;
logic [NumTilesPerGroup-1:0] slave_east_req_valid;
logic [NumTilesPerGroup-1:0] slave_east_req_ready;
tile_addr_t [NumTilesPerGroup-1:0] slave_east_req_tgt_addr;
tile_group_id_t [NumTilesPerGroup-1:0] slave_east_req_ini_addr;
logic [NumTilesPerGroup-1:0] slave_east_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_east_req_wdata;
strb_t [NumTilesPerGroup-1:0] slave_east_req_be;
logic [NumTilesPerGroup-1:0] slave_east_resp_valid;
logic [NumTilesPerGroup-1:0] slave_east_resp_ready;
tile_group_id_t [NumTilesPerGroup-1:0] slave_east_resp_ini_addr;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_east_resp_rdata;
for (genvar t = 0; t < NumTilesPerGroup; t++) begin: gen_east_connections
assign master_east_req_valid[t] = tcdm_master_east_req_valid[t];
assign master_east_req_tgt_addr[t] = tcdm_master_east_req[t].tgt_addr;
assign master_east_req_wen[t] = tcdm_master_east_req[t].wen;
assign master_east_req_wdata[t] = tcdm_master_east_req[t].wdata;
assign master_east_req_be[t] = tcdm_master_east_req[t].be;
assign tcdm_master_east_req_ready[t] = master_east_req_ready[t];
assign tcdm_master_east_req_valid_o[t] = slave_east_req_valid[t];
assign tcdm_master_east_req_s[t].tgt_addr = slave_east_req_tgt_addr[t];
assign tcdm_master_east_req_s[t].ini_addr = slave_east_req_ini_addr[t];
assign tcdm_master_east_req_s[t].wen = slave_east_req_wen[t];
assign tcdm_master_east_req_s[t].wdata = slave_east_req_wdata[t];
assign tcdm_master_east_req_s[t].be = slave_east_req_be[t];
assign slave_east_req_ready[t] = tcdm_master_east_req_ready_i[t];
assign slave_east_resp_valid[t] = tcdm_slave_east_resp_valid[t];
assign slave_east_resp_ini_addr[t] = tcdm_slave_east_resp[t].ini_addr;
assign slave_east_resp_rdata[t] = tcdm_slave_east_resp[t].rdata;
assign tcdm_slave_east_resp_ready[t] = slave_east_resp_ready[t];
assign tcdm_slave_east_resp_valid_o[t] = master_east_resp_valid[t];
assign tcdm_slave_east_resp_s[t].rdata = master_east_resp_rdata[t];
assign master_east_resp_ready[t] = tcdm_slave_east_resp_ready_i[t];
end
variable_latency_interconnect #(
.NumIn (NumTilesPerGroup ),
.NumOut (NumTilesPerGroup ),
.AddrWidth (TCDMAddrWidth ),
.DataWidth ($bits(tcdm_payload_t) ),
.BeWidth (DataWidth/8 ),
.ByteOffWidth (0 ),
.AddrMemWidth (TCDMAddrMemWidth + idx_width(NumBanksPerTile)),
.Topology (tcdm_interconnect_pkg::LIC ),
.AxiVldRdy (1'b1 ),
.SpillRegisterReq (64'b1 ),
.SpillRegisterResp (64'b1 ),
.FallThroughRegister(1'b1 )
) i_east_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.req_valid_i (master_east_req_valid ),
.req_ready_o (master_east_req_ready ),
.req_tgt_addr_i (master_east_req_tgt_addr),
.req_wen_i (master_east_req_wen ),
.req_wdata_i (master_east_req_wdata ),
.req_be_i (master_east_req_be ),
.resp_valid_o (master_east_resp_valid ),
.resp_ready_i (master_east_resp_ready ),
.resp_rdata_o (master_east_resp_rdata ),
.resp_ini_addr_i(slave_east_resp_ini_addr),
.resp_rdata_i (slave_east_resp_rdata ),
.resp_valid_i (slave_east_resp_valid ),
.resp_ready_o (slave_east_resp_ready ),
.req_valid_o (slave_east_req_valid ),
.req_ready_i (slave_east_req_ready ),
.req_be_o (slave_east_req_be ),
.req_wdata_o (slave_east_req_wdata ),
.req_wen_o (slave_east_req_wen ),
.req_ini_addr_o (slave_east_req_ini_addr ),
.req_tgt_addr_o (slave_east_req_tgt_addr )
);
/************************
* North Interconnect *
************************/
logic [NumTilesPerGroup-1:0] master_north_req_valid;
logic [NumTilesPerGroup-1:0] master_north_req_ready;
tcdm_addr_t [NumTilesPerGroup-1:0] master_north_req_tgt_addr;
logic [NumTilesPerGroup-1:0] master_north_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] master_north_req_wdata;
strb_t [NumTilesPerGroup-1:0] master_north_req_be;
logic [NumTilesPerGroup-1:0] master_north_resp_valid;
logic [NumTilesPerGroup-1:0] master_north_resp_ready;
tcdm_payload_t [NumTilesPerGroup-1:0] master_north_resp_rdata;
logic [NumTilesPerGroup-1:0] slave_north_req_valid;
logic [NumTilesPerGroup-1:0] slave_north_req_ready;
tile_addr_t [NumTilesPerGroup-1:0] slave_north_req_tgt_addr;
tile_group_id_t [NumTilesPerGroup-1:0] slave_north_req_ini_addr;
logic [NumTilesPerGroup-1:0] slave_north_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_north_req_wdata;
strb_t [NumTilesPerGroup-1:0] slave_north_req_be;
logic [NumTilesPerGroup-1:0] slave_north_resp_valid;
logic [NumTilesPerGroup-1:0] slave_north_resp_ready;
tile_group_id_t [NumTilesPerGroup-1:0] slave_north_resp_ini_addr;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_north_resp_rdata;
for (genvar t = 0; t < NumTilesPerGroup; t++) begin: gen_north_connections
assign master_north_req_valid[t] = tcdm_master_north_req_valid[t];
assign master_north_req_tgt_addr[t] = tcdm_master_north_req[t].tgt_addr;
assign master_north_req_wen[t] = tcdm_master_north_req[t].wen;
assign master_north_req_wdata[t] = tcdm_master_north_req[t].wdata;
assign master_north_req_be[t] = tcdm_master_north_req[t].be;
assign tcdm_master_north_req_ready[t] = master_north_req_ready[t];
assign tcdm_master_north_req_valid_o[t] = slave_north_req_valid[t];
assign tcdm_master_north_req_s[t].tgt_addr = slave_north_req_tgt_addr[t];
assign tcdm_master_north_req_s[t].ini_addr = slave_north_req_ini_addr[t];
assign tcdm_master_north_req_s[t].wen = slave_north_req_wen[t];
assign tcdm_master_north_req_s[t].wdata = slave_north_req_wdata[t];
assign tcdm_master_north_req_s[t].be = slave_north_req_be[t];
assign slave_north_req_ready[t] = tcdm_master_north_req_ready_i[t];
assign slave_north_resp_valid[t] = tcdm_slave_north_resp_valid[t];
assign slave_north_resp_ini_addr[t] = tcdm_slave_north_resp[t].ini_addr;
assign slave_north_resp_rdata[t] = tcdm_slave_north_resp[t].rdata;
assign tcdm_slave_north_resp_ready[t] = slave_north_resp_ready[t];
assign tcdm_slave_north_resp_valid_o[t] = master_north_resp_valid[t];
assign tcdm_slave_north_resp_s[t].rdata = master_north_resp_rdata[t];
assign master_north_resp_ready[t] = tcdm_slave_north_resp_ready_i[t];
end
variable_latency_interconnect #(
.NumIn (NumTilesPerGroup ),
.NumOut (NumTilesPerGroup ),
.AddrWidth (TCDMAddrWidth ),
.DataWidth ($bits(tcdm_payload_t) ),
.BeWidth (DataWidth/8 ),
.ByteOffWidth (0 ),
.AddrMemWidth (TCDMAddrMemWidth + idx_width(NumBanksPerTile)),
.Topology (tcdm_interconnect_pkg::LIC ),
.AxiVldRdy (1'b1 ),
.SpillRegisterReq (64'b1 ),
.SpillRegisterResp (64'b1 ),
.FallThroughRegister(1'b1 )
) i_north_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.req_valid_i (master_north_req_valid ),
.req_ready_o (master_north_req_ready ),
.req_tgt_addr_i (master_north_req_tgt_addr),
.req_wen_i (master_north_req_wen ),
.req_wdata_i (master_north_req_wdata ),
.req_be_i (master_north_req_be ),
.resp_valid_o (master_north_resp_valid ),
.resp_ready_i (master_north_resp_ready ),
.resp_rdata_o (master_north_resp_rdata ),
.req_valid_o (slave_north_req_valid ),
.req_ready_i (slave_north_req_ready ),
.req_be_o (slave_north_req_be ),
.req_wdata_o (slave_north_req_wdata ),
.req_wen_o (slave_north_req_wen ),
.req_ini_addr_o (slave_north_req_ini_addr ),
.req_tgt_addr_o (slave_north_req_tgt_addr ),
.resp_ini_addr_i(slave_north_resp_ini_addr),
.resp_rdata_i (slave_north_resp_rdata ),
.resp_valid_i (slave_north_resp_valid ),
.resp_ready_o (slave_north_resp_ready )
);
/****************************
* Northeast Interconnect *
****************************/
logic [NumTilesPerGroup-1:0] master_northeast_req_valid;
logic [NumTilesPerGroup-1:0] master_northeast_req_ready;
tcdm_addr_t [NumTilesPerGroup-1:0] master_northeast_req_tgt_addr;
logic [NumTilesPerGroup-1:0] master_northeast_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] master_northeast_req_wdata;
strb_t [NumTilesPerGroup-1:0] master_northeast_req_be;
logic [NumTilesPerGroup-1:0] master_northeast_resp_valid;
logic [NumTilesPerGroup-1:0] master_northeast_resp_ready;
tcdm_payload_t [NumTilesPerGroup-1:0] master_northeast_resp_rdata;
logic [NumTilesPerGroup-1:0] slave_northeast_req_valid;
logic [NumTilesPerGroup-1:0] slave_northeast_req_ready;
tile_addr_t [NumTilesPerGroup-1:0] slave_northeast_req_tgt_addr;
tile_group_id_t [NumTilesPerGroup-1:0] slave_northeast_req_ini_addr;
logic [NumTilesPerGroup-1:0] slave_northeast_req_wen;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_northeast_req_wdata;
strb_t [NumTilesPerGroup-1:0] slave_northeast_req_be;
logic [NumTilesPerGroup-1:0] slave_northeast_resp_valid;
logic [NumTilesPerGroup-1:0] slave_northeast_resp_ready;
tile_group_id_t [NumTilesPerGroup-1:0] slave_northeast_resp_ini_addr;
tcdm_payload_t [NumTilesPerGroup-1:0] slave_northeast_resp_rdata;
for (genvar t = 0; t < NumTilesPerGroup; t++) begin: gen_northeast_connections
assign master_northeast_req_valid[t] = tcdm_master_northeast_req_valid[t];
assign master_northeast_req_tgt_addr[t] = tcdm_master_northeast_req[t].tgt_addr;
assign master_northeast_req_wen[t] = tcdm_master_northeast_req[t].wen;
assign master_northeast_req_wdata[t] = tcdm_master_northeast_req[t].wdata;
assign master_northeast_req_be[t] = tcdm_master_northeast_req[t].be;
assign tcdm_master_northeast_req_ready[t] = master_northeast_req_ready[t];
assign tcdm_master_northeast_req_valid_o[t] = slave_northeast_req_valid[t];
assign tcdm_master_northeast_req_s[t].tgt_addr = slave_northeast_req_tgt_addr[t];
assign tcdm_master_northeast_req_s[t].ini_addr = slave_northeast_req_ini_addr[t];
assign tcdm_master_northeast_req_s[t].wen = slave_northeast_req_wen[t];
assign tcdm_master_northeast_req_s[t].wdata = slave_northeast_req_wdata[t];
assign tcdm_master_northeast_req_s[t].be = slave_northeast_req_be[t];
assign slave_northeast_req_ready[t] = tcdm_master_northeast_req_ready_i[t];
assign slave_northeast_resp_valid[t] = tcdm_slave_northeast_resp_valid[t];
assign slave_northeast_resp_ini_addr[t] = tcdm_slave_northeast_resp[t].ini_addr;
assign slave_northeast_resp_rdata[t] = tcdm_slave_northeast_resp[t].rdata;
assign tcdm_slave_northeast_resp_ready[t] = slave_northeast_resp_ready[t];
assign tcdm_slave_northeast_resp_valid_o[t] = master_northeast_resp_valid[t];
assign tcdm_slave_northeast_resp_s[t].rdata = master_northeast_resp_rdata[t];
assign master_northeast_resp_ready[t] = tcdm_slave_northeast_resp_ready_i[t];
end
variable_latency_interconnect #(
.NumIn (NumTilesPerGroup ),
.NumOut (NumTilesPerGroup ),
.AddrWidth (TCDMAddrWidth ),
.DataWidth ($bits(tcdm_payload_t) ),
.BeWidth (DataWidth/8 ),
.ByteOffWidth (0 ),
.AddrMemWidth (TCDMAddrMemWidth + idx_width(NumBanksPerTile)),
.Topology (tcdm_interconnect_pkg::LIC ),
.AxiVldRdy (1'b1 ),
.SpillRegisterReq (64'b1 ),
.SpillRegisterResp (64'b1 ),
.FallThroughRegister(1'b1 )
) i_northeast_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.req_valid_i (master_northeast_req_valid ),
.req_ready_o (master_northeast_req_ready ),
.req_tgt_addr_i (master_northeast_req_tgt_addr),
.req_wen_i (master_northeast_req_wen ),
.req_wdata_i (master_northeast_req_wdata ),
.req_be_i (master_northeast_req_be ),
.resp_valid_o (master_northeast_resp_valid ),
.resp_ready_i (master_northeast_resp_ready ),
.resp_rdata_o (master_northeast_resp_rdata ),
.resp_ini_addr_i(slave_northeast_resp_ini_addr),
.resp_rdata_i (slave_northeast_resp_rdata ),
.resp_valid_i (slave_northeast_resp_valid ),
.resp_ready_o (slave_northeast_resp_ready ),
.req_valid_o (slave_northeast_req_valid ),
.req_ready_i (slave_northeast_req_ready ),
.req_be_o (slave_northeast_req_be ),
.req_wdata_o (slave_northeast_req_wdata ),
.req_wen_o (slave_northeast_req_wen ),
.req_ini_addr_o (slave_northeast_req_ini_addr ),
.req_tgt_addr_o (slave_northeast_req_tgt_addr )
);
/**********************
* AXI Interconnect *
**********************/
axi_hier_interco #(
.NumSlvPorts (NumTilesPerGroup),
.NumPortsPerMux (4 ),
.EnableCache (1'b0 ),
.AddrWidth (AddrWidth ),
.DataWidth (AxiDataWidth ),
.SlvIdWidth (AxiTileIdWidth ),
.MstIdWidth (AxiTileIdWidth ),
.UserWidth (1 ),
.slv_req_t (axi_tile_req_t ),
.slv_resp_t (axi_tile_resp_t ),
.mst_req_t (axi_tile_req_t ),
.mst_resp_t (axi_tile_resp_t )
) i_axi_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.test_i (1'b0 ),
.slv_req_i (axi_tile_req ),
.slv_resp_o (axi_tile_resp ),
.mst_req_o (axi_mst_req_o ),
.mst_resp_i (axi_mst_resp_i)
);
/*********************
* Bypass Channels *
*********************/
assign tcdm_master_bypass_req_o = tcdm_slave_bypass_req_i;
assign tcdm_master_bypass_req_valid_o = tcdm_slave_bypass_req_valid_i;
assign tcdm_slave_bypass_req_ready_o = tcdm_master_bypass_req_ready_i;
assign tcdm_slave_bypass_resp_o = tcdm_master_bypass_resp_i;
assign tcdm_slave_bypass_resp_valid_o = tcdm_master_bypass_resp_valid_i;
assign tcdm_master_bypass_resp_ready_o = tcdm_slave_bypass_resp_ready_i;
endmodule: mempool_group
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// csamudra, 12/24: changed the fallback value of the NumCores localparam (line 18 of the original file), used when NUM_CORES is not defined, from 0 to 16.
// csamudra, 12/24: changed the fallback value of the NumCoresPerTile localparam (line 19 of the original file), used when NUM_CORES_PER_TILE is not defined, from 0 to 4.
// Package with the global configuration of the design: core/tile/group counts,
// memory geometry, AXI struct types, and the TCDM interconnect request/response types.
// Several values can be overridden from the command line through preprocessor
// defines (NUM_CORES, NUM_CORES_PER_TILE, L2_SIZE, TRAFFIC_GEN).
package mempool_pkg;
import snitch_pkg::MetaIdWidth;
import cf_math_pkg::idx_width;
/*********************
* TILE PARAMETERS *
*********************/
`include "axi/assign.svh"
`include "axi/typedef.svh"
// Total number of cores; falls back to 16 when NUM_CORES is not defined.
localparam integer unsigned NumCores = `ifdef NUM_CORES `NUM_CORES `else 16 `endif;
// Cores per tile; falls back to 4 when NUM_CORES_PER_TILE is not defined.
localparam integer unsigned NumCoresPerTile = `ifdef NUM_CORES_PER_TILE `NUM_CORES_PER_TILE `else 4 `endif;
localparam integer unsigned NumGroups = 4;
// Derived hierarchy sizes. These are integer divisions, so NumCores is presumably
// expected to be a multiple of NumCoresPerTile * NumGroups -- TODO confirm.
localparam integer unsigned NumTiles = NumCores / NumCoresPerTile;
localparam integer unsigned NumTilesPerGroup = NumTiles / NumGroups;
localparam integer unsigned NumCoresPerGroup = NumCores / NumGroups;
// One instruction cache is shared by all cores of a tile
localparam integer unsigned NumCoresPerCache = NumCoresPerTile;
localparam integer unsigned AxiCoreIdWidth = 1;
localparam integer unsigned AxiTileIdWidth = AxiCoreIdWidth+1; // + 1 for cache
localparam integer unsigned AxiDataWidth = 128;
localparam integer unsigned AxiLiteDataWidth = 32;
/***********************
* MEMORY PARAMETERS *
***********************/
localparam integer unsigned AddrWidth = 32;
localparam integer unsigned DataWidth = 32;
// Number of byte-enable bits of a data word
localparam integer unsigned BeWidth = DataWidth / 8;
// Number of address bits that select a byte inside a data word
localparam integer unsigned ByteOffset = $clog2(BeWidth);
// Number of TCDM banks per core
localparam integer unsigned BankingFactor = 4;
localparam bit LrScEnable = 1'b1;
localparam integer unsigned TCDMSizePerBank = 1024; // [B]
localparam integer unsigned NumBanks = NumCores * BankingFactor;
localparam integer unsigned NumBanksPerTile = NumBanks / NumTiles;
localparam integer unsigned NumBanksPerGroup = NumBanks / NumGroups;
// Width of a word address inside a single bank
localparam integer unsigned TCDMAddrMemWidth = $clog2(TCDMSizePerBank / mempool_pkg::BeWidth);
// Width of a TCDM address inside a group: word address within the bank plus the bank index within the group
localparam integer unsigned TCDMAddrWidth = TCDMAddrMemWidth + idx_width(NumBanksPerGroup);
// Size of the L2 memory; falls back to 0 when L2_SIZE is not defined.
// NOTE(review): with the fallback of 0, mempool_system derives a zero-word L2 from this
// value; presumably the build always defines L2_SIZE -- confirm.
localparam integer unsigned L2Size = `ifdef L2_SIZE `L2_SIZE `else 0 `endif; // [B]
// Number of bytes in one L2 word (the L2 is as wide as the AXI data bus)
localparam integer unsigned L2BeWidth = AxiDataWidth/8;
localparam integer unsigned L2ByteOffset = $clog2(L2BeWidth);
// Basic vector types used by the AXI struct definitions below
typedef logic [AxiCoreIdWidth-1:0] axi_core_id_t;
typedef logic [AxiTileIdWidth-1:0] axi_tile_id_t;
typedef logic [AxiDataWidth-1:0] axi_data_t;
typedef logic [AxiDataWidth/8-1:0] axi_strb_t;
typedef logic [AxiLiteDataWidth-1:0] axi_lite_data_t;
typedef logic [AxiLiteDataWidth/8-1:0] axi_lite_strb_t;
typedef logic [AddrWidth-1:0] addr_t;
typedef logic [DataWidth-1:0] data_t;
typedef logic [BeWidth-1:0] strb_t;
// Masters of the system crossbar: one per group plus one extra
// (mempool_system uses the extra master port for the external host).
localparam NumSystemXbarMasters = NumGroups + 1;
// System-level IDs extend the tile ID by the bits needed to index the crossbar masters
localparam AxiSystemIdWidth = $clog2(NumSystemXbarMasters) + AxiTileIdWidth;
typedef logic [AxiSystemIdWidth-1:0] axi_system_id_t;
localparam NumTestbenchXbarMasters = 1;
// With a single testbench master, $clog2(1) = 0 and this equals AxiSystemIdWidth
localparam AxiTestbenchIdWidth = $clog2(NumTestbenchXbarMasters) + AxiSystemIdWidth;
typedef logic [AxiTestbenchIdWidth-1:0] axi_tb_id_t;
// AXI types with core-level ID width (axi_core_id_t), declared through the
// AXI_TYPEDEF_* macros of axi/typedef.svh
`AXI_TYPEDEF_AW_CHAN_T(axi_core_aw_t, addr_t, axi_core_id_t, logic);
`AXI_TYPEDEF_W_CHAN_T(axi_core_w_t, axi_data_t, axi_strb_t, logic);
`AXI_TYPEDEF_B_CHAN_T(axi_core_b_t, axi_core_id_t, logic);
`AXI_TYPEDEF_AR_CHAN_T(axi_core_ar_t, addr_t, axi_core_id_t, logic);
`AXI_TYPEDEF_R_CHAN_T(axi_core_r_t, axi_data_t, axi_core_id_t, logic);
`AXI_TYPEDEF_REQ_T(axi_core_req_t, axi_core_aw_t, axi_core_w_t, axi_core_ar_t);
`AXI_TYPEDEF_RESP_T(axi_core_resp_t, axi_core_b_t, axi_core_r_t );
// AXI types with tile-level ID width (axi_tile_id_t)
`AXI_TYPEDEF_AW_CHAN_T(axi_tile_aw_t, addr_t, axi_tile_id_t, logic);
`AXI_TYPEDEF_W_CHAN_T(axi_tile_w_t, axi_data_t, axi_strb_t, logic);
`AXI_TYPEDEF_B_CHAN_T(axi_tile_b_t, axi_tile_id_t, logic);
`AXI_TYPEDEF_AR_CHAN_T(axi_tile_ar_t, addr_t, axi_tile_id_t, logic);
`AXI_TYPEDEF_R_CHAN_T(axi_tile_r_t, axi_data_t, axi_tile_id_t, logic);
`AXI_TYPEDEF_REQ_T(axi_tile_req_t, axi_tile_aw_t, axi_tile_w_t, axi_tile_ar_t);
`AXI_TYPEDEF_RESP_T(axi_tile_resp_t, axi_tile_b_t, axi_tile_r_t );
// AXI types with system-level ID width (axi_system_id_t)
`AXI_TYPEDEF_AW_CHAN_T(axi_system_aw_t, addr_t, axi_system_id_t, logic);
`AXI_TYPEDEF_W_CHAN_T(axi_system_w_t, axi_data_t, axi_strb_t, logic);
`AXI_TYPEDEF_B_CHAN_T(axi_system_b_t, axi_system_id_t, logic);
`AXI_TYPEDEF_AR_CHAN_T(axi_system_ar_t, addr_t, axi_system_id_t, logic);
`AXI_TYPEDEF_R_CHAN_T(axi_system_r_t, axi_data_t, axi_system_id_t, logic);
`AXI_TYPEDEF_REQ_T(axi_system_req_t, axi_system_aw_t, axi_system_w_t, axi_system_ar_t);
`AXI_TYPEDEF_RESP_T(axi_system_resp_t, axi_system_b_t, axi_system_r_t);
// AXI to ctrl registers: same AW/B/AR channels as the system types, but the W and R
// channels carry the narrower AXI-Lite data width (axi_lite_data_t)
`AXI_TYPEDEF_W_CHAN_T(axi_ctrl_w_t, axi_lite_data_t, axi_lite_strb_t, logic);
`AXI_TYPEDEF_R_CHAN_T(axi_ctrl_r_t, axi_lite_data_t, axi_system_id_t, logic);
`AXI_TYPEDEF_REQ_T(axi_ctrl_req_t, axi_system_aw_t, axi_ctrl_w_t, axi_system_ar_t);
`AXI_TYPEDEF_RESP_T(axi_ctrl_resp_t, axi_system_b_t, axi_ctrl_r_t);
// AXI types with testbench-level ID width (axi_tb_id_t)
`AXI_TYPEDEF_AW_CHAN_T(axi_tb_aw_t, addr_t, axi_tb_id_t, logic);
`AXI_TYPEDEF_W_CHAN_T(axi_tb_w_t, axi_data_t, axi_strb_t, logic);
`AXI_TYPEDEF_B_CHAN_T(axi_tb_b_t, axi_tb_id_t, logic);
`AXI_TYPEDEF_AR_CHAN_T(axi_tb_ar_t, addr_t, axi_tb_id_t, logic);
`AXI_TYPEDEF_R_CHAN_T(axi_tb_r_t, axi_data_t, axi_tb_id_t, logic);
`AXI_TYPEDEF_REQ_T(axi_tb_req_t, axi_tb_aw_t, axi_tb_w_t, axi_tb_ar_t);
`AXI_TYPEDEF_RESP_T(axi_tb_resp_t, axi_tb_b_t, axi_tb_r_t);
// AXI-Lite types used on the slave port of the control registers
`AXI_LITE_TYPEDEF_AW_CHAN_T(axi_lite_slv_aw_t, addr_t)
`AXI_LITE_TYPEDEF_W_CHAN_T(axi_lite_slv_w_t, axi_lite_data_t, axi_lite_strb_t)
`AXI_LITE_TYPEDEF_B_CHAN_T(axi_lite_slv_b_t)
`AXI_LITE_TYPEDEF_AR_CHAN_T(axi_lite_slv_ar_t, addr_t)
`AXI_LITE_TYPEDEF_R_CHAN_T(axi_lite_slv_r_t, axi_lite_data_t)
`AXI_LITE_TYPEDEF_REQ_T(axi_lite_slv_req_t, axi_lite_slv_aw_t, axi_lite_slv_w_t, axi_lite_slv_ar_t)
`AXI_LITE_TYPEDEF_RESP_T(axi_lite_slv_resp_t, axi_lite_slv_b_t, axi_lite_slv_r_t)
/***********************
* INSTRUCTION CACHE *
***********************/
localparam int unsigned ICacheSizeByte = 512 * NumCoresPerCache; // Total Size of instruction cache in bytes
localparam int unsigned ICacheSets = NumCoresPerCache / 2; // Number of sets
localparam int unsigned ICacheLineWidth = 32 * 2 * NumCoresPerCache; // Size of each cache line in bits
/**********************************
* TCDM INTERCONNECT PARAMETERS *
**********************************/
// Address of a word within a group (bank index within the group + word address within the bank)
typedef logic [TCDMAddrWidth-1:0] tcdm_addr_t;
// Address of a word within a single bank
typedef logic [TCDMAddrMemWidth-1:0] bank_addr_t;
// Address of a word within a tile (bank index within the tile + word address within the bank)
typedef logic [TCDMAddrMemWidth+idx_width(NumBanksPerTile)-1:0] tile_addr_t;
typedef logic [MetaIdWidth-1:0] meta_id_t;
// Index types for a core within its tile, a tile within its group, and a group
typedef logic [idx_width(NumCoresPerTile)-1:0] tile_core_id_t;
typedef logic [idx_width(NumTilesPerGroup)-1:0] tile_group_id_t;
typedef logic [idx_width(NumGroups)-1:0] group_id_t;
typedef logic [3:0] amo_t;
// Payload carried by both requests (as wdata) and responses (as rdata)
typedef struct packed {
meta_id_t meta_id;
tile_core_id_t core_id;
amo_t amo;
data_t data;
} tcdm_payload_t;
// Request on the master side of an interconnect; the target is a group-level address
typedef struct packed {
tcdm_payload_t wdata;
logic wen;
strb_t be;
tcdm_addr_t tgt_addr;
} tcdm_master_req_t;
// Response on the master side of an interconnect
typedef struct packed {
tcdm_payload_t rdata;
} tcdm_master_resp_t;
// Request on the slave side of an interconnect; the target is a tile-level address and
// ini_addr holds a tile index within the group (tile_group_id_t)
typedef struct packed {
tcdm_payload_t wdata;
logic wen;
strb_t be;
tile_addr_t tgt_addr;
tile_group_id_t ini_addr;
} tcdm_slave_req_t;
// Response on the slave side of an interconnect; ini_addr holds a tile index within the group
typedef struct packed {
tcdm_payload_t rdata;
tile_group_id_t ini_addr;
} tcdm_slave_resp_t;
/*****************
* ADDRESS MAP *
*****************/
// Size in bytes of memory that is sequentially addressable per tile
localparam int unsigned SeqMemSizePerTile = NumCoresPerTile*1024; // 1 KiB per core in the tile
// One entry of an address map: a slave index together with a mask/value pair.
// NOTE(review): presumably an address matches when (addr & mask) == value -- confirm at the point of use.
typedef struct packed {
int unsigned slave_idx;
addr_t mask;
addr_t value;
} address_map_t;
/***********************
* TRAFFIC GENERATOR *
***********************/
// Replaces core with a traffic generator
// Falls back to 0 (disabled) when TRAFFIC_GEN is not defined.
parameter bit TrafficGeneration = `ifdef TRAFFIC_GEN `TRAFFIC_GEN `else 0 `endif;
endpackage : mempool_pkg
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
`include "axi/assign.svh"
`include "common_cells/registers.svh"
// Top level of the MemPool system. It wraps the MemPool cluster and connects the
// AXI master ports of the cluster, plus one external host port, through a
// crossbar to four slaves: the control registers, the L2 memory, the boot ROM,
// and an external master port.
//
// Parameters:
//   TCDMBaseAddr - base address of the TCDM, forwarded to the cluster and the control registers
//   BootAddr     - boot address, forwarded to the cluster
//
// Ports:
//   clk_i / rst_ni  - clock and active-low reset
//   fetch_en_i      - not used inside this module
//   eoc_valid_o     - driven by the control registers
//   busy_o          - tied to 1'b0
//   mst_req_o/mst_resp_i - AXI port towards the host (crossbar slave `External`)
//   slv_req_i/slv_resp_o - AXI port from the host into the crossbar
module mempool_system
  import mempool_pkg::*;
#(
  // TCDM
  parameter addr_t TCDMBaseAddr = 32'h0000_0000,
  // Boot address
  parameter addr_t BootAddr     = 32'h0000_0000
) (
  input  logic             clk_i,
  input  logic             rst_ni,
  input  logic             fetch_en_i,
  output logic             eoc_valid_o,
  output logic             busy_o,
  output axi_system_req_t  mst_req_o,
  input  axi_system_resp_t mst_resp_i,
  input  axi_system_req_t  slv_req_i,
  output axi_system_resp_t slv_resp_o
);

  import axi_pkg::xbar_cfg_t;
  import axi_pkg::xbar_rule_32_t;
  import axi_pkg::atop_t;

  // Total TCDM size in bytes
  localparam TCDMSize = NumBanks * TCDMSizePerBank;

  /*********
   *  AXI  *
   *********/

  localparam NumAXIMasters = NumGroups + 1; // +1 because the external host is also a master
  localparam NumAXISlaves  = 4;             // control regs, l2 memory, bootrom and the external mst ports
  // The External slave has no address rule; it is reached through the default master port
  localparam NumRules      = NumAXISlaves - 1;

  // Index of each slave on the master side of the crossbar
  typedef enum logic [$clog2(NumAXISlaves) - 1:0] {
    CtrlRegisters,
    L2Memory,
    Bootrom,
    External
  } axi_slave_target;

  axi_tile_req_t    [NumAXIMasters - 1:0] axi_mst_req;
  axi_tile_resp_t   [NumAXIMasters - 1:0] axi_mst_resp;
  axi_system_req_t  [NumAXISlaves - 1:0]  axi_mem_req;
  axi_system_resp_t [NumAXISlaves - 1:0]  axi_mem_resp;
  logic             [NumCores - 1:0]      wake_up;
  // Declared but not connected anywhere in this module
  logic             [DataWidth - 1:0]     eoc;

  localparam xbar_cfg_t XBarCfg = '{
    NoSlvPorts         : NumAXIMasters,
    NoMstPorts         : NumAXISlaves,
    MaxMstTrans        : 4,
    MaxSlvTrans        : 4,
    FallThrough        : 1'b0,
    LatencyMode        : axi_pkg::CUT_MST_PORTS,
    AxiIdWidthSlvPorts : AxiTileIdWidth,
    AxiIdUsedSlvPorts  : AxiTileIdWidth,
    UniqueIds          : 0,
    AxiAddrWidth       : AddrWidth,
    AxiDataWidth       : AxiDataWidth,
    NoAddrRules        : NumRules
  };

  /*********************
   *  MemPool Cluster  *
   *********************/

  // The cluster drives all crossbar slave ports except the last one,
  // which is reserved for the external host (see i_axi_id_remap below).
  mempool_cluster #(
    .TCDMBaseAddr(TCDMBaseAddr),
    .BootAddr    (BootAddr    )
  ) i_mempool_cluster (
    .clk_i         (clk_i                          ),
    .rst_ni        (rst_ni                         ),
    .wake_up_i     (wake_up                        ),
    .testmode_i    (1'b0                           ),
    .scan_enable_i (1'b0                           ),
    .scan_data_i   (1'b0                           ),
    .scan_data_o   (/* Unused */                   ),
    .axi_mst_req_o (axi_mst_req[NumAXIMasters-2:0] ),
    .axi_mst_resp_i(axi_mst_resp[NumAXIMasters-2:0])
  );

  /**********************
   *  AXI Interconnect  *
   **********************/

  // Address map. The address decoder of axi_xbar includes `start_addr` in a rule's
  // range but excludes `end_addr`, so every end address below is the first address
  // AFTER the region. The control-register and boot-ROM windows are 64 KiB each.
  localparam addr_t CtrlRegistersBaseAddr = 32'h4000_0000;
  localparam addr_t CtrlRegistersEndAddr  = 32'h4001_0000;
  localparam addr_t L2MemoryBaseAddr      = `ifdef L2_BASE `L2_BASE `else 32'h8000_0000 `endif;
  localparam addr_t L2MemoryEndAddr       = L2MemoryBaseAddr + L2Size;
  localparam addr_t BootromBaseAddr       = 32'hA000_0000;
  localparam addr_t BootromEndAddr        = 32'hA001_0000;

  xbar_rule_32_t [NumRules - 1:0] xbar_routing_rules;
  assign xbar_routing_rules = '{
    '{idx: CtrlRegisters, start_addr: CtrlRegistersBaseAddr, end_addr: CtrlRegistersEndAddr},
    '{idx: L2Memory, start_addr: L2MemoryBaseAddr, end_addr: L2MemoryEndAddr},
    '{idx: Bootrom, start_addr: BootromBaseAddr, end_addr: BootromEndAddr}
  };

  axi_xbar #(
    .Cfg          (XBarCfg          ),
    .slv_aw_chan_t(axi_tile_aw_t    ),
    .mst_aw_chan_t(axi_system_aw_t  ),
    .w_chan_t     (axi_tile_w_t     ),
    .slv_b_chan_t (axi_tile_b_t     ),
    .mst_b_chan_t (axi_system_b_t   ),
    .slv_ar_chan_t(axi_tile_ar_t    ),
    .mst_ar_chan_t(axi_system_ar_t  ),
    .slv_r_chan_t (axi_tile_r_t     ),
    .mst_r_chan_t (axi_system_r_t   ),
    .slv_req_t    (axi_tile_req_t   ),
    .slv_resp_t   (axi_tile_resp_t  ),
    .mst_req_t    (axi_system_req_t ),
    .mst_resp_t   (axi_system_resp_t),
    .rule_t       (xbar_rule_32_t   )
  ) i_xbar (
    .clk_i                (clk_i                    ),
    .rst_ni               (rst_ni                   ),
    .test_i               (1'b0                     ),
    .slv_ports_req_i      (axi_mst_req              ),
    .slv_ports_resp_o     (axi_mst_resp             ),
    .mst_ports_req_o      (axi_mem_req              ),
    .mst_ports_resp_i     (axi_mem_resp             ),
    .addr_map_i           (xbar_routing_rules       ),
    .en_default_mst_port_i('1                       ), // default all slave ports to master port External
    .default_mst_port_i   ({NumAXIMasters{External}})
  );

  /********
   *  L2  *
   ********/

  localparam L2NumWords  = L2Size / L2BeWidth;
  localparam L2AddrWidth = $clog2(L2NumWords);

  // Memory
  logic      mem_req;
  logic      mem_rvalid;
  addr_t     mem_addr;
  axi_data_t mem_wdata;
  axi_strb_t mem_strb;
  logic      mem_we;
  axi_data_t mem_rdata;

  axi2mem #(
    .axi_req_t (axi_system_req_t ),
    .axi_resp_t(axi_system_resp_t),
    .AddrWidth (AddrWidth        ),
    .DataWidth (AxiDataWidth     ),
    .IdWidth   (AxiSystemIdWidth ),
    .NumBanks  (1                ),
    .BufDepth  (2                )
  ) i_axi2mem_l2mem (
    .clk_i       (clk_i                 ),
    .rst_ni      (rst_ni                ),
    .busy_o      (/* unused */          ),
    .axi_req_i   (axi_mem_req[L2Memory] ),
    .axi_resp_o  (axi_mem_resp[L2Memory]),
    .mem_req_o   (mem_req               ),
    .mem_gnt_i   (mem_req               ), // every request is granted immediately
    .mem_addr_o  (mem_addr              ),
    .mem_wdata_o (mem_wdata             ),
    .mem_strb_o  (mem_strb              ),
    .mem_atop_o  (/* unused */          ),
    .mem_we_o    (mem_we                ),
    .mem_rvalid_i(mem_rvalid            ),
    .mem_rdata_i (mem_rdata             )
  );

  // mem_rvalid is mem_req registered through the `FF macro (reset value 1'b0);
  // presumably this matches a one-cycle read latency of the SRAM -- TODO confirm.
  `FF(mem_rvalid, mem_req, 1'b0, clk_i, rst_ni)

  tc_sram #(
    .DataWidth(AxiDataWidth),
    .NumWords (L2NumWords  ),
    .NumPorts (1           )
  ) l2_mem (
    .clk_i  (clk_i                                ),
    .rst_ni (rst_ni                               ),
    .req_i  (mem_req                              ),
    .we_i   (mem_we                               ),
    // Drop the byte offset within an L2 word; the SRAM is word addressed
    .addr_i (mem_addr[L2ByteOffset +: L2AddrWidth]),
    .wdata_i(mem_wdata                            ),
    .be_i   (mem_strb                             ),
    .rdata_o(mem_rdata                            )
  );

  /*************
   *  Bootrom  *
   *************/

  // Memory
  logic      bootrom_req;
  logic      bootrom_rvalid;
  addr_t     bootrom_addr;
  axi_data_t bootrom_rdata;

  // Only the read path is connected; the write-related outputs are left open
  axi2mem #(
    .axi_req_t (axi_system_req_t ),
    .axi_resp_t(axi_system_resp_t),
    .AddrWidth (AddrWidth        ),
    .DataWidth (AxiDataWidth     ),
    .IdWidth   (AxiSystemIdWidth ),
    .NumBanks  (1                ),
    .BufDepth  (2                )
  ) i_axi2mem_bootrom (
    .clk_i       (clk_i                ),
    .rst_ni      (rst_ni               ),
    .busy_o      (/* unused */         ),
    .axi_req_i   (axi_mem_req[Bootrom] ),
    .axi_resp_o  (axi_mem_resp[Bootrom]),
    .mem_req_o   (bootrom_req          ),
    .mem_gnt_i   (bootrom_req          ), // every request is granted immediately
    .mem_addr_o  (bootrom_addr         ),
    .mem_wdata_o (/* unused */         ),
    .mem_strb_o  (/* unused */         ),
    .mem_atop_o  (/* unused */         ),
    .mem_we_o    (/* unused */         ),
    .mem_rvalid_i(bootrom_rvalid       ),
    .mem_rdata_i (bootrom_rdata        )
  );

  // bootrom_rvalid is bootrom_req registered through the `FF macro (reset value 1'b0)
  `FF(bootrom_rvalid, bootrom_req, 1'b0, clk_i, rst_ni)

  bootrom i_bootrom (
    .clk_i  (clk_i        ),
    .req_i  (bootrom_req  ),
    .addr_i (bootrom_addr ),
    .rdata_o(bootrom_rdata)
  );

  /***********************
   *  Control Registers  *
   ***********************/

  // Path: system AXI (AxiDataWidth) -> data-width converter (AxiLiteDataWidth)
  //       -> AXI to AXI-Lite -> control registers
  axi_ctrl_req_t      axi_ctrl_req;
  axi_ctrl_resp_t     axi_ctrl_resp;
  axi_lite_slv_req_t  axi_lite_ctrl_registers_req;
  axi_lite_slv_resp_t axi_lite_ctrl_registers_resp;

  axi_dw_converter #(
    .AxiMaxReads        (1                ), // Number of outstanding reads
    .AxiSlvPortDataWidth(AxiDataWidth     ), // Data width of the slv port
    .AxiMstPortDataWidth(AxiLiteDataWidth ), // Data width of the mst port
    .AxiAddrWidth       (AddrWidth        ), // Address width
    .AxiIdWidth         (AxiSystemIdWidth ), // ID width
    .aw_chan_t          (axi_system_aw_t  ), // AW Channel Type
    .mst_w_chan_t       (axi_ctrl_w_t     ), // W Channel Type for the mst port
    .slv_w_chan_t       (axi_system_w_t   ), // W Channel Type for the slv port
    .b_chan_t           (axi_system_b_t   ), // B Channel Type
    .ar_chan_t          (axi_system_ar_t  ), // AR Channel Type
    .mst_r_chan_t       (axi_ctrl_r_t     ), // R Channel Type for the mst port
    .slv_r_chan_t       (axi_system_r_t   ), // R Channel Type for the slv port
    .axi_mst_req_t      (axi_ctrl_req_t   ), // AXI Request Type for mst ports
    .axi_mst_resp_t     (axi_ctrl_resp_t  ), // AXI Response Type for mst ports
    .axi_slv_req_t      (axi_system_req_t ), // AXI Request Type for slv ports
    .axi_slv_resp_t     (axi_system_resp_t)  // AXI Response Type for slv ports
  ) i_axi_dw_converter_ctrl (
    .clk_i     (clk_i                      ),
    .rst_ni    (rst_ni                     ),
    // Slave interface
    .slv_req_i (axi_mem_req[CtrlRegisters] ),
    .slv_resp_o(axi_mem_resp[CtrlRegisters]),
    // Master interface
    .mst_req_o (axi_ctrl_req               ),
    .mst_resp_i(axi_ctrl_resp              )
  );

  axi_to_axi_lite #(
    .AxiAddrWidth   (AddrWidth          ),
    .AxiDataWidth   (AxiLiteDataWidth   ),
    .AxiIdWidth     (AxiSystemIdWidth   ),
    .AxiUserWidth   (1                  ),
    .AxiMaxReadTxns (1                  ),
    .AxiMaxWriteTxns(1                  ),
    .FallThrough    (1'b0               ),
    .full_req_t     (axi_ctrl_req_t     ),
    .full_resp_t    (axi_ctrl_resp_t    ),
    .lite_req_t     (axi_lite_slv_req_t ),
    .lite_resp_t    (axi_lite_slv_resp_t)
  ) i_axi_to_axi_lite (
    .clk_i     (clk_i                       ),
    .rst_ni    (rst_ni                      ),
    .test_i    (1'b0                        ),
    .slv_req_i (axi_ctrl_req                ),
    .slv_resp_o(axi_ctrl_resp               ),
    .mst_req_o (axi_lite_ctrl_registers_req ),
    .mst_resp_i(axi_lite_ctrl_registers_resp)
  );

  // Only the wake-up vector and eoc_valid are used from the register file
  ctrl_registers #(
    .NumRegs        (5                  ),
    .TCDMBaseAddr   (TCDMBaseAddr       ),
    .TCDMSize       (TCDMSize           ),
    .NumCores       (NumCores           ),
    .axi_lite_req_t (axi_lite_slv_req_t ),
    .axi_lite_resp_t(axi_lite_slv_resp_t)
  ) i_ctrl_registers (
    .clk_i                (clk_i                       ),
    .rst_ni               (rst_ni                      ),
    .axi_lite_slave_req_i (axi_lite_ctrl_registers_req ),
    .axi_lite_slave_resp_o(axi_lite_ctrl_registers_resp),
    .tcdm_start_address_o (/* Unused */                ),
    .tcdm_end_address_o   (/* Unused */                ),
    .num_cores_o          (/* Unused */                ),
    .wake_up_o            (wake_up                     ),
    .eoc_o                (/* Unused */                ),
    .eoc_valid_o          (eoc_valid_o                 )
  );

  assign busy_o = 1'b0;

  // From MemPool to the Host
  assign mst_req_o              = axi_mem_req[External];
  assign axi_mem_resp[External] = mst_resp_i;

  // From the Host to MemPool
  // The host uses system-width IDs; remap them to the tile ID width expected on
  // the crossbar slave ports. The host occupies the last crossbar slave port.
  axi_id_remap #(
    .AxiSlvPortIdWidth   (AxiSystemIdWidth ),
    .AxiSlvPortMaxUniqIds(1                ),
    .AxiMaxTxnsPerId     (1                ),
    .AxiMstPortIdWidth   (AxiTileIdWidth   ),
    .slv_req_t           (axi_system_req_t ),
    .slv_resp_t          (axi_system_resp_t),
    .mst_req_t           (axi_tile_req_t   ),
    .mst_resp_t          (axi_tile_resp_t  )
  ) i_axi_id_remap (
    .clk_i     (clk_i                        ),
    .rst_ni    (rst_ni                       ),
    .slv_req_i (slv_req_i                    ),
    .slv_resp_o(slv_resp_o                   ),
    .mst_req_o (axi_mst_req[NumAXIMasters-1] ),
    .mst_resp_i(axi_mst_resp[NumAXIMasters-1])
  );

endmodule : mempool_system
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
module mempool_tile
import mempool_pkg::*;
import cf_math_pkg::idx_width;
#(
// TCDM
parameter addr_t TCDMBaseAddr = 32'b0,
// Boot address
parameter logic [31:0] BootAddr = 32'h0000_1000,
// Dependent parameters. DO NOT CHANGE.
parameter int unsigned NumCaches = NumCoresPerTile / NumCoresPerCache
) (
// Clock and reset
input logic clk_i,
input logic rst_ni,
// Scan chain
input logic scan_enable_i,
input logic scan_data_i,
output logic scan_data_o,
// Tile ID
input logic [idx_width(NumTiles)-1:0] tile_id_i,
// TCDM Master interfaces
output tcdm_master_req_t [NumGroups-1:0] tcdm_master_req_o,
output logic [NumGroups-1:0] tcdm_master_req_valid_o,
input logic [NumGroups-1:0] tcdm_master_req_ready_i,
input tcdm_master_resp_t [NumGroups-1:0] tcdm_master_resp_i,
input logic [NumGroups-1:0] tcdm_master_resp_valid_i,
output logic [NumGroups-1:0] tcdm_master_resp_ready_o,
// TCDM slave interfaces
input tcdm_slave_req_t [NumGroups-1:0] tcdm_slave_req_i,
input logic [NumGroups-1:0] tcdm_slave_req_valid_i,
output logic [NumGroups-1:0] tcdm_slave_req_ready_o,
output tcdm_slave_resp_t [NumGroups-1:0] tcdm_slave_resp_o,
output logic [NumGroups-1:0] tcdm_slave_resp_valid_o,
input logic [NumGroups-1:0] tcdm_slave_resp_ready_i,
// AXI Interface
output axi_tile_req_t axi_mst_req_o,
input axi_tile_resp_t axi_mst_resp_i,
// Wake up interface
input logic [NumCoresPerTile-1:0] wake_up_i
);
/****************
* Includes *
****************/
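// Pulls in the register helper macros from common_cells (e.g. the FF macro used by mempool_system earlier in this file).
// NOTE(review): no register macro is used inside mempool_tile itself; confirm whether this include is still needed here.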
`include "common_cells/registers.svh"
/*****************
* Definitions *
*****************/
import snitch_pkg::dreq_t;
import snitch_pkg::dresp_t;
typedef logic [idx_width(NumGroups)-1:0] group_id_t;
// TCDM Memory Region
localparam addr_t TCDMSize = NumBanks * TCDMSizePerBank;
localparam addr_t TCDMMask = ~(TCDMSize - 1);
// Local interconnect address width
typedef logic [idx_width(NumCoresPerTile + NumGroups)-1:0] local_req_interco_addr_t;
// Group ID: the group this tile belongs to, derived from the tile ID.
group_id_t group_id;
if (NumGroups == 1) begin: gen_group_id
  // Only one group exists, so the ID is a constant.
  assign group_id = '0;
end else begin: gen_group_id
  // The $clog2(NumGroups) most-significant bits of the tile ID select the group.
  assign group_id = tile_id_i[$clog2(NumTiles)-1 -: $clog2(NumGroups)];
end: gen_group_id
/***********
* Cores *
***********/
// Instruction interfaces
addr_t [NumCaches-1:0][NumCoresPerCache-1:0] snitch_inst_addr;
data_t [NumCaches-1:0][NumCoresPerCache-1:0] snitch_inst_data;
logic [NumCaches-1:0][NumCoresPerCache-1:0] snitch_inst_valid;
logic [NumCaches-1:0][NumCoresPerCache-1:0] snitch_inst_ready;
// Data interfaces
addr_t [NumCoresPerTile-1:0] snitch_data_qaddr;
logic [NumCoresPerTile-1:0] snitch_data_qwrite;
amo_t [NumCoresPerTile-1:0] snitch_data_qamo;
data_t [NumCoresPerTile-1:0] snitch_data_qdata;
strb_t [NumCoresPerTile-1:0] snitch_data_qstrb;
meta_id_t [NumCoresPerTile-1:0] snitch_data_qid;
logic [NumCoresPerTile-1:0] snitch_data_qvalid;
logic [NumCoresPerTile-1:0] snitch_data_qready;
data_t [NumCoresPerTile-1:0] snitch_data_pdata;
logic [NumCoresPerTile-1:0] snitch_data_perror;
meta_id_t [NumCoresPerTile-1:0] snitch_data_pid;
logic [NumCoresPerTile-1:0] snitch_data_pvalid;
logic [NumCoresPerTile-1:0] snitch_data_pready;
// Core complexes: one slot per core of the tile. When TrafficGeneration is set, no core is
// instantiated and the request-side signals a core would drive are held at zero instead.
for (genvar core = 0; unsigned'(core) < NumCoresPerTile; core++) begin: gen_cores
  // Fetch port used by this core: instruction cache index and port index within that cache.
  localparam int unsigned CacheIdx = core / NumCoresPerCache;
  localparam int unsigned PortIdx  = core % NumCoresPerCache;

  // Hart ID: tile ID in the upper bits, core index within the tile in the lower bits.
  logic [31:0] hart_id;
  assign hart_id = {unsigned'(tile_id_i), core[idx_width(NumCoresPerTile)-1:0]};

  if (TrafficGeneration) begin
    // No core present: tie off the data request channel and the response ready ...
    assign snitch_data_qaddr[core]  = '0;
    assign snitch_data_qwrite[core] = '0;
    assign snitch_data_qamo[core]   = '0;
    assign snitch_data_qdata[core]  = '0;
    assign snitch_data_qstrb[core]  = '0;
    assign snitch_data_qid[core]    = '0;
    assign snitch_data_qvalid[core] = '0;
    assign snitch_data_pready[core] = '0;
    // ... and the instruction fetch request.
    assign snitch_inst_addr[CacheIdx][PortIdx]  = '0;
    assign snitch_inst_valid[CacheIdx][PortIdx] = '0;
  end else begin: gen_mempool_cc
    mempool_cc #(
      .BootAddr (BootAddr)
    ) riscv_core (
      .clk_i         (clk_i                               ),
      .rst_i         (!rst_ni                             ), // active-high reset on the core side
      .hart_id_i     (hart_id                             ),
      // Instruction fetch port
      .inst_addr_o   (snitch_inst_addr[CacheIdx][PortIdx] ),
      .inst_data_i   (snitch_inst_data[CacheIdx][PortIdx] ),
      .inst_valid_o  (snitch_inst_valid[CacheIdx][PortIdx]),
      .inst_ready_i  (snitch_inst_ready[CacheIdx][PortIdx]),
      // Data request channel
      .data_qaddr_o  (snitch_data_qaddr[core]             ),
      .data_qwrite_o (snitch_data_qwrite[core]            ),
      .data_qamo_o   (snitch_data_qamo[core]              ),
      .data_qdata_o  (snitch_data_qdata[core]             ),
      .data_qstrb_o  (snitch_data_qstrb[core]             ),
      .data_qid_o    (snitch_data_qid[core]               ),
      .data_qvalid_o (snitch_data_qvalid[core]            ),
      .data_qready_i (snitch_data_qready[core]            ),
      // Data response channel
      .data_pdata_i  (snitch_data_pdata[core]             ),
      .data_perror_i (snitch_data_perror[core]            ),
      .data_pid_i    (snitch_data_pid[core]               ),
      .data_pvalid_i (snitch_data_pvalid[core]            ),
      .data_pready_o (snitch_data_pready[core]            ),
      // Wake-up
      .wake_up_sync_i(wake_up_i[core]                     ),
      // Core events
      .core_events_o (/* Unused */                        )
    );
  end
end
/***********************
* Instruction Cache *
***********************/
// Instruction interface
addr_t [NumCaches-1:0] refill_qaddr;
logic [NumCaches-1:0][7:0] refill_qlen;
logic [NumCaches-1:0] refill_qvalid;
logic [NumCaches-1:0] refill_qready;
logic [NumCaches-1:0][AxiDataWidth-1:0] refill_pdata;
logic [NumCaches-1:0] refill_perror;
logic [NumCaches-1:0] refill_pvalid;
logic [NumCaches-1:0] refill_plast;
logic [NumCaches-1:0] refill_pready;
// One instruction cache per group of NumCoresPerCache cores. Cache c serves the fetch ports
// snitch_inst_*[c] and issues its refills on refill_*[c].
for (genvar c = 0; unsigned'(c) < NumCaches; c++) begin: gen_caches
snitch_icache #(
.NR_FETCH_PORTS (NumCoresPerCache ),
/// Cache Line Width
.L0_LINE_COUNT (4 ),
.LINE_WIDTH (ICacheLineWidth ),
.LINE_COUNT (ICacheSizeByte / (ICacheSets * ICacheLineWidth / 8) ),
.SET_COUNT (ICacheSets ),
.FETCH_AW (AddrWidth ),
.FETCH_DW (DataWidth ),
.FILL_AW (AddrWidth ),
.FILL_DW (AxiDataWidth ),
.L1_TAG_SCM (1 ),
/// Make the early cache latch-based. This reduces latency at the cost of
/// increased combinatorial path lengths and the hassle of having latches in
/// the design.
.EARLY_LATCH (1 ),
.L0_EARLY_TAG_WIDTH (11 ),
.ISO_CROSSING (0 )
) i_snitch_icache (
.clk_i (clk_i ),
// Both clock inputs are driven by the same tile clock
.clk_d2_i (clk_i ),
.rst_ni (rst_ni ),
// NOTE(review): snitch_inst_valid[c] is NumCoresPerCache bits wide; confirm the width
// enable_prefetching_i expects and that driving it from the fetch valids is intended.
.enable_prefetching_i (snitch_inst_valid[c] ),
.icache_events_o (/* Unused */ ),
// Flush is never requested from the tile
.flush_valid_i (1'b0 ),
.flush_ready_o (/* Unused */ ),
.inst_addr_i (snitch_inst_addr[c] ),
.inst_data_o (snitch_inst_data[c] ),
// Every fetch port is marked cacheable
.inst_cacheable_i ({NumCoresPerCache{1'b1}}),
.inst_valid_i (snitch_inst_valid[c] ),
.inst_ready_o (snitch_inst_ready[c] ),
.inst_error_o (/* Unused */ ),
// Refill interface towards the AXI adapter
.refill_qaddr_o (refill_qaddr[c] ),
.refill_qlen_o (refill_qlen[c] ),
.refill_qvalid_o (refill_qvalid[c] ),
.refill_qready_i (refill_qready[c] ),
.refill_pdata_i (refill_pdata[c] ),
.refill_perror_i (refill_perror[c] ),
.refill_pvalid_i (refill_pvalid[c] ),
.refill_plast_i (refill_plast[c] ),
.refill_pready_o (refill_pready[c] )
);
end
/******************
* Memory Banks *
******************/
// Bank metadata
typedef struct packed {
local_req_interco_addr_t ini_addr;
meta_id_t meta_id;
tile_group_id_t tile_id;
tile_core_id_t core_id;
} bank_metadata_t;
// Memory interfaces
logic [NumBanksPerTile-1:0] bank_req_valid;
logic [NumBanksPerTile-1:0] bank_req_ready;
local_req_interco_addr_t [NumBanksPerTile-1:0] bank_req_ini_addr;
tcdm_slave_req_t [NumBanksPerTile-1:0] bank_req_payload;
logic [NumBanksPerTile-1:0] bank_resp_valid;
logic [NumBanksPerTile-1:0] bank_resp_ready;
tcdm_slave_resp_t [NumBanksPerTile-1:0] bank_resp_payload;
local_req_interco_addr_t [NumBanksPerTile-1:0] bank_resp_ini_addr;
// One TCDM bank per iteration: a tcdm_adapter in front of a single-port tc_sram. The routing
// information of each request travels through the adapter as metadata and is unpacked again
// on the response side.
for (genvar b = 0; unsigned'(b) < NumBanksPerTile; b++) begin: gen_banks
bank_metadata_t meta_in;
bank_metadata_t meta_out;
// Adapter <-> SRAM signals
logic req_valid;
logic req_write;
bank_addr_t req_addr;
data_t req_wdata;
data_t resp_rdata;
strb_t req_be;
// Un/Pack metadata
// ini_addr is the local request crossbar input the request arrived on; the tile_id field
// carries the ini_addr field of the request payload.
assign meta_in = '{
ini_addr : bank_req_ini_addr[b],
meta_id : bank_req_payload[b].wdata.meta_id,
core_id : bank_req_payload[b].wdata.core_id,
tile_id : bank_req_payload[b].ini_addr
};
assign bank_resp_ini_addr[b] = meta_out.ini_addr;
assign bank_resp_payload[b].rdata.meta_id = meta_out.meta_id;
assign bank_resp_payload[b].ini_addr = meta_out.tile_id;
assign bank_resp_payload[b].rdata.core_id = meta_out.core_id;
assign bank_resp_payload[b].rdata.amo = '0; // Don't care
tcdm_adapter #(
.AddrWidth (TCDMAddrMemWidth),
.DataWidth (DataWidth ),
.metadata_t (bank_metadata_t ),
.LrScEnable (LrScEnable ),
.RegisterAmo(1'b0 )
) i_tcdm_adapter (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.in_valid_i (bank_req_valid[b] ),
.in_ready_o (bank_req_ready[b] ),
// Skip the low idx_width(NumBanksPerTile) bits (bank select); the next TCDMAddrMemWidth bits
// address the word inside the bank
.in_address_i(bank_req_payload[b].tgt_addr[idx_width(NumBanksPerTile) +: TCDMAddrMemWidth]),
.in_amo_i (bank_req_payload[b].wdata.amo ),
.in_write_i (bank_req_payload[b].wen ),
.in_wdata_i (bank_req_payload[b].wdata.data ),
.in_meta_i (meta_in ),
.in_be_i (bank_req_payload[b].be ),
.in_valid_o (bank_resp_valid[b] ),
.in_ready_i (bank_resp_ready[b] ),
.in_rdata_o (bank_resp_payload[b].rdata.data ),
.in_meta_o (meta_out ),
.out_req_o (req_valid ),
.out_add_o (req_addr ),
.out_write_o (req_write ),
.out_wdata_o (req_wdata ),
.out_be_o (req_be ),
.out_rdata_i (resp_rdata )
);
// Bank
// Single-port memory with 2**TCDMAddrMemWidth words of DataWidth bits
tc_sram #(
.DataWidth(DataWidth ),
.NumWords (2**TCDMAddrMemWidth),
.NumPorts (1 )
) mem_bank (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.req_i (req_valid ),
.we_i (req_write ),
.addr_i (req_addr ),
.wdata_i(req_wdata ),
.be_i (req_be ),
.rdata_o(resp_rdata)
);
end
/***************
* Registers *
***************/
// These are required to break dependencies between request and response, establishing a correct
// valid/ready handshake.
tcdm_master_req_t [NumGroups-1:0] prereg_tcdm_master_req;
logic [NumGroups-1:0] prereg_tcdm_master_req_valid;
logic [NumGroups-1:0] prereg_tcdm_master_req_ready;
tcdm_slave_req_t [NumGroups-1:0] postreg_tcdm_slave_req;
logic [NumGroups-1:0] postreg_tcdm_slave_req_valid;
logic [NumGroups-1:0] postreg_tcdm_slave_req_ready;
tcdm_slave_resp_t [NumGroups-1:0] prereg_tcdm_slave_resp;
logic [NumGroups-1:0] prereg_tcdm_slave_resp_valid;
logic [NumGroups-1:0] prereg_tcdm_slave_resp_ready;
tcdm_master_resp_t [NumGroups-1:0] postreg_tcdm_master_resp;
tile_core_id_t [NumGroups-1:0] postreg_tcdm_master_resp_ini_sel;
logic [NumGroups-1:0] postreg_tcdm_master_resp_valid;
logic [NumGroups-1:0] postreg_tcdm_master_resp_ready;
// Break paths between request and response with registers
// Per group port h: the two outgoing channels (master request, slave response) go through a
// spill_register, the two incoming channels (master response, slave request) go through a
// fall_through_register.
for (genvar h = 0; unsigned'(h) < NumGroups; h++) begin: gen_tcdm_registers
// Outgoing requests of this tile's cores
spill_register #(
.T(tcdm_master_req_t)
) i_tcdm_master_req_register (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.data_i (prereg_tcdm_master_req[h] ),
.valid_i(prereg_tcdm_master_req_valid[h]),
.ready_o(prereg_tcdm_master_req_ready[h]),
.data_o (tcdm_master_req_o[h] ),
.valid_o(tcdm_master_req_valid_o[h] ),
.ready_i(tcdm_master_req_ready_i[h] )
);
// Incoming responses to this tile's cores
fall_through_register #(
.T(tcdm_master_resp_t)
) i_tcdm_master_resp_register (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clr_i (1'b0 ),
.testmode_i(1'b0 ),
.data_i (tcdm_master_resp_i[h] ),
.valid_i (tcdm_master_resp_valid_i[h] ),
.ready_o (tcdm_master_resp_ready_o[h] ),
.data_o (postreg_tcdm_master_resp[h] ),
.valid_o (postreg_tcdm_master_resp_valid[h]),
.ready_i (postreg_tcdm_master_resp_ready[h])
);
// Helper signal to drive the remote response interconnect
// The core_id carried in the response selects the crossbar output (the requesting core)
assign postreg_tcdm_master_resp_ini_sel[h] = postreg_tcdm_master_resp[h].rdata.core_id;
// Incoming requests targeting this tile's banks
fall_through_register #(
.T(tcdm_slave_req_t)
) i_tcdm_slave_req_register (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clr_i (1'b0 ),
.testmode_i(1'b0 ),
.data_i (tcdm_slave_req_i[h] ),
.valid_i (tcdm_slave_req_valid_i[h] ),
.ready_o (tcdm_slave_req_ready_o[h] ),
.data_o (postreg_tcdm_slave_req[h] ),
.valid_o (postreg_tcdm_slave_req_valid[h]),
.ready_i (postreg_tcdm_slave_req_ready[h])
);
// Outgoing responses from this tile's banks
spill_register #(
.T(tcdm_slave_resp_t)
) i_tcdm_slave_resp_register (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.data_i (prereg_tcdm_slave_resp[h] ),
.valid_i(prereg_tcdm_slave_resp_valid[h]),
.ready_o(prereg_tcdm_slave_resp_ready[h]),
.data_o (tcdm_slave_resp_o[h] ),
.valid_o(tcdm_slave_resp_valid_o[h] ),
.ready_i(tcdm_slave_resp_ready_i[h] )
);
end: gen_tcdm_registers
/****************************
* Remote Interconnects *
****************************/
tcdm_master_req_t [NumCoresPerTile-1:0] remote_req_interco;
logic [NumCoresPerTile-1:0] remote_req_interco_valid;
logic [NumCoresPerTile-1:0] remote_req_interco_ready;
group_id_t [NumCoresPerTile-1:0] remote_req_interco_tgt_sel;
tcdm_master_resp_t [NumCoresPerTile-1:0] remote_resp_interco;
logic [NumCoresPerTile-1:0] remote_resp_interco_valid;
logic [NumCoresPerTile-1:0] remote_resp_interco_ready;
// Remote request crossbar: NumCoresPerTile inputs (one per core) to NumGroups outputs, which
// feed the master request registers. The output is chosen by remote_req_interco_tgt_sel.
stream_xbar #(
.NumInp (NumCoresPerTile ),
.NumOut (NumGroups ),
.payload_t(tcdm_master_req_t)
) i_remote_req_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i(1'b0 ),
// External priority flag
.rr_i ('0 ),
// Master
.data_i (remote_req_interco ),
.valid_i(remote_req_interco_valid ),
.ready_o(remote_req_interco_ready ),
.sel_i (remote_req_interco_tgt_sel ),
// Slave
.data_o (prereg_tcdm_master_req ),
.valid_o(prereg_tcdm_master_req_valid),
.ready_i(prereg_tcdm_master_req_ready),
.idx_o (/* Unused */ )
);
// Remote response crossbar: NumGroups inputs (the registered master responses) back to
// NumCoresPerTile outputs. The output is chosen by the core_id carried in each response
// (postreg_tcdm_master_resp_ini_sel).
stream_xbar #(
.NumInp (NumGroups ),
.NumOut (NumCoresPerTile ),
.payload_t(tcdm_master_resp_t)
) i_remote_resp_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i(1'b0 ),
// External priority flag
.rr_i ('0 ),
// Master
.data_i (postreg_tcdm_master_resp ),
.valid_i(postreg_tcdm_master_resp_valid ),
.ready_o(postreg_tcdm_master_resp_ready ),
.sel_i (postreg_tcdm_master_resp_ini_sel),
// Slave
.data_o (remote_resp_interco ),
.valid_o(remote_resp_interco_valid ),
.ready_i(remote_resp_interco_ready ),
.idx_o (/* Unused */ )
);
/**********************
* Local Intercos *
**********************/
logic [NumCoresPerTile-1:0] local_req_interco_valid;
logic [NumCoresPerTile-1:0] local_req_interco_ready;
tcdm_slave_req_t [NumCoresPerTile-1:0] local_req_interco_payload;
logic [NumCoresPerTile-1:0] local_resp_interco_valid;
logic [NumCoresPerTile-1:0] local_resp_interco_ready;
tcdm_slave_resp_t [NumCoresPerTile-1:0] local_resp_interco_payload;
// Bank select for every input of the local request crossbar. Entries [NumCoresPerTile-1:0]
// belong to the tile's own cores, entries [NumCoresPerTile +: NumGroups] to the requests
// arriving from other tiles. In both cases the low idx_width(NumBanksPerTile) bits of the
// target address pick the bank.
logic [NumCoresPerTile+NumGroups-1:0][idx_width(NumBanksPerTile)-1:0] local_req_interco_tgt_sel;
for (genvar j = 0; unsigned'(j) < NumCoresPerTile; j++) begin: gen_local_req_interco_tgt_sel_local
assign local_req_interco_tgt_sel[j] = local_req_interco_payload[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
end: gen_local_req_interco_tgt_sel_local
for (genvar j = 0; unsigned'(j) < NumGroups; j++) begin: gen_local_req_interco_tgt_sel_remote
assign local_req_interco_tgt_sel[j + NumCoresPerTile] = postreg_tcdm_slave_req[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
end: gen_local_req_interco_tgt_sel_remote
// Local request crossbar: cores and incoming remote requests to the banks. The concatenations
// place the remote ports in the upper indices and the cores in the lower ones, matching the
// layout of local_req_interco_tgt_sel above.
stream_xbar #(
.NumInp (NumCoresPerTile+NumGroups),
.NumOut (NumBanksPerTile ),
.payload_t(tcdm_slave_req_t )
) i_local_req_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i(1'b0 ),
// External priority flag
.rr_i ('0 ),
// Master
.data_i ({postreg_tcdm_slave_req, local_req_interco_payload} ),
.valid_i({postreg_tcdm_slave_req_valid, local_req_interco_valid}),
.ready_o({postreg_tcdm_slave_req_ready, local_req_interco_ready}),
.sel_i (local_req_interco_tgt_sel ),
// Slave
.data_o (bank_req_payload ),
.valid_o(bank_req_valid ),
.ready_i(bank_req_ready ),
// Recorded per bank request and stored in the bank metadata (ini_addr) so that the response
// crossbar below can route the answer back
.idx_o (bank_req_ini_addr )
);
// Local response crossbar: banks back to cores and remote ports, selected by the ini_addr
// that was recorded when the request was accepted.
stream_xbar #(
.NumInp (NumBanksPerTile ),
.NumOut (NumCoresPerTile+NumGroups),
.payload_t(tcdm_slave_resp_t )
) i_local_resp_interco (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i(1'b0 ),
// External priority flag
.rr_i ('0 ),
// Master
.data_i (bank_resp_payload ),
.valid_i(bank_resp_valid ),
.ready_o(bank_resp_ready ),
.sel_i (bank_resp_ini_addr ),
// Slave
.data_o ({prereg_tcdm_slave_resp, local_resp_interco_payload} ),
.valid_o({prereg_tcdm_slave_resp_valid, local_resp_interco_valid}),
.ready_i({prereg_tcdm_slave_resp_ready, local_resp_interco_ready}),
.idx_o (/* Unused */ )
);
/*******************
* Core De/mux *
*******************/
// SoC requests
dreq_t [NumCoresPerTile-1:0] soc_data_q;
logic [NumCoresPerTile-1:0] soc_data_qvalid;
logic [NumCoresPerTile-1:0] soc_data_qready;
dresp_t [NumCoresPerTile-1:0] soc_data_p;
logic [NumCoresPerTile-1:0] soc_data_pvalid;
logic [NumCoresPerTile-1:0] soc_data_pready;
// Address map
// Destinations a core request can take. The numeric values match the positions used in the
// {local, remote} concatenations on the TCDM ports in gen_core_mux (remote = 0, local = 1).
typedef enum int unsigned {
TCDM_EXTERNAL = 0, TCDM_LOCAL, SOC
} addr_map_slave_t;
// Three mask/value rules handed to every core's address decoder (address_map_i).
address_map_t [2:0] mask_map;
assign mask_map = '{
// Lowest priority: send request through the SoC port
// An all-zero mask with value zero is a catch-all for a masked compare
'{slave_idx: SOC,
mask : '0,
value : '0
},
// Send request through the external TCDM port
// Uses only the TCDM region mask and base address
'{slave_idx: TCDM_EXTERNAL,
mask : TCDMMask,
value : TCDMBaseAddr
},
// Highest priority: send request through the local TCDM port
// In addition to the TCDM region, the tile index field (located directly above the byte
// offset and bank index bits) must equal this tile's ID
'{slave_idx: TCDM_LOCAL,
mask : TCDMMask | ({idx_width(NumTiles){1'b1}} << (ByteOffset + $clog2(NumBanksPerTile))),
value : TCDMBaseAddr | (tile_id_i << (ByteOffset + $clog2(NumBanksPerTile)))
}
};
// Per-core data path between a core (or traffic generator) and the interconnects.
//
// For every core c this block
//   - scrambles the core's data address (address_scrambler),
//   - hands it, together with the address map (mask_map), to a tcdm_shim, or to a
//     traffic_generator when TrafficGeneration is set,
//   - rearranges the resulting local/remote target addresses into the tcdm_addr_t layout
//     expected by the local and remote request crossbars.
//
// In all two-element TCDM port concatenations below, the left element (index 1) is the local
// port and the right element (index 0) is the remote port.
for (genvar c = 0; unsigned'(c) < NumCoresPerTile; c++) begin: gen_core_mux
  // Remove tile index from local_req_interco_addr_int, since it will not be used for routing.
  addr_t local_req_interco_addr_int;
  assign local_req_interco_payload[c].tgt_addr =
    tcdm_addr_t'({local_req_interco_addr_int[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTiles) +: TCDMAddrMemWidth], // Bank address
                  local_req_interco_addr_int[ByteOffset +: idx_width(NumBanksPerTile)]});                                    // Bank

  // Switch tile and bank indexes for correct upper level routing, and remove the group index
  addr_t prescramble_tcdm_req_tgt_addr;
  if (NumTilesPerGroup == 1) begin : gen_remote_req_interco_tgt_addr
    // A single tile per group: there is no tile index to carry along
    assign remote_req_interco[c].tgt_addr =
      tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                    prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)]});                                     // Bank
  end else begin : gen_remote_req_interco_tgt_addr
    assign remote_req_interco[c].tgt_addr =
      tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTilesPerGroup) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                    prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)],                                                                  // Bank
                    prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) +: $clog2(NumTilesPerGroup)]});                                     // Tile
  end

  if (NumGroups == 1) begin : gen_remote_req_interco_tgt_sel
    assign remote_req_interco_tgt_sel[c] = 1'b0;
  end else begin : gen_remote_req_interco_tgt_sel
    // Output port depends on both the target and initiator group
    assign remote_req_interco_tgt_sel[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
  end

  // We don't care about these
  assign local_req_interco_payload[c].wdata.core_id = '0;
  assign local_req_interco_payload[c].ini_addr      = '0;
  assign soc_data_q[c].id                           = '0;

  // Constant value
  assign remote_req_interco[c].wdata.core_id = c[idx_width(NumCoresPerTile)-1:0];

  // Scramble address before entering TCDM shim for sequential+interleaved memory map
  addr_t snitch_data_qaddr_scrambled;
  address_scrambler #(
    .AddrWidth         (AddrWidth        ),
    .ByteOffset        (ByteOffset       ),
    .NumTiles          (NumTiles         ),
    .NumBanksPerTile   (NumBanksPerTile  ),
    .Bypass            (0                ),
    .SeqMemSizePerTile (SeqMemSizePerTile)
  ) i_address_scrambler (
    .address_i (snitch_data_qaddr[c]       ),
    .address_o (snitch_data_qaddr_scrambled)
  );

  if (!TrafficGeneration) begin: gen_tcdm_shim
    tcdm_shim #(
      .AddrWidth           (AddrWidth                         ),
      .DataWidth           (DataWidth                         ),
      .MaxOutStandingReads (snitch_pkg::NumIntOutstandingLoads),
      .NrTCDM              (2                                 ),
      .NrSoC               (1                                 ),
      .NumRules            (3                                 )
    ) i_tcdm_shim (
      .clk_i              (clk_i                                                                            ),
      .rst_ni             (rst_ni                                                                           ),
      // to TCDM --> FF Connection to outside of tile
      .tcdm_req_valid_o   ({local_req_interco_valid[c], remote_req_interco_valid[c]}                        ),
      .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}                      ),
      .tcdm_req_wen_o     ({local_req_interco_payload[c].wen, remote_req_interco[c].wen}                    ),
      .tcdm_req_wdata_o   ({local_req_interco_payload[c].wdata.data, remote_req_interco[c].wdata.data}      ),
      .tcdm_req_amo_o     ({local_req_interco_payload[c].wdata.amo, remote_req_interco[c].wdata.amo}        ),
      .tcdm_req_id_o      ({local_req_interco_payload[c].wdata.meta_id, remote_req_interco[c].wdata.meta_id}),
      .tcdm_req_be_o      ({local_req_interco_payload[c].be, remote_req_interco[c].be}                      ),
      .tcdm_req_ready_i   ({local_req_interco_ready[c], remote_req_interco_ready[c]}                        ),
      .tcdm_resp_valid_i  ({local_resp_interco_valid[c], remote_resp_interco_valid[c]}                      ),
      .tcdm_resp_ready_o  ({local_resp_interco_ready[c], remote_resp_interco_ready[c]}                      ),
      .tcdm_resp_rdata_i  ({local_resp_interco_payload[c].rdata.data, remote_resp_interco[c].rdata.data}    ),
      .tcdm_resp_id_i     ({local_resp_interco_payload[c].rdata.meta_id, remote_resp_interco[c].rdata.meta_id}),
      // to SoC
      .soc_qaddr_o        (soc_data_q[c].addr                                                               ),
      .soc_qwrite_o       (soc_data_q[c].write                                                              ),
      .soc_qamo_o         (soc_data_q[c].amo                                                                ),
      .soc_qdata_o        (soc_data_q[c].data                                                               ),
      .soc_qstrb_o        (soc_data_q[c].strb                                                               ),
      .soc_qvalid_o       (soc_data_qvalid[c]                                                               ),
      .soc_qready_i       (soc_data_qready[c]                                                               ),
      .soc_pdata_i        (soc_data_p[c].data                                                               ),
      .soc_perror_i       (soc_data_p[c].error                                                              ),
      .soc_pvalid_i       (soc_data_pvalid[c]                                                               ),
      .soc_pready_o       (soc_data_pready[c]                                                               ),
      // from core
      .data_qaddr_i       (snitch_data_qaddr_scrambled                                                      ),
      .data_qwrite_i      (snitch_data_qwrite[c]                                                            ),
      .data_qamo_i        (snitch_data_qamo[c]                                                              ),
      .data_qdata_i       (snitch_data_qdata[c]                                                             ),
      .data_qstrb_i       (snitch_data_qstrb[c]                                                             ),
      .data_qid_i         (snitch_data_qid[c]                                                               ),
      .data_qvalid_i      (snitch_data_qvalid[c]                                                            ),
      .data_qready_o      (snitch_data_qready[c]                                                            ),
      .data_pdata_o       (snitch_data_pdata[c]                                                             ),
      .data_perror_o      (snitch_data_perror[c]                                                            ),
      .data_pid_o         (snitch_data_pid[c]                                                               ),
      .data_pvalid_o      (snitch_data_pvalid[c]                                                            ),
      .data_pready_i      (snitch_data_pready[c]                                                            ),
      .address_map_i      (mask_map                                                                         )
    );
  end else begin: gen_traffic_generator
    // The traffic generator drives the TCDM ports directly; the core-facing and SoC-facing
    // signals are tied off below.
    traffic_generator #(
      .NumRules           (3                                 ),
      .TCDMBaseAddr       (TCDMBaseAddr                      ),
      .MaxOutStandingReads(snitch_pkg::NumIntOutstandingLoads)
    ) i_traffic_gen (
      .clk_i              (clk_i                                                                            ),
      .rst_ni             (rst_ni                                                                           ),
      .core_id_i          ({tile_id_i, c[idx_width(NumCoresPerTile)-1:0]}                                   ),
      // Address map
      .address_map_i      (mask_map                                                                         ),
      // To TCDM
      .tcdm_req_valid_o   ({local_req_interco_valid[c], remote_req_interco_valid[c]}                        ),
      .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}                      ),
      .tcdm_req_wen_o     ({local_req_interco_payload[c].wen, remote_req_interco[c].wen}                    ),
      .tcdm_req_wdata_o   ({local_req_interco_payload[c].wdata.data, remote_req_interco[c].wdata.data}      ),
      .tcdm_req_amo_o     ({local_req_interco_payload[c].wdata.amo, remote_req_interco[c].wdata.amo}        ),
      .tcdm_req_id_o      ({local_req_interco_payload[c].wdata.meta_id, remote_req_interco[c].wdata.meta_id}),
      .tcdm_req_be_o      ({local_req_interco_payload[c].be, remote_req_interco[c].be}                      ),
      .tcdm_req_ready_i   ({local_req_interco_ready[c], remote_req_interco_ready[c]}                        ),
      .tcdm_resp_valid_i  ({local_resp_interco_valid[c], remote_resp_interco_valid[c]}                      ),
      .tcdm_resp_ready_o  ({local_resp_interco_ready[c], remote_resp_interco_ready[c]}                      ),
      .tcdm_resp_rdata_i  ({local_resp_interco_payload[c].rdata.data, remote_resp_interco[c].rdata.data}    ),
      .tcdm_resp_id_i     ({local_resp_interco_payload[c].rdata.meta_id, remote_resp_interco[c].rdata.meta_id})
    );

    // Tie unused signals
    assign soc_data_q[c].addr    = '0;
    assign soc_data_q[c].write   = '0;
    assign soc_data_q[c].amo     = '0;
    assign soc_data_q[c].data    = '0;
    assign soc_data_q[c].strb    = '0;
    assign soc_data_qvalid[c]    = '0;
    assign soc_data_pready[c]    = '0;
    assign snitch_data_qready[c] = '0;
    assign snitch_data_pdata[c]  = '0;
    assign snitch_data_perror[c] = '0;
    assign snitch_data_pid[c]    = '0;
    assign snitch_data_pvalid[c] = '0;
  end
end
/****************
* AXI Plug *
****************/
// Single request/response pair shared by all cores of the tile on the way to the AXI adapter
snitch_pkg::dreq_t soc_req_o;
snitch_pkg::dresp_t soc_resp_i;
logic soc_qvalid;
logic soc_qready;
logic soc_pvalid;
logic soc_pready;
// We don't care about this
assign soc_resp_i.id = 'x;
// Merges the NumCoresPerTile per-core SoC ports (soc_data_*) into the single soc_req_o /
// soc_resp_i pair and hands the responses back to the cores.
snitch_demux #(
.NrPorts (NumCoresPerTile ),
.req_t (snitch_pkg::dreq_t ),
.resp_t (snitch_pkg::dresp_t)
) i_snitch_demux_data (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
// Inputs
.req_payload_i (soc_data_q ),
.req_valid_i (soc_data_qvalid),
.req_ready_o (soc_data_qready),
.resp_payload_o(soc_data_p ),
.resp_last_o (/* Unused */ ),
.resp_valid_o (soc_data_pvalid),
.resp_ready_i (soc_data_pready),
// Output
.req_payload_o (soc_req_o ),
.req_valid_o (soc_qvalid ),
.req_ready_i (soc_qready ),
.resp_payload_i(soc_resp_i ),
// Every response is flagged as the last one
.resp_last_i (1'b1 ),
.resp_valid_i (soc_pvalid ),
.resp_ready_o (soc_pready )
);
// Core request
// AXI buses of the two tile-internal masters (cores, instruction cache) and of the muxed
// tile port before the output slice
axi_core_req_t axi_cores_req, axi_cache_req;
axi_core_resp_t axi_cores_resp, axi_cache_resp;
axi_tile_req_t axi_mst_req;
axi_tile_resp_t axi_mst_resp;
// Converts the merged core data requests (soc_req_o) into AXI transactions
snitch_axi_adapter #(
.addr_t (snitch_pkg::addr_t),
.data_t (snitch_pkg::data_t),
.strb_t (snitch_pkg::strb_t),
.axi_mst_req_t (axi_core_req_t ),
.axi_mst_resp_t (axi_core_resp_t )
) i_snitch_core_axi_adapter (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.slv_qaddr_i (soc_req_o.addr ),
.slv_qwrite_i(soc_req_o.write ),
.slv_qamo_i (soc_req_o.amo ),
.slv_qdata_i (soc_req_o.data ),
// NOTE(review): constant size; presumably the AXI encoding for 4-byte transfers - confirm
.slv_qsize_i (3'b010 ),
.slv_qstrb_i (soc_req_o.strb ),
// Request length field is tied to zero
.slv_qrlen_i ('0 ),
.slv_qvalid_i(soc_qvalid ),
.slv_qready_o(soc_qready ),
.slv_pdata_o (soc_resp_i.data ),
.slv_perror_o(soc_resp_i.error),
.slv_plast_o (/* Unused */ ),
.slv_pvalid_o(soc_pvalid ),
.slv_pready_i(soc_pready ),
.axi_req_o (axi_cores_req ),
.axi_resp_i (axi_cores_resp )
);
// TODO: Add demux for the case where we have many instruction caches
// Converts the refill requests of instruction cache 0 into AXI read transactions. Only index
// 0 of the refill_* arrays is connected here.
snitch_axi_adapter #(
.addr_t (snitch_pkg::addr_t),
.data_t (axi_data_t ),
.strb_t (axi_strb_t ),
.axi_mst_req_t (axi_core_req_t ),
.axi_mst_resp_t (axi_core_resp_t )
) i_snitch_cache_axi_adapter (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.slv_qaddr_i (refill_qaddr[0] ),
// Write, atomic, write-data and strobe inputs are tied to zero
.slv_qwrite_i('0 ),
.slv_qamo_i ('0 ),
.slv_qdata_i ('0 ),
// Size derived from the AXI data width in bytes
.slv_qsize_i (3'($clog2(AxiDataWidth/8))),
.slv_qstrb_i ('0 ),
.slv_qrlen_i (refill_qlen[0] ),
.slv_qvalid_i(refill_qvalid[0] ),
.slv_qready_o(refill_qready[0] ),
.slv_pdata_o (refill_pdata[0] ),
.slv_perror_o(refill_perror[0] ),
.slv_plast_o (refill_plast[0] ),
.slv_pvalid_o(refill_pvalid[0] ),
.slv_pready_i(refill_pready[0] ),
.axi_req_o (axi_cache_req ),
.axi_resp_i (axi_cache_resp )
);
// Merges the two tile-internal AXI masters into one tile-level AXI bus. From the
// concatenation order below, slave port 1 is the core adapter and slave port 0 is the
// instruction cache adapter.
axi_mux #(
.SlvAxiIDWidth (AxiCoreIdWidth ),
.slv_aw_chan_t (axi_core_aw_t ),
.mst_aw_chan_t (axi_tile_aw_t ),
.w_chan_t (axi_tile_w_t ),
.slv_b_chan_t (axi_core_b_t ),
.mst_b_chan_t (axi_tile_b_t ),
.slv_ar_chan_t (axi_core_ar_t ),
.mst_ar_chan_t (axi_tile_ar_t ),
.slv_r_chan_t (axi_core_r_t ),
.mst_r_chan_t (axi_tile_r_t ),
.slv_req_t (axi_core_req_t ),
.slv_resp_t (axi_core_resp_t),
.mst_req_t (axi_tile_req_t ),
.mst_resp_t (axi_tile_resp_t),
.NoSlvPorts (2 ),
.MaxWTrans (8 ),
.FallThrough (1 )
) i_axi_mux (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.test_i (1'b0 ),
.slv_reqs_i ({axi_cores_req, axi_cache_req} ),
.slv_resps_o({axi_cores_resp, axi_cache_resp}),
.mst_req_o (axi_mst_req ),
.mst_resp_i (axi_mst_resp )
);
// Slice between the muxed tile-internal AXI bus (axi_mst_req/resp) and the tile's AXI master
// port (axi_mst_req_o / axi_mst_resp_i).
axi_cut #(
.aw_chan_t(axi_tile_aw_t ),
.w_chan_t (axi_tile_w_t ),
.b_chan_t (axi_tile_b_t ),
.ar_chan_t(axi_tile_ar_t ),
.r_chan_t (axi_tile_r_t ),
.req_t (axi_tile_req_t ),
.resp_t (axi_tile_resp_t)
) axi_mst_slice (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.slv_req_i (axi_mst_req ),
.slv_resp_o(axi_mst_resp ),
.mst_req_o (axi_mst_req_o ),
.mst_resp_i(axi_mst_resp_i)
);
/******************
* Assertions *
******************/
// Elaboration-time parameter checks. $fatal aborts elaboration, $error only reports.
// The boot address must be word aligned.
if (BootAddr[1:0] != 2'b00)
  $fatal(1, "[mempool_tile] Boot address should be aligned in a 4-byte boundary.");
// Core counts must be powers of two.
if (NumCoresPerTile != unsigned'(2**$clog2(NumCoresPerTile)))
  $fatal(1, "[mempool_tile] The number of cores per tile must be a power of two.");
if (NumCores != unsigned'(2**$clog2(NumCores)))
  $fatal(1, "[mempool_tile] The number of cores must be a power of two.");
// The condition only rejects zero banks, so a single bank is a legal configuration.
if (NumBanksPerTile < 1)
  $fatal(1, "[mempool_tile] The number of banks per tile must be at least one.");
// Only refill port 0 is connected to the AXI adapter, so any value other than 1 (including
// 0) is unsupported.
if (NumCaches != 1)
  $error("[mempool_tile] Exactly one instruction cache per tile is supported (NumCaches must be 1).");
// Equal widths are accepted; only DataWidth > AxiDataWidth is rejected.
if (DataWidth > AxiDataWidth)
  $error("[mempool_tile] AxiDataWidth must be at least as large as DataWidth.");
endmodule : mempool_tile
/*****************
* WRAPPER *
*****************/
/*verilator lint_off DECLFILENAME*/
// NOTE(review): mempool.svh presumably provides the STRUCT_PORT macro used in the port list of mempool_tile_wrap below; confirm.
`include "mempool/mempool.svh"
// Wrapper around `mempool_tile` that exposes each TCDM master/slave direction
// (north, northeast, east, local) as its own set of ports.
// Inside, the per-direction ports are concatenated in the fixed order
// {northeast, north, east, local} and connected to the ports of the tile instance.
// NOTE(review): assumes the matching `mempool_tile` ports are declared [N-1:0], so that
// `local` ends up at index 0 and `northeast` at index 3 - confirm against mempool_tile.
module mempool_tile_wrap
import mempool_pkg::*;
import cf_math_pkg::idx_width;
#(
// TCDM
// Forwarded unchanged to the `TCDMBaseAddr` parameter of the tile.
parameter addr_t TCDMBaseAddr = 32'b0,
// Boot address
// Forwarded unchanged to the `BootAddr` parameter of the tile.
parameter logic [31:0] BootAddr = 32'h0000_1000,
// Dependent parameters. DO NOT CHANGE.
// Not referenced anywhere inside this wrapper.
parameter int unsigned NumCaches = NumCoresPerTile / NumCoresPerCache
) (
// Clock and reset
input logic clk_i,
input logic rst_ni,
// Scan chain
input logic scan_enable_i,
input logic scan_data_i,
output logic scan_data_o,
// Tile ID
input logic [idx_width(NumTiles)-1:0] tile_id_i,
// TCDM Master interfaces
// One request channel (req/valid/ready) and one response channel (resp/valid/ready)
// per direction: north, northeast, east, local.
output `STRUCT_PORT(tcdm_master_req_t) tcdm_master_north_req_o,
output logic tcdm_master_north_req_valid_o,
input logic tcdm_master_north_req_ready_i,
input `STRUCT_PORT(tcdm_master_resp_t) tcdm_master_north_resp_i,
input logic tcdm_master_north_resp_valid_i,
output logic tcdm_master_north_resp_ready_o,
output `STRUCT_PORT(tcdm_master_req_t) tcdm_master_northeast_req_o,
output logic tcdm_master_northeast_req_valid_o,
input logic tcdm_master_northeast_req_ready_i,
input `STRUCT_PORT(tcdm_master_resp_t) tcdm_master_northeast_resp_i,
input logic tcdm_master_northeast_resp_valid_i,
output logic tcdm_master_northeast_resp_ready_o,
output `STRUCT_PORT(tcdm_master_req_t) tcdm_master_east_req_o,
output logic tcdm_master_east_req_valid_o,
input logic tcdm_master_east_req_ready_i,
input `STRUCT_PORT(tcdm_master_resp_t) tcdm_master_east_resp_i,
input logic tcdm_master_east_resp_valid_i,
output logic tcdm_master_east_resp_ready_o,
output `STRUCT_PORT(tcdm_master_req_t) tcdm_master_local_req_o,
output logic tcdm_master_local_req_valid_o,
input logic tcdm_master_local_req_ready_i,
input `STRUCT_PORT(tcdm_master_resp_t) tcdm_master_local_resp_i,
input logic tcdm_master_local_resp_valid_i,
output logic tcdm_master_local_resp_ready_o,
// TCDM Slave interfaces
// Mirror image of the master interfaces: requests come in, responses go out.
input `STRUCT_PORT(tcdm_slave_req_t) tcdm_slave_north_req_i,
input logic tcdm_slave_north_req_valid_i,
output logic tcdm_slave_north_req_ready_o,
output `STRUCT_PORT(tcdm_slave_resp_t) tcdm_slave_north_resp_o,
output logic tcdm_slave_north_resp_valid_o,
input logic tcdm_slave_north_resp_ready_i,
input `STRUCT_PORT(tcdm_slave_req_t) tcdm_slave_northeast_req_i,
input logic tcdm_slave_northeast_req_valid_i,
output logic tcdm_slave_northeast_req_ready_o,
output `STRUCT_PORT(tcdm_slave_resp_t) tcdm_slave_northeast_resp_o,
output logic tcdm_slave_northeast_resp_valid_o,
input logic tcdm_slave_northeast_resp_ready_i,
input `STRUCT_PORT(tcdm_slave_req_t) tcdm_slave_east_req_i,
input logic tcdm_slave_east_req_valid_i,
output logic tcdm_slave_east_req_ready_o,
output `STRUCT_PORT(tcdm_slave_resp_t) tcdm_slave_east_resp_o,
output logic tcdm_slave_east_resp_valid_o,
input logic tcdm_slave_east_resp_ready_i,
input `STRUCT_PORT(tcdm_slave_req_t) tcdm_slave_local_req_i,
input logic tcdm_slave_local_req_valid_i,
output logic tcdm_slave_local_req_ready_o,
output `STRUCT_PORT(tcdm_slave_resp_t) tcdm_slave_local_resp_o,
output logic tcdm_slave_local_resp_valid_o,
input logic tcdm_slave_local_resp_ready_i,
// AXI Interface
output `STRUCT_PORT(axi_tile_req_t) axi_mst_req_o,
input `STRUCT_PORT(axi_tile_resp_t) axi_mst_resp_i,
// Wake up interface
input logic [NumCoresPerTile-1:0] wake_up_i
);
// The wrapped tile. Only TCDMBaseAddr and BootAddr are overridden; every other
// parameter of `mempool_tile` keeps its default.
mempool_tile #(
.TCDMBaseAddr(TCDMBaseAddr),
.BootAddr (BootAddr )
) i_tile (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.tile_id_i (tile_id_i ),
// Scan chain
.scan_enable_i (scan_enable_i ),
.scan_data_i (scan_data_i ),
.scan_data_o (scan_data_o ),
// TCDM Master
// All concatenations below use the same direction order: {northeast, north, east, local}.
.tcdm_master_req_o ({tcdm_master_northeast_req_o, tcdm_master_north_req_o, tcdm_master_east_req_o, tcdm_master_local_req_o} ),
.tcdm_master_req_ready_i ({tcdm_master_northeast_req_ready_i, tcdm_master_north_req_ready_i, tcdm_master_east_req_ready_i, tcdm_master_local_req_ready_i} ),
.tcdm_master_req_valid_o ({tcdm_master_northeast_req_valid_o, tcdm_master_north_req_valid_o, tcdm_master_east_req_valid_o, tcdm_master_local_req_valid_o} ),
.tcdm_master_resp_i ({tcdm_master_northeast_resp_i, tcdm_master_north_resp_i, tcdm_master_east_resp_i, tcdm_master_local_resp_i} ),
.tcdm_master_resp_ready_o({tcdm_master_northeast_resp_ready_o, tcdm_master_north_resp_ready_o, tcdm_master_east_resp_ready_o, tcdm_master_local_resp_ready_o}),
.tcdm_master_resp_valid_i({tcdm_master_northeast_resp_valid_i, tcdm_master_north_resp_valid_i, tcdm_master_east_resp_valid_i, tcdm_master_local_resp_valid_i}),
// TCDM Slave
.tcdm_slave_req_i ({tcdm_slave_northeast_req_i, tcdm_slave_north_req_i, tcdm_slave_east_req_i, tcdm_slave_local_req_i} ),
.tcdm_slave_req_ready_o ({tcdm_slave_northeast_req_ready_o, tcdm_slave_north_req_ready_o, tcdm_slave_east_req_ready_o, tcdm_slave_local_req_ready_o} ),
.tcdm_slave_req_valid_i ({tcdm_slave_northeast_req_valid_i, tcdm_slave_north_req_valid_i, tcdm_slave_east_req_valid_i, tcdm_slave_local_req_valid_i} ),
.tcdm_slave_resp_o ({tcdm_slave_northeast_resp_o, tcdm_slave_north_resp_o, tcdm_slave_east_resp_o, tcdm_slave_local_resp_o} ),
.tcdm_slave_resp_ready_i ({tcdm_slave_northeast_resp_ready_i, tcdm_slave_north_resp_ready_i, tcdm_slave_east_resp_ready_i, tcdm_slave_local_resp_ready_i} ),
.tcdm_slave_resp_valid_o ({tcdm_slave_northeast_resp_valid_o, tcdm_slave_north_resp_valid_o, tcdm_slave_east_resp_valid_o, tcdm_slave_local_resp_valid_o} ),
// AXI interface
.axi_mst_req_o (axi_mst_req_o ),
.axi_mst_resp_i (axi_mst_resp_i ),
// Wake up interface
.wake_up_i (wake_up_i )
);
endmodule: mempool_tile_wrap
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Francesco Conti <fconti@iis.ee.ethz.ch>
/// Converts a one-hot encoded vector into the binary index of its set bit.
///
/// Output bit `bin[j]` is the OR-reduction of every `onehot[i]` whose index `i` has bit `j` set.
/// Consequently an all-zero input produces `bin == '0`, and an input with several bits set
/// produces the bitwise OR of the individual indices (flagged by the assertion in simulation).
module onehot_to_bin #(
  /// Number of bits of the one-hot input vector.
  parameter int unsigned ONEHOT_WIDTH = 16,
  // Do Not Change
  /// Number of bits of the binary output; forced to 1 when `ONEHOT_WIDTH` is 1.
  parameter int unsigned BIN_WIDTH    = ONEHOT_WIDTH == 1 ? 1 : $clog2(ONEHOT_WIDTH)
) (
  /// One-hot (or all-zero) encoded input.
  input  logic [ONEHOT_WIDTH-1:0] onehot,
  /// Binary index of the set input bit.
  output logic [BIN_WIDTH-1:0]    bin
);

  // One iteration per output bit.
  for (genvar j = 0; j < BIN_WIDTH; j++) begin : jl
    // Constant mask: bit i is set iff bit j of the index i is set.
    logic [ONEHOT_WIDTH-1:0] tmp_mask;
    for (genvar i = 0; i < ONEHOT_WIDTH; i++) begin : il
      logic [BIN_WIDTH-1:0] tmp_i;
      assign tmp_i       = i;
      assign tmp_mask[i] = tmp_i[j];
    end
    // Output bit j is high when the set input bit is one of the masked positions.
    assign bin[j] = |(tmp_mask & onehot);
  end

  // pragma translate_off
`ifndef VERILATOR
  // `$onehot0` is true for zero or exactly one set bit, so this fires as soon as
  // more than ONE bit is set (the previous message incorrectly said "two").
  assert final ($onehot0(onehot)) else
    $fatal(1, "[onehot_to_bin] More than one bit set in the one-hot signal");
`endif
  // pragma translate_on
endmodule
This source diff could not be displayed because it is too large. You can view the blob instead.
// Copyright 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Wolfgang Roenninger <wroennin@iis.ee.ethz.ch>, ETH Zurich
// Date: 02.04.2019
// Description: logarithmic arbitration tree with round robin arbitration scheme.
/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
/// rotate each cycle.
///
/// ## Fair vs. unfair Arbitration
///
/// This refers to fair throughput distribution when not all inputs have active requests.
/// This module has an internal state `rr_q` which defines the highest priority input. (When
/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
/// choose the input with the same index as currently defined by the state if it has an active
/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
/// to distinguish between two methods of calculating the next state.
/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
/// state being calculated without the context of the active request. Leading to an
/// unfair throughput distribution if not all inputs have active requests.
/// * `1'b1`: The next state jumps to the next unserved request with higher index.
/// This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
/// `req_i` signal with all indices which will have a higher priority in the next state.
/// The trailing zero count defines the input index with the next highest priority after
/// the current one is served. When the upper is empty the lower `lzc` provides the
/// wrapped index if there are outstanding requests with lower or same priority.
/// The implication of throughput fairness on the module timing are:
/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
/// that in this module the input to register path scales with Log(Log(`NumIn`)).
/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
/// timing path of this module also scales with Log(`NumIn`).
/// This implies that in this module the input to output path is always longer than the input to
/// register path. As the output data usually also terminates in a register the parameter `FairArb`
/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
/// signal is usually less than `DataWidth`.
module rr_arb_tree #(
/// Number of inputs to be arbitrated.
parameter int unsigned NumIn = 64,
/// Data width of the payload in bits. Not needed if `DataType` is overwritten.
parameter int unsigned DataWidth = 32,
/// Data type of the payload, can be overwritten with a custom type. `DataWidth` is only used
/// to build this default type.
parameter type DataType = logic [DataWidth-1:0],
/// The `ExtPrio` option allows to override the internal round robin counter via the
/// `rr_i` signal. This can be useful in case multiple arbiters need to have
/// rotating priorities that are operating in lock-step. If static priority arbitration
/// is needed, just connect `rr_i` to '0.
///
/// Set to 1'b1 to enable.
parameter bit ExtPrio = 1'b0,
/// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
/// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
/// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
/// delay and area.
///
/// Set to `1'b1` to treat req/gnt as vld/rdy.
parameter bit AxiVldRdy = 1'b0,
/// The `LockIn` option prevents the arbiter from changing the arbitration
/// decision when the arbiter is disabled. I.e., the index of the first request
/// that wins the arbitration will be locked in case the destination is not
/// able to grant the request in the same cycle.
///
/// Set to `1'b1` to enable.
parameter bit LockIn = 1'b0,
/// When set, ensures that throughput gets distributed evenly between all inputs.
///
/// Set to `1'b0` to disable.
parameter bit FairArb = 1'b1,
/// Dependent parameter, do **not** overwrite.
/// Width of the arbitration priority signal and the arbitrated index.
parameter int unsigned IdxWidth = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
/// Dependent parameter, do **not** overwrite.
/// Type for defining the arbitration priority and arbitrated index signal.
parameter type idx_t = logic [IdxWidth-1:0]
) (
/// Clock, positive edge triggered.
input logic clk_i,
/// Asynchronous reset, active low.
input logic rst_ni,
/// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
input logic flush_i,
/// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
input idx_t rr_i,
/// Input requests arbitration.
input logic [NumIn-1:0] req_i,
/* verilator lint_off UNOPTFLAT */
/// Input request is granted.
output logic [NumIn-1:0] gnt_o,
/* verilator lint_on UNOPTFLAT */
/// Input data for arbitration.
input DataType [NumIn-1:0] data_i,
/// Output request is valid.
output logic req_o,
/// Output request is granted.
input logic gnt_i,
/// Output data.
output DataType data_o,
/// Index from which input the data came from.
output idx_t idx_o
);
// pragma translate_off
`ifndef VERILATOR
`ifndef XSIM
// Default SVA reset
default disable iff (!rst_ni || flush_i);
`endif
`endif
// pragma translate_on
// just pass through in this corner case
// (request, grant and data are wired straight through; the index is constant zero)
if (NumIn == unsigned'(1)) begin : gen_pass_through
assign req_o = req_i[0];
assign gnt_o[0] = gnt_i;
assign data_o = data_i[0];
assign idx_o = '0;
// non-degenerate cases
end else begin : gen_arbiter
// depth of the binary arbitration tree
localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
// Node storage for a binary tree with 2**NumLevels-1 nodes; node 0 is the root.
/* verilator lint_off UNOPTFLAT */
idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices
DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data
logic [2**NumLevels-2:0] gnt_nodes; // used to propagate the grant to masters
logic [2**NumLevels-2:0] req_nodes; // used to propagate the requests to slave
/* lint_off */
// rr_q: index of the input that currently has the highest priority
// req_d: request vector the tree actually arbitrates on (req_i, or the locked copy)
idx_t rr_q;
logic [NumIn-1:0] req_d;
// the final arbitration decision can be taken from the root of the tree
assign req_o = req_nodes[0];
assign data_o = data_nodes[0];
assign idx_o = index_nodes[0];
if (ExtPrio) begin : gen_ext_rr
// priority is taken from rr_i and the requests are used unmodified
assign rr_q = rr_i;
assign req_d = req_i;
end else begin : gen_int_rr
// next value of the internal priority register
idx_t rr_d;
// lock arbiter decision in case we got at least one req and no acknowledge
if (LockIn) begin : gen_lock
logic lock_d, lock_q;
logic [NumIn-1:0] req_q;
// lock whenever the output request is valid but not granted in this cycle
assign lock_d = req_o & ~gnt_i;
// while locked, arbitrate on the registered request vector instead of req_i
assign req_d = (lock_q) ? req_q : req_i;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
if (!rst_ni) begin
lock_q <= '0;
end else begin
if (flush_i) begin
lock_q <= '0;
end else begin
lock_q <= lock_d;
end
end
end
// pragma translate_off
`ifndef VERILATOR
lock: assert property(
@(posedge clk_i) LockIn |-> req_o &&
(!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
$fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
ready.");
// requests that were registered last cycle and are still asserted now
logic [NumIn-1:0] req_tmp;
assign req_tmp = req_q & req_i;
lock_req: assume property(
@(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
$fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
enabled.");
`endif
// pragma translate_on
// registered copy of the arbitrated request vector, used while the lock is active
always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
if (!rst_ni) begin
req_q <= '0;
end else begin
if (flush_i) begin
req_q <= '0;
end else begin
req_q <= req_d;
end
end
end
end else begin : gen_no_lock
assign req_d = req_i;
end
if (FairArb) begin : gen_fair_arb
// upper_mask: requests with an index above rr_q
// lower_mask: requests with an index at or below rr_q
logic [NumIn-1:0] upper_mask, lower_mask;
idx_t upper_idx, lower_idx, next_idx;
// NOTE: lower_empty is declared but neither driven nor read in this block
logic upper_empty, lower_empty;
for (genvar i = 0; i < NumIn; i++) begin : gen_mask
assign upper_mask[i] = (i > rr_q) ? req_d[i] : 1'b0;
assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0;
end
// NOTE(review): `lzc` is defined outside this block; per the module header it is used as a
// trailing-zero counter here, so cnt_o is presumably the lowest set index - confirm.
lzc #(
.WIDTH ( NumIn ),
.MODE ( 1'b0 )
) i_lzc_upper (
.in_i ( upper_mask ),
.cnt_o ( upper_idx ),
.empty_o ( upper_empty )
);
lzc #(
.WIDTH ( NumIn ),
.MODE ( 1'b0 )
) i_lzc_lower (
.in_i ( lower_mask ),
.cnt_o ( lower_idx ),
.empty_o ( /*unused*/ )
);
// take the index found in the upper mask; fall back to the lower mask if the upper is empty
assign next_idx = upper_empty ? lower_idx : upper_idx;
// the priority only moves when an output request was actually granted
assign rr_d = (gnt_i && req_o) ? next_idx : rr_q;
end else begin : gen_unfair_arb
// on a granted request advance the priority by one, wrapping from NumIn-1 back to zero
assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
end
// this holds the highest priority
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
if (!rst_ni) begin
rr_q <= '0;
end else begin
if (flush_i) begin
rr_q <= '0;
end else begin
rr_q <= rr_d;
end
end
end
end
// the root node receives the external grant
assign gnt_nodes[0] = gnt_i;
// arbiter tree
// level 0 is the root; level NumLevels-1 is where the inputs are attached
for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
for (genvar l = 0; l < 2**level; l++) begin : gen_level
// local select signal
// (0 selects the lower/even child, 1 selects the upper/odd child)
logic sel;
// index calcs
// Idx1 and Idx1+1 are the two child nodes of Idx0 in the next level
localparam int unsigned Idx0 = 2**level-1+l;// current node
localparam int unsigned Idx1 = 2**(level+1)-1+l*2;
//////////////////////////////////////////////////////////////
// uppermost level where data is fed in from the inputs
if (unsigned'(level) == NumLevels-1) begin : gen_first_level
// if two successive indices are still in the vector...
if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
assign req_nodes[Idx0] = req_d[l*2] | req_d[l*2+1];
// arbitration: round robin
// pick the odd input if the even one is idle, or if the odd one requests and the
// priority bit for this level is set (`&` binds tighter than `|`)
assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
assign index_nodes[Idx0] = idx_t'(sel);
assign data_nodes[Idx0] = (sel) ? data_i[l*2+1] : data_i[l*2];
// with AxiVldRdy set, the grant is not qualified with the request of the input
assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]) & ~sel;
assign gnt_o[l*2+1] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
end
// if only the first index is still in the vector...
if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
assign req_nodes[Idx0] = req_d[l*2];
assign index_nodes[Idx0] = '0;// always zero in this case
assign data_nodes[Idx0] = data_i[l*2];
assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
end
// if index is out of range, fill up with zeros (will get pruned)
if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
assign req_nodes[Idx0] = 1'b0;
assign index_nodes[Idx0] = idx_t'('0);
assign data_nodes[Idx0] = DataType'('0);
end
//////////////////////////////////////////////////////////////
// general case for other levels within the tree
end else begin : gen_other_levels
assign req_nodes[Idx0] = req_nodes[Idx1] | req_nodes[Idx1+1];
// arbitration: round robin
// same selection rule as at the input level, applied to the two child nodes
assign sel = ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
// prepend the select bit of this level to the index bits propagated from the chosen child
assign index_nodes[Idx0] = (sel) ?
idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
assign data_nodes[Idx0] = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
// forward the grant of this node only to the selected child
assign gnt_nodes[Idx1] = gnt_nodes[Idx0] & ~sel;
assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
end
//////////////////////////////////////////////////////////////
end
end
// pragma translate_off
`ifndef VERILATOR
`ifndef XSIM
// parameter sanity checks, evaluated once at the start of simulation
initial begin : p_assert
assert(NumIn)
else $fatal(1, "Input must be at least one element wide.");
assert(!(LockIn && ExtPrio))
else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
end
// consistency checks between the request/grant signals on both sides of the arbiter
hot_one : assert property(
@(posedge clk_i) $onehot0(gnt_o))
else $fatal (1, "Grant signal must be hot1 or zero.");
gnt0 : assert property(
@(posedge clk_i) |gnt_o |-> gnt_i)
else $fatal (1, "Grant out implies grant in.");
gnt1 : assert property(
@(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
else $fatal (1, "Req out and grant in implies grant out.");
gnt_idx : assert property(
@(posedge clk_i) req_o |-> gnt_i |-> gnt_o[idx_o])
else $fatal (1, "Idx_o / gnt_o do not match.");
req0 : assert property(
@(posedge clk_i) |req_i |-> req_o)
else $fatal (1, "Req in implies req out.");
req1 : assert property(
@(posedge clk_i) req_o |-> |req_i)
else $fatal (1, "Req out implies req in.");
`endif
`endif
// pragma translate_on
end
endmodule : rr_arb_tree
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Authors: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Sergio Mazzola <smazzola@student.ethz.ch>
// Description: Top-Level of Snitch Integer Core RV32E
`include "common_cells/registers.svh"
`include "common_cells/assertions.svh"
// `SNITCH_ENABLE_PERF Enables mcycle, minstret performance counters (read only)
module snitch
import snitch_pkg::meta_id_t;
#(
parameter logic [31:0] BootAddr = 32'h0000_1000,
parameter logic [31:0] MTVEC = BootAddr, // Exception Base Address (see privileged spec 3.1.7)
parameter bit RVE = 0, // Reduced-register Extension
parameter bit RVM = 1, // Enable IntegerMmultiplication & Division Extension
parameter int RegNrWritePorts = 2 // Implement one or two write ports into the register file
) (
input logic clk_i,
input logic rst_i,
input logic [31:0] hart_id_i,
// Instruction Refill Port
output logic [31:0] inst_addr_o,
input logic [31:0] inst_data_i,
output logic inst_valid_o,
input logic inst_ready_i,
`ifdef RISCV_FORMAL
output logic [0:0] rvfi_valid,
output logic [0:0][63:0] rvfi_order,
output logic [0:0][31:0] rvfi_insn,
output logic [0:0] rvfi_trap,
output logic [0:0] rvfi_halt,
output logic [0:0] rvfi_intr,
output logic [0:0][1:0] rvfi_mode,
output logic [0:0][4:0] rvfi_rs1_addr,
output logic [0:0][4:0] rvfi_rs2_addr,
output logic [0:0][31:0] rvfi_rs1_rdata,
output logic [0:0][31:0] rvfi_rs2_rdata,
output logic [0:0][4:0] rvfi_rd_addr,
output logic [0:0][31:0] rvfi_rd_wdata,
output logic [0:0][31:0] rvfi_pc_rdata,
output logic [0:0][31:0] rvfi_pc_wdata,
output logic [0:0][31:0] rvfi_mem_addr,
output logic [0:0][3:0] rvfi_mem_rmask,
output logic [0:0][3:0] rvfi_mem_wmask,
output logic [0:0][31:0] rvfi_mem_rdata,
output logic [0:0][31:0] rvfi_mem_wdata,
`endif
/// Accelerator Interface - Master Port
/// Independent channels for transaction request and read completion.
/// AXI-like handshaking.
/// Same IDs need to be handled in-order.
output logic [31:0] acc_qaddr_o,
output logic [4:0] acc_qid_o,
output logic [31:0] acc_qdata_op_o,
output logic [31:0] acc_qdata_arga_o,
output logic [31:0] acc_qdata_argb_o,
output logic [31:0] acc_qdata_argc_o,
output logic acc_qvalid_o,
input logic acc_qready_i,
input logic [31:0] acc_pdata_i,
input logic [4:0] acc_pid_i,
input logic acc_perror_i,
input logic acc_pvalid_i,
output logic acc_pready_o,
/// TCDM Data Interface
/// Write transactions do not return data on the `P Channel`
/// Transactions need to be handled strictly in-order.
output logic [31:0] data_qaddr_o,
output logic data_qwrite_o,
output logic [3:0] data_qamo_o,
output logic [31:0] data_qdata_o,
output logic [3:0] data_qstrb_o,
output meta_id_t data_qid_o,
output logic data_qvalid_o,
input logic data_qready_i,
input logic [31:0] data_pdata_i,
input logic data_perror_i,
input meta_id_t data_pid_i,
input logic data_pvalid_i,
output logic data_pready_o,
input logic wake_up_sync_i, // synchronous wake-up interrupt
// Core event strobes
output snitch_pkg::core_events_t core_events_o
);
localparam int RegWidth = RVE ? 4 : 5;
localparam int RegNrReadPorts = snitch_pkg::XPULPIMG ? 3 : 2;
logic illegal_inst;
logic zero_lsb;
// Instruction fetch
logic [31:0] pc_d, pc_q;
logic wfi_d, wfi_q;
logic wake_up_d, wake_up_q;
logic [31:0] consec_pc;
// Immediates
logic [31:0] iimm, uimm, jimm, bimm, simm, pbimm;
/* verilator lint_off WIDTH */
assign iimm = $signed({inst_data_i[31:20]});
assign uimm = {inst_data_i[31:12], 12'b0};
assign jimm = $signed({inst_data_i[31],
inst_data_i[19:12], inst_data_i[20], inst_data_i[30:21], 1'b0});
assign bimm = $signed({inst_data_i[31],
inst_data_i[7], inst_data_i[30:25], inst_data_i[11:8], 1'b0});
assign simm = $signed({inst_data_i[31:25], inst_data_i[11:7]});
assign pbimm = $signed(inst_data_i[24:20]); // Xpulpimg immediate branching signed immediate
/* verilator lint_on WIDTH */
logic [31:0] opa, opb;
logic [32:0] adder_result;
logic [31:0] alu_result;
logic [RegWidth-1:0] rd, rs1, rs2;
logic stall, lsu_stall;
// Register connections
logic [RegNrReadPorts-1:0][RegWidth-1:0] gpr_raddr;
logic [RegNrReadPorts-1:0][31:0] gpr_rdata;
logic [RegNrWritePorts-1:0][RegWidth-1:0] gpr_waddr;
logic [RegNrWritePorts-1:0][31:0] gpr_wdata;
logic [RegNrWritePorts-1:0] gpr_we;
logic [2**RegWidth-1:0] sb_d, sb_q;
// Load/Store Defines
logic is_load, is_store, is_signed, is_postincr;
logic is_fp_load, is_fp_store;
logic ls_misaligned;
logic ld_addr_misaligned;
logic st_addr_misaligned;
enum logic [1:0] {
Byte = 2'b00,
HalfWord = 2'b01,
Word = 2'b10,
Double = 2'b11
} ls_size;
enum logic [3:0] {
AMONone = 4'h0,
AMOSwap = 4'h1,
AMOAdd = 4'h2,
AMOAnd = 4'h3,
AMOOr = 4'h4,
AMOXor = 4'h5,
AMOMax = 4'h6,
AMOMaxu = 4'h7,
AMOMin = 4'h8,
AMOMinu = 4'h9,
AMOLR = 4'hA,
AMOSC = 4'hB
} ls_amo;
logic [31:0] ld_result;
logic lsu_qready, lsu_qvalid;
logic lsu_pvalid, lsu_pready;
logic [RegWidth-1:0] lsu_rd;
logic [31:0] lsu_qaddr;
logic retire_load; // retire a load instruction
logic retire_p; // retire from post-increment instructions
logic retire_i; // retire the rest of the base instruction set
logic retire_acc; // retire an instruction we offloaded
logic acc_stall;
logic valid_instr;
logic exception;
// ALU Operations
enum logic [3:0] {
// Arithmetical operations
Add, Sub,
// Shifts
Sll, Srl, Sra,
// Logical operations
LXor, LOr, LAnd, LNAnd,
// Comparisons
Eq, Neq, Ge, Geu,
Slt, Sltu,
// Miscellaneous
BypassA
} alu_op;
enum logic [3:0] {
None, Reg, IImmediate, UImmediate, JImmediate, SImmediate, SFImmediate, PC, CSR, CSRImmediate, PBImmediate, RegRd, RegRs2
} opa_select, opb_select, opc_select;
logic write_rd; // write rd desitnation this cycle
logic uses_rd;
logic write_rs1; // write rs1 destination this cycle
logic uses_rs1;
enum logic [1:0] {Consec, Alu, Exception} next_pc;
enum logic [1:0] {RdAlu, RdConsecPC, RdBypass} rd_select;
logic [31:0] rd_bypass;
logic is_branch;
logic [31:0] csr_rvalue;
logic csr_en;
// Registers
`FFAR(pc_q, pc_d, BootAddr, clk_i, rst_i)
`FFAR(wfi_q, wfi_d, '0, clk_i, rst_i)
`FFAR(wake_up_q, wake_up_d, '0, clk_i, rst_i)
`FFAR(sb_q, sb_d, '0, clk_i, rst_i)
// performance counter
`ifdef SNITCH_ENABLE_PERF
logic [63:0] cycle_q;
logic [63:0] instret_q;
`FFAR(cycle_q, cycle_q + 1, '0, clk_i, rst_i);
`FFLAR(instret_q, instret_q + 1, !stall, '0, clk_i, rst_i);
`endif
always_comb begin
core_events_o = '0;
core_events_o.retired_insts = ~stall;
end
// accelerator offloading interface
// register int destination in scoreboard
logic acc_register_rd;
assign acc_qaddr_o = hart_id_i;
assign acc_qid_o = rd;
assign acc_qdata_op_o = inst_data_i;
assign acc_qdata_arga_o = {{32{gpr_rdata[0][31]}}, gpr_rdata[0]};
assign acc_qdata_argb_o = {{32{gpr_rdata[1][31]}}, gpr_rdata[1]};
assign acc_qdata_argc_o = {{32{gpr_rdata[2][31]}}, gpr_rdata[2]};
// instruction fetch interface
assign inst_addr_o = pc_q;
assign inst_valid_o = ~wfi_q;
// --------------------
// Control
// --------------------
// Scoreboard: Keep track of rd dependencies (only loads at the moment)
logic operands_ready;
logic dst_ready;
logic opa_ready, opb_ready, opc_ready;
logic dstrd_ready, dstrs1_ready;
always_comb begin
sb_d = sb_q;
if (retire_load) sb_d[lsu_rd] = 1'b0;
// only place the reservation if we actually executed the load or offload instruction
if ((is_load | acc_register_rd) && !stall && !exception) sb_d[rd] = 1'b1;
if (retire_acc) sb_d[acc_pid_i[RegWidth-1:0]] = 1'b0;
sb_d[0] = 1'b0;
end
// TODO(zarubaf): This can probably be described a bit more efficient
assign opa_ready = (opa_select != Reg) | ~sb_q[rs1];
assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]) & ((opb_select != RegRd) | ~sb_q[rd]);
assign opc_ready = ((opc_select != Reg) | ~sb_q[rd]) & ((opc_select != RegRs2) | ~sb_q[rs2]);
assign operands_ready = opa_ready & opb_ready & opc_ready;
// either we are not using the destination register or we need to make
// sure that its destination operand is not marked busy in the scoreboard.
assign dstrd_ready = ~uses_rd | (uses_rd & ~sb_q[rd]);
assign dstrs1_ready = ~uses_rs1 | (uses_rs1 & ~sb_q[rs1]);
assign dst_ready = dstrd_ready & dstrs1_ready;
assign valid_instr = (inst_ready_i & inst_valid_o) & operands_ready & dst_ready;
// the accelerator interface stalled us
assign acc_stall = (acc_qvalid_o & ~acc_qready_i);
// the LSU Interface didn't accept our request yet
assign lsu_stall = (lsu_qvalid & ~lsu_qready);
// Stall the stage if we either didn't get a valid instruction or the LSU/Accelerator is not ready
assign stall = ~valid_instr | lsu_stall | acc_stall;
// --------------------
// Instruction Frontend
// --------------------
assign consec_pc = pc_q + ((is_branch & alu_result[0]) ? bimm : 'd4);
always_comb begin
pc_d = pc_q;
// if we got a valid instruction word increment the PC unless we are waiting for an event
if (!stall && !wfi_q) begin
casez (next_pc)
Consec: pc_d = consec_pc;
Alu: pc_d = alu_result & {{31{1'b1}}, ~zero_lsb};
Exception: pc_d = MTVEC;
endcase
end
end
// --------------------
// Decoder
// --------------------
assign rd = inst_data_i[7 + RegWidth - 1:7];
assign rs1 = inst_data_i[15 + RegWidth - 1:15];
assign rs2 = inst_data_i[20 + RegWidth - 1:20];
always_comb begin
illegal_inst = 1'b0;
alu_op = Add;
opa_select = None;
opb_select = None;
opc_select = None;
next_pc = Consec;
// set up rd destination
rd_select = RdAlu;
write_rd = 1'b1;
// if we are writing the field this cycle we need an int destination register
uses_rd = write_rd;
// set up rs1 destination
write_rs1 = 1'b0;
uses_rs1 = write_rs1;
rd_bypass = '0;
zero_lsb = 1'b0;
is_branch = 1'b0;
// LSU interface
is_load = 1'b0;
is_store = 1'b0;
is_postincr = 1'b0;
is_fp_load = 1'b0;
is_fp_store = 1'b0;
is_signed = 1'b0;
ls_size = Byte;
ls_amo = AMONone;
acc_qvalid_o = 1'b0;
acc_register_rd = 1'b0;
csr_en = 1'b0;
// Wake up if a wake-up is incoming or pending
wfi_d = (wake_up_q || wake_up_sync_i) ? 1'b0 : wfi_q;
// Only store a pending wake-up if we are not asleep
wake_up_d = (wake_up_sync_i && !wfi_q) ? 1'b1 : wake_up_q;
unique casez (inst_data_i)
riscv_instr::ADD: begin
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::ADDI: begin
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SUB: begin
alu_op = Sub;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::XOR: begin
opa_select = Reg;
opb_select = Reg;
alu_op = LXor;
end
riscv_instr::XORI: begin
alu_op = LXor;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::OR: begin
opa_select = Reg;
opb_select = Reg;
alu_op = LOr;
end
riscv_instr::ORI: begin
alu_op = LOr;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::AND: begin
alu_op = LAnd;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::ANDI: begin
alu_op = LAnd;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SLT: begin
alu_op = Slt;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SLTI: begin
alu_op = Slt;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SLTU: begin
alu_op = Sltu;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SLTIU: begin
alu_op = Sltu;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SLL: begin
alu_op = Sll;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SRL: begin
alu_op = Srl;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SRA: begin
alu_op = Sra;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SLLI: begin
alu_op = Sll;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SRLI: begin
alu_op = Srl;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::SRAI: begin
alu_op = Sra;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::LUI: begin
opa_select = None;
opb_select = None;
rd_select = RdBypass;
rd_bypass = uimm;
end
riscv_instr::AUIPC: begin
opa_select = UImmediate;
opb_select = PC;
end
riscv_instr::JAL: begin
rd_select = RdConsecPC;
opa_select = JImmediate;
opb_select = PC;
next_pc = Alu;
end
riscv_instr::JALR: begin
rd_select = RdConsecPC;
opa_select = Reg;
opb_select = IImmediate;
next_pc = Alu;
zero_lsb = 1'b1;
end
// use the ALU for comparisons
riscv_instr::BEQ: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Eq;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::BNE: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Neq;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::BLT: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Slt;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::BLTU: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Sltu;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::BGE: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Ge;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::BGEU: begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Geu;
opa_select = Reg;
opb_select = Reg;
end
// Load/Stores
riscv_instr::SB: begin
write_rd = 1'b0;
is_store = 1'b1;
opa_select = Reg;
opb_select = SImmediate;
end
riscv_instr::SH: begin
write_rd = 1'b0;
is_store = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = SImmediate;
end
riscv_instr::SW: begin
write_rd = 1'b0;
is_store = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = SImmediate;
end
riscv_instr::LB: begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::LH: begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::LW: begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::LBU: begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
opa_select = Reg;
opb_select = IImmediate;
end
riscv_instr::LHU: begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = IImmediate;
end
// CSR Instructions
riscv_instr::CSRRW: begin // Atomic Read/Write CSR
opa_select = Reg;
opb_select = None;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end
riscv_instr::CSRRWI: begin
opa_select = CSRImmediate;
opb_select = None;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end
riscv_instr::CSRRS: begin // Atomic Read and Set Bits in CSR
alu_op = LOr;
opa_select = Reg;
opb_select = CSR;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end
riscv_instr::CSRRSI: begin
// offload CSR enable to FP SS
if (inst_data_i[31:20] != snitch_pkg::CSR_SSR) begin
alu_op = LOr;
opa_select = CSRImmediate;
opb_select = CSR;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end else begin
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
end
end
riscv_instr::CSRRC: begin // Atomic Read and Clear Bits in CSR
alu_op = LNAnd;
opa_select = Reg;
opb_select = CSR;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end
riscv_instr::CSRRCI: begin
if (inst_data_i[31:20] != snitch_pkg::CSR_SSR) begin
alu_op = LNAnd;
opa_select = CSRImmediate;
opb_select = CSR;
rd_select = RdBypass;
rd_bypass = csr_rvalue;
csr_en = 1'b1;
end else begin
write_rd = 1'b0;
acc_qvalid_o = valid_instr;
end
end
riscv_instr::ECALL,
riscv_instr::EBREAK: begin
// TODO(zarubaf): Trap to precise address
write_rd = 1'b0;
end
// NOP Instructions
riscv_instr::FENCE: begin
write_rd = 1'b0;
end
riscv_instr::WFI: begin
if (valid_instr) begin
wfi_d = 1'b1;
if (wake_up_q || wake_up_sync_i) begin
// Do not sleep if a wake-up is pending
wfi_d = 1'b0;
wake_up_d = 1'b0;
end
end
end
// Atomics
riscv_instr::AMOADD_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOAdd;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOXOR_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOXor;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOOR_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOOr;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOAND_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOAnd;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOMIN_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOMin;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOMAX_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOMax;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOMINU_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOMinu;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOMAXU_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOMaxu;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::AMOSWAP_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOSwap;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::LR_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOLR;
opa_select = Reg;
opb_select = Reg;
end
riscv_instr::SC_W: begin
alu_op = BypassA;
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
ls_amo = AMOSC;
opa_select = Reg;
opb_select = Reg;
end
// Off-load to IPU coprocessor
riscv_instr::MUL,
riscv_instr::MULH,
riscv_instr::MULHSU,
riscv_instr::MULHU,
riscv_instr::DIV,
riscv_instr::DIVU,
riscv_instr::REM,
riscv_instr::REMU,
riscv_instr::MULW,
riscv_instr::DIVW,
riscv_instr::DIVUW,
riscv_instr::REMW,
riscv_instr::REMUW: begin
write_rd = 1'b0;
uses_rd = 1'b1;
acc_qvalid_o = valid_instr;
opa_select = Reg;
opb_select = Reg;
acc_register_rd = 1'b1;
end
/* Xpulpimg extension */
// Post-increment loads/stores
riscv_instr::P_LB_IRPOST: begin // Xpulpimg: p.lb rd,iimm(rs1!)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
opa_select = Reg;
opb_select = IImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LBU_IRPOST: begin // Xpulpimg: p.lbu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
opa_select = Reg;
opb_select = IImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LH_IRPOST: begin // Xpulpimg: p.lh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = IImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LHU_IRPOST: begin // Xpulpimg: p.lhu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = IImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LW_IRPOST: begin // Xpulpimg: p.lw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = IImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LB_RRPOST: begin // Xpulpimg: p.lb rd,rs2(rs1!)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LBU_RRPOST: begin // Xpulpimg: p.lbu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LH_RRPOST: begin // Xpulpimg: p.lh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LHU_RRPOST: begin // Xpulpimg: p.lhu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LW_RRPOST: begin // Xpulpimg: p.lw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
write_rs1 = 1'b1;
is_load = 1'b1;
is_postincr = 1'b1;
is_signed = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LB_RR: begin // Xpulpimg: p.lb rd,rs2(rs1)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LBU_RR: begin // Xpulpimg: p.lbu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LH_RR: begin // Xpulpimg: p.lh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LHU_RR: begin // Xpulpimg: p.lhu
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_LW_RR: begin // Xpulpimg: p.lw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
is_load = 1'b1;
is_signed = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = Reg;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SB_IRPOST: begin // Xpulpimg: p.sb rs2,simm(rs1!)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
opa_select = Reg;
opb_select = SImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SH_IRPOST: begin // Xpulpimg: p.sh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = SImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SW_IRPOST: begin // Xpulpimg: p.sw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = SImmediate;
end else begin
illegal_inst = 1'b1;
end
end
// opb is usually assigned with the content of rs2; in stores with reg-reg
// addressing mode, however, the offset is stored in rd, so rd content is
// instead assigned to opb: if we cross such signals now (rd -> opb,
// rs2 -> opc) we don't have to do that in the ALU, with bigger muxes
riscv_instr::P_SB_RRPOST: begin // Xpulpimg: p.sb rs2,rs3(rs1!)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
opa_select = Reg; // rs1 base address
opb_select = RegRd; // rs3 (i.e. rd) offset
opc_select = RegRs2; // rs2 source data
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SH_RRPOST: begin // Xpulpimg: p.sh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = RegRd;
opc_select = RegRs2;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SW_RRPOST: begin // Xpulpimg: p.sw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
write_rs1 = 1'b1;
is_store = 1'b1;
is_postincr = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = RegRd;
opc_select = RegRs2;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SB_RR: begin // Xpulpimg: p.sb rs2,rs3(rs1)
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
is_store = 1'b1;
opa_select = Reg;
opb_select = RegRd;
opc_select = RegRs2;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SH_RR: begin // Xpulpimg: p.sh
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
is_store = 1'b1;
ls_size = HalfWord;
opa_select = Reg;
opb_select = RegRd;
opc_select = RegRs2;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_SW_RR: begin // Xpulpimg: p.sw
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
is_store = 1'b1;
ls_size = Word;
opa_select = Reg;
opb_select = RegRd;
opc_select = RegRs2;
end else begin
illegal_inst = 1'b1;
end
end
// Immediate branching
riscv_instr::P_BEQIMM: begin // Xpulpimg: p.beqimm
if (snitch_pkg::XPULPIMG) begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Eq;
opa_select = Reg;
opb_select = PBImmediate;
end else begin
illegal_inst = 1'b1;
end
end
riscv_instr::P_BNEIMM: begin // Xpulpimg: p.bneimm
if (snitch_pkg::XPULPIMG) begin
is_branch = 1'b1;
write_rd = 1'b0;
alu_op = Neq;
opa_select = Reg;
opb_select = PBImmediate;
end else begin
illegal_inst = 1'b1;
end
end
// Off-load to IPU coprocessor
// 1 source register (rs1)
riscv_instr::P_ABS, // Xpulpimg: p.abs
riscv_instr::P_EXTHS, // Xpulpimg: p.exths
riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz
riscv_instr::P_EXTBS, // Xpulpimg: p.extbs
riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz
riscv_instr::P_CLIP, // Xpulpimg: p.clip
riscv_instr::P_CLIPU, // Xpulpimg: p.clipu
riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h
riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b
riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h
riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b
riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h
riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b
riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h
riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b
riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h
riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b
riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h
riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b
riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h
riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b
riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h
riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b
riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h
riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b
riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h
riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b
riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h
riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b
riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h
riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b
riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h
riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b
riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b
riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h
riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h
riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b
riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h
riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b
riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h
riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b
riscv_instr::PV_DOTUP_SCI_H, // Xpulpimg: pv.dotup.sci.h
riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b
riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h
riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b
riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h
riscv_instr::PV_DOTSP_SCI_B: begin // Xpulpimg: pv.dotsp.sci.b
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
acc_qvalid_o = valid_instr;
opa_select = Reg;
acc_register_rd = 1'b1;
end else begin
illegal_inst = 1'b1;
end
end
// 2 source registers (rs1, rs2)
riscv_instr::P_SLET, // Xpulpimg: p.slet
riscv_instr::P_SLETU, // Xpulpimg: p.sletu
riscv_instr::P_MIN, // Xpulpimg: p.min
riscv_instr::P_MINU, // Xpulpimg: p.minu
riscv_instr::P_MAX, // Xpulpimg: p.max
riscv_instr::P_MAXU, // Xpulpimg: p.maxu
riscv_instr::P_CLIPR, // Xpulpimg: p.clipr
riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur
riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h
riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h
riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b
riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b
riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h
riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h
riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b
riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b
riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h
riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h
riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b
riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b
riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h
riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h
riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b
riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b
riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h
riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h
riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b
riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b
riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h
riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h
riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b
riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b
riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h
riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h
riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b
riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b
riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h
riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h
riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b
riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b
riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h
riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h
riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b
riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b
riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h
riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h
riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b
riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b
riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h
riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h
riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b
riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b
riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h
riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h
riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b
riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b
riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h
riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h
riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b
riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b
riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h
riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h
riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b
riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b
riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h
riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h
riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b
riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b
riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h
riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h
riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b
riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b
riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h
riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h
riscv_instr::PV_DOTSP_B, // Xpulpimg: pv.dotsp.b
riscv_instr::PV_DOTSP_SC_B: begin // Xpulpimg: pv.dotsp.sc.b
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
acc_qvalid_o = valid_instr;
opa_select = Reg;
opb_select = Reg;
acc_register_rd = 1'b1;
end else begin
illegal_inst = 1'b1;
end
end
// 2 source registers (rs1, rd)
riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h
riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b
riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h
riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b
riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h
riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b
riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h
riscv_instr::PV_SDOTSP_SCI_B: begin // Xpulpimg: pv.sdotsp.sci.b
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
acc_qvalid_o = valid_instr;
opa_select = Reg;
opc_select = Reg;
acc_register_rd = 1'b1;
end else begin
illegal_inst = 1'b1;
end
end
// 3 source registers (rs1, rs2, rd)
riscv_instr::P_MAC, // Xpulpimg: p.mac
riscv_instr::P_MSU, // Xpulpimg: p.msu
riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h
riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h
riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b
riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b
riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h
riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h
riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b
riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b
riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h
riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h
riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b
riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b
riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h
riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b
if (snitch_pkg::XPULPIMG) begin
write_rd = 1'b0;
uses_rd = 1'b1;
acc_qvalid_o = valid_instr;
opa_select = Reg;
opb_select = Reg;
opc_select = Reg;
acc_register_rd = 1'b1;
end else begin
illegal_inst = 1'b1;
end
end
/* end of Xpulpimg extension */
// TODO(zarubaf): Illegal Instructions
default: begin
illegal_inst = 1'b1;
end
endcase
// Sanitize illegal instructions so that they don't exert any side-effects.
if (exception) begin
write_rd = 1'b0;
uses_rd = 1'b0;
write_rs1 = 1'b0;
uses_rs1 = 1'b0;
acc_qvalid_o = 1'b0;
next_pc = Exception;
end
end
// An exception is flagged for an illegal instruction or for a misaligned load/store address.
assign exception = st_addr_misaligned | ld_addr_misaligned | illegal_inst;

// pragma translate_off
// Simulation-only reporting, silent while reset is asserted.
always_ff @(posedge clk_i or posedge rst_i) begin
  if (!rst_i) begin
    // Illegal instruction seen in a cycle where the instruction interface handshakes
    if (illegal_inst && inst_valid_o && inst_ready_i) begin
      $display("[Illegal Instruction Core %0d] PC: %h Data: %h", hart_id_i, inst_addr_o, inst_data_i);
    end
    // A new wake-up request arrives while wake_up_q is still set
    if (wake_up_sync_i && wake_up_q) begin
      $display("[Missed wake-up Core %0d] Cycle: %d, Time: %t", hart_id_i, cycle_q, $time);
    end
  end
end
// pragma translate_on
// CSR read logic
// csr_dump     : set when an enabled CSR access hits an address that is not decoded below
// csr_trace_en : load-enable of the trace register (set on accesses to CSR_TRACE)
// csr_trace_q  : current value of the (single-bit) trace register
logic csr_dump, csr_trace_en, csr_trace_q;

always_comb begin
  // Defaults: read back zero, nothing to dump, trace register keeps its value
  csr_trace_en = 1'b0;
  csr_dump     = 1'b0;
  csr_rvalue   = '0;
  // TODO(zarubaf): Needs some more input handling, like illegal instruction exceptions.
  // Right now we skip this due to simplicity.
  if (csr_en) begin
    // The CSR address is taken from instruction bits [31:20]
    unique case (inst_data_i[31:20])
      riscv_instr::CSR_TRACE: begin
        csr_trace_en = 1'b1;
        csr_rvalue   = csr_trace_q;
      end
      riscv_instr::CSR_MHARTID: csr_rvalue = hart_id_i;
`ifdef SNITCH_ENABLE_PERF
      // 64-bit performance counters, exposed as low/high 32-bit halves
      riscv_instr::CSR_MCYCLE:    csr_rvalue = cycle_q[31:0];
      riscv_instr::CSR_MCYCLEH:   csr_rvalue = cycle_q[63:32];
      riscv_instr::CSR_MINSTRET:  csr_rvalue = instret_q[31:0];
      riscv_instr::CSR_MINSTRETH: csr_rvalue = instret_q[63:32];
`endif
      // Any other address: reads zero (default above) and is flagged for the dump
      default: csr_dump = 1'b1;
    endcase
  end
end
// CSR registers
// Trace register: loaded from the ALU result while csr_trace_en is set. csr_trace_q is a
// single bit, so only the LSB of alu_result is retained.
// NOTE(review): FFLAR is defined outside this chunk; presumably a load-enable flip-flop with
// asynchronous reset to '0 -- confirm against the macro definition.
`FFLAR(csr_trace_q, alu_result, csr_trace_en, '0, clk_i, rst_i);

// pragma translate_off
// Simulation-only: print accesses to CSRs that are not implemented
always_ff @(posedge clk_i or posedge rst_i) begin
  if (!rst_i) begin
    // Only report when the instruction is handshaked and the core is not stalling
    if (csr_dump && inst_valid_o && inst_ready_i && !stall) begin
      $display("[DUMP] %3d: 0x%3h = %d", hart_id_i, inst_data_i[31:20], alu_result);
    end
  end
end
// pragma translate_on
// Integer register file: 32-bit entries, port counts taken from the core's parameters.
// NOTE(review): ZERO_REG_ZERO presumably hard-wires register 0 to zero -- confirm in snitch_regfile.
snitch_regfile #(
  .ADDR_WIDTH     ( RegWidth        ),
  .DATA_WIDTH     ( 32              ),
  .NR_READ_PORTS  ( RegNrReadPorts  ),
  .NR_WRITE_PORTS ( RegNrWritePorts ),
  .ZERO_REG_ZERO  ( 1               )
) i_snitch_regfile (
  .clk_i   ( clk_i     ),
  // read side
  .raddr_i ( gpr_raddr ),
  .rdata_o ( gpr_rdata ),
  // write side
  .we_i    ( gpr_we    ),
  .waddr_i ( gpr_waddr ),
  .wdata_i ( gpr_wdata )
);
// --------------------
// Operand Select
// --------------------
// Register file read addresses: port 0 reads rs1, port 1 reads rs2
assign gpr_raddr[1] = rs2;
assign gpr_raddr[0] = rs1;
// connect third read port (reads rd) only if present
if (RegNrReadPorts >= 3) begin : gpr_raddr_2
  assign gpr_raddr[2] = rd;
end

// Operand A
always_comb begin
  // None and every selector not listed below yield zero
  opa = '0;
  unique case (opa_select)
    Reg:          opa = gpr_rdata[0];                  // rs1 register content
    CSRImmediate: opa = {{{32-RegWidth}{1'b0}}, rs1};  // rs1 field itself, zero-extended
    JImmediate:   opa = jimm;
    UImmediate:   opa = uimm;
    default: ;
  endcase
end

// Operand B
always_comb begin
  // None and every selector not listed below yield zero
  opb = '0;
  unique case (opb_select)
    Reg:         opb = gpr_rdata[1];  // rs2 register content
    RegRd:       opb = gpr_rdata[2];  // rd register content (third read port)
    IImmediate:  opb = iimm;
    SImmediate,
    SFImmediate: opb = simm;
    PBImmediate: opb = pbimm;
    PC:          opb = pc_q;
    CSR:         opb = csr_rvalue;
    default: ;
  endcase
end
// --------------------
// ALU
// --------------------
// Main Shifter
// A single right shifter serves all shifts: a left shift bit-reverses the operand, shifts it
// right and bit-reverses the result again.
logic        shift_left;              // shift control: perform a left shift
logic        shift_arithmetic;        // shift control: replicate the sign bit while shifting
logic [31:0] shift_opa;
logic [31:0] shift_opa_reversed;
logic [32:0] shift_opa_ext;
logic [32:0] shift_right_result_ext;
logic [31:0] shift_right_result;
logic [31:0] shift_left_result;

// Bit reversal of the shifter input and of the right-shift result
for (genvar b = 0; b < 32; b++) begin : gen_reverse_opa
  assign shift_left_result[31-b]  = shift_right_result[b];
  assign shift_opa_reversed[31-b] = opa[b];
end

assign shift_opa = shift_left ? shift_opa_reversed : opa;
// Bit 32 carries the sign for arithmetic shifts and is zero otherwise
assign shift_opa_ext = {shift_arithmetic & shift_opa[31], shift_opa};
// Shift amount is the lower five bits of operand B
assign shift_right_result_ext = $unsigned($signed(shift_opa_ext) >>> opb[4:0]);
assign shift_right_result = shift_right_result_ext[31:0];

// Main Adder
// 33-bit operands; bit 32 of the sum is used by the ALU as comparison flag
logic [32:0] alu_opa;
logic [32:0] alu_opb;
assign adder_result = alu_opb + alu_opa;
// ALU
// Computes alu_result from opa/opb according to alu_op and drives the adder operands
// (alu_opa/alu_opb) and the shifter controls (shift_left/shift_arithmetic).
// Subtractions and comparisons reuse the main adder by negating operand B; with 33-bit
// operands, bit 32 of the sum is set exactly when opa < opb.
// Single-bit results are written as explicit 32-bit values instead of relying on implicit
// zero-extension of a narrower expression.
/* verilator lint_off WIDTH */
always_comb begin
  // Defaults: both operands extended to 33 bit as signed values, result = opa + opb
  alu_opa = $signed(opa);
  alu_opb = $signed(opb);
  alu_result = adder_result[31:0];
  shift_left = 1'b0;
  shift_arithmetic = 1'b0;
  unique case (alu_op)
    // Arithmetical operations
    Sub: alu_opb = -$signed(opb);
    // Comparisons
    // Signed: opa - opb on sign-extended operands, bit 32 is the sign of the difference
    Slt: begin
      alu_opb = -$signed(opb);
      alu_result = {31'b0, adder_result[32]};
    end
    Ge: begin
      alu_opb = -$signed(opb);
      alu_result = {31'b0, ~adder_result[32]};
    end
    // Unsigned: same scheme on zero-extended operands
    Sltu: begin
      alu_opa = $unsigned(opa);
      alu_opb = -$unsigned(opb);
      alu_result = {31'b0, adder_result[32]};
    end
    Geu: begin
      alu_opa = $unsigned(opa);
      alu_opb = -$unsigned(opb);
      alu_result = {31'b0, ~adder_result[32]};
    end
    // Shifts
    Sll: begin
      shift_left = 1'b1;
      alu_result = shift_left_result;
    end
    Srl: alu_result = shift_right_result;
    Sra: begin
      shift_arithmetic = 1'b1;
      alu_result = shift_right_result;
    end
    // Logical operations
    LXor: alu_result = opa ^ opb;
    LAnd: alu_result = opa & opb;
    LNAnd: alu_result = (~opa) & opb;
    LOr: alu_result = opa | opb;
    // Equal, not equal: the difference is all-zero exactly when the operands match
    Eq: begin
      alu_opb = -$signed(opb);
      alu_result = {31'b0, ~|adder_result};
    end
    Neq: begin
      alu_opb = -$signed(opb);
      alu_result = {31'b0, |adder_result};
    end
    // Miscellaneous
    // Forward operand A unchanged
    BypassA: begin
      alu_result = opa;
    end
    // Every other operation (e.g. plain addition) uses the adder result
    default: alu_result = adder_result[31:0];
  endcase
end
/* verilator lint_on WIDTH */
// --------------------
// LSU
// --------------------
// Load/store unit of the integer core. The destination register index is passed along as
// request tag (rd) and is returned with the response as lsu_rd.
snitch_lsu #(
  .tag_t               ( logic[RegWidth-1:0]                 ),
  .NumOutstandingLoads ( snitch_pkg::NumIntOutstandingLoads )
) i_snitch_lsu (
  .clk_i         ( clk_i         ),
  .rst_i         ( rst_i         ),
  // request from the core
  .lsu_qtag_i    ( rd            ),
  .lsu_qwrite    ( is_store      ),
  .lsu_qsigned   ( is_signed     ),
  .lsu_qaddr_i   ( lsu_qaddr     ),
  .lsu_qdata_i   ( gpr_rdata[1]  ),
  .lsu_qsize_i   ( ls_size       ),
  .lsu_qamo_i    ( ls_amo        ),
  .lsu_qvalid_i  ( lsu_qvalid    ),
  .lsu_qready_o  ( lsu_qready    ),
  // response to the core
  .lsu_pdata_o   ( ld_result     ),
  .lsu_ptag_o    ( lsu_rd        ),
  .lsu_perror_o  (               ), // ignored for the moment
  .lsu_pvalid_o  ( lsu_pvalid    ),
  .lsu_pready_i  ( lsu_pready    ),
  // data memory request, forwarded to the module ports of the same name
  .data_qaddr_o  ( data_qaddr_o  ),
  .data_qwrite_o ( data_qwrite_o ),
  .data_qdata_o  ( data_qdata_o  ),
  .data_qamo_o   ( data_qamo_o   ),
  .data_qstrb_o  ( data_qstrb_o  ),
  .data_qid_o    ( data_qid_o    ),
  .data_qvalid_o ( data_qvalid_o ),
  .data_qready_i ( data_qready_i ),
  // data memory response, forwarded from the module ports of the same name
  .data_pdata_i  ( data_pdata_i  ),
  .data_perror_i ( data_perror_i ),
  .data_pid_i    ( data_pid_i    ),
  .data_pvalid_i ( data_pvalid_i ),
  .data_pready_o ( data_pready_o )
);
// LSU address: the ALU result (i.e. rs1 + iimm/simm) for regular accesses, the unmodified
// rs1 content for post-increment load/stores
assign lsu_qaddr = !is_postincr ? alu_result : gpr_rdata[0];
// Issue a request for every valid load or store, unless its address is misaligned
assign lsu_qvalid = valid_instr & (is_load | is_store) & ~ld_addr_misaligned & ~st_addr_misaligned;

// NOTE(smazzola): a write-back to rd (from a non-load, non-offloaded instruction) and a
// write-back to rs1 (from a post-increment instruction) are assumed to never happen in the
// same cycle. This currently holds because rs1 is written in the very cycle the post-increment
// load/store is issued (provided that cycle is not a stall) and is not postponed like offloaded
// instructions, so no other instruction writing rd can be issued in that cycle.

// Post-increment write-back: store the updated address to rs1 when the instruction asks for
// it, the core is not stalling and rs1 is not register zero
assign retire_p = (rs1 != 0) & write_rs1 & ~stall;
// Regular write-back: a valid instruction writes rd and rd is not register zero
assign retire_i = (rd != 0) & write_rd & valid_instr;
// -----------------------
// Unaligned Address Check
// -----------------------
// Flags a load/store whose address is not naturally aligned to its access size.
// The check uses lsu_qaddr, i.e. the address that is actually handed to the LSU. For regular
// accesses this is the ALU result; for post-increment accesses it is the rs1 content, while
// the ALU result only holds the incremented pointer written back to rs1 and says nothing
// about the alignment of the access itself.
always_comb begin
  ls_misaligned = 1'b0;
  unique case (ls_size)
    HalfWord: if (lsu_qaddr[0] != 1'b0) ls_misaligned = 1'b1;
    Word: if (lsu_qaddr[1:0] != 2'b00) ls_misaligned = 1'b1;
    Double: if (lsu_qaddr[2:0] != 3'b000) ls_misaligned = 1'b1;
    // remaining sizes (byte accesses) can never be misaligned
    default: ls_misaligned = 1'b0;
  endcase
end
// Attribute the misalignment to stores or loads (integer and floating-point alike)
assign st_addr_misaligned = ls_misaligned & (is_store | is_fp_store);
assign ld_addr_misaligned = ls_misaligned & (is_load | is_fp_load);
// pragma translate_off
// Simulation-only: report the offending access together with the address that was checked
always_ff @(posedge clk_i or posedge rst_i) begin
  if (!rst_i && (ld_addr_misaligned || st_addr_misaligned) && valid_instr && inst_ready_i) begin
    $display("%t: [Misaligned Load/Store Core %0d] PC: %h Address: %h Data: %h", $time, hart_id_i, inst_addr_o, lsu_qaddr, inst_data_i);
  end
end
// pragma translate_on
// --------------------
// Write-Back
// --------------------
// Data written back to the register file originates from one of
//   1. the core itself: ALU result, consecutive PC (jump link) or bypass value
//   2. the LSU
//   3. the accelerator bus
// The mux below covers source 1 only; sources 2 and 3 are merged in at the write ports.
logic [31:0] alu_writeback;

always_comb begin
  casez (rd_select)
    RdAlu: begin
      alu_writeback = alu_result;
    end
    RdConsecPC: begin
      alu_writeback = consec_pc;
    end
    RdBypass: begin
      alu_writeback = rd_bypass;
    end
    // any other selector behaves like RdAlu
    default: begin
      alu_writeback = alu_result;
    end
  endcase
end
// Register file write-port arbitration, selected at elaboration time by RegNrWritePorts.
// Three sources compete for the write port(s):
//   - the instruction currently in the core (retire_i: write to rd, retire_p: write to rs1)
//   - a load response from the LSU (lsu_pvalid, destination lsu_rd)
//   - a response from the accelerator interface (acc_pvalid_i, destination acc_pid_i)
// A response is only acknowledged (lsu_pready / acc_pready_o) in the cycle it is written.
// Any RegNrWritePorts other than 1 or 2 is rejected with $fatal.
//
// One write port: fixed priority core instruction > LSU response > accelerator response.
if (RegNrWritePorts == 1) begin
always_comb begin
// defaults: no write, address/data set up for the core's own write-back
gpr_we[0] = 1'b0;
// NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually
// exclusive; if this should change, the following statement has to be written in another form
gpr_waddr[0] = retire_p ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores
gpr_wdata[0] = alu_writeback;
// external interfaces
lsu_pready = 1'b0;
acc_pready_o = 1'b0;
retire_acc = 1'b0;
retire_load = 1'b0;
// highest priority: write-back of the instruction in the core
if (retire_i | retire_p) begin
gpr_we[0] = 1'b1;
// if we are not retiring another instruction retire the load now
end else if (lsu_pvalid) begin
retire_load = 1'b1;
gpr_we[0] = 1'b1;
gpr_waddr[0] = lsu_rd;
gpr_wdata[0] = ld_result[31:0];
lsu_pready = 1'b1;
// lowest priority: accelerator response
end else if (acc_pvalid_i) begin
retire_acc = 1'b1;
gpr_we[0] = 1'b1;
gpr_waddr[0] = acc_pid_i;
gpr_wdata[0] = acc_pdata_i[31:0];
acc_pready_o = 1'b1;
end
end
// Two write ports:
//   port 0: core instruction; if none retires, an accelerator response
//   port 1: LSU response; if the core retires and no load response is valid, an accelerator
//           response
// Hence a core write-back can retire together with either a load or an accelerator response,
// and without a core write-back a load and an accelerator response can retire together.
end else if (RegNrWritePorts == 2) begin
always_comb begin
// defaults port 0: no write, address/data set up for the core's own write-back
gpr_we[0] = 1'b0;
// NOTE(smazzola): this works because write-backs on rd and rs1 in the same cycle are mutually
// exclusive; if this should change, the following statement has to be written in another form
gpr_waddr[0] = retire_p ? rs1 : rd; // choose whether to writeback at RF[rs1] for post-increment load/stores
gpr_wdata[0] = alu_writeback;
// defaults port 1: no write, address/data set up for a load response
gpr_we[1] = 1'b0;
gpr_waddr[1] = lsu_rd;
gpr_wdata[1] = ld_result[31:0];
// external interfaces
lsu_pready = 1'b0;
acc_pready_o = 1'b0;
retire_acc = 1'b0;
retire_load = 1'b0;
// the core occupies port 0; port 1 goes to the load response, else to the accelerator
if (retire_i | retire_p) begin
gpr_we[0] = 1'b1;
if (lsu_pvalid) begin
retire_load = 1'b1;
gpr_we[1] = 1'b1;
lsu_pready = 1'b1;
end else if (acc_pvalid_i) begin
retire_acc = 1'b1;
gpr_we[1] = 1'b1;
gpr_waddr[1] = acc_pid_i;
gpr_wdata[1] = acc_pdata_i[31:0];
acc_pready_o = 1'b1;
end
// if we are not retiring another instruction retire the load now
// (on port 1), while port 0 is free to take an accelerator response in the same cycle
end else begin
if (acc_pvalid_i) begin
retire_acc = 1'b1;
gpr_we[0] = 1'b1;
gpr_waddr[0] = acc_pid_i;
gpr_wdata[0] = acc_pdata_i[31:0];
acc_pready_o = 1'b1;
end
if (lsu_pvalid) begin
retire_load = 1'b1;
gpr_we[1] = 1'b1;
lsu_pready = 1'b1;
end
end
end
end else begin
// elaboration-time error for unsupported configurations
$fatal(1, "[snitch] Unsupported RegNrWritePorts.");
end
// --------------------------
// RISC-V Formal Interface
// --------------------------
// Drives a single retirement channel (index 0) of the rvfi_* signals; only compiled when
// RISCV_FORMAL is defined. Loads are reported when their response is written back
// (retire_load), not when they are issued, so the issue-time context of the load is captured
// in registers and replayed at retirement.
// NOTE(review): latch_load and the rvfi_* signals are declared outside this chunk.
`ifdef RISCV_FORMAL
logic instr_addr_misaligned;
logic ld_addr_misaligned_q;
// check that the instruction is a control transfer instruction
// and that the next PC (pc_d) is not 4-byte aligned
assign instr_addr_misaligned = (inst_data_i inside {
riscv_instr::JAL,
riscv_instr::JALR,
riscv_instr::BEQ,
riscv_instr::BNE,
riscv_instr::BLT,
riscv_instr::BLTU,
riscv_instr::BGE,
riscv_instr::BGEU
}) && (pc_d[1:0] != 2'b0);
// retire an instruction and increase ordering bit
`FFLAR(rvfi_order[0], rvfi_order[0] + 1, rvfi_valid[0], '0, clk_i, rst_i)
// issue-time context of the load: instruction word, memory address, rs1 index and data, PC
logic [31:0] ld_instr_q;
logic [31:0] ld_addr_q;
logic [4:0] rs1_q;
logic [31:0] rs1_data_q;
logic [31:0] pc_qq;
// we need to latch the load
`FFLAR(ld_instr_q, inst_data_i, latch_load, '0, clk_i, rst_i)
`FFLAR(ld_addr_q, data_qaddr_o, latch_load, '0, clk_i, rst_i)
`FFLAR(rs1_q, rs1, latch_load, '0, clk_i, rst_i)
`FFLAR(rs1_data_q, gpr_rdata[0], latch_load, '0, clk_i, rst_i)
`FFLAR(pc_qq, pc_d, latch_load, '0, clk_i, rst_i)
`FFLAR(ld_addr_misaligned_q, ld_addr_misaligned, latch_load, '0, clk_i, rst_i)
// in case we don't retire another instruction on port 1 we can use it for loads
// (i.e. the load is reported in a cycle where the core stalls)
logic retire_load_port1;
assign retire_load_port1 = retire_load & stall;
// NRET: 1
assign rvfi_halt[0] = 1'b0;
assign rvfi_mode[0] = 2'b11;
assign rvfi_intr[0] = 1'b0;
// an event is reported whenever the core does not stall or a load retires
assign rvfi_valid[0] = !stall | retire_load;
// a load reports an all-zero instruction word at issue; the real word is reported at retirement
assign rvfi_insn[0] = retire_load_port1 ? ld_instr_q : (is_load ? '0 : inst_data_i);
assign rvfi_trap[0] = retire_load_port1 ? ld_addr_misaligned_q : illegal_inst
| instr_addr_misaligned
| st_addr_misaligned;
// all remaining fields select the latched load context when a load retires, and the live
// signals of the current instruction otherwise
assign rvfi_rs1_addr[0] = (retire_load_port1) ? rs1_q : rs1;
assign rvfi_rs1_rdata[0] = (retire_load_port1) ? rs1_data_q : gpr_rdata[0];
assign rvfi_rs2_addr[0] = (retire_load_port1) ? '0 : rs2;
assign rvfi_rs2_rdata[0] = (retire_load_port1) ? '0 : gpr_rdata[1];
// rd address/data are reported as zero unless the register is really written
assign rvfi_rd_addr[0] = (retire_load_port1) ? lsu_rd : ((gpr_we[0] && write_rd) ? rd : '0);
assign rvfi_rd_wdata[0] = (retire_load_port1) ? (lsu_rd != 0 ? ld_result[31:0] : '0) : (rd != 0 && gpr_we[0] && write_rd) ? gpr_wdata[0] : 0;
assign rvfi_pc_rdata[0] = (retire_load_port1) ? pc_qq : pc_q;
assign rvfi_pc_wdata[0] = (retire_load_port1) ? (pc_qq + 4) : pc_d;
assign rvfi_mem_addr[0] = (retire_load_port1) ? ld_addr_q : data_qaddr_o;
// the write mask is only reported in the cycle the memory request handshakes
assign rvfi_mem_wmask[0] = (retire_load_port1) ? '0 : ((data_qvalid_o && data_qready_i) ? data_qstrb_o[3:0] : '0);
// NOTE(review): a retiring load always reports a full-word read mask, independent of the
// access size -- confirm this is what the formal checks expect
assign rvfi_mem_rmask[0] = (retire_load_port1) ? 4'hf : '0;
assign rvfi_mem_rdata[0] = (retire_load_port1) ? data_pdata_i[31:0] : '0;
assign rvfi_mem_wdata[0] = (retire_load_port1) ? '0 : data_qdata_o[31:0];
`endif
// ----------
// Assertions
// ----------
// Make sure the instruction interface is stable. Otherwise, Snitch might violate the protocol at
// the LSU or accelerator interface by withdrawing the valid signal.
// Property: when a fetch handshake (inst_valid_o && inst_ready_i) is followed one cycle later
// by a valid request whose address (inst_addr_o) did not change, that request must be accepted
// as well (inst_ready_i high) and must deliver the same instruction word (inst_data_i unchanged).
// NOTE(review): ASSERT is a macro defined outside this chunk; clk_i and rst_i are presumably its
// clock and disable/reset arguments -- confirm against the macro definition.
`ASSERT(InstructionInterfaceStable,
(inst_valid_o && inst_ready_i) ##1 (inst_valid_o && $stable(inst_addr_o))
|-> inst_ready_i && $stable(inst_data_i), clk_i, rst_i)
endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Demux based on address
/// Address-based request demultiplexer with response merging.
///
/// A single request stream is steered to exactly one of `NrOutput` downstream
/// ports. The target is found by comparing `req_addr_i` against every rule in
/// `address_map_i` (`(addr & mask) == value`); the first matching rule, as
/// reported by `find_first_one`, supplies the `slave_idx` used as the demux
/// select. The request payload is broadcast unchanged to all outputs, only the
/// valid/ready handshake is demultiplexed.
///
/// Responses from all downstream ports are merged into one stream by an
/// `rr_arb_tree` operated with an external priority that is tied to zero.
module snitch_addr_demux
  import mempool_pkg::address_map_t;
  import cf_math_pkg::idx_width;
#(
  /// Number of downstream (output) ports.
  parameter int unsigned NrOutput     = 2,
  /// Width of the address used for routing.
  parameter int unsigned AddressWidth = 32,
  /// Number of routing rules in `address_map_i`.
  parameter int unsigned NumRules     = 1,
  /// Request payload type (opaque to this module).
  parameter type         req_t        = logic,
  /// Response payload type (opaque to this module).
  parameter type         resp_t       = logic,
  /// Dependent parameters, DO NOT OVERRIDE!
  localparam integer     LogNrOutput  = idx_width(NrOutput)
) (
  input  logic                         clk_i,
  input  logic                         rst_ni,
  // Upstream port: one request in, one merged response out
  input  logic [AddressWidth-1:0]      req_addr_i,
  input  req_t                         req_payload_i,
  input  logic                         req_valid_i,
  output logic                         req_ready_o,
  output resp_t                        resp_payload_o,
  output logic                         resp_valid_o,
  input  logic                         resp_ready_i,
  // Downstream ports: one request/response pair per output
  output req_t         [NrOutput-1:0]  req_payload_o,
  output logic         [NrOutput-1:0]  req_valid_o,
  input  logic         [NrOutput-1:0]  req_ready_i,
  input  resp_t        [NrOutput-1:0]  resp_payload_i,
  input  logic         [NrOutput-1:0]  resp_valid_i,
  output logic         [NrOutput-1:0]  resp_ready_o,
  // Routing rules (mask / value / slave_idx per rule)
  input  address_map_t [NumRules-1:0]  address_map_i
);

  logic [LogNrOutput-1:0]         slave_select;
  logic [NumRules-1:0]            addr_match;
  logic [idx_width(NumRules)-1:0] rule_select;

  // The selected rule decides which downstream port receives the request.
  assign slave_select = address_map_i[rule_select].slave_idx;

  // Address Decoder: one match bit per routing rule.
  always_comb begin : addr_decoder
    for (int i = 0; i < NumRules; i++) begin
      addr_match[i] = (req_addr_i & address_map_i[i].mask) == address_map_i[i].value;
    end
  end

  // Pick the first matching rule.
  // NOTE(review): `no_ones_o` is left unconnected, so an address that matches
  // no rule is still routed according to whatever index `find_first_one`
  // reports in that case -- confirm the address map always covers all addresses.
  find_first_one #(
    .WIDTH(NumRules)
  ) find_slave_select (
    .in_i       ( addr_match   ),
    .first_one_o( rule_select  ),
    .no_ones_o  ( /* Unused */ )
  );

  // Demux request to correct interconnect
  stream_demux #(
    .N_OUP ( NrOutput )
  ) i_req_demux (
    .inp_valid_i ( req_valid_i  ),
    .inp_ready_o ( req_ready_o  ),
    .oup_sel_i   ( slave_select ),
    .oup_valid_o ( req_valid_o  ),
    .oup_ready_i ( req_ready_i  )
  );

  // The payload is broadcast; only the port with `req_valid_o` set consumes it.
  for (genvar i = 0; i < NrOutput; i++) begin : gen_req_outputs
    assign req_payload_o[i] = req_payload_i;
  end

  // Merge the response streams. The arbiter runs with an external priority
  // (`ExtPrio`) which is held constant at zero.
  logic [idx_width(NrOutput)-1:0] rr_prio;
  assign rr_prio = 0;

  rr_arb_tree #(
    .DataType (resp_t  ),
    .NumIn    (NrOutput),
    .AxiVldRdy(1'b1    ),
    .ExtPrio  (1'b1    )
  ) i_resp_stream_arbiter (
    .clk_i  (clk_i         ),
    .rst_ni (rst_ni        ),
    .flush_i(1'b0          ),
    .rr_i   (rr_prio       ),
    .req_i  (resp_valid_i  ),
    .data_i (resp_payload_i),
    .gnt_o  (resp_ready_o  ),
    .req_o  (resp_valid_o  ),
    .data_o (resp_payload_o),
    .gnt_i  (resp_ready_i  ),
    .idx_o  (/* Unused */  )
  );

  /* pragma translate_off */
`ifdef FORMAL
  logic f_past_valid;
  initial f_past_valid = 1'b0;
  always @(posedge clk_i)
    f_past_valid <= 1'b1;

  // assert reset in time step zero and deassert
  assume property (@(posedge clk_i) !f_past_valid |-> !rst_ni);

  // Constrain the environment: a response handshake on an output is only
  // possible one cycle after a request handshake on that same output.
  // The request payload is an opaque `req_t`, so this module has no notion of
  // read vs. write; the former `!req_write_i` qualifier referred to a signal
  // that does not exist here and prevented elaboration with FORMAL defined.
  for (genvar i = 0; i < NrOutput; i++) begin
    assume property (@(posedge clk_i) disable iff (!rst_ni) (resp_valid_i[i] & resp_ready_o[i]) |-> $past(req_valid_o[i] & req_ready_i[i]));
  end
`endif

  // Check that an accepted upstream request is handed to a downstream port in
  // the same cycle (i.e. the request path is combinational).
  assert property (@(posedge clk_i) disable iff (!rst_ni) (req_valid_i & req_ready_o) |-> |(req_valid_o & req_ready_i));
  /* pragma translate_on */

endmodule
// Copyright 2018-2019 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// File: axi_adapter.sv
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Date: 1.8.2018
//
// Description: Manages communication with the AXI Bus
/// Converts a simple valid/ready request/response interface (`slv_q*` request,
/// `slv_p*` response) into an AXI master port.
///
/// * Writes: AW is issued directly from the request, the write data/strobe is
///   queued in a FIFO and sent on W once available. Every write is a single
///   beat (`aw.len = 0`, `w.last = 1`). B responses are always accepted and
///   are not forwarded to the slave-side interface.
/// * Reads: AR is issued directly from the request with a burst length of
///   `slv_qrlen_i`; R beats are forwarded as responses.
/// * Atomics: `slv_qamo_i` is translated into the AXI `atop` field.
/// * If the AXI data bus is wider than `data_t`, write data/strobes are shifted
///   into the addressed lane and read data is shifted back down.
module snitch_axi_adapter #(
  /// Depth of the write data FIFO.
  parameter int unsigned WriteFIFODepth = 2,
  /// Depth of the FIFO tracking the lane of outstanding reads
  /// (only instantiated if the AXI bus is wider than `data_t`).
  parameter int unsigned ReadFIFODepth  = 2,
  parameter type         addr_t         = logic,
  parameter type         data_t         = logic,
  parameter type         strb_t         = logic,
  parameter type         axi_mst_req_t  = logic,
  parameter type         axi_mst_resp_t = logic
) (
  input  logic          clk_i,
  input  logic          rst_ni,
  // AXI port
  input  axi_mst_resp_t axi_resp_i,
  output axi_mst_req_t  axi_req_o,
  // Request channel
  input  addr_t         slv_qaddr_i,
  input  logic          slv_qwrite_i,
  input  logic [3:0]    slv_qamo_i,
  input  data_t         slv_qdata_i,
  input  logic [2:0]    slv_qsize_i,
  input  strb_t         slv_qstrb_i,
  input  logic [7:0]    slv_qrlen_i,
  input  logic          slv_qvalid_i,
  output logic          slv_qready_o,
  // Response channel (read data only)
  output data_t         slv_pdata_o,
  output logic          slv_perror_o,
  output logic          slv_plast_o,
  output logic          slv_pvalid_o,
  input  logic          slv_pready_i
);

  localparam DataWidth     = $bits(data_t);
  localparam StrbWidth     = $bits(strb_t);
  localparam SlvByteOffset = $clog2($bits(strb_t));
  localparam AxiByteOffset = $clog2($bits(axi_req_o.w.strb));
  // Widths of the AXI write data/strobe fields, used for explicit zero-extension.
  localparam int unsigned AxiDataWidth = $bits(axi_req_o.w.data);
  localparam int unsigned AxiStrbWidth = $bits(axi_req_o.w.strb);

  // Encoding of `slv_qamo_i`.
  typedef enum logic [3:0] {
    AMONone = 4'h0,
    AMOSwap = 4'h1,
    AMOAdd  = 4'h2,
    AMOAnd  = 4'h3,
    AMOOr   = 4'h4,
    AMOXor  = 4'h5,
    AMOMax  = 4'h6,
    AMOMaxu = 4'h7,
    AMOMin  = 4'h8,
    AMOMinu = 4'h9,
    AMOLR   = 4'hA,
    AMOSC   = 4'hB
  } amo_op_t;

  // Entry of the write data FIFO.
  typedef struct packed {
    data_t data;
    strb_t strb;
  } write_t;

  logic   write_full;
  logic   write_empty;
  logic   read_full;
  write_t write_data_in;
  write_t write_data_out;

  // ----------
  // AW channel
  // ----------
  assign axi_req_o.aw.addr   = slv_qaddr_i;
  assign axi_req_o.aw.prot   = 3'b0;
  assign axi_req_o.aw.region = 4'b0;
  assign axi_req_o.aw.size   = slv_qsize_i;
  assign axi_req_o.aw.len    = '0;
  assign axi_req_o.aw.burst  = axi_pkg::BURST_INCR;
  assign axi_req_o.aw.lock   = 1'b0;
  assign axi_req_o.aw.cache  = axi_pkg::CACHE_MODIFIABLE;
  assign axi_req_o.aw.qos    = 4'b0;
  assign axi_req_o.aw.id     = '0;
  assign axi_req_o.aw.user   = '0;
  // Only issue a write address if its data can be queued in the same cycle.
  assign axi_req_o.aw_valid  = ~write_full & slv_qvalid_i & slv_qwrite_i;

  // Translate the AMO opcode into an AXI atomic operation and prepare the
  // write data. AMONone, AMOLR and AMOSC fall through to the default (no atop).
  always_comb begin
    write_data_in.data = slv_qdata_i;
    write_data_in.strb = slv_qstrb_i;
    unique case (amo_op_t'(slv_qamo_i))
      // RISC-V atops have a load semantic
      AMOSwap: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP};
      AMOAdd:  axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
      AMOAnd: begin
        // in this case we need to invert the data to get a "CLR"
        write_data_in.data = ~slv_qdata_i;
        axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
      end
      AMOOr:   axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
      AMOXor:  axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
      AMOMax:  axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
      AMOMaxu: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
      AMOMin:  axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
      AMOMinu: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
      default: axi_req_o.aw.atop = '0;
    endcase
  end

  // Number of address bits selecting the `data_t`-sized lane inside an AXI
  // word. Forced to 1 when both widths match so that `shift_t` stays legal.
  localparam int unsigned ShiftWidth = (SlvByteOffset == AxiByteOffset) ? 1 : AxiByteOffset - SlvByteOffset;
  typedef logic [ShiftWidth-1:0] shift_t;

  // Write FIFO entry extended with the lane the data belongs to.
  typedef struct packed {
    write_t data;
    shift_t shift;
  } write_ext_t;

  // ------------------
  // W channel / R data
  // ------------------
  if (SlvByteOffset == AxiByteOffset) begin : gen_w_data
    // Same width on both sides: data passes through unshifted.
    // Write
    fifo_v3 #(
      .DEPTH ( WriteFIFODepth ),
      .dtype ( write_t        )
    ) i_fifo_w_data (
      .clk_i,
      .rst_ni,
      .flush_i    ( 1'b0                                     ),
      .testmode_i ( 1'b0                                     ),
      .full_o     ( write_full                               ),
      .empty_o    ( write_empty                              ),
      .usage_o    ( /* NC */                                 ),
      .data_i     ( write_data_in                            ),
      .push_i     ( slv_qvalid_i & slv_qready_o & slv_qwrite_i ),
      .data_o     ( write_data_out                           ),
      .pop_i      ( axi_req_o.w_valid & axi_resp_i.w_ready   )
    );
    assign axi_req_o.w.data = write_data_out.data;
    assign axi_req_o.w.strb = write_data_out.strb;
    // Read
    assign read_full   = 1'b0;
    assign slv_pdata_o = axi_resp_i.r.data;
  end else begin : gen_w_data
    // AXI bus is wider: remember the addressed lane alongside the data.
    // Write
    write_ext_t write_data_ext_in, write_data_ext_out;
    fifo_v3 #(
      .DEPTH ( WriteFIFODepth ),
      .dtype ( write_ext_t    )
    ) i_fifo_w_data (
      .clk_i,
      .rst_ni,
      .flush_i    ( 1'b0                                     ),
      .testmode_i ( 1'b0                                     ),
      .full_o     ( write_full                               ),
      .empty_o    ( write_empty                              ),
      .usage_o    ( /* NC */                                 ),
      .data_i     ( write_data_ext_in                        ),
      .push_i     ( slv_qvalid_i & slv_qready_o & slv_qwrite_i ),
      .data_o     ( write_data_ext_out                       ),
      .pop_i      ( axi_req_o.w_valid & axi_resp_i.w_ready   )
    );
    assign write_data_ext_in.data  = write_data_in;
    assign write_data_ext_in.shift = slv_qaddr_i[AxiByteOffset-1:SlvByteOffset];
    // Zero-extend to the AXI width explicitly, then move data and strobes into
    // the addressed lane. (An unsized `'0` is not permitted inside a
    // concatenation, hence the sized replication.)
    assign axi_req_o.w.data = {{(AxiDataWidth-DataWidth){1'b0}}, write_data_ext_out.data.data} << ($bits(data_t) * write_data_ext_out.shift);
    assign axi_req_o.w.strb = {{(AxiStrbWidth-StrbWidth){1'b0}}, write_data_ext_out.data.strb} << ($bits(strb_t) * write_data_ext_out.shift);
    // Read
    // The lane of every accepted read request is queued so that the returned
    // AXI word can be shifted back down to `data_t`.
    // NOTE(review): one entry is pushed per accepted AR but one entry is popped
    // per accepted R beat. For `slv_qrlen_i > 0` this looks like it would get
    // out of step -- confirm that bursts are only used when both widths match.
    shift_t read_shift;
    fifo_v3 #(
      .DEPTH      ( ReadFIFODepth               ),
      .DATA_WIDTH ( AxiByteOffset-SlvByteOffset )
    ) i_fifo_r_shift (
      .clk_i,
      .rst_ni,
      .flush_i    ( 1'b0                                        ),
      .testmode_i ( 1'b0                                        ),
      .full_o     ( read_full                                   ),
      .empty_o    ( /* NC */                                    ),
      .usage_o    ( /* NC */                                    ),
      .data_i     ( slv_qaddr_i[AxiByteOffset-1:SlvByteOffset]  ),
      .push_i     ( slv_qvalid_i & slv_qready_o & ~slv_qwrite_i ),
      .data_o     ( read_shift                                  ),
      .pop_i      ( axi_resp_i.r_valid & slv_pready_i           )
    );
    assign slv_pdata_o = axi_resp_i.r.data >> ($bits(data_t) * read_shift);
  end

  // Writes are always single-beat; send data as soon as it is queued.
  assign axi_req_o.w.last  = 1'b1;
  assign axi_req_o.w.user  = '0;
  assign axi_req_o.w_valid = ~write_empty;

  // B channel: always ready, the write response is dropped.
  assign axi_req_o.b_ready = 1'b1;

  // ----------
  // AR channel
  // ----------
  assign axi_req_o.ar.addr   = slv_qaddr_i;
  assign axi_req_o.ar.prot   = 3'b0;
  assign axi_req_o.ar.region = 4'b0;
  assign axi_req_o.ar.size   = slv_qsize_i;
  assign axi_req_o.ar.len    = slv_qrlen_i;
  assign axi_req_o.ar.burst  = axi_pkg::BURST_INCR;
  assign axi_req_o.ar.lock   = 1'b0;
  assign axi_req_o.ar.cache  = axi_pkg::CACHE_MODIFIABLE;
  assign axi_req_o.ar.qos    = 4'b0;
  assign axi_req_o.ar.id     = '0;
  assign axi_req_o.ar.user   = '0;
  assign axi_req_o.ar_valid  = ~read_full & slv_qvalid_i & ~slv_qwrite_i;

  // ---------
  // R channel
  // ---------
  // Anything other than OKAY/EXOKAY is reported as an error.
  assign slv_perror_o      = (axi_resp_i.r.resp inside {axi_pkg::RESP_EXOKAY, axi_pkg::RESP_OKAY}) ? 1'b0 : 1'b1;
  assign slv_plast_o       = axi_resp_i.r.last;
  assign slv_pvalid_o      = axi_resp_i.r_valid;
  assign axi_req_o.r_ready = slv_pready_i;

  // A request is accepted once its address channel handshake (AR or AW) happens.
  assign slv_qready_o = (axi_resp_i.ar_ready & axi_req_o.ar_valid)
                      | (axi_resp_i.aw_ready & axi_req_o.aw_valid);

`ifndef VERILATOR
  // pragma translate_off
  // Despite its label, this property checks that accepted write requests do
  // not ask for a burst (`slv_qrlen_i` must be zero).
  hot_one : assert property (
    @(posedge clk_i) disable iff (!rst_ni) (slv_qvalid_i & slv_qwrite_i & slv_qready_o) |-> (slv_qrlen_i == 0))
    else $warning("Bursts are not supported for write transactions");
  // pragma translate_on
`endif

endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
/// Arbitrates request/response interface
/// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
/// Demux based on arbitration
// Merges the request streams of `NrPorts` initiators onto a single downstream
// port and routes the returning responses back to the initiator that issued
// the corresponding request.
//
// Request path:  per-port (optional) spill register -> stream arbiter.
// Response path: the index of every accepted non-write request is stored in a
//                FIFO; its head selects the port that receives the current
//                response. Routing therefore relies on responses coming back
//                in the order the requests were issued.
//
// `req_t` must provide a `write` field (used to decide whether a request will
// produce a response).
module snitch_demux #(
// Number of initiator (upstream) ports
parameter int unsigned NrPorts = 4,
parameter type req_t = snitch_pkg::dreq_t,
parameter type resp_t = snitch_pkg::dresp_t,
// Depth of the FIFO tracking which port awaits a response
parameter int unsigned RespDepth = 8,
// Per-port: insert a spill register on the incoming request (1) or bypass it (0)
parameter bit [NrPorts-1:0] RegisterReq = '0,
parameter Arbiter = "rr" // "rr" or "prio"
) (
input logic clk_i,
input logic rst_ni,
// upstream ports (one request/response pair per initiator)
input req_t [NrPorts-1:0] req_payload_i,
input logic [NrPorts-1:0] req_valid_i,
output logic [NrPorts-1:0] req_ready_o,
output resp_t [NrPorts-1:0] resp_payload_o,
output logic [NrPorts-1:0] resp_last_o,
output logic [NrPorts-1:0] resp_valid_o,
input logic [NrPorts-1:0] resp_ready_i,
// merged downstream port
output req_t req_payload_o,
output logic req_valid_o,
input logic req_ready_i,
input resp_t resp_payload_i,
input logic resp_last_i,
input logic resp_valid_i,
output logic resp_ready_o
);
// Index width; kept at 1 bit for a single port so the vectors stay legal.
localparam LogNrPorts = (NrPorts > 1) ? $clog2(NrPorts) : 1;
logic [NrPorts-1:0] req_valid_masked;
logic [NrPorts-1:0] req_ready_masked;
// idx:     port whose request is being accepted (not driven when NrPorts == 1)
// idx_rsp: port that receives the current response
logic [LogNrPorts-1:0] idx, idx_rsp;
// Response tracking FIFO is full; no further requests may be issued.
logic full;
req_t [NrPorts-1:0] req_payload_q;
logic [NrPorts-1:0] req_valid_q;
logic [NrPorts-1:0] req_ready_q;
// Cut the incoming path
for (genvar i = 0; i < NrPorts; i++) begin : gen_spill_regs
spill_register #(
.T ( req_t ),
.Bypass ( !RegisterReq[i] )
) i_spill_register_tcdm_req (
.clk_i,
.rst_ni,
.valid_i ( req_valid_i [i] ),
.ready_o ( req_ready_o [i] ),
.data_i ( req_payload_i [i] ),
.valid_o ( req_valid_q [i] ),
.ready_i ( req_ready_masked [i] ),
.data_o ( req_payload_q [i] )
);
end
// Stall the handshake in both directions while the response FIFO is full.
for (genvar i = 0; i < NrPorts; i++) begin : gen_req_valid_masked
assign req_valid_masked[i] = req_valid_q[i] & ~full;
assign req_ready_masked[i] = req_ready_q[i] & ~full;
end
/// Arbitrate on instruction request port
stream_arbiter #(
.DATA_T ( req_t ),
.N_INP ( NrPorts ),
.ARBITER ( Arbiter )
) i_stream_arbiter_req (
.clk_i,
.rst_ni,
.inp_data_i ( req_payload_q ),
.inp_valid_i ( req_valid_masked ),
.inp_ready_o ( req_ready_q ),
.oup_data_o ( req_payload_o ),
.oup_valid_o ( req_valid_o ),
.oup_ready_i ( req_ready_i )
);
// With a single port there is nothing to track: responses always go to port 0.
if (NrPorts == 1) begin
assign idx_rsp = 0;
assign full = 1'b0;
end else begin
// Binary index of the port whose request handshakes with the arbiter.
onehot_to_bin #(
.ONEHOT_WIDTH ( NrPorts )
) i_onehot_to_bin (
.onehot ( req_valid_q & req_ready_q ),
.bin ( idx )
);
// Remember, in issue order, which port each outstanding response belongs to.
fifo_v3 #(
.DATA_WIDTH ( LogNrPorts ),
.DEPTH ( RespDepth )
) i_resp_fifo (
.clk_i,
.rst_ni,
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( full ),
.empty_o ( ),
.usage_o ( ),
.data_i ( idx ),
// only reads will generate a response message
.push_i ( req_valid_o & req_ready_i & ~req_payload_o.write ),
.data_o ( idx_rsp ),
// an entry is released once the last beat of its response has been accepted
.pop_i ( resp_ready_o & resp_valid_i & resp_last_i )
);
end
// Steer the response handshake to the port at the head of the FIFO.
stream_demux #(
.N_OUP ( NrPorts )
) i_stream_demux_resp (
.inp_valid_i ( resp_valid_i ),
.inp_ready_o ( resp_ready_o ),
.oup_sel_i ( idx_rsp ),
.oup_valid_o ( resp_valid_o ),
.oup_ready_i ( resp_ready_i )
);
// Payload and last flag are broadcast; only `resp_valid_o` is port-specific.
for (genvar i = 0; i < NrPorts; i++) begin
assign resp_payload_o[i] = resp_payload_i;
assign resp_last_o[i] = resp_last_i;
end
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
/// Shared instruction cache with one private L0 cache per fetch port.
///
/// Structure, as instantiated below:
///  * Per fetch port: a `snitch_icache_l0` handling cacheable fetches. Its
///    refill requests/responses cross into the `clk_i` domain through
///    isochronous spill registers (bypassed if `ISO_CROSSING` is 0).
///  * `l0_to_bypass`: serves non-cacheable fetches of all ports by turning
///    them into bypass refill requests.
///  * Shared part (`clk_i`): stream arbiter over all L0 requests, tag/data
///    lookup (`snitch_icache_lookup`), miss handler (`snitch_icache_handler`),
///    arbitration between miss refills and bypass requests, and the refill
///    unit (`snitch_icache_refill`) driving the AXI-like read-only port, which
///    is decoupled by spill registers in both directions.
module snitch_icache #(
  /// Number of request (fetch) ports
  parameter int NR_FETCH_PORTS = -1,
  /// L0 Cache Line Count
  parameter int L0_LINE_COUNT = -1,
  /// Cache Line Width
  parameter int LINE_WIDTH = -1,
  /// The number of cache lines per set. Power of two; >= 2.
  parameter int LINE_COUNT = -1,
  /// The set associativity of the cache. Power of two; >= 1.
  parameter int SET_COUNT = 1,
  /// Fetch interface address width. Same as FILL_AW; >= 1.
  parameter int FETCH_AW = -1,
  /// Fetch interface data width. Power of two; >= 8.
  parameter int FETCH_DW = -1,
  /// Fill interface address width. Same as FETCH_AW; >= 1.
  parameter int FILL_AW = -1,
  /// Fill interface data width. Power of two; >= 8.
  parameter int FILL_DW = -1,
  /// Replace the L1 tag banks with latch-based SCM.
  parameter bit L1_TAG_SCM = 0,
  /// Latch-based storage option, forwarded to the sub-modules via `CFG`.
  /// This reduces area impact at the cost of the increased hassle of having
  /// latches in the design. Affected cells match the hierarchical pattern
  /// i_snitch_icache/gen_prefetcher*i_snitch_icache_l0/data*/Q
  /// NOTE(review): the original description of this parameter was truncated;
  /// presumably it makes the L0 data storage latch-based -- confirm.
  parameter bit EARLY_LATCH = 0,
  /// Tag width of the data determining logic, this can reduce the
  /// critical path into the L0 cache when small. The trade-off
  /// is a higher miss-rate in case the smaller tag matches more
  /// tags. The tag must not be wider than the necessary L0 tag.
  /// If configured to `-1` the entire tag is used, effectively
  /// disabling this feature.
  parameter int L0_EARLY_TAG_WIDTH = -1,
  /// Operate L0 cache in slower clock-domain
  parameter bit ISO_CROSSING = 1
) (
  /// Clock of the shared cache part (lookup, handler, refill).
  input  logic clk_i,
  /// Clock of the L0 caches and of the bypass path.
  input  logic clk_d2_i,
  input  logic rst_ni,

  input  logic [NR_FETCH_PORTS-1:0]                enable_prefetching_i,
  output snitch_icache_pkg::icache_events_t [NR_FETCH_PORTS-1:0] icache_events_o,

  // Flush handshake
  input  logic flush_valid_i,
  output logic flush_ready_o,

  // Fetch ports
  input  logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0]  inst_addr_i,
  output logic [NR_FETCH_PORTS-1:0][FETCH_DW-1:0]  inst_data_o,
  input  logic [NR_FETCH_PORTS-1:0]                inst_cacheable_i,
  input  logic [NR_FETCH_PORTS-1:0]                inst_valid_i,
  output logic [NR_FETCH_PORTS-1:0]                inst_ready_o,
  output logic [NR_FETCH_PORTS-1:0]                inst_error_o,

  // AXI-like read-only interface
  output logic [FILL_AW-1:0]  refill_qaddr_o,
  output logic [7:0]          refill_qlen_o,
  output logic                refill_qvalid_o,
  input  logic                refill_qready_i,
  input  logic [FILL_DW-1:0]  refill_pdata_i,
  input  logic                refill_perror_i,
  input  logic                refill_pvalid_i,
  input  logic                refill_plast_i,
  output logic                refill_pready_o
);

  // Bundle the parameters up into a proper configuration struct that we can
  // pass to submodules.
  localparam PENDING_COUNT = 8;
  localparam snitch_icache_pkg::config_t CFG = '{
    NR_FETCH_PORTS:     NR_FETCH_PORTS,
    LINE_WIDTH:         LINE_WIDTH,
    LINE_COUNT:         LINE_COUNT,
    L0_LINE_COUNT:      L0_LINE_COUNT,
    SET_COUNT:          SET_COUNT,
    PENDING_COUNT:      PENDING_COUNT,
    FETCH_AW:           FETCH_AW,
    FETCH_DW:           FETCH_DW,
    FILL_AW:            FILL_AW,
    FILL_DW:            FILL_DW,
    L1_TAG_SCM:         L1_TAG_SCM,
    EARLY_LATCH:        EARLY_LATCH,
    FETCH_ALIGN:        $clog2(FETCH_DW/8),
    FILL_ALIGN:         $clog2(FILL_DW/8),
    LINE_ALIGN:         $clog2(LINE_WIDTH/8),
    COUNT_ALIGN:        $clog2(LINE_COUNT),
    SET_ALIGN:          $clog2(SET_COUNT),
    TAG_WIDTH:          FETCH_AW - $clog2(LINE_WIDTH/8) - $clog2(LINE_COUNT) + 1,
    L0_TAG_WIDTH:       FETCH_AW - $clog2(LINE_WIDTH/8),
    // `-1` selects the full L0 tag, i.e. the early tag equals the L0 tag.
    L0_EARLY_TAG_WIDTH: (L0_EARLY_TAG_WIDTH == -1) ? FETCH_AW - $clog2(LINE_WIDTH/8) : L0_EARLY_TAG_WIDTH,
    ID_WIDTH_REQ:       $clog2(NR_FETCH_PORTS) + 1,
    // Two response ID bits per fetch port (see response routing below).
    ID_WIDTH_RESP:      2*NR_FETCH_PORTS,
    PENDING_IW:         $clog2(PENDING_COUNT)
  };

  // pragma translate_off
`ifndef VERILATOR
  // Check invariants.
  initial begin
    assert(L0_LINE_COUNT > 0);
    assert(LINE_WIDTH > 0);
    assert(LINE_COUNT > 1);
    assert(SET_COUNT >= 2) else $warning("Only >= 2 sets are supported");
    assert(FETCH_AW > 0);
    assert(FETCH_DW > 0);
    assert(FILL_AW > 0);
    assert(FILL_DW > 0);
    // The early tag may use at most the full L0 tag. Equality must be allowed:
    // it is exactly what the default `L0_EARLY_TAG_WIDTH == -1` resolves to.
    assert(CFG.L0_EARLY_TAG_WIDTH <= CFG.L0_TAG_WIDTH);
    assert(FETCH_AW == FILL_AW);
    assert(2**$clog2(LINE_WIDTH) == LINE_WIDTH);
    assert(2**$clog2(LINE_COUNT) == LINE_COUNT);
    assert(2**$clog2(SET_COUNT) == SET_COUNT);
    assert(2**$clog2(FETCH_DW) == FETCH_DW);
    assert(2**$clog2(FILL_DW) == FILL_DW);
  end
`endif
  // pragma translate_on

  // Instantiate the optional early cache, or bypass it.
  // (These `early_*` signals are declared but not referenced in this module.)
  logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0]   early_addr;
  logic [NR_FETCH_PORTS-1:0][LINE_WIDTH-1:0] early_data;
  logic [NR_FETCH_PORTS-1:0]                 early_valid;
  logic [NR_FETCH_PORTS-1:0]                 early_ready;
  logic [NR_FETCH_PORTS-1:0]                 early_error;

  // The prefetch module is responsible for taking the 1-channel valid/ready
  // transaction from the early cache and translate it into a 2-channel
  // transaction. Once the actual incoming request has been accepted on the
  // `req` channel, the prefetcher issues another low-priority request for the
  // next cache line.
  typedef struct packed {
    logic [CFG.FETCH_AW-1:0]     addr;
    logic [CFG.ID_WIDTH_REQ-1:0] id;
  } prefetch_req_t;

  typedef struct packed {
    logic [CFG.LINE_WIDTH-1:0]    data;
    logic                         error;
    logic [CFG.ID_WIDTH_RESP-1:0] id;
  } prefetch_resp_t;

  // Per-port L0 refill requests (clk_i side of the crossing) ...
  prefetch_req_t  [NR_FETCH_PORTS-1:0] prefetch_req;
  logic           [NR_FETCH_PORTS-1:0] prefetch_req_valid;
  logic           [NR_FETCH_PORTS-1:0] prefetch_req_ready;
  // ... and the arbitrated request going into the lookup.
  prefetch_req_t                       prefetch_lookup_req;
  logic                                prefetch_lookup_req_valid;
  logic                                prefetch_lookup_req_ready;
  // Per-port responses and the single response produced by the handler.
  prefetch_resp_t [NR_FETCH_PORTS-1:0] prefetch_rsp;
  logic           [NR_FETCH_PORTS-1:0] prefetch_rsp_valid;
  logic           [NR_FETCH_PORTS-1:0] prefetch_rsp_ready;
  prefetch_resp_t                      prefetch_lookup_rsp;
  logic                                prefetch_lookup_rsp_valid;
  logic                                prefetch_lookup_rsp_ready;

  // Request towards the refill unit; `bypass` marks non-cacheable accesses.
  typedef struct packed {
    logic [CFG.FETCH_AW-1:0]   addr;
    logic [CFG.PENDING_IW-1:0] id;
    logic                      bypass;
  } miss_refill_req_t;

  miss_refill_req_t handler_req, bypass_req, bypass_req_q, refill_req;
  logic handler_req_valid, bypass_req_valid, bypass_req_valid_q, refill_req_valid;
  logic handler_req_ready, bypass_req_ready, bypass_req_ready_q, refill_req_ready;

  // Response from the refill unit.
  typedef struct packed {
    logic [CFG.LINE_WIDTH-1:0] data;
    logic                      error;
    logic [CFG.PENDING_IW-1:0] id;
    logic                      bypass;
  } miss_refill_rsp_t;

  miss_refill_rsp_t handler_rsp, bypass_rsp, bypass_rsp_q, refill_rsp;
  logic handler_rsp_valid, bypass_rsp_valid, bypass_rsp_valid_q, refill_rsp_valid;
  logic handler_rsp_ready, bypass_rsp_ready, bypass_rsp_ready_q, refill_rsp_ready;

  // (These `bypass_*` vectors are declared but not referenced in this module.)
  logic [NR_FETCH_PORTS-1:0][FETCH_DW-1:0] bypass_data;
  logic [NR_FETCH_PORTS-1:0]               bypass_error;
  logic [NR_FETCH_PORTS-1:0]               bypass_valid;
  logic [NR_FETCH_PORTS-1:0]               bypass_ready;
  logic [NR_FETCH_PORTS-1:0][FETCH_AW-1:0] bypass_addr;

  // Fetch handshake/data split into the cacheable and the bypass path.
  logic [NR_FETCH_PORTS-1:0]                in_cache_valid, in_bypass_valid;
  logic [NR_FETCH_PORTS-1:0]                in_cache_ready, in_bypass_ready;
  logic [NR_FETCH_PORTS-1:0] [FETCH_DW-1:0] in_cache_data,  in_bypass_data;
  logic [NR_FETCH_PORTS-1:0]                in_cache_error, in_bypass_error;

  for (genvar i = 0; i < NR_FETCH_PORTS; i++) begin : gen_prefetcher
    // L0-side (clk_d2_i) view of the refill request/response channels.
    prefetch_req_t  local_prefetch_req;
    logic           local_prefetch_req_valid;
    logic           local_prefetch_req_ready;
    prefetch_resp_t local_prefetch_rsp;
    logic           local_prefetch_rsp_valid;
    logic           local_prefetch_rsp_ready;

    // `inst_cacheable_i` steers the fetch either to the L0 or to the bypass.
    assign in_cache_valid[i]  =  inst_cacheable_i[i] & inst_valid_i[i];
    assign in_bypass_valid[i] = ~inst_cacheable_i[i] & inst_valid_i[i];
    assign inst_ready_o[i] = (inst_cacheable_i[i] & in_cache_ready [i]) | (~inst_cacheable_i[i] & in_bypass_ready [i]);
    // multiplex results
    assign {inst_error_o[i], inst_data_o[i]} = ({($bits(in_cache_data[i])+1){inst_cacheable_i[i]}} & {in_cache_error [i], in_cache_data[i]})
                                             | (~{($bits(in_cache_data[i])+1){inst_cacheable_i[i]}} & {in_bypass_error[i], in_bypass_data[i]});

    snitch_icache_l0 #(
      .CFG   ( CFG ),
      .L0_ID ( i   )
    ) i_snitch_icache_l0 (
      .clk_i                ( clk_d2_i                  ),
      .rst_ni,
      .flush_valid_i,
      .enable_prefetching_i ( enable_prefetching_i [i]  ),
      .icache_events_o      ( icache_events_o      [i]  ),
      .in_addr_i            ( inst_addr_i          [i]  ),
      .in_data_o            ( in_cache_data        [i]  ),
      .in_error_o           ( in_cache_error       [i]  ),
      .in_valid_i           ( in_cache_valid       [i]  ),
      .in_ready_o           ( in_cache_ready       [i]  ),
      .out_req_addr_o       ( local_prefetch_req.addr   ),
      .out_req_id_o         ( local_prefetch_req.id     ),
      .out_req_valid_o      ( local_prefetch_req_valid  ),
      .out_req_ready_i      ( local_prefetch_req_ready  ),
      .out_rsp_data_i       ( local_prefetch_rsp.data   ),
      .out_rsp_error_i      ( local_prefetch_rsp.error  ),
      .out_rsp_id_i         ( local_prefetch_rsp.id     ),
      .out_rsp_valid_i      ( local_prefetch_rsp_valid  ),
      .out_rsp_ready_o      ( local_prefetch_rsp_ready  )
    );

    // Request crossing: clk_d2_i -> clk_i
    isochronous_spill_register #(
      .T      ( prefetch_req_t ),
      .Bypass ( !ISO_CROSSING  )
    ) i_spill_register_prefetch_req (
      .src_clk_i   ( clk_d2_i                 ),
      .src_rst_ni  ( rst_ni                   ),
      .src_valid_i ( local_prefetch_req_valid ),
      .src_ready_o ( local_prefetch_req_ready ),
      .src_data_i  ( local_prefetch_req       ),
      .dst_clk_i   ( clk_i                    ),
      .dst_rst_ni  ( rst_ni                   ),
      .dst_valid_o ( prefetch_req_valid [i]   ),
      .dst_ready_i ( prefetch_req_ready [i]   ),
      .dst_data_o  ( prefetch_req       [i]   )
    );

    // Response crossing: clk_i -> clk_d2_i
    isochronous_spill_register #(
      .T      ( prefetch_resp_t ),
      .Bypass ( !ISO_CROSSING   )
    ) i_spill_register_prefetch_resp (
      .src_clk_i   ( clk_i                    ),
      .src_rst_ni  ( rst_ni                   ),
      .src_valid_i ( prefetch_rsp_valid [i]   ),
      .src_ready_o ( prefetch_rsp_ready [i]   ),
      .src_data_i  ( prefetch_rsp       [i]   ),
      .dst_clk_i   ( clk_d2_i                 ),
      .dst_rst_ni  ( rst_ni                   ),
      .dst_valid_o ( local_prefetch_rsp_valid ),
      .dst_ready_i ( local_prefetch_rsp_ready ),
      .dst_data_o  ( local_prefetch_rsp       )
    );
  end

  // Non-cacheable fetches of all ports are turned into bypass refill requests.
  l0_to_bypass #(
    .CFG ( CFG )
  ) i_l0_to_bypass (
    .clk_i               ( clk_d2_i           ),
    .rst_ni,
    .in_valid_i          ( in_bypass_valid    ),
    .in_ready_o          ( in_bypass_ready    ),
    .in_addr_i           ( inst_addr_i        ),
    .in_data_o           ( in_bypass_data     ),
    .in_error_o          ( in_bypass_error    ),
    .refill_req_addr_o   ( bypass_req.addr    ),
    .refill_req_bypass_o ( bypass_req.bypass  ),
    .refill_req_valid_o  ( bypass_req_valid   ),
    .refill_req_ready_i  ( bypass_req_ready   ),
    .refill_rsp_data_i   ( bypass_rsp_q.data  ),
    .refill_rsp_error_i  ( bypass_rsp_q.error ),
    .refill_rsp_valid_i  ( bypass_rsp_valid_q ),
    .refill_rsp_ready_o  ( bypass_rsp_ready_q )
  );

  // Bypass requests do not use the pending-refill ID.
  assign bypass_req.id = '0;

  // Bypass request crossing: clk_d2_i -> clk_i
  isochronous_spill_register #(
    .T      ( miss_refill_req_t ),
    .Bypass ( !ISO_CROSSING     )
  ) i_spill_register_bypass_req (
    .src_clk_i   ( clk_d2_i           ),
    .src_rst_ni  ( rst_ni             ),
    .src_valid_i ( bypass_req_valid   ),
    .src_ready_o ( bypass_req_ready   ),
    .src_data_i  ( bypass_req         ),
    .dst_clk_i   ( clk_i              ),
    .dst_rst_ni  ( rst_ni             ),
    .dst_valid_o ( bypass_req_valid_q ),
    .dst_ready_i ( bypass_req_ready_q ),
    .dst_data_o  ( bypass_req_q       )
  );

  // Bypass response crossing: clk_i -> clk_d2_i
  isochronous_spill_register #(
    .T      ( miss_refill_rsp_t ),
    .Bypass ( !ISO_CROSSING     )
  ) i_spill_register_bypass_resp (
    .src_clk_i   ( clk_i              ),
    .src_rst_ni  ( rst_ni             ),
    .src_valid_i ( bypass_rsp_valid   ),
    .src_ready_o ( bypass_rsp_ready   ),
    .src_data_i  ( bypass_rsp         ),
    .dst_clk_i   ( clk_d2_i           ),
    .dst_rst_ni  ( rst_ni             ),
    .dst_valid_o ( bypass_rsp_valid_q ),
    .dst_ready_i ( bypass_rsp_ready_q ),
    .dst_data_o  ( bypass_rsp_q       )
  );

  /// Arbitrate cache port
  // 1. Request Side
  stream_arbiter #(
    .DATA_T ( prefetch_req_t ),
    .N_INP  ( NR_FETCH_PORTS )
  ) i_stream_arbiter (
    .clk_i,
    .rst_ni,
    .inp_data_i  ( prefetch_req              ),
    .inp_valid_i ( prefetch_req_valid        ),
    .inp_ready_o ( prefetch_req_ready        ),
    .oup_data_o  ( prefetch_lookup_req       ),
    .oup_valid_o ( prefetch_lookup_req_valid ),
    .oup_ready_i ( prefetch_lookup_req_ready )
  );

  // 2. Response Side
  // The response is broadcast to all ports; a port takes it if one of its two
  // ID bits is set. The upstream ready is the OR of all port readies, so this
  // breaks if the pre-fetcher would not always be ready, which is the case
  // for the moment.
  for (genvar i = 0; i < NR_FETCH_PORTS; i++) begin : gen_resp
    assign prefetch_rsp[i] = prefetch_lookup_rsp;
    // check if one of the ID bits is set
    assign prefetch_rsp_valid[i] = ((|((prefetch_rsp[i].id >> 2*i) & 2'b11)) & prefetch_lookup_rsp_valid);
  end
  assign prefetch_lookup_rsp_ready = |prefetch_rsp_ready;

  /// Tag lookup
  // The lookup module contains the actual cache RAMs and performs lookups.
  logic [CFG.FETCH_AW-1:0]     lookup_addr;
  logic [CFG.ID_WIDTH_REQ-1:0] lookup_id;
  logic [CFG.SET_ALIGN-1:0]    lookup_set;
  logic                        lookup_hit;
  logic [CFG.LINE_WIDTH-1:0]   lookup_data;
  logic                        lookup_error;
  logic                        lookup_valid;
  logic                        lookup_ready;

  logic [CFG.COUNT_ALIGN-1:0]  write_addr;
  logic [CFG.SET_ALIGN-1:0]    write_set;
  logic [CFG.LINE_WIDTH-1:0]   write_data;
  logic [CFG.TAG_WIDTH-1:0]    write_tag;
  logic                        write_error;
  logic                        write_valid;
  logic                        write_ready;

  logic flush_valid, flush_ready;

  // We need to propagate the handshake into the other
  // clock domain in case we operate w/ different clocks.
  if (ISO_CROSSING) begin : gen_flush_crossing
    isochronous_spill_register
    i_isochronous_4phase_handshake (
      .src_clk_i   ( clk_d2_i      ),
      .src_rst_ni  ( rst_ni        ),
      .src_valid_i ( flush_valid_i ),
      .src_ready_o ( flush_ready_o ),
      .src_data_i  ( '0            ),
      .dst_clk_i   ( clk_i         ),
      .dst_rst_ni  ( rst_ni        ),
      .dst_valid_o ( flush_valid   ),
      .dst_ready_i ( flush_ready   ),
      .dst_data_o  ( /* Unused */  )
    );
  end else begin : gen_no_flush_crossing
    assign flush_valid   = flush_valid_i;
    assign flush_ready_o = flush_ready;
  end

  snitch_icache_lookup #(CFG) i_lookup (
    .clk_i,
    .rst_ni,

    .flush_valid_i (flush_valid),
    .flush_ready_o (flush_ready),

    .in_addr_i     ( prefetch_lookup_req.addr  ),
    .in_id_i       ( prefetch_lookup_req.id    ),
    .in_valid_i    ( prefetch_lookup_req_valid ),
    .in_ready_o    ( prefetch_lookup_req_ready ),

    .out_addr_o    ( lookup_addr  ),
    .out_id_o      ( lookup_id    ),
    .out_set_o     ( lookup_set   ),
    .out_hit_o     ( lookup_hit   ),
    .out_data_o    ( lookup_data  ),
    .out_error_o   ( lookup_error ),
    .out_valid_o   ( lookup_valid ),
    .out_ready_i   ( lookup_ready ),

    .write_addr_i  ( write_addr   ),
    .write_set_i   ( write_set    ),
    .write_data_i  ( write_data   ),
    .write_tag_i   ( write_tag    ),
    .write_error_i ( write_error  ),
    .write_valid_i ( write_valid  ),
    .write_ready_o ( write_ready  )
  );

  // The miss handler module deals with the result of the lookup. It also
  // keeps track of the pending refills and ensures that no redundant memory
  // requests are made. Upon refill completion, it sends a new tag/data item
  // to the lookup module and the received data to the prefetch module.
  snitch_icache_handler #(CFG) i_handler (
    .clk_i,
    .rst_ni,

    .in_req_addr_i   ( lookup_addr  ),
    .in_req_id_i     ( lookup_id    ),
    .in_req_set_i    ( lookup_set   ),
    .in_req_hit_i    ( lookup_hit   ),
    .in_req_data_i   ( lookup_data  ),
    .in_req_error_i  ( lookup_error ),
    .in_req_valid_i  ( lookup_valid ),
    .in_req_ready_o  ( lookup_ready ),

    .in_rsp_data_o   ( prefetch_lookup_rsp.data  ),
    .in_rsp_error_o  ( prefetch_lookup_rsp.error ),
    .in_rsp_id_o     ( prefetch_lookup_rsp.id    ),
    .in_rsp_valid_o  ( prefetch_lookup_rsp_valid ),
    .in_rsp_ready_i  ( prefetch_lookup_rsp_ready ),

    .write_addr_o    ( write_addr   ),
    .write_set_o     ( write_set    ),
    .write_data_o    ( write_data   ),
    .write_tag_o     ( write_tag    ),
    .write_error_o   ( write_error  ),
    .write_valid_o   ( write_valid  ),
    .write_ready_i   ( write_ready  ),

    .out_req_addr_o  ( handler_req.addr  ),
    .out_req_id_o    ( handler_req.id    ),
    .out_req_valid_o ( handler_req_valid ),
    .out_req_ready_i ( handler_req_ready ),

    .out_rsp_data_i  ( handler_rsp.data  ),
    .out_rsp_error_i ( handler_rsp.error ),
    .out_rsp_id_i    ( handler_rsp.id    ),
    .out_rsp_valid_i ( handler_rsp_valid ),
    .out_rsp_ready_o ( handler_rsp_ready )
  );

  // Requests originating from the miss handler are never bypass requests.
  assign handler_req.bypass = 1'b0;

  // Arbitrate between bypass and cache-refills
  // (input 0: miss handler, input 1: bypass path)
  stream_arbiter #(
    .DATA_T ( miss_refill_req_t ),
    .N_INP  ( 2                 )
  ) i_stream_arbiter_miss_refill (
    .clk_i,
    .rst_ni,
    .inp_data_i  ( {bypass_req_q, handler_req}             ),
    .inp_valid_i ( {bypass_req_valid_q, handler_req_valid} ),
    .inp_ready_o ( {bypass_req_ready_q, handler_req_ready} ),
    .oup_data_o  ( refill_req                              ),
    .oup_valid_o ( refill_req_valid                        ),
    .oup_ready_i ( refill_req_ready                        )
  );

  // Response path muxing: the `bypass` flag returned with the response selects
  // the destination (0: miss handler, 1: bypass path).
  stream_demux #(
    .N_OUP ( 2 )
  ) i_stream_demux_miss_refill (
    .inp_valid_i ( refill_rsp_valid                        ),
    .inp_ready_o ( refill_rsp_ready                        ),
    .oup_sel_i   ( refill_rsp.bypass                       ),
    .oup_valid_o ( {{bypass_rsp_valid, handler_rsp_valid}} ),
    .oup_ready_i ( {{bypass_rsp_ready, handler_rsp_ready}} )
  );

  // The payload is shared; only the valid signal is demultiplexed.
  assign handler_rsp = refill_rsp;
  assign bypass_rsp  = refill_rsp;

  // AXI-like read-only interface
  typedef struct packed {
    logic [FILL_AW-1:0] addr;
    logic [7:0]         len;
  } extern_req_t;

  typedef struct packed {
    logic [FILL_DW-1:0] data;
    logic               error;
    logic               last;
  } extern_rsp_t;

  extern_req_t extern_req, extern_req_q;
  logic        extern_qvalid;
  logic        extern_qready;

  extern_rsp_t extern_rsp, extern_rsp_q;
  logic        extern_pvalid_q;
  logic        extern_pready_q;

  // Instantiate the cache refill module which emits AXI transactions.
  snitch_icache_refill #(CFG) i_refill (
    .clk_i,
    .rst_ni,

    .in_req_addr_i   ( refill_req.addr    ),
    .in_req_id_i     ( refill_req.id      ),
    .in_req_bypass_i ( refill_req.bypass  ),
    .in_req_valid_i  ( refill_req_valid   ),
    .in_req_ready_o  ( refill_req_ready   ),

    .in_rsp_data_o   ( refill_rsp.data    ),
    .in_rsp_error_o  ( refill_rsp.error   ),
    .in_rsp_id_o     ( refill_rsp.id      ),
    .in_rsp_bypass_o ( refill_rsp.bypass  ),
    .in_rsp_valid_o  ( refill_rsp_valid   ),
    .in_rsp_ready_i  ( refill_rsp_ready   ),

    .refill_qaddr_o  ( extern_req.addr    ),
    .refill_qlen_o   ( extern_req.len     ),
    .refill_qvalid_o ( extern_qvalid      ),
    .refill_qready_i ( extern_qready      ),
    .refill_pdata_i  ( extern_rsp_q.data  ),
    .refill_perror_i ( extern_rsp_q.error ),
    .refill_plast_i  ( extern_rsp_q.last  ),
    .refill_pvalid_i ( extern_pvalid_q    ),
    .refill_pready_o ( extern_pready_q    )
  );

  // Insert Slices.
  // Outgoing refill request
  spill_register #(.T(extern_req_t)) i_spill_register_req (
    .clk_i,
    .rst_ni,
    .valid_i ( extern_qvalid   ),
    .ready_o ( extern_qready   ),
    .data_i  ( extern_req      ),
    // Q Output
    .valid_o ( refill_qvalid_o ),
    .ready_i ( refill_qready_i ),
    .data_o  ( extern_req_q    )
  );

  assign refill_qaddr_o = extern_req_q.addr;
  assign refill_qlen_o  = extern_req_q.len;

  // Incoming refill response
  spill_register #(.T(extern_rsp_t)) i_spill_register_resp (
    .clk_i,
    .rst_ni,
    .valid_i ( refill_pvalid_i ),
    .ready_o ( refill_pready_o ),
    .data_i  ( extern_rsp      ),
    // Q Output
    .valid_o ( extern_pvalid_q ),
    .ready_i ( extern_pready_q ),
    .data_o  ( extern_rsp_q    )
  );

  assign extern_rsp.data  = refill_pdata_i;
  assign extern_rsp.error = refill_perror_i;
  assign extern_rsp.last  = refill_plast_i;

endmodule
// Bypass path: turns fetch requests on the per-port register interface into
// refill requests that are flagged as bypassing the cache.
//
// Each fetch port owns a small FSM. It issues the line-aligned address through
// a shared arbiter, waits until the refill response is steered back to it, and
// then acknowledges the fetch for one cycle with the registered fetch word.
module l0_to_bypass #(
  parameter snitch_icache_pkg::config_t CFG = '0
) (
  input  logic clk_i,
  input  logic rst_ni,
  // Fetch side, one slot per port.
  input  logic [CFG.NR_FETCH_PORTS-1:0] in_valid_i,
  output logic [CFG.NR_FETCH_PORTS-1:0] in_ready_o,
  input  logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_i,
  output logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_DW-1:0] in_data_o,
  output logic [CFG.NR_FETCH_PORTS-1:0] in_error_o,
  // Refill request channel.
  output logic [CFG.FETCH_AW-1:0] refill_req_addr_o,
  output logic refill_req_bypass_o,
  output logic refill_req_valid_o,
  input  logic refill_req_ready_i,
  // Refill response channel.
  input  logic [CFG.LINE_WIDTH-1:0] refill_rsp_data_i,
  input  logic refill_rsp_error_i,
  input  logic refill_rsp_valid_i,
  output logic refill_rsp_ready_o
);

  // Width of the binary port index used to steer responses.
  localparam int unsigned PortIdxWidth =
      CFG.NR_FETCH_PORTS == 1 ? 1 : $clog2(CFG.NR_FETCH_PORTS);

  // Per-port request state.
  typedef enum logic [1:0] {
    Idle, RequestData, WaitResponse, PresentResponse
  } bypass_state_e;

  bypass_state_e state_d [CFG.NR_FETCH_PORTS-1:0];
  bypass_state_e state_q [CFG.NR_FETCH_PORTS-1:0];

  // Request handshake between the port FSMs and the arbiter.
  logic [CFG.NR_FETCH_PORTS-1:0] arb_valid;
  logic [CFG.NR_FETCH_PORTS-1:0] arb_ready;
  logic [CFG.NR_FETCH_PORTS-1:0] arb_grant;
  // Fetch addresses with the in-line offset bits cleared.
  logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] line_addr;

  // Response routing.
  logic [CFG.NR_FETCH_PORTS-1:0] rsp_port_onehot;
  logic [PortIdxWidth-1:0]       rsp_port_idx;
  logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_pop;
  logic                          rsp_fifo_full;
  logic [CFG.NR_FETCH_PORTS-1:0] port_rsp_valid;
  logic [CFG.NR_FETCH_PORTS-1:0] port_rsp_ready;

  // All requests leaving this module bypass the cache.
  assign refill_req_bypass_o = 1'b1;

  // Align every request address to the cache-line width.
  for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin
    assign line_addr[i] = (in_addr_i[i] >> CFG.LINE_ALIGN) << CFG.LINE_ALIGN;
  end

  // Ports whose request is accepted by the arbiter in this cycle.
  assign arb_grant = arb_valid & arb_ready;

  // Funnel the per-port requests onto the single refill request channel.
  stream_arbiter #(
    .DATA_T ( logic [CFG.FETCH_AW-1:0] ),
    .N_INP  ( CFG.NR_FETCH_PORTS )
  ) i_stream_arbiter (
    .clk_i,
    .rst_ni,
    .inp_data_i  ( line_addr ),
    .inp_valid_i ( arb_valid ),
    .inp_ready_o ( arb_ready ),
    .oup_data_o  ( refill_req_addr_o ),
    .oup_valid_o ( refill_req_valid_o ),
    .oup_ready_i ( refill_req_ready_i )
  );

  // Remember, in issue order, which port each outstanding request belongs to.
  fifo_v3 #(
    .DATA_WIDTH ( CFG.NR_FETCH_PORTS ),
    .DEPTH      ( 4 )
  ) rsp_fifo (
    .clk_i,
    .rst_ni,
    .flush_i    ( 1'b0 ),
    .testmode_i ( 1'b0 ),
    .full_o     ( rsp_fifo_full ),
    .empty_o    ( ),
    .usage_o    ( ),
    .data_i     ( arb_grant ),
    .push_i     ( |arb_grant ),
    .data_o     ( rsp_port_onehot ),
    .pop_i      ( |rsp_fifo_pop )
  );

  onehot_to_bin #(
    .ONEHOT_WIDTH ( CFG.NR_FETCH_PORTS )
  ) i_onehot_to_bin (
    .onehot ( rsp_port_onehot ),
    .bin    ( rsp_port_idx )
  );

  // The port FSMs never back-pressure the response.
  assign port_rsp_ready = '1;

  // Steer the refill response to the port at the head of the FIFO.
  stream_demux #(
    .N_OUP ( CFG.NR_FETCH_PORTS )
  ) i_stream_mux_miss_refill (
    .inp_valid_i ( refill_rsp_valid_i ),
    .inp_ready_o ( refill_rsp_ready_o ),
    .oup_sel_i   ( rsp_port_idx ),
    .oup_valid_o ( port_rsp_valid ),
    .oup_ready_i ( port_rsp_ready )
  );

  for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_bypass_request
    // Refill line shifted so that the requested fetch word sits at bit 0.
    logic [CFG.FILL_DW-1:0] rsp_line_shifted;

    always_comb begin
      // Defaults: hold state, no handshake activity.
      in_ready_o[i]   = 1'b0;
      arb_valid[i]    = 1'b0;
      rsp_fifo_pop[i] = 1'b0;
      state_d[i]      = state_q[i];

      unique case (state_q[i])
        // Wait for a fetch request to show up.
        Idle: begin
          if (in_valid_i[i]) begin
            state_d[i] = RequestData;
          end
        end

        // Issue the refill request, but only while the FIFO can still track
        // the response that will come back for it.
        RequestData: begin
          if (!rsp_fifo_full) begin
            arb_valid[i] = 1'b1;
          end
          if (!rsp_fifo_full && arb_ready[i]) begin
            state_d[i] = WaitResponse;
          end
        end

        // Wait for the response routed to this port, then retire the FIFO entry.
        WaitResponse: begin
          if (port_rsp_valid[i]) begin
            rsp_fifo_pop[i] = 1'b1;
            state_d[i]      = PresentResponse;
          end
        end

        // Acknowledge the fetch for exactly one cycle; the data comes from the
        // response register below.
        PresentResponse: begin
          in_ready_o[i] = 1'b1;
          state_d[i]    = Idle;
        end

        default:;
      endcase
    end

    assign rsp_line_shifted =
        refill_rsp_data_i >> (in_addr_i[i][CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN] * CFG.FETCH_DW);

    // Capture fetch word and error flag while the response is valid for this port.
    `FFLNR({in_data_o[i], in_error_o[i]}, {rsp_line_shifted[CFG.FETCH_DW-1:0], refill_rsp_error_i}, port_rsp_valid[i], clk_i)
  end

  `FF(state_q, state_d, '{default: Idle})

endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Miss and refill handler of the instruction cache.
//
// Consumes the result of a cache lookup (`in_req_*`). Hits are forwarded to
// the response port (`in_rsp_*`). Misses are tracked in a table of pending
// refills: a refill request (`out_req_*`) is only issued when no valid table
// entry holds the same address, otherwise the requester's ID is merged into
// the existing entry. A returning refill (`out_rsp_*`) pops its table entry,
// is offered to the cache through the `write_*` port and is sent to the
// response port together with the accumulated ID mask.
module snitch_icache_handler #(
// Cache configuration. The default '0 is rejected by the assertion below.
parameter snitch_icache_pkg::config_t CFG = '0
)(
input logic clk_i,
input logic rst_ni,
// Lookup result (request into the handler).
input logic [CFG.FETCH_AW-1:0] in_req_addr_i,
input logic [CFG.ID_WIDTH_REQ-1:0] in_req_id_i,
// NOTE(review): in_req_set_i is not referenced anywhere in this module.
input logic [CFG.SET_ALIGN-1:0] in_req_set_i,
input logic in_req_hit_i,
input logic [CFG.LINE_WIDTH-1:0] in_req_data_i,
input logic in_req_error_i,
input logic in_req_valid_i,
output logic in_req_ready_o,
// Response stream; `in_rsp_id_o` is a mask with one bit per requester ID.
output logic [CFG.LINE_WIDTH-1:0] in_rsp_data_o,
output logic in_rsp_error_o,
output logic [CFG.ID_WIDTH_RESP-1:0] in_rsp_id_o,
output logic in_rsp_valid_o,
input logic in_rsp_ready_i,
// Cache write port, driven while a refill response is being processed.
output logic [CFG.COUNT_ALIGN-1:0] write_addr_o,
output logic [CFG.SET_ALIGN-1:0] write_set_o,
output logic [CFG.LINE_WIDTH-1:0] write_data_o,
output logic [CFG.TAG_WIDTH-1:0] write_tag_o,
output logic write_error_o,
output logic write_valid_o,
input logic write_ready_i,
// Refill request; the ID is the index of the allocated pending-table entry.
output logic [CFG.FETCH_AW-1:0] out_req_addr_o,
output logic [CFG.PENDING_IW-1:0] out_req_id_o,
output logic out_req_valid_o,
input logic out_req_ready_i,
// Refill response; the ID selects the pending-table entry to pop.
input logic [CFG.LINE_WIDTH-1:0] out_rsp_data_i,
input logic out_rsp_error_i,
input logic [CFG.PENDING_IW-1:0] out_rsp_id_i,
input logic out_rsp_valid_i,
output logic out_rsp_ready_o
);
`ifndef SYNTHESIS
// Catch instances that forgot to override the configuration.
initial assert(CFG != '0);
`endif
// The table of pending refills holds the metadata of all refills that are
// currently in flight. The table has a push and a pop interfaces. The push
// interface is used to mark entries as valid and update the mask of request
// IDs that the refill will serve. The pop interface is used to read a value
// from the table and clear its valid flag.
typedef struct packed {
logic valid;
logic [CFG.FETCH_AW-1:0] addr;
logic [CFG.ID_WIDTH_RESP-1:0] idmask; // mask of incoming ids
} pending_t;
pending_t pending_q [CFG.PENDING_COUNT];
// One-hot (or zero) row strobes derived from the push/pop indices below.
logic [CFG.PENDING_COUNT-1:0] pending_clr;
logic [CFG.PENDING_COUNT-1:0] pending_set;
// Push interface.
logic [CFG.PENDING_IW-1:0] push_index;
logic push_init; // reset the idmask instead of or'ing
logic [CFG.FETCH_AW-1:0] push_addr;
logic [CFG.ID_WIDTH_RESP-1:0] push_idmask;
logic push_enable;
// Pop interface.
logic [CFG.PENDING_IW-1:0] pop_index;
logic [CFG.FETCH_AW-1:0] pop_addr;
logic [CFG.ID_WIDTH_RESP-1:0] pop_idmask;
logic pop_enable;
for (genvar i = 0; i < CFG.PENDING_COUNT; i++) begin : g_pending_row
// Valid flag: a simultaneous set and clear of the same row leaves it invalid.
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni)
pending_q[i].valid <= 0;
else if (pending_set[i] || pending_clr[i])
pending_q[i].valid <= pending_set[i] && ~pending_clr[i];
end
// Payload: written on every push. The ID mask is either replaced
// (`push_init`) or OR-ed into the mask already stored in the row.
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni) begin
pending_q[i].addr <= '0;
pending_q[i].idmask <= '0;
end else if (pending_set[i]) begin
pending_q[i].addr <= push_addr;
pending_q[i].idmask <= push_init ? push_idmask : push_idmask | pending_q[i].idmask;
end
end
end
// The bypass logic ensures that if a table entry is pushed and popped at
// the same time, the pop is updated with the push information and the push
// discarded.
always_comb begin : p_pushpop_bypass
pending_set = push_enable ? 'b1 << push_index : '0;
pending_clr = pop_enable ? 'b1 << pop_index : '0;
pop_addr = pending_q[pop_index].addr;
pop_idmask = pending_q[pop_index].idmask;
if (push_enable && pop_enable && push_index == pop_index) begin
// Forward the pushed address and merge the pushed ID into the popped mask.
pop_addr = push_addr;
pop_idmask |= push_idmask;
end
end
// Determine the first available entry in the pending table, if any is free.
logic [CFG.PENDING_COUNT-1:0] free_entries;
logic free;
logic [CFG.PENDING_IW-1:0] free_id;
always_comb begin : p_free_id
for (int i = 0; i < CFG.PENDING_COUNT; i++)
free_entries[i] = ~pending_q[i].valid;
free = |free_entries;
end
// Encode the free-row vector into an index.
// NOTE(review): relies on lzc's default mode to pick which set bit wins
// (presumably the lowest index) - confirm against the lzc implementation.
lzc #(.WIDTH(CFG.PENDING_COUNT)) i_lzc_free (
.in_i ( free_entries ),
.cnt_o ( free_id ),
.empty_o ( )
);
// Determine if the address of the incoming request coincides with any of
// the entries in the pending table.
logic [CFG.PENDING_COUNT-1:0] pending_matches;
logic pending;
logic [CFG.PENDING_IW-1:0] pending_id;
always_comb begin : p_pending_id
// The full request address is compared against the stored address.
for (int i = 0; i < CFG.PENDING_COUNT; i++)
pending_matches[i] = pending_q[i].valid && pending_q[i].addr == in_req_addr_i;
pending = |pending_matches;
end
// Encode the match vector into the index of the matching row.
lzc #(.WIDTH(CFG.PENDING_COUNT)) i_lzc_pending (
.in_i ( pending_matches ),
.cnt_o ( pending_id ),
.empty_o ( )
);
// The miss handler checks if the access into the cache was a hit. If yes,
// the data is forwarded to the response handler. Otherwise the table of
// pending refills is consulted to check if any refills are currently in
// progress which cover the request. If not, a new refill request is issued
// and the next free entry in the table allocated. Otherwise the existing
// table entry is updated.
logic [CFG.ID_WIDTH_RESP-1:0] hit_id;
logic [CFG.LINE_WIDTH-1:0] hit_data;
// NOTE(review): hit_error is assigned below but never read; the response
// handler drives in_rsp_error_o to 0 for hits. Confirm this is intended.
logic hit_error;
logic hit_valid;
logic hit_ready;
always_comb begin : p_miss_handler
// Defaults: nothing pushed, nothing requested, input accepted.
hit_valid = 0;
// Convert the binary request ID into a one-hot response mask.
hit_id = 'b1 << in_req_id_i;
hit_data = in_req_data_i;
hit_error = in_req_error_i;
push_index = free_id;
push_init = 0;
push_addr = in_req_addr_i;
push_idmask = 'b1 << in_req_id_i;
push_enable = 0;
in_req_ready_o = 1;
out_req_addr_o = in_req_addr_i;
out_req_id_o = free_id;
out_req_valid_o = 0;
if (in_req_valid_i) begin
// The cache lookup was a hit.
if (in_req_hit_i) begin
hit_valid = 1;
// Back-pressure comes from the response handler (combinational path).
in_req_ready_o = hit_ready;
// The cache lookup was a miss, but there is already a pending
// refill that covers the line.
end else if (pending) begin
// push_init stays 0, so the requester ID is OR-ed into the existing mask.
// The request is accepted immediately (in_req_ready_o keeps its default).
push_index = pending_id;
push_enable = 1;
// The cache lookup was a miss, there is no pending refill, but
// there are available entries in the table.
end else if (free) begin
out_req_addr_o = in_req_addr_i;
out_req_id_o = free_id;
out_req_valid_o = 1;
in_req_ready_o = out_req_ready_i;
push_index = free_id;
push_init = 1;
// Allocate the row only in the cycle the refill request is accepted.
push_enable = out_req_ready_i;
// The cache lookup was a miss, there is no pending refill, and
// there is no room in the table for a new refill at the moment.
end else begin
in_req_ready_o = 0;
end
end
end
// The cache line eviction LFSR is responsible for picking a cache line for
// replacement at random. Note that we assume that the entire cache is full,
// so no empty cache lines are available. This is the common case since we
// do not support flushing of the cache.
logic [CFG.SET_ALIGN-1:0] evict_index;
logic evict_enable;
snitch_icache_lfsr #(CFG.SET_ALIGN) i_evict_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.value_o ( evict_index ),
.enable_i ( evict_enable )
);
// The response handler deals with incoming refill responses. It queries and
// clears the corresponding entry in the pending table, stores the data in
// the cache via the `write` port, and returns the data to the appropriate
// fetch ports via the `in_rsp` port. It also mixes the data of a cache hit
// into the response stream.
// A refill has two sinks (cache write and response port). These flags record
// which sink has already accepted the current refill while the other stalls.
logic write_served_q;
logic in_rsp_served_q;
logic rsp_valid, rsp_ready;
// Arbitration state between the hit path (sel = 0) and the refill path
// (sel = 1). `lock` keeps the selection until the chosen transfer completes.
struct packed {
logic sel;
logic lock;
} arb_q, arb_d;
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni)
arb_q <= '0;
else
arb_q <= arb_d;
end
always_comb begin : p_response_handler
// Defaults. The write port and response payload follow the refill response.
pop_index = out_rsp_id_i;
pop_enable = 0;
// Line index and tag are both derived from the popped entry's address.
write_addr_o = pop_addr >> CFG.LINE_ALIGN;
write_set_o = evict_index;
write_data_o = out_rsp_data_i;
write_tag_o = pop_addr >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN);
write_error_o = out_rsp_error_i;
write_valid_o = 0;
in_rsp_data_o = out_rsp_data_i;
in_rsp_error_o = out_rsp_error_i;
in_rsp_id_o = pop_idmask;
in_rsp_valid_o = 0;
hit_ready = 1;
out_rsp_ready_o = 1;
evict_enable = 0;
rsp_valid = 0;
rsp_ready = 1;
arb_d = arb_q;
// While unlocked, pick a source: hits take priority over refill responses.
if (!arb_q.lock) begin
if (hit_valid) begin
arb_d.sel = 0;
arb_d.lock = 1;
end else if (out_rsp_valid_i) begin
arb_d.sel = 1;
arb_d.lock = 1;
end else begin
arb_d.sel = 0;
arb_d.lock = 0;
end
end
// The selection computed above (arb_d, not arb_q) is used in this same cycle.
// Cache hit data is pending.
if (arb_d.sel == 0) begin
if (hit_valid) begin
// Stall the refill response while the hit owns the response port.
out_rsp_ready_o = 0;
in_rsp_data_o = hit_data;
in_rsp_error_o = 0;
in_rsp_id_o = hit_id;
in_rsp_valid_o = 1;
hit_ready = in_rsp_ready_i;
end else hit_ready = 1;
// Release the lock once the hit has been accepted (or there is none).
if (hit_ready) arb_d.lock = 0;
// No cache hit is pending, but response data is available.
end else if (arb_d.sel == 1) begin
if (out_rsp_valid_i) begin
rsp_valid = 1;
// Done once both sinks have accepted, now or in an earlier cycle.
rsp_ready = (in_rsp_ready_i || in_rsp_served_q) && (write_ready_i || write_served_q);
// Do not offer the refill again to a sink that already took it.
write_valid_o = 1 && ~write_served_q;
in_rsp_valid_o = 1 && ~in_rsp_served_q;
// Pop the table entry, accept the response and advance the eviction
// LFSR in the cycle the refill completes.
pop_enable = rsp_ready;
out_rsp_ready_o = rsp_ready;
evict_enable = rsp_ready;
end else rsp_ready = 1;
if (rsp_ready) arb_d.lock = 0;
end
end
// Track partially served refills. Both flags return to 0 as soon as the
// refill completes (rsp_ready) or no refill is being processed.
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni) begin
write_served_q <= 0;
in_rsp_served_q <= 0;
end else begin
write_served_q <= rsp_valid & ~rsp_ready & (write_served_q | write_ready_i);
in_rsp_served_q <= rsp_valid & ~rsp_ready & (in_rsp_served_q | in_rsp_ready_i);
end
end
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Florian Zaruba <zarubaf@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
`include "common_cells/assertions.svh"
/// A small cache private to each fetch port.
///
/// Holds `CFG.L0_LINE_COUNT` cache lines. The tag of every line is compared
/// against the line address of the fetch, so a line can live in any slot.
/// Misses are turned into refill requests on `out_req_*`; the refilled line
/// arrives on `out_rsp_*`. When enabled, a prefetcher requests the line the
/// fetch stream is predicted to need next.
module snitch_icache_l0 import snitch_icache_pkg::*; #(
parameter config_t CFG = '0,
// Identifier of this L0 instance; forms the upper part of `out_req_id_o`.
parameter int unsigned L0_ID = 0
) (
input logic clk_i,
input logic rst_ni,
// Invalidates every line while asserted.
input logic flush_valid_i,
input logic enable_prefetching_i,
output icache_events_t icache_events_o,
// Fetch interface: `in_ready_o` is asserted on a (unique) hit.
input logic [CFG.FETCH_AW-1:0] in_addr_i,
input logic in_valid_i,
output logic [CFG.FETCH_DW-1:0] in_data_o,
output logic in_ready_o,
output logic in_error_o,
// Refill / prefetch request.
output logic [CFG.FETCH_AW-1:0] out_req_addr_o,
output logic [CFG.ID_WIDTH_REQ-1:0] out_req_id_o,
output logic out_req_valid_o,
input logic out_req_ready_i,
// Refill response. Error and ID are only referenced by the assertions below.
input logic [CFG.LINE_WIDTH-1:0] out_rsp_data_i,
input logic out_rsp_error_i,
input logic [CFG.ID_WIDTH_RESP-1:0] out_rsp_id_i,
input logic out_rsp_valid_i,
output logic out_rsp_ready_o
);
typedef logic [CFG.FETCH_AW-1:0] addr_t;
// Tag array entry: line address tag plus valid bit.
typedef struct packed {
logic [CFG.L0_TAG_WIDTH-1:0] tag;
logic vld;
} tag_t;
// Tag of the current fetch and of the prefetch candidate.
logic [CFG.L0_TAG_WIDTH-1:0] addr_tag, addr_tag_prefetch;
tag_t [CFG.L0_LINE_COUNT-1:0] tag;
logic [CFG.L0_LINE_COUNT-1:0][CFG.LINE_WIDTH-1:0] data;
// Per-line compare results: full tag, partial ("early") tag, prefetch tag.
logic [CFG.L0_LINE_COUNT-1:0] hit, hit_early, hit_prefetch;
logic hit_early_is_onehot;
logic hit_any;
logic hit_prefetch_any;
logic miss;
// Per-line strobes: evict (invalidate + retag), flush (invalidate), validate.
logic [CFG.L0_LINE_COUNT-1:0] evict_strb;
logic [CFG.L0_LINE_COUNT-1:0] flush_strb;
logic [CFG.L0_LINE_COUNT-1:0] validate_strb;
typedef struct packed {
logic vld;
logic [CFG.FETCH_AW-1:0] addr;
} prefetch_req_t;
logic latch_prefetch, last_cycle_was_prefetch_q;
prefetch_req_t prefetch_req_q, prefetch_req_d, prefetcher_out;
// Holds the onehot signal for the line being refilled at the moment
logic [CFG.L0_LINE_COUNT-1:0] pending_line_refill_q;
logic pending_refill_q, pending_refill_d;
logic evict_req;
// Delayed copies used to turn `miss` / `latch_prefetch` into rising-edge pulses.
logic last_cycle_was_miss_q;
`FF(last_cycle_was_miss_q, miss, '0)
`FF(last_cycle_was_prefetch_q, latch_prefetch, '0)
logic evict_because_miss, evict_because_prefetch;
// Outgoing request: address plus a flag telling refills and prefetches apart.
typedef struct packed {
logic is_prefetch;
logic [CFG.FETCH_AW-1:0] addr;
} req_t;
req_t refill, prefetch;
logic refill_valid, prefetch_valid;
logic refill_ready, prefetch_ready;
req_t out_req;
// Request an eviction only in the first cycle of a miss or prefetch.
assign evict_because_miss = miss & ~last_cycle_was_miss_q;
assign evict_because_prefetch = latch_prefetch & ~last_cycle_was_prefetch_q;
assign evict_req = evict_because_miss | evict_because_prefetch;
// The tag is the fetch address with the in-line offset removed.
assign addr_tag = in_addr_i >> CFG.LINE_ALIGN;
// ------------
// Tag Compare
// ------------
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_cmp_fetch
// Early hit: only the low L0_EARLY_TAG_WIDTH tag bits are compared.
assign hit_early[i] = tag[i].vld & (tag[i].tag[CFG.L0_EARLY_TAG_WIDTH-1:0] == addr_tag[CFG.L0_EARLY_TAG_WIDTH-1:0]);
// The two signals calculate the same.
if (CFG.L0_TAG_WIDTH == CFG.L0_EARLY_TAG_WIDTH) begin : gen_hit_assign
assign hit[i] = hit_early[i];
// Compare the rest of the tag.
end else begin : gen_hit
assign hit[i] = hit_early[i] & (tag[i].tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH] == addr_tag[CFG.L0_TAG_WIDTH-1:CFG.L0_EARLY_TAG_WIDTH]);
end
// Is the prefetch candidate already present?
assign hit_prefetch[i] = tag[i].vld & (tag[i].tag == addr_tag_prefetch);
end
assign hit_any = |hit;
assign hit_prefetch_any = |hit_prefetch;
// A miss is only signalled while no refill is outstanding.
assign miss = ~hit_any & in_valid_i & ~pending_refill_q;
// Inverted clock, used to clock the latch-based data array below.
logic clk_inv;
tc_clk_inverter i_clk_inv (
.clk_i (clk_i),
.clk_o (clk_inv)
);
for (genvar i = 0; i < CFG.L0_LINE_COUNT; i++) begin : gen_array
// Tag Array
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
tag[i].vld <= 0;
tag[i].tag <= 0;
end else begin
// Eviction invalidates the line and already stores the tag of the line
// that will be refilled into it; the refill response later validates it.
if (evict_strb[i]) begin
tag[i].vld <= 1'b0;
tag[i].tag <= evict_because_prefetch ? addr_tag_prefetch : addr_tag;
end else if (validate_strb[i]) begin
tag[i].vld <= 1'b1;
end
// A flush is evaluated last and therefore overrides a validation.
if (flush_strb[i]) begin
tag[i].vld <= 1'b0;
end
end
end
if (CFG.EARLY_LATCH) begin : gen_latch
// Gate the inverted clock with the validate strobe of this line.
logic clk_vld;
tc_clk_gating i_clk_gate (
.clk_i (clk_inv ),
.en_i (validate_strb[i]),
.test_en_i (1'b0 ),
.clk_o (clk_vld )
);
// Data Array
/* verilator lint_off NOLATCH */
always_latch begin
if (clk_vld) begin
data[i] <= out_rsp_data_i;
end
end
/* verilator lint_on NOLATCH */
end else begin : gen_ff
// Flip-flop based data array, loaded when the line is validated.
`FFLNR(data[i], out_rsp_data_i, validate_strb[i], clk_i)
end
end
// ----
// HIT
// ----
// we hit in the cache and there was a unique hit.
assign in_ready_o = hit_any & hit_early_is_onehot;
logic [CFG.LINE_WIDTH-1:0] ins_data;
always_comb begin : data_muxer
// AND-OR mux over all lines, selected by the early hit vector.
ins_data = '0;
for (int unsigned i = 0; i < CFG.L0_LINE_COUNT; i++) begin
ins_data |= {CFG.LINE_WIDTH{hit_early[i]}} & data[i];
end
// Shift the requested fetch word down to bit 0.
in_data_o = ins_data >> (in_addr_i[CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN] * CFG.FETCH_DW);
end
// Check whether we had an early multi-hit (e.g., the portion of the tag matched
// multiple entries in the tag array)
if (CFG.L0_TAG_WIDTH != CFG.L0_EARLY_TAG_WIDTH) begin : gen_multihit_detection
onehot #(
.Width (CFG.L0_LINE_COUNT)
) i_onehot_hit_early (
.d_i (hit_early),
.is_onehot_o (hit_early_is_onehot)
);
end else begin : gen_no_multihit_detection
assign hit_early_is_onehot = 1'b1;
end
// -------
// Evictor
// -------
// Round-robin pointer to the next line to evict.
logic [$clog2(CFG.L0_LINE_COUNT)-1:0] cnt_d, cnt_q;
always_comb begin : evictor
evict_strb = '0;
cnt_d = cnt_q;
// Round-Robin
if (evict_req) begin
evict_strb = 1 << cnt_q;
cnt_d = cnt_q + 1;
// If the selected line is exactly the one currently hitting (early
// compare), take the following line instead and advance by two.
if (evict_strb == hit_early) begin
evict_strb = 1 << cnt_d;
cnt_d = cnt_q + 2;
end
end
end
always_comb begin : flush
flush_strb = '0;
// Check whether we encountered a multi-hit condition and
// evict the offending entry.
if (hit_any && !hit_early_is_onehot) begin
// We want to evict all entries which hit with the early tag
// but didn't hit in the final comparison.
flush_strb = ~hit & hit_early;
end
// An external flush invalidates every line.
if (flush_valid_i) flush_strb = '1;
end
`FF(cnt_q, cnt_d, '0)
// -------------
// Miss Handling
// -------------
// Refill the line containing the fetch address (offset bits cleared).
assign refill.addr = addr_tag << CFG.LINE_ALIGN;
assign refill.is_prefetch = 1'b0;
assign refill_valid = miss;
// Remember which line was evicted so the response can validate it.
`FFLNR(pending_line_refill_q, evict_strb, evict_req, clk_i)
`FF(pending_refill_q, pending_refill_d, '0)
always_comb begin
pending_refill_d = pending_refill_q;
// re-set condition
if (pending_refill_q) begin
// Cleared when the refill response is received.
if (out_rsp_valid_i & out_rsp_ready_o) begin
pending_refill_d = 1'b0;
end
// set condition
end else begin
// Set when the refill request is accepted ...
if (refill_valid && refill_ready) begin
pending_refill_d = 1'b1;
end
// ... or as soon as a prefetch is latched.
if (latch_prefetch) begin
pending_refill_d = 1'b1;
end
end
end
// The response validates the line that was evicted for it.
assign validate_strb = out_rsp_valid_i ? pending_line_refill_q : '0;
// Responses are always accepted; no error is ever reported to the fetch port.
assign out_rsp_ready_o = 1'b1;
assign in_error_o = '0;
assign out_req_addr_o = out_req.addr;
// Request ID: instance ID with the prefetch flag as least significant bit.
assign out_req_id_o = {L0_ID, out_req.is_prefetch};
// Priority arbitrate requests.
always_comb begin
// Default: forward the prefetch request.
out_req = prefetch;
out_req_valid_o = prefetch_valid;
prefetch_ready = out_req_ready_i;
refill_ready = 1'b0;
// A demand refill takes priority over a prefetch.
if (refill_valid) begin
out_req_valid_o = refill_valid;
out_req = refill;
refill_ready = out_req_ready_i;
prefetch_ready = 1'b0;
end
end
// -------------
// Pre-fetching
// -------------
// Generate a prefetch request if the cache hits and we haven't
// pre-fetched the line yet and there is no other refill in progress.
assign prefetcher_out.vld = enable_prefetching_i & hit_any & ~hit_prefetch_any & ~pending_refill_q;
// Number of 32-bit instruction slots in a cache line.
localparam FETCH_PKTS = CFG.LINE_WIDTH/32;
logic [FETCH_PKTS-1:0] is_branch_taken;
logic [FETCH_PKTS-1:0] is_jal;
logic [FETCH_PKTS-1:0] mask;
// Only consider the instruction slots at or after the one currently being
// fetched; slots below the fetch address are masked out.
assign mask = '1 << in_addr_i[CFG.LINE_ALIGN-1:2];
// Instruction aware pre-fetching
for (genvar i = 0; i < FETCH_PKTS; i++) begin : gen_pre_decode
// iterate over the fetch packets (32 bits per instruction)
always_comb begin
is_branch_taken[i] = 1'b0;
is_jal[i] = 1'b0;
// Only decode when the selected line data is unambiguous.
if (hit_early_is_onehot) begin
unique casez (ins_data[i*32+:32])
// static prediction
riscv_instr::BEQ,
riscv_instr::BNE,
riscv_instr::BLT,
riscv_instr::BGE,
riscv_instr::BLTU,
riscv_instr::BGEU: begin
// look at the sign bit of the immediate field
// backward branches (immediate negative) taken
// forward branches not taken
is_branch_taken[i] = ins_data[i*32+31];
end
riscv_instr::JAL: begin
is_jal[i] = 1'b1;
end
// we can't do anything about the JALR case as we don't
// know the destination.
default:;
endcase
end
end
end
// Slot index of the selected branch/jump and its bit offset in the line.
logic [$clog2(FETCH_PKTS)-1:0] taken_idx;
logic no_prefetch;
logic [$clog2(CFG.LINE_WIDTH)-1:0] ins_idx;
assign ins_idx = 32*taken_idx;
// Find first taken branch
// NOTE(review): assumes lzc MODE(0) returns the lowest set bit index and
// asserts empty_o when no bit is set - confirm against the lzc implementation.
lzc #(
.WIDTH(FETCH_PKTS),
.MODE(0)
) i_lzc_branch (
// look at branches and jals
.in_i (mask & (is_branch_taken | is_jal)),
.cnt_o (taken_idx),
.empty_o (no_prefetch)
);
addr_t base_addr, offset, uj_imm, sb_imm;
logic [CFG.LINE_ALIGN-1:0] base_offset;
// Byte offset of the selected instruction within the line.
assign base_offset = taken_idx << 2;
// Sign-extended jump (UJ) and branch (SB) immediates of the selected instruction.
assign uj_imm = $signed({ins_data[ins_idx+31], ins_data[ins_idx+12+:8], ins_data[ins_idx+20], ins_data[ins_idx+21+:10], 1'b0});
assign sb_imm = $signed({ins_data[ins_idx+31], ins_data[ins_idx+7], ins_data[ins_idx+25+:6], ins_data[ins_idx+8+:4], 1'b0});
// next address calculation
always_comb begin
// default is next line predictor
base_addr = no_prefetch ? in_addr_i : {in_addr_i >> CFG.LINE_ALIGN, base_offset};
offset = (1 << CFG.LINE_ALIGN);
// If the cache-line contains a taken branch, compute the pre-fetch address with the jump's offset.
unique case ({is_branch_taken[taken_idx] & ~no_prefetch, is_jal[taken_idx] & ~no_prefetch})
// JAL: UJ Immediate
2'b01: offset = uj_imm;
// Branch: // SB Immediate
2'b10: offset = sb_imm;
default:;
endcase
end
// Prefetch target, aligned to the start of its cache line.
assign prefetcher_out.addr = ($signed(base_addr) + offset) >> CFG.LINE_ALIGN << CFG.LINE_ALIGN;
// check whether cache-line we want to pre-fetch is already present
assign addr_tag_prefetch = prefetcher_out.addr >> CFG.LINE_ALIGN;
// Accept a new prefetch only while no prefetch request is waiting to be sent.
assign latch_prefetch = prefetcher_out.vld & ~prefetch_req_q.vld;
always_comb begin
prefetch_req_d = prefetch_req_q;
// The stored request is released once it has been sent out ...
if (prefetch_ready) prefetch_req_d.vld = 1'b0;
// ... and (re)loaded when a new prefetch is latched.
if (latch_prefetch) begin
prefetch_req_d.vld = 1'b1;
prefetch_req_d.addr = prefetcher_out.addr;
end
end
assign prefetch.is_prefetch = 1'b1;
assign prefetch.addr = prefetch_req_q.addr;
assign prefetch_valid = prefetch_req_q.vld;
`FF(prefetch_req_q.vld, prefetch_req_d.vld, '0)
`FF(prefetch_req_q.addr, prefetch_req_d.addr, '0)
// ------------------
// Performance Events
// ------------------
always_comb begin
// Fields that are not listed here are driven to zero.
icache_events_o = '0;
icache_events_o.l0_miss = miss;
icache_events_o.l0_hit = hit_any & in_valid_i;
icache_events_o.l0_prefetch = prefetcher_out.vld;
icache_events_o.l0_double_hit = hit_any & ~hit_early_is_onehot & in_valid_i;
end
// ----------
// Assertions
// ----------
// At most one line may hit on the full tag.
`ASSERT(HitOnehot, $onehot0(hit))
// make sure only one signal is high and the conditions are mutual exclusive
`ASSERT(ExclusiveEvict, $onehot0({evict_because_miss, evict_because_prefetch}))
// request must be stable
`ASSERT(InstReqStable, in_valid_i && !in_ready_o |=> in_valid_i)
`ASSERT(InstReqDataStable, in_valid_i && !in_ready_o |=> $stable(in_addr_i))
`ASSERT(RefillReqStable, out_req_valid_o && !out_req_ready_i |=> out_req_valid_o)
`ASSERT(RefillReqDataStable, out_req_valid_o && !out_req_ready_i |=> $stable(out_req_addr_o) && $stable(out_req_id_o))
`ASSERT(RefillRspStable, out_rsp_valid_i && !out_rsp_ready_o |=> out_rsp_valid_i)
`ASSERT(RefillRspDataStable, out_rsp_valid_i && !out_rsp_ready_o |=> $stable(out_rsp_data_i) && $stable(out_rsp_error_i) && $stable(out_rsp_id_i))
// make sure we observe a double hit condition
// NOTE(review): the cover is named "NotOnehot" but the property checks
// $onehot(hit_early); confirm which polarity is intended.
`COVER(HitEarlyNotOnehot, hit |-> $onehot(hit_early))
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
/// Pseudo-random value generator based on a linear feedback shift register.
///
/// Up to N = 32 a single right-shifting LFSR with a width-specific feedback
/// mask is used. The all-zero value is spliced into its sequence, which makes
/// the sequence 2**N long instead of the usual 2**N-1. Wider instances are
/// built from two narrower ones; the upper half only advances while the lower
/// half reads zero.
module snitch_icache_lfsr #(
  parameter int N = -1
)(
  input  logic         clk_i,
  input  logic         rst_ni,
  output logic [N-1:0] value_o,
  input  logic         enable_i
);

  `ifndef SYNTHESIS
  initial assert(N > 0);
  `endif

  if (N > 32) begin : g_split
    // Split into a lower and an upper register and instantiate recursively.
    localparam int N0 = N/2;
    localparam int N1 = N-N0;

    // The upper half steps only when the lower half currently reads zero.
    logic hi_step;
    assign hi_step = enable_i && (value_o[N0-1:0] == 0);

    snitch_icache_lfsr #(N0) i_lo (
      .clk_i    ( clk_i           ),
      .rst_ni   ( rst_ni          ),
      .value_o  ( value_o[N0-1:0] ),
      .enable_i ( enable_i        )
    );

    snitch_icache_lfsr #(N1) i_hi (
      .clk_i    ( clk_i          ),
      .rst_ni   ( rst_ni         ),
      .value_o  ( value_o[N-1:N0] ),
      .enable_i ( hi_step        )
    );

  end else if (N == 1) begin : g_toggle
    // A one-bit generator simply toggles.
    logic bit_q;

    always_ff @(posedge clk_i, negedge rst_ni) begin
      if (!rst_ni) begin
        bit_q <= 1'b0;
      end else if (enable_i) begin
        bit_q <= ~bit_q;
      end
    end

    assign value_o = bit_q;

  end else begin : g_impl
    logic [N-1:0] state_q, state_d, feedback;

    assign value_o = state_q;

    always_ff @(posedge clk_i, negedge rst_ni) begin
      if (!rst_ni) begin
        state_q <= '0;
      end else if (enable_i) begin
        state_q <= state_d;
      end
    end

    // Next state: shift right, fold in the feedback mask when a one drops out.
    // The zero state is inserted between the state that would have produced
    // all-ones and the all-ones state itself.
    always_comb begin
      state_d = state_q >> 1;
      if (state_q[0])      state_d = state_d ^ feedback;
      if (state_d == '1)   state_d = '0;
      if (state_q == '0)   state_d = '1;
    end

    // Feedback mask per register width (bit k set means tap at position k).
    always_comb begin
      case (N)
        2:  feedback = 32'h0000_0003;  // 1, 0
        3:  feedback = 32'h0000_0006;  // 2, 1
        4:  feedback = 32'h0000_000C;  // 3, 2
        5:  feedback = 32'h0000_0014;  // 4, 2
        6:  feedback = 32'h0000_0030;  // 5, 4
        7:  feedback = 32'h0000_0060;  // 6, 5
        8:  feedback = 32'h0000_00B8;  // 7, 5, 4, 3
        9:  feedback = 32'h0000_0110;  // 8, 4
        10: feedback = 32'h0000_0240;  // 9, 6
        11: feedback = 32'h0000_0500;  // 10, 8
        12: feedback = 32'h0000_0E08;  // 11, 10, 9, 3
        13: feedback = 32'h0000_1C80;  // 12, 11, 10, 7
        14: feedback = 32'h0000_3802;  // 13, 12, 11, 1
        15: feedback = 32'h0000_6000;  // 14, 13
        16: feedback = 32'h0000_D008;  // 15, 14, 12, 3
        17: feedback = 32'h0001_2000;  // 16, 13
        18: feedback = 32'h0002_0400;  // 17, 10
        19: feedback = 32'h0007_2000;  // 18, 17, 16, 13
        20: feedback = 32'h0009_0000;  // 19, 16
        21: feedback = 32'h0014_0000;  // 20, 18
        22: feedback = 32'h0030_0000;  // 21, 20
        23: feedback = 32'h0042_0000;  // 22, 17
        24: feedback = 32'h00E1_0000;  // 23, 22, 21, 16
        25: feedback = 32'h0120_0000;  // 24, 21
        26: feedback = 32'h0200_0023;  // 25, 5, 1, 0
        27: feedback = 32'h0400_0013;  // 26, 4, 1, 0
        28: feedback = 32'h0900_0000;  // 27, 24
        29: feedback = 32'h1400_0000;  // 28, 26
        30: feedback = 32'h2000_0029;  // 29, 5, 3, 0
        31: feedback = 32'h4800_0000;  // 30, 27
        32: feedback = 32'h8020_0003;  // 31, 21, 1, 0
        // Fallback for widths without a table entry: top bit only.
        default: feedback = 1 << (N-1);
      endcase
    end
  end

endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Samuel Riedel <sriedel@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
/// An actual cache lookup.
module snitch_icache_lookup #(
parameter snitch_icache_pkg::config_t CFG = '0
)(
input logic clk_i,
input logic rst_ni,
input logic flush_valid_i,
output logic flush_ready_o,
input logic [CFG.FETCH_AW-1:0] in_addr_i,
input logic [CFG.ID_WIDTH_REQ-1:0] in_id_i,
input logic in_valid_i,
output logic in_ready_o,
output logic [CFG.FETCH_AW-1:0] out_addr_o,
output logic [CFG.ID_WIDTH_REQ-1:0] out_id_o,
output logic [CFG.SET_ALIGN-1:0] out_set_o,
output logic out_hit_o,
output logic [CFG.LINE_WIDTH-1:0] out_data_o,
output logic out_error_o,
output logic out_valid_o,
input logic out_ready_i,
input logic [CFG.COUNT_ALIGN-1:0] write_addr_i,
input logic [CFG.SET_ALIGN-1:0] write_set_i,
input logic [CFG.LINE_WIDTH-1:0] write_data_i,
input logic [CFG.TAG_WIDTH-1:0] write_tag_i,
input logic write_error_i,
input logic write_valid_i,
output logic write_ready_o
);
localparam int unsigned DATA_ADDR_WIDTH = $clog2(CFG.SET_COUNT) + CFG.COUNT_ALIGN;
`ifndef SYNTHESIS
initial assert(CFG != '0);
`endif
logic [CFG.COUNT_ALIGN:0] init_count_q;
logic init_phase;
// We are always ready to flush
assign flush_ready_o = 1'b1;
assign init_phase = init_count_q != $unsigned(CFG.LINE_COUNT);
// Initialization and flush FSM
always_ff @(posedge clk_i, negedge rst_ni) begin
if (!rst_ni)
init_count_q <= '0;
else if (init_count_q != $unsigned(CFG.LINE_COUNT))
init_count_q <= init_count_q + 1;
else if (flush_valid_i)
init_count_q <= '0;
end
// --------------------------------------------------
// Tag stage
// --------------------------------------------------
typedef struct packed {
logic [CFG.FETCH_AW-1:0] addr;
logic [CFG.ID_WIDTH_REQ-1:0] id;
} tag_req_t;
typedef struct packed {
logic [CFG.SET_ALIGN-1:0] cset;
logic hit;
logic error;
} tag_rsp_t;
logic req_valid, req_ready;
logic req_handshake;
logic [CFG.COUNT_ALIGN-1:0] tag_addr;
logic [CFG.SET_COUNT-1:0] tag_enable;
logic [CFG.TAG_WIDTH+1:0] tag_wdata, tag_rdata [CFG.SET_COUNT];
logic tag_write;
tag_req_t tag_req_d, tag_req_q;
tag_rsp_t tag_rsp_s, tag_rsp_d, tag_rsp_q, tag_rsp;
logic tag_valid, tag_ready;
logic tag_handshake;
logic [CFG.TAG_WIDTH-1:0] required_tag;
logic [CFG.SET_COUNT-1:0] line_hit;
logic [DATA_ADDR_WIDTH-1:0] lookup_addr;
logic [DATA_ADDR_WIDTH-1:0] write_addr;
// Connect input requests to tag stage
assign tag_req_d.addr = in_addr_i;
assign tag_req_d.id = in_id_i;
// Multiplex read and write access to the tag banks onto one port, prioritizing write accesses
always_comb begin
tag_addr = in_addr_i >> CFG.LINE_ALIGN;
tag_enable = '0;
tag_wdata = {1'b1, write_error_i, write_tag_i};
tag_write = 1'b0;
write_ready_o = 1'b0;
in_ready_o = 1'b0;
req_valid = 1'b0;
if (init_phase) begin
tag_addr = init_count_q;
tag_enable = '1;
tag_wdata = '0;
tag_write = 1'b1;
end else if (write_valid_i) begin
// Write a refill request
tag_addr = write_addr_i;
tag_enable = $unsigned(1 << write_set_i);
tag_write = 1'b1;
write_ready_o = 1'b1;
end else if (in_valid_i) begin
// Check cache
tag_enable = '1;
in_ready_o = req_ready;
// Request to store data in pipeline
req_valid = 1'b1;
end
end
// Instantiate the tag sets.
for (genvar i = 0; i < CFG.SET_COUNT; i++) begin : g_sets
if (CFG.L1_TAG_SCM) begin : gen_scm
latch_scm #(
.ADDR_WIDTH ($clog2(CFG.LINE_COUNT)),
.DATA_WIDTH (CFG.TAG_WIDTH+2 )
) i_tag (
.clk ( clk_i ),
.ReadEnable ( tag_enable[i] && !tag_write ),
.ReadAddr ( tag_addr ),
.ReadData ( tag_rdata[i] ),
.WriteEnable ( tag_enable[i] && tag_write ),
.WriteAddr ( tag_addr ),
.WriteData ( tag_wdata )
);
end else begin : gen_sram
tc_sram #(
.DataWidth ( CFG.TAG_WIDTH+2 ),
.NumWords ( CFG.LINE_COUNT ),
.NumPorts ( 1 )
) i_tag (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( tag_enable[i] ),
.we_i ( tag_write ),
.addr_i ( tag_addr ),
.wdata_i ( tag_wdata ),
.be_i ( '1 ),
.rdata_o ( tag_rdata[i] )
);
end
end
// Determine which set hit
always_comb begin
automatic logic [CFG.SET_COUNT-1:0] errors;
required_tag = tag_req_q.addr >> (CFG.LINE_ALIGN + CFG.COUNT_ALIGN);
for (int i = 0; i < CFG.SET_COUNT; i++) begin
line_hit[i] = tag_rdata[i][CFG.TAG_WIDTH+1] && tag_rdata[i][CFG.TAG_WIDTH-1:0] == required_tag;
errors[i] = tag_rdata[i][CFG.TAG_WIDTH] && line_hit[i];
end
tag_rsp_s.hit = |line_hit;
tag_rsp_s.error = |errors;
end
lzc #(.WIDTH(CFG.SET_COUNT)) i_lzc (
.in_i ( line_hit ),
.cnt_o ( tag_rsp_s.cset ),
.empty_o ( )
);
// Buffer the metadata on a valid handshake. Stall on write (implicit in req_valid/ready)
`FFL(tag_req_q, tag_req_d, req_valid && req_ready, '0, clk_i, rst_ni)
`FF(tag_valid, req_valid ? 1'b1 : tag_ready ? 1'b0 : tag_valid, '0, clk_i, rst_ni)
// Ready if buffer is empty or downstream is reading. Stall on write
assign req_ready = (!tag_valid || tag_ready) && !tag_write;
// Register the handshake of the reg stage to buffer the tag output data in the next cycle
`FF(req_handshake, req_valid && req_ready, 1'b0, clk_i, rst_ni)
// Fall-through buffer the tag data: Store the tag data if the SRAM bank accepted a request in
// the previous cycle and if we actually have to buffer them because the receiver is not ready
`FF(tag_rsp_q, tag_rsp_d, '0, clk_i, rst_ni)
assign tag_rsp = req_handshake ? tag_rsp_s : tag_rsp_q;
always_comb begin
tag_rsp_d = tag_rsp_q;
// Load the FF if new data is incoming and downstream is not ready
if (req_handshake && !tag_ready) begin
tag_rsp_d = tag_rsp_s;
end
// Override the hit if the write that stalled us invalidated the data
if (lookup_addr == write_addr && write_valid_i) begin
tag_rsp_d.hit = 1'b0;
end
end
// --------------------------------------------------
// Data stage
// --------------------------------------------------
typedef struct packed {
logic [CFG.FETCH_AW-1:0] addr;
logic [CFG.ID_WIDTH_REQ-1:0] id;
logic [CFG.SET_ALIGN-1:0] cset;
logic hit;
logic error;
} data_req_t;
typedef logic [CFG.LINE_WIDTH-1:0] data_rsp_t;
logic [DATA_ADDR_WIDTH-1:0] data_addr;
logic data_enable;
data_rsp_t data_wdata, data_rdata;
logic data_write;
data_req_t data_req_d, data_req_q;
data_rsp_t data_rsp_q;
logic data_valid, data_ready;
// Connect tag stage response to data stage request
assign data_req_d.addr = tag_req_q.addr;
assign data_req_d.id = tag_req_q.id;
assign data_req_d.cset = tag_rsp.cset;
assign data_req_d.hit = tag_rsp.hit;
assign data_req_d.error = tag_rsp.error;
assign lookup_addr = {tag_rsp.cset, tag_req_q.addr[CFG.LINE_ALIGN +: CFG.COUNT_ALIGN]};
assign write_addr = {write_set_i, write_addr_i};
// Data bank port mux
always_comb begin
// Default read request
data_addr = lookup_addr;
data_enable = tag_valid && tag_rsp.hit; // Only read data on hit
data_wdata = write_data_i;
data_write = 1'b0;
// Write takes priority
if (!init_phase && write_valid_i) begin
data_addr = write_addr;
data_enable = 1'b1;
data_write = 1'b1;
end
end
tc_sram #(
.DataWidth ( CFG.LINE_WIDTH ),
.NumWords ( CFG.LINE_COUNT * CFG.SET_COUNT ),
.NumPorts ( 1 )
) i_data (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( data_enable ),
.we_i ( data_write ),
.addr_i ( data_addr ),
.wdata_i ( data_wdata ),
.be_i ( '1 ),
.rdata_o ( data_rdata )
);
// Buffer the metadata on a valid handshake. Stall on write (implicit in tag_ready)
`FFL(data_req_q, data_req_d, tag_valid && tag_ready, '0, clk_i, rst_ni)
`FF(data_valid, (tag_valid && !data_write) ? 1'b1 : data_ready ? 1'b0 : data_valid, '0, clk_i, rst_ni)
// Ready if buffer is empty or downstream is reading. Stall on write
assign tag_ready = (!data_valid || data_ready) && !data_write;
// Register the handshake of the tag stage to buffer the data output data in the next cycle
// but only if it was a hit. Otherwise, the data is not read anyway.
`FF(tag_handshake, tag_valid && tag_ready && data_req_d.hit, 1'b0, clk_i, rst_ni)
// Fall-through buffer the read data: Store the read data if the SRAM bank accepted a request in
// the previous cycle and if we actually have to buffer them because the receiver is not ready
`FFL(data_rsp_q, data_rdata, tag_handshake && !data_ready, '0, clk_i, rst_ni)
assign out_data_o = tag_handshake ? data_rdata : data_rsp_q;
// Generate the remaining output signals.
assign out_addr_o = data_req_q.addr;
assign out_id_o = data_req_q.id;
assign out_set_o = data_req_q.cset;
assign out_hit_o = data_req_q.hit;
assign out_error_o = data_req_q.error;
assign out_valid_o = data_valid;
assign data_ready = out_ready_i;
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Shared type definitions for the Snitch instruction cache: an event bundle and the
// configuration record that is passed as the `CFG` parameter to the cache sub-modules.
package snitch_icache_pkg;
// Bundle of four single-bit flags named after L0 cache events.
// NOTE(review): presumably performance-counter strobes driven by the L0 cache; the
// producer is not visible in this part of the file - confirm.
typedef struct packed {
logic l0_miss;
logic l0_hit;
logic l0_prefetch;
logic l0_double_hit;
} icache_events_t;
// Configuration record. Modules take it as `parameter config_t CFG` and read its fields
// to size their ports, memories and address slices. This is a packed struct, so field
// order determines the bit layout; do not reorder fields.
typedef struct packed {
// Parameters passed to the root module.
// NOTE(review): presumably the number of fetch ports; not used in the visible code.
int NR_FETCH_PORTS;
// Width of one cache line in bits (data SRAM word width, refill response width).
int LINE_WIDTH;
// Number of words in each per-set tag SRAM; the data SRAM holds LINE_COUNT*SET_COUNT words.
int LINE_COUNT;
// Number of sets whose tags are compared in parallel during a lookup.
int SET_COUNT;
// NOTE(review): presumably the number of outstanding refills; not used in the visible code.
int PENDING_COUNT;
// NOTE(review): presumably the number of L0 cache lines; not used in the visible code.
int L0_LINE_COUNT;
// Width of fetch/request addresses in bits.
int FETCH_AW;
// NOTE(review): presumably the fetch data width in bits; not used in the visible code.
int FETCH_DW;
// Width of the refill request address in bits.
int FILL_AW;
// Width of the refill response data bus in bits.
int FILL_DW;
// NOTE(review): presumably selects a latch-based (SCM) L1 tag memory; confirm at the use site.
bit L1_TAG_SCM;
// NOTE(review): meaning not visible in this part of the file; confirm at the use site.
bit EARLY_LATCH;
// Derived values.
// NOTE(review): presumably clog2 of the fetch width in bytes; confirm where CFG is built.
int FETCH_ALIGN;
// Upper bound of the address slice [FILL_ALIGN-1:LINE_ALIGN] that selects a line within a
// fill word when the line is narrower than the fill bus.
int FILL_ALIGN;
// Number of low address bits below the line index (offset within a cache line).
int LINE_ALIGN;
// Number of address bits used as the line index (addr[LINE_ALIGN +: COUNT_ALIGN]).
int COUNT_ALIGN;
// Width in bits of the set index (`cset`).
int SET_ALIGN;
// Width in bits of the tag, i.e. of addr >> (LINE_ALIGN + COUNT_ALIGN) as stored in the tag SRAM.
int TAG_WIDTH;
// NOTE(review): L0 tag widths; not used in the visible code - confirm at the use site.
int L0_TAG_WIDTH;
int L0_EARLY_TAG_WIDTH;
// Width in bits of the request ID carried through the lookup pipeline.
int ID_WIDTH_REQ;
// NOTE(review): presumably the width of the response ID; not used in the visible code.
int ID_WIDTH_RESP;
int PENDING_IW; // refill ID width
} config_t;
endpackage
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
/// A refiller for cache lines.
///
/// Each accepted request on `in_req_*` is forwarded to the refill port as a burst of
/// `BEATS_PER_REFILL` beats. Its ID and bypass flag are stored in an in-order queue.
/// Response beats on `refill_p*` are assembled into one cache line; on the last beat the
/// line is presented on `in_rsp_*` together with the oldest queued ID/bypass flag.
///
/// Parameters:
///   CFG - cache configuration; must not be all-zero (checked in simulation).
///
/// Ports:
///   in_req_*   - line request (address, refill ID, bypass flag) with valid/ready handshake.
///   in_rsp_*   - line response (data, error, ID, bypass flag) with valid/ready handshake.
///   refill_q*  - outgoing refill request: address, burst length minus one, valid/ready.
///   refill_p*  - incoming refill response beats: data, error, last, valid/ready.
///
/// Note: `in_rsp_error_o` reflects `refill_perror_i` of the beat currently on the bus
/// (the last beat when the response is valid); errors of earlier beats are not accumulated.
module snitch_icache_refill #(
  parameter snitch_icache_pkg::config_t CFG = '0
) (
  input  logic                      clk_i,
  input  logic                      rst_ni,
  input  logic [CFG.FETCH_AW-1:0]   in_req_addr_i,
  input  logic [CFG.PENDING_IW-1:0] in_req_id_i,
  input  logic                      in_req_bypass_i,
  input  logic                      in_req_valid_i,
  output logic                      in_req_ready_o,
  output logic [CFG.LINE_WIDTH-1:0] in_rsp_data_o,
  output logic                      in_rsp_error_o,
  output logic [CFG.PENDING_IW-1:0] in_rsp_id_o,
  output logic                      in_rsp_bypass_o,
  output logic                      in_rsp_valid_o,
  input  logic                      in_rsp_ready_i,
  output logic [CFG.FILL_AW-1:0]    refill_qaddr_o,
  output logic [7:0]                refill_qlen_o,
  output logic                      refill_qvalid_o,
  input  logic                      refill_qready_i,
  input  logic [CFG.FILL_DW-1:0]    refill_pdata_i,
  input  logic                      refill_perror_i,
  input  logic                      refill_plast_i,
  input  logic                      refill_pvalid_i,
  output logic                      refill_pready_o
);
`ifndef SYNTHESIS
  initial assert(CFG != '0);
`endif

  // How many response beats are necessary to refill one cache line.
  localparam BEATS_PER_REFILL = CFG.LINE_WIDTH >= CFG.FILL_DW ? CFG.LINE_WIDTH/CFG.FILL_DW : 1;

  // Number of requests that may be outstanding at once. Shared by every per-request
  // metadata queue in this module so that they fill and drain in lockstep.
  localparam int unsigned QUEUE_DEPTH = 4;

  // The response queue holds metadata for the issued requests in order.
  logic queue_full;
  logic queue_push;
  logic queue_pop;

  fifo_v3 #(
    .DEPTH      ( QUEUE_DEPTH      ),
    .DATA_WIDTH ( CFG.PENDING_IW+1 )
  ) i_fifo_id_queue (
    .clk_i      ( clk_i                              ),
    .rst_ni     ( rst_ni                             ),
    .flush_i    ( 1'b0                               ),
    .testmode_i ( 1'b0                               ),
    .full_o     ( queue_full                         ),
    .empty_o    (                                    ),
    .usage_o    (                                    ),
    .data_i     ( {in_req_bypass_i, in_req_id_i}     ),
    .push_i     ( queue_push                         ),
    .data_o     ( {in_rsp_bypass_o, in_rsp_id_o}     ),
    .pop_i      ( queue_pop                          )
  );

  // Accept incoming requests, push the ID into the queue, and issue the
  // corresponding request. Both directions are gated by `queue_full` so that a
  // request is only issued when its metadata can be stored.
  assign refill_qaddr_o  = in_req_addr_i;
  assign refill_qlen_o   = $unsigned(BEATS_PER_REFILL-1);
  assign refill_qvalid_o = in_req_valid_i & ~queue_full;
  assign in_req_ready_o  = refill_qready_i & ~queue_full;
  assign queue_push      = refill_qvalid_o & refill_qready_i;

  // Assemble incoming responses if the cache line is wider than the bus data width.
  logic [CFG.LINE_WIDTH-1:0] response_data;

  if (CFG.LINE_WIDTH > CFG.FILL_DW) begin : g_data_concat
    // The current beat occupies the top FILL_DW bits; previously received beats are
    // shifted down by one beat on every accepted response beat.
    always_ff @(posedge clk_i, negedge rst_ni) begin
      if (!rst_ni)
        response_data[CFG.LINE_WIDTH-CFG.FILL_DW-1:0] <= '0;
      else if (refill_pvalid_i && refill_pready_o)
        response_data[CFG.LINE_WIDTH-CFG.FILL_DW-1:0] <= response_data[CFG.LINE_WIDTH-1:CFG.FILL_DW];
    end
    assign response_data[CFG.LINE_WIDTH-1:CFG.LINE_WIDTH-CFG.FILL_DW] = refill_pdata_i;
  end else if (CFG.LINE_WIDTH < CFG.FILL_DW) begin : g_data_slice
    // The fill bus carries several lines per beat; select the requested one.
    //
    // The offset must belong to the request that this response answers. By the time the
    // response arrives, `in_req_addr_i` may already carry a younger request (or no valid
    // request at all), so the offset is queued alongside the ID instead of being read
    // from the live request address.
    // Assumes CFG.FILL_ALIGN > CFG.LINE_ALIGN whenever this branch is elaborated.
    localparam int unsigned OFFSET_WIDTH = CFG.FILL_ALIGN - CFG.LINE_ALIGN;
    logic [OFFSET_WIDTH-1:0] rsp_line_offset;

    // Pushed and popped together with i_fifo_id_queue and of the same depth, hence the
    // occupancy is identical and `queue_full` covers this queue as well.
    fifo_v3 #(
      .DEPTH      ( QUEUE_DEPTH  ),
      .DATA_WIDTH ( OFFSET_WIDTH )
    ) i_fifo_addr_offset (
      .clk_i      ( clk_i                                            ),
      .rst_ni     ( rst_ni                                           ),
      .flush_i    ( 1'b0                                             ),
      .testmode_i ( 1'b0                                             ),
      .full_o     (                                                  ),
      .empty_o    (                                                  ),
      .usage_o    (                                                  ),
      .data_i     ( in_req_addr_i[CFG.FILL_ALIGN-1:CFG.LINE_ALIGN]   ),
      .push_i     ( queue_push                                       ),
      .data_o     ( rsp_line_offset                                  ),
      .pop_i      ( queue_pop                                        )
    );

    assign response_data = refill_pdata_i >> (rsp_line_offset * CFG.LINE_WIDTH);
  end else begin : g_data_passthrough
    assign response_data = refill_pdata_i;
  end

  // Accept response beats. Upon the last beat, pop the ID off the queue
  // and return the response.
  always_comb begin : p_response
    in_rsp_data_o   = response_data;
    in_rsp_error_o  = refill_perror_i;
    in_rsp_valid_o  = 0;
    queue_pop       = 0;
    refill_pready_o = 0;
    if (refill_pvalid_i) begin
      if (!refill_plast_i) begin
        // Intermediate beats are always consumed immediately.
        refill_pready_o = 1;
      end else begin
        // The last beat completes the line; hold it on the bus until the
        // receiver accepts the response.
        in_rsp_valid_o = 1;
        if (in_rsp_ready_i) begin
          refill_pready_o = 1;
          queue_pop       = 1;
        end
      end
    end
  end
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
/// Integer Processing Unit
/// Based on Snitch Shared Multiplier/Divider
/// Author: Sergio Mazzola, <smazzola@student.ethz.ch>
///
/// Decodes the offloaded instruction (`acc_qdata_op_i`) and forwards the request handshake
/// to one of the execution units:
///   - DIV/DIVU/REM/REMU                  -> serial divider (`i_div`)
///   - MUL/MULH/MULHSU/MULHU              -> DSP unit if `snitch_pkg::XPULPIMG`, else multiplier
///   - Xpulpimg instructions (p.*, pv.*)  -> DSP unit if `snitch_pkg::XPULPIMG`, else flagged illegal
/// The results of the two instantiated units are merged onto the response channel by a
/// two-input stream arbiter.
///
/// Parameters:
///   IdWidth - width in bits of the request/response ID.
///
/// Notes:
///   - `rst_i` is inverted before being connected to the `rst_ni` ports of the sub-modules.
///   - `acc_perror_o` is only ever assigned 1'b0 in this module.
///   - `illegal_instruction` is set by the decoder but not read anywhere in this module; for
///     an opcode that matches no unit, `acc_qready_o` keeps its default of 0.
module snitch_ipu #(
parameter int unsigned IdWidth = 5
) (
input logic clk_i,
input logic rst_i,
// Accelerator Interface - Slave
input logic [31:0] acc_qaddr_i, // unused
input logic [IdWidth-1:0] acc_qid_i,
input logic [31:0] acc_qdata_op_i, // RISC-V instruction
input logic [31:0] acc_qdata_arga_i,
input logic [31:0] acc_qdata_argb_i,
input logic [31:0] acc_qdata_argc_i,
input logic acc_qvalid_i,
output logic acc_qready_o,
output logic [31:0] acc_pdata_o,
output logic [IdWidth-1:0] acc_pid_o,
output logic acc_perror_o,
output logic acc_pvalid_o,
input logic acc_pready_i
);
`include "common_cells/registers.svh"
// Result of an execution unit together with the ID of the request that produced it.
typedef struct packed {
logic [31:0] result;
logic [IdWidth-1:0] id;
} result_t;
// input handshake
logic div_valid_op, div_ready_op;
// Depending on `snitch_pkg::XPULPIMG` only one of the multiplier / DSP unit is
// instantiated, so the ready signal of the other one has no driver.
/* verilator lint_off UNDRIVEN */
logic mul_valid_op, mul_ready_op;
logic dsp_valid_op, dsp_ready_op;
/* verilator lint_on UNDRIVEN */
// output handshake
logic mul_valid, mul_ready;
logic div_valid, div_ready;
logic dsp_valid, dsp_ready;
result_t div, mul, dsp, oup;
logic illegal_instruction;
// Request dispatch: by default no unit sees a valid request and the request is not
// accepted. The matching case item routes `acc_qvalid_i` to exactly one unit and returns
// that unit's ready as `acc_qready_o`.
always_comb begin
mul_valid_op = 1'b0;
div_valid_op = 1'b0;
dsp_valid_op = 1'b0;
acc_qready_o = 1'b0;
acc_perror_o = 1'b0;
illegal_instruction = 1'b0;
unique casez (acc_qdata_op_i)
// M-extension multiplications: handled by the DSP unit when it is present.
riscv_instr::MUL,
riscv_instr::MULH,
riscv_instr::MULHSU,
riscv_instr::MULHU: begin
if (snitch_pkg::XPULPIMG) begin
dsp_valid_op = acc_qvalid_i;
acc_qready_o = dsp_ready_op;
end else begin
mul_valid_op = acc_qvalid_i;
acc_qready_o = mul_ready_op;
end
end
// M-extension divisions and remainders: always handled by the serial divider.
riscv_instr::DIV,
riscv_instr::DIVU,
riscv_instr::REM,
riscv_instr::REMU: begin
div_valid_op = acc_qvalid_i;
acc_qready_o = div_ready_op;
end
// Xpulpimg instructions: only legal when the DSP unit is instantiated.
riscv_instr::P_ABS, // Xpulpimg: p.abs
riscv_instr::P_SLET, // Xpulpimg: p.slet
riscv_instr::P_SLETU, // Xpulpimg: p.sletu
riscv_instr::P_MIN, // Xpulpimg: p.min
riscv_instr::P_MINU, // Xpulpimg: p.minu
riscv_instr::P_MAX, // Xpulpimg: p.max
riscv_instr::P_MAXU, // Xpulpimg: p.maxu
riscv_instr::P_EXTHS, // Xpulpimg: p.exths
riscv_instr::P_EXTHZ, // Xpulpimg: p.exthz
riscv_instr::P_EXTBS, // Xpulpimg: p.extbs
riscv_instr::P_EXTBZ, // Xpulpimg: p.extbz
riscv_instr::P_CLIP, // Xpulpimg: p.clip
riscv_instr::P_CLIPU, // Xpulpimg: p.clipu
riscv_instr::P_CLIPR, // Xpulpimg: p.clipr
riscv_instr::P_CLIPUR, // Xpulpimg: p.clipur
riscv_instr::P_MAC, // Xpulpimg: p.mac
riscv_instr::P_MSU, // Xpulpimg: p.msu
riscv_instr::PV_ADD_H, // Xpulpimg: pv.add.h
riscv_instr::PV_ADD_SC_H, // Xpulpimg: pv.add.sc.h
riscv_instr::PV_ADD_SCI_H, // Xpulpimg: pv.add.sci.h
riscv_instr::PV_ADD_B, // Xpulpimg: pv.add.b
riscv_instr::PV_ADD_SC_B, // Xpulpimg: pv.add.sc.b
riscv_instr::PV_ADD_SCI_B, // Xpulpimg: pv.add.sci.b
riscv_instr::PV_SUB_H, // Xpulpimg: pv.sub.h
riscv_instr::PV_SUB_SC_H, // Xpulpimg: pv.sub.sc.h
riscv_instr::PV_SUB_SCI_H, // Xpulpimg: pv.sub.sci.h
riscv_instr::PV_SUB_B, // Xpulpimg: pv.sub.b
riscv_instr::PV_SUB_SC_B, // Xpulpimg: pv.sub.sc.b
riscv_instr::PV_SUB_SCI_B, // Xpulpimg: pv.sub.sci.b
riscv_instr::PV_AVG_H, // Xpulpimg: pv.avg.h
riscv_instr::PV_AVG_SC_H, // Xpulpimg: pv.avg.sc.h
riscv_instr::PV_AVG_SCI_H, // Xpulpimg: pv.avg.sci.h
riscv_instr::PV_AVG_B, // Xpulpimg: pv.avg.b
riscv_instr::PV_AVG_SC_B, // Xpulpimg: pv.avg.sc.b
riscv_instr::PV_AVG_SCI_B, // Xpulpimg: pv.avg.sci.b
riscv_instr::PV_AVGU_H, // Xpulpimg: pv.avgu.h
riscv_instr::PV_AVGU_SC_H, // Xpulpimg: pv.avgu.sc.h
riscv_instr::PV_AVGU_SCI_H, // Xpulpimg: pv.avgu.sci.h
riscv_instr::PV_AVGU_B, // Xpulpimg: pv.avgu.b
riscv_instr::PV_AVGU_SC_B, // Xpulpimg: pv.avgu.sc.b
riscv_instr::PV_AVGU_SCI_B, // Xpulpimg: pv.avgu.sci.b
riscv_instr::PV_MIN_H, // Xpulpimg: pv.min.h
riscv_instr::PV_MIN_SC_H, // Xpulpimg: pv.min.sc.h
riscv_instr::PV_MIN_SCI_H, // Xpulpimg: pv.min.sci.h
riscv_instr::PV_MIN_B, // Xpulpimg: pv.min.b
riscv_instr::PV_MIN_SC_B, // Xpulpimg: pv.min.sc.b
riscv_instr::PV_MIN_SCI_B, // Xpulpimg: pv.min.sci.b
riscv_instr::PV_MINU_H, // Xpulpimg: pv.minu.h
riscv_instr::PV_MINU_SC_H, // Xpulpimg: pv.minu.sc.h
riscv_instr::PV_MINU_SCI_H, // Xpulpimg: pv.minu.sci.h
riscv_instr::PV_MINU_B, // Xpulpimg: pv.minu.b
riscv_instr::PV_MINU_SC_B, // Xpulpimg: pv.minu.sc.b
riscv_instr::PV_MINU_SCI_B, // Xpulpimg: pv.minu.sci.b
riscv_instr::PV_MAX_H, // Xpulpimg: pv.max.h
riscv_instr::PV_MAX_SC_H, // Xpulpimg: pv.max.sc.h
riscv_instr::PV_MAX_SCI_H, // Xpulpimg: pv.max.sci.h
riscv_instr::PV_MAX_B, // Xpulpimg: pv.max.b
riscv_instr::PV_MAX_SC_B, // Xpulpimg: pv.max.sc.b
riscv_instr::PV_MAX_SCI_B, // Xpulpimg: pv.max.sci.b
riscv_instr::PV_MAXU_H, // Xpulpimg: pv.maxu.h
riscv_instr::PV_MAXU_SC_H, // Xpulpimg: pv.maxu.sc.h
riscv_instr::PV_MAXU_SCI_H, // Xpulpimg: pv.maxu.sci.h
riscv_instr::PV_MAXU_B, // Xpulpimg: pv.maxu.b
riscv_instr::PV_MAXU_SC_B, // Xpulpimg: pv.maxu.sc.b
riscv_instr::PV_MAXU_SCI_B, // Xpulpimg: pv.maxu.sci.b
riscv_instr::PV_SRL_H, // Xpulpimg: pv.srl.h
riscv_instr::PV_SRL_SC_H, // Xpulpimg: pv.srl.sc.h
riscv_instr::PV_SRL_SCI_H, // Xpulpimg: pv.srl.sci.h
riscv_instr::PV_SRL_B, // Xpulpimg: pv.srl.b
riscv_instr::PV_SRL_SC_B, // Xpulpimg: pv.srl.sc.b
riscv_instr::PV_SRL_SCI_B, // Xpulpimg: pv.srl.sci.b
riscv_instr::PV_SRA_H, // Xpulpimg: pv.sra.h
riscv_instr::PV_SRA_SC_H, // Xpulpimg: pv.sra.sc.h
riscv_instr::PV_SRA_SCI_H, // Xpulpimg: pv.sra.sci.h
riscv_instr::PV_SRA_B, // Xpulpimg: pv.sra.b
riscv_instr::PV_SRA_SC_B, // Xpulpimg: pv.sra.sc.b
riscv_instr::PV_SRA_SCI_B, // Xpulpimg: pv.sra.sci.b
riscv_instr::PV_SLL_H, // Xpulpimg: pv.sll.h
riscv_instr::PV_SLL_SC_H, // Xpulpimg: pv.sll.sc.h
riscv_instr::PV_SLL_SCI_H, // Xpulpimg: pv.sll.sci.h
riscv_instr::PV_SLL_B, // Xpulpimg: pv.sll.b
riscv_instr::PV_SLL_SC_B, // Xpulpimg: pv.sll.sc.b
riscv_instr::PV_SLL_SCI_B, // Xpulpimg: pv.sll.sci.b
riscv_instr::PV_OR_H, // Xpulpimg: pv.or.h
riscv_instr::PV_OR_SC_H, // Xpulpimg: pv.or.sc.h
riscv_instr::PV_OR_SCI_H, // Xpulpimg: pv.or.sci.h
riscv_instr::PV_OR_B, // Xpulpimg: pv.or.b
riscv_instr::PV_OR_SC_B, // Xpulpimg: pv.or.sc.b
riscv_instr::PV_OR_SCI_B, // Xpulpimg: pv.or.sci.b
riscv_instr::PV_XOR_H, // Xpulpimg: pv.xor.h
riscv_instr::PV_XOR_SC_H, // Xpulpimg: pv.xor.sc.h
riscv_instr::PV_XOR_SCI_H, // Xpulpimg: pv.xor.sci.h
riscv_instr::PV_XOR_B, // Xpulpimg: pv.xor.b
riscv_instr::PV_XOR_SC_B, // Xpulpimg: pv.xor.sc.b
riscv_instr::PV_XOR_SCI_B, // Xpulpimg: pv.xor.sci.b
riscv_instr::PV_AND_H, // Xpulpimg: pv.and.h
riscv_instr::PV_AND_SC_H, // Xpulpimg: pv.and.sc.h
riscv_instr::PV_AND_SCI_H, // Xpulpimg: pv.and.sci.h
riscv_instr::PV_AND_B, // Xpulpimg: pv.and.b
riscv_instr::PV_AND_SC_B, // Xpulpimg: pv.and.sc.b
riscv_instr::PV_AND_SCI_B, // Xpulpimg: pv.and.sci.b
riscv_instr::PV_ABS_H, // Xpulpimg: pv.abs.h
riscv_instr::PV_ABS_B, // Xpulpimg: pv.abs.b
riscv_instr::PV_EXTRACT_H, // Xpulpimg: pv.extract.h
riscv_instr::PV_EXTRACT_B, // Xpulpimg: pv.extract.b
riscv_instr::PV_EXTRACTU_H, // Xpulpimg: pv.extractu.h
riscv_instr::PV_EXTRACTU_B, // Xpulpimg: pv.extractu.b
riscv_instr::PV_INSERT_H, // Xpulpimg: pv.insert.h
riscv_instr::PV_INSERT_B, // Xpulpimg: pv.insert.b
riscv_instr::PV_DOTUP_H, // Xpulpimg: pv.dotup.h
riscv_instr::PV_DOTUP_SC_H, // Xpulpimg: pv.dotup.sc.h
riscv_instr::PV_DOTUP_SCI_H, // Xpulpimg: pv.dotup.sci.h
riscv_instr::PV_DOTUP_B, // Xpulpimg: pv.dotup.b
riscv_instr::PV_DOTUP_SC_B, // Xpulpimg: pv.dotup.sc.b
riscv_instr::PV_DOTUP_SCI_B, // Xpulpimg: pv.dotup.sci.b
riscv_instr::PV_DOTUSP_H, // Xpulpimg: pv.dotusp.h
riscv_instr::PV_DOTUSP_SC_H, // Xpulpimg: pv.dotusp.sc.h
riscv_instr::PV_DOTUSP_SCI_H, // Xpulpimg: pv.dotusp.sci.h
riscv_instr::PV_DOTUSP_B, // Xpulpimg: pv.dotusp.b
riscv_instr::PV_DOTUSP_SC_B, // Xpulpimg: pv.dotusp.sc.b
riscv_instr::PV_DOTUSP_SCI_B, // Xpulpimg: pv.dotusp.sci.b
riscv_instr::PV_DOTSP_H, // Xpulpimg: pv.dotsp.h
riscv_instr::PV_DOTSP_SC_H, // Xpulpimg: pv.dotsp.sc.h
riscv_instr::PV_DOTSP_SCI_H, // Xpulpimg: pv.dotsp.sci.h
riscv_instr::PV_DOTSP_B, // Xpulpimg: pv.dotsp.b
riscv_instr::PV_DOTSP_SC_B, // Xpulpimg: pv.dotsp.sc.b
riscv_instr::PV_DOTSP_SCI_B, // Xpulpimg: pv.dotsp.sci.b
riscv_instr::PV_SDOTUP_H, // Xpulpimg: pv.sdotup.h
riscv_instr::PV_SDOTUP_SC_H, // Xpulpimg: pv.sdotup.sc.h
riscv_instr::PV_SDOTUP_SCI_H, // Xpulpimg: pv.sdotup.sci.h
riscv_instr::PV_SDOTUP_B, // Xpulpimg: pv.sdotup.b
riscv_instr::PV_SDOTUP_SC_B, // Xpulpimg: pv.sdotup.sc.b
riscv_instr::PV_SDOTUP_SCI_B, // Xpulpimg: pv.sdotup.sci.b
riscv_instr::PV_SDOTUSP_H, // Xpulpimg: pv.sdotusp.h
riscv_instr::PV_SDOTUSP_SC_H, // Xpulpimg: pv.sdotusp.sc.h
riscv_instr::PV_SDOTUSP_SCI_H, // Xpulpimg: pv.sdotusp.sci.h
riscv_instr::PV_SDOTUSP_B, // Xpulpimg: pv.sdotusp.b
riscv_instr::PV_SDOTUSP_SC_B, // Xpulpimg: pv.sdotusp.sc.b
riscv_instr::PV_SDOTUSP_SCI_B, // Xpulpimg: pv.sdotusp.sci.b
riscv_instr::PV_SDOTSP_H, // Xpulpimg: pv.sdotsp.h
riscv_instr::PV_SDOTSP_SC_H, // Xpulpimg: pv.sdotsp.sc.h
riscv_instr::PV_SDOTSP_SCI_H, // Xpulpimg: pv.sdotsp.sci.h
riscv_instr::PV_SDOTSP_B, // Xpulpimg: pv.sdotsp.b
riscv_instr::PV_SDOTSP_SC_B, // Xpulpimg: pv.sdotsp.sc.b
riscv_instr::PV_SDOTSP_SCI_B, // Xpulpimg: pv.sdotsp.sci.b
riscv_instr::PV_SHUFFLE2_H, // Xpulpimg: pv.shuffle2.h
riscv_instr::PV_SHUFFLE2_B: begin // Xpulpimg: pv.shuffle2.b
if (snitch_pkg::XPULPIMG) begin
dsp_valid_op = acc_qvalid_i;
acc_qready_o = dsp_ready_op;
end else begin
illegal_instruction = 1'b1;
end
end
// Anything else is not handled by this unit; the request is never acknowledged.
default: illegal_instruction = 1'b1;
endcase
end
// Serial Divider
// Receives the full instruction word as `operator_i`; always instantiated.
serdiv #(
.WIDTH ( 32 ),
.IdWidth ( IdWidth )
) i_div (
.clk_i ( clk_i ),
.rst_ni ( ~rst_i ),
.id_i ( acc_qid_i ),
.operator_i ( acc_qdata_op_i ),
.op_a_i ( acc_qdata_arga_i ),
.op_b_i ( acc_qdata_argb_i ),
.in_vld_i ( div_valid_op ),
.in_rdy_o ( div_ready_op ),
.out_vld_o ( div_valid ),
.out_rdy_i ( div_ready ),
.id_o ( div.id ),
.res_o ( div.result )
);
// Exactly one of the two branches below is elaborated; each instantiates its execution
// unit and a stream arbiter that merges its result stream with the divider's.
if (snitch_pkg::XPULPIMG) begin : gen_xpulpimg
// DSP Unit
// Also receives the third operand (`acc_qdata_argc_i`).
dspu #(
.Width ( 32 ),
.IdWidth ( IdWidth )
) i_dspu (
.clk_i ( clk_i ),
.rst_i ( rst_i ),
.id_i ( acc_qid_i ),
.operator_i ( acc_qdata_op_i ),
.op_a_i ( acc_qdata_arga_i ),
.op_b_i ( acc_qdata_argb_i ),
.op_c_i ( acc_qdata_argc_i ),
.in_valid_i ( dsp_valid_op ),
.in_ready_o ( dsp_ready_op ),
.out_valid_o ( dsp_valid ),
.out_ready_i ( dsp_ready ),
.id_o ( dsp.id ),
.result_o ( dsp.result )
);
// Output Arbitration
stream_arbiter #(
.DATA_T ( result_t ),
.N_INP ( 2 )
) i_stream_arbiter (
.clk_i,
.rst_ni ( ~rst_i ),
.inp_data_i ( {div, dsp} ),
.inp_valid_i ( {div_valid, dsp_valid} ),
.inp_ready_o ( {div_ready, dsp_ready} ),
.oup_data_o ( oup ),
.oup_valid_o ( acc_pvalid_o ),
.oup_ready_i ( acc_pready_i )
);
end else begin : gen_vanilla
// Multiplication
multiplier #(
.Width ( 32 ),
.IdWidth ( IdWidth )
) i_multiplier (
.clk_i,
.rst_i,
.id_i ( acc_qid_i ),
.operator_i ( acc_qdata_op_i ),
.operand_a_i ( acc_qdata_arga_i ),
.operand_b_i ( acc_qdata_argb_i ),
.valid_i ( mul_valid_op ),
.ready_o ( mul_ready_op ),
.result_o ( mul.result ),
.valid_o ( mul_valid ),
.ready_i ( mul_ready ),
.id_o ( mul.id )
);
// Output Arbitration
stream_arbiter #(
.DATA_T ( result_t ),
.N_INP ( 2 )
) i_stream_arbiter (
.clk_i,
.rst_ni ( ~rst_i ),
.inp_data_i ( {div, mul} ),
.inp_valid_i ( {div_valid, mul_valid} ),
.inp_ready_o ( {div_ready, mul_ready} ),
.oup_data_o ( oup ),
.oup_valid_o ( acc_pvalid_o ),
.oup_ready_i ( acc_pready_i )
);
end
// Unpack the arbitrated result onto the response channel.
assign acc_pdata_o = oup.result;
assign acc_pid_o = oup.id;
endmodule
module dspu #(
parameter int unsigned Width = 32,
parameter int unsigned IdWidth = 5
) (
input logic clk_i, // unused
input logic rst_i, // unused
input logic [IdWidth-1:0] id_i,
input logic [31:0] operator_i,
input logic [Width-1:0] op_a_i,
input logic [Width-1:0] op_b_i,
input logic [Width-1:0] op_c_i,
input logic in_valid_i,
output logic in_ready_o,
output logic out_valid_o,
input logic out_ready_i,
output logic [IdWidth-1:0] id_o,
output logic [Width-1:0] result_o
);
// Control signals
assign out_valid_o = in_valid_i;
assign in_ready_o = out_ready_i;
assign id_o = id_i;
// Decoded fields
logic [4:0] imm5;
logic [5:0] imm6;
assign imm5 = operator_i[24:20];
assign imm6 = {operator_i[24:20], operator_i[25]};
// Internal control signals
logic cmp_signed; // comparator operation is signed
enum logic [1:0] {
None, Reg, Zero, ClipBound
} cmp_op_b_sel; // selection of shared comparator operands
logic clip_unsigned; // clip operation has "0" as lower bound
logic clip_register; // if 1 clip operation uses rs2, else imm5
enum logic [1:0] {
NoMul, MulLow, MulHigh, MulMac
} mul_op; // type of multiplication operation
logic mac_msu; // multiplication operation is MSU
logic mul_op_a_sign; // sign of multiplier operand a
logic mac_op_b_sign; // sign of multiplier operand b
enum logic [3:0] {
Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd
} res_sel; // result selection
enum logic [4:0] {
SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr,
SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp, SimdShuffle
} simd_op; // SIMD operation
enum logic {
HalfWord, Byte
} simd_size; // SIMD granularity
enum logic [1:0] {
Vect, Sc, Sci
} simd_mode; // SIMD mode
logic simd_signed; // SIMD operation is signed and uses sign-extended imm6
logic simd_dotp_op_a_signed; // signedness of SIMD dotp operand a
logic simd_dotp_op_b_signed; // signedness of SIMD dotp operand b
logic simd_dotp_acc; // accumulate result of SIMD dotp on destination reg
// --------------------
// Decoder
// --------------------
always_comb begin
cmp_signed = 1'b1;
cmp_op_b_sel = None;
clip_unsigned = 1'b0;
clip_register = 1'b0;
mul_op = NoMul;
mac_msu = 1'b0;
mul_op_a_sign = 1'b0;
mac_op_b_sign = 1'b0;
res_sel = Nop;
simd_op = SimdNop;
simd_size = HalfWord;
simd_mode = Vect;
simd_signed = 1;
simd_dotp_op_a_signed = 1;
simd_dotp_op_b_signed = 1;
simd_dotp_acc = 0;
unique casez (operator_i)
// Multiplications from M extension
riscv_instr::MUL: begin
mul_op = MulLow;
mul_op_a_sign = 1'b1;
mac_op_b_sign = 1'b1;
res_sel = Mac;
end
riscv_instr::MULH: begin
mul_op = MulHigh;
mul_op_a_sign = 1'b1;
mac_op_b_sign = 1'b1;
res_sel = Mac;
end
riscv_instr::MULHSU: begin
mul_op = MulHigh;
mul_op_a_sign = 1'b1;
res_sel = Mac;
end
riscv_instr::MULHU: begin
mul_op = MulHigh;
res_sel = Mac;
end
// Instructions from Xpulpimg
riscv_instr::P_ABS: begin
cmp_op_b_sel = Zero;
res_sel = Abs;
end
riscv_instr::P_SLET: begin
cmp_op_b_sel = Reg;
res_sel = Sle;
end
riscv_instr::P_SLETU: begin
cmp_signed = 1'b0;
cmp_op_b_sel = Reg;
res_sel = Sle;
end
riscv_instr::P_MIN: begin
cmp_op_b_sel = Reg;
res_sel = Min;
end
riscv_instr::P_MINU: begin
cmp_signed = 1'b0;
cmp_op_b_sel = Reg;
res_sel = Min;
end
riscv_instr::P_MAX: begin
cmp_op_b_sel = Reg;
res_sel = Max;
end
riscv_instr::P_MAXU: begin
cmp_signed = 1'b0;
cmp_op_b_sel = Reg;
res_sel = Max;
end
riscv_instr::P_EXTHS: begin
cmp_op_b_sel = Reg;
res_sel = Exths;
end
riscv_instr::P_EXTHZ: begin
cmp_op_b_sel = Reg;
res_sel = Exthz;
end
riscv_instr::P_EXTBS: begin
cmp_op_b_sel = Reg;
res_sel = Extbs;
end
riscv_instr::P_EXTBZ: begin
cmp_op_b_sel = Reg;
res_sel = Extbz;
end
riscv_instr::P_CLIP: begin
cmp_op_b_sel = ClipBound;
res_sel = Clip;
end
riscv_instr::P_CLIPU: begin
clip_unsigned = 1'b1;
cmp_op_b_sel = ClipBound;
res_sel = Clip;
end
riscv_instr::P_CLIPR: begin
clip_register = 1'b1;
cmp_op_b_sel = ClipBound;
res_sel = Clip;
end
riscv_instr::P_CLIPUR: begin
clip_unsigned = 1'b1;
clip_register = 1'b1;
cmp_op_b_sel = ClipBound;
res_sel = Clip;
end
riscv_instr::P_MAC: begin
mul_op = MulMac;
mul_op_a_sign = 1'b1;
mac_op_b_sign = 1'b1;
res_sel = Mac;
end
riscv_instr::P_MSU: begin
mul_op = MulMac;
mac_msu = 1'b1;
mul_op_a_sign = 1'b1;
mac_op_b_sign = 1'b1;
res_sel = Mac;
end
riscv_instr::PV_ADD_H: begin
simd_op = SimdAdd;
res_sel = Simd;
end
riscv_instr::PV_ADD_SC_H: begin
simd_op = SimdAdd;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_ADD_SCI_H: begin
simd_op = SimdAdd;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_ADD_B: begin
simd_op = SimdAdd;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_ADD_SC_B: begin
simd_op = SimdAdd;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_ADD_SCI_B: begin
simd_op = SimdAdd;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SUB_H: begin
simd_op = SimdSub;
res_sel = Simd;
end
riscv_instr::PV_SUB_SC_H: begin
simd_op = SimdSub;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SUB_SCI_H: begin
simd_op = SimdSub;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SUB_B: begin
simd_op = SimdSub;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_SUB_SC_B: begin
simd_op = SimdSub;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SUB_SCI_B: begin
simd_op = SimdSub;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_AVG_H: begin
simd_op = SimdAvg;
res_sel = Simd;
end
riscv_instr::PV_AVG_SC_H: begin
simd_op = SimdAvg;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_AVG_SCI_H: begin
simd_op = SimdAvg;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_AVG_B: begin
simd_op = SimdAvg;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_AVG_SC_B: begin
simd_op = SimdAvg;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_AVG_SCI_B: begin
simd_op = SimdAvg;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_AVGU_H: begin
simd_op = SimdAvg;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_AVGU_SC_H: begin
simd_op = SimdAvg;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_AVGU_SCI_H: begin
simd_op = SimdAvg;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_AVGU_B: begin
simd_op = SimdAvg;
simd_size = Byte;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_AVGU_SC_B: begin
simd_op = SimdAvg;
simd_size = Byte;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_AVGU_SCI_B: begin
simd_op = SimdAvg;
simd_size = Byte;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MIN_H: begin
simd_op = SimdMin;
res_sel = Simd;
end
riscv_instr::PV_MIN_SC_H: begin
simd_op = SimdMin;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_MIN_SCI_H: begin
simd_op = SimdMin;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_MIN_B: begin
simd_op = SimdMin;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_MIN_SC_B: begin
simd_op = SimdMin;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_MIN_SCI_B: begin
simd_op = SimdMin;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_MINU_H: begin
simd_op = SimdMin;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MINU_SC_H: begin
simd_op = SimdMin;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MINU_SCI_H: begin
simd_op = SimdMin;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MINU_B: begin
simd_op = SimdMin;
simd_size = Byte;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MINU_SC_B: begin
simd_op = SimdMin;
simd_size = Byte;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MINU_SCI_B: begin
simd_op = SimdMin;
simd_size = Byte;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAX_H: begin
simd_op = SimdMax;
res_sel = Simd;
end
riscv_instr::PV_MAX_SC_H: begin
simd_op = SimdMax;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_MAX_SCI_H: begin
simd_op = SimdMax;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_MAX_B: begin
simd_op = SimdMax;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_MAX_SC_B: begin
simd_op = SimdMax;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_MAX_SCI_B: begin
simd_op = SimdMax;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_MAXU_H: begin
simd_op = SimdMax;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAXU_SC_H: begin
simd_op = SimdMax;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAXU_SCI_H: begin
simd_op = SimdMax;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAXU_B: begin
simd_op = SimdMax;
simd_size = Byte;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAXU_SC_B: begin
simd_op = SimdMax;
simd_size = Byte;
simd_mode = Sc;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_MAXU_SCI_B: begin
simd_op = SimdMax;
simd_size = Byte;
simd_mode = Sci;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_SRL_H: begin
simd_op = SimdSrl;
res_sel = Simd;
end
riscv_instr::PV_SRL_SC_H: begin
simd_op = SimdSrl;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SRL_SCI_H: begin
simd_op = SimdSrl;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SRL_B: begin
simd_op = SimdSrl;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_SRL_SC_B: begin
simd_op = SimdSrl;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SRL_SCI_B: begin
simd_op = SimdSrl;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SRA_H: begin
simd_op = SimdSra;
res_sel = Simd;
end
riscv_instr::PV_SRA_SC_H: begin
simd_op = SimdSra;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SRA_SCI_H: begin
simd_op = SimdSra;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SRA_B: begin
simd_op = SimdSra;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_SRA_SC_B: begin
simd_op = SimdSra;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SRA_SCI_B: begin
simd_op = SimdSra;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SLL_H: begin
simd_op = SimdSll;
res_sel = Simd;
end
riscv_instr::PV_SLL_SC_H: begin
simd_op = SimdSll;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SLL_SCI_H: begin
simd_op = SimdSll;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SLL_B: begin
simd_op = SimdSll;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_SLL_SC_B: begin
simd_op = SimdSll;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_SLL_SCI_B: begin
simd_op = SimdSll;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_OR_H: begin
simd_op = SimdOr;
res_sel = Simd;
end
riscv_instr::PV_OR_SC_H: begin
simd_op = SimdOr;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_OR_SCI_H: begin
simd_op = SimdOr;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_OR_B: begin
simd_op = SimdOr;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_OR_SC_B: begin
simd_op = SimdOr;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_OR_SCI_B: begin
simd_op = SimdOr;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_XOR_H: begin
simd_op = SimdXor;
res_sel = Simd;
end
riscv_instr::PV_XOR_SC_H: begin
simd_op = SimdXor;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_XOR_SCI_H: begin
simd_op = SimdXor;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_XOR_B: begin
simd_op = SimdXor;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_XOR_SC_B: begin
simd_op = SimdXor;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_XOR_SCI_B: begin
simd_op = SimdXor;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_AND_H: begin
simd_op = SimdAnd;
res_sel = Simd;
end
riscv_instr::PV_AND_SC_H: begin
simd_op = SimdAnd;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_AND_SCI_H: begin
simd_op = SimdAnd;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_AND_B: begin
simd_op = SimdAnd;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_AND_SC_B: begin
simd_op = SimdAnd;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_AND_SCI_B: begin
simd_op = SimdAnd;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_ABS_H: begin
simd_op = SimdAbs;
res_sel = Simd;
end
riscv_instr::PV_ABS_B: begin
simd_op = SimdAbs;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_EXTRACT_H: begin
simd_op = SimdExt;
res_sel = Simd;
end
riscv_instr::PV_EXTRACT_B: begin
simd_op = SimdExt;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_EXTRACTU_H: begin
simd_op = SimdExt;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_EXTRACTU_B: begin
simd_op = SimdExt;
simd_size = Byte;
simd_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_INSERT_H: begin
simd_op = SimdIns;
res_sel = Simd;
end
riscv_instr::PV_INSERT_B: begin
simd_op = SimdIns;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_H: begin
simd_op = SimdDotp;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_H: begin
simd_op = SimdDotp;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTUSP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
simd_dotp_op_a_signed = 0;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_H: begin
simd_op = SimdDotp;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
res_sel = Simd;
end
riscv_instr::PV_DOTSP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_H: begin
simd_op = SimdDotp;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
simd_signed = 0;
simd_dotp_op_a_signed = 0;
simd_dotp_op_b_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_H: begin
simd_op = SimdDotp;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTUSP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
simd_dotp_op_a_signed = 0;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_H: begin
simd_op = SimdDotp;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_SC_H: begin
simd_op = SimdDotp;
simd_mode = Sc;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_SCI_H: begin
simd_op = SimdDotp;
simd_mode = Sci;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_SC_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sc;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SDOTSP_SCI_B: begin
simd_op = SimdDotp;
simd_size = Byte;
simd_mode = Sci;
simd_dotp_acc = 1;
res_sel = Simd;
end
riscv_instr::PV_SHUFFLE2_H: begin
simd_op = SimdShuffle;
res_sel = Simd;
end
riscv_instr::PV_SHUFFLE2_B: begin
simd_op = SimdShuffle;
simd_size = Byte;
res_sel = Simd;
end
default: ;
endcase
end
// ___ _ _____ _ ___ _ _____ _ _
// | \ /_\|_ _|/_\ | _ \ /_\|_ _|| || |
// | |) |/ _ \ | | / _ \ | _// _ \ | | | __ |
// |___//_/ \_\|_|/_/ \_\|_| /_/ \_\|_| |_||_|
//
// --------------------
// Clips
// --------------------
// Clip datapath: derives the lower and upper clip bounds and selects the single
// bound (clip_comp) that is handed to the shared comparator.
logic clip_use_n_bound; // 1: compare against the lower bound, 0: against the upper bound
logic [Width-1:0] clip_op_b_n, clip_op_b; // clip lower and upper bounds
logic [Width-1:0] clip_lower; // lower bound derived from the immediate
logic [Width-1:0] clip_comp; // bound selected for the comparison
// Generate -2^(imm5-1), 2^(imm5-1)-1 for clip/clipu and -rs2-1, rs2 for clipr, clipur
// (Width+1) ones are shifted left by imm5 and then right by one; after truncation to
// Width bits this leaves ones from bit imm5-1 upwards (for imm5 >= 1).
// NOTE(review): relies on the shifts being evaluated at Width+1 bits - confirm with the tools in use.
assign clip_lower = ({(Width+1){1'b1}} << $unsigned(imm5)) >> 1;
// Lower bound: zero for unsigned clips, ~op_b_i for register clips, clip_lower otherwise
assign clip_op_b_n = clip_unsigned ? 'b0 : (clip_register ? ~op_b_i : clip_lower);
// Upper bound: op_b_i for register clips, ~clip_lower otherwise
assign clip_op_b = clip_register ? op_b_i : ~clip_lower;
// is 1 when NOT(rs1 >= 0 AND clip_op_b >= 0), i.e. at least one operand is negative
assign clip_use_n_bound = op_a_i[Width-1] | clip_op_b[Width-1];
// Select operand to use in comparison for clip operations: clips would need two comparisons
// to clamp the result between the two bounds; but one comparison is enough if we select the
// second operand based on the signs of op_a and clip_op_b (i.e. rs1 and the clip upper bound,
// being either rs2 or 2^(imm5-1)-1)
assign clip_comp = clip_use_n_bound ? clip_op_b_n : clip_op_b;
// --------------------
// Shared comparator
// --------------------
// Shared "less than or equal" comparator.
// Operand A is always op_a_i; operand B is chosen by cmp_op_b_sel. Both operands
// are widened by one bit before the signed comparison: the extra MSB copies the
// operand sign when cmp_signed is set and is zero otherwise, so a single signed
// comparator serves signed and unsigned compares.
logic [Width-1:0] cmp_op_a, cmp_op_b;
logic signed [Width:0] cmp_op_a_ext, cmp_op_b_ext;
logic cmp_result;

// Operand A
assign cmp_op_a = op_a_i;

// Operand B: zero unless a register or clip-bound compare is selected
always_comb begin
  cmp_op_b = '0;
  unique case (cmp_op_b_sel)
    Reg:       cmp_op_b = op_b_i;
    ClipBound: cmp_op_b = clip_comp;
    default: ; // Zero and any other selector keep the zero default
  endcase
end

// One-bit extension of both operands
assign cmp_op_a_ext = {cmp_op_a[Width-1] & cmp_signed, cmp_op_a};
assign cmp_op_b_ext = {cmp_op_b[Width-1] & cmp_signed, cmp_op_b};

// cmp_result = (A <= B)
assign cmp_result = cmp_op_a_ext <= cmp_op_b_ext;
// --------------------
// Multiplier & acc
// --------------------
// 32x32 into 32 bits multiplier & accumulator
// Multiplier and accumulator.
// Operand A can be negated (mac_msu) to obtain -op_a*op_b. Both operands get an
// extra MSB that copies their sign when the respective *_sign control is set and
// is zero otherwise, so one signed multiplier covers all signedness combinations.
logic [Width-1:0]      mul_op_a;
logic signed [Width:0] mul_op_a_ext, mul_op_b_ext;
logic [2*Width-1:0]    mul_result;
logic [Width-1:0]      mac_result;

// op_a_i is sign-inverted if mac_msu=1
assign mul_op_a = mac_msu ? -op_a_i : op_a_i;

assign mul_op_a_ext = {mul_op_a[Width-1] & mul_op_a_sign, mul_op_a};
assign mul_op_b_ext = {op_b_i[Width-1] & mac_op_b_sign, op_b_i};

// Full-width (2*Width bits) product
assign mul_result = mul_op_a_ext * mul_op_b_ext;

// Pick the part of the product requested by mul_op; zero for any other value
always_comb begin
  mac_result = '0;
  unique case (mul_op)
    MulLow:  mac_result = mul_result[Width-1:0];          // lower half of the product
    MulHigh: mac_result = mul_result[2*Width-1:Width];    // upper half of the product
    MulMac:  mac_result = mul_result[Width-1:0] + op_c_i; // lower half accumulated onto op_c_i
    default: ;
  endcase
end
// --------------------
// SIMD operations
// --------------------
// SIMD operand preparation: splits op_a_i / op_b_i / op_c_i into byte lanes and
// builds operand B according to simd_mode (vector, replicated scalar or immediate).
// NOTE(review): the lane arrays are fixed at 4 bytes while the loops iterate over
// Width/16 and Width/8 elements - this presumably requires Width == 32; confirm.
logic [3:0][7:0] simd_op_a, simd_op_b, simd_op_c;
logic [1:0][7:0] simd_imm; // imm6 extended to 16 bits
logic [3:0][7:0] simd_result;
// half-word and byte immediate extensions
// imm6 is sign-extended when simd_signed is set and zero-extended otherwise
always_comb
if(simd_signed) simd_imm = $signed(imm6);
else simd_imm = $unsigned(imm6);
// SIMD operands composition
always_comb begin
// Defaults, kept when simd_size matches neither granularity below
simd_op_a = 'b0;
simd_op_b = 'b0;
simd_op_c = 'b0;
unique case (simd_size)
// half-word granularity
HalfWord:
for (int i = 0; i < Width/16; i++) begin
simd_op_a[2*i +: 2] = op_a_i[16*i +: 16]; // operands A are the half-words of op_a_i
// operands B are the half-words of op_b_i, replicated lowest half-word of op_b_i or replicated 6-bit immediate
simd_op_b[2*i +: 2] = (simd_mode == Vect) ? op_b_i[16*i +: 16] : ((simd_mode == Sc) ? op_b_i[15:0] : simd_imm);
simd_op_c[2*i +: 2] = op_c_i[16*i +: 16]; // operands C are the half-words of op_c_i
end
// byte granularity
Byte:
for (int i = 0; i < Width/8; i++) begin
simd_op_a[i] = op_a_i[8*i +: 8]; // operands A are the bytes of op_a_i
// operands B are the bytes of op_b_i, replicated lowest byte of op_b_i or replicated 6-bit immediate
// (only the low byte of the extended immediate is used here)
simd_op_b[i] = (simd_mode == Vect) ? op_b_i[8*i +: 8] : ((simd_mode == Sc) ? op_b_i[7:0] : simd_imm[0]);
simd_op_c[i] = op_c_i[8*i +: 8]; // operands C are the bytes of op_c_i
end
default: ;
endcase
end
// SIMD unit
// Computes simd_result from the prepared lane operands. The outer case selects the
// element size (half-word or byte), the inner case the operation. simd_result stays
// zero when neither size nor operation matches.
always_comb begin
simd_result = 'b0;
unique case (simd_size)
// half-word granularity
HalfWord: begin
unique case (simd_op)
// lane-wise addition, result truncated to the lane width
SimdAdd:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]);
// lane-wise subtraction
SimdSub:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) - $signed(simd_op_b[2*i +: 2]);
// lane-wise average: the truncated sum is shifted right by one, the vacated MSB
// is filled with the sum's sign bit when simd_signed is set and with zero otherwise
SimdAvg:
for (int i = 0; i < Width/16; i++) begin
simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]);
simd_result[2*i +: 2] = {simd_result[2*i+1][7] & simd_signed, simd_result[2*i +: 2]} >> 1;
end
// lane-wise minimum; operands are extended by one bit (sign bit if simd_signed) before comparing
SimdMin:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) <=
$signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ?
simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2];
// lane-wise maximum, same operand extension as SimdMin
SimdMax:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) >
$signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ?
simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2];
// lane-wise shifts; the shift amount is the low 4 bits of the lane's operand B
SimdSrl:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) >> simd_op_b[2*i][3:0];
SimdSra:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) >>> simd_op_b[2*i][3:0];
SimdSll:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) << simd_op_b[2*i][3:0];
// bitwise operations act on the whole word at once
SimdOr: simd_result = simd_op_a | simd_op_b;
SimdXor: simd_result = simd_op_a ^ simd_op_b;
SimdAnd: simd_result = simd_op_a & simd_op_b;
// lane-wise absolute value
SimdAbs:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) > 0 ? simd_op_a[2*i +: 2] : -$signed(simd_op_a[2*i +: 2]);
// extract the half-word selected by imm6[0] into the lower half of the result
SimdExt: begin
simd_result[1:0] = simd_op_a[2*imm6[0] +: 2];
// sign- or zero-extend
simd_result[3:2] = {16{simd_op_a[2*imm6[0]+1][7] & simd_signed}};
end
// insert: start from op_c_i and overwrite the half-word selected by imm6[0]
// with the lowest half-word of operand A
SimdIns: begin
simd_result = op_c_i;
simd_result[2*imm6[0] +: 2] = simd_op_a[1:0];
end
// dot product: sum of lane products; each operand lane is extended by one bit
// according to its own signedness control
SimdDotp: begin
simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero
for (int i = 0; i < Width/16; i++) begin
simd_result = $signed(simd_result) + $signed({simd_op_a[2*i+1][7] & simd_dotp_op_a_signed, simd_op_a[2*i +: 2]}) *
$signed({simd_op_b[2*i+1][7] & simd_dotp_op_b_signed, simd_op_b[2*i +: 2]});
end
end
// shuffle: per result lane, bit 1 of operand B's low byte selects the source
// (operand A if set, operand C otherwise) and bit 0 selects the half-word
SimdShuffle:
for (int i = 0; i < Width/16; i++)
simd_result[2*i +: 2] = simd_op_b[2*i][1] ? simd_op_a[2*simd_op_b[2*i][0] +: 2] : simd_op_c[2*simd_op_b[2*i][0] +: 2];
default: ;
endcase
end
// byte granularity
Byte: begin
unique case (simd_op)
// lane-wise addition, result truncated to the lane width
SimdAdd:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]);
// lane-wise subtraction
SimdSub:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed(simd_op_a[i]) - $signed(simd_op_b[i]);
// lane-wise average, see the half-word variant for the MSB handling
SimdAvg:
for (int i = 0; i < Width/8; i++) begin
simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]);
simd_result[i] = {simd_result[i][7] & simd_signed, simd_result[i]} >> 1;
end
// lane-wise minimum; operands are extended by one bit (sign bit if simd_signed) before comparing
SimdMin:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) <=
$signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ?
simd_op_a[i] : simd_op_b[i];
// lane-wise maximum, same operand extension as SimdMin
SimdMax:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) >
$signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ?
simd_op_a[i] : simd_op_b[i];
// lane-wise shifts; the shift amount is the low 3 bits of the lane's operand B
SimdSrl:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $unsigned(simd_op_a[i]) >> simd_op_b[i][2:0];
SimdSra:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed(simd_op_a[i]) >>> simd_op_b[i][2:0];
SimdSll:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $unsigned(simd_op_a[i]) << simd_op_b[i][2:0];
// bitwise operations act on the whole word at once
SimdOr: simd_result = simd_op_a | simd_op_b;
SimdXor: simd_result = simd_op_a ^ simd_op_b;
SimdAnd: simd_result = simd_op_a & simd_op_b;
// lane-wise absolute value
SimdAbs:
for (int i = 0; i < Width/8; i++)
simd_result[i] = $signed(simd_op_a[i]) > 0 ? simd_op_a[i] : -$signed(simd_op_a[i]);
// extract the byte selected by imm6[1:0] into the lowest byte of the result
SimdExt: begin
simd_result[0] = simd_op_a[imm6[1:0]];
// sign- or zero-extend
simd_result[3:1] = {24{simd_op_a[imm6[1:0]][7] & simd_signed}};
end
// insert: start from op_c_i and overwrite the byte selected by imm6[1:0]
// with the lowest byte of operand A
SimdIns: begin
simd_result = op_c_i;
simd_result[imm6[1:0]] = simd_op_a[0];
end
// dot product: sum of lane products; each operand lane is extended by one bit
// according to its own signedness control
SimdDotp: begin
simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero
for (int i = 0; i < Width/8; i++)
simd_result = $signed(simd_result) + $signed({simd_op_a[i][7] & simd_dotp_op_a_signed, simd_op_a[i]}) *
$signed({simd_op_b[i][7] & simd_dotp_op_b_signed, simd_op_b[i]});
end
// shuffle: per result lane, bit 2 of operand B selects the source (operand A if
// set, operand C otherwise) and bits [1:0] select the byte
SimdShuffle:
for (int i = 0; i < Width/8; i++)
simd_result[i] = simd_op_b[i][2] ? simd_op_a[simd_op_b[i][1:0]] : simd_op_c[simd_op_b[i][1:0]];
default: ;
endcase
end
default: ;
endcase
end
// --------------------
// Result generation
// --------------------
// Final result multiplexer: res_sel picks which functional unit drives result_o.
// Any selector value not listed below yields zero.
always_comb begin
unique case (res_sel)
// cmp_result is (cmp_op_a <= cmp_op_b); how operand B is chosen is decided by cmp_op_b_sel
Abs: result_o = cmp_result ? -$signed(op_a_i) : op_a_i;
Sle: result_o = $unsigned(cmp_result);
Min: result_o = cmp_result ? op_a_i : op_b_i;
Max: result_o = ~cmp_result ? op_a_i : op_b_i;
// sign- / zero-extension of the lowest half-word or byte of op_a_i
Exths: result_o = $signed(op_a_i[15:0]);
Exthz: result_o = $unsigned(op_a_i[15:0]);
Extbs: result_o = $signed(op_a_i[7:0]);
Extbz: result_o = $unsigned(op_a_i[7:0]);
// Select the clip output based on the result of the comparison and on the signs of the operands:
// - if rs1 <= clip_comp (i.e. cmp_result = 1)
// * if clip_comp=clip_op_b_n (i.e. rs1<0 or clip_op_b<0): rs1 is below the lower bound and since
// this check has priority over the others, result_o is clipped to clip_op_b_n
// * if clip_comp=clip_op_b (i.e. rs1>=0 and clip_op_b>=0): since rs1<=clip_op_b, then it is
// clip_op_b_n < 0 <= rs1 <= clip_op_b thus rs1 is already within the clip bounds
// - if rs1 > clip_comp (i.e. cmp_result = 0)
// * if rs1 < 0: clip_comp=clip_op_b_n because clip_use_n_bound=1; since rs1>clip_op_b_n and
// rs1<0 it is clip_op_b_n < rs1 < 0 <= clip_op_b, thus rs1 is already within the clip bounds
// * if rs1 >= 0: then clip_comp might be clip_op_b_n or clip_op_b based on the sign of clip_op_b;
// + if clip_op_b < 0: clip_comp=clip_op_b_n, so rs1>clip_op_b_n but also rs1 >= 0, so it is
// clip_op_b < 0 <= clip_op_b_n <= rs1; then rs1 is not <= clip_op_b_n but it is >= clip_op_b,
// so result_o is clipped to clip_op_b
// + if clip_op_b >= 0: clip_comp=clip_op_b (i.e. rs1>=0 and clip_op_b>=0) and the result must
// be clipped to the upper bound since rs1 > clip_op_b
Clip: result_o = cmp_result ? (clip_use_n_bound ? clip_op_b_n : op_a_i) : (op_a_i[Width-1] ? op_a_i : clip_op_b);
Mac: result_o = mac_result;
Simd: result_o = simd_result;
default: result_o = '0;
endcase
end
endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Description: Load Store Unit (can handle `NumOutstandingLoads` outstanding loads) and
// optionally NaNBox if used in a floating-point setting.
// It supports out-of-order memory responses via metadata linking with IDs.
/// Load/store unit front-end.
///
/// Forwards requests from the core to the data memory interface. Every accepted
/// load allocates an entry in a small ID table that stores the metadata (tag,
/// sign-extension flag, byte offset and size) needed to post-process the
/// response; the entry index travels with the request as `data_qid_o` and is
/// expected back on `data_pid_i`, which allows responses to return in any order.
/// Stores do not allocate an entry.
///
/// Parameters:
/// - tag_t:               type of the tag that is stored with a load and returned with its response
/// - NumOutstandingLoads: number of ID table entries, i.e. loads that may be in flight
/// - NaNBox:              not referenced inside this module
/// - IdWidth:             derived from NumOutstandingLoads, do not override
module snitch_lsu
  import cf_math_pkg::idx_width;
#(
  parameter type          tag_t               = logic [4:0],
  parameter int unsigned  NumOutstandingLoads = 1,
  parameter bit           NaNBox              = 0,
  // Dependent parameters. DO NOT CHANGE.
  localparam int unsigned IdWidth = idx_width(NumOutstandingLoads)
) (
  input  logic               clk_i,
  input  logic               rst_i,
  // request channel
  input  tag_t               lsu_qtag_i,
  input  logic               lsu_qwrite,
  input  logic               lsu_qsigned,
  input  logic [31:0]        lsu_qaddr_i,
  input  logic [31:0]        lsu_qdata_i,
  input  logic [1:0]         lsu_qsize_i,
  input  logic [3:0]         lsu_qamo_i,
  input  logic               lsu_qvalid_i,
  output logic               lsu_qready_o,
  // response channel
  output logic [31:0]        lsu_pdata_o,
  output tag_t               lsu_ptag_o,
  output logic               lsu_perror_o,
  output logic               lsu_pvalid_o,
  input  logic               lsu_pready_i,
  // Memory Interface Channel
  output logic [31:0]        data_qaddr_o,
  output logic               data_qwrite_o,
  output logic [3:0]         data_qamo_o,
  output logic [31:0]        data_qdata_o,
  output logic [3:0]         data_qstrb_o,
  output logic [IdWidth-1:0] data_qid_o,
  output logic               data_qvalid_o,
  input  logic               data_qready_i,
  input  logic [31:0]        data_pdata_i,
  input  logic               data_perror_i,
  input  logic [IdWidth-1:0] data_pid_i,
  input  logic               data_pvalid_i,
  output logic               data_pready_o
);

  // ----------------
  // TYPEDEFS
  // ----------------
  typedef logic [IdWidth-1:0] meta_id_t;

  // Information remembered per outstanding load
  typedef struct packed {
    tag_t       tag;      // returned unchanged on lsu_ptag_o
    logic       sign_ext; // sign-extend sub-word load data
    logic [1:0] offset;   // byte offset of the access inside the word
    logic [1:0] size;     // access size as given on lsu_qsize_i
  } metadata_t;

  // ----------------
  // SIGNALS
  // ----------------
  // ID Table
  logic      [NumOutstandingLoads-1:0] id_available_d, id_available_q;
  metadata_t [NumOutstandingLoads-1:0] metadata_d, metadata_q;
  metadata_t req_metadata;
  metadata_t resp_metadata;
  meta_id_t  req_id;
  meta_id_t  resp_id;
  logic      id_table_push;
  logic      id_table_pop;
  logic      id_table_full;
  // Response
  logic [31:0] ld_result;

  // ----------------
  // ID TABLE
  // ----------------
  // Track ID availability and store metadata
  always_comb begin
    // Default
    id_available_d = id_available_q;
    metadata_d     = metadata_q;

    // Take ID and store metadata
    if (id_table_push) begin
      id_available_d[req_id] = 1'b0;
      metadata_d[req_id]     = req_metadata;
    end

    // Free ID. Evaluated after the push so that a pop wins if both hit the same entry.
    if (id_table_pop) begin
      id_available_d[resp_id] = 1'b1;
    end
  end

  assign req_metadata = '{
    tag:      lsu_qtag_i,
    sign_ext: lsu_qsigned,
    offset:   lsu_qaddr_i[1:0],
    size:     lsu_qsize_i
  };

  assign resp_metadata = metadata_q[resp_id];

  // Search available ID for request. `empty_o` is asserted when no bit of
  // id_available_q is set, i.e. when all IDs are in use.
  lzc #(
    .WIDTH ( NumOutstandingLoads )
  ) i_req_id (
    .in_i    ( id_available_q ),
    .cnt_o   ( req_id         ),
    .empty_o ( id_table_full  )
  );

  // Pop if response accepted
  assign id_table_pop  = data_pvalid_i & data_pready_o;
  // Push if load request accepted
  assign id_table_push = ~lsu_qwrite & data_qready_i & data_qvalid_o;

  // ----------------
  // REQUEST
  // ----------------
  // only make a request when we got a valid request and if it is a load
  // also check that we can actually store the necessary information to process
  // it in the upcoming cycle(s).
  assign data_qvalid_o = lsu_qvalid_i & (lsu_qwrite | ~id_table_full);
  assign data_qwrite_o = lsu_qwrite;
  assign data_qaddr_o  = {lsu_qaddr_i[31:2], 2'b0}; // word-aligned address
  assign data_qamo_o   = lsu_qamo_i;
  assign data_qid_o    = req_id;

  // generate byte enable mask
  always_comb begin
    unique case (lsu_qsize_i)
      2'b00:   data_qstrb_o = (4'b1 << lsu_qaddr_i[1:0]);  // byte
      2'b01:   data_qstrb_o = (4'b11 << lsu_qaddr_i[1:0]); // half-word
      2'b10:   data_qstrb_o = '1;                          // word
      default: data_qstrb_o = '0;
    endcase
  end

  // re-align write data: rotate left by the byte offset so that the least
  // significant byte of lsu_qdata_i ends up in the addressed byte lane
  /* verilator lint_off WIDTH */
  always_comb begin
    unique case (lsu_qaddr_i[1:0])
      2'b00:   data_qdata_o = lsu_qdata_i;
      2'b01:   data_qdata_o = {lsu_qdata_i[23:0], lsu_qdata_i[31:24]};
      2'b10:   data_qdata_o = {lsu_qdata_i[15:0], lsu_qdata_i[31:16]};
      2'b11:   data_qdata_o = {lsu_qdata_i[ 7:0], lsu_qdata_i[31: 8]};
      default: data_qdata_o = lsu_qdata_i;
    endcase
  end
  /* verilator lint_on WIDTH */

  // Ready towards the core: low while the memory interface has not accepted the
  // current request, and low for loads while no ID is free. The ID table
  // condition is deliberately the same as in `data_qvalid_o`: a store does not
  // need an ID, so once the memory accepts it the core must see the handshake
  // as well. Otherwise the store would be held by the core and issued again.
  assign lsu_qready_o = ~(data_qvalid_o & ~data_qready_i) & (lsu_qwrite | ~id_table_full);

  // ----------------
  // RESPONSE
  // ----------------
  // Return Path
  // shift the load data back by offset bytes
  logic [31:0] shifted_data;
  assign shifted_data = data_pdata_i >> {resp_metadata.offset, 3'b000};

  // Sign- or zero-extend sub-word loads according to the stored metadata
  always_comb begin
    unique case (resp_metadata.size)
      2'b00:   ld_result = {{24{shifted_data[ 7] & resp_metadata.sign_ext}}, shifted_data[7:0]};
      2'b01:   ld_result = {{16{shifted_data[15] & resp_metadata.sign_ext}}, shifted_data[15:0]};
      2'b10:   ld_result = shifted_data;
      default: ld_result = shifted_data;
    endcase
  end

  assign resp_id       = data_pid_i;
  assign lsu_perror_o  = data_perror_i;
  assign lsu_pdata_o   = ld_result;
  assign lsu_ptag_o    = resp_metadata.tag;
  assign lsu_pvalid_o  = data_pvalid_i;
  assign data_pready_o = lsu_pready_i;

  // ----------------
  // SEQUENTIAL
  // ----------------
  // Asynchronous, active-high reset: all IDs available, metadata cleared
  always_ff @(posedge clk_i or posedge rst_i) begin
    if (rst_i) begin
      id_available_q <= '1;
      metadata_q     <= 'b0;
    end else begin
      id_available_q <= id_available_d;
      metadata_q     <= metadata_d;
    end
  end

  // ----------------
  // ASSERTIONS
  // ----------------
  // Check for unsupported parameters.
  // `$error` takes only message arguments (a leading finish number belongs to
  // `$fatal`), so the message is passed on its own.
  if (NumOutstandingLoads == 0)
    $error("[snitch_lsu] NumOutstandingLoads cannot be 0.");

  // pragma translate_off
  `ifndef VERILATOR
  // A load may only be pushed onto an ID that is currently free
  invalid_req_id : assert property(
      @(posedge clk_i) disable iff (rst_i) (!(id_table_push & ~id_available_q[req_id])))
  else $fatal (1, "Request ID is not available.");
  `endif
  `ifndef VERILATOR
  // A response may only arrive for an ID that is currently in use
  invalid_resp_id : assert property(
      @(posedge clk_i) disable iff (rst_i) (!(id_table_pop & id_available_q[resp_id])))
  else $fatal (1, "Response ID does not match with valid metadata.");
  `endif
  // pragma translate_on

endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
/// Hardware implementation of SystemVerilog's `$onehot()` function.
/// It uses a tree of half adders and a separate
/// or reduction tree for the carry.
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Author: Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
/// Combinational one-hot detector.
///
/// `is_onehot_o` is high when exactly one bit of `d_i` is set. The input is
/// zero-extended to a power of two and reduced level by level with half
/// adders: the XOR tree yields the parity, and an OR over all half-adder
/// carries tells whether two set bits ever met on the way down.
module onehot #(
  parameter int unsigned Width = 4
) (
  input  logic [Width-1:0] d_i,
  output logic             is_onehot_o
);

  if (Width == 1) begin : gen_degenerated_onehot
    // A single bit is one-hot exactly when it is set
    assign is_onehot_o = d_i;
  end else begin : gen_onehot
    // Number of tree levels, level 0 being the (extended) input
    localparam int NumLevels = $clog2(Width) + 1;

    logic [NumLevels-1:0][2**(NumLevels-1)-1:0] sum, carry;
    logic [NumLevels-2:0]                       carry_array;

    // Level 0: input, zero-extended to a power of two
    assign sum[0] = $unsigned(d_i);

    // Half adders of every following level
    for (genvar lvl = 1; lvl < NumLevels; lvl++) begin
      localparam LevelWidth = 2**NumLevels / 2**lvl;

      for (genvar pair = 0; pair < LevelWidth/2; pair++) begin
        assign sum[lvl][pair]   = sum[lvl-1][2*pair] ^ sum[lvl-1][2*pair+1];
        assign carry[lvl][pair] = sum[lvl-1][2*pair] & sum[lvl-1][2*pair+1];
      end

      // Any carry on this level
      assign carry_array[lvl-1] = |carry[lvl][LevelWidth/2-1:0];
    end

    // Odd parity and no carry anywhere in the tree
    assign is_onehot_o = sum[NumLevels-1][0] & ~(|carry_array);
  end

endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
/// Snitch Configuration.
// Shared configuration for Snitch: data/ID widths, request/response structs,
// SSR constants, the cluster address map and instruction classification helpers.
package snitch_pkg;
import cf_math_pkg::idx_width;
localparam DataWidth = 32;
localparam StrbWidth = DataWidth/8; // one strobe bit per data byte
localparam int NumFPOutstandingLoads = 4;
// Use a high number of outstanding loads, if running a latency-throughput analysis
localparam int NumIntOutstandingLoads = `ifdef TRAFFIC_GEN 2048 `else 8 `endif;
localparam MetaIdWidth = idx_width(NumIntOutstandingLoads);
// Xpulpimg extension enabled?
// NOTE(review): `bit` is a 2-state type, so the 1'bX fallback used when the macro
// is undefined is not preserved as X - confirm the intended default.
localparam bit XPULPIMG = `ifdef XPULPIMG `XPULPIMG `else 1'bX `endif;
typedef logic [31:0] addr_t;
typedef logic [DataWidth-1:0] data_t;
typedef logic [StrbWidth-1:0] strb_t;
typedef logic [MetaIdWidth-1:0] meta_id_t;
typedef struct packed {
addr_t BootAddress;
int unsigned NrCores;
} SnitchCfg;
// Data memory request
typedef struct packed {
addr_t addr;
meta_id_t id;
logic [3:0] amo;
logic write;
data_t data;
strb_t strb;
} dreq_t;
// Data memory response
typedef struct packed {
data_t data;
meta_id_t id;
logic error;
} dresp_t;
// Accelerator request
typedef struct packed {
addr_t addr;
logic [4:0] id;
logic [31:0] data_op;
data_t data_arga;
data_t data_argb;
data_t data_argc;
} acc_req_t;
// Accelerator response
typedef struct packed {
logic [4:0] id;
logic error;
data_t data;
} acc_resp_t;
// Number of instructions the sequencer can hold
localparam int FPUSequencerInstr = 16;
// SSRs
localparam logic [31:0] SSR_ADDR_BASE = 32'h20_4800;
localparam logic [31:0] SSR_ADDR_MASK = 32'hffff_fe00;
localparam logic [11:0] CSR_SSR = 12'h7C0;
localparam int SSRNrCredits = 4;
// Registers which are used as SSRs
localparam [4:0] FT0 = 5'b0;
localparam [4:0] FT1 = 5'b1;
localparam [1:0][4:0] SSRRegs = {FT1, FT0};
// Returns 1 if `register` is one of the SSR registers (FT0, FT1), 0 otherwise.
function automatic logic is_ssr(logic [4:0] register);
unique case (register)
FT0, FT1: return 1'b1;
default : return 0;
endcase
endfunction
// Amount of address bit which should be used for accesses from the SoC side.
// This effectively determines the Address Space of a Snitch Cluster.
localparam logic [31:0] SoCRequestAddrBits = 32;
// Address Map
// TCDM, everything below 0x4000_0000
// NOTE(review): the TCDM mask and the TCDM end address below both correspond to
// 0x1000_0000, not 0x4000_0000 - confirm which value is intended.
localparam logic [31:0] TCDMStartAddress = 32'h0000_0000;
localparam logic [31:0] TCDMMask = '1 << 28;
// Slaves on Cluster AXI Bus
typedef enum integer {
TCDM = 0,
ClusterPeripherals = 1,
SoC = 2
} cluster_slave_e;
// Masters on Cluster AXI Bus
typedef enum integer {
CoreReq = 0,
ICache = 1,
AXISoC = 2
} cluster_master_e;
localparam int unsigned NrSlaves = 3;
localparam int unsigned NrMasters = 3;
localparam int IdWidth = 2;
localparam int IdWidthSlave = $clog2(NrMasters) + IdWidth;
// 3. SoC 2. Cluster Peripherals 1. TCDM
// (listed from the most significant entry down, so index 0 is the TCDM, matching cluster_slave_e)
localparam logic [NrSlaves-1:0][31:0] StartAddress = {32'h8000_0000, 32'h4000_0000, TCDMStartAddress};
localparam logic [NrSlaves-1:0][31:0] EndAddress = {32'hFFFF_FFFF, 32'h5000_0000, TCDMStartAddress + 32'h1000_0000};
localparam logic [NrSlaves-1:0] ValidRule = {{NrSlaves}{1'b1}};
// Cluster Peripheral Registers
typedef enum logic [31:0] {
TCDMStartAddressReg = 32'h4000_0000,
TCDMEndAddressReg = 32'h4000_0008,
NrCoresReg = 32'h4000_0010,
FetchEnableReg = 32'h4000_0018,
ScratchReg = 32'h4000_0020,
WakeUpReg = 32'h4000_0028,
CycleCountReg = 32'h4000_0030,
BarrierReg = 32'h4000_0038,
TcdmAccessedReg = 32'h4000_FFF0,
TcdmCongestedReg = 32'h4000_FFF8,
PerfCounterBase = 32'h4001_0000
} cluster_peripheral_addr_e;
// Offload to shared accelerator
// Returns 1 if `instr` matches one of the multiply/divide/remainder encodings
// listed below, 0 otherwise.
function automatic logic shared_offload (logic [31:0] instr);
logic offload;
unique casez (instr)
riscv_instr::MUL,
riscv_instr::MULH,
riscv_instr::MULHSU,
riscv_instr::MULHU,
riscv_instr::DIV,
riscv_instr::DIVU,
riscv_instr::REM,
riscv_instr::REMU,
riscv_instr::MULW,
riscv_instr::DIVW,
riscv_instr::DIVUW,
riscv_instr::REMW,
riscv_instr::REMUW: offload = 1;
default: offload = 0;
endcase
return offload;
endfunction
// Event strobes per core, counted by the performance counters in the cluster
// peripherals.
typedef struct packed {
logic issue_fpu; // core operations performed in the FPU
logic issue_fpu_seq; // includes load/store operations
logic issue_core_to_fpu; // instructions issued from core to FPU
logic retired_insts; // number of instructions retired by the core
} core_events_t;
endpackage
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Description: Variable Register File
/// Register file with a configurable number of read and write ports.
///
/// Writes are registered on the rising edge of `clk_i`, reads are combinational.
/// There is no reset. With `ZERO_REG_ZERO` set, entry 0 is rewritten with zero
/// on every clock edge, overriding any write to it.
module snitch_regfile #(
  parameter DATA_WIDTH     = 32,
  parameter NR_READ_PORTS  = 2,
  parameter NR_WRITE_PORTS = 1,
  parameter ZERO_REG_ZERO  = 0,
  parameter ADDR_WIDTH     = 4
) (
  // clock
  input  logic                                      clk_i,
  // read ports
  input  logic [NR_READ_PORTS-1:0][ADDR_WIDTH-1:0]  raddr_i,
  output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0]  rdata_o,
  // write ports
  input  logic [NR_WRITE_PORTS-1:0][ADDR_WIDTH-1:0] waddr_i,
  input  logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i,
  input  logic [NR_WRITE_PORTS-1:0]                 we_i
);

  localparam NUM_WORDS = 2**ADDR_WIDTH;

  logic [NUM_WORDS-1:0][DATA_WIDTH-1:0]     mem;
  logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:0] we_dec;

  // One-hot write enable per port: only the addressed word sees the port's enable
  always_comb begin : we_decoder
    we_dec = '0;
    for (int unsigned port = 0; port < NR_WRITE_PORTS; port++) begin
      for (int unsigned word = 0; word < NUM_WORDS; word++) begin
        if (waddr_i[port] == word) we_dec[port][word] = we_i[port];
      end
    end
  end

  // Write all words, including word 0. Ports are visited in ascending order, so
  // the highest-numbered port wins when several ports write the same word.
  always_ff @(posedge clk_i) begin : register_write_behavioral
    for (int unsigned port = 0; port < NR_WRITE_PORTS; port++) begin
      for (int unsigned word = 0; word < NUM_WORDS; word++) begin
        if (we_dec[port][word]) mem[word] <= wdata_i[port];
      end
      // Keep word 0 at zero; being the later assignment it overrides a write above
      if (ZERO_REG_ZERO) mem[0] <= '0;
    end
  end

  // Combinational read ports
  for (genvar rd = 0; rd < NR_READ_PORTS; rd++) begin
    assign rdata_o[rd] = mem[raddr_i[rd]];
  end

endmodule
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
/// Shared Multiply/Divide a.k.a M Extension
/// Based on Ariane Multiply Divide
/// Author: Michael Schaffner, <schaffner@iis.ee.ethz.ch>
/// Author: Florian Zaruba , <zarubaf@iis.ee.ethz.ch>
/// Shared multiply/divide unit behind the accelerator interface.
///
/// The instruction word on `acc_qdata_op_i` routes a request either to the
/// multiplier or to the serial divider; both results are merged onto the
/// single response channel by a stream arbiter.
/// `acc_qaddr_i` and `acc_qdata_argc_i` are not used inside this module.
module snitch_shared_muldiv #(
  parameter int unsigned IdWidth = 5
) (
  input  logic               clk_i,
  input  logic               rst_i,
  // Accelerator Interface - Slave
  input  logic [31:0]        acc_qaddr_i,      // unused
  input  logic [IdWidth-1:0] acc_qid_i,
  input  logic [31:0]        acc_qdata_op_i,   // RISC-V instruction
  input  logic [31:0]        acc_qdata_arga_i,
  input  logic [31:0]        acc_qdata_argb_i,
  input  logic [31:0]        acc_qdata_argc_i,
  input  logic               acc_qvalid_i,
  output logic               acc_qready_o,
  output logic [31:0]        acc_pdata_o,
  output logic [IdWidth-1:0] acc_pid_o,
  output logic               acc_perror_o,
  output logic               acc_pvalid_o,
  input  logic               acc_pready_i
);

  `include "common_cells/registers.svh"

  // Result together with the ID of the request it belongs to
  typedef struct packed {
    logic [31:0]        result;
    logic [IdWidth-1:0] id;
  } result_t;

  // Request-side handshakes of the two units
  logic mul_valid_op, mul_ready_op;
  logic div_valid_op, div_ready_op;
  // Response-side handshakes of the two units
  logic mul_valid, mul_ready;
  logic div_valid, div_ready;
  // Unit results and arbitrated output
  result_t mul, div, oup;
  // Set for every instruction that is neither a multiply nor a divide/remainder;
  // it is not read anywhere else in this module.
  logic illegal_instruction;

  // ----------------
  // Request routing
  // ----------------
  always_comb begin
    // By default nothing is forwarded and the request is not accepted
    acc_qready_o        = 1'b0;
    acc_perror_o        = 1'b0;
    illegal_instruction = 1'b0;
    mul_valid_op        = 1'b0;
    div_valid_op        = 1'b0;

    unique casez (acc_qdata_op_i)
      // Multiplications
      riscv_instr::MUL,
      riscv_instr::MULH,
      riscv_instr::MULHSU,
      riscv_instr::MULHU: begin
        acc_qready_o = mul_ready_op;
        mul_valid_op = acc_qvalid_i;
      end
      // Divisions and remainders
      riscv_instr::DIV,
      riscv_instr::DIVU,
      riscv_instr::REM,
      riscv_instr::REMU: begin
        acc_qready_o = div_ready_op;
        div_valid_op = acc_qvalid_i;
      end
      default: illegal_instruction = 1'b1;
    endcase
  end

  // ----------------
  // Multiplier
  // ----------------
  multiplier #(
    .Width   ( 32      ),
    .IdWidth ( IdWidth )
  ) i_multiplier (
    .clk_i       ( clk_i            ),
    .rst_i       ( rst_i            ),
    .id_i        ( acc_qid_i        ),
    .operator_i  ( acc_qdata_op_i   ),
    .operand_a_i ( acc_qdata_arga_i ),
    .operand_b_i ( acc_qdata_argb_i ),
    .valid_i     ( mul_valid_op     ),
    .ready_o     ( mul_ready_op     ),
    .result_o    ( mul.result       ),
    .valid_o     ( mul_valid        ),
    .ready_i     ( mul_ready        ),
    .id_o        ( mul.id           )
  );

  // ----------------
  // Serial divider (active-low reset)
  // ----------------
  serdiv #(
    .WIDTH   ( 32      ),
    .IdWidth ( IdWidth )
  ) i_div (
    .clk_i      ( clk_i            ),
    .rst_ni     ( ~rst_i           ),
    .id_i       ( acc_qid_i        ),
    .operator_i ( acc_qdata_op_i   ),
    .op_a_i     ( acc_qdata_arga_i ),
    .op_b_i     ( acc_qdata_argb_i ),
    .in_vld_i   ( div_valid_op     ),
    .in_rdy_o   ( div_ready_op     ),
    .out_vld_o  ( div_valid        ),
    .out_rdy_i  ( div_ready        ),
    .id_o       ( div.id           ),
    .res_o      ( div.result       )
  );

  // ----------------
  // Output arbitration (input 0: multiplier, input 1: divider)
  // ----------------
  stream_arbiter #(
    .DATA_T ( result_t ),
    .N_INP  ( 2        )
  ) i_stream_arbiter (
    .clk_i       ( clk_i                  ),
    .rst_ni      ( ~rst_i                 ),
    .inp_data_i  ( {div, mul}             ),
    .inp_valid_i ( {div_valid, mul_valid} ),
    .inp_ready_o ( {div_ready, mul_ready} ),
    .oup_data_o  ( oup                    ),
    .oup_valid_o ( acc_pvalid_o           ),
    .oup_ready_i ( acc_pready_i           )
  );

  assign acc_pid_o   = oup.id;
  assign acc_pdata_o = oup.result;

endmodule
/// Single-stage pipelined multiplier for the RISC-V MUL/MULH/MULHSU/MULHU instructions.
///
/// The full 2*Width-bit product is computed combinationally and captured in a
/// register when a request is accepted (`valid_i & ready_o`); the result mux
/// then returns either the lower or the upper half of the registered product.
///
/// Parameters:
/// - Width:   operand and result width
/// - IdWidth: width of the ID passed from request to response
///
/// The `FFAR / `FFLAR register macros are presumably provided by
/// common_cells/registers.svh, which has to be included earlier in the
/// compilation unit.
module multiplier #(
  parameter int unsigned Width   = 64,
  parameter int unsigned IdWidth = 5
) (
  input  logic               clk_i,
  input  logic               rst_i,
  input  logic [IdWidth-1:0] id_i,
  input  logic [31:0]        operator_i,
  input  logic [Width-1:0]   operand_a_i,
  input  logic [Width-1:0]   operand_b_i,
  input  logic               valid_i,
  output logic               ready_o,
  output logic [Width-1:0]   result_o,
  output logic               valid_o,
  input  logic               ready_i,
  output logic [IdWidth-1:0] id_o
);

  // Pipeline register
  logic [IdWidth-1:0] id_q;
  logic               valid_d, valid_q;
  logic               select_upper_q, select_upper_d;
  logic [2*Width-1:0] result_d, result_q;
  // Operand signedness, decoded from the instruction
  logic               sign_a, sign_b;

  // A new request can be taken when the output register is empty or is being drained
  assign ready_o = ~valid_o | ready_i;

  // Sign select and result-half select.
  // Anything that is not one of the MULH* encodings (including MUL) is an
  // unsigned x unsigned multiplication returning the lower Width bits.
  always_comb begin
    sign_a         = 1'b0;
    sign_b         = 1'b0;
    select_upper_d = 1'b0;
    unique casez (operator_i)
      riscv_instr::MULH: begin
        sign_a         = 1'b1;
        sign_b         = 1'b1;
        select_upper_d = 1'b1;
      end
      riscv_instr::MULHU: begin
        select_upper_d = 1'b1;
      end
      riscv_instr::MULHSU: begin
        sign_a         = 1'b1;
        select_upper_d = 1'b1;
      end
      // MUL performs an XLEN-bit x XLEN-bit multiplication and places the lower
      // XLEN bits in the destination register
      default: ; // including MUL
    endcase
  end

  // Datapath, single stage version: each operand is extended by one bit that
  // copies its sign when it is to be treated as signed and is zero otherwise.
  // (A second, unused copy of this product has been removed.)
  assign result_d = $signed({operand_a_i[Width-1] & sign_a, operand_a_i}) *
                    $signed({operand_b_i[Width-1] & sign_b, operand_b_i});

  // Result mux: lower half by default, upper half for the MULH* variants
  always_comb begin
    result_o = result_q[Width-1:0];
    if (select_upper_q) begin
      result_o = result_q[2*Width-1:Width];
    end
  end

  // Output valid: cleared when the result is taken, set when a request is
  // accepted; the set wins if both happen in the same cycle
  always_comb begin
    valid_d = valid_q;
    if (valid_q & ready_i)
      valid_d = 0;
    if (valid_i & ready_o)
      valid_d = 1;
  end

  `FFAR(valid_q, valid_d, '0, clk_i, rst_i)
  // Pipe-line registers, loaded when a request is accepted
  `FFLAR(id_q, id_i, (valid_i & ready_o), '0, clk_i, rst_i)
  `FFLAR(result_q, result_d, (valid_i & ready_o), '0, clk_i, rst_i)
  `FFLAR(select_upper_q, select_upper_d, (valid_i & ready_o), '0, clk_i, rst_i)

  assign id_o    = id_q;
  assign valid_o = valid_q;

endmodule
// Serial divider / remainder unit with valid/ready handshakes on input and output.
//
// The operation is decoded from `operator_i` against `riscv_instr::DIV`, `DIVU`, `REM` and `REMU`:
// `signed_op` is set for DIV/REM and `rem` is set for REM/REMU. Operands are accepted in state
// IDLE, the result is produced iteratively in state DIVIDE and presented in state FINISH (or
// directly from DIVIDE when the early-termination flag `div_res_zero_q` is set).
//
// Parameters:
// - WIDTH:   Width of the operands and of the result.
// - IdWidth: Width of the transaction id that is stored on load and returned with the result.
module serdiv #(
parameter WIDTH = 64,
parameter int unsigned IdWidth = 5
) (
input logic clk_i,
input logic rst_ni,
// input IF
input logic [IdWidth-1:0] id_i,
input logic [31:0] operator_i,
input logic [WIDTH-1:0] op_a_i,
input logic [WIDTH-1:0] op_b_i,
// handshake
input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
output logic in_rdy_o,
// output IF
output logic out_vld_o,
input logic out_rdy_i,
output logic [IdWidth-1:0] id_o,
output logic [WIDTH-1:0] res_o
);
// Decoded operation: signed vs. unsigned, quotient vs. remainder
logic signed_op;
logic rem;
always_comb begin
// defaults: unsigned division (also used for operators that match none of the cases)
signed_op = 1'b0;
rem = 1'b0;
unique casez (operator_i)
riscv_instr::DIV: begin
signed_op = 1'b1;
end
riscv_instr::DIVU: begin
end
riscv_instr::REM: begin
signed_op = 1'b1;
rem = 1'b1;
end
riscv_instr::REMU: begin
rem = 1'b1;
end
default:;
endcase
end
enum logic [1:0] {
IDLE, DIVIDE, FINISH
} state_d, state_q;
// Working registers: quotient (res), running remainder (op_a) and shifted divisor (op_b)
logic [WIDTH-1:0] res_q, res_d;
logic [WIDTH-1:0] op_a_q, op_a_d;
logic [WIDTH-1:0] op_b_q, op_b_d;
logic op_a_sign, op_b_sign;
logic op_b_zero, op_b_zero_q, op_b_zero_d;
logic [IdWidth-1:0] id_q, id_d;
// Flags captured on load and held for the duration of the operation
logic rem_sel_d, rem_sel_q;
logic comp_inv_d, comp_inv_q;
logic res_inv_d, res_inv_q;
logic [WIDTH-1:0] add_mux;
logic [WIDTH-1:0] add_out;
logic [WIDTH-1:0] add_tmp;
logic [WIDTH-1:0] b_mux;
logic [WIDTH-1:0] out_mux;
// Iteration counter, loaded with the alignment shift
logic [$clog2(WIDTH+1)-1:0] cnt_q, cnt_d;
logic cnt_zero;
logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b;
logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result;
logic [$clog2(WIDTH+1)-1:0] shift_a;
// one bit wider than the counter so that a negative difference can be detected via $signed()
logic [$clog2(WIDTH+1):0] div_shift;
logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en;
logic lzc_a_no_one, lzc_b_no_one;
logic div_res_zero_d, div_res_zero_q;
/////////////////////////////////////
// align the input operands
// for faster division
/////////////////////////////////////
assign op_b_zero = (op_b_i == 0);
assign op_a_sign = op_a_i[$high(op_a_i)];
assign op_b_sign = op_b_i[$high(op_b_i)];
// For negative operands of a signed operation the inverted value is fed to the leading-zero
// counters. Note that {~op_a_i, 1'b0} is WIDTH+1 bits wide and is truncated to WIDTH bits by the
// assignment, i.e. the inverted operand A is additionally shifted left by one.
assign lzc_a_input = (signed_op & op_a_sign) ? {~op_a_i, 1'b0} : op_a_i;
assign lzc_b_input = (signed_op & op_b_sign) ? ~op_b_i : op_b_i;
lzc #(
.MODE ( 1 ), // count leading zeros
.WIDTH ( WIDTH )
) i_lzc_a (
.in_i ( lzc_a_input ),
.cnt_o ( lzc_a_result ),
.empty_o ( lzc_a_no_one )
);
lzc #(
.MODE ( 1 ), // count leading zeros
.WIDTH ( WIDTH )
) i_lzc_b (
.in_i ( lzc_b_input ),
.cnt_o ( lzc_b_result ),
.empty_o ( lzc_b_no_one )
);
// An input without any set bit is treated as having WIDTH leading zeros
assign shift_a = (lzc_a_no_one) ? WIDTH : lzc_a_result;
// Difference of the leading-zero counts: amount by which operand B is shifted left on load,
// and (via cnt_d) the start value of the iteration counter
assign div_shift = (lzc_b_no_one) ? WIDTH : lzc_b_result-shift_a;
assign op_b = op_b_i <<< $unsigned(div_shift);
// the division is zero if |opB| > |opA| and can be terminated
assign div_res_zero_d = (load_en) ? ($signed(div_shift) < 0) : div_res_zero_q;
/////////////////////////////////////
// Datapath
/////////////////////////////////////
// Adder mode: add (1) or subtract (0). Can only be 1 in the load cycle, and only if the
// operation is unsigned or both operands have the same sign.
assign pm_sel = load_en & ~(signed_op & (op_a_sign ^ op_b_sign));
// muxes
assign add_mux = (load_en) ? op_a_i : op_b_q;
// attention: logical shift by one in case of negative operand B!
assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
// in case of bad timing, we could output from regs -> needs a cycle more in the FSM
assign out_mux = (rem_sel_q) ? op_a_q : res_q;
// invert if necessary
assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
// main comparator
assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q);
// main adder
// In the load cycle the adder computes 0 +/- op_a_i, afterwards op_a_q - op_b_q
assign add_tmp = (load_en) ? 0 : op_a_q;
assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
/////////////////////////////////////
// FSM, counter
/////////////////////////////////////
assign cnt_zero = (cnt_q == 0);
// Counter is loaded with the alignment shift and then counts down to zero, where it saturates
assign cnt_d = (load_en) ? div_shift :
(~cnt_zero) ? cnt_q - 1 : cnt_q;
always_comb begin : p_fsm
// default
state_d = state_q;
in_rdy_o = 1'b0;
out_vld_o = 1'b0;
load_en = 1'b0;
a_reg_en = 1'b0;
b_reg_en = 1'b0;
res_reg_en = 1'b0;
unique case (state_q)
// Wait for operands; on a valid input load all registers and start dividing
IDLE: begin
in_rdy_o = 1'b1;
if (in_vld_i) begin
a_reg_en = 1'b1;
b_reg_en = 1'b1;
load_en = 1'b1;
state_d = DIVIDE;
end
end
// One iteration per cycle: op_a is updated only when the comparator fires, op_b and the
// result register are shifted every cycle
DIVIDE: begin
if (!div_res_zero_q) begin
a_reg_en = ab_comp;
b_reg_en = 1'b1;
res_reg_en = 1'b1;
end
// can end the division now if the result is clearly 0
if (div_res_zero_q) begin
out_vld_o = 1'b1;
state_d = FINISH;
if (out_rdy_i) begin
state_d = IDLE;
end
end else if (cnt_zero) begin
state_d = FINISH;
end
end
// Hold the result valid until the consumer accepts it
FINISH: begin
out_vld_o = 1'b1;
if (out_rdy_i) begin
state_d = IDLE;
end
end
default : state_d = IDLE;
endcase
end
/////////////////////////////////////
// regs, flags
/////////////////////////////////////
// get flags
// All flags are sampled in the load cycle and hold their value otherwise
assign rem_sel_d = (load_en) ? rem : rem_sel_q;
assign comp_inv_d = (load_en) ? signed_op & op_b_sign : comp_inv_q;
assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
// The output is negated for signed operations with operands of different sign, except for a
// quotient with a zero divisor
assign res_inv_d = (load_en) ? (~op_b_zero | rem) & signed_op & (op_a_sign ^ op_b_sign) : res_inv_q;
// transaction id
assign id_d = (load_en) ? id_i : id_q;
assign id_o = id_q;
assign op_a_d = (a_reg_en) ? add_out : op_a_q;
assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
// Result register is cleared on load and shifts in the comparator output as new LSB per iteration
assign res_d = (load_en) ? '0 :
(res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;
// State and datapath registers, asynchronous active-low reset
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
state_q <= IDLE;
op_a_q <= '0;
op_b_q <= '0;
res_q <= '0;
cnt_q <= '0;
id_q <= '0;
rem_sel_q <= 1'b0;
comp_inv_q <= 1'b0;
res_inv_q <= 1'b0;
op_b_zero_q <= 1'b0;
div_res_zero_q <= 1'b0;
end else begin
state_q <= state_d;
op_a_q <= op_a_d;
op_b_q <= op_b_d;
res_q <= res_d;
cnt_q <= cnt_d;
id_q <= id_d;
rem_sel_q <= rem_sel_d;
comp_inv_q <= comp_inv_d;
res_inv_q <= res_inv_d;
op_b_zero_q <= op_b_zero_d;
div_res_zero_q <= div_res_zero_d;
end
end
endmodule
// Copyright 2018 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
/// Non-flushable spill register.
///
/// Kept for backward compatibility: this is a thin shell around
/// `spill_register_flushable` with the flush input permanently tied low.
/// Both parameters are handed through unchanged.
module spill_register #(
  parameter type T      = logic,
  parameter bit  Bypass = 1'b0   // make this spill register transparent
) (
  input  logic clk_i   ,
  input  logic rst_ni  ,
  input  logic valid_i ,
  output logic ready_o ,
  input  T     data_i  ,
  output logic valid_o ,
  input  logic ready_i ,
  output T     data_o
);

  spill_register_flushable #(
    .T      ( T      ),
    .Bypass ( Bypass )
  ) spill_register_flushable_i (
    .clk_i   ( clk_i   ),
    .rst_ni  ( rst_ni  ),
    // upstream handshake
    .valid_i ( valid_i ),
    .ready_o ( ready_o ),
    .data_i  ( data_i  ),
    // never flushed
    .flush_i ( 1'b0    ),
    // downstream handshake
    .valid_o ( valid_o ),
    .ready_i ( ready_i ),
    .data_o  ( data_o  )
  );

endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Fabian Schuiki <fschuiki@iis.ee.ethz.ch>
/// Flushable two-entry spill register.
///
/// A register with handshakes that cuts all combinational paths between its
/// input side and its output side. Internally it consists of a primary slot
/// ("A") and a spill slot ("B"). With `Bypass` set, the module degenerates to
/// plain wires. Asserting `flush_i` empties both slots.
module spill_register_flushable #(
  parameter type T      = logic,
  parameter bit  Bypass = 1'b0   // make this spill register transparent
) (
  input  logic clk_i   ,
  input  logic rst_ni  ,
  input  logic valid_i ,
  input  logic flush_i ,
  output logic ready_o ,
  input  T     data_i  ,
  output logic valid_o ,
  input  logic ready_i ,
  output T     data_o
);

  if (Bypass) begin : gen_bypass
    // Transparent mode: feed everything straight through.
    assign data_o  = data_i;
    assign valid_o = valid_i;
    assign ready_o = ready_i;
  end else begin : gen_spill_reg
    // Storage and occupancy flags of slot A (primary) and slot B (spill).
    T     a_data_q, b_data_q;
    logic a_full_q, b_full_q;
    // Per-slot fill / drain strobes.
    logic a_fill, a_drain;
    logic b_fill, b_drain;

    // Slot A takes new data on an input handshake (never while flushing). It
    // is drained whenever it holds data and slot B is free, or on a flush.
    assign a_fill  = valid_i && ready_o && (!flush_i);
    assign a_drain = (a_full_q && !b_full_q) || flush_i;

    // Slot B takes over the content of slot A when A is drained but the
    // downstream side is not ready. It is drained once downstream is ready,
    // or on a flush.
    assign b_fill  = a_drain && (!ready_i) && (!flush_i);
    assign b_drain = (b_full_q && ready_i) || flush_i;

    // Slot A: data is written on fill; the flag follows `a_fill` on any
    // fill or drain event.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_reg
      if (!rst_ni) begin
        a_data_q <= '0;
        a_full_q <= 1'b0;
      end else begin
        if (a_fill) begin
          a_data_q <= data_i;
        end
        if (a_fill || a_drain) begin
          a_full_q <= a_fill;
        end
      end
    end

    // Slot B: same scheme, fed from slot A.
    always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_reg
      if (!rst_ni) begin
        b_data_q <= '0;
        b_full_q <= 1'b0;
      end else begin
        if (b_fill) begin
          b_data_q <= a_data_q;
        end
        if (b_fill || b_drain) begin
          b_full_q <= b_fill;
        end
      end
    end

    // Input can be accepted unless both slots are occupied.
    // Note: flush_i and valid_i must not be high at the same time,
    // otherwise an invalid handshake may occur
    assign ready_o = !(a_full_q && b_full_q);

    // Output is valid as soon as either slot holds data.
    assign valid_o = a_full_q | b_full_q;

    // The spill slot B holds the older entry and is therefore emptied first.
    assign data_o = b_full_q ? b_data_q : a_data_q;

    // pragma translate_off
    `ifndef VERILATOR
    flush_valid : assert property (
      @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else
      $warning("Trying to flush and feed the spill register simultaneously. You will lose data!");
    `endif
    // pragma translate_on
  end
endmodule
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is
// asserted, `oup_data_o` remains invariant until the output handshake has occurred.
//
// This module is the non-flushable variant: it instantiates `stream_arbiter_flushable` with
// `flush_i` tied to zero and forwards all parameters, including the arbitration scheme selected
// by `ARBITER`, unchanged.
module stream_arbiter #(
  parameter type    DATA_T  = logic, // Vivado requires a default value for type parameters.
  parameter integer N_INP   = -1,    // Synopsys DC requires a default value for parameters.
  parameter         ARBITER = "rr"   // "rr" or "prio"
) (
  input  logic              clk_i,
  input  logic              rst_ni,
  input  DATA_T [N_INP-1:0] inp_data_i,
  input  logic  [N_INP-1:0] inp_valid_i,
  output logic  [N_INP-1:0] inp_ready_o,
  output DATA_T             oup_data_o,
  output logic              oup_valid_o,
  input  logic              oup_ready_i
);

  stream_arbiter_flushable #(
    .DATA_T  ( DATA_T  ),
    .N_INP   ( N_INP   ),
    .ARBITER ( ARBITER )
  ) i_arb (
    .clk_i,
    .rst_ni,
    // this wrapper never flushes the arbiter state
    .flush_i ( 1'b0 ),
    // input streams
    .inp_data_i,
    .inp_valid_i,
    .inp_ready_o,
    // arbitrated output stream
    .oup_data_o,
    .oup_valid_o,
    .oup_ready_i
  );

endmodule
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Flushable stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready
// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is
// asserted, `oup_data_o` remains invariant until the output handshake has occurred.
//
// The arbitration itself is done by an `rr_arb_tree` instance. `ARBITER` selects how it is
// configured:
//   "rr":   `ExtPrio` disabled
//   "prio": `ExtPrio` enabled, with the external priority input `rr_i` tied to zero
// Any other value aborts elaboration with a fatal error.
module stream_arbiter_flushable #(
  parameter type    DATA_T  = logic, // Vivado requires a default value for type parameters.
  parameter integer N_INP   = -1,    // Synopsys DC requires a default value for parameters.
  parameter         ARBITER = "rr"   // "rr" or "prio"
) (
  input  logic              clk_i,
  input  logic              rst_ni,
  input  logic              flush_i,
  input  DATA_T [N_INP-1:0] inp_data_i,
  input  logic  [N_INP-1:0] inp_valid_i,
  output logic  [N_INP-1:0] inp_ready_o,
  output DATA_T             oup_data_o,
  output logic              oup_valid_o,
  input  logic              oup_ready_i
);

  if (ARBITER == "rr") begin : gen_rr_arb
    // Arbiter tree without external priority.
    rr_arb_tree #(
      .NumIn     ( N_INP  ),
      .DataType  ( DATA_T ),
      .ExtPrio   ( 1'b0   ),
      .AxiVldRdy ( 1'b1   ),
      .LockIn    ( 1'b1   )
    ) i_arbiter (
      .clk_i   ( clk_i       ),
      .rst_ni  ( rst_ni      ),
      .flush_i ( flush_i     ),
      .rr_i    ( '0          ),
      .req_i   ( inp_valid_i ),
      .gnt_o   ( inp_ready_o ),
      .data_i  ( inp_data_i  ),
      .gnt_i   ( oup_ready_i ),
      .req_o   ( oup_valid_o ),
      .data_o  ( oup_data_o  ),
      .idx_o   (             )
    );
  end else if (ARBITER == "prio") begin : gen_prio_arb
    // Arbiter tree with external priority, driven with a constant zero.
    rr_arb_tree #(
      .NumIn     ( N_INP  ),
      .DataType  ( DATA_T ),
      .ExtPrio   ( 1'b1   ),
      .AxiVldRdy ( 1'b1   ),
      .LockIn    ( 1'b1   )
    ) i_arbiter (
      .clk_i   ( clk_i       ),
      .rst_ni  ( rst_ni      ),
      .flush_i ( flush_i     ),
      .rr_i    ( '0          ),
      .req_i   ( inp_valid_i ),
      .gnt_o   ( inp_ready_o ),
      .data_i  ( inp_data_i  ),
      .gnt_i   ( oup_ready_i ),
      .req_o   ( oup_valid_o ),
      .data_o  ( oup_data_o  ),
      .idx_o   (             )
    );
  end else begin : gen_arb_error
    // Unsupported arbitration scheme requested.
    // pragma translate_off
    $fatal(1, "Invalid value for parameter 'ARBITER'!");
    // pragma translate_on
  end

endmodule
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
/// Stream demultiplexer for the handshake signals only.
///
/// Routes the valid of the single input stream to the output selected by `oup_sel_i` and returns
/// the ready of that very output to the input. All other outputs see a deasserted valid.
///
/// There are no data ports: the payload of the input stream does not need to be demultiplexed and
/// can simply be applied to every output stream.
module stream_demux #(
  /// Number of connected outputs.
  parameter int unsigned N_OUP = 32'd1,
  /// Dependent parameters, DO NOT OVERRIDE!
  parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1
) (
  input  logic                 inp_valid_i,
  output logic                 inp_ready_o,

  input  logic [LOG_N_OUP-1:0] oup_sel_i,

  output logic [N_OUP-1:0]     oup_valid_o,
  input  logic [N_OUP-1:0]     oup_ready_i
);

  always_comb begin : proc_demux
    // Ready travels backwards from the selected output.
    inp_ready_o = oup_ready_i[oup_sel_i];

    // Valid travels forwards to the selected output only.
    oup_valid_o            = '0;
    oup_valid_o[oup_sel_i] = inp_valid_i;
  end

endmodule
// Copyright (c) 2020 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Wolfgang Roenninger <wroennin@ethz.ch>
/// Fully connected stream crossbar.
///
/// Handshaking rules as defined by the `AMBA AXI` standard on default.
///
/// Structure: every input owns a `stream_demux` that steers its handshake to the output chosen by
/// `sel_i`. Every output owns an `rr_arb_tree` that arbitrates between all inputs targeting it,
/// followed by a `spill_register` that is bypassed unless `OutSpillReg` is set.
module stream_xbar #(
  /// Number of inputs into the crossbar (`> 0`).
  parameter int unsigned NumInp      = 32'd0,
  /// Number of outputs from the crossbar (`> 0`).
  parameter int unsigned NumOut      = 32'd0,
  /// Data width of the stream. Can be overwritten by defining the type parameter `payload_t`.
  parameter int unsigned DataWidth   = 32'd1,
  /// Payload type of the data ports, only usage of parameter `DataWidth`.
  parameter type         payload_t   = logic [DataWidth-1:0],
  /// Adds a spill register stage at each output.
  parameter bit          OutSpillReg = 1'b0,
  /// Use external priority for the individual `rr_arb_trees`.
  parameter int unsigned ExtPrio     = 1'b0,
  /// Use strict AXI valid ready handshaking.
  /// To be protocol conform also the parameter `LockIn` has to be set.
  parameter int unsigned AxiVldRdy   = 1'b1,
  /// Lock in the arbitration decision of the `rr_arb_tree`.
  /// When this is set, valids have to be asserted until the corresponding transaction is indicated
  /// by ready.
  parameter int unsigned LockIn      = 1'b1,
  /// Derived parameter, do **not** overwrite!
  ///
  /// Width of the output selection signal.
  parameter int unsigned SelWidth = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1,
  /// Derived parameter, do **not** overwrite!
  ///
  /// Signal type definition for selecting the output at the inputs.
  parameter type sel_oup_t = logic[SelWidth-1:0],
  /// Derived parameter, do **not** overwrite!
  ///
  /// Width of the input index signal.
  parameter int unsigned IdxWidth = (NumInp > 32'd1) ? unsigned'($clog2(NumInp)) : 32'd1,
  /// Derived parameter, do **not** overwrite!
  ///
  /// Signal type definition indicating from which input the output came.
  parameter type idx_inp_t = logic[IdxWidth-1:0]
) (
  /// Clock, positive edge triggered.
  input  logic                  clk_i,
  /// Asynchronous reset, active low.
  input  logic                  rst_ni,
  /// Flush the state of the internal `rr_arb_tree` modules.
  /// If not used set to `0`.
  /// Flush should only be used if there are no active `valid_i`, otherwise it will
  /// not adhere to the AXI handshaking.
  input  logic                  flush_i,
  /// Provide an external state for the `rr_arb_tree` models.
  /// Will only do something if ExtPrio is `1` otherwise tie to `0`.
  input  idx_inp_t [NumOut-1:0] rr_i,
  /// Input data ports.
  /// Has to be stable as long as `valid_i` is asserted when parameter `AxiVldRdy` is set.
  input  payload_t [NumInp-1:0] data_i,
  /// Selection of the output port where the data should be routed.
  /// Has to be stable as long as `valid_i` is asserted and parameter `AxiVldRdy` is set.
  input  sel_oup_t [NumInp-1:0] sel_i,
  /// Input is valid.
  input  logic     [NumInp-1:0] valid_i,
  /// Input is ready to accept data.
  output logic     [NumInp-1:0] ready_o,
  /// Output data ports. Valid if `valid_o = 1`
  output payload_t [NumOut-1:0] data_o,
  /// Index of the input port where data came from.
  output idx_inp_t [NumOut-1:0] idx_o,
  /// Output is valid.
  output logic     [NumOut-1:0] valid_o,
  /// Output can be accepted.
  input  logic     [NumOut-1:0] ready_i
);
  // Payload plus originating input index, as stored in the optional output spill register.
  typedef struct packed {
    payload_t data;
    idx_inp_t idx;
  } spill_data_t;

  // Handshakes as seen from the inputs ([input][output]) ...
  logic     [NumInp-1:0][NumOut-1:0] inp_valid;
  logic     [NumInp-1:0][NumOut-1:0] inp_ready;

  // ... and transposed, as seen from the outputs ([output][input]).
  payload_t [NumOut-1:0][NumInp-1:0] out_data;
  logic     [NumOut-1:0][NumInp-1:0] out_valid;
  logic     [NumOut-1:0][NumInp-1:0] out_ready;

  // Generate the input selection
  for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inps
    stream_demux #(
      .N_OUP ( NumOut )
    ) i_stream_demux (
      .inp_valid_i ( valid_i[i]   ),
      .inp_ready_o ( ready_o[i]   ),
      .oup_sel_i   ( sel_i[i]     ),
      .oup_valid_o ( inp_valid[i] ),
      .oup_ready_i ( inp_ready[i] )
    );

    // Do the switching cross of the signals.
    for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_cross
      // Propagate the data from this input to all outputs.
      assign out_data[j][i]  = data_i[i];
      // switch handshaking
      assign out_valid[j][i] = inp_valid[i][j];
      assign inp_ready[i][j] = out_ready[j][i];
    end
  end

  // Generate the output arbitration.
  for (genvar j = 0; unsigned'(j) < NumOut; j++) begin : gen_outs
    spill_data_t arb;
    logic        arb_valid, arb_ready;

    rr_arb_tree #(
      .NumIn     ( NumInp    ),
      .DataType  ( payload_t ),
      .ExtPrio   ( ExtPrio   ),
      .AxiVldRdy ( AxiVldRdy ),
      .LockIn    ( LockIn    )
    ) i_rr_arb_tree (
      .clk_i,
      .rst_ni,
      .flush_i,
      .rr_i    ( rr_i[j]      ),
      .req_i   ( out_valid[j] ),
      .gnt_o   ( out_ready[j] ),
      .data_i  ( out_data[j]  ),
      .req_o   ( arb_valid    ),
      .gnt_i   ( arb_ready    ),
      .data_o  ( arb.data     ),
      .idx_o   ( arb.idx      )
    );

    spill_data_t spill;

    // Transparent unless `OutSpillReg` is set.
    spill_register #(
      .T      ( spill_data_t ),
      .Bypass ( !OutSpillReg )
    ) i_spill_register (
      .clk_i,
      .rst_ni,
      .valid_i ( arb_valid  ),
      .ready_o ( arb_ready  ),
      .data_i  ( arb        ),
      .valid_o ( valid_o[j] ),
      .ready_i ( ready_i[j] ),
      .data_o  ( spill      )
    );
    // Assign the outputs (deaggregate the data).
    assign data_o[j] = spill.data;
    assign idx_o[j]  = spill.idx;
  end

  // Assertions
  // Make sure that the handshake and payload is stable
  // pragma translate_off
  `ifndef VERILATOR
  // The reset is active low, so the properties must be disabled while `rst_ni` is LOW.
  // Disabling on `rst_ni` itself would switch all checks off during normal operation.
  default disable iff (!rst_ni);
  for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_sel_assertions
    // Compare at full integer width: casting `NumOut` to `sel_oup_t` would truncate it to zero
    // whenever `NumOut` is a power of two, making the check fail for every legal selection.
    assert property (@(posedge clk_i) (valid_i[i] |-> (32'(sel_i[i]) < NumOut))) else
        $fatal(1, "Non-existing output is selected!");
  end

  if (AxiVldRdy) begin : gen_handshake_assertions
    for (genvar i = 0; unsigned'(i) < NumInp; i++) begin : gen_inp_assertions
      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(data_i[i]))) else
          $error("data_i is unstable at input: %0d", i);
      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> $stable(sel_i[i]))) else
          $error("sel_i is unstable at input: %0d", i);
      assert property (@(posedge clk_i) (valid_i[i] && !ready_o[i] |=> valid_i[i])) else
          $error("valid_i at input %0d has been taken away without a ready.", i);
    end
    for (genvar i = 0; unsigned'(i) < NumOut; i++) begin : gen_out_assertions
      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(data_o[i]))) else
          $error("data_o is unstable at output: %0d Check that parameter LockIn is set.", i);
      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> $stable(idx_o[i]))) else
          $error("idx_o is unstable at output: %0d Check that parameter LockIn is set.", i);
      assert property (@(posedge clk_i) (valid_o[i] && !ready_i[i] |=> valid_o[i])) else
          $error("valid_o at output %0d has been taken away without a ready.", i);
    end
  end

  initial begin : proc_parameter_assertions
    assert (NumInp > 32'd0) else $fatal(1, "NumInp has to be > 0!");
    assert (NumOut > 32'd0) else $fatal(1, "NumOut has to be > 0!");
  end
  `endif
  // pragma translate_on
endmodule
// Copyright 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Behavioral model of a two-input AND cell for clock signals:
// `clk_o` is the logical AND of `clk0_i` and `clk1_i`.
module tc_clk_and2 (
input logic clk0_i,
input logic clk1_i,
output logic clk_o
);
assign clk_o = clk0_i & clk1_i;
endmodule
// Behavioral model of a clock buffer: `clk_o` simply follows `clk_i`.
module tc_clk_buffer (
input logic clk_i,
output logic clk_o
);
assign clk_o = clk_i;
endmodule
// Description: Behavioral model of an integrated clock-gating cell (ICG)
//
// Ports:
// - `clk_i`:     Ungated input clock
// - `en_i`:      Clock enable
// - `test_en_i`: Additional enable, ORed with `en_i`
// - `clk_o`:     Gated output clock
module tc_clk_gating (
input logic clk_i,
input logic en_i,
input logic test_en_i,
output logic clk_o
);
// Latched enable
logic clk_en;
// The latch is transparent only while the clock is low, so the enable seen by the AND gate
// below cannot change while `clk_i` is high.
always_latch begin
if (clk_i == 1'b0) clk_en <= en_i | test_en_i;
end
assign clk_o = clk_i & clk_en;
endmodule
// Behavioral model of a clock inverter: `clk_o` is the bitwise inversion of `clk_i`.
module tc_clk_inverter (
input logic clk_i,
output logic clk_o
);
assign clk_o = ~clk_i;
endmodule
// Behavioral model of a two-input clock multiplexer:
// `clk_o` is `clk1_i` when `clk_sel_i` is set and `clk0_i` otherwise.
module tc_clk_mux2 (
input logic clk0_i,
input logic clk1_i,
input logic clk_sel_i,
output logic clk_o
);
assign clk_o = (clk_sel_i) ? clk1_i : clk0_i;
endmodule
// Behavioral model of a two-input XOR cell for clock signals:
// `clk_o` is the logical XOR of `clk0_i` and `clk1_i`.
module tc_clk_xor2 (
input logic clk0_i,
input logic clk1_i,
output logic clk_o
);
assign clk_o = clk0_i ^ clk1_i;
endmodule
// Simulation-only delay element. The whole module is compiled only when `SYNTHESIS` is not
// defined.
`ifndef SYNTHESIS
module tc_clk_delay #(
// Delay applied from `in_i` to `out_o`
parameter int unsigned Delay = 300ps
) (
input logic in_i,
output logic out_o
);
// pragma translate_off
// The delayed assignment is left out when `VERILATOR` is defined; `out_o` then has no driver
// inside this module.
`ifndef VERILATOR
assign #(Delay) out_o = in_i;
`endif
// pragma translate_on
endmodule
`endif
// Copyright (c) 2020 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Author: Wolfgang Roenninger <wroennin@ethz.ch>
// Description: Functional module of a generic SRAM
//
// Parameters:
// - NumWords: Number of words in the macro. Address width can be calculated with:
// `AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1`
// The module issues a warning if there is a request on an address which is
// not in range.
// - DataWidth: Width of the ports `wdata_i` and `rdata_o`.
// - ByteWidth: Width of a byte, the byte enable signal `be_i` can be calculated with the
// ceiling division `ceil(DataWidth, ByteWidth)`.
// - NumPorts: Number of read and write ports. Each is a full port. Ports with a higher
// index read and write after the ones with lower indices.
// - Latency: Read latency, the read data is available this many cycles after a request.
// - SimInit: Macro simulation initialization. Values are:
// "zeros": Each bit gets initialized with 1'b0.
// "ones": Each bit gets initialized with 1'b1.
// "random": Each bit gets random initialized with 1'b0 or 1'b1.
// "none": Each bit gets initialized with 1'bx. (default)
// - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with
// the instantiated parameters and signal widths.
//
// Ports:
// - `clk_i`: Clock
// - `rst_ni`: Asynchronous reset, active low
// - `req_i`: Request, active high
// - `we_i`: Write request, active high
// - `addr_i`: Request address
// - `wdata_i`: Write data, has to be valid on request
// - `be_i`: Byte enable, active high
// - `rdata_o`: Read data, valid `Latency` cycles after a request with `we_i` low.
//
// Behaviour:
// - Address collision: When Ports are making a write access onto the same address,
// the write operation will start at the port with the lowest address
// index, each port will overwrite the changes made by the previous ports
// according to how the respective `be_i` signal is set.
// - Read data on write: This implementation will not produce a read data output on the signal
// `rdata_o` when `req_i` and `we_i` are asserted. The output data is stable
// on write requests.
module tc_sram #(
parameter int unsigned NumWords = 32'd1024, // Number of Words in data array
parameter int unsigned DataWidth = 32'd128, // Data signal width
parameter int unsigned ByteWidth = 32'd8, // Width of a data byte
parameter int unsigned NumPorts = 32'd2, // Number of read and write ports
parameter int unsigned Latency = 32'd1, // Latency when the read data is available
parameter SimInit = "none", // Simulation initialization
parameter bit PrintSimCfg = 1'b0, // Print configuration
// DEPENDENT PARAMETERS, DO NOT OVERWRITE!
parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
parameter int unsigned BeWidth = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
parameter type addr_t = logic [AddrWidth-1:0],
parameter type data_t = logic [DataWidth-1:0],
parameter type be_t = logic [BeWidth-1:0]
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// input ports
input logic [NumPorts-1:0] req_i, // request
input logic [NumPorts-1:0] we_i, // write enable
input addr_t [NumPorts-1:0] addr_i, // request address
input data_t [NumPorts-1:0] wdata_i, // write data
input be_t [NumPorts-1:0] be_i, // write byte enable
// output ports
output data_t [NumPorts-1:0] rdata_o // read data
);
// memory array
//data_t sram [NumWords-1:0];
// hold the read address when no read access is made
//addr_t [NumPorts-1:0] r_addr_q;
generate
if (DataWidth == 32 && NumWords == 256) begin
// SRAM_256x32_GF12 sram_instance(.Q(rdata_o), .CLK(clk_i), .CEN(~req_i), .GWEN(we_i), .A(addr_i), .D(wdata_i), .EMA(3'b111), .RET1N(1'b0), .RET2N(1'b0));
fakeram45_256x32 sram_instance(.rd_out(rdata_o[0]), .clk(clk_i), .ce_in(~req_i), .we_in(we_i), .addr_in(addr_i), .wd_in(wdata_i[0]));
end
else if (DataWidth == 256 && NumWords == 64)begin
// RF_64x64_GF12 fr_sp_instance0(.Q(rdata_o[0][63:0]), .CLK(clk_i), .CEN(~req_i), .GWEN(we_i), .A(addr_i), .D(wdata_i[0][63:0]), .EMA(3'b000), .RET1N(1'b0), .RET2N(1'b0));
// RF_64x64_GF12 fr_sp_instance1(.Q(rdata_o[0][127:64]), .CLK(clk_i), .CEN(~req_i), .GWEN(we_i), .A(addr_i), .D(wdata_i[0][127:64]), .EMA(3'b000), .RET1N(1'b0), .RET2N(1'b0));
// RF_64x64_GF12 fr_sp_instance2(.Q(rdata_o[0][191:128]), .CLK(clk_i), .CEN(~req_i), .GWEN(we_i), .A(addr_i), .D(wdata_i[0][191:128]), .EMA(3'b000), .RET1N(1'b0), .RET2N(1'b0));
// RF_64x64_GF12 fr_sp_instance3(.Q(rdata_o[0][255:192]), .CLK(clk_i), .CEN(~req_i), .GWEN(we_i), .A(addr_i), .D(wdata_i[0][255:192]), .EMA(3'b000), .RET1N(1'b0), .RET2N(1'b0));
fakeram45_64x64 fr_sp_instance0(.rd_out(rdata_o[0][63:0]), .clk(clk_i), .ce_in(~req_i), .we_in(we_i), .addr_in(addr_i), .wd_in(wdata_i[0][63:0]));
fakeram45_64x64 fr_sp_instance1(.rd_out(rdata_o[0][127:64]), .clk(clk_i), .ce_in(~req_i), .we_in(we_i), .addr_in(addr_i), .wd_in(wdata_i[0][127:64]));
fakeram45_64x64 fr_sp_instance2(.rd_out(rdata_o[0][191:128]), .clk(clk_i), .ce_in(~req_i), .we_in(we_i), .addr_in(addr_i), .wd_in(wdata_i[0][191:128]));
fakeram45_64x64 fr_sp_instance3(.rd_out(rdata_o[0][255:192]), .clk(clk_i), .ce_in(~req_i), .we_in(we_i), .addr_in(addr_i), .wd_in(wdata_i[0][255:192]));
end
else begin
// memory array
data_t sram [NumWords-1:0];
// hold the read address when no read access is made
addr_t [NumPorts-1:0] r_addr_q;
// SRAM simulation initialization
data_t [NumWords-1:0] init_val;
//initial begin : proc_sram_init
// for (int unsigned i = 0; i < NumWords; i++) begin
// for (int unsigned j = 0; j < DataWidth; j++) begin
// case (SimInit)
// "zeros": init_val[i][j] = 1'b0;
// "ones": init_val[i][j] = 1'b1;
// "random": init_val[i][j] = $urandom();
// default: init_val[i][j] = 1'bx;
// endcase
// end
// end
//end
// set the read output if requested
// The read data at the highest array index is set combinational.
// It gets then delayed for a number of cycles until it gets available at the output at
// array index 0.
// read data output assignment
data_t [NumPorts-1:0][Latency-1:0] rdata_q, rdata_d;
if (Latency == 32'd0) begin : gen_no_read_lat
for (genvar i = 0; i < NumPorts; i++) begin : gen_port
assign rdata_o[i] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
end
end else begin : gen_read_lat
always_comb begin
for (int unsigned i = 0; i < NumPorts; i++) begin
rdata_o[i] = rdata_q[i][0];
for (int unsigned j = 0; j < (Latency-1); j++) begin
rdata_d[i][j] = rdata_q[i][j+1];
end
rdata_d[i][Latency-1] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
end
end
end
// write memory array
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
for (int unsigned i = 0; i < NumWords; i++) begin
sram[i] <= init_val[i];
end
for (int i = 0; i < NumPorts; i++) begin
r_addr_q[i] <= {AddrWidth{1'b0}};
// initialize the read output register for each port
if (Latency != 32'd0) begin
for (int unsigned j = 0; j < Latency; j++) begin
rdata_q[i][j] <= init_val[{AddrWidth{1'b0}}];
end
end
end
end else begin
// read value latch happens before new data is written to the sram
for (int unsigned i = 0; i < NumPorts; i++) begin
if (Latency != 0) begin
for (int unsigned j = 0; j < Latency; j++) begin
rdata_q[i][j] <= rdata_d[i][j];
end
end
end
// there is a request for the SRAM, latch the required register
for (int unsigned i = 0; i < NumPorts; i++) begin
if (req_i[i]) begin
if (we_i[i]) begin
// update value when write is set at clock
for (int unsigned j = 0; j < DataWidth; j++) begin
if (be_i[i][j/ByteWidth]) begin
sram[addr_i[i]][j] <= wdata_i[i][j];
end
end
end else begin
// otherwise update read address for subsequent non request cycles
r_addr_q[i] <= addr_i[i];
end
end // if req_i
end // for ports
end // if !rst_ni
end
// Validate parameters.
// pragma translate_off
`ifndef VERILATOR
`ifndef TARGET_SYNTHESYS
initial begin: p_assertions
assert ($bits(addr_i) == NumPorts * AddrWidth) else $fatal(1, "AddrWidth problem on `addr_i`");
assert ($bits(wdata_i) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `wdata_i`");
assert ($bits(be_i) == NumPorts * BeWidth) else $fatal(1, "BeWidth problem on `be_i`" );
assert ($bits(rdata_o) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `rdata_o`");
assert (NumWords >= 32'd1) else $fatal(1, "NumWords has to be > 0");
assert (DataWidth >= 32'd1) else $fatal(1, "DataWidth has to be > 0");
assert (ByteWidth >= 32'd1) else $fatal(1, "ByteWidth has to be > 0");
assert (NumPorts >= 32'd1) else $fatal(1, "The number of ports must be at least 1!");
end
initial begin: p_sim_hello
if (PrintSimCfg) begin
$display("#################################################################################");
$display("tc_sram functional instantiated with the configuration:" );
$display("Instance: %m" );
$display("Number of ports (dec): %0d", NumPorts );
$display("Number of words (dec): %0d", NumWords );
$display("Address width (dec): %0d", AddrWidth );
$display("Data width (dec): %0d", DataWidth );
$display("Byte width (dec): %0d", ByteWidth );
$display("Byte enable width (dec): %0d", BeWidth );
$display("Latency Cycles (dec): %0d", Latency );
$display("Simulation init (str): %0s", SimInit );
$display("#################################################################################");
end
end
for (genvar i = 0; i < NumPorts; i++) begin : gen_assertions
assert property ( @(posedge clk_i) disable iff (!rst_ni)
(req_i[i] |-> (addr_i[i] < NumWords))) else
$warning("Request address %0h not mapped, port %0d, expect random write or read behavior!",
addr_i[i], i);
end
`endif
`endif
// pragma translate_on
end
endgenerate
endmodule
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Description: Handles the protocol conversion from valid/ready to req/gnt and correctly returns
// the metadata. Additionally, it handles atomics. Hence, it needs to be instantiated in front of
// an SRAM over which it has exclusive access.
//
// Author: Samuel Riedel <sriedel@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
// tcdm_adapter
//
// Sits in front of a single SRAM bank and
//   * converts the valid/ready request/response handshake on the master side
//     into the request-only interface on the slave side,
//   * keeps the request metadata of non-write requests in a spill register and
//     returns it together with the read data,
//   * executes atomic memory operations as a read followed by a write-back of
//     the ALU result, and
//   * optionally (LrScEnable) tracks a single LR/SC reservation.
//
// Parameters
//   AddrWidth   : width of the bank address.
//   DataWidth   : width of the data path; an elaboration check below rejects
//                 anything other than 32.
//   metadata_t  : type of the metadata carried along with a request. When
//                 LrScEnable is set, the LR/SC logic reads the fields
//                 `ini_addr`, `tile_id` and `core_id` of this type.
//   LrScEnable  : instantiate the LR/SC reservation logic.
//   RegisterAmo : register the AMO result before writing it back (one extra
//                 state, WriteBackAMO, is visited).
//   BeWidth     : derived, DataWidth/8.
module tcdm_adapter #(
  parameter int unsigned AddrWidth = 32,
  parameter int unsigned DataWidth = 32,
  parameter type metadata_t = logic,
  parameter bit LrScEnable = 1,
  // Cut path between request and response at the cost of increased AMO latency
  parameter bit RegisterAmo = 1'b0,
  // Dependent parameters. DO NOT CHANGE.
  localparam int unsigned BeWidth = DataWidth/8
) (
  input logic clk_i,
  input logic rst_ni,
  // master side
  input logic in_valid_i, // Bank request
  output logic in_ready_o, // Bank grant
  input logic [AddrWidth-1:0] in_address_i, // Address
  input logic [3:0] in_amo_i, // Atomic Memory Operation
  input logic in_write_i, // 1: Store, 0: Load
  input logic [DataWidth-1:0] in_wdata_i, // Write data
  input metadata_t in_meta_i, // Meta data
  input logic [BeWidth-1:0] in_be_i, // Byte enable
  output logic in_valid_o, // Response valid (metadata and read data available)
  input logic in_ready_i, // Response ready
  output logic [DataWidth-1:0] in_rdata_o, // Read data
  output metadata_t in_meta_o, // Meta data
  // slave side
  output logic out_req_o, // Bank request
  output logic [AddrWidth-1:0] out_add_o, // Address
  output logic out_write_o, // 1: Store, 0: Load
  output logic [DataWidth-1:0] out_wdata_o, // Write data
  output logic [BeWidth-1:0] out_be_o, // Byte enable
  input logic [DataWidth-1:0] out_rdata_i // Read data
);
  import mempool_pkg::NumCores;
  import mempool_pkg::NumGroups;
  import mempool_pkg::NumCoresPerTile;
  import cf_math_pkg::idx_width;

  // Encoding of the 4-bit `in_amo_i` request field.
  typedef enum logic [3:0] {
    AMONone = 4'h0,
    AMOSwap = 4'h1,
    AMOAdd = 4'h2,
    AMOAnd = 4'h3,
    AMOOr = 4'h4,
    AMOXor = 4'h5,
    AMOMax = 4'h6,
    AMOMaxu = 4'h7,
    AMOMin = 4'h8,
    AMOMinu = 4'h9,
    AMOLR = 4'hA,
    AMOSC = 4'hB
  } amo_op_t;

  // meta_ready / rdata_ready are only driven by the registers below; rdata_ready
  // is additionally checked by the `rdata_full` assertion at the end.
  logic meta_valid, meta_ready;
  logic rdata_valid, rdata_ready;
  /// read signal before register
  logic [DataWidth-1:0] out_rdata;
  logic out_gnt;
  logic pop_resp;

  // AMO sequencing state.
  enum logic [1:0] {
    Idle, DoAMO, WriteBackAMO
  } state_q, state_d;
  logic load_amo;
  amo_op_t amo_op_q;
  // NOTE(review): be_expand is declared but not referenced anywhere in this module.
  logic [BeWidth-1:0] be_expand;
  logic [AddrWidth-1:0] addr_q;
  logic [31:0] amo_operand_a;
  logic [31:0] amo_operand_b_q;
  logic [31:0] amo_result, amo_result_q;

  // Store the metadata at handshake (only for requests with in_write_i == 0)
  spill_register #(
    .T (metadata_t),
    .Bypass(1'b0 )
  ) i_metadata_register (
    .clk_i (clk_i ),
    .rst_ni (rst_ni ),
    .valid_i(in_valid_i && in_ready_o && !in_write_i),
    .ready_o(meta_ready ),
    .data_i (in_meta_i ),
    .valid_o(meta_valid ),
    .ready_i(pop_resp ),
    .data_o (in_meta_o )
  );

  // Store response if it's not accepted immediately
  fall_through_register #(
    .T(logic[DataWidth-1:0])
  ) i_rdata_register (
    .clk_i (clk_i ),
    .rst_ni (rst_ni ),
    .clr_i (1'b0 ),
    .testmode_i(1'b0 ),
    .data_i (out_rdata ),
    .valid_i (out_gnt ),
    .ready_o (rdata_ready),
    .data_o (in_rdata_o ),
    .valid_o (rdata_valid),
    .ready_i (pop_resp )
  );

  localparam int unsigned CoreIdWidth = idx_width(NumCores);
  localparam int unsigned IniAddrWidth = idx_width(NumCoresPerTile + NumGroups);

  logic sc_successful_d, sc_successful_q;
  logic sc_q;

  // In case of a SC we must forward SC result from the cycle earlier.
  // The forwarded value is 0 when the SC succeeded and 1 when it did not.
  assign out_rdata = (sc_q && LrScEnable) ? $unsigned(!sc_successful_q) : out_rdata_i;

  // Ready to output data if both meta and read data
  // are available (the read data will always be last)
  assign in_valid_o = meta_valid && rdata_valid;

  // Only pop the data from the registers once both registers are ready
  assign pop_resp = in_ready_i && in_valid_o;

  // Generate out_gnt one cycle after sending read request to the bank
  // (or after a successful SC, which is issued to the bank as a write).
  `FF(out_gnt, (out_req_o && !out_write_o) || sc_successful_d, 1'b0, clk_i, rst_ni);

  // ----------------
  // LR/SC
  // ----------------
  if (LrScEnable) begin : gen_lrsc
    // unique core identifier, does not necessarily match core_id
    logic [CoreIdWidth:0] unique_core_id;

    typedef struct packed {
      /// Is the reservation valid.
      logic valid;
      /// On which address is the reservation placed.
      /// This address is aligned to the memory size
      /// implying that the reservation happen on a set size
      /// equal to the word width of the memory (32 or 64 bit).
      logic [AddrWidth-1:0] addr;
      /// Which core made this reservation. Important to
      /// track the reservations from different cores and
      /// to prevent any live-locking.
      logic [CoreIdWidth:0] core;
    } reservation_t;

    reservation_t reservation_d, reservation_q;

    `FF(sc_successful_q, sc_successful_d, 1'b0, clk_i, rst_ni);
    `FF(reservation_q, reservation_d, 1'b0, clk_i, rst_ni);
    // Remember that the request accepted in the previous cycle was an SC.
    `FF(sc_q, in_valid_i && in_ready_o && (amo_op_t'(in_amo_i) == AMOSC), 1'b0, clk_i, rst_ni);

    always_comb begin
      // {group_id, tile_id, core_id}
      // MSB of ini_addr determines if request is coming from local or remote tile
      if (in_meta_i.ini_addr[IniAddrWidth-1] == 0) begin
        // Request is coming from the local tile
        // take group id of TCDM adapter
        // The leading zero is written as an explicitly sized 1'b0: unsized
        // literals are not allowed inside a concatenation, and the
        // self-determined width of '0 is one bit, so the value is unchanged.
        unique_core_id = {1'b0, in_meta_i.tile_id, in_meta_i.ini_addr[IniAddrWidth-2:0]};
      end else begin
        // Request is coming from a remote tile
        // take group id from ini_addr
        // Ignore first bit of IniAddr to obtain the group address
        unique_core_id = {in_meta_i.ini_addr[IniAddrWidth-2:0],
                          in_meta_i.tile_id, in_meta_i.core_id};
      end

      reservation_d = reservation_q;
      sc_successful_d = 1'b0;

      // new valid transaction
      if (in_valid_i && in_ready_o) begin
        // An SC can only pair with the most recent LR in program order.
        // Place a reservation on the address if there isn't already a valid reservation.
        // We prevent a live-lock by not throwing away the reservation of a hart unless
        // it makes a new reservation in program order or issues any SC.
        if (amo_op_t'(in_amo_i) == AMOLR &&
            (!reservation_q.valid || reservation_q.core == unique_core_id)) begin
          reservation_d.valid = 1'b1;
          reservation_d.addr = in_address_i;
          reservation_d.core = unique_core_id;
        end

        // An SC may succeed only if no store from another hart (or other device) to
        // the reservation set can be observed to have occurred between
        // the LR and the SC, and if there is no other SC between the
        // LR and itself in program order.
        // check whether another core has made a write attempt
        // (a plain store, or any AMO other than LR/SC, to the reserved address)
        if ((unique_core_id != reservation_q.core) &&
            (in_address_i == reservation_q.addr) &&
            (!(amo_op_t'(in_amo_i) inside {AMONone, AMOLR, AMOSC}) || in_write_i)) begin
          reservation_d.valid = 1'b0;
        end

        // An SC from the same hart clears any pending reservation.
        // It is successful only if it targets the reserved address.
        if (reservation_q.valid && amo_op_t'(in_amo_i) == AMOSC
            && reservation_q.core == unique_core_id) begin
          reservation_d.valid = 1'b0;
          sc_successful_d = (reservation_q.addr == in_address_i);
        end
      end
    end // always_comb
  end else begin : disable_lrcs
    // Without LR/SC support an SC never succeeds and is never flagged.
    assign sc_q = 1'b0;
    assign sc_successful_d = 1'b0;
    assign sc_successful_q = 1'b0;
  end

  // ----------------
  // Atomics
  // ----------------
  always_comb begin
    // feed-through
    // Stall new requests while a response is pending and not being accepted.
    in_ready_o = in_valid_o && !in_ready_i ? 1'b0 : 1'b1;
    out_req_o = in_valid_i && in_ready_o;
    out_add_o = in_address_i;
    // A successful SC is forwarded to the bank as a write.
    out_write_o = in_write_i || (sc_successful_d && (amo_op_t'(in_amo_i) == AMOSC));
    out_wdata_o = in_wdata_i;
    out_be_o = in_be_i;

    state_d = state_q;
    load_amo = 1'b0;

    unique case (state_q)
      Idle: begin
        // Any accepted AMO other than None/LR/SC starts a read-modify-write.
        if (in_valid_i && in_ready_o && !(amo_op_t'(in_amo_i) inside {AMONone, AMOLR, AMOSC})) begin
          load_amo = 1'b1;
          state_d = DoAMO;
        end
      end
      // Claim the memory interface
      DoAMO, WriteBackAMO: begin
        in_ready_o = 1'b0;
        // With RegisterAmo, DoAMO is followed by WriteBackAMO; otherwise return to Idle.
        state_d = (RegisterAmo && state_q != WriteBackAMO) ? WriteBackAMO : Idle;
        // Commit AMO
        out_req_o = 1'b1;
        out_write_o = 1'b1;
        out_add_o = addr_q;
        // Enable every byte lane; '1 sizes itself to BeWidth
        // (equal to the former 4'b1111 for the supported DataWidth of 32).
        out_be_o = '1;
        // serve from register if we cut the path
        if (RegisterAmo) begin
          out_wdata_o = amo_result_q;
        end else begin
          out_wdata_o = amo_result;
        end
      end
      default:;
    endcase
  end

  if (RegisterAmo) begin : gen_amo_slice
    // Capture the ALU result while in DoAMO (no reset argument is passed to the macro).
    `FFLNR(amo_result_q, amo_result, (state_q == DoAMO), clk_i)
  end else begin : gen_amo_slice
    assign amo_result_q = '0;
  end

  // AMO bookkeeping registers: operation, target address and operand B are
  // captured when an AMO is accepted; the operation falls back to AMONone
  // in every other cycle.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      state_q <= Idle;
      amo_op_q <= amo_op_t'('0);
      addr_q <= '0;
      amo_operand_b_q <= '0;
    end else begin
      state_q <= state_d;
      if (load_amo) begin
        amo_op_q <= amo_op_t'(in_amo_i);
        addr_q <= in_address_i;
        amo_operand_b_q <= in_wdata_i;
      end else begin
        amo_op_q <= AMONone;
      end
    end
  end

  // ----------------
  // AMO ALU
  // ----------------
  // Operand A is the value read from the bank, operand B the registered write
  // data of the AMO request. Min/max are decided from bit 32 of A + (-B).
  logic [33:0] adder_sum;
  logic [32:0] adder_operand_a, adder_operand_b;

  assign amo_operand_a = out_rdata_i;
  assign adder_sum = adder_operand_a + adder_operand_b;

  /* verilator lint_off WIDTH */
  always_comb begin : amo_alu
    adder_operand_a = $signed(amo_operand_a);
    adder_operand_b = $signed(amo_operand_b_q);
    amo_result = amo_operand_b_q;

    unique case (amo_op_q)
      // the default is to output operand_b
      AMOSwap:;
      AMOAdd: amo_result = adder_sum[31:0];
      AMOAnd: amo_result = amo_operand_a & amo_operand_b_q;
      AMOOr: amo_result = amo_operand_a | amo_operand_b_q;
      AMOXor: amo_result = amo_operand_a ^ amo_operand_b_q;
      AMOMax: begin
        adder_operand_b = -$signed(amo_operand_b_q);
        amo_result = adder_sum[32] ? amo_operand_b_q : amo_operand_a;
      end
      AMOMin: begin
        adder_operand_b = -$signed(amo_operand_b_q);
        amo_result = adder_sum[32] ? amo_operand_a : amo_operand_b_q;
      end
      AMOMaxu: begin
        adder_operand_a = $unsigned(amo_operand_a);
        adder_operand_b = -$unsigned(amo_operand_b_q);
        amo_result = adder_sum[32] ? amo_operand_b_q : amo_operand_a;
      end
      AMOMinu: begin
        adder_operand_a = $unsigned(amo_operand_a);
        adder_operand_b = -$unsigned(amo_operand_b_q);
        amo_result = adder_sum[32] ? amo_operand_a : amo_operand_b_q;
      end
      default: amo_result = '0;
    endcase
  end

  // pragma translate_off
  // Check for unsupported parameters
  if (DataWidth != 32) begin
    $error($sformatf("Module currently only supports DataWidth = 32. DataWidth is currently set to: %0d", DataWidth));
  end

`ifndef VERILATOR
  // A new read response must never arrive while the read data register cannot take it.
  rdata_full : assert property(
    @(posedge clk_i) disable iff (~rst_ni) (out_gnt |-> rdata_ready))
    else $fatal (1, "Trying to push new data although the i_rdata_register is not ready.");
`endif
  // pragma translate_on
endmodule
// Copyright 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 28.05.2019
// Description: Package with important constants and lookup tables for TCDM
// interconnect.
// Constants and lookup tables shared by the TCDM interconnect.
package tcdm_interconnect_pkg;
// Interconnect topology selector (2-bit encoding, LIC = 0 ... CLOS = 3).
// NOTE(review): the meaning of the individual topologies is not visible in this
// package; confirm against the interconnect module that consumes topo_e.
typedef enum logic [1:0] { LIC, BFLY2, BFLY4, CLOS } topo_e;
////////////////////////////////////////////////////////////////////////
// LUT params for Clos net with configs: 1: m=0.50*n, 2: m=1.00*n, 3: m=2.00*n,
// to be indexed with [config_idx][$clog2(BankingFact)][$clog2(NumBanks)]
// generated with MATLAB script gen_clos_params.m
//
// Layout of every table below (packed dimensions [3:1][4:0][12:2][15:0]):
//   - config_idx         ranges over 3..1,
//   - $clog2(BankingFact) ranges over 4..0,
//   - $clog2(NumBanks)    ranges over 12..2,
//   - each entry is a 16-bit value.
// The initializer is a flat concatenation, so the leftmost literal lands in the
// highest indices: each text row is one [config_idx][$clog2(BankingFact)] slice
// listed from $clog2(NumBanks) = 12 down to 2, rows run from
// $clog2(BankingFact) = 4 down to 0, and groups of five rows run from
// config_idx = 3 down to 1.
////////////////////////////////////////////////////////////////////////
// "N" table (presumably the Clos parameter n referred to in the header above).
// The values are the same for all three configurations.
localparam logic [3:1][4:0][12:2][15:0] ClosNLut = {16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
// config_idx = 2
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
// config_idx = 1
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2};
// "M" table (presumably the Clos parameter m). Compared entry by entry with
// ClosNLut the values are 2x (config_idx = 3), 1x (config_idx = 2) and
// 0.5x (config_idx = 1), matching the m/n ratios listed in the header.
localparam logic [3:1][4:0][12:2][15:0] ClosMLut = {16'd128,16'd128,16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,
16'd128,16'd128,16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,
16'd128,16'd128,16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,
16'd128,16'd128,16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,
16'd128,16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,
// config_idx = 2
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
// config_idx = 1
16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,16'd1,
16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,16'd1,
16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,16'd1,
16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,16'd1,
16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,16'd1,16'd1};
// "R" table (presumably the Clos parameter r). The values are the same for all
// three configurations; for every entry, ClosNLut * ClosRLut equals
// 2**(index of the last dimension), i.e. 4096 down to 4 along a row.
localparam logic [3:1][4:0][12:2][15:0] ClosRLut = {16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
// config_idx = 2
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,
// config_idx = 1
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2,16'd2,
16'd64,16'd64,16'd32,16'd32,16'd16,16'd16,16'd8,16'd8,16'd4,16'd4,16'd2};
endpackage : tcdm_interconnect_pkg
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// TCDM Shim
// Description: Converts proper handshaking (ready/valid) to TCDM signaling
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// tcdm_shim
//
// Routes the request stream of one core to one of NrTCDM TCDM ports or NrSoC
// SoC ports, selected by address through snitch_addr_demux, and merges the
// responses back into a single response stream. Requests and responses are
// carried through the demux as dreq_t / dresp_t payloads. For SoC reads the
// request ID is parked in a FIFO and re-attached to the response.
module tcdm_shim
  import mempool_pkg::address_map_t;
  import cf_math_pkg::idx_width;
#(
  parameter int unsigned AddrWidth = 32 ,
  parameter int unsigned DataWidth = 32 ,
  parameter int unsigned MaxOutStandingReads = 8 ,
  parameter int unsigned NrTCDM = 2 ,
  parameter int unsigned NrSoC = 1 ,
  parameter int unsigned NumRules = 1 , // Routing rules
  localparam int unsigned StrbWidth = DataWidth/8 ,
  localparam int unsigned NumOutput = NrTCDM + NrSoC,
  localparam int unsigned MetaIdWidth = idx_width(MaxOutStandingReads)
) (
  input logic clk_i,
  input logic rst_ni,
  // TCDM-facing request/response ports
  output logic [NrTCDM-1:0] tcdm_req_valid_o,
  output logic [NrTCDM-1:0][AddrWidth-1:0] tcdm_req_tgt_addr_o,
  output logic [NrTCDM-1:0] tcdm_req_wen_o,
  output logic [NrTCDM-1:0][DataWidth-1:0] tcdm_req_wdata_o,
  output logic [NrTCDM-1:0][3:0] tcdm_req_amo_o,
  output logic [NrTCDM-1:0][MetaIdWidth-1:0] tcdm_req_id_o,
  output logic [NrTCDM-1:0][StrbWidth-1:0] tcdm_req_be_o,
  input logic [NrTCDM-1:0] tcdm_req_ready_i,
  input logic [NrTCDM-1:0] tcdm_resp_valid_i,
  output logic [NrTCDM-1:0] tcdm_resp_ready_o,
  input logic [NrTCDM-1:0][DataWidth-1:0] tcdm_resp_rdata_i,
  input logic [NrTCDM-1:0][MetaIdWidth-1:0] tcdm_resp_id_i,
  // SoC-facing request/response ports
  output logic [NrSoC-1:0] [AddrWidth-1:0] soc_qaddr_o,
  output logic [NrSoC-1:0] soc_qwrite_o,
  output logic [NrSoC-1:0] [3:0] soc_qamo_o,
  output logic [NrSoC-1:0] [DataWidth-1:0] soc_qdata_o,
  output logic [NrSoC-1:0] [StrbWidth-1:0] soc_qstrb_o,
  output logic [NrSoC-1:0] soc_qvalid_o,
  input logic [NrSoC-1:0] soc_qready_i,
  input logic [NrSoC-1:0] [DataWidth-1:0] soc_pdata_i,
  input logic [NrSoC-1:0] soc_perror_i,
  input logic [NrSoC-1:0] soc_pvalid_i,
  output logic [NrSoC-1:0] soc_pready_o,
  // Core-facing request/response port
  input logic [AddrWidth-1:0] data_qaddr_i,
  input logic data_qwrite_i,
  input logic [3:0] data_qamo_i,
  input logic [DataWidth-1:0] data_qdata_i,
  input logic [StrbWidth-1:0] data_qstrb_i,
  input logic [MetaIdWidth-1:0] data_qid_i,
  input logic data_qvalid_i,
  output logic data_qready_o,
  output logic [DataWidth-1:0] data_pdata_o,
  output logic data_perror_o,
  output logic [MetaIdWidth-1:0] data_pid_o,
  output logic data_pvalid_o,
  input logic data_pready_i,
  // Address decoding rules handed to the demux
  input address_map_t [NumRules-1:0] address_map_i
);

  // Payload types exchanged with the demux
  import snitch_pkg::dreq_t ;
  import snitch_pkg::dresp_t;

  // Register macros
  `include "common_cells/registers.svh"

  // ------------------------------------------------------------------
  // Internal payload bundles
  // ------------------------------------------------------------------
  // Request direction: core -> {SoC, TCDM}
  dreq_t data_qpayload ;
  dreq_t [NrSoC-1:0] soc_qpayload ;
  dreq_t [NrTCDM-1:0] tcdm_qpayload;
  // Response direction: {SoC, TCDM} -> core
  dresp_t data_ppayload ;
  dresp_t [NrSoC-1:0] soc_ppayload ;
  dresp_t [NrTCDM-1:0] tcdm_ppayload;
  // IDs of outstanding SoC reads, one FIFO output per SoC port
  logic [NrSoC-1:0][MetaIdWidth-1:0] soc_meta_id;

  // ------------------------------------------------------------------
  // Core side: pack the request, unpack the response
  // ------------------------------------------------------------------
  assign data_qpayload.strb = data_qstrb_i ;
  assign data_qpayload.id = data_qid_i ;
  assign data_qpayload.data = data_qdata_i ;
  assign data_qpayload.amo = data_qamo_i ;
  assign data_qpayload.write = data_qwrite_i;
  assign data_qpayload.addr = data_qaddr_i ;

  assign data_pid_o = data_ppayload.id ;
  assign data_perror_o = data_ppayload.error;
  assign data_pdata_o = data_ppayload.data ;

  // ------------------------------------------------------------------
  // Address-based routing. SoC ports occupy the upper demux outputs,
  // TCDM ports the lower ones.
  // ------------------------------------------------------------------
  snitch_addr_demux #(
    .NrOutput (NumOutput),
    .AddressWidth (AddrWidth),
    .NumRules (NumRules ), // TODO
    .req_t (dreq_t ),
    .resp_t (dresp_t )
  ) i_snitch_addr_demux (
    .clk_i (clk_i ),
    .rst_ni (rst_ni ),
    .req_addr_i (data_qaddr_i ),
    .req_payload_i (data_qpayload ),
    .req_valid_i (data_qvalid_i ),
    .req_ready_o (data_qready_o ),
    .resp_payload_o(data_ppayload ),
    .resp_valid_o (data_pvalid_o ),
    .resp_ready_i (data_pready_i ),
    .req_payload_o ({soc_qpayload, tcdm_qpayload} ),
    .req_valid_o ({soc_qvalid_o, tcdm_req_valid_o} ),
    .req_ready_i ({soc_qready_i, tcdm_req_ready_i} ),
    .resp_payload_i({soc_ppayload, tcdm_ppayload} ),
    .resp_valid_i ({soc_pvalid_i, tcdm_resp_valid_i}),
    .resp_ready_o ({soc_pready_o, tcdm_resp_ready_o}),
    .address_map_i (address_map_i )
  );

  // ------------------------------------------------------------------
  // TCDM side
  // ------------------------------------------------------------------
  // Responses: TCDM responses never carry an error.
  for (genvar t = 0; t < NrTCDM; t++) begin : gen_tcdm_ppayload
    assign tcdm_ppayload[t].error = 1'b0 ;
    assign tcdm_ppayload[t].data = tcdm_resp_rdata_i[t];
    assign tcdm_ppayload[t].id = tcdm_resp_id_i[t] ;
  end

  // Requests: unpack the routed payload onto the individual TCDM ports.
  for (genvar t = 0; t < NrTCDM; t++) begin : gen_tcdm_con
    assign tcdm_req_be_o[t] = tcdm_qpayload[t].strb ;
    assign tcdm_req_wen_o[t] = tcdm_qpayload[t].write;
    assign tcdm_req_id_o[t] = tcdm_qpayload[t].id ;
    assign tcdm_req_amo_o[t] = tcdm_qpayload[t].amo ;
    assign tcdm_req_wdata_o[t] = tcdm_qpayload[t].data ;
    assign tcdm_req_tgt_addr_o[t] = tcdm_qpayload[t].addr ;
  end

  // ------------------------------------------------------------------
  // SoC side
  // ------------------------------------------------------------------
  // The SoC ports have no ID signal. The ID of every accepted non-write
  // request is pushed into a FIFO and popped again when a response is handed
  // over (responses come back in order).
  for (genvar s = 0; s < NrSoC; s++) begin: gen_soc_meta_id_fifo
    logic id_push, id_pop;

    // Accepted request that is not a write
    assign id_push = soc_qvalid_o[s] && soc_qready_i[s] && !soc_qwrite_o[s];
    // Accepted response
    assign id_pop = soc_pvalid_i[s] && soc_pready_o[s];

    fifo_v3 #(
      .DEPTH (MaxOutStandingReads),
      .DATA_WIDTH(MetaIdWidth )
    ) i_soc_meta_id_fifo (
      .clk_i (clk_i ),
      .rst_ni (rst_ni ),
      .flush_i (1'b0 ),
      .testmode_i(1'b0 ),
      .data_i (data_qid_i ),
      .push_i (id_push ),
      .full_o ( ), // left unconnected
      .data_o (soc_meta_id[s]),
      .pop_i (id_pop ),
      .empty_o ( ), // left unconnected
      .usage_o ( ) // left unconnected
    );
  end: gen_soc_meta_id_fifo

  // Unpack routed requests onto the SoC ports and pack the SoC responses.
  for (genvar s = 0; s < NrSoC; s++) begin : gen_soc_con
    // request
    assign soc_qstrb_o[s] = soc_qpayload[s].strb ;
    assign soc_qdata_o[s] = soc_qpayload[s].data ;
    assign soc_qamo_o[s] = soc_qpayload[s].amo ;
    assign soc_qwrite_o[s] = soc_qpayload[s].write;
    assign soc_qaddr_o[s] = soc_qpayload[s].addr ;
    // response
    assign soc_ppayload[s].error = soc_perror_i[s] ;
    assign soc_ppayload[s].id = soc_meta_id[s] ;
    assign soc_ppayload[s].data = soc_pdata_i[s] ;
  end

  // ------------------------------------------------------------------
  // Parameter checks evaluated at elaboration time
  // ------------------------------------------------------------------
  if (DataWidth != 32) begin
    $fatal(1, "[tcdm_shim] Only support a data width of 32 bits.");
  end
  if (AddrWidth != 32) begin
    $fatal(1, "[tcdm_shim] Only support 32-bit wide addresses.");
  end

endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment