// Copyright 2018 ETH Zurich and University of Bologna. // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law // or agreed to in writing, software, hardware and materials distributed under // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. // // Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich // Date: 08.08.2018 // Description: adapter module to connect the L1D$ and L1I$ to the native // interface of the OpenPiton L1.5 cache. // // A couple of notes: // // 1) the L15 has been designed for an OpenSparc T1 core with 2 threads and can serve only // 1 ld and rd request per thread. Ariane has only one hart, but the LSU can issue several write // requests to optimize bandwidth. hence, we reuse the threadid field to issue and track multiple // requests (up to 2 in this case). // // 2) the CSM (clumped shared memory = coherence domain restriction in OpenPiton) // feature is currently not supported by Ariane. // // 3) some features like blockinitstore, prefetch, ECC errors are not used (see interface below) // // 4) the arbiter can store upt to two outgoing requests per cache. incoming responses are passed // through one streaming register, and need to be consumed unconditionally by the caches. // // 5) The L1.5 protocol is closely related to the CPX bus of openSPARC, see also [1,2] // // 6) Note on transaction data and size: if a store packet is less than 64 bits, then // the field is filled with copies of the data. in case of an interrupt vector, // an 18bit interrupt vector is expected. // // 7) L1I$ refill requests always have precedence over L1D$ requests. // // 8) L1I$ fill requests are always complete cache lines at the moment // // 9) the adapter converts from little endian (Ariane) to big endian (openpiton), and vice versa. // // 10) L1I$ requests to I.O space (bit39 of address = 1'b1) always return 32bit nc data // // Refs: [1] OpenSPARC T1 Microarchitecture Specification // https://www.oracle.com/technetwork/systems/opensparc/t1-01-opensparct1-micro-arch-1538959.html // [2] OpenPiton Microarchitecture Specification // https://parallel.princeton.edu/openpiton/docs/micro_arch.pdf // import ariane_pkg::*; import serpent_cache_pkg::*; module serpent_l15_adapter #( parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000, // end of cached region parameter bit SwapEndianess = 1 , parameter bit PitonRemapIO = 1 // for OpenPiton ) ( input logic clk_i, input logic rst_ni, // icache input logic icache_data_req_i, output logic icache_data_ack_o, input icache_req_t icache_data_i, // returning packets must be consumed immediately output logic icache_rtrn_vld_o, output icache_rtrn_t icache_rtrn_o, // dcache input logic dcache_data_req_i, output logic dcache_data_ack_o, input dcache_req_t dcache_data_i, // returning packets must be consumed immediately output logic dcache_rtrn_vld_o, output dcache_rtrn_t dcache_rtrn_o, // TODO: interrupt interface // L15 output l15_req_t l15_req_o, input l15_rtrn_t l15_rtrn_i ); // request path icache_req_t icache_data; logic icache_data_full, icache_data_empty; dcache_req_t dcache_data; logic dcache_data_full, dcache_data_empty; logic [1:0] arb_req, arb_ack; logic arb_idx; // return path logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop; l15_rtrn_t rtrn_fifo_data; /////////////////////////////////////////////////////// // request path to L15 /////////////////////////////////////////////////////// // relevant l15 signals // l15_req_t l15_req_o.l15_rqtype; // see below for encoding // logic l15_req_o.l15_nc; // non-cacheable bit // logic [2:0] l15_req_o.l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) // logic [L15_TID_WIDTH-1:0] l15_req_o.l15_threadid; // currently 0 or 1 // logic l15_req_o.l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment // logic [L15_WAY_WIDTH-1:0] l15_req_o.l15_l1rplway; // way to replace // logic [39:0] l15_req_o.l15_address; // physical address // logic [63:0] l15_req_o.l15_data; // word to write // logic [63:0] l15_req_o.l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests) // logic [L15_TLB_CSM_WIDTH-1:0] l15_req_o.l15_csm_data; logic [63:0] tmp_paddr; assign icache_data_ack_o = icache_data_req_i & ~icache_data_full; assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full; // data mux assign l15_req_o.l15_nc = (arb_idx) ? dcache_data.nc : icache_data.nc; // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. assign l15_req_o.l15_size = (arb_idx) ? dcache_data.size : (icache_data.nc) ? 3'b010 : 3'b111; assign l15_req_o.l15_threadid = (arb_idx) ? dcache_data.tid : icache_data.tid; assign l15_req_o.l15_prefetch = '0; // unused in openpiton assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no ECC at the moment assign l15_req_o.l15_blockstore = '0; // unused in openpiton assign l15_req_o.l15_blockinitstore = '0; // unused in openpiton assign l15_req_o.l15_l1rplway = (arb_idx) ? dcache_data.way : icache_data.way; // assign tmp_paddr = (arb_idx) ? dcache_data.paddr : // icache_data.paddr; // assign l15_req_o.l15_address = ((tmp_paddr < CachedAddrBeg) && PitonRemapIO) ? {25'b1, tmp_paddr[38:0]} : tmp_paddr; assign l15_req_o.l15_address = (arb_idx) ? dcache_data.paddr : icache_data.paddr; assign l15_req_o.l15_data_next_entry = '0; // unused in Ariane (only used for CAS atomic requests) assign l15_req_o.l15_csm_data = '0; // unused in Ariane (only used for coherence domain restriction features) assign l15_req_o.l15_amo_op = dcache_data.amo_op; // openpiton is big endian generate if (SwapEndianess) assign l15_req_o.l15_data = swendian64(dcache_data.data); else assign l15_req_o.l15_data = dcache_data.data; endgenerate // arbiter rrarbiter #( .NUM_REQ(2), .LOCK_IN(1) ) i_rrarbiter ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i( '0 ), .en_i ( l15_rtrn_i.l15_ack ), .req_i ( arb_req ), .ack_o ( arb_ack ), .vld_o ( ), .idx_o ( arb_idx ) ); assign arb_req = {~dcache_data_empty, ~icache_data_empty}; assign l15_req_o.l15_val = (|arb_req);// & ~header_ack_q; // encode packet type always_comb begin : p_req l15_req_o.l15_rqtype = L15_LOAD_RQ; unique case (arb_idx) 0: begin// icache l15_req_o.l15_rqtype = L15_IMISS_RQ; end 1: begin unique case (dcache_data.rtype) DCACHE_STORE_REQ: begin l15_req_o.l15_rqtype = L15_STORE_RQ; end DCACHE_LOAD_REQ: begin l15_req_o.l15_rqtype = L15_LOAD_RQ; end DCACHE_ATOMIC_REQ: begin l15_req_o.l15_rqtype = L15_ATOMIC_RQ; end // DCACHE_INT_REQ: begin // //TODO interrupt requests // end default: begin ; end endcase // dcache_data.rtype end default: begin ; end endcase end // p_req fifo_v2 #( .dtype ( icache_req_t ), .DEPTH ( ADAPTER_REQ_FIFO_DEPTH ) ) i_icache_data_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( 1'b0 ), .testmode_i ( 1'b0 ), .full_o ( icache_data_full ), .empty_o ( icache_data_empty ), .alm_full_o ( ), .alm_empty_o ( ), .data_i ( icache_data_i ), .push_i ( icache_data_ack_o ), .data_o ( icache_data ), .pop_i ( arb_ack[0] ) ); fifo_v2 #( .dtype ( dcache_req_t ), .DEPTH ( ADAPTER_REQ_FIFO_DEPTH ) ) i_dcache_data_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( 1'b0 ), .testmode_i ( 1'b0 ), .full_o ( dcache_data_full ), .empty_o ( dcache_data_empty ), .alm_full_o ( ), .alm_empty_o ( ), .data_i ( dcache_data_i ), .push_i ( dcache_data_ack_o ), .data_o ( dcache_data ), .pop_i ( arb_ack[1] ) ); /////////////////////////////////////////////////////// // return path from L15 /////////////////////////////////////////////////////// // relevant l15 signals // l15_rtrn_i.l15_returntype; // see below for encoding // l15_rtrn_i.l15_noncacheable; // non-cacheable bit // l15_rtrn_i.l15_atomic; // asserted in load return and store ack pack // l15_rtrn_i.l15_threadid; // used as transaction ID // l15_rtrn_i.l15_f4b; // 4byte instruction fill from I/O space (nc). // l15_rtrn_i.l15_data_0; // used for both caches // l15_rtrn_i.l15_data_1; // used for both caches // l15_rtrn_i.l15_data_2; // currently only used for I$ // l15_rtrn_i.l15_data_3; // currently only used for I$ // l15_rtrn_i.l15_inval_icache_all_way; // invalidate all ways // l15_rtrn_i.l15_inval_address_15_4; // invalidate selected cacheline // l15_rtrn_i.l15_inval_dcache_inval; // invalidate selected cacheline and way // l15_rtrn_i.l15_inval_way; // way to invalidate // acknowledge if we have space to hold this packet assign l15_req_o.l15_req_ack = l15_rtrn_i.l15_val & ~rtrn_fifo_full; // packets have to be consumed immediately assign rtrn_fifo_pop = ~rtrn_fifo_empty; // decode packet type always_comb begin : p_rtrn_logic icache_rtrn_o.rtype = ICACHE_IFILL_ACK; dcache_rtrn_o.rtype = DCACHE_LOAD_ACK; icache_rtrn_vld_o = 1'b0; dcache_rtrn_vld_o = 1'b0; if(~rtrn_fifo_empty) begin unique case (rtrn_fifo_data.l15_returntype) L15_LOAD_RET: begin dcache_rtrn_o.rtype = DCACHE_LOAD_ACK; dcache_rtrn_vld_o = 1'b1; end L15_ST_ACK: begin dcache_rtrn_o.rtype = DCACHE_STORE_ACK; dcache_rtrn_vld_o = 1'b1; end L15_IFILL_RET: begin icache_rtrn_o.rtype = ICACHE_IFILL_ACK; icache_rtrn_vld_o = 1'b1; end L15_EVICT_REQ: begin icache_rtrn_o.rtype = ICACHE_INV_REQ; dcache_rtrn_o.rtype = DCACHE_INV_REQ; icache_rtrn_vld_o = icache_rtrn_o.inv.vld | icache_rtrn_o.inv.all; dcache_rtrn_vld_o = dcache_rtrn_o.inv.vld | dcache_rtrn_o.inv.all; end L15_CPX_RESTYPE_ATOMIC_RES: begin dcache_rtrn_o.rtype = DCACHE_ATOMIC_ACK; dcache_rtrn_vld_o = 1'b1; end // L15_INT_RET: begin // TODO: implement this // dcache_rtrn_o.reqType = DCACHE_INT_ACK; // end default: begin ; end endcase // rtrn_fifo_data.l15_returntype end end // openpiton is big endian generate if (SwapEndianess) begin assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1), swendian64(rtrn_fifo_data.l15_data_0) }; assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3), swendian64(rtrn_fifo_data.l15_data_2), swendian64(rtrn_fifo_data.l15_data_1), swendian64(rtrn_fifo_data.l15_data_0) }; end else begin assign dcache_rtrn_o.data = { rtrn_fifo_data.l15_data_1, rtrn_fifo_data.l15_data_0 }; assign icache_rtrn_o.data = { rtrn_fifo_data.l15_data_3, rtrn_fifo_data.l15_data_2, rtrn_fifo_data.l15_data_1, rtrn_fifo_data.l15_data_0 }; end endgenerate // fifo signals assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid; assign icache_rtrn_o.nc = rtrn_fifo_data.l15_noncacheable; assign icache_rtrn_o.f4b = rtrn_fifo_data.l15_f4b; assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid; assign dcache_rtrn_o.nc = rtrn_fifo_data.l15_noncacheable; // invalidation signal mapping assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000}; assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way; assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval; assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way; assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000}; assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way; assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval; assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way; fifo_v2 #( .dtype ( l15_rtrn_t ), .DEPTH ( ADAPTER_RTRN_FIFO_DEPTH ) ) i_rtrn_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( 1'b0 ), .testmode_i ( 1'b0 ), .full_o ( rtrn_fifo_full ), .empty_o ( rtrn_fifo_empty ), .alm_full_o ( ), .alm_empty_o ( ), .data_i ( l15_rtrn_i ), .push_i ( l15_req_o.l15_req_ack ), .data_o ( rtrn_fifo_data ), .pop_i ( rtrn_fifo_pop ) ); /////////////////////////////////////////////////////// // assertions /////////////////////////////////////////////////////// //pragma translate_off `ifndef VERILATOR invalidations: assert property ( @(posedge clk_i) disable iff (~rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_EVICT_REQ |-> (l15_rtrn_i.l15_inval_icache_inval | l15_rtrn_i.l15_inval_dcache_inval | l15_rtrn_i.l15_inval_icache_all_way | l15_rtrn_i.l15_inval_dcache_all_way)) else $fatal(1,"[l15_adapter] got invalidation package with zero invalidation flags"); blockstore_o: assert property ( @(posedge clk_i) disable iff (~rst_ni) l15_req_o.l15_val |-> l15_req_o.l15_rqtype == L15_STORE_RQ |-> !(l15_req_o.l15_blockstore || l15_req_o.l15_blockinitstore)) else $fatal(1,"[l15_adapter] blockstores are not supported (out)"); blockstore_i: assert property ( @(posedge clk_i) disable iff (~rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_ST_ACK, L15_ST_ACK} |-> !l15_rtrn_i.l15_blockinitstore) else $fatal(1,"[l15_adapter] blockstores are not supported (in)"); unsuported_rtrn_types: assert property ( @(posedge clk_i) disable iff (~rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_LOAD_RET, L15_ST_ACK, L15_IFILL_RET, L15_EVICT_REQ, L15_CPX_RESTYPE_ATOMIC_RES})) else $warning("[l15_adapter] return type %X04 is not (yet) supported by l15 adapter.", l15_rtrn_i.l15_returntype); amo_type: assert property ( @(posedge clk_i) disable iff (~rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_CPX_RESTYPE_ATOMIC_RES} |-> l15_rtrn_i.l15_atomic )) else $fatal(1,"[l15_adapter] l15_atomic must be asserted when the return type is an ATOMIC_RES"); initial begin // assert wrong parameterizations assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC) else $fatal(1,"[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways"); // assert wrong parameterizations assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC) else $fatal(1,"[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways"); // invalidation address returned by L1.5 is 16 bit assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH) else $fatal(1,"[l15_adapter] maximum number of index bits supported by L1.5 is 16"); end `endif //pragma translate_on endmodule // serpent_l15_adapter