Commit 93d2b4c5 by 刘昀达

all work

parent 8d397ea0
{
"files.associations": {
"*.tcc": "cpp",
"array": "cpp",
"string": "cpp",
"*.tup": "cpp",
"atomic": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"map": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"random": "cpp",
"set": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"fstream": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"ostream": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"cfenv": "cpp",
"typeinfo": "cpp"
}
}
\ No newline at end of file
# Copyright (c) 2009-2011, Tor M. Aamodt, Timothy G. Rogers, Wilson W.L. Fung
# Ali Bakhoda, Ivan Sham
# The University of British Columbia
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# Neither the name of The University of British Columbia nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# GPGPU-Sim Makefile
# Build knobs: DEBUG=1 drops -O3 from OPTFLAGS; TRACE=1 (the default in this
# file) adds -DTRACING_ON=1.
DEBUG?=0
TRACE?=1

# NOTE(review): CUDART_VERSION and GNUC_CPP0X are presumably set by this
# include -- confirm against version_detection.mk.
include ../version_detection.mk

CXXFLAGS = -Wall -DDEBUG
CXXFLAGS += -DCUDART_VERSION=$(CUDART_VERSION)

ifeq ($(GNUC_CPP0X), 1)
CXXFLAGS += -std=c++0x
endif

ifeq ($(TRACE),1)
CXXFLAGS += -DTRACING_ON=1
endif

ifneq ($(DEBUG),1)
OPTFLAGS += -O3
else
CXXFLAGS +=
endif

OPTFLAGS += -g3 -fPIC

CPP = g++ $(SNOW)
OEXT = o

# Every object, archive and dependency file is written below OUTPUT_DIR.
OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)

SRCS = $(shell ls *.cc)
OBJS = $(SRCS:%.cc=$(OUTPUT_DIR)/%.$(OEXT))

# Archive the objects of this directory together with the ones produced by the
# ./gpgpu-sim sub-build.
$(OUTPUT_DIR)/libgpgpusim.a: $(OBJS) gpu_uarch_simlib
	ar rcs $(OUTPUT_DIR)/libgpgpusim.a $(OBJS) $(OUTPUT_DIR)/gpgpu-sim/*.o

# Use $(MAKE) rather than a literal "make" so that command-line flags and the
# parallel jobserver are passed on to the sub-make.
gpu_uarch_simlib:
	$(MAKE) -C ./gpgpu-sim

$(OUTPUT_DIR)/Makefile.makedepend: depend

depend:
	touch $(OUTPUT_DIR)/Makefile.makedepend
	makedepend -f$(OUTPUT_DIR)/Makefile.makedepend -p$(OUTPUT_DIR)/ $(SRCS) 2> /dev/null

clean:
	rm -f *.o core *~ *.a Makefile.makedepend Makefile.makedepend.bak

$(OUTPUT_DIR)/%.$(OEXT): %.cc
	$(CPP) $(OPTFLAGS) $(CXXFLAGS) -o $(OUTPUT_DIR)/$*.$(OEXT) -c $*.cc

# Objects are built under $(OUTPUT_DIR), so the header dependency has to name
# that path; a bare option_parser.o target is never built by the rules above.
$(OUTPUT_DIR)/option_parser.$(OEXT): option_parser.h

include $(OUTPUT_DIR)/Makefile.makedepend
# Copyright (c) 2009-2011, Tor M. Aamodt
# Wilson W.L. Fung, Timothy G. Rogers, Ali Bakhoda
# The University of British Columbia
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# Neither the name of The University of British Columbia nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Default goal: the archive target defined further down.
default: libgpgpu_ptx_sim.a
# Build knobs. INTEL=1 switches the compilers to icpc/icc; DEBUG=1 drops -O3;
# TRACE=1 adds -DTRACING_ON=1 (off by default in this file).
INTEL=0
DEBUG?=0
TRACE?=0
CPP = g++ $(SNOW)
ifeq ($(INTEL),1)
CPP = icpc
CC = icc
endif
# NOTE(review): CUDART_VERSION and GNUC_CPP0X are presumably set by this
# include -- confirm against version_detection.mk.
include ../../version_detection.mk
# All generated sources, objects and the archive are written below OUTPUT_DIR.
OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/cuda-sim
OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare
ifeq ($(DEBUG),1)
OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare
endif
OPT += -I$(CUDA_INSTALL_PATH)/include -I$(OUTPUT_DIR)/ -I.
OPT += -fPIC
ifeq ($(TRACE),1)
OPT += -DTRACING_ON=1
endif
# CXX_OPT is recursively expanded, so flags appended to OPT later (such as
# -DCUDART_VERSION below) are still seen when CXX_OPT is used in a recipe.
CXX_OPT = $(OPT)
ifeq ($(INTEL),1)
CXX_OPT += -std=c++0x
else
ifeq ($(GNUC_CPP0X),1)
CXX_OPT += -std=c++0x
endif
endif
OBJS := $(OUTPUT_DIR)/ptx_parser.o $(OUTPUT_DIR)/ptx_loader.o $(OUTPUT_DIR)/cuda_device_printf.o $(OUTPUT_DIR)/instructions.o $(OUTPUT_DIR)/cuda-sim.o $(OUTPUT_DIR)/ptx_ir.o $(OUTPUT_DIR)/ptx_sim.o $(OUTPUT_DIR)/memory.o $(OUTPUT_DIR)/ptx-stats.o $(OUTPUT_DIR)/decuda_pred_table/decuda_pred_table.o $(OUTPUT_DIR)/ptx.tab.o $(OUTPUT_DIR)/lex.ptx_.o $(OUTPUT_DIR)/ptxinfo.tab.o $(OUTPUT_DIR)/lex.ptxinfo_.o
OPT += -DCUDART_VERSION=$(CUDART_VERSION)
SRCS = $(shell ls *.cc)
# The dependency file included at the bottom of this Makefile is regenerated
# through the 'depend' target.
$(OUTPUT_DIR)/Makefile.makedepend: depend
depend:
touch $(OUTPUT_DIR)/Makefile.makedepend
makedepend -f$(OUTPUT_DIR)/Makefile.makedepend -p$(OUTPUT_DIR)/ $(SRCS) 2> /dev/null
# The archive itself is written to $(OUTPUT_DIR), not to the target name in the
# current directory. The four parser/lexer objects appear both explicitly and
# through $(OBJS).
libgpgpu_ptx_sim.a: $(OBJS)
ar rcs $(OUTPUT_DIR)/libgpgpu_ptx_sim.a $(OUTPUT_DIR)/ptx.tab.o $(OUTPUT_DIR)/lex.ptx_.o $(OUTPUT_DIR)/ptxinfo.tab.o $(OUTPUT_DIR)/lex.ptxinfo_.o $(OBJS)
# Generated parsers/lexers are compiled with $(CPP) (the C++ compiler) using
# OPT; the two bison outputs additionally get -DYYDEBUG.
$(OUTPUT_DIR)/ptx.tab.o: $(OUTPUT_DIR)/ptx.tab.c
$(CPP) -c $(OPT) -DYYDEBUG $(OUTPUT_DIR)/ptx.tab.c -o $(OUTPUT_DIR)/ptx.tab.o
$(OUTPUT_DIR)/lex.ptx_.o: $(OUTPUT_DIR)/lex.ptx_.c
$(CPP) -c $(OPT) $(OUTPUT_DIR)/lex.ptx_.c -o $(OUTPUT_DIR)/lex.ptx_.o
$(OUTPUT_DIR)/ptxinfo.tab.o: $(OUTPUT_DIR)/ptxinfo.tab.c
$(CPP) -c $(OPT) -DYYDEBUG $(OUTPUT_DIR)/ptxinfo.tab.c -o $(OUTPUT_DIR)/ptxinfo.tab.o
$(OUTPUT_DIR)/lex.ptxinfo_.o: $(OUTPUT_DIR)/lex.ptxinfo_.c $(OUTPUT_DIR)/ptxinfo.tab.c
$(CPP) -c $(OPT) $(OUTPUT_DIR)/lex.ptxinfo_.c -o $(OUTPUT_DIR)/lex.ptxinfo_.o
# bison/flex generation; symbol prefixes ptx_ and ptxinfo_ keep the two parsers
# apart.
$(OUTPUT_DIR)/ptx.tab.c: ptx.y
bison --name-prefix=ptx_ -v -d ptx.y --file-prefix=$(OUTPUT_DIR)/ptx
$(OUTPUT_DIR)/ptxinfo.tab.c: ptxinfo.y
bison --name-prefix=ptxinfo_ -v -d ptxinfo.y --file-prefix=$(OUTPUT_DIR)/ptxinfo
$(OUTPUT_DIR)/lex.ptx_.c: ptx.l
flex --outfile=$(OUTPUT_DIR)/lex.ptx_.c ptx.l
$(OUTPUT_DIR)/lex.ptxinfo_.c: ptxinfo.l
flex --outfile=$(OUTPUT_DIR)/lex.ptxinfo_.c ptxinfo.l
# NOTE(review): the first rm uses paths relative to the current directory,
# while the generated files above are written under $(OUTPUT_DIR) -- confirm
# whether the build directory is cleaned elsewhere.
clean:
rm -f *~ *.o *.gcda *.gcno *.gcov libgpgpu_ptx_sim.a \
ptx.tab.h ptx.tab.c ptx.output lex.ptx_.c \
ptxinfo.tab.h ptxinfo.tab.c ptxinfo.output lex.ptxinfo_.c \
instructions.h ptx_parser_decode.def directed_tests.log
rm -f $(OUTPUT_DIR)/decuda_pred_table/*.o
rm -f $(OUTPUT_DIR)/Makefile.makedepend $(OUTPUT_DIR)/Makefile.makedepend.bak
# Pattern rules: .c sources are compiled with OPT, .cc sources with CXX_OPT.
$(OUTPUT_DIR)/%.o: %.c
$(CPP) -c $(OPT) $< -o $(OUTPUT_DIR)/$*.o
$(OUTPUT_DIR)/%.o: %.cc
$(CPP) -c $(CXX_OPT) $< -o $(OUTPUT_DIR)/$*.o
# Generates instructions.h from instructions.cc: every line containing "_impl("
# is cut at its first '{' and terminated with ';', then wrapped in an include
# guard. The header is left read-only. This is an explicit rule, so $* here is
# the target name with its .h suffix removed.
$(OUTPUT_DIR)/instructions.h: instructions.cc
@touch $*.h
@chmod +w $*.h
@echo "// DO NOT EDIT THIS FILE! IT IS AUTOMATICALLY GENERATED BY THE MAKEFILE (see target for instructions.h)" > $*.h
@echo "#include \"ptx_ir.h\"" >> $*.h
@echo "#ifndef instructions_h_included" >> $*.h
@echo "#define instructions_h_included" >> $*.h
@cat $< | grep "_impl(" | sed 's/{.*//' | sed 's/$$/;/' >> $*.h
@echo "#endif" >> $*.h
@chmod -w $*.h
@echo "created $(OUTPUT_DIR)/instructions.h"
# Rewrites every line of ptx.tab.h that contains '=' into a DEF(NAME,"NAME")
# entry. The Linux branch uses GNU sed escapes (\+), the other branch uses
# sed -E.
$(OUTPUT_DIR)/ptx_parser_decode.def: $(OUTPUT_DIR)/ptx.tab.c
ifeq ($(shell uname),Linux)
cat $(OUTPUT_DIR)/ptx.tab.h | grep "=" | sed 's/^[ ]\+//' | sed 's/[=,]//g' | sed 's/\([_A-Z1-9]\+\)[ ]\+\([0-9]\+\)/\1 \1/' | sed 's/^/DEF(/' | sed 's/ /,"/' | sed 's/$$/")/' > $(OUTPUT_DIR)/ptx_parser_decode.def
else
cat $(OUTPUT_DIR)/ptx.tab.h | grep "=" | sed -E 's/^ +//' | sed 's/[=,]//g' | sed -E 's/([_A-Z1-9]+).*/\1 \1/' | sed 's/^/DEF(/' | sed 's/ /,"/' | sed 's/$$/")/' > $(OUTPUT_DIR)/ptx_parser_decode.def
endif
# Hand-written ordering constraints: these objects are only compiled after the
# generated parser sources (and, where listed, the decode table) exist.
$(OUTPUT_DIR)/instructions.o: $(OUTPUT_DIR)/instructions.h $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/cuda_device_printf.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/ptx_ir.o: $(OUTPUT_DIR)/ptx.tab.c $(OUTPUT_DIR)/ptx_parser_decode.def
$(OUTPUT_DIR)/ptx_loader.o: $(OUTPUT_DIR)/ptx.tab.c $(OUTPUT_DIR)/ptx_parser_decode.def
$(OUTPUT_DIR)/ptx_parser.o: $(OUTPUT_DIR)/ptx.tab.c $(OUTPUT_DIR)/ptx_parser_decode.def
$(OUTPUT_DIR)/ptxinfo.tab.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/ptx-stats.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/ptx_sim.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/cuda-sim.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/lex.ptxinfo_.o: $(OUTPUT_DIR)/ptx.tab.c
$(OUTPUT_DIR)/lex.ptx_.o: $(OUTPUT_DIR)/ptx.tab.c
include $(OUTPUT_DIR)/Makefile.makedepend
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CUDASIM_H_INCLUDED
#define CUDASIM_H_INCLUDED
#include "../abstract_hardware_model.h"
#include"../gpgpu-sim/shader.h"
#include <stdlib.h>
#include <map>
#include <string>
#include"ptx_sim.h"
// Forward declarations; the full definitions are not part of this header.
class memory_space;
class function_info;
class symbol_table;
// Globals defined outside this header.
extern const char *g_gpgpusim_version_string;
extern int g_ptx_sim_mode;
extern int g_debug_execution;
extern int g_debug_thread_uid;
extern void ** g_inst_classification_stat;
extern void ** g_inst_op_classification_stat;
extern int g_ptx_kernel_count; // used for classification stat collection purposes
// NOTE(review): the identifier is spelled "opcocde" (sic); renaming it would
// break existing callers.
void ptx_opcocde_latency_options (option_parser_t opp);
// Returns a kernel_info_t for the given entry function, arguments and
// grid/block dimensions. NOTE(review): ownership of the returned pointer is
// not visible from this header -- confirm at the definition.
extern class kernel_info_t *gpgpu_opencl_ptx_sim_init_grid(class function_info *entry,
gpgpu_ptx_sim_arg_list_t args,
struct dim3 gridDim,
struct dim3 blockDim,
class gpgpu_t *gpu );
// openCL defaults to false.
extern void gpgpu_cuda_ptx_sim_main_func( kernel_info_t &kernel, bool openCL = false );
extern void print_splash();
extern void gpgpu_ptx_sim_register_const_variable(void*, const char *deviceName, size_t size );
extern void gpgpu_ptx_sim_register_global_variable(void *hostVar, const char *deviceName, size_t size );
extern void gpgpu_ptx_sim_memcpy_symbol(const char *hostVar, const void *src, size_t count, size_t offset, int to, gpgpu_t *gpu );
extern void read_sim_environment_variables();
extern void ptxinfo_opencl_addinfo( std::map<std::string,function_info*> &kernels );
// functionalSimulationMode defaults to false. NOTE(review): the meaning of the
// unsigned return value is not visible here -- confirm at the definition.
unsigned ptx_sim_init_thread( kernel_info_t &kernel,
class ptx_thread_info** thread_info,
int sid,
unsigned tid,
unsigned threads_left,
unsigned num_threads,
class core_t *core,
unsigned hw_cta_id,
unsigned hw_warp_id,
gpgpu_t *gpu,
bool functionalSimulationMode = false);
// Instruction lookup and printing helpers keyed by program counter.
const warp_inst_t *ptx_fetch_inst( address_type pc );
const struct gpgpu_ptx_sim_kernel_info* ptx_sim_kernel_info(const class function_info *kernel);
void ptx_print_insn( address_type pc, FILE *fp );
std::string ptx_get_insn_str( address_type pc );
void set_param_gpgpu_num_shaders(int num_shaders);
/*!
 * Functionally executes a kernel, reusing the basic data structures and
 * procedures provided by core_t.
 */
class functionalCoreSim: public core_t
{
public:
    //! Allocates one live-thread counter and one barrier flag per warp.
    //! The arrays are allocated here but not filled in.
    functionalCoreSim(kernel_info_t * kernel, gpgpu_sim *g, unsigned warp_size)
        : core_t( g, kernel, warp_size, kernel->threads_per_cta() ),
          m_liveThreadCount( new unsigned [m_warp_count] ),
          m_warpAtBarrier( new bool [m_warp_count] )
    {
    }

    //! Calls warp_exit(0) and then releases the per-warp arrays.
    virtual ~functionalCoreSim()
    {
        warp_exit(0);
        delete[] m_warpAtBarrier;
        delete[] m_liveThreadCount;
    }

    //! executes all warps till completion
    void execute();

    virtual void warp_exit( unsigned warp_id );

    //! A warp counts as waiting when its barrier flag is set or when it has
    //! no live threads left.
    virtual bool warp_waiting_at_barrier( unsigned warp_id ) const
    {
        if( m_warpAtBarrier[warp_id] )
            return true;
        return m_liveThreadCount[warp_id] == 0;
    }

private:
    void executeWarp(unsigned, bool &, bool &);

    //! initializes the threads of the CTA block being executed
    void initializeCTA();

    //! Decrements the live-thread counter of the warp that owns thread `tid`
    //! when that thread has no context or reports that it is done.
    virtual void checkExecutionStatusAndUpdate(warp_inst_t &inst, unsigned t, unsigned tid)
    {
        const bool finished = (m_thread[tid]==NULL) || m_thread[tid]->is_done();
        if( !finished )
            return;
        m_liveThreadCount[tid/m_warp_size]--;
    }

    //! launches the stack and sets the thread count
    void createWarp(unsigned warpId);

    //! per-warp live thread count and per-warp barrier indicator
    unsigned * m_liveThreadCount;
    bool* m_warpAtBarrier;
};
// Sentinel program-counter values obtained by casting -2 and -1 to
// address_type. NOTE(review): presumably address_type is unsigned, making
// these the two largest representable values -- confirm.
#define RECONVERGE_RETURN_PC ((address_type)-2)
#define NO_BRANCH_DIVERGENCE ((address_type)-1)
// NOTE(review): thd is an untyped pointer; presumably a ptx_thread_info* --
// confirm at the definition.
address_type get_return_pc( void *thd );
// Accessors for ptxinfo state kept outside this header.
const char *get_ptxinfo_kname();
void print_ptxinfo();
void clear_ptxinfo();
struct gpgpu_ptx_sim_kernel_info get_ptxinfo_kinfo();
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cuda_device_printf.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ptx_ir.h"
// Defined outside this file. It is called below with a generic-space address
// to determine the memory space to read from; space, mem and addr are taken by
// non-const reference.
void decode_space( memory_space_t &space, ptx_thread_info *thread, const operand_info &op, memory_space *&mem, addr_t &addr);
// Minimal host-side formatter used to emulate device-side printf.
//
// fmtstr   - NUL-terminated format string. Only the bare conversions %u, %d
//            and %f are supported (no flags, width, precision or "%%"); any
//            other character after '%' prints an error and aborts.
// arg_list - packed argument buffer. Each argument is expected at an offset
//            aligned to its own size: %d/%u consume a 4-byte integer, %f an
//            8-byte double. NOTE(review): this is the layout the CUDA
//            toolchain uses for vprintf argument buffers -- confirm for the
//            PTX versions this simulator targets.
//
// The caller must provide a buffer large enough for every conversion in
// fmtstr; its size is not known here, so it cannot be checked.
void my_cuda_printf(const char *fmtstr,const char *arg_list)
{
    FILE *fp = stdout;
    unsigned i = 0;
    unsigned arg_offset = 0;   // byte offset of the next argument in arg_list
    bool in_fmt = false;
    while( fmtstr[i] ) {
        const char c = fmtstr[i++];
        if( !in_fmt ) {
            if( c != '%' )
                fprintf(fp,"%c",c);
            else
                in_fmt = true;
            continue;
        }
        if( !( c == 'u' || c == 'f' || c == 'd' ) ) {
            printf("GPGPU-Sim PTX: ERROR ** printf parsing support is limited to %%u, %%f, %%d at present");
            fflush(stdout);   // abort() does not flush stdio buffers
            abort();
        }
        const char spec[3] = { '%', c, 0 };
        if( c == 'f' ) {
            double value;
            // round the cursor up to the argument's natural alignment
            arg_offset = (arg_offset + (unsigned)sizeof(value) - 1u) & ~((unsigned)sizeof(value) - 1u);
            // memcpy avoids reading through a mis-typed pointer
            memcpy(&value, arg_list + arg_offset, sizeof(value));
            fprintf(fp, spec, value);
            arg_offset += (unsigned)sizeof(value);
        } else if( c == 'd' ) {
            int value;
            arg_offset = (arg_offset + (unsigned)sizeof(value) - 1u) & ~((unsigned)sizeof(value) - 1u);
            memcpy(&value, arg_list + arg_offset, sizeof(value));
            fprintf(fp, spec, value);
            arg_offset += (unsigned)sizeof(value);
        } else {
            unsigned value;
            arg_offset = (arg_offset + (unsigned)sizeof(value) - 1u) & ~((unsigned)sizeof(value) - 1u);
            memcpy(&value, arg_list + arg_offset, sizeof(value));
            fprintf(fp, spec, value);
            arg_offset += (unsigned)sizeof(value);
        }
        in_fmt = false;
    }
}
// Emulates a device-side vprintf call.
//
// pI          - the call instruction; its operands after the return slot(s)
//               are the actual parameters.
// thread      - calling thread; its local memory holds the parameter values.
// target_func - the callee, which must take exactly two arguments.
//
// Argument 0 is read as a pointer to the NUL-terminated format string and
// argument 1 as a pointer to the packed argument buffer. Both are copied into
// host memory and handed to my_cuda_printf.
void gpgpusim_cuda_vprintf(const ptx_instruction * pI, ptx_thread_info * thread, const function_info * target_func )
{
    char *fmtstr = NULL;
    char *arg_list = NULL;
    unsigned n_return = target_func->has_return();
    unsigned n_args = target_func->num_args();
    assert( n_args == 2 );
    for( unsigned arg=0; arg < n_args; arg ++ ) {
        const operand_info &actual_param_op = pI->operand_lookup(n_return+1+arg);
        const symbol *formal_param = target_func->get_arg(arg);
        unsigned size=formal_param->get_size_in_bytes();
        assert( formal_param->is_param_local() );
        assert( actual_param_op.is_param_local() );
        addr_t from_addr = actual_param_op.get_symbol()->get_address();
        // Zero-initialised so that buffer[0] is fully defined even when the
        // parameter is narrower than an unsigned long long.
        unsigned long long buffer[1024] = {0};
        assert(size<1024*sizeof(unsigned long long));
        thread->m_local_mem->read(from_addr,size,buffer);
        addr_t addr = (addr_t)buffer[0]; // should be pointer to generic memory location
        memory_space *mem=NULL;
        memory_space_t space = generic_space;
        decode_space(space,thread,actual_param_op,mem,addr); // figure out which space
        assert( mem != NULL );
        if( arg == 0 ) {
            // Measure the string including its terminating NUL, then copy it.
            unsigned len = 0;
            char b = 0;
            do {
                mem->read(addr+len,1,&b);
                len++;
            } while(b);
            fmtstr = (char*)malloc(len+64);
            assert( fmtstr != NULL );
            for( unsigned i=0; i < len; i++ )
                mem->read(addr+i,1,fmtstr+i);
        } else {
            // The size of the argument buffer is not passed along, so a whole
            // local-memory frame is copied. calloc keeps the slack bytes
            // deterministic.
            unsigned len = thread->get_finfo()->local_mem_framesize();
            arg_list = (char*)calloc(1,len+64);
            assert( arg_list != NULL );
            for( unsigned i=0; i < len; i++ )
                mem->read(addr+i,1,arg_list+i);
        }
    }
    my_cuda_printf(fmtstr,arg_list);
    free(fmtstr);
    free(arg_list);
}
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CUDA_DEVICE_PRINTF_INCLUDED
#define CUDA_DEVICE_PRINTF_INCLUDED
// Entry point for emulating a device-side vprintf call. The three types are
// introduced inline with elaborated 'class' specifiers, so this header needs
// no further includes.
void gpgpusim_cuda_vprintf(const class ptx_instruction * pI, class ptx_thread_info * thread, const class function_info * target_func );
#endif
/*Copyright (c) 2007, Wladimir J. van der Laan
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
#include "decuda_pred_table.h"
// Looks up whether a condition code holds for a given combination of flags.
//
// condition - condition code, valid range 0x00..0x1f (row of the table).
// flags     - bitwise OR of the flag bits listed below, valid range 0x0..0xf
//             (column of the table).
//
// Returns the table entry. Arguments outside the valid ranges yield false
// instead of indexing past the end of the table.
bool pred_lookup(int condition, int flags)
{
    // Logic table for G80 architecture, all condition codes against all
    // flag combinations. This was evaluated on actual hardware.
    // The flags are assigned to values like this:
    //    1  Z  zero flag
    //    2  S  sign flag
    //    4  C  carry flag
    //    8  O  overflow flag
    //
    //  fl 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
    static bool const pred_table[32][16] =
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},  // 00 fl
     {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1},  // 01 lt
     {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0},  // 02 eq
     {0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0},  // 03 le
     {1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0},  // 04 gt
     {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},  // 05 ne (also nz)
     {1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1},  // 06 ge
     {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0},  // 07 leg
     {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},  // 08 nan
     {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0},  // 09 ltu
     {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},  // 0a equ (also zf)
     {0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1},  // 0b leu
     {1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1},  // 0c gtu
     {1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1},  // 0d neu
     {1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0},  // 0e geu
     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},  // 0f tr
     {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},  // 10 of
     {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},  // 11 cf
     {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0},  // 12 ab
     {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},  // 13 sf
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},  // 1c nsf
     {1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1},  // 1d ble
     {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},  // 1e ncf
     {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}}; // 1f nof

    // Guard the lookup: both arguments are plain ints, so an out-of-range
    // value would otherwise read outside the table.
    const int n_conditions = (int)(sizeof(pred_table) / sizeof(pred_table[0]));
    const int n_flag_combos = (int)(sizeof(pred_table[0]) / sizeof(pred_table[0][0]));
    if( condition < 0 || condition >= n_conditions )
        return false;
    if( flags < 0 || flags >= n_flag_combos )
        return false;
    return pred_table[condition][flags];
}
// Returns whether condition code `condition` holds for the flag combination
// `flags`. NOTE(review): this header has no include guard; that is harmless
// while it only contains this declaration.
bool pred_lookup(int condition, int flags);
This source diff could not be displayed because it is too large. You can view the blob instead.
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung,
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "memory.h"
#include <stdlib.h>
#include "../debug.h"
// Builds an empty memory space called `name`. hash_size is forwarded to
// MEM_MAP_RESIZE. The block size BSIZE must have exactly one bit set; the
// position of that bit is stored in m_log2_block_size.
template<unsigned BSIZE> memory_space_impl<BSIZE>::memory_space_impl( std::string name, unsigned hash_size )
{
    m_name = name;
    MEM_MAP_RESIZE(hash_size);

    m_log2_block_size = -1;
    unsigned bit = 0;
    unsigned mask = 1;
    while( mask != 0 ) {
        if( (BSIZE & mask) != 0 ) {
            // a second set bit would mean BSIZE is not a power of two
            assert( m_log2_block_size == (unsigned)-1 );
            m_log2_block_size = bit;
        }
        mask <<= 1;
        bit++;
    }
    assert( m_log2_block_size != (unsigned)-1 );
}
// Stores `length` bytes from `data` at address `addr`, splitting the transfer
// at block boundaries when needed. Afterwards hit_watchpoint(id, thd, pI) is
// called for every registered watchpoint that the write touches.
template<unsigned BSIZE> void memory_space_impl<BSIZE>::write( mem_addr_t addr, size_t length, const void *data, class ptx_thread_info *thd, const ptx_instruction *pI)
{
    const unsigned char *src = (const unsigned char*)data;
    mem_addr_t first_blk = addr >> m_log2_block_size;
    if ( (addr+length) > (first_blk+1)*BSIZE ) {
        // the range crosses a block boundary: copy one block-sized piece at a time
        unsigned remaining = length;
        unsigned consumed = 0;
        mem_addr_t cursor = addr;
        while (remaining > 0) {
            unsigned blk_offset = cursor & (BSIZE-1);
            mem_addr_t blk = cursor >> m_log2_block_size;
            // clamp the end of this piece to the end of the block
            mem_addr_t limit = blk_offset + remaining;
            if (limit > BSIZE)
                limit = BSIZE;
            size_t chunk = limit - blk_offset;
            m_data[blk].write(blk_offset, chunk, &src[consumed]);
            consumed += chunk;
            cursor += chunk;
            remaining -= chunk;
        }
        assert(remaining == 0);
    } else {
        // the whole range lives inside a single block
        unsigned blk_offset = addr & (BSIZE-1);
        unsigned nbytes = length;
        m_data[first_blk].write(blk_offset, nbytes, src);
    }

    if( m_watchpoints.empty() )
        return;
    for( std::map<unsigned,mem_addr_t>::iterator wp = m_watchpoints.begin(); wp != m_watchpoints.end(); wp++ ) {
        mem_addr_t watched = wp->second;
        // the write starts at or before the watched address and reaches past it ...
        const bool covers_start = (addr<=watched) && ((addr+length)>watched);
        // ... or it starts within the 4 bytes that begin at the watched address
        const bool starts_inside = (addr>watched) && (addr < (watched+4));
        if( covers_start || starts_inside )
            hit_watchpoint(wp->first,thd,pI);
    }
}
// Reads `length` bytes at `addr` from block `blk_idx` into `data`. The range
// must not extend past the end of that block; otherwise a diagnostic is
// printed and the integer 1 is thrown. A block that was never written reads
// back as zeros.
template<unsigned BSIZE> void memory_space_impl<BSIZE>::read_single_block( mem_addr_t blk_idx, mem_addr_t addr, size_t length, void *data) const
{
    const bool past_block_end = (addr + length) > (blk_idx + 1) * BSIZE;
    if (past_block_end) {
        printf("GPGPU-Sim PTX: ERROR * access to memory \'%s\' is unaligned : addr=0x%x, length=%zu\n",
               m_name.c_str(), addr, length);
        printf("GPGPU-Sim PTX: (addr+length)=0x%lx > 0x%x=(index+1)*BSIZE, index=0x%x, BSIZE=0x%x\n",
               (addr+length),(blk_idx+1)*BSIZE, blk_idx, BSIZE);
        throw 1;
    }

    unsigned char *dst = (unsigned char*)data;
    typename map_t::const_iterator blk = m_data.find(blk_idx);
    if( blk != m_data.end() ) {
        unsigned blk_offset = addr & (BSIZE-1);
        unsigned nbytes = length;
        blk->second.read(blk_offset,nbytes,dst);
        return;
    }

    // no backing block: report zeros (deliberately without a warning)
    for( size_t n=0; n < length; n++ )
        dst[n] = 0;
}
// Reads `length` bytes starting at `addr` into `data`, splitting the transfer
// at block boundaries when the range spans more than one block.
template<unsigned BSIZE> void memory_space_impl<BSIZE>::read( mem_addr_t addr, size_t length, void *data ) const
{
    mem_addr_t first_blk = addr >> m_log2_block_size;
    if ( (addr+length) > (first_blk+1)*BSIZE ) {
        // the range crosses a block boundary: read one block-sized piece at a time
        unsigned char *dst = (unsigned char*)data;
        unsigned remaining = length;
        unsigned produced = 0;
        mem_addr_t cursor = addr;
        while (remaining > 0) {
            unsigned blk_offset = cursor & (BSIZE-1);
            mem_addr_t blk = cursor >> m_log2_block_size;
            // clamp the end of this piece to the end of the block
            mem_addr_t limit = blk_offset + remaining;
            if (limit > BSIZE)
                limit = BSIZE;
            size_t chunk = limit - blk_offset;
            read_single_block(blk, cursor, chunk, &dst[produced]);
            produced += chunk;
            cursor += chunk;
            remaining -= chunk;
        }
        assert(remaining == 0);
    } else {
        // the whole range lives inside a single block
        read_single_block(first_blk, addr, length, data);
    }
}
// Dumps every stored block to `fout`: a "<space name> - <block index>:" prefix
// followed by the block's own print(format, fout) output.
template<unsigned BSIZE> void memory_space_impl<BSIZE>::print( const char *format, FILE *fout ) const
{
    for( typename map_t::const_iterator page = m_data.begin(); page != m_data.end(); ++page ) {
        fprintf(fout, "%s - %#x:", m_name.c_str(), page->first);
        page->second.print(format, fout);
    }
}
// Registers (or re-targets) watchpoint number `watchpoint` at address `addr`.
template<unsigned BSIZE> void memory_space_impl<BSIZE>::set_watch( addr_t addr, unsigned watchpoint )
{
    // operator[] creates the entry on first use and overwrites it otherwise
    mem_addr_t &watched = m_watchpoints[watchpoint];
    watched = addr;
}
// Explicit instantiations: the member functions above are defined in this
// source file, so every block size used elsewhere must be instantiated here.
// Each size is a power of two, as the constructor's assertions require.
template class memory_space_impl<32>;
template class memory_space_impl<64>;
template class memory_space_impl<8192>;
template class memory_space_impl<16*1024>;
// Free-function wrapper around memory_space::print. By default each entry is
// formatted with "%08x" and written to stdout.
void g_print_memory_space(memory_space *mem, const char *format = "%08x", FILE *fout = stdout)
{
    memory_space &space = *mem;
    space.print(format, fout);
}
#ifdef UNIT_TEST
// Stand-alone self test, compiled only when UNIT_TEST is defined.
// It writes and reads back 16KB of a 32-byte-block memory space, first in
// 4-byte words and then byte by byte. The process exit status is 0 when every
// value matched and 1 otherwise, so the outcome can be checked by scripts.
int main(int argc, char *argv[] )
{
    int errors_found=0;
    // NOTE(review): mem is intentionally not deleted; whether memory_space has
    // a virtual destructor is not visible from here.
    memory_space *mem = new memory_space_impl<32>("test",4);

    // write address to [address]
    for( mem_addr_t addr=0; addr < 16*1024; addr+=4)
        mem->write(addr,4,&addr,NULL,NULL);

    for( mem_addr_t addr=0; addr < 16*1024; addr+=4) {
        unsigned tmp=0;
        mem->read(addr,4,&tmp);
        if( tmp != addr ) {
            errors_found=1;
            printf("ERROR ** mem[0x%x] = 0x%x, expected 0x%x\n", addr, tmp, addr );
        }
    }

    // overwrite every byte with a value derived from its address
    for( mem_addr_t addr=0; addr < 16*1024; addr+=1) {
        unsigned char val = (addr + 128) % 256;
        mem->write(addr,1,&val,NULL,NULL);
    }

    for( mem_addr_t addr=0; addr < 16*1024; addr+=1) {
        // read the single byte into a byte-sized object so the comparison does
        // not depend on host byte order
        unsigned char tmp=0;
        mem->read(addr,1,&tmp);
        unsigned char val = (addr + 128) % 256;
        if( tmp != val ) {
            errors_found=1;
            printf("ERROR ** mem[0x%x] = 0x%x, expected 0x%x\n", addr, (unsigned)tmp, (unsigned)val );
        }
    }

    if( errors_found ) {
        printf("SUMMARY: ERRORS FOUND\n");
    } else {
        printf("SUMMARY: UNIT TEST PASSED\n");
    }
    return errors_found;
}
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef memory_h_INCLUDED
#define memory_h_INCLUDED
#include "../abstract_hardware_model.h"
#include "../tr1_hash_map.h"
// mem_map is the associative container used for the block table; it is an
// alias for whatever tr1_hash_map expands to (see ../tr1_hash_map.h).
#define mem_map tr1_hash_map
// MEM_MAP_RESIZE(hash_size) expands to nothing when tr1_hash_map_ismap == 1
// and otherwise to m_data.rehash(hash_size). It therefore only makes sense
// inside a scope that has a member or variable named m_data.
#if tr1_hash_map_ismap == 1
#define MEM_MAP_RESIZE(hash_size)
#else
#define MEM_MAP_RESIZE(hash_size) (m_data.rehash(hash_size))
#endif
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <string>
#include <map>
#include <stdlib.h>
typedef address_type mem_addr_t;
#define MEM_BLOCK_SIZE (4*1024)
// Fixed-size block of BSIZE bytes on the C heap, zero-filled on construction.
//
// Each object exclusively owns its buffer: copy construction and copy
// assignment duplicate the BSIZE bytes, and the destructor frees the buffer.
// (The copy-assignment operator is required; the compiler-generated one
// would copy the pointer, leaking one buffer and freeing the other twice.)
template<unsigned BSIZE> class mem_storage {
public:
   // Deep copy: allocates a fresh buffer and copies another's contents.
   mem_storage( const mem_storage &another )
      : m_data(allocate())
   {
      memcpy(m_data,another.m_data,BSIZE);
   }

   // Creates a block whose BSIZE bytes are all zero.
   mem_storage()
      : m_data(allocate())
   {
   }

   // Deep copy assignment. Both buffers are always BSIZE bytes, so the
   // existing allocation is reused. Self-assignment is a no-op (memcpy
   // must not be given overlapping ranges).
   mem_storage &operator=( const mem_storage &another )
   {
      if( this != &another )
         memcpy(m_data,another.m_data,BSIZE);
      return *this;
   }

   ~mem_storage()
   {
      free(m_data);
   }

   // Copies `length` bytes from `data` into the block starting at `offset`.
   // The range [offset, offset+length) must lie inside the block.
   void write( unsigned offset, size_t length, const unsigned char *data )
   {
      // Written without "offset + length" so a huge length cannot wrap.
      assert( offset <= BSIZE && length <= BSIZE - offset );
      memcpy(m_data+offset,data,length);
   }

   // Copies `length` bytes starting at `offset` out of the block into `data`.
   // The range [offset, offset+length) must lie inside the block.
   void read( unsigned offset, size_t length, unsigned char *data ) const
   {
      assert( offset <= BSIZE && length <= BSIZE - offset );
      memcpy(data,m_data+offset,length);
   }

   // Prints the block to fout as a sequence of unsigned ints, eight per
   // line, each formatted with `format` (a printf format taking one
   // unsigned int) and followed by a space; flushes fout at the end.
   void print( const char *format, FILE *fout ) const
   {
      const unsigned n_words = BSIZE / sizeof(unsigned int);
      for (unsigned d = 0; d < n_words; d++) {
         if (d % 8 == 0) {
            fprintf(fout, "\n");
         }
         // memcpy instead of a pointer cast: no aliasing/alignment assumptions.
         unsigned int word;
         memcpy(&word, m_data + d*sizeof(unsigned int), sizeof(unsigned int));
         fprintf(fout, format, word);
         fprintf(fout, " ");
      }
      fprintf(fout, "\n");
      fflush(fout);
   }

private:
   // Returns a zero-filled BSIZE-byte buffer; aborts if the heap is exhausted
   // rather than letting a later memcpy dereference NULL.
   static unsigned char *allocate()
   {
      unsigned char *buffer = (unsigned char*)calloc(1,BSIZE);
      if( buffer == NULL ) {
         fprintf(stderr, "mem_storage: failed to allocate %u bytes\n", BSIZE);
         abort();
      }
      return buffer;
   }

   unsigned char *m_data; // owned; always BSIZE bytes
};
class ptx_thread_info;
class ptx_instruction;
// Abstract interface to a memory space. Concrete behaviour is supplied by
// implementations such as memory_space_impl<BSIZE>.
class memory_space
{
public:
virtual ~memory_space() {}
virtual void write( mem_addr_t addr, size_t length, const void *data, ptx_thread_info *thd, const ptx_instruction *pI ) = 0; // write `length` bytes taken from `data` to address `addr`
virtual void read( mem_addr_t addr, size_t length, void *data ) const = 0; // read `length` bytes starting at address `addr` into `data`
virtual void print( const char *format, FILE *fout ) const = 0; // print the contents to file `fout` using printf format `format`
virtual void set_watch( addr_t addr, unsigned watchpoint ) = 0; // set a watchpoint (breakpoint) at address `addr`
};
// memory_space implementation that keeps its contents in a map from a
// mem_addr_t key to fixed-size mem_storage<BSIZE> blocks.
template<unsigned BSIZE> class memory_space_impl : public memory_space {
public:
// name: label printed by print(); hash_size: NOTE(review) presumably the
// initial table size passed to MEM_MAP_RESIZE -- confirm in the definition.
memory_space_impl( std::string name, unsigned hash_size );
virtual void write( mem_addr_t addr, size_t length, const void *data, ptx_thread_info *thd, const ptx_instruction *pI );
virtual void read( mem_addr_t addr, size_t length, void *data ) const;
virtual void print( const char *format, FILE *fout ) const;
virtual void set_watch( addr_t addr, unsigned watchpoint );
private:
void read_single_block( mem_addr_t blk_idx, mem_addr_t addr, size_t length, void *data) const;
std::string m_name; // printed as the prefix of every block by print()
unsigned m_log2_block_size; // NOTE(review): presumably log2(BSIZE) -- confirm in the constructor
typedef mem_map<mem_addr_t,mem_storage<BSIZE> > map_t;
map_t m_data; // block table
std::map<unsigned,mem_addr_t> m_watchpoints; // watchpoint number -> address, filled by set_watch()
};
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*5th (last) operand of each OP_DEF reflects its classification */
/*Type
ALU 1
MAD 2
Control 3
SFU 4
Mem(except Tex) 5
Tex 6
Nop 7
Other 10
*/
OP_DEF(ABS_OP,abs_impl,"abs",1,1)
OP_DEF(ADD_OP,add_impl,"add",1,1)
OP_DEF(ADDP_OP,addp_impl,"addp",1,1)
OP_DEF(ADDC_OP,addc_impl,"addc",1,1)
OP_DEF(AND_OP,and_impl,"and",1,1)
OP_DEF(ANDN_OP,andn_impl,"andn",1,1)
OP_DEF(ATOM_OP,atom_impl,"atom",1,3)
OP_DEF(BAR_OP,bar_impl,"bar",1,3)
OP_DEF(BFE_OP,bfe_impl,"bfe",1,1)
OP_DEF(BFI_OP,bfi_impl,"bfi",1,1)
OP_DEF(BFIND_OP,bfind_impl,"bfind",1,1)
OP_DEF(BRA_OP,bra_impl,"bra",0,3)
OP_DEF(BRX_OP,brx_impl,"brx",0,3)
OP_DEF(BREV_OP,brev_impl,"brev",1,1)
OP_DEF(BRKPT_OP,brkpt_impl,"brkpt",1,9)
OP_DEF(CALL_OP,call_impl,"call",1,3)
OP_DEF(CALLP_OP,callp_impl,"callp",1,3)
OP_DEF(CLZ_OP,clz_impl,"clz",1,1)
OP_DEF(CNOT_OP,cnot_impl,"cnot",1,1)
OP_DEF(COS_OP,cos_impl,"cos",1,4)
OP_DEF(CVT_OP,cvt_impl,"cvt",1,1)
OP_DEF(CVTA_OP,cvta_impl,"cvta",1,1)
OP_DEF(DIV_OP,div_impl,"div",1,1)
OP_DEF(EX2_OP,ex2_impl,"ex2",1,4)
OP_DEF(EXIT_OP,exit_impl,"exit",1,3)
OP_DEF(FMA_OP,fma_impl,"fma",1,2)
OP_DEF(ISSPACEP_OP,isspacep_impl,"isspacep",1,1)
OP_DEF(LD_OP,ld_impl,"ld",1,5)
OP_DEF(LDU_OP,ldu_impl,"ldu",1,5)
OP_DEF(LG2_OP,lg2_impl,"lg2",1,4)
OP_DEF(MAD24_OP,mad24_impl,"mad24",1,2)
OP_DEF(MAD_OP,mad_impl,"mad",1,2)
OP_DEF(MADP_OP,madp_impl,"madp",1,2)
OP_DEF(MAX_OP,max_impl,"max",1,1)
OP_DEF(MEMBAR_OP,membar_impl,"membar",1,3)
OP_DEF(MIN_OP,min_impl,"min",1,1)
OP_DEF(MOV_OP,mov_impl,"mov",1,1)
OP_DEF(MUL24_OP,mul24_impl,"mul24",1,1)
OP_DEF(MUL_OP,mul_impl,"mul",1,1)
OP_DEF(NEG_OP,neg_impl,"neg",1,1)
OP_DEF(NANDN_OP,nandn_impl,"nandn",1,1)
OP_DEF(NORN_OP,norn_impl,"norn",1,1)
OP_DEF(NOT_OP,not_impl,"not",1,1)
OP_DEF(OR_OP,or_impl,"or",1,1)
OP_DEF(ORN_OP,orn_impl,"orn",1,1)
OP_DEF(PMEVENT_OP,pmevent_impl,"pmevent",1,10)
OP_DEF(POPC_OP,popc_impl,"popc",1,1)
OP_DEF(PREFETCH_OP,prefetch_impl,"prefetch",1,5)
OP_DEF(PREFETCHU_OP,prefetchu_impl,"prefetchu",1,5)
OP_DEF(PRMT_OP,prmt_impl,"prmt",1,1)
OP_DEF(RCP_OP,rcp_impl,"rcp",1,4)
OP_DEF(RED_OP,red_impl,"red",1,7)
OP_DEF(REM_OP,rem_impl,"rem",1,1)
OP_DEF(RET_OP,ret_impl,"ret",0,3)
OP_DEF(RETP_OP,retp_impl,"retp",0,3)
OP_DEF(RSQRT_OP,rsqrt_impl,"rsqrt",1,4)
OP_DEF(SAD_OP,sad_impl,"sad",1,1)
OP_DEF(SELP_OP,selp_impl,"selp",1,1)
OP_DEF(SETP_OP,setp_impl,"setp",1,1)
OP_DEF(SET_OP,set_impl,"set",1,1)
OP_DEF(SHL_OP,shl_impl,"shl",1,1)
OP_DEF(SHR_OP,shr_impl,"shr",1,1)
OP_DEF(SIN_OP,sin_impl,"sin",1,4)
OP_DEF(SLCT_OP,slct_impl,"slct",1,1)
OP_DEF(SQRT_OP,sqrt_impl,"sqrt",1,4)
OP_DEF(SSY_OP,ssy_impl,"ssy",0,3)
OP_DEF(ST_OP,st_impl,"st",0,5)
OP_DEF(SUB_OP,sub_impl,"sub",1,1)
OP_DEF(SUBC_OP,subc_impl,"subc",1,1)
OP_DEF(SULD_OP,suld_impl,"suld",1,6)
OP_DEF(SURED_OP,sured_impl,"sured",1,6)
OP_DEF(SUST_OP,sust_impl,"sust",1,6)
OP_DEF(SUQ_OP,suq_impl,"suq",1,6)
OP_DEF(TEX_OP,tex_impl,"tex",1,6)
OP_DEF(TRAP_OP,trap_impl,"trap",1,3)
OP_DEF(VABSDIFF_OP,vabsdiff_impl,"vabsdiff",0,11)
OP_DEF(VADD_OP,vadd_impl,"vadd",0,11)
OP_DEF(VMAD_OP,vmad_impl,"vmad",0,11)
OP_DEF(VMAX_OP,vmax_impl,"vmax",0,11)
OP_DEF(VMIN_OP,vmin_impl,"vmin",0,11)
OP_DEF(VSET_OP,vset_impl,"vset",0,11)
OP_DEF(VSHL_OP,vshl_impl,"vshl",0,11)
OP_DEF(VSHR_OP,vshr_impl,"vshr",0,11)
OP_DEF(VSUB_OP,vsub_impl,"vsub",0,11)
OP_DEF(VOTE_OP,vote_impl,"vote",0,3)
OP_DEF(XOR_OP,xor_impl,"xor",1,1)
OP_DEF(NOP_OP,nop_impl,"nop",0,7)
OP_DEF(BREAK_OP,break_impl,"break",0,3)
OP_DEF(BREAKADDR_OP,breakaddr_impl,"breakaddr",0,3)
// Copyright (c) 2009-2011, Tor M. Aamodt, Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef opcodes_h_included
#define opcodes_h_included
// One enumerator per OP_DEF entry of opcodes.def, in file order: OP_DEF is
// temporarily defined to expand each entry to its first argument (the
// opcode name). NUM_OPCODES follows the last entry, so its value equals the
// number of opcodes.
enum opcode_t {
#define OP_DEF(OP,FUNC,STR,DST,CLASSIFICATION) OP,
#include "opcodes.def"
NUM_OPCODES
#undef OP_DEF
};
// Identifiers for special registers. Values are assigned implicitly in
// declaration order starting at 0.
// NOTE(review): the names look like the PTX special registers (%clock, %tid,
// %laneid, ...) -- confirm against the parser that produces these values.
enum special_regs {
CLOCK_REG,
HALFCLOCK_ID,
CLOCK64_REG,
CTAID_REG,
ENVREG_REG,
GRIDID_REG,
LANEID_REG,
LANEMASK_EQ_REG,
LANEMASK_LE_REG,
LANEMASK_LT_REG,
LANEMASK_GE_REG,
LANEMASK_GT_REG,
NCTAID_REG,
NTID_REG,
NSMID_REG,
NWARPID_REG,
PM_REG,
SMID_REG,
TID_REG,
WARPID_REG,
WARPSZ_REG
};
#endif
// Copyright (c) 2009-2011, Wilson W.L. Fung, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "../option_parser.h"
extern bool enable_ptx_file_line_stats;
// set options
void ptx_file_line_stats_options(option_parser_t opp);
// output stats to a file
void ptx_file_line_stats_write_file();
#ifdef __cplusplus
// stat collection interface to cuda-sim
class ptx_instruction;
void ptx_file_line_stats_add_exec_count(const ptx_instruction *pInsn);
#endif
// stat collection interface to gpgpu-sim
void ptx_file_line_stats_add_latency(unsigned pc, unsigned latency);
void ptx_file_line_stats_add_dram_traffic(unsigned pc, unsigned dram_traffic);
void ptx_file_line_stats_add_smem_bank_conflict(unsigned pc, unsigned n_way_bkconflict);
void ptx_file_line_stats_add_uncoalesced_gmem(unsigned pc, unsigned n_access);
void ptx_file_line_stats_create_exposed_latency_tracker(int n_shader_cores);
void ptx_file_line_stats_add_inflight_memory_insn(int sc_id, unsigned pc);
void ptx_file_line_stats_sub_inflight_memory_insn(int sc_id, unsigned pc);
void ptx_file_line_stats_commit_exposed_latency(int sc_id, int exposed_latency);
void ptx_file_line_stats_add_warp_divergence(unsigned pc, unsigned n_way_divergence);
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ptx_loader.h"
#include "ptx_ir.h"
#include "cuda-sim.h"
#include "ptx_parser.h"
#include <unistd.h>
#include <dirent.h>
#include <fstream>
/// globals
memory_space *g_global_mem;
memory_space *g_tex_mem;
memory_space *g_surf_mem;
memory_space *g_param_mem;
bool g_override_embedded_ptx = false;
/// extern prototypes
extern int ptx_parse();
extern int ptx__scan_string(const char*);
const char *g_ptxinfo_filename;
extern int ptxinfo_parse();
extern int ptxinfo_debug;
extern FILE *ptxinfo_in;
static bool g_save_embedded_ptx;
bool g_keep_intermediate_files;
bool m_ptx_save_converted_ptxplus;
// Returns the value of g_keep_intermediate_files, the flag that
// ptx_reg_options() binds to the "-keep" option.
bool keep_intermediate_files() {return g_keep_intermediate_files;}
// Registers the PTX loader's boolean command-line options with the option
// parser `opp`. Each option is bound to a file-level flag and has the
// default string "0":
//   -save_embedded_ptx                 -> g_save_embedded_ptx
//   -keep                              -> g_keep_intermediate_files
//   -gpgpu_ptx_save_converted_ptxplus  -> m_ptx_save_converted_ptxplus
void ptx_reg_options(option_parser_t opp)
{
option_parser_register(opp, "-save_embedded_ptx", OPT_BOOL, &g_save_embedded_ptx,
"saves ptx files embedded in binary as <n>.ptx",
"0");
option_parser_register(opp, "-keep", OPT_BOOL, &g_keep_intermediate_files,
"keep intermediate files created by GPGPU-Sim when interfacing with external programs",
"0");
option_parser_register(opp, "-gpgpu_ptx_save_converted_ptxplus", OPT_BOOL,
&m_ptx_save_converted_ptxplus,
"Saved converted ptxplus to a file",
"0");
}
void print_ptx_file( const char *p, unsigned source_num, const char *filename )
{
printf("\nGPGPU-Sim PTX: file _%u.ptx contents:\n\n", source_num );
char *s = strdup(p);
char *t = s;
unsigned n=1;
while ( *t != '\0' ) {
char *u = t;
while ( (*u != '\n') && (*u != '\0') ) u++;
unsigned last = (*u == '\0');
*u = '\0';
const ptx_instruction *pI = ptx_instruction_lookup(filename,n);
char pc[64];
if( pI && pI->get_PC() )
snprintf(pc,64,"%4u", pI->get_PC() );
else
snprintf(pc,64," ");
printf(" _%u.ptx %4u (pc=%s): %s\n", source_num, n, pc, t );
if ( last ) break;
t = u+1;
n++;
}
free(s);
fflush(stdout);
}
// Runs the external cuobjdump_to_ptxplus tool on the given PTX, SASS and ELF
// files and returns the text it produced.
//
// ptxfilename / elffilename / sassfilename: paths handed to the tool (the
//   tool receives them in the order ptx, sass, elf, output).
// Returns a new[]-allocated, NUL-terminated copy of the tool's output file.
// The temporary output file is removed afterwards unless
// m_ptx_save_converted_ptxplus is set.
// Any failure (temporary file creation, over-long command line, tool or rm
// returning non-zero) prints a message and terminates via exit(1).
char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename)
{
   printf("GPGPU-Sim PTX: converting EMBEDDED .ptx file to ptxplus \n");

   char fname_ptxplus[1024];
   snprintf(fname_ptxplus,1024,"_ptxplus_XXXXXX");
   int fd4=mkstemp(fname_ptxplus);
   if( fd4 == -1 ) {
      printf("GPGPU-Sim PTX: ERROR ** could not create temporary file %s\n", fname_ptxplus);
      exit(1);
   }
   // Only the unique name is needed; the tool writes the file itself.
   close(fd4);

   // Run cuobjdump_to_ptxplus
   char commandline[1024];
   int written = snprintf(commandline, 1024, "$GPGPUSIM_ROOT/build/$GPGPUSIM_CONFIG/cuobjdump_to_ptxplus/cuobjdump_to_ptxplus %s %s %s %s",
                          ptxfilename.c_str(),
                          sassfilename.c_str(),
                          elffilename.c_str(),
                          fname_ptxplus);
   if( written < 0 || written >= 1024 ) {
      // A truncated command would silently run the tool with wrong arguments.
      printf("GPGPU-Sim PTX: ERROR ** cuobjdump_to_ptxplus command line is too long\n");
      exit(1);
   }
   fflush(stdout);
   printf("GPGPU-Sim PTX: calling cuobjdump_to_ptxplus\ncommandline: %s\n", commandline);
   int result = system(commandline);
   if(result){printf("GPGPU-Sim PTX: ERROR ** could not execute %s\n", commandline); exit(1);}

   // Get ptxplus from file
   std::ifstream fileStream(fname_ptxplus, std::ios::in);
   std::string text, line;
   while(getline(fileStream,line)) {
      text += line;
      text += '\n';
   }
   fileStream.close();

   // Hand the text back as a heap buffer (size()+1 also copies the NUL).
   char* ptxplus_str = new char [text.size()+1];
   memcpy(ptxplus_str, text.c_str(), text.size()+1);

   if (!m_ptx_save_converted_ptxplus){
      char rm_commandline[1024];
      snprintf(rm_commandline,1024,"rm -f %s", fname_ptxplus);
      printf("GPGPU-Sim PTX: removing temporary files using \"%s\"\n", rm_commandline);
      int rm_result = system(rm_commandline);
      if( rm_result != 0 ) {
         printf("GPGPU-Sim PTX: ERROR ** while removing temporary files %d\n", rm_result);
         exit(1);
      }
   }
   printf("GPGPU-Sim PTX: DONE converting EMBEDDED .ptx file to ptxplus \n");

   return ptxplus_str;
}
// Parses the PTX source text `p` and returns the symbol table created by
// init_parser() for the pseudo file name "_<source_num>.ptx".
//
// When g_save_embedded_ptx is set the text is also written to that file;
// failing to open it only produces a warning.
// On a parse error the text is dumped to a "_ptx_errors_XXXXXX" temporary
// file (when one can be created) and the process is terminated with abort().
// When g_debug_execution >= 100 the parsed source is listed on stdout.
symbol_table *gpgpu_ptx_sim_load_ptx_from_string( const char *p, unsigned source_num )
{
   char buf[1024];
   snprintf(buf,1024,"_%u.ptx", source_num );
   if( g_save_embedded_ptx ) {
      FILE *fp = fopen(buf,"w");
      if( fp != NULL ) {
         fprintf(fp,"%s",p);
         fclose(fp);
      } else {
         // Saving is a convenience only; keep going without the copy.
         printf("GPGPU-Sim PTX: WARNING ** could not open \"%s\" to save embedded .ptx\n", buf);
      }
   }

   symbol_table *symtab=init_parser(buf);
   ptx__scan_string(p);
   int errors = ptx_parse ();
   if ( errors ) {
      char fname[1024];
      snprintf(fname,1024,"_ptx_errors_XXXXXX");
      int fd=mkstemp(fname);
      FILE *ptxfile = NULL;
      if( fd != -1 ) {
         close(fd);
         ptxfile = fopen(fname,"w");
      }
      if( ptxfile != NULL ) {
         printf("GPGPU-Sim PTX: parser error detected, exiting... but first extracting .ptx to \"%s\"\n", fname);
         fprintf(ptxfile,"%s", p );
         fclose(ptxfile);
      } else {
         printf("GPGPU-Sim PTX: parser error detected, exiting... (could not create a file to extract the .ptx to)\n");
      }
      abort(); // does not return
   }

   if ( g_debug_execution >= 100 )
      print_ptx_file(p,source_num,buf);

   printf("GPGPU-Sim PTX: finished parsing EMBEDDED .ptx file %s\n",buf);
   return symtab;
}
// Obtains resource-usage information for the PTX text `p_for_info` by
// running ptxas on it and feeding ptxas' diagnostic output to the ptxinfo
// parser.
//
// Steps: (1) write the text to a temporary file, (2) pass it through a
// cat/sed pipeline into a second temporary file, (3) run
// $CUDA_INSTALL_PATH/bin/ptxas -v on that file with stderr redirected to
// "<first temp name>info", (4) parse that file with ptxinfo_parse(),
// (5) remove all three files.
// Any failure prints a message and terminates via exit(1).
// source_num is currently unused.
void gpgpu_ptxinfo_load_from_string( const char *p_for_info, unsigned source_num )
{
   char fname[1024];
   snprintf(fname,1024,"_ptx_XXXXXX");
   int fd=mkstemp(fname);
   if( fd == -1 ) {
      printf("GPGPU-Sim PTX: ERROR ** could not create temporary file \"%s\"\n", fname);
      exit(1);
   }
   close(fd);

   printf("GPGPU-Sim PTX: extracting embedded .ptx to temporary file \"%s\"\n", fname);
   FILE *ptxfile = fopen(fname,"w");
   if( ptxfile == NULL ) {
      printf("GPGPU-Sim PTX: ERROR ** could not open temporary file \"%s\" for writing\n", fname);
      exit(1);
   }
   fprintf(ptxfile,"%s", p_for_info);
   fclose(ptxfile);

   char fname2[1024];
   snprintf(fname2,1024,"_ptx2_XXXXXX");
   fd=mkstemp(fname2);
   if( fd == -1 ) {
      printf("GPGPU-Sim PTX: ERROR ** could not create temporary file \"%s\"\n", fname2);
      exit(1);
   }
   close(fd);

   char commandline2[4096];
   snprintf(commandline2,4096,"cat %s | sed 's/.version 1.5/.version 1.4/' | sed 's/, texmode_independent//' | sed 's/\\(\\.extern \\.const\\[1\\] .b8 \\w\\+\\)\\[\\]/\\1\\[1\\]/' | sed 's/const\\[.\\]/const\\[0\\]/g' > %s", fname, fname2);
   printf("Running: %s\n", commandline2);
   int result = system(commandline2);
   if( result != 0 ) {
      printf("GPGPU-Sim PTX: ERROR ** while loading PTX (a) %d\n", result);
      printf(" Ensure you have write access to simulation directory\n");
      printf(" and have \'cat\' and \'sed\' in your path.\n");
      exit(1);
   }

   // static: g_ptxinfo_filename is a global that keeps pointing at this
   // buffer after the function returns, so it must not live on the stack.
   static char tempfile_ptxinfo[1024];
   snprintf(tempfile_ptxinfo,1024,"%sinfo",fname);
   char commandline[1024];
   char extra_flags[1024];
   extra_flags[0]=0;
#if CUDART_VERSION >= 3000
   snprintf(extra_flags,1024,"--gpu-name=sm_20");
#endif
   snprintf(commandline,1024,"$CUDA_INSTALL_PATH/bin/ptxas %s -v %s --output-file /dev/null 2> %s",
            extra_flags, fname2, tempfile_ptxinfo);
   printf("GPGPU-Sim PTX: generating ptxinfo using \"%s\"\n", commandline);
   result = system(commandline);
   if( result != 0 ) {
      printf("GPGPU-Sim PTX: ERROR ** while loading PTX (b) %d\n", result);
      printf(" Ensure ptxas is in your path.\n");
      exit(1);
   }

   ptxinfo_in = fopen(tempfile_ptxinfo,"r");
   if( ptxinfo_in == NULL ) {
      printf("GPGPU-Sim PTX: ERROR ** could not open ptxas output \"%s\"\n", tempfile_ptxinfo);
      exit(1);
   }
   g_ptxinfo_filename = tempfile_ptxinfo;
   ptxinfo_parse();
   // The parser has consumed the file; release the stream so repeated calls
   // do not accumulate open FILE handles.
   fclose(ptxinfo_in);
   ptxinfo_in = NULL;

   snprintf(commandline,1024,"rm -f %s %s %s", fname, fname2, tempfile_ptxinfo);
   printf("GPGPU-Sim PTX: removing ptxinfo using \"%s\"\n", commandline);
   result = system(commandline);
   if( result != 0 ) {
      printf("GPGPU-Sim PTX: ERROR ** while loading PTX (c) %d\n", result);
      exit(1);
   }
}
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PTX_LOADER_H_INCLUDED
#define PTX_LOADER_H_INCLUDED
#include <string>
extern bool g_override_embedded_ptx;
class symbol_table *gpgpu_ptx_sim_load_ptx_from_string( const char *p, unsigned source_num );
void gpgpu_ptxinfo_load_from_string( const char *p_for_info, unsigned source_num );
// Converts PTX + SASS + ELF into ptxplus text via the external
// cuobjdump_to_ptxplus tool. All three arguments are file names, in the
// order PTX, ELF, SASS (the parameter names mirror the definition; the
// previous names listed sass before elf). Returns a new[]-allocated string.
char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename);
bool keep_intermediate_files();
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ptx_parser_INCLUDED
#define ptx_parser_INCLUDED
#include "../abstract_hardware_model.h"
extern const char *g_filename;
extern int g_error_detected;
#ifdef __cplusplus
class symbol_table* init_parser(const char*);
const class ptx_instruction *ptx_instruction_lookup( const char *filename, unsigned linenumber );
#endif
const char *decode_token( int type );
void read_parser_environment_variables();
void start_function( int entry_point );
void add_function_name( const char *fname );
void init_directive_state();
void add_directive();
void end_function();
void add_identifier( const char *s, int array_dim, unsigned array_ident );
void add_function_arg();
void add_scalar_type_spec( int type_spec );
void add_scalar_operand( const char *identifier );
void add_neg_pred_operand( const char *identifier );
void add_variables();
void set_variable_type();
void add_opcode( int opcode );
void add_pred( const char *identifier, int negate, int predModifier );
void add_1vector_operand( const char *d1 );
void add_2vector_operand( const char *d1, const char *d2 );
void add_3vector_operand( const char *d1, const char *d2, const char *d3 );
void add_4vector_operand( const char *d1, const char *d2, const char *d3, const char *d4 );
void add_option(int option );
void add_builtin_operand( int builtin, int dim_modifier );
void add_memory_operand( );
void add_literal_int( int value );
void add_literal_float( float value );
void add_literal_double( double value );
void add_address_operand( const char *identifier, int offset );
void add_address_operand2( int offset );
void add_label( const char *identifier );
void add_vector_spec(int spec );
void add_space_spec( enum _memory_space_t spec, int value );
void add_ptr_spec( enum _memory_space_t spec );
void add_extern_spec();
void add_instruction();
void set_return();
void add_alignment_spec( int spec );
void add_array_initializer();
void add_file( unsigned num, const char *filename );
void add_version_info( float ver, unsigned ext);
void *reset_symtab();
void set_symtab(void*);
void add_pragma( const char *str );
void func_header(const char* a);
void func_header_info(const char* a);
void func_header_info_int(const char* a, int b);
void add_constptr(const char* identifier1, const char* identifier2, int offset);
void target_header(char* a);
void target_header2(char* a, char* b);
void target_header3(char* a, char* b, char* c);
void add_double_operand( const char *d1, const char *d2 );
void change_memory_addr_space( const char *identifier );
void change_operand_lohi( int lohi );
void change_double_operand_type( int addr_type );
void change_operand_neg( );
void set_immediate_operand_type( );
void version_header(double a);
#define NON_ARRAY_IDENTIFIER 1
#define ARRAY_IDENTIFIER_NO_DIM 2
#define ARRAY_IDENTIFIER 3
#endif
/*
Copyright (c) 2009-2011, Tor M. Aamodt
The University of British Columbia
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
Neither the name of The University of British Columbia nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
%option noyywrap
%option yylineno
%option prefix="ptxinfo_"
%{
#include "ptxinfo.tab.h"
#include <string.h>
/* Capacity of the buffer holding the text of the current input line. */
#define LINEBUF_SIZE 1024
/* Text of the line being scanned; printed by ptxinfo_error(). */
char ptxinfo_linebuf[LINEBUF_SIZE];
/* Number of characters of the current line consumed so far. */
unsigned ptxinfo_col = 0;
/* TC ("track column") is run by every token rule. While the scanner is on
   line 1 and the token still fits, it copies the token text into
   ptxinfo_linebuf at the current column (later lines are captured whole by
   the newline rule instead). It always advances ptxinfo_col by the token
   length. The copy uses strlen() as the count, so it does not write a
   terminating NUL itself. */
#define TC if( (ptxinfo_lineno == 1) && ((ptxinfo_col + strlen(ptxinfo_text)) < LINEBUF_SIZE) ) { \
strncpy(ptxinfo_linebuf+ptxinfo_col,ptxinfo_text,strlen(ptxinfo_text)); \
} \
ptxinfo_col+=strlen(ptxinfo_text);
%}
%%
"ptxas info : Function properties for"[^\n]*\n[^\n]*
"warning"[^\n]* TC; ptxinfo_lval.string_value = strdup(yytext); return WARNING;
"ptxas" TC; return HEADER;
"info" TC; return INFO;
"Compiling entry function" TC; return FUNC;
"Used" TC; return USED;
"registers" TC; return REGS;
"bytes" TC; return BYTES;
"lmem" TC; return LMEM;
"smem" TC; return SMEM;
"cmem" TC; return CMEM;
"line" TC; return LINE;
"for" TC; return FOR;
[_A-Za-z$%][_0-9A-Za-z$]* TC; ptxinfo_lval.string_value = strdup(yytext); return IDENTIFIER;
[-]{0,1}[0-9]+ TC; ptxinfo_lval.int_value = atoi(yytext); return INT_OPERAND;
"+" TC; return PLUS;
"," TC; return COMMA;
"[" TC; return LEFT_SQUARE_BRACKET;
"]" TC; return RIGHT_SQUARE_BRACKET;
":" TC; return COLON;
";" TC; return SEMICOLON;
"'" TC; return QUOTE;
" " TC;
"\t" TC;
\n.* ptxinfo_col=0; strncpy(ptxinfo_linebuf, yytext + 1, LINEBUF_SIZE - 1); ptxinfo_linebuf[LINEBUF_SIZE - 1] = '\0'; yyless( 1 );
%%
extern int g_ptxinfo_error_detected;
extern const char *g_filename;
extern const char *g_ptxinfo_filename;
int ptxinfo_error( const char *s )
{
int i;
g_ptxinfo_error_detected = 1;
fflush(stdout);
printf("GPGPU-Sim: ERROR while parsing output of ptxas (used to capture resource usage information)\n");
if( s != NULL )
printf("GPGPU-Sim: %s (%s:%u) Syntax error:\n\n", g_filename, g_ptxinfo_filename, ptxinfo_lineno );
printf(" %s\n", ptxinfo_linebuf );
printf(" ");
for( i=0; i < ptxinfo_col-1; i++ ) {
if( ptxinfo_linebuf[i] == '\t' ) printf("\t");
else printf(" ");
}
printf("^\n\n");
fflush(stdout);
exit(43);
return 0;
}
/*
Copyright (c) 2009-2011, Tor M. Aamodt
The University of British Columbia
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
Neither the name of The University of British Columbia nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
%union {
int int_value;
char * string_value;
}
%token <int_value> INT_OPERAND
%token HEADER
%token INFO
%token FUNC
%token USED
%token REGS
%token BYTES
%token LMEM
%token SMEM
%token CMEM
%token <string_value> IDENTIFIER
%token PLUS
%token COMMA
%token LEFT_SQUARE_BRACKET
%token RIGHT_SQUARE_BRACKET
%token COLON
%token SEMICOLON
%token QUOTE
%token LINE
%token <string_value> WARNING
%token FOR
%{
#include <stdlib.h>
#include <string.h>
static unsigned g_declared;
static unsigned g_system;
int ptxinfo_lex(void);
void ptxinfo_addinfo();
void ptxinfo_function(const char *fname );
void ptxinfo_regs( unsigned nregs );
void ptxinfo_lmem( unsigned declared, unsigned system );
void ptxinfo_smem( unsigned declared, unsigned system );
void ptxinfo_cmem( unsigned nbytes, unsigned bank );
int ptxinfo_error(const char*);
%}
%%
/* Top level: the input is a sequence of zero or more lines. */
input: /* empty */
| input line
;
/* Every recognised line starts with HEADER.  It is either an info record
   (HEADER INFO COLON ...), a record of the form
   "<identifier>, line <n>; <warning>" (accepted, no action), or a bare
   warning whose text is echoed to stdout. */
line: HEADER INFO COLON line_info
| HEADER IDENTIFIER COMMA LINE INT_OPERAND SEMICOLON WARNING
| HEADER WARNING { printf("GPGPU-Sim: ptxas %s\n", $2); }
;
/* An info record either names a function or lists resource usage.
   ptxinfo_addinfo() is called once the whole resource list has been reduced. */
line_info: function_name
| function_info { ptxinfo_addinfo(); }
;
/* Function name record; only the first quoted identifier ($3) is passed to
   ptxinfo_function(), the identifier after FOR is ignored. */
function_name: FUNC QUOTE IDENTIFIER QUOTE { ptxinfo_function($3); }
| FUNC QUOTE IDENTIFIER QUOTE FOR QUOTE IDENTIFIER QUOTE { ptxinfo_function($3); }
/* Comma separated list of one or more resource entries. */
function_info: info
| function_info COMMA info
;
/* A single resource entry.  The "tuple" forms carry two byte counts which are
   handed over through g_declared / g_system; the plain "<n> bytes" forms
   pass 0 as the second argument. */
info: USED INT_OPERAND REGS { ptxinfo_regs($2); }
| tuple LMEM { ptxinfo_lmem(g_declared,g_system); }
| tuple SMEM { ptxinfo_smem(g_declared,g_system); }
| INT_OPERAND BYTES CMEM LEFT_SQUARE_BRACKET INT_OPERAND RIGHT_SQUARE_BRACKET { ptxinfo_cmem($1,$5); }
| INT_OPERAND BYTES LMEM { ptxinfo_lmem($1,0); }
| INT_OPERAND BYTES SMEM { ptxinfo_smem($1,0); }
| INT_OPERAND BYTES CMEM { ptxinfo_cmem($1,0); }
| INT_OPERAND REGS { ptxinfo_regs($1); }
;
/* "<a>+<b> bytes": stores a in g_declared and b in g_system for the
   enclosing info rule to consume. */
tuple: INT_OPERAND PLUS INT_OPERAND BYTES { g_declared=$1; g_system=$3; }
%%
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "debug.h"
#include "gpgpu-sim/shader.h"
#include "gpgpu-sim/gpu-sim.h"
#include "cuda-sim/ptx_sim.h"
#include "cuda-sim/cuda-sim.h"
#include "cuda-sim/ptx_ir.h"
#include <map>
#include <stdio.h>
#include <string.h>
class watchpoint_event {
public:
watchpoint_event()
{
m_thread=NULL;
m_inst=NULL;
}
watchpoint_event(const ptx_thread_info *thd, const ptx_instruction *pI)
{
m_thread=thd;
m_inst = pI;
}
const ptx_thread_info *thread() const { return m_thread; }
const ptx_instruction *inst() const { return m_inst; }
private:
const ptx_thread_info *m_thread;
const ptx_instruction *m_inst;
};
std::map<unsigned,watchpoint_event> g_watchpoint_hits;
void hit_watchpoint( unsigned watchpoint_num, ptx_thread_info *thd, const ptx_instruction *pI )
{
g_watchpoint_hits[watchpoint_num]=watchpoint_event(thd,pI);
}
/// interactive debugger
void gpgpu_sim::gpgpu_debug()
{
bool done=true;
static bool single_step=true;
static unsigned next_brkpt=1;
static std::map<unsigned,brk_pt> breakpoints;
/// if single stepping, go to interactive debugger
if( single_step )
done=false;
/// check if we've reached a breakpoint
const ptx_thread_info *brk_thd = NULL;
const ptx_instruction *brk_inst = NULL;
for( std::map<unsigned,brk_pt>::iterator i=breakpoints.begin(); i!=breakpoints.end(); i++) {
unsigned num=i->first;
brk_pt &b=i->second;
if( b.is_watchpoint() ) {
unsigned addr = b.get_addr();
unsigned new_value;
m_global_mem->read(addr,4,&new_value);
if( new_value != b.get_value() || g_watchpoint_hits.find(num) != g_watchpoint_hits.end() ) {
printf( "GPGPU-Sim PTX DBG: watch point %u triggered (old value=%x, new value=%x)\n",
num,b.get_value(),new_value );
std::map<unsigned,watchpoint_event>::iterator w=g_watchpoint_hits.find(num);
if( w==g_watchpoint_hits.end() )
printf( "GPGPU-Sim PTX DBG: memory transfer modified value\n");
else {
watchpoint_event wa = w->second;
brk_thd = wa.thread();
brk_inst = wa.inst();
printf( "GPGPU-Sim PTX DBG: modified by thread uid=%u, sid=%u, hwtid=%u\n",
brk_thd->get_uid(),brk_thd->get_hw_sid(), brk_thd->get_hw_tid() );
printf( "GPGPU-Sim PTX DBG: ");
brk_inst->print_insn(stdout);
printf( "\n" );
g_watchpoint_hits.erase(w);
}
b.set_value(new_value);
done = false;
}
} else {
/*
for( unsigned sid=0; sid < m_n_shader; sid++ ) {
unsigned hw_thread_id = -1;
abort();
ptx_thread_info *thread = m_sc[sid]->get_functional_thread(hw_thread_id);
if( thread_at_brkpt(thread, b) ) {
done = false;
printf("GPGPU-Sim PTX DBG: reached breakpoint %u at %s (sm=%u, hwtid=%u)\n",
num, b.location().c_str(), sid, hw_thread_id );
brk_thd = thread;
brk_inst = brk_thd->get_inst();
printf( "GPGPU-Sim PTX DBG: reached by thread uid=%u, sid=%u, hwtid=%u\n",
brk_thd->get_uid(),brk_thd->get_hw_sid(), brk_thd->get_hw_tid() );
printf( "GPGPU-Sim PTX DBG: ");
brk_inst->print_insn(stdout);
printf( "\n" );
}
}
*/
}
}
if( done )
assert( g_watchpoint_hits.empty() );
/// enter interactive debugger loop
while (!done) {
printf("(ptx debugger) ");
fflush(stdout);
char line[1024];
fgets(line,1024,stdin);
char *tok = strtok(line," \t\n");
if( !strcmp(tok,"dp") ) {
int shader_num = 0;
tok = strtok(NULL," \t\n");
sscanf(tok,"%d",&shader_num);
dump_pipeline((0x40|0x4|0x1),shader_num,0);
printf("\n");
fflush(stdout);
} else if( !strcmp(tok,"q") || !strcmp(tok,"quit") ) {
printf("\nreally quit GPGPU-Sim (y/n)?\n");
fgets(line,1024,stdin);
tok = strtok(line," \t\n");
if( !strcmp(tok,"y") ) {
exit(0);
} else {
printf("not quiting.\n");
}
} else if( !strcmp(tok,"b") ) {
tok = strtok(NULL," \t\n");
char brkpt[1024];
sscanf(tok,"%s",brkpt);
tok = strtok(NULL," \t\n");
unsigned uid;
sscanf(tok,"%u",&uid);
breakpoints[next_brkpt++] = brk_pt(brkpt,uid);
} else if( !strcmp(tok,"d") ) {
tok = strtok(NULL," \t\n");
unsigned uid;
sscanf(tok,"%u",&uid);
breakpoints.erase(uid);
} else if( !strcmp(tok,"s") ) {
done = true;
} else if( !strcmp(tok,"c") ) {
single_step=false;
done = true;
} else if( !strcmp(tok,"w") ) {
tok = strtok(NULL," \t\n");
unsigned addr;
sscanf(tok,"%x",&addr);
unsigned value;
m_global_mem->read(addr,4,&value);
m_global_mem->set_watch(addr,next_brkpt);
breakpoints[next_brkpt++] = brk_pt(addr,value);
} else if( !strcmp(tok,"l") ) {
if( brk_thd == NULL ) {
printf("no thread selected\n");
} else {
addr_t pc = brk_thd->get_pc();
addr_t start_pc = (pc<5)?0:(pc-5);
for( addr_t p=start_pc; p <= pc+5; p++ ) {
const ptx_instruction *i = brk_thd->get_inst(p);
if( i ) {
if( p != pc )
printf( " " );
else
printf( "==> " );
i->print_insn(stdout);
printf( "\n" );
}
}
}
} else if( !strcmp(tok,"h") ) {
printf("commands:\n");
printf(" q - quit GPGPU-Sim\n");
printf(" b <file>:<line> <thead uid> - set breakpoint\n");
printf(" w <global address> - set watchpoint\n");
printf(" del <n> - delete breakpoint\n");
printf(" s - single step one shader cycle (all cores)\n");
printf(" c - continue simulation without single stepping\n");
printf(" l - list PTX around current breakpoint\n");
printf(" dp <n> - display pipeline contents on SM <n>\n");
printf(" h - print this message\n");
} else {
printf("\ncommand not understood.\n");
}
fflush(stdout);
}
}
/// Returns true when @p thread currently sits at the location (and, if the
/// breakpoint is thread specific, has the uid) described by breakpoint @p b.
/// The decision itself is delegated to brk_pt::is_equal().
bool thread_at_brkpt( ptx_thread_info *thread, const struct brk_pt &b )
{
   const bool matches = b.is_equal( thread->get_location(), thread->get_uid() );
   return matches;
}
// Copyright (c) 2009-2011, Tor M. Aamodt,
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PTX_DEBUG_INCLUDED
#define PTX_DEBUG_INCLUDED
#include "abstract_hardware_model.h"
#include <string>
/// A debugger stop condition: either a location breakpoint
/// ("file:line" string plus thread uid) or a watchpoint (address plus the
/// value last seen at that address).
///
/// Every constructor initialises all members, so the accessors never read
/// indeterminate values regardless of which kind of object they are called on.
class brk_pt {
public:
   /// Creates an invalid entry that is neither a breakpoint nor a watchpoint.
   brk_pt()
      : m_valid(false), m_watch(false), m_fileline(),
        m_thread_uid((unsigned)-1), m_addr(0), m_value(0) {}

   /// Creates a location breakpoint.
   /// @param fileline location string compared verbatim in is_equal()
   /// @param uid      thread uid to match; (unsigned)-1 matches any thread
   brk_pt( const char *fileline, unsigned uid )
      : m_valid(true), m_watch(false), m_fileline(fileline),
        m_thread_uid(uid), m_addr(0), m_value(0) {}

   /// Creates a watchpoint on @p addr whose last known content is @p value.
   brk_pt( unsigned addr, unsigned value )
      : m_valid(true), m_watch(true), m_fileline(),
        m_thread_uid((unsigned)-1), m_addr(addr), m_value(value) {}

   unsigned get_value() const { return m_value; }
   addr_t get_addr() const { return m_addr; }

   bool is_valid() const { return m_valid; }
   bool is_watchpoint() const { return m_watch; }

   /// Tests whether a thread with uid @p uid located at @p fileline matches
   /// this breakpoint.  Invalid entries and watchpoints never match.
   bool is_equal( const std::string &fileline, unsigned uid ) const
   {
      if( !m_valid || m_watch )
         return false;
      // (unsigned)-1 acts as a wildcard for the thread uid.
      if( (m_thread_uid != (unsigned)-1) && (uid != m_thread_uid) )
         return false;
      return m_fileline == fileline;
   }

   /// Human readable description "<fileline> thread uid = <uid>".
   std::string location() const
   {
      // Only the number goes through a fixed buffer; the location string is
      // appended as a std::string, so its length cannot overflow anything.
      char uid_text[32];
      sprintf(uid_text,"%u", m_thread_uid);
      return m_fileline + " thread uid = " + uid_text;
   }

   /// Stores @p val as the last seen watchpoint value and returns it.
   unsigned set_value( unsigned val ) { return m_value=val; }

private:
   bool m_valid;
   bool m_watch;

   // break point
   std::string m_fileline;
   unsigned m_thread_uid;

   // watch point
   unsigned m_addr;
   unsigned m_value;
};
extern int gpgpu_ptx_instruction_classification ;
class ptx_thread_info;
class ptx_instruction;
bool thread_at_brkpt( ptx_thread_info *thd_info, const struct brk_pt &b );
void hit_watchpoint( unsigned watchpoint_num, ptx_thread_info *thd, const ptx_instruction *pI );
#endif
# Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung, Ali Bakhoda,
# Timothy G. Rogers
# The University of British Columbia
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
# Neither the name of The University of British Columbia nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# GPGPU-Sim Makefile
# Build switches; both default to 0 and may be overridden from the command
# line or the environment.
DEBUG?=0
TRACE?=0
# DEBUG=1 adds -DDEBUG to the compiler flags.
ifeq ($(DEBUG),1)
CXXFLAGS = -Wall -DDEBUG
else
CXXFLAGS = -Wall
endif
# TRACE=1 defines TRACING_ON=1 for the compiled sources.
ifeq ($(TRACE),1)
CXXFLAGS += -DTRACING_ON=1
endif
# version_detection.mk is expected to set GNUC_CPP0X (defined outside this file).
include ../../version_detection.mk
ifeq ($(GNUC_CPP0X), 1)
CXXFLAGS += -std=c++0x
endif
# Optimise only non-debug builds.
ifneq ($(DEBUG),1)
OPTFLAGS += -O3
else
CXXFLAGS +=
endif
# Power model support is enabled only when GPGPUSIM_POWER_MODEL is non-empty;
# it then adds that directory to the include path and defines GPGPUSIM_POWER_MODEL.
POWER_FLAGS=
ifneq ($(GPGPUSIM_POWER_MODEL),)
POWER_FLAGS = -I$(GPGPUSIM_POWER_MODEL) -DGPGPUSIM_POWER_MODEL
endif
# Debug symbols and position independent code are always requested.
OPTFLAGS += -g3 -fPIC
# NOTE(review): SNOW is not set in this file; presumably supplied by the caller.
CPP = g++ $(SNOW)
OEXT = o
# Objects are written below SIM_OBJ_FILES_DIR (set outside this file).
OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/gpgpu-sim
# All *.cc files in this directory are compiled, except power_interface.cc
# when no power model is configured.
SRCS = $(shell ls *.cc)
EXCLUDES =
ifeq ($(GPGPUSIM_POWER_MODEL), )
EXCLUDES += power_interface.cc
endif
CSRCS = $(filter-out $(EXCLUDES), $(SRCS))
OBJS = $(CSRCS:%.cc=$(OUTPUT_DIR)/%.$(OEXT))
# Default goal: archive every object file into the static library stored in
# $(OUTPUT_DIR).
libgpu_uarch_sim.a:$(OBJS)
	ar rcs $(OUTPUT_DIR)/libgpu_uarch_sim.a $(OBJS)

# The dependency file included at the bottom is produced by the "depend" rule.
$(OUTPUT_DIR)/Makefile.makedepend: depend

depend:
	touch $(OUTPUT_DIR)/Makefile.makedepend
	makedepend -f$(OUTPUT_DIR)/Makefile.makedepend -p$(OUTPUT_DIR)/ $(CSRCS) 2> /dev/null

# Compile one source file into $(OUTPUT_DIR).
$(OUTPUT_DIR)/%.$(OEXT): %.cc
	$(CPP) $(OPTFLAGS) $(CXXFLAGS) $(POWER_FLAGS) -o $(OUTPUT_DIR)/$*.$(OEXT) -c $*.cc

# "clean" names an action, not a file; declaring it phony keeps it working
# even if a file called "clean" exists in this directory.
.PHONY: clean
clean:
	rm -f *.o core *~ *.a
	rm -f Makefile.makedepend Makefile.makedepend.bak

$(OUTPUT_DIR)/option_parser.$(OEXT): option_parser.h

$(OUTPUT_DIR)/dram_sched.$(OEXT): $(OUTPUT_DIR)/../cuda-sim/ptx.tab.h

# Use $(MAKE) rather than a literal "make" for the recursive invocation so the
# sub-make is the same program and inherits flags such as -j, -n and -k.
$(OUTPUT_DIR)/../cuda-sim/ptx.tab.h:
	$(MAKE) -C ../cuda-sim/ $(OUTPUT_DIR)/../cuda-sim/ptx.tab.c

include $(OUTPUT_DIR)/Makefile.makedepend
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung,
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "../option_parser.h"
#ifndef ADDRDEC_H
#define ADDRDEC_H
#include "../abstract_hardware_model.h"
// Decoded form of a linear memory address, filled in by
// linear_to_raw_address_translation::addrdec_tlx().
// NOTE(review): the field meanings below are inferred from their names and the
// CHIP/BK/ROW/COL/BURST enumerators of the translation class - confirm against
// the implementation.
struct addrdec_t {
// Prints the decoded address to fp (implementation not shown here).
void print( FILE *fp ) const;
unsigned chip; // presumably the memory chip/channel index
unsigned bk; // presumably the bank index
unsigned row; // presumably the row index
unsigned col; // presumably the column index
unsigned burst; // presumably the offset within a burst
unsigned sub_partition; // presumably the memory sub-partition index
};
// Translates linear addresses into their addrdec_t components.
// Only the interface is declared here; all behaviour lives in the
// implementation file.
class linear_to_raw_address_translation {
public:
linear_to_raw_address_translation();
// Hooks this object up to the option parser opp.
void addrdec_setoption(option_parser_t opp);
// Configures the translation for the given channel / sub-partition counts.
void init(unsigned int n_channel, unsigned int n_sub_partition_in_channel);
// accessors
// Decodes addr into *tlx.
void addrdec_tlx(new_addr_type addr, addrdec_t *tlx) const;
// NOTE(review): presumably returns the address as seen inside one partition - confirm.
new_addr_type partition_address( new_addr_type addr ) const;
private:
void addrdec_parseoption(const char *option);
void sweep_test() const; // sanity check to ensure no overlapping
// Indices into the addrdec_* arrays below, one per address field;
// N_ADDRDEC is the number of fields.
enum {
CHIP = 0,
BK = 1,
ROW = 2,
COL = 3,
BURST = 4,
N_ADDRDEC
};
const char *addrdec_option;
int gpgpu_mem_address_mask;
bool run_test;
int ADDR_CHIP_S;
// Per-field tables indexed by the enum above.
// NOTE(review): mklow/mkhigh look like the lowest/highest bit position of each
// field's mask - confirm in the implementation.
unsigned char addrdec_mklow[N_ADDRDEC];
unsigned char addrdec_mkhigh[N_ADDRDEC];
new_addr_type addrdec_mask[N_ADDRDEC];
new_addr_type sub_partition_id_mask;
unsigned int gap;
int m_n_channel;
int m_n_sub_partition_in_channel;
};
#endif
// Copyright (c) 2009-2011, Wilson W.L. Fung, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#ifndef DELAYQUEUE_H
#define DELAYQUEUE_H
#include "../statwrapper.h"
#include "gpu-misc.h"
/// Node of the singly linked list used by fifo_pipeline.
template <class T>
struct fifo_data {
   T *m_data;          ///< payload pointer; NULL marks an empty (delay) slot
   fifo_data *m_next;  ///< next node towards the tail; NULL at the tail
};

/// FIFO of T* with a minimum and a maximum length, stored as a singly linked
/// list.
///
/// The queue is kept at least m_min_len nodes long by padding it with nodes
/// whose payload is NULL, so an element pushed into an otherwise empty queue
/// is only returned after m_min_len pops.  The queue stores the pointers it
/// is given and never deletes the pointed-to objects.
///
/// NOTE(review): the class owns raw nodes but declares no copy constructor or
/// assignment operator; copying an instance would free the nodes twice.
template <class T>
class fifo_pipeline {
public:
   /// @param nm      name used by print(); the pointer is stored, not copied
   /// @param minlen  minimum number of nodes, created up front with NULL payloads
   /// @param maxlen  maximum number of nodes, must be non-zero
   fifo_pipeline(const char* nm, unsigned int minlen, unsigned int maxlen )
      : m_name(nm), m_min_len(minlen), m_max_len(maxlen),
        m_length(0), m_n_element(0), m_head(NULL), m_tail(NULL)
   {
      assert(maxlen);
      // Pre-fill with NULL slots.  These initial slots stay counted in
      // m_n_element, unlike the ones added later by pop()/set_min_length().
      for (unsigned i=0;i<m_min_len;i++)
         push(NULL);
   }

   /// Frees all nodes; payloads are left untouched.
   ~fifo_pipeline()
   {
      while (m_head) {
         fifo_data<T> *victim = m_head;
         m_head = m_head->m_next;
         delete victim;
      }
      m_tail = NULL;
   }

   /// Appends @p data.  Requires the queue not to be at its maximum length.
   void push(T* data )
   {
      assert(m_length < m_max_len);
      // A new node is needed when the list is empty, when the tail already
      // carries a payload, or when the list is still shorter than the minimum
      // length.  Otherwise the NULL payload of the tail slot is overwritten
      // and neither counter changes.
      const bool need_node = (m_head == NULL)
                          || (m_tail->m_data != NULL)
                          || (m_length < m_min_len);
      if (need_node) {
         fifo_data<T> *node = new fifo_data<T>();
         if (m_head)
            m_tail->m_next = node;
         else
            m_head = node;
         m_tail = node;
         m_length++;
         m_n_element++;
      }
      m_tail->m_next = NULL;
      m_tail->m_data = data;
   }

   /// Removes the head node and returns its payload (possibly NULL).
   /// Returns NULL when the list has no nodes.
   T* pop()
   {
      if (m_head == NULL)
         return NULL;

      fifo_data<T> *old_head = m_head;
      T *data = old_head->m_data;
      m_head = old_head->m_next;
      if (old_head == m_tail) {
         // The only node is going away.
         assert( m_head == NULL );
         m_tail = NULL;
      }
      delete old_head;

      m_length--;
      if (m_length == 0) {
         assert( m_head == NULL );
         m_tail = NULL;
      }
      m_n_element--;

      if (m_min_len && m_length < m_min_len) {
         // Keep the minimum length by appending an empty slot ...
         push(NULL);
         // ... and uncount it: NULL elements inserted to create delays are
         // not elements.
         m_n_element--;
      }
      return data;
   }

   /// Payload of the head node, or NULL when the list has no nodes.
   T* top() const
   {
      return m_head ? m_head->m_data : NULL;
   }

   /// Changes the minimum length.  Growing pads the tail with NULL slots;
   /// shrinking removes NULL slots from the tail while the list is longer
   /// than the new minimum.
   void set_min_length(unsigned int new_min_len)
   {
      if (new_min_len == m_min_len) return;

      if (new_min_len > m_min_len) {
         m_min_len = new_min_len;
         while (m_length < m_min_len) {
            push(NULL);
            m_n_element--; // uncount NULL elements inserted to create delays
         }
         return;
      }

      // Shrinking implies the old minimum was larger than 0, hence the list
      // cannot be empty.
      assert(m_head);
      m_min_len = new_min_len;
      while ((m_length > m_min_len) && (m_tail->m_data == 0)) {
         // Locate the node in front of the tail.
         fifo_data<T> *prev = m_head;
         while (prev && (prev->m_next != m_tail))
            prev = prev->m_next;
         if (!prev) {
            // there is only one node, and that node is empty
            assert(m_head->m_data == 0);
            pop();
         } else {
            // there are more than one node, and tail node is empty
            assert(prev->m_next == m_tail);
            delete m_tail;
            m_tail = prev;
            m_tail->m_next = 0;
            m_length--;
         }
      }
   }

   /// True when the number of nodes has reached the maximum length.
   bool full() const { return (m_max_len && m_length >= m_max_len); }
   /// True when the list has no nodes at all (NULL slots count as nodes).
   bool empty() const { return m_head == NULL; }
   /// Element counter; see the constructor and pop() for how NULL slots are counted.
   unsigned get_n_element() const { return m_n_element; }
   /// Number of nodes, including NULL slots.
   unsigned get_length() const { return m_length; }
   unsigned get_max_len() const { return m_max_len; }

   /// Prints "<name>(<length>): " followed by every payload pointer.
   void print() const
   {
      // m_length is unsigned, and %p requires a pointer to void.
      printf("%s(%u): ", m_name, m_length);
      for (const fifo_data<T> *ddp = m_head; ddp; ddp = ddp->m_next)
         printf("%p ", static_cast<const void*>(ddp->m_data));
      printf("\n");
   }

private:
   const char* m_name;        // name shown by print()

   unsigned int m_min_len;    // minimum number of nodes
   unsigned int m_max_len;    // maximum number of nodes
   unsigned int m_length;     // current number of nodes, NULL slots included
   unsigned int m_n_element;  // element counter (see get_n_element())

   fifo_data<T> *m_head;      // front of the queue, next to be popped
   fifo_data<T> *m_tail;      // back of the queue, last pushed
};
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, Ivan Sham, Ali Bakhoda,
// George L. Yuan, Wilson W.L. Fung
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DRAM_H
#define DRAM_H
#include "delayqueue.h"
#include <set>
#include <zlib.h>
#include <stdio.h>
#include <stdlib.h>
#define READ 'R' //define read and write states
#define WRITE 'W'
#define BANK_IDLE 'I'
#define BANK_ACTIVE 'A'
// One request as seen by the DRAM model; wraps the mem_fetch it was created from.
class dram_req_t {
public:
dram_req_t( class mem_fetch *data );
unsigned int row; // row address to activate
unsigned int col; // column address to activate
unsigned int bk; // bank ID (original note: presumably a DRAM channel is addressed in three dimensions)
unsigned int nbytes; // size of the data to read/write
unsigned int txbytes; // NOTE(review): original note says "t: transferred, x: read/write" - presumably bytes transferred so far
unsigned int dqbytes;
unsigned int age;
unsigned int timestamp;
unsigned char rw; // is the request a read or a write?
unsigned long long int addr; // address being read/written
unsigned int insertion_time; // insertion time
class mem_fetch * data;
};
// Timing counters kept per bank group.
struct bankgrp_t
{
unsigned int CCDLc; // column to column delay when bank groups are enabled
unsigned int RTPLc; // read to precharge delay when bank groups are enabled for GDDR5 this is identical to RTPS, if for other DRAM this is different, you will need to split them in two
};
// Timing counters, state and statistics kept per DRAM bank.
struct bank_t
{
unsigned int RCDc; // row to column delay - time required to activate a row before a read (the row/bank selection is issued first; the column is selected once the row is active)
unsigned int RCDWRc; // row to column delay for a write command
unsigned int RASc; // time needed to activate row
unsigned int RPc; // row precharge ie. deactivate row (closing the currently open row of the bank before a new one can be opened)
unsigned int RCc; // row cycle time ie. precharge current, then activate different row
unsigned int WTPc; // write to precharge //time to switch from write to precharge in the same bank
unsigned int RTPc; // read to precharge //time to switch from read to precharge in the same bank
unsigned char rw; // is the bank reading or writing?
unsigned char state; // is the bank active or idle?
unsigned int curr_row; // row currently selected in this bank
dram_req_t *mrq; // memory request (mrq) assigned to this bank
unsigned int n_access;
unsigned int n_writes;
unsigned int n_idle;
unsigned int bkgrpindex;
};
struct mem_fetch;
// Model of one DRAM partition: request queues, per-bank state, timing
// counters and statistics.  Only the interface is declared here.
class dram_t
{
public:
dram_t( unsigned int parition_id, const struct memory_config *config, class memory_stats_t *stats,
class memory_partition_unit *mp );
bool full() const;
void print( FILE* simFile ) const;
void visualize() const;
void print_stat( FILE* simFile );
unsigned que_length() const;
bool returnq_full() const;
unsigned int queue_limit() const;
void visualizer_print( gzFile visualizer_file );
class mem_fetch* return_queue_pop();
class mem_fetch* return_queue_top();
void push( class mem_fetch *data );
void cycle();
void dram_log (int task);
class memory_partition_unit *m_memory_partition_unit;
unsigned int id;
// Power Model
// Reports the command/activity counters through the reference parameters.
void set_dram_power_stats(unsigned &cmd,
unsigned &activity,
unsigned &nop,
unsigned &act,
unsigned &pre,
unsigned &rd,
unsigned &wr,
unsigned &req) const;
private:
// The two scheduling policies (first-in first-out and FR-FCFS, going by their names).
void scheduler_fifo();
void scheduler_frfcfs();
const struct memory_config *m_config;
bankgrp_t **bkgrp;
bank_t **bk;
unsigned int prio;
unsigned int RRDc; // minimal time required between activation of rows in different banks
unsigned int CCDc; // column to column delay
unsigned int RTWc; // read to write penalty applies across banks
unsigned int WTRc; // write to read penalty applies across banks
unsigned char rw; // was last request a read or write? (important for RTW, WTR)
unsigned int pending_writes;
fifo_pipeline<dram_req_t> *rwq; // NOTE(review): purpose not documented in this header
fifo_pipeline<dram_req_t> *mrqq; // NOTE(review): presumably the memory request queue (see mrqq_Dist below) - confirm
// buffer to hold packets when DRAM processing is over
// should be filled with dram clock and popped with l2 or icnt clock
fifo_pipeline<mem_fetch> *returnq; // return buffer described by the two lines above
unsigned int dram_util_bins[10];
unsigned int dram_eff_bins[10];
unsigned int last_n_cmd, last_n_activity, last_bwutil;
unsigned int n_cmd;
unsigned int n_activity;
unsigned int n_nop;
unsigned int n_act;
unsigned int n_pre;
unsigned int n_rd;
unsigned int n_wr;
unsigned int n_req;
unsigned int max_mrqs_temp;
unsigned int bwutil;
unsigned int max_mrqs;
unsigned int ave_mrqs;
class frfcfs_scheduler* m_frfcfs_scheduler;
unsigned int n_cmd_partial;
unsigned int n_activity_partial;
unsigned int n_nop_partial;
unsigned int n_act_partial;
unsigned int n_pre_partial;
unsigned int n_req_partial;
unsigned int ave_mrqs_partial;
unsigned int bwutil_partial;
struct memory_stats_t *m_stats;
class Stats* mrqq_Dist; //memory request queue inside DRAM
// The FR-FCFS scheduler reads private members such as id directly.
friend class frfcfs_scheduler;
};
#endif /*DRAM_H*/
// Copyright (c) 2009-2011, Tor M. Aamodt, Ali Bakhoda, George L. Yuan,
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dram_sched.h"
#include "gpu-misc.h"
#include "gpu-sim.h"
#include "../abstract_hardware_model.h"
#include "mem_latency_stat.h"
// Sets up the per-bank bookkeeping of one FR-FCFS scheduler.
//   config : memory configuration; config->nbk sizes every per-bank array
//   dm     : DRAM channel object stored in m_dram
//   stats  : statistics object updated by data_collection() and schedule()
frfcfs_scheduler::frfcfs_scheduler( const memory_config *config, dram_t *dm, memory_stats_t *stats )
{
   m_stats = stats;
   m_dram = dm;
   m_config = config;
   m_num_pending = 0; // nothing queued yet

   // Per-bank request queue (add_req() pushes new requests at the front).
   m_queue = new std::list<dram_req_t*>[m_config->nbk];
   // Per-bank index: row number -> iterators into that bank's m_queue.
   m_bins = new std::map<unsigned, std::list<std::list<dram_req_t*>::iterator> >[ m_config->nbk ];
   // Per-bank pointer to the bin currently being drained (NULL when none is selected).
   m_last_row = new std::list<std::list<dram_req_t*>::iterator>*[ m_config->nbk ];
   // Per-bank row service time and the cycle at which service of the current row started.
   curr_row_service_time = new unsigned[m_config->nbk];
   row_service_timestamp = new unsigned[m_config->nbk];

   // The freshly allocated lists and maps are already empty; only the plain
   // pointer/counter arrays need explicit initial values.
   for ( unsigned bank = 0; bank < m_config->nbk; ++bank ) {
      row_service_timestamp[bank] = 0;
      curr_row_service_time[bank] = 0;
      m_last_row[bank] = NULL;
   }
}
// Hands a new DRAM request to the scheduler.
// The request goes to the front of its bank's queue, and an iterator to it is
// put at the front of the bin for its row, so the oldest request of a
// bank/row is always found at the back.
void frfcfs_scheduler::add_req( dram_req_t *req )
{
   std::list<dram_req_t*> &bank_queue = m_queue[req->bk];
   bank_queue.push_front(req);
   // newest reqs to the front
   m_bins[req->bk][req->row].push_front( bank_queue.begin() );
   ++m_num_pending;
}
// Updates the row statistics of `bank`; schedule() calls this when it picks a
// request whose row differs from the bank's current row.
// Records the longest time spent on one row and the largest number of
// accesses served from one row, then restarts both measurements and counts
// one more activate for the bank.
void frfcfs_scheduler::data_collection(unsigned int bank)
{
   m_stats->num_activates[m_dram->id][bank]++;

   // Time spent servicing the row that is now being left.
   if (gpu_sim_cycle > row_service_timestamp[bank]) {
      const unsigned elapsed = gpu_sim_cycle - row_service_timestamp[bank];
      if (elapsed > m_stats->max_servicetime2samerow[m_dram->id][bank])
         m_stats->max_servicetime2samerow[m_dram->id][bank] = elapsed;
   }
   row_service_timestamp[bank] = gpu_sim_cycle;
   curr_row_service_time[bank] = 0;

   // Accesses served from the row that is now being left.
   if (m_stats->concurrent_row_access[m_dram->id][bank] > m_stats->max_conc_access2samerow[m_dram->id][bank]) {
      m_stats->max_conc_access2samerow[m_dram->id][bank] = m_stats->concurrent_row_access[m_dram->id][bank];
   }
   m_stats->concurrent_row_access[m_dram->id][bank] = 0;
}
// FR-FCFS selection for one bank.
//   bank     : index of the bank to schedule for
//   curr_row : row the caller reports as the bank's current row
// Returns the chosen request (removed from the scheduler), or NULL when no
// row bin is selected and the bank's queue is empty.
dram_req_t *frfcfs_scheduler::schedule( unsigned bank, unsigned curr_row ) // FR-FCFS scheduling algorithm
{
if ( m_last_row[bank] == NULL ) { // no row bin is currently selected for this bank
if ( m_queue[bank].empty() ) // nothing pending for this bank
return NULL;
// Prefer requests that target curr_row.
std::map<unsigned,std::list<std::list<dram_req_t*>::iterator> >::iterator bin_ptr = m_bins[bank].find( curr_row );
if ( bin_ptr == m_bins[bank].end()) {
// No request for curr_row: fall back to the oldest request of the bank
// (add_req() pushes at the front, so the back is the oldest) and select its row's bin.
dram_req_t *req = m_queue[bank].back();
bin_ptr = m_bins[bank].find( req->row );
assert( bin_ptr != m_bins[bank].end() ); // where did the request go???
m_last_row[bank] = &(bin_ptr->second);
// A different row is being selected: update the row statistics.
data_collection(bank);
} else {
m_last_row[bank] = &(bin_ptr->second);
}
}
// Take the oldest request of the selected row bin.
std::list<dram_req_t*>::iterator next = m_last_row[bank]->back();
dram_req_t *req = (*next);
m_stats->concurrent_row_access[m_dram->id][bank]++;
m_stats->row_access[m_dram->id][bank]++;
// Remove the request from both the row bin and the bank queue.
m_last_row[bank]->pop_back();
m_queue[bank].erase(next);
if ( m_last_row[bank]->empty() ) {
// The bin is drained: drop it from the map and forget the selection
// (m_last_row points into the erased map entry, so it must be cleared).
m_bins[bank].erase( req->row );
m_last_row[bank] = NULL;
}
#ifdef DEBUG_FAST_IDEAL_SCHED
if ( req )
printf("%08u : DRAM(%u) scheduling memory request to bank=%u, row=%u\n",
(unsigned)gpu_sim_cycle, m_dram->id, req->bk, req->row );
#endif
assert( req != NULL && m_num_pending != 0 );
m_num_pending--;
return req;
}
// Writes the current queue length of every bank to `fp`, one line per bank.
//   fp : destination stream; must be a valid open FILE*
// The previous implementation ignored `fp` and always wrote to stdout.
void frfcfs_scheduler::print( FILE *fp )
{
   for ( unsigned b=0; b < m_config->nbk; b++ ) {
      fprintf(fp, " %u: queue length = %u\n", b, (unsigned)m_queue[b].size() );
   }
}
void dram_t::scheduler_frfcfs() // 行优先的先来先服务
{ //
unsigned mrq_latency; // 记录
frfcfs_scheduler *sched = m_frfcfs_scheduler; // FRFCFS调度器
while ( !mrqq->empty() && (!m_config->gpgpu_frfcfs_dram_sched_queue_size || sched->num_pending() < m_config->gpgpu_frfcfs_dram_sched_queue_size)) {
dram_req_t *req = mrqq->pop(); // 取出mrqq队首的请求 req(dram_req_t)
// 循环条件:mrqq队列不空 && ( gpgpu_frfcfs_dram_sched_queue_size != 0 || FRFCFS调度器中的访存请求数量 < gpgpu_frfcfs_dram_sched_queue_size )
// Power stats
// if(req->data->get_type() != READ_REPLY && req->data->get_type() != WRITE_ACK)
m_stats->total_n_access++; // 更新统计量: 总的内存访问数量+1
// 判断请求的类型
if(req->data->get_type() == WRITE_REQUEST){ // 写入请求
m_stats->total_n_writes++;
} else if(req->data->get_type() == READ_REQUEST){ // 读取请求
m_stats->total_n_reads++;
}
req->data->set_status(IN_PARTITION_MC_INPUT_QUEUE,gpu_sim_cycle+gpu_tot_sim_cycle); // 设置访问状态
sched->add_req(req); // 调度器中,加入该请求req
}
dram_req_t *req;
unsigned i;
for ( i=0; i < m_config->nbk; i++ ) {
unsigned b = (i+prio)%m_config->nbk;
if ( !bk[b]->mrq ) {
req = sched->schedule(b, bk[b]->curr_row);
if ( req ) {
req->data->set_status(IN_PARTITION_MC_BANK_ARB_QUEUE,gpu_sim_cycle+gpu_tot_sim_cycle);
prio = (prio+1)%m_config->nbk;
bk[b]->mrq = req;
if (m_config->gpgpu_memlatency_stat) {
mrq_latency = gpu_sim_cycle + gpu_tot_sim_cycle - bk[b]->mrq->timestamp;
bk[b]->mrq->timestamp = gpu_tot_sim_cycle + gpu_sim_cycle;
m_stats->mrq_lat_table[LOGB2(mrq_latency)]++;
if (mrq_latency > m_stats->max_mrq_latency) {
m_stats->max_mrq_latency = mrq_latency;
}
}
break;
}
}
}
}
// Copyright (c) 2009-2011, Tor M. Aamodt, Ali Bakhoda, George L. Yuan
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef dram_sched_h_INCLUDED
#define dram_sched_h_INCLUDED
#include "dram.h"
#include "shader.h"
#include "gpu-sim.h"
#include "gpu-misc.h"
#include <list>
#include <map>
// FR-FCFS request scheduler for one DRAM channel (dram_t).
// Requests are kept per bank, both in arrival order (m_queue) and grouped by
// row (m_bins), so schedule() can prefer requests to a bank's current row.
// NOTE(review): the constructor allocates the per-bank arrays with new[] and
// no destructor is declared here — confirm the intended lifetime.
class frfcfs_scheduler {
public:
frfcfs_scheduler( const memory_config *config, dram_t *dm, memory_stats_t *stats );
// Queues a request under its bank (req->bk) and row (req->row).
void add_req( dram_req_t *req );
// Updates the row-service statistics of `bank` and counts an activate.
void data_collection(unsigned bank);
// Picks and removes the next request for `bank`; NULL when none is available.
dram_req_t *schedule( unsigned bank, unsigned curr_row );
// Prints the queue length of every bank.
void print( FILE *fp );
// Number of requests added but not yet handed out by schedule().
unsigned num_pending() const { return m_num_pending;}
private:
const memory_config *m_config;
dram_t *m_dram;
unsigned m_num_pending;
// Array (one entry per bank) of request queues; newest request at the front.
std::list<dram_req_t*> *m_queue;
// Array (one entry per bank) of maps: row number -> iterators into m_queue.
std::map<unsigned,std::list<std::list<dram_req_t*>::iterator> > *m_bins;
// Array (one entry per bank) of pointers to the bin being drained; NULL when none.
std::list<std::list<dram_req_t*>::iterator> **m_last_row;
unsigned *curr_row_service_time; /// one set of variables for each bank.
unsigned *row_service_timestamp; /// tracks when scheduler began servicing current row
memory_stats_t *m_stats;
};
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, George L. Yuan
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "gpu-misc.h"
// Returns floor(log2(v)) for v > 0, and 0 for v == 0.
// Binary search for the highest set bit: each step tests the upper half of
// the remaining window (16, 8, 4, 2, then 1 bits wide) and, when it contains
// a set bit, shifts it down and adds the window width to the result.
unsigned int LOGB2( unsigned int v ) {
   static const unsigned int upper_half_mask[5] = { 0xFFFF0000u, 0xFF00u, 0xF0u, 0xCu, 0x2u };
   unsigned int result = 0;
   unsigned int width = 16;
   for ( unsigned int step = 0; step < 5; ++step, width >>= 1 ) {
      if ( (v & upper_half_mask[step]) != 0 ) {
         v >>= width;
         result |= width;
      }
   }
   return result;
}
// Copyright (c) 2009-2011, Tor M. Aamodt, George L. Yuan, Andrew Turner,
// Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef GPU_MISC_H
#define GPU_MISC_H
//enables a verbose printout of all L1 cache misses and all MSHR status changes
//good for a single shader configuration
#define DEBUGL1MISS 0
// Integer base-2 logarithm of v (defined in gpu-misc.cc).
unsigned int LOGB2( unsigned int v );
// Minimum of two / three values. These are macros: each argument may be
// evaluated more than once, so do not pass expressions with side effects.
#define gs_min2(a,b) (((a)<(b))?(a):(b))
#define min3(x,y,z) (((x)<(y) && (x)<(z))?(x):(gs_min2((y),(z))))
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, Ali Bakhoda, Wilson W.L. Fung
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "histogram.h"
#include <assert.h>
// Creates a histogram with `nbins` counters, all set to zero.
//   name  : label printed by fprint()
//   nbins : number of counters
//   bins  : optional array of `nbins` bin boundaries; copied when non-NULL
binned_histogram::binned_histogram (std::string name, int nbins, int* bins)
   : m_name(name), m_nbins(nbins), m_bins(NULL), m_bin_cnts(new int[m_nbins]), m_maximum(0), m_sum(0)
{
   reset_bins();
   if (bins == NULL)
      return;   // no boundaries supplied: m_bins stays NULL

   m_bins = new int[m_nbins];
   for (int idx = nbins - 1; idx >= 0; --idx)
      m_bins[idx] = bins[idx];
}
// Copy constructor: produces an independent deep copy of `other`.
// Besides the counters, the running maximum, the running sum and (when
// present) the bin boundaries are copied as well. Previously these were reset
// to 0 / NULL, so a copied histogram printed "max=0" and "avg=0.00" even
// though its counters were non-zero.
binned_histogram::binned_histogram (const binned_histogram& other)
   : m_name(other.m_name), m_nbins(other.m_nbins), m_bins(NULL),
     m_bin_cnts(new int[m_nbins]), m_maximum(other.m_maximum), m_sum(other.m_sum)
{
   if (other.m_bins) {
      m_bins = new int[m_nbins];
      for (int i = 0; i < m_nbins; i++) {
         m_bins[i] = other.m_bins[i];
      }
   }
   for (int i = 0; i < m_nbins; i++) {
      m_bin_cnts[i] = other.m_bin_cnts[i];
   }
}
// Sets every bin counter to zero. The recorded maximum and sum are left untouched.
void binned_histogram::reset_bins () {
   int remaining = m_nbins;
   while (remaining-- > 0)
      m_bin_cnts[remaining] = 0;
}
// Base-class version: asserts unconditionally, i.e. it is not meant to be
// called. When assertions are compiled out it only tracks the maximum sample.
void binned_histogram::add2bin (int sample) {
   assert(0);
   if (sample > m_maximum)
      m_maximum = sample;
}
// Prints the histogram on one line (no trailing newline):
//   "<name> = <cnt0> <cnt1> ... max=<maximum> avg=<sum / number of samples> "
// The average is reported as 0.00 when no sample has been counted.
void binned_histogram::fprint (FILE *fout) const
{
   // std::string::c_str() never returns NULL, so the name is always printed.
   fprintf(fout, "%s = ", m_name.c_str());

   int sample_count = 0;
   for (int bin = 0; bin < m_nbins; ++bin) {
      const int count = m_bin_cnts[bin];
      fprintf(fout, "%d ", count);
      sample_count += count;
   }

   fprintf(fout, "max=%d ", m_maximum);

   const float avg = (sample_count > 0) ? (float)m_sum / sample_count : 0.0f;
   fprintf(fout, "avg=%0.2f ", avg);
}
// Releases the counter array and, when present, the bin-boundary array.
binned_histogram::~binned_histogram () {
   delete[] m_bin_cnts;
   delete[] m_bins;   // delete[] on a null pointer is a no-op
}
// Forwards all arguments to the binned_histogram constructor; no extra state.
pow2_histogram::pow2_histogram (std::string name, int nbins, int* bins)
: binned_histogram (name, nbins, bins) {}
void pow2_histogram::add2bin (int sample) {
assert(sample >= 0);
int bin;
int v = sample;
register unsigned int shift;
bin = (v > 0xFFFF) << 4; v >>= bin;
shift = (v > 0xFF ) << 3; v >>= shift; bin |= shift;
shift = (v > 0xF ) << 2; v >>= shift; bin |= shift;
shift = (v > 0x3 ) << 1; v >>= shift; bin |= shift;
bin |= (v >> 1);
bin += (sample > 0)? 1:0;
m_bin_cnts[bin] += 1;
m_maximum = (sample > m_maximum)? sample : m_maximum;
m_sum += sample;
}
// Creates a histogram whose bins each cover `stride` consecutive sample values.
//   stride : width of one bin (add2bin() divides by it, so it must be non-zero)
//   name   : label printed by fprint(); NULL (the declared default) means no label
//   nbins  : number of bins
//   bins   : optional bin boundaries, forwarded to binned_histogram
// A NULL name is mapped to "" because constructing a std::string from a null
// pointer is undefined behaviour.
linear_histogram::linear_histogram (int stride, const char *name, int nbins, int* bins)
   : binned_histogram (name ? name : "", nbins, bins), m_stride(stride)
{
}
// Counts `sample` in bin sample / m_stride (clamped to the last bin) and
// updates the maximum and sum.
//   sample : value to record; must be >= 0
void linear_histogram::add2bin (int sample) {
   assert(sample >= 0);
   const int last_bin = m_nbins - 1;
   const int raw_bin = sample / m_stride;
   m_bin_cnts[(raw_bin >= m_nbins) ? last_bin : raw_bin] += 1;
   m_sum += sample;
   if (m_maximum < sample)
      m_maximum = sample;
}
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung, Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef HISTOGRAM_H
#define HISTOGRAM_H
#ifdef __cplusplus
#include <stdio.h>
#include <string>
// Base histogram: a label, m_nbins integer counters, and the running maximum
// and sum of the recorded samples.
// add2bin() is not virtual: the derived classes declare their own add2bin(),
// which hides this one rather than overriding it.
// NOTE(review): the class owns new[]-allocated arrays and declares a copy
// constructor and a destructor but no copy-assignment operator — confirm
// instances are never assigned.
class binned_histogram {
public:
// creators
binned_histogram (std::string name = "", int nbins = 32, int* bins = NULL);
binned_histogram (const binned_histogram& other);
virtual ~binned_histogram ();
// modifiers:
void reset_bins (); // zeroes all counters
void add2bin (int sample); // base version asserts; use a derived class
// accessors:
void fprint (FILE *fout) const; // prints name, counters, max and average
protected:
std::string m_name;
int m_nbins;
int *m_bins; // bin boundaries
int *m_bin_cnts; // counters
int m_maximum; // the maximum sample
signed long long int m_sum; // for calculating the average
};
// Histogram whose add2bin() selects the bin from the base-2 logarithm of the sample.
class pow2_histogram : public binned_histogram {
public:
pow2_histogram ( std::string name = "", int nbins = 32, int* bins = NULL);
~pow2_histogram() {}
void add2bin (int sample); // hides binned_histogram::add2bin
};
// Histogram whose add2bin() selects bin sample / stride, clamped to the last bin.
class linear_histogram : public binned_histogram {
public:
// Note that the name defaults to NULL, not to an empty string.
linear_histogram (int stride = 1, const char *name = NULL, int nbins = 32, int* bins = NULL);
~linear_histogram() {}
void add2bin (int sample); // hides binned_histogram::add2bin
private:
int m_stride; // number of sample values covered by one bin
};
#endif
#endif /* HISTOGRAM_H */
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung, Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "icnt_wrapper.h"
#include <assert.h>
#include "../intersim2/globals.hpp"
#include "../intersim2/interconnect_interface.hpp"
// Definitions of the interconnect function pointers declared in icnt_wrapper.h.
// They are assigned by icnt_wrapper_init().
icnt_create_p icnt_create;
icnt_init_p icnt_init;
icnt_has_buffer_p icnt_has_buffer;
icnt_push_p icnt_push;
icnt_pop_p icnt_pop;
icnt_transfer_p icnt_transfer;
icnt_busy_p icnt_busy;
icnt_display_stats_p icnt_display_stats;
icnt_display_overall_stats_p icnt_display_overall_stats;
icnt_display_state_p icnt_display_state;
icnt_get_flit_size_p icnt_get_flit_size;
// Option values filled in by the option parser (see icnt_reg_options()).
int g_network_mode;
char* g_network_config_filename;
#include "../option_parser.h"
// Thin wrappers that adapt the intersim2 interface object to the older, function-pointer based icnt_wrapper API
// TODO: use a delegate / boost::function / C++11 std::function instead
// icnt_create adapter: forwards to g_icnt_interface->CreateInterconnect().
static void intersim2_create(unsigned int n_shader, unsigned int n_mem)
{
g_icnt_interface->CreateInterconnect(n_shader, n_mem);
}
// icnt_init adapter: forwards to g_icnt_interface->Init().
static void intersim2_init()
{
g_icnt_interface->Init();
}
// icnt_has_buffer adapter: returns the result of g_icnt_interface->HasBuffer(input, size).
static bool intersim2_has_buffer(unsigned input, unsigned int size)
{
return g_icnt_interface->HasBuffer(input, size);
}
// icnt_push adapter: forwards all arguments to g_icnt_interface->Push().
static void intersim2_push(unsigned input, unsigned output, void* data, unsigned int size)
{
g_icnt_interface->Push(input, output, data, size);
}
// icnt_pop adapter: returns whatever g_icnt_interface->Pop(output) returns.
static void* intersim2_pop(unsigned output)
{
return g_icnt_interface->Pop(output);
}
// icnt_transfer adapter: calls g_icnt_interface->Advance().
static void intersim2_transfer()
{
g_icnt_interface->Advance();
}
// icnt_busy adapter: returns the result of g_icnt_interface->Busy().
static bool intersim2_busy()
{
return g_icnt_interface->Busy();
}
// icnt_display_stats adapter: calls g_icnt_interface->DisplayStats().
static void intersim2_display_stats()
{
g_icnt_interface->DisplayStats();
}
// icnt_display_overall_stats adapter: calls g_icnt_interface->DisplayOverallStats().
static void intersim2_display_overall_stats()
{
g_icnt_interface->DisplayOverallStats();
}
// icnt_display_state adapter: passes fp to g_icnt_interface->DisplayState().
static void intersim2_display_state(FILE *fp)
{
g_icnt_interface->DisplayState(fp);
}
// icnt_get_flit_size adapter: returns the result of g_icnt_interface->GetFlitSize().
static unsigned intersim2_get_flit_size()
{
return g_icnt_interface->GetFlitSize();
}
// Registers the interconnect command-line options with the option parser:
//   -network_mode      -> g_network_mode (int, default "1")
//   -inter_config_file -> g_network_config_filename (C string, default "mesh")
void icnt_reg_options( class OptionParser * opp )
{
option_parser_register(opp, "-network_mode", OPT_INT32, &g_network_mode, "Interconnection network mode", "1");
option_parser_register(opp, "-inter_config_file", OPT_CSTR, &g_network_config_filename, "Interconnection network config file", "mesh");
}
// Binds the icnt_* function pointers to the implementation selected by
// g_network_mode. INTERSIM is the only supported mode; any other value trips
// an assertion and leaves the pointers untouched.
void icnt_wrapper_init()
{
   if (g_network_mode != INTERSIM) {
      assert(0);
      return;
   }

   //FIXME: delete the object: may add icnt_done wrapper
   g_icnt_interface = InterconnectInterface::New(g_network_config_filename);

   // construction / setup
   icnt_create = intersim2_create;
   icnt_init = intersim2_init;
   // traffic
   icnt_has_buffer = intersim2_has_buffer;
   icnt_push = intersim2_push;
   icnt_pop = intersim2_pop;
   icnt_transfer = intersim2_transfer;
   icnt_busy = intersim2_busy;
   // reporting
   icnt_display_stats = intersim2_display_stats;
   icnt_display_overall_stats = intersim2_display_overall_stats;
   icnt_display_state = intersim2_display_state;
   icnt_get_flit_size = intersim2_get_flit_size;
}
// Copyright (c) 2009-2011, Tor M. Aamodt, Wilson W.L. Fung, Ali Bakhoda
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ICNT_WRAPPER_H
#define ICNT_WRAPPER_H
#include <stdio.h>
// functional interface to the interconnect
// Each typedef is the signature of one interconnect operation; the matching
// global function pointers are declared below.
typedef void (*icnt_create_p)(unsigned n_shader, unsigned n_mem);
typedef void (*icnt_init_p)( );
typedef bool (*icnt_has_buffer_p)(unsigned input, unsigned int size);
typedef void (*icnt_push_p)(unsigned input, unsigned output, void* data, unsigned int size);
typedef void* (*icnt_pop_p)(unsigned output);
typedef void (*icnt_transfer_p)( );
typedef bool (*icnt_busy_p)( );
typedef void (*icnt_drain_p)( );
typedef void (*icnt_display_stats_p)( );
typedef void (*icnt_display_overall_stats_p)( );
typedef void (*icnt_display_state_p)(FILE* fp);
typedef unsigned (*icnt_get_flit_size_p)();
// Global entry points to the interconnect; icnt_wrapper_init() assigns them.
extern icnt_create_p icnt_create;
extern icnt_init_p icnt_init;
extern icnt_has_buffer_p icnt_has_buffer;
extern icnt_push_p icnt_push;
extern icnt_pop_p icnt_pop;
extern icnt_transfer_p icnt_transfer;
extern icnt_busy_p icnt_busy;
// NOTE(review): icnt_wrapper_init() does not assign icnt_drain — confirm it is defined and set elsewhere before use.
extern icnt_drain_p icnt_drain;
extern icnt_display_stats_p icnt_display_stats;
extern icnt_display_overall_stats_p icnt_display_overall_stats;
extern icnt_display_state_p icnt_display_state;
extern icnt_get_flit_size_p icnt_get_flit_size;
// Selected network mode (a network_mode value), set through the -network_mode option.
extern int g_network_mode;
// Values accepted for g_network_mode.
enum network_mode {
INTERSIM = 1,
N_NETWORK_MODE
};
// Binds the icnt_* function pointers according to g_network_mode.
void icnt_wrapper_init();
// Registers the interconnect options with the option parser.
void icnt_reg_options( class OptionParser * opp );
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef MC_PARTITION_INCLUDED
#define MC_PARTITION_INCLUDED
#include "dram.h"
#include "../abstract_hardware_model.h"
#include <list>
#include <queue>
class mem_fetch;
class partition_mf_allocator : public mem_fetch_allocator {
public:
partition_mf_allocator( const memory_config *config )
{
m_memory_config = config;
}
virtual mem_fetch * alloc(const class warp_inst_t &inst, const mem_access_t &access) const
{
abort();
return NULL;
}
virtual mem_fetch * alloc(new_addr_type addr, mem_access_type type, unsigned size, bool wr) const;
private:
const memory_config *m_memory_config;
};
// Memory partition unit contains all the units associated with a single DRAM channel.
// - It arbitrates the DRAM channel among multiple sub partitions.
// - It does not connect directly with the interconnection network.
class memory_partition_unit
{
public:
memory_partition_unit( unsigned partition_id, const struct memory_config *config, class memory_stats_t *stats );
~memory_partition_unit();
bool busy() const;
void cache_cycle( unsigned cycle );
void dram_cycle();
void set_done( mem_fetch *mf );
void visualizer_print( gzFile visualizer_file ) const;
// The next two calls are forwarded to the DRAM model (m_dram).
void print_stat( FILE *fp ) { m_dram->print_stat(fp); }
void visualize() const { m_dram->visualize(); }
void print( FILE *fp ) const;
// Returns entry sub_partition_id of m_sub_partition (no bounds check).
class memory_sub_partition * get_sub_partition(int sub_partition_id)
{
return m_sub_partition[sub_partition_id];
}
// Power model
void set_dram_power_stats(unsigned &n_cmd,
unsigned &n_activity,
unsigned &n_nop,
unsigned &n_act,
unsigned &n_pre,
unsigned &n_rd,
unsigned &n_wr,
unsigned &n_req) const;
int global_sub_partition_id_to_local_id(int global_sub_partition_id) const;
unsigned get_mpid() const { return m_id; }
private:
unsigned m_id;
const struct memory_config *m_config;
class memory_stats_t *m_stats;
class memory_sub_partition **m_sub_partition;
class dram_t *m_dram;
class arbitration_metadata // credit bookkeeping for the sub partitions (L2 cache slices) of this memory partition (DRAM channel)
{
public:
arbitration_metadata(const struct memory_config *config);
// check if a subpartition still has credit
bool has_credits(int inner_sub_partition_id) const;
// borrow a credit for a subpartition
void borrow_credit(int inner_sub_partition_id);
// return a credit from a subpartition
void return_credit(int inner_sub_partition_id);
// return the last subpartition that borrowed credit
int last_borrower() const { return m_last_borrower; } // local id of that sub partition
void print( FILE *fp ) const; // writes the contents to fp
private:
// id of the last subpartition that borrowed credit
int m_last_borrower; // local (per-partition) sub partition id
int m_shared_credit_limit; // limit on shared credits
int m_private_credit_limit; // limit on private credits
// credits borrowed by the subpartitions
std::vector<int> m_private_credit; // one entry per sub partition of this partition
int m_shared_credit; // shared credits currently borrowed
};
arbitration_metadata m_arbitration_metadata; // arbitration state for this partition
// determine whether a given subpartition can issue to DRAM
bool can_issue_to_dram(int inner_sub_partition_id);
// model DRAM access scheduler latency (fixed latency between L2 and DRAM)
struct dram_delay_t // one delayed request: the request and its ready cycle
{
unsigned long long ready_cycle;
class mem_fetch* req;
};
std::list<dram_delay_t> m_dram_latency_queue;
};
// One sub partition of a memory partition: holds an L2 cache, its interface
// object and the FIFO queues linking the interconnect, the L2 and the DRAM.
class memory_sub_partition
{
public:
memory_sub_partition( unsigned sub_partition_id, const struct memory_config *config, class memory_stats_t *stats );
~memory_sub_partition();
unsigned get_id() const { return m_id; }
bool busy() const;
void cache_cycle( unsigned cycle );
bool full() const;
void push( class mem_fetch* mf, unsigned long long clock_cycle );
class mem_fetch* pop();
class mem_fetch* top();
void set_done( mem_fetch *mf );
unsigned flushL2();
// interface to L2_dram_queue
bool L2_dram_queue_empty() const;
class mem_fetch* L2_dram_queue_top() const;
void L2_dram_queue_pop();
// interface to dram_L2_queue
bool dram_L2_queue_full() const;
void dram_L2_queue_push( class mem_fetch* mf );
void visualizer_print( gzFile visualizer_file );
void print_cache_stat(unsigned &accesses, unsigned &misses) const;
void print( FILE *fp ) const;
void accumulate_L2cache_stats(class cache_stats &l2_stats) const;
void get_L2cache_sub_stats(struct cache_sub_stats &css) const;
private:
// data
unsigned m_id; //< the global sub partition ID
const struct memory_config *m_config;
class l2_cache *m_L2cache;
class L2interface *m_L2interface;
partition_mf_allocator *m_mf_allocator;
// model delay of ROP units with a fixed latency
struct rop_delay_t
{
unsigned long long ready_cycle;
class mem_fetch* req;
};
std::queue<rop_delay_t> m_rop;
// these are various FIFOs between units within a memory partition
fifo_pipeline<mem_fetch> *m_icnt_L2_queue; // interconnect-to-L2 queue
fifo_pipeline<mem_fetch> *m_L2_dram_queue; // L2-to-DRAM queue
fifo_pipeline<mem_fetch> *m_dram_L2_queue; // DRAM-to-L2 queue
fifo_pipeline<mem_fetch> *m_L2_icnt_queue; // L2-to-interconnect queue; also the L2 cache hit response queue
class mem_fetch *L2dramout;
unsigned long long int wb_addr;
class memory_stats_t *m_stats;
std::set<mem_fetch*> m_request_tracker;
// L2interface accesses m_L2_dram_queue directly.
friend class L2interface;
};
// mem_fetch_interface implementation backed by a sub partition's L2-to-DRAM
// queue: full() reports that queue's occupancy and push() enqueues into it.
class L2interface : public mem_fetch_interface {
public:
    L2interface( memory_sub_partition *unit ) : m_unit(unit) {}
    virtual ~L2interface() {}

    // size and write are ignored: read and write packets are assumed to
    // have the same size, so only the queue state matters.
    virtual bool full( unsigned size, bool write) const
    {
        return m_unit->m_L2_dram_queue->full();
    }

    // Tags the request as sitting in the L2-to-DRAM queue, then enqueues it.
    // The status-change cycle is recorded as 0 (see FIXME).
    virtual void push(mem_fetch *mf)
    {
        mf->set_status(IN_PARTITION_L2_TO_DRAM_QUEUE,0/*FIXME*/);
        m_unit->m_L2_dram_queue->push(mf);
    }

private:
    memory_sub_partition *m_unit; // sub partition whose queue is used
};
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt, Tim Rogers, Wilson W. L. Fung
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "../trace.h"
#if TRACING_ON
// Prefix for memory-partition trace lines: SIM_PRINT_STR (defined elsewhere)
// followed by the partition id.
#define MEMPART_PRINT_STR SIM_PRINT_STR " %d - "
// True when DTRACE(x) is on and this partition is selected for sampling
// (Trace::sampling_memory_partition == -1 selects every partition).
#define MEMPART_DTRACE(x) ( DTRACE(x) && (Trace::sampling_memory_partition == -1 || Trace::sampling_memory_partition == (int)get_mpid()) )
// Intended to be called from inside components of a memory partition
// Depends on a get_mpid() function
// Prints the prefix (current cycle, stream name, partition id) and then the
// caller's printf-style message.
#define MEMPART_DPRINTF(...) do {\
    if (MEMPART_DTRACE(MEMORY_PARTITION_UNIT)) {\
        printf( MEMPART_PRINT_STR,\
                gpu_sim_cycle + gpu_tot_sim_cycle,\
                Trace::trace_streams_str[Trace::MEMORY_PARTITION_UNIT],\
                get_mpid() );\
        printf(__VA_ARGS__);\
    }\
} while (0)
#else
// Tracing compiled out: the predicate is constant false and the print macro
// expands to an empty statement.
#define MEMPART_DTRACE(x) (false)
// Purely variadic, matching the TRACING_ON definition, so a call that passes
// only a format string does not leave a named-parameter macro's variadic part
// empty (non-conforming before C++20).
#define MEMPART_DPRINTF(...) do {} while (0)
#endif
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mem_fetch.h"
#include "mem_latency_stat.h"
#include "shader.h"
#include "visualizer.h"
#include "gpu-sim.h"
// Next request uid to hand out. The mem_fetch constructor copies it into
// m_request_uid and post-increments it, so the first request gets uid 1.
unsigned mem_fetch::sm_next_mf_request_uid=1;
/// Builds a memory request for one memory access.
///
/// @param access     the access (address, size, type, masks) this request carries; copied
/// @param inst       issuing instruction, or NULL; when non-NULL it is copied into
///                   m_inst and its warp id must equal wid
/// @param ctrl_size  size of the control (header) part of the packet
/// @param wid        warp id of the requester
/// @param sid        shader id of the requester
/// @param tpc        cluster id of the requester
/// @param config     memory configuration; used for address decoding and the
///                   interconnect flit size, and kept in m_mem_config
mem_fetch::mem_fetch( const mem_access_t &access,
                      const warp_inst_t *inst,
                      unsigned ctrl_size,
                      unsigned wid,
                      unsigned sid,
                      unsigned tpc,
                      const class memory_config *config )
{
    m_request_uid = sm_next_mf_request_uid++;
    m_access = access;
    if( inst ) {
        m_inst = *inst;
        assert( wid == m_inst.warp_id() );
    }
    m_data_size = access.get_size();
    m_ctrl_size = ctrl_size;
    m_sid = sid;
    m_tpc = tpc;
    m_wid = wid;
    // decode the address once: raw (decoded) form and partition-local form
    config->m_address_mapping.addrdec_tlx(access.get_addr(),&m_raw_addr);
    m_partition_addr = config->m_address_mapping.partition_address(access.get_addr());
    m_type = m_access.is_write()?WRITE_REQUEST:READ_REQUEST;
    m_timestamp = gpu_sim_cycle + gpu_tot_sim_cycle;
    m_timestamp2 = 0;
    // Previously left uninitialized: get_icnt_receive_time() could return an
    // indeterminate value if set_icnt_receive_time() was never called.
    m_icnt_receive_time = 0;
    m_status = MEM_FETCH_INITIALIZED;
    m_status_change = gpu_sim_cycle + gpu_tot_sim_cycle;
    m_mem_config = config;
    icnt_flit_size = config->icnt_flit_size;
}
// Stamps the request as deleted on destruction.
// NOTE(review): presumably a debugging aid so stale pointers are recognisable -- confirm.
mem_fetch::~mem_fetch()
{
    this->m_status = MEM_FETCH_DELETED;
}
// Expand mem_fetch_status.tup into Status_str[], an array holding the name of
// each status as a string (MF_TUP stringizes its argument). The header expands
// the same list into the mem_fetch_status enum, so Status_str can be indexed
// by a status value.
#define MF_TUP_BEGIN(X) static const char* Status_str[] = {
#define MF_TUP(X) #X
#define MF_TUP_END(X) };
#include "mem_fetch_status.tup"
// drop the helper macros so they do not leak past this expansion
#undef MF_TUP_BEGIN
#undef MF_TUP
#undef MF_TUP_END
// Writes a one-line description of this request to fp: uid, shader/warp ids,
// decoded chip, the access itself, the status (by name when it is a known
// value, numerically otherwise) with the cycle of its last change, and,
// if print_inst is set and an instruction is attached, that instruction.
void mem_fetch::print( FILE *fp, bool print_inst ) const
{
    // tolerate being called through a null pointer
    if( this == NULL ) {
        fprintf(fp," <NULL mem_fetch pointer>\n");
        return;
    }

    fprintf(fp," mf: uid=%6u, sid%02u:w%02u, part=%u, ", m_request_uid, m_sid, m_wid, m_raw_addr.chip );
    m_access.print(fp);

    const bool status_in_range = (unsigned)m_status < NUM_MEM_REQ_STAT;
    if( !status_in_range )
        fprintf(fp," status = %u??? (%llu), ", m_status, m_status_change );
    else
        fprintf(fp," status = %s (%llu), ", Status_str[m_status], m_status_change );

    // the instruction printer is used when available; otherwise just end the line
    if( !m_inst.empty() && print_inst ) {
        m_inst.print(fp);
    } else {
        fprintf(fp,"\n");
    }
}
// Records a new status for this request together with the cycle at which
// the change happened.
void mem_fetch::set_status( enum mem_fetch_status status, unsigned long long cycle )
{
    m_status_change = cycle;
    m_status = status;
}
// True only when an instruction is attached and that instruction is atomic.
bool mem_fetch::isatomic() const
{
    return !m_inst.empty() && m_inst.isatomic();
}
void mem_fetch::do_atomic()
{
m_inst.do_atomic( m_access.get_warp_mask() );
}
// True when an instruction is attached and its memory space is tex_space;
// a request without an instruction is never a texture request.
bool mem_fetch::istexture() const
{
    return !m_inst.empty() && ( m_inst.space.get_type() == tex_space );
}
// True when an instruction is attached and its memory space is either
// const_space or param_space_kernel.
bool mem_fetch::isconst() const
{
    if( m_inst.empty() )
        return false;
    switch( m_inst.space.get_type() ) {
    case const_space:
    case param_space_kernel:
        return true;
    default:
        return false;
    }
}
/// Returns the number of flits this request occupies on the interconnect.
/// simt_to_mem gives the direction of travel (true: SIMT core to memory).
unsigned mem_fetch::get_num_flits(bool simt_to_mem){
    // Data travels with the packet for atomics, for writes heading to memory
    // and for reads heading back from memory; in every other case only the
    // control part is sent. "Write going out or read coming back" is exactly
    // "direction flag equals write flag".
    const bool carries_data = isatomic() || ( simt_to_mem == get_is_write() );
    const unsigned sz = carries_data ? size() : get_ctrl_size();

    // round up to whole flits
    unsigned flits = sz / icnt_flit_size;
    if( sz % icnt_flit_size )
        flits += 1;
    return flits;
}
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef MEM_FETCH_H
#define MEM_FETCH_H
#include "addrdec.h"
#include "../abstract_hardware_model.h"
#include <bitset>
// Kind of packet a mem_fetch currently represents. set_reply() turns
// READ_REQUEST into READ_REPLY and WRITE_REQUEST into WRITE_ACK.
enum mf_type {
READ_REQUEST = 0, // read request (load instruction)
WRITE_REQUEST, // write request (store instruction); carries data
READ_REPLY, // send to shader: reply to a read, returning the data
WRITE_ACK // write acknowledgement; carries no data, only metadata
};
// Expand mem_fetch_status.tup into `enum mem_fetch_status { ... };`
// (MF_TUP_BEGIN supplies the enum header, MF_TUP passes each name through,
// MF_TUP_END closes the enum).
#define MF_TUP_BEGIN(X) enum X {
#define MF_TUP(X) X
#define MF_TUP_END(X) };
#include "mem_fetch_status.tup"
// drop the helper macros so they do not leak out of this header
#undef MF_TUP_BEGIN
#undef MF_TUP
#undef MF_TUP_END
/// A memory request (data or instruction fetch) travelling through the memory
/// system. It carries the access description, the ids of the requesting
/// warp / shader / cluster, its current status, and a copy of the requesting
/// instruction when one was supplied.
class mem_fetch {
public:
    mem_fetch( const mem_access_t &access,
               const warp_inst_t *inst,
               unsigned ctrl_size,
               unsigned wid,
               unsigned sid,
               unsigned tpc,
               const class memory_config *config );
   ~mem_fetch();

   void set_status( enum mem_fetch_status status, unsigned long long cycle );

   /// Converts a request into its reply: READ_REQUEST becomes READ_REPLY and
   /// WRITE_REQUEST becomes WRITE_ACK. Any other type is left unchanged.
   /// Must not be called on L1/L2 write-back accesses.
   void set_reply()
   {
       assert( m_access.get_type() != L1_WRBK_ACC && m_access.get_type() != L2_WRBK_ACC );
       if( m_type==READ_REQUEST ) {
           assert( !get_is_write() ); // a read request must not carry a write access
           m_type = READ_REPLY;
       } else if( m_type == WRITE_REQUEST ) {
           assert( get_is_write() );  // a write request must carry a write access
           m_type = WRITE_ACK;
       }
   }
   void do_atomic();

   void print( FILE *fp, bool print_inst = true ) const;

   const addrdec_t &get_tlx_addr() const { return m_raw_addr; }   // raw physical address (decoded DRAM chip-row-bank-column)
   unsigned get_data_size() const { return m_data_size; }         // size of the data part of the packet
   void     set_data_size( unsigned size ) { m_data_size=size; }  // overrides the size of the data part
   unsigned get_ctrl_size() const { return m_ctrl_size; }         // size of the control (header) part
   unsigned size() const { return m_data_size+m_ctrl_size; }      // total packet size: data + control
   bool is_write() const {return m_access.is_write();}            // same as get_is_write(); now const so it works on const objects
   void set_addr(new_addr_type addr) { m_access.set_addr(addr); }
   new_addr_type get_addr() const { return m_access.get_addr(); } // address of the access
   new_addr_type get_partition_addr() const { return m_partition_addr; }
   unsigned get_sub_partition_id() const { return m_raw_addr.sub_partition; } // sub partition field of the decoded address
   bool     get_is_write() const { return m_access.is_write(); }
   unsigned get_request_uid() const { return m_request_uid; }
   unsigned get_sid() const { return m_sid; }   // shader id
   unsigned get_tpc() const { return m_tpc; }   // cluster id
   unsigned get_wid() const { return m_wid; }   // warp id
   bool istexture() const;
   bool isconst() const;
   enum mf_type get_type() const { return m_type; }
   bool isatomic() const;

   void set_return_timestamp( unsigned t ) { m_timestamp2=t; }
   void set_icnt_receive_time( unsigned t ) { m_icnt_receive_time=t; }
   unsigned get_timestamp() const { return m_timestamp; }
   unsigned get_return_timestamp() const { return m_timestamp2; }
   unsigned get_icnt_receive_time() const { return m_icnt_receive_time; }

   enum mem_access_type get_access_type() const { return m_access.get_type(); }
   const active_mask_t& get_access_warp_mask() const { return m_access.get_warp_mask(); }
   mem_access_byte_mask_t get_access_byte_mask() const { return m_access.get_byte_mask(); }

   // pc of the attached instruction, or -1 when no instruction is attached
   address_type get_pc() const { return m_inst.empty()?-1:m_inst.pc; }
   const warp_inst_t &get_inst() const { return m_inst; }
   enum mem_fetch_status get_status() const { return m_status; }

   const memory_config *get_mem_config() const {return m_mem_config;}

   unsigned get_num_flits(bool simt_to_mem);
private:
   // request source information
   unsigned m_request_uid;
   unsigned m_sid;
   unsigned m_tpc;
   unsigned m_wid;

   // where is this request now?
   enum mem_fetch_status m_status;
   unsigned long long m_status_change;

   // request type, address, size, mask
   mem_access_t m_access;
   unsigned m_data_size; // how much data is being written
   unsigned m_ctrl_size; // how big would all this meta data be in hardware (does not necessarily match actual size of mem_fetch)
   new_addr_type m_partition_addr; // linear physical address *within* dram partition (partition bank select bits squeezed out)
   addrdec_t m_raw_addr; // raw physical address (i.e., decoded DRAM chip-row-bank-column address)
   enum mf_type m_type;  // request / reply kind

   // statistics
   unsigned m_timestamp;  // set to gpu_sim_cycle+gpu_tot_sim_cycle at struct creation
   unsigned m_timestamp2; // set to gpu_sim_cycle+gpu_tot_sim_cycle when pushed onto icnt to shader; only used for reads
   unsigned m_icnt_receive_time; // set to gpu_sim_cycle + interconnect_latency when fixed icnt latency mode is enabled

   // requesting instruction (put last so mem_fetch prints nicer in gdb)
   warp_inst_t m_inst;

   static unsigned sm_next_mf_request_uid;

   const class memory_config *m_mem_config;
   unsigned icnt_flit_size; // copied from the memory config at construction
};
#endif
// Copyright (c) 2009 by Tor M. Aamodt, Tim Rogers and
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// List of mem_fetch status values. The including file defines MF_TUP_BEGIN /
// MF_TUP / MF_TUP_END before including this list; it is expanded once into the
// mem_fetch_status enum and once into the Status_str name table, so both stay
// in the same order. NUM_MEM_REQ_STAT must remain the last entry:
// mem_fetch::print uses it as the upper bound for valid status values.
MF_TUP_BEGIN( mem_fetch_status )
MF_TUP( MEM_FETCH_INITIALIZED ),
MF_TUP( IN_L1I_MISS_QUEUE ),
MF_TUP( IN_L1D_MISS_QUEUE ),
MF_TUP( IN_L1T_MISS_QUEUE ),
MF_TUP( IN_L1C_MISS_QUEUE ),
MF_TUP( IN_L1TLB_MISS_QUEUE ),
MF_TUP( IN_VM_MANAGER_QUEUE ),
MF_TUP( IN_ICNT_TO_MEM ),
MF_TUP( IN_PARTITION_ROP_DELAY ),
MF_TUP( IN_PARTITION_ICNT_TO_L2_QUEUE ),
MF_TUP( IN_PARTITION_L2_TO_DRAM_QUEUE ),
MF_TUP( IN_PARTITION_DRAM_LATENCY_QUEUE ),
MF_TUP( IN_PARTITION_L2_MISS_QUEUE ),
MF_TUP( IN_PARTITION_MC_INTERFACE_QUEUE ),
MF_TUP( IN_PARTITION_MC_INPUT_QUEUE ),
MF_TUP( IN_PARTITION_MC_BANK_ARB_QUEUE ),
MF_TUP( IN_PARTITION_DRAM ),
MF_TUP( IN_PARTITION_MC_RETURNQ ),
MF_TUP( IN_PARTITION_DRAM_TO_L2_QUEUE ),
MF_TUP( IN_PARTITION_L2_FILL_QUEUE ),
MF_TUP( IN_PARTITION_L2_TO_ICNT_QUEUE ),
MF_TUP( IN_ICNT_TO_SHADER ),
MF_TUP( IN_CLUSTER_TO_SHADER_QUEUE ),
MF_TUP( IN_SHADER_LDST_RESPONSE_FIFO ),
MF_TUP( IN_SHADER_FETCHED ),
MF_TUP( IN_SHADER_L1T_ROB ),
MF_TUP( MEM_FETCH_DELETED ),
MF_TUP( NUM_MEM_REQ_STAT )
MF_TUP_END( mem_fetch_status )
// Copyright (c) 2009-2011, Tor M. Aamodt
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef MEM_LATENCY_STAT_H
#define MEM_LATENCY_STAT_H
#include <stdio.h>
#include <zlib.h>
#include <map>
// Collection of memory-system latency and access counters. All counters are
// public data members; the memlatstat_* methods update or print them.
// NOTE(review): method bodies are not visible here; field meanings below come
// from the existing comments and the field names and should be confirmed
// against the implementation.
class memory_stats_t {
public:
memory_stats_t( unsigned n_shader,
const struct shader_core_config *shader_config,
const struct memory_config *mem_config );
unsigned memlatstat_done( class mem_fetch *mf );
void memlatstat_read_done( class mem_fetch *mf );
void memlatstat_dram_access( class mem_fetch *mf );
void memlatstat_icnt2mem_pop( class mem_fetch *mf);
void memlatstat_lat_pw();
void memlatstat_print(unsigned n_mem, unsigned gpu_mem_n_bk);
void visualizer_print( gzFile visualizer_file );
unsigned m_n_shader;
const struct shader_core_config *m_shader_config;
const struct memory_config *m_memory_config;
// maximum observed latencies, one per latency category
unsigned max_mrq_latency;
unsigned max_dq_latency;
unsigned max_mf_latency;
unsigned max_icnt2mem_latency;
unsigned max_icnt2sh_latency;
// latency tables, one per category (presumably histograms -- TODO confirm bucket scheme)
unsigned mrq_lat_table[32];
unsigned dq_lat_table[32];
unsigned mf_lat_table[32];
unsigned icnt2mem_lat_table[24];
unsigned icnt2sh_lat_table[24];
unsigned mf_lat_pw_table[32]; //table storing values of mf latency Per Window
unsigned mf_num_lat_pw;
unsigned max_warps;
unsigned mf_tot_lat_pw; //total latency summed up per window. divide by mf_num_lat_pw to obtain average latency Per Window
unsigned long long int mf_total_lat;
unsigned long long int ** mf_total_lat_table; //mf latency sums[dram chip id][bank id]
unsigned ** mf_max_lat_table; //presumably max mf latency[dram chip id][bank id] (earlier comment said "sums", copied from the line above) -- TODO confirm
unsigned num_mfs;
unsigned int ***bankwrites; //bankwrites[shader id][dram chip id][bank id]
unsigned int ***bankreads; //bankreads[shader id][dram chip id][bank id]
unsigned int **totalbankwrites; //totalbankwrites[dram chip id][bank id]
unsigned int **totalbankreads; //totalbankreads[dram chip id][bank id]
unsigned int **totalbankaccesses; //totalbankaccesses[dram chip id][bank id]
unsigned int *num_MCBs_accessed; //tracks how many memory controllers are accessed whenever any thread in a warp misses in cache
unsigned int *position_of_mrq_chosen; //position of mrq in m_queue chosen
unsigned ***mem_access_type_stats; // dram access type classification
// L2 cache stats
unsigned int *L2_cbtoL2length;
unsigned int *L2_cbtoL2writelength;
unsigned int *L2_L2tocblength;
unsigned int *L2_dramtoL2length;
unsigned int *L2_dramtoL2writelength;
unsigned int *L2_L2todramlength;
// DRAM access row locality stats
unsigned int **concurrent_row_access; //concurrent_row_access[dram chip id][bank id]
unsigned int **num_activates; //num_activates[dram chip id][bank id]
unsigned int **row_access; //row_access[dram chip id][bank id]
unsigned int **max_conc_access2samerow; //max_conc_access2samerow[dram chip id][bank id]
unsigned int **max_servicetime2samerow; //max_servicetime2samerow[dram chip id][bank id]
// Power stats
unsigned total_n_access;
unsigned total_n_reads;
unsigned total_n_writes;
};
#endif /*MEM_LATENCY_STAT_H*/
// Copyright (c) 2009-2011, Tor M. Aamodt, Ahmed El-Shafiey, Tayler Hetherington
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "power_interface.h"
// Initializes the power-model wrapper from the simulator configuration.
// The instruction count handed to the wrapper is tot_inst + inst.
void init_mcpat(const gpgpu_sim_config &config, class gpgpu_sim_wrapper *wrapper, unsigned stat_sample_freq, unsigned tot_inst, unsigned inst){
    const unsigned insts_so_far = tot_inst+inst;

    wrapper->init_mcpat(
        config.g_power_config_name,
        config.g_power_filename,
        config.g_power_trace_filename,
        config.g_metric_trace_filename,
        config.g_steady_state_tracking_filename,
        config.g_power_simulation_enabled,
        config.g_power_trace_enabled,
        config.g_steady_power_levels_enabled,
        config.g_power_per_cycle_dump,
        config.gpu_steady_power_deviation,
        config.gpu_steady_min_period,
        config.g_power_trace_zlevel,
        insts_so_far,
        stat_sample_freq );
}
// Per-cycle hook of the power model. The first call is skipped entirely;
// afterwards, whenever (tot_cycle+cycle) is a multiple of stat_sample_freq,
// the activity counters in power_stats are pushed into the wrapper, power is
// computed, traces are written and the stats are saved for the next window.
void mcpat_cycle(const gpgpu_sim_config &config, const struct shader_core_config *shdr_config,
                 class gpgpu_sim_wrapper *wrapper, class power_stat_t *power_stats,
                 unsigned stat_sample_freq, unsigned tot_cycle, unsigned cycle,
                 unsigned tot_inst, unsigned inst){
    static bool mcpat_init=true;

    // If first cycle, don't have any power numbers yet
    if(mcpat_init){
        mcpat_init=false;
        return;
    }

    // nothing to do between sampling points
    if ((tot_cycle+cycle) % stat_sample_freq != 0)
        return;

    wrapper->set_inst_power(shdr_config->gpgpu_clock_gated_lanes,
                            stat_sample_freq, stat_sample_freq,
                            power_stats->get_total_inst(), power_stats->get_total_int_inst(),
                            power_stats->get_total_fp_inst(), power_stats->get_l1d_read_accesses(),
                            power_stats->get_l1d_write_accesses(), power_stats->get_committed_inst());

    // Single RF for both int and fp ops
    wrapper->set_regfile_power(power_stats->get_regfile_reads(),
                               power_stats->get_regfile_writes(),
                               power_stats->get_non_regfile_operands());

    //Instruction cache stats
    wrapper->set_icache_power(power_stats->get_inst_c_hits(), power_stats->get_inst_c_misses());

    //Constant Cache, shared memory, texture cache
    wrapper->set_ccache_power(power_stats->get_constant_c_hits(), power_stats->get_constant_c_misses());
    wrapper->set_tcache_power(power_stats->get_texture_c_hits(), power_stats->get_texture_c_misses());
    wrapper->set_shrd_mem_power(power_stats->get_shmem_read_access());

    wrapper->set_l1cache_power(power_stats->get_l1d_read_hits(), power_stats->get_l1d_read_misses(),
                               power_stats->get_l1d_write_hits(), power_stats->get_l1d_write_misses());
    wrapper->set_l2cache_power(power_stats->get_l2_read_hits(), power_stats->get_l2_read_misses(),
                               power_stats->get_l2_write_hits(), power_stats->get_l2_write_misses());

    // idle cores = all cores minus the average number of active SMs in the window
    float active_sms=(*power_stats->m_active_sms)/stat_sample_freq;
    float num_cores = shdr_config->num_shader();
    float num_idle_core = num_cores - active_sms;
    wrapper->set_idle_core_power(num_idle_core);

    //pipeline power - pipeline_duty_cycle *= percent_active_sms;
    // average duty cycle over the window, capped at 0.8
    float pipeline_duty_cycle=
        ((*power_stats->m_average_pipeline_duty_cycle/( stat_sample_freq)) < 0.8)
            ?((*power_stats->m_average_pipeline_duty_cycle)/stat_sample_freq)
            :0.8;
    wrapper->set_duty_cycle_power(pipeline_duty_cycle);

    //Memory Controller
    wrapper->set_mem_ctrl_power(power_stats->get_dram_rd(), power_stats->get_dram_wr(), power_stats->get_dram_pre());

    //Execution pipeline accesses
    //FPU (SP) accesses, Integer ALU (not present in Tesla), Sfu accesses
    wrapper->set_exec_unit_power(power_stats->get_tot_fpu_accessess(),
                                 power_stats->get_ialu_accessess(),
                                 power_stats->get_tot_sfu_accessess());

    //Average active lanes for sp and sfu pipelines
    // (the two locals exist only for the sanity asserts below)
    float avg_sp_active_lanes=(power_stats->get_sp_active_lanes())/stat_sample_freq;
    float avg_sfu_active_lanes=(power_stats->get_sfu_active_lanes())/stat_sample_freq;
    assert(avg_sp_active_lanes<=32);
    assert(avg_sfu_active_lanes<=32);
    wrapper->set_active_lanes_power((power_stats->get_sp_active_lanes())/stat_sample_freq,
                                    (power_stats->get_sfu_active_lanes())/stat_sample_freq);

    double n_icnt_simt_to_mem = (double)power_stats->get_icnt_simt_to_mem(); // # flits from SIMT clusters to memory partitions
    double n_icnt_mem_to_simt = (double)power_stats->get_icnt_mem_to_simt(); // # flits from memory partitions to SIMT clusters
    wrapper->set_NoC_power(n_icnt_mem_to_simt, n_icnt_simt_to_mem); // Number of flits traversing the interconnect

    // evaluate the model and emit this window's results
    wrapper->compute();
    wrapper->update_components_power();
    wrapper->print_trace_files();
    power_stats->save_stats();
    wrapper->detect_print_steady_state(0,tot_inst+inst);
    wrapper->power_metrics_calculations();
    wrapper->dump();

    //wrapper->close_files();
}
// Resets the wrapper's counters by forwarding to reset_counters().
void mcpat_reset_perf_count(class gpgpu_sim_wrapper *wrapper)
{
    wrapper->reset_counters();
}
// Copyright (c) 2009-2011, Tor M. Aamodt, Ahmed El-Shafiey, Tayler Hetherington
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef POWER_INTERFACE_H_
#define POWER_INTERFACE_H_
#include "gpu-sim.h"
#include "power_stat.h"
#include "shader.h"
#include "gpgpu_sim_wrapper.h"
// Initializes the power-model wrapper from the simulator configuration;
// the wrapper receives tot_inst + inst as the instruction count.
void init_mcpat(const gpgpu_sim_config &config, class gpgpu_sim_wrapper *wrapper, unsigned stat_sample_freq, unsigned tot_inst, unsigned inst);
// Per-cycle power-model hook: skips its first call, then feeds the counters in
// power_stats to the wrapper whenever (tot_cycle + cycle) is a multiple of
// stat_sample_freq.
void mcpat_cycle(const gpgpu_sim_config &config, const struct shader_core_config *shdr_config, class gpgpu_sim_wrapper *wrapper, class power_stat_t *power_stats,
unsigned stat_sample_freq, unsigned tot_cycle, unsigned cycle, unsigned tot_inst, unsigned inst);
// Resets the wrapper's counters.
void mcpat_reset_perf_count(class gpgpu_sim_wrapper *wrapper);
#endif /* POWER_INTERFACE_H_ */
// Copyright (c) 2009-2011, Tor M. Aamodt, Inderpreet Singh
// The University of British Columbia
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this
// list of conditions and the following disclaimer in the documentation and/or
// other materials provided with the distribution.
// Neither the name of The University of British Columbia nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "scoreboard.h"
#include "shader.h"
#include "../cuda-sim/ptx_sim.h"
#include "shader_trace.h"
// Constructor: remembers the shader id and sizes both per-warp tables
// (reserved registers and long-operation registers) to n_warps empty entries.
Scoreboard::Scoreboard( unsigned sid, unsigned n_warps )
: longopregs(n_warps)
{
    m_sid = sid;
    // one (initially empty) register set per warp
    reg_table.resize(n_warps);
}
// Dumps the scoreboard to stdout: a header line with the shader id, then one
// line per warp that has reserved registers, listing those registers.
void Scoreboard::printContents() const
{
    printf("scoreboard contents (sid=%d): \n", m_sid);
    for(unsigned wid=0; wid<reg_table.size(); wid++) {
        // warps with nothing reserved are not printed
        if(reg_table[wid].empty()) continue;

        printf(" wid = %2d: ", wid);
        std::set<unsigned>::const_iterator reg = reg_table[wid].begin();
        while( reg != reg_table[wid].end() ) {
            printf("%u ", *reg);
            reg++;
        }
        printf("\n");
    }
}
// Marks register regnum of warp wid as reserved (write-pending).
// Reserving a register that is already reserved is treated as a fatal
// simulator error: a message is printed and the process aborts.
void Scoreboard::reserveRegister(unsigned wid, unsigned regnum)
{
    if( reg_table[wid].find(regnum) != reg_table[wid].end() ){
        printf("Error: trying to reserve an already reserved register (sid=%d, wid=%d, regnum=%d).", m_sid, wid, regnum);
        // abort() is not required to flush stdio buffers, and the message has
        // no trailing newline, so flush explicitly or the diagnostic can be lost.
        fflush(stdout);
        abort();
    }
    SHADER_DPRINTF( SCOREBOARD,
                    "Reserved Register - warp:%d, reg: %d\n", wid, regnum );
    reg_table[wid].insert(regnum);
}
// Unmark register as write-pending.
// Does nothing (and traces nothing) when the register is not reserved.
void Scoreboard::releaseRegister(unsigned wid, unsigned regnum)
{
    const bool is_reserved = reg_table[wid].find(regnum) != reg_table[wid].end();
    if( !is_reserved ) {
        return;
    }
    SHADER_DPRINTF( SCOREBOARD,
                    "Release register - warp:%d, reg: %d\n", wid, regnum );
    reg_table[wid].erase(regnum);
}
/**
 * Reports whether a register of a warp is recorded in longopregs.
 *
 * @param warp_id  warp index into longopregs
 * @param regnum   register number to look up
 * @return true if the register is currently marked as a long-operation target
 **/
const bool Scoreboard::islongop (unsigned warp_id,unsigned regnum) {
    const bool marked = longopregs[warp_id].count(regnum) != 0;
    return marked;
}
/**
 * Reserves every destination register of an instruction.
 *
 * Only the four out[] slots holding a value greater than zero are considered.
 * When the instruction is a load from the global, local, param (kernel, local,
 * unclassified) or texture space, the same registers are additionally recorded
 * in longopregs.
 *
 * @param inst  instruction whose destination registers are reserved
 **/
void Scoreboard::reserveRegisters(const class warp_inst_t* inst)
{
    // First pass: reserve each valid destination register.
    for( unsigned slot=0; slot < 4; ++slot ) {
        if( !(inst->out[slot] > 0) )
            continue;
        reserveRegister(inst->warp_id(), inst->out[slot]);
        SHADER_DPRINTF( SCOREBOARD,
                        "Reserved register - warp:%d, reg: %d\n",
                        inst->warp_id(),
                        inst->out[slot] );
    }

    // Second pass: only loads from the listed memory spaces are tracked as long operations.
    if( !inst->is_load() )
        return;

    const bool long_latency_space =
        inst->space.get_type() == global_space ||
        inst->space.get_type() == local_space ||
        inst->space.get_type() == param_space_kernel ||
        inst->space.get_type() == param_space_local ||
        inst->space.get_type() == param_space_unclassified ||
        inst->space.get_type() == tex_space;
    if( !long_latency_space )
        return;

    for( unsigned slot=0; slot < 4; ++slot ) {
        if( !(inst->out[slot] > 0) )
            continue;
        SHADER_DPRINTF( SCOREBOARD,
                        "New longopreg marked - warp:%d, reg: %d\n",
                        inst->warp_id(),
                        inst->out[slot] );
        longopregs[inst->warp_id()].insert(inst->out[slot]);
    }
}
/**
 * Releases every destination register of an instruction.
 *
 * Each out[] slot holding a value greater than zero is released from
 * reg_table and removed from longopregs for the instruction's warp.
 *
 * @param inst  instruction whose destination registers are released
 **/
void Scoreboard::releaseRegisters(const class warp_inst_t *inst)
{
    for( unsigned slot=0; slot < 4; ++slot ) {
        if( !(inst->out[slot] > 0) )
            continue;

        SHADER_DPRINTF( SCOREBOARD,
                        "Register Released - warp:%d, reg: %d\n",
                        inst->warp_id(),
                        inst->out[slot] );
        releaseRegister(inst->warp_id(), inst->out[slot]);
        longopregs[inst->warp_id()].erase(inst->out[slot]);
    }
}
/**
* Checks to see if registers used by an instruction are reserved in the scoreboard
*
* @return
* true if WAW or RAW hazard (no WAR since in-order issue)
**/
bool Scoreboard::checkCollision( unsigned wid, const class inst_t *inst ) const
{
// Get list of all input and output registers
std::set<int> inst_regs;
if(inst->out[0] > 0) inst_regs.insert(inst->out[0]);
if(inst->out[1] > 0) inst_regs.insert(inst->out[1]);
if(inst->out[2] > 0) inst_regs.insert(inst->out[2]);
if(inst->out[3] > 0) inst_regs.insert(inst->out[3]);
if(inst->in[0] > 0) inst_regs.insert(inst->in[0]);
if(inst->in[1] > 0) inst_regs.insert(inst->in[1]);
if(inst->in[2] > 0) inst_regs.insert(inst->in[2]);
if(inst->in[3] > 0) inst_regs.insert(inst->in[3]);
if(inst->pred > 0) inst_regs.insert(inst->pred);
if(inst->ar1 > 0) inst_regs.insert(inst->ar1);
if(inst->ar2 > 0) inst_regs.insert(inst->ar2);
// Check for collision, get the intersection of reserved registers and instruction registers
std::set<int>::const_iterator it2;
for ( it2=inst_regs.begin() ; it2 != inst_regs.end(); it2++ )
if(reg_table[wid].find(*it2) != reg_table[wid].end()) {
return true;
}
return false;
}
/**
 * Reports whether a warp still has reserved registers.
 *
 * @param wid  warp index into reg_table
 * @return true if at least one register of the warp is reserved
 **/
bool Scoreboard::pendingWrites(unsigned wid) const
{
    const bool nothing_reserved = reg_table[wid].empty();
    return !nothing_reserved;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment