Unverified Commit e38d00e2 by Tianqi Chen Committed by GitHub

[REFACTOR] Remove stale verilog generator (#2964)

parent 3441b95e
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_verilog.cc
*/
#include <tvm/ir_pass.h>
#include <cctype>
#include <sstream>
#include <iostream>
#include <utility>
#include "codegen_verilog.h"
#include "../../arithmetic/compute_expr.h"
namespace tvm {
namespace codegen {
namespace verilog {
using namespace ir;
void CodeGenVerilog::Init() {
stream << "`include \"tvm_marcos.v\"\n\n";
}
/*!
 * \brief Reset all per-function state before a new function is generated.
 * \param f The function about to be generated (not inspected here).
 */
void CodeGenVerilog::InitFuncState(LoweredFunc f) {
  // State owned by the source base class goes first.
  CodeGenSourceBase::ClearFuncState();
  // Then the Verilog specific bookkeeping.
  this->done_sigs_.clear();
  this->tvm_vpi_modules_.clear();
  this->cmap_.clear();
}
/*!
 * \brief Emit one Verilog module for the given function and record its
 *        argument ids, argument types and VPI modules under its name.
 * \param f The function to be compiled.
 */
void CodeGenVerilog::AddFunction(LoweredFunc f) {
  // Drop whatever the previous function left behind.
  this->InitFuncState(f);
  // Reserve the fixed identifiers. Taking "_" first skips the bare
  // underscore, so SSA variables start from _1.
  for (const char* reserved :
       {"_", "rst", "clk", "done", "enable", "all_input_valid"}) {
    GetUniqueName(reserved);
  }
  const int func_scope = this->BeginScope();
  // Module header: clk, rst, one input per argument, then done.
  stream << "module " << f->name << "(\n";
  PrintDecl("clk", kInput, Bool(1), "");
  stream << ",\n";
  PrintDecl("rst", kInput, Bool(1), "");
  VerilogFuncEntry entry;
  for (Var arg : f->args) {
    stream << ",\n";
    const std::string arg_id = AllocVarID(arg.get());
    entry.arg_ids.push_back(arg_id);
    entry.arg_types.push_back(arg.type());
    PrintDecl(arg_id, kInput, arg.type(), "");
  }
  stream << ",\n";
  PrintDecl("done", kOutput, Bool(1), "");
  stream << "\n);\n";
  // Module body: the pipeline, followed by the combined done signal.
  this->CodeGen(MakePipeline(f));
  PrintAssignAnd("done", done_sigs_);
  this->EndScope(func_scope);
  this->PrintIndent();
  stream << "endmodule\n";
  // Hand the collected VPI module names over to the function entry.
  entry.vpi_modules = std::move(tvm_vpi_modules_);
  functions_[f->name] = entry;
}
std::string VerilogCodeGenModule::AppendSimMain(
const std::string& func_name) const {
// Add main function for simulator hook
const VerilogFuncEntry& entry = fmap.at(func_name);
std::ostringstream stream;
stream << code;
stream << "\n"
<< "module main();\n"
<< " `TVM_DEFINE_TEST_SIGNAL(clk, rst)\n";
// print out function body.
std::vector<std::string> sargs;
for (size_t i = 0; i < entry.arg_types.size(); ++i) {
Type t = entry.arg_types[i];
std::ostringstream sarg;
sarg << "tvm_arg" << i;
std::string vid = sarg.str();
stream << " reg";
if (t.bits() > 1) {
stream << "[" << t.bits() - 1 << ":0]";
}
stream << " " << vid << ";\n";
sargs.push_back(vid);
}
stream << " wire done;\n";
stream << "\n " << func_name << " dut(\n"
<< " .clk(clk),\n"
<< " .rst(rst),\n";
for (size_t i = 0; i < entry.arg_ids.size(); ++i) {
stream << " ." << entry.arg_ids[i] << '('
<< sargs[i] << "),\n";
}
stream << " .done(done)\n"
<< " );\n";
stream << " initial begin\n"
<< " $tvm_session(clk";
for (const std::string& mvpi : entry.vpi_modules) {
stream << ", dut." << mvpi;
}
stream << ");\n"
<< " end\n";
stream << "endmodule\n";
return stream.str();
}
/*!
 * \brief Package the generated text and the function table.
 * \return The code module; the function table is moved out of this object.
 */
VerilogCodeGenModule CodeGenVerilog::Finish() {
  VerilogCodeGenModule result;
  result.fmap = std::move(functions_);
  result.code = stream.str();
  return result;
}
void CodeGenVerilog::PrintDecl(
const std::string& vid, VerilogVarType vtype, Type dtype,
const char* suffix, bool indent) {
if (indent) PrintIndent();
switch (vtype) {
case kReg: stream << "reg "; break;
case kWire: stream << "wire "; break;
case kInput: stream << "input "; break;
case kOutput: stream << "output "; break;
default: LOG(FATAL) << "unsupported vtype=" << vtype;
}
int bits = dtype.bits();
// bits for handle type.
if (dtype.is_handle()) {
bits = 64;
}
if (bits > 1) {
stream << "[" << bits - 1 << ":0] ";
}
stream << vid << suffix;
}
void CodeGenVerilog::PrintSSAAssign(
const std::string& target, const std::string& src, Type t) {
// add target to list of declaration.
PrintDecl(target, kWire, t, ";\n", false);
PrintAssign(target, src);
}
/*!
 * \brief Print `assign target = src;` on its own indented line.
 *
 * A redundant pair of parentheses that wraps the whole of src is dropped.
 * The pair is only removed when the leading '(' is closed by the final ')';
 * text such as "(a) && (b)" is printed unchanged, because removing its
 * first and last character would leave unbalanced parentheses.
 *
 * \param target The signal being driven.
 * \param src The expression text driving it.
 */
void CodeGenVerilog::PrintAssign(
    const std::string& target, const std::string& src) {
  bool strip_outer = false;
  if (src.length() > 3 && src.front() == '(' && src.back() == ')') {
    strip_outer = true;
    int depth = 0;
    // Walk everything except the last character: if the nesting depth
    // drops back to zero, the first '(' was closed before the end.
    for (size_t i = 0; i + 1 < src.length(); ++i) {
      if (src[i] == '(') {
        ++depth;
      } else if (src[i] == ')') {
        --depth;
      }
      if (depth == 0) {
        strip_outer = false;
        break;
      }
    }
  }
  PrintIndent();
  stream << "assign " << target << " = ";
  if (strip_outer) {
    stream << src.substr(1, src.length() - 2);
  } else {
    stream << src;
  }
  stream << ";\n";
}
/*!
 * \brief Drive `target` with the logical AND of all conditions.
 * \param target The signal being driven.
 * \param conds The condition signals; when empty the target is driven by 1.
 */
void CodeGenVerilog::PrintAssignAnd(
    const std::string& target, const std::vector<std::string>& conds) {
  if (conds.empty()) {
    PrintAssign(target, "1");
    return;
  }
  std::string conjunction = conds.front();
  for (size_t i = 1; i < conds.size(); ++i) {
    conjunction += " && ";
    conjunction += conds[i];
  }
  PrintAssign(target, conjunction);
}
void CodeGenVerilog::PrintLine(const std::string& line) {
PrintIndent();
stream << line << '\n';
}
/*!
 * \brief Create the SSA value for the expression `a <opstr> b`.
 * \param t Result type; must be a scalar int or uint type.
 * \param a Left operand.
 * \param b Right operand.
 * \param opstr Operator text placed between the operands.
 * \return The SSA value holding the result.
 */
VerilogValue CodeGenVerilog::MakeBinary(Type t,
                                        VerilogValue a,
                                        VerilogValue b,
                                        const char *opstr) {
  CHECK_EQ(t.lanes(), 1)
      << "Do not yet support vectorized op";
  CHECK(t.is_int() || t.is_uint())
      << "Only support integer operations";
  const std::string expr = a.vid + ' ' + opstr + ' ' + b.vid;
  return GetSSAValue(expr, t);
}
/*!
 * \brief Turn an integer immediate into a constant verilog value.
 *
 * Only scalar constants of at most 32 bits are accepted; anything else
 * is a fatal error.
 *
 * \param op The immediate node; its `value` and `type` fields are read.
 * \param p The code generator on which the literal is marked as constant.
 * \return A kConst value whose id is the printed literal.
 */
template<typename T>
inline VerilogValue IntConst(const T* op, CodeGenVerilog* p) {
  const bool supported = op->type.bits() <= 32 && op->type.lanes() == 1;
  if (!supported) {
    LOG(FATAL) << "Do not support integer constant type " << op->type;
    return VerilogValue();
  }
  std::ostringstream os;
  os << op->value;
  const std::string literal = os.str();
  p->MarkConst(literal);
  return VerilogValue(literal, kConst, op->type);
}
/*! \brief Signed integer immediate: lowered through IntConst. */
VerilogValue CodeGenVerilog::VisitExpr_(const IntImm *op) {
  return IntConst<IntImm>(op, this);
}
/*! \brief Unsigned integer immediate: lowered through IntConst. */
VerilogValue CodeGenVerilog::VisitExpr_(const UIntImm *op) {
  return IntConst<UIntImm>(op, this);
}
/*! \brief Float immediates are rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const FloatImm* /*op*/) {
  LOG(FATAL) << "Do not support float constant in Verilog";
  return VerilogValue();
}
/*! \brief String immediates are rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const StringImm* /*op*/) {
  LOG(FATAL) << "Do not support string constant in Verilog";
  return VerilogValue();
}
/*! \brief Type casts are rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Cast* /*op*/) {
  LOG(FATAL) << "Type cast not supported";
  return VerilogValue();
}
/*! \brief A variable maps to its allocated id, tagged as kReg. */
VerilogValue CodeGenVerilog::VisitExpr_(const Variable *op) {
  const std::string var_id = GetVarID(op);
  return VerilogValue(var_id, kReg, op->type);
}
// Arithmetic operators.
// The operands are lowered in separate statements, left operand first.
// Lowering may print SSA declarations, and C++ leaves the evaluation order
// of function arguments unspecified, so nesting both MakeValue calls in one
// argument list would let the emitted text depend on the compiler.

/*! \brief Lower `a + b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const Add *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "+");
}
/*! \brief Lower `a - b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const Sub *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "-");
}
/*! \brief Lower `a * b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const Mul *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "*");
}
/*!
 * \brief Lower a division.
 *
 * Only integer division by a constant power of two is accepted; it is
 * rewritten into a right shift. Everything else is a fatal error.
 */
VerilogValue CodeGenVerilog::VisitExpr_(const Div *op) {
  int shift;
  const bool pow2_divisor = is_const_power_of_two_integer(op->b, &shift);
  const bool integer_type = op->type.is_int() || op->type.is_uint();
  if (pow2_divisor && integer_type) {
    return MakeValue(op->a >> make_const(op->b.type(), shift));
  }
  LOG(FATAL) << "do not support synthesis division";
  return VerilogValue();
}
/*! \brief Modulo is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Mod* /*op*/) {
  LOG(FATAL) << "do not support synthesis Mod";
  return VerilogValue();
}
/*! \brief Min is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Min* /*op*/) {
  LOG(FATAL) << "not supported";
  return VerilogValue();
}
/*! \brief Max is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Max* /*op*/) {
  LOG(FATAL) << "not supported";
  return VerilogValue();
}
// Comparison operators.
// The operands are lowered in separate statements, left operand first.
// Lowering may print SSA declarations, and C++ leaves the evaluation order
// of function arguments unspecified, so nesting both MakeValue calls in one
// argument list would let the emitted text depend on the compiler.

/*! \brief Lower `a == b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const EQ *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "==");
}
/*! \brief Lower `a != b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const NE *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "!=");
}
/*! \brief Lower `a < b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const LT *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "<");
}
/*! \brief Lower `a <= b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const LE *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "<=");
}
/*! \brief Lower `a > b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const GT *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, ">");
}
/*! \brief Lower `a >= b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const GE *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, ">=");
}
// Logical operators.
// The operands are lowered in separate statements, left operand first.
// Lowering may print SSA declarations, and C++ leaves the evaluation order
// of function arguments unspecified, so nesting both MakeValue calls in one
// argument list would let the emitted text depend on the compiler.

/*! \brief Lower `a && b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const And *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "&&");
}
/*! \brief Lower `a || b`. */
VerilogValue CodeGenVerilog::VisitExpr_(const Or *op) {
  VerilogValue lhs = MakeValue(op->a);
  VerilogValue rhs = MakeValue(op->b);
  return MakeBinary(op->type, lhs, rhs, "||");
}
/*! \brief Lower a logical negation into the SSA value of `(!a)`. */
VerilogValue CodeGenVerilog::VisitExpr_(const Not *op) {
  const VerilogValue operand = MakeValue(op->a);
  const std::string negated = "(!" + operand.vid + ")";
  return GetSSAValue(negated, op->type);
}
/*!
 * \brief Lower an intrinsic call.
 *
 * Supported intrinsics are bitwise and/xor/or/not and shift left/right.
 * Any other call is a fatal error.
 *
 * The operands of the binary intrinsics are lowered in separate statements,
 * args[0] first. Lowering may print SSA declarations, and C++ leaves the
 * evaluation order of function arguments unspecified, so nesting both
 * MakeValue calls in one argument list would let the emitted text depend
 * on the compiler.
 */
VerilogValue CodeGenVerilog::VisitExpr_(const Call *op) {
  // The only unary intrinsic.
  if (op->is_intrinsic(Call::bitwise_not)) {
    VerilogValue operand = MakeValue(op->args[0]);
    return GetSSAValue("(~" + operand.vid + ")", op->type);
  }
  // Select the operator text of the binary intrinsics.
  const char* opstr = nullptr;
  if (op->is_intrinsic(Call::bitwise_and)) {
    opstr = "&";
  } else if (op->is_intrinsic(Call::bitwise_xor)) {
    opstr = "^";
  } else if (op->is_intrinsic(Call::bitwise_or)) {
    opstr = "|";
  } else if (op->is_intrinsic(Call::shift_left)) {
    opstr = "<<";
  } else if (op->is_intrinsic(Call::shift_right)) {
    opstr = ">>";
  }
  if (opstr == nullptr) {
    LOG(FATAL) << "Cannot generate call type " << op->name;
    return VerilogValue();
  }
  VerilogValue lhs = MakeValue(op->args[0]);
  VerilogValue rhs = MakeValue(op->args[1]);
  return MakeBinary(op->type, lhs, rhs, opstr);
}
/*!
 * \brief Lower a let expression.
 *
 * The bound value is lowered first and its id is recorded for the let
 * variable, then the body is lowered with that binding in place. The
 * result of the let expression is the value of its body, not the value
 * that was bound.
 */
VerilogValue CodeGenVerilog::VisitExpr_(const Let* op) {
  VerilogValue value = MakeValue(op->value);
  // A variable may only be bound once.
  CHECK(!var_idmap_.count(op->var.get()));
  var_idmap_[op->var.get()] = value.vid;
  return MakeValue(op->body);
}
/*! \brief Ramp is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Ramp* /*op*/) {
  LOG(FATAL) << "Ramp: not supported ";
  return VerilogValue();
}
/*! \brief Broadcast is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Broadcast* /*op*/) {
  LOG(FATAL) << "Broadcast: not supported ";
  return VerilogValue();
}
/*! \brief Select is rejected with a fatal error. */
VerilogValue CodeGenVerilog::VisitExpr_(const Select* /*op*/) {
  LOG(FATAL) << "Select: not supported ";
  return VerilogValue();
}
void CodeGenVerilog::CodeGen(const Pipeline& pipeline) {
// setup channel map.
for (auto kv : pipeline->channels) {
ChannelEntry e; e.block = kv.second;
cmap_[kv.first.get()] = e;
}
for (ComputeBlock stage : pipeline->stages) {
const Store* store = stage->body.as<Store>();
CHECK(store);
const Load* load = store->value.as<Load>();
if (load) {
MakeLoadToFIFO(stage, store, load);
} else {
MakeStore(stage, store);
}
}
for (const auto& kv : cmap_) {
MakeChannelUnit(kv.second);
}
}
/*!
 * \brief Emit the loop macros of a stage.
 *
 * The loops are emitted from the innermost (last) to the outermost (first)
 * entry of the array; the finish signal of each loop is passed as the
 * end-of-loop argument of the next macro. When the nest is not empty it
 * is wrapped by one WRAP_LOOP_ONCE macro.
 *
 * \param loop The loop nest; every entry must be a For statement whose
 *             min and extent lower to constants.
 * \return The valid/ready signal names of the loop.
 */
CodeGenVerilog::SignalEntry
CodeGenVerilog::MakeLoop(const Array<Stmt>& loop) {
  SignalEntry sig;
  // do not use init signal for now.
  std::string init = "0";
  std::string lp_ready = GetUniqueName("lp_tmp_sig");
  sig.ready = GetUniqueName("loop_ready");
  sig.valid = GetUniqueName("loop_valid");
  PrintLine("// loop logic");
  PrintDecl(lp_ready, kWire, Bool(1));
  PrintDecl(sig.ready, kWire, Bool(1));
  std::string end_loop = lp_ready;
  for (size_t i = loop.size(); i != 0; --i) {
    const For* for_op = loop[i - 1].as<For>();
    // as<For>() yields nullptr for any other statement kind; fail with a
    // message instead of dereferencing a null pointer below.
    CHECK(for_op != nullptr)
        << "Only support For statements in the loop nest";
    int bits = for_op->loop_var.type().bits();
    VerilogValue min = MakeValue(for_op->min);
    VerilogValue extent = MakeValue(for_op->extent);
    CHECK(min.vtype == kConst && extent.vtype == kConst)
        << "Only support constant loop domain";
    std::string vid = AllocVarID(for_op->loop_var.get());
    std::string finish = GetUniqueName(vid + "_finish");
    this->PrintIndent();
    stream <<"`NONSTOP_LOOP(" << vid << ", " << bits << ", " << init
           << ", " << end_loop << ", " << finish
           << ", " << min.vid << ", " << extent.vid << ")\n";
    // The next (outer) loop advances when this one finishes.
    end_loop = finish;
  }
  if (loop.size() != 0) {
    std::string local_ready = GetUniqueName("lp_tmp_sig");
    this->PrintIndent();
    stream <<"`WRAP_LOOP_ONCE(" << init << ", " << sig.valid
           << ", " << sig.ready << ", " << end_loop << ", " << local_ready << ")\n";
    PrintAssign(lp_ready, local_ready);
  }
  return sig;
}
void CodeGenVerilog::MakeStageInputs(
const ComputeBlock& block,
const std::string& enable,
std::string* out_all_input_valid) {
std::vector<SignalEntry> sigs;
sigs.push_back(MakeLoop(block->loop));
// Input data path.
PrintLine("// stage inputs");
for (auto kv : block->inputs) {
const Var& var = kv.first;
const StageInput& arg = kv.second;
std::string vid = AllocVarID(var.get());
this->PrintDecl(vid, kWire, var.type());
if (arg->input_type == kGlobalConst ||
arg->input_type == kLoopVar) {
PrintAssign(vid, GetVarID(arg->var.get()));
} else if (arg->input_type == kChannel) {
std::string vid_valid = GetUniqueName(vid + "_valid");
std::string vid_ready = GetUniqueName(vid + "_ready");
this->PrintDecl(vid_valid, kWire, Bool(1));
this->PrintDecl(vid_ready, kWire, Bool(1));
ChannelEntry* e = GetChannelInfo(arg->var.get());
// TODO(tqchen, thierry) add one cache here.
e->AssignPort("read_data", vid, var.type());
e->AssignPort("read_valid", vid_valid, Bool(1));
e->AssignPort("read_ready", vid_ready, Bool(1));
e->AssignPort("read_addr", "0", Int(1));
sigs.push_back(SignalEntry{vid_valid, vid_ready});
} else {
LOG(FATAL) << "Unknown input type";
}
}
PrintLine("// stage input stall");
std::string all_input_valid = GetUniqueName("all_input_valid");
this->PrintDecl(all_input_valid, kWire, Bool(1));
// forward all valid
std::vector<std::string> valid_conds;
for (const SignalEntry& e : sigs) {
if (e.valid.length() != 0) {
valid_conds.push_back(e.valid);
}
}
PrintAssignAnd(all_input_valid, valid_conds);
// input ready signal
for (size_t i = 0; i < sigs.size(); ++i) {
if (sigs[i].ready.length() == 0) continue;
std::vector<std::string> conds = {enable};
for (size_t j = 0; j < sigs.size(); ++j) {
if (j != i && sigs[j].valid.length() != 0) {
conds.push_back(sigs[j].valid);
}
}
PrintAssignAnd(sigs[i].ready, conds);
}
*out_all_input_valid = all_input_valid;
}
void CodeGenVerilog::MakeDelay(const std::string& dst,
const std::string& src,
Type dtype,
int delay,
const std::string& enable) {
PrintIndent();
stream << "`DELAY(" << dst << ", " << src << ", "
<< dtype.bits() << ", " << delay << ", " << enable << ")\n";
}
void CodeGenVerilog::MakeStore(const ComputeBlock& block,
const Store* store) {
std::string all_input_valid;
std::string enable = GetUniqueName("enable");
this->PrintDecl(enable, kWire, Bool(1));
MakeStageInputs(block, enable, &all_input_valid);
// Data path
PrintLine("// data path");
VerilogValue value = MakeValue(store->value);
VerilogValue index = MakeValue(store->index);
PrintLine("// control and retiming");
ChannelEntry* write_entry = GetChannelInfo(store->buffer_var.get());
// TODO(tqchen, thierry) add delay model from expression.a
int delay = 2;
std::string ch_name = write_entry->block->channel->handle_var->name_hint;
std::string write_addr = GetUniqueName(ch_name + ".write_addr");
std::string write_ready = GetUniqueName(ch_name + ".write_ready");
std::string write_valid = GetUniqueName(ch_name + ".write_valid");
std::string write_data = GetUniqueName(ch_name + ".write_data");
PrintDecl(write_addr, kWire, store->index.type());
PrintDecl(write_ready, kWire, Bool(1));
PrintDecl(write_valid, kWire, Bool(1));
PrintDecl(write_data, kWire, store->value.type());
MakeDelay(write_addr, index.vid, store->index.type(), delay, enable);
MakeDelay(write_data, value.vid, store->value.type(), delay, enable);
MakeDelay(write_valid, all_input_valid, Bool(1), delay, enable);
PrintAssign(enable, "!" + write_valid + " || " + write_ready);
write_entry->AssignPort("write_addr", write_addr, store->index.type());
write_entry->AssignPort("write_ready", write_ready, Bool(1));
write_entry->AssignPort("write_valid", write_valid, Bool(1));
write_entry->AssignPort("write_data", write_data, store->value.type());
// The triggers
for (size_t i = 0; i < block->triggers.size(); ++i) {
SignalTrigger trigger = block->triggers[i];
CHECK(trigger->predicate.type() == Bool(1));
ChannelEntry* trigger_ch = GetChannelInfo(trigger->channel_var.get());
std::string port = trigger_ch->SignalPortName(trigger->signal_index);
VerilogValue v = MakeValue(trigger->predicate);
// Assign constant trigger.
if (v.vtype == kConst) {
trigger_ch->AssignPort(port, v.vid, Bool(1));
} else {
// non-constant trigger
CHECK_EQ(trigger_ch, write_entry)
<< "Can only triggger conditional event at write channel";
std::string v_trigger = GetUniqueName(ch_name + "." + port);
MakeDelay(v_trigger, v.vid, Bool(1), delay, enable);
write_entry->AssignPort(port, v_trigger, Bool(1));
}
}
stream << "\n";
}
void CodeGenVerilog::MakeLoadToFIFO(const ComputeBlock& block,
const Store* store,
const Load* load) {
ChannelEntry* write_entry = GetChannelInfo(store->buffer_var.get());
ChannelEntry* load_entry = GetChannelInfo(load->buffer_var.get());
std::string all_input_valid;
std::string enable = GetUniqueName("enable");
this->PrintDecl(enable, kWire, Bool(1));
MakeStageInputs(block, enable, &all_input_valid);
// data path
PrintLine("// data path");
VerilogValue index = MakeValue(load->index);
// control and retiming
PrintLine("// control and retiming");
// TODO(tqchen, thierry) add delay model from expression
int delay = 1;
std::string read_ch_name = load_entry->block->channel->handle_var->name_hint;
std::string write_ch_name = write_entry->block->channel->handle_var->name_hint;
std::string read_addr = GetUniqueName(read_ch_name + ".read_addr");
std::string read_data = GetUniqueName(read_ch_name + ".read_data");
std::string read_valid = GetUniqueName(read_ch_name + ".read_valid");
std::string index_valid = GetUniqueName(read_ch_name + ".index_valid");
std::string write_ready = GetUniqueName(write_ch_name + ".write_ready");
std::string data_valid = GetUniqueName(read_ch_name + ".data_valid");
std::string valid_delay = GetUniqueName(read_ch_name + ".valid_delay");
PrintDecl(read_addr, kWire, load->index.type());
PrintDecl(read_data, kWire, load->type);
PrintDecl(read_valid, kWire, Bool(1));
PrintDecl(index_valid, kWire, Bool(1));
PrintDecl(data_valid, kWire, Bool(1));
MakeDelay(read_addr, index.vid, load->index.type(), delay, enable);
MakeDelay(index_valid, all_input_valid, Bool(1), delay, enable);
PrintAssignAnd(data_valid, {read_valid, index_valid});
// The read ports.
load_entry->AssignPort("read_addr", read_addr, load->index.type());
load_entry->AssignPort("read_data", read_data, load->type);
load_entry->AssignPort("read_valid", read_valid, Bool(1));
// The write ports.
write_entry->AssignPort("write_ready", write_ready, Bool(1));
write_entry->AssignPort("write_data", read_data, load->type);
write_entry->AssignPort("write_valid", valid_delay, Bool(1));
write_entry->AssignPort("write_addr", "0", Int(1));
// The not stall condition.
PrintAssignAnd(enable, {write_ready, read_valid});
// The ready signal
PrintIndent();
stream << "`BUFFER_READ_VALID_DELAY(" << valid_delay << ", " << data_valid
<< ", " << write_ready << ")\n";
// The triggers
for (size_t i = 0; i < block->triggers.size(); ++i) {
SignalTrigger trigger = block->triggers[i];
CHECK(trigger->predicate.type() == Bool(1));
ChannelEntry* trigger_ch = GetChannelInfo(trigger->channel_var.get());
std::string port = trigger_ch->SignalPortName(trigger->signal_index);
VerilogValue v = MakeValue(trigger->predicate);
// Assign constant trigger.
if (v.vtype == kConst) {
trigger_ch->AssignPort(port, v.vid, Bool(1));
} else {
// non-constant trigger
CHECK_EQ(trigger_ch, load_entry)
<< "Can only triggger conditional event at load channel";
std::string v_trigger = GetUniqueName(read_ch_name + "." + port);
MakeDelay(v_trigger, v.vid, Bool(1), delay, enable);
load_entry->AssignPort(port, v_trigger, Bool(1));
}
}
stream << "\n";
}
/*!
 * \brief Emit the unit behind a channel, chosen by its window sizes.
 * \param ch The channel entry to generate.
 */
void CodeGenVerilog::MakeChannelUnit(const ChannelEntry& ch) {
  const auto& block = ch.block;
  // A read window of zero marks a memory map.
  if (block->read_window == 0) {
    MakeChannelMemMap(ch);
    return;
  }
  // Read and write windows of one form a FIFO.
  if (block->read_window == 1 && block->write_window == 1) {
    MakeChannelFIFO(ch);
    return;
  }
  // general Buffer
  MakeChannelBuffer(ch);
}
/*!
 * \brief Emit the VPI memory map module behind a channel.
 *
 * A channel with a connected read address becomes a tvm_vpi_read_mmap, a
 * channel with a connected write address becomes a tvm_vpi_write_mmap.
 * A channel cannot be both. For the write side, every control signal of
 * the channel is appended to the list of done signals.
 *
 * The instance name is registered as a VPI module only when an instance
 * is really emitted. A channel with neither address port emits nothing,
 * and registering it would make the simulator main reference a module
 * instance that does not exist.
 *
 * \param ch The channel entry to generate.
 */
void CodeGenVerilog::MakeChannelMemMap(const ChannelEntry& ch) {
  Var ch_var = ch.block->channel->handle_var;
  std::string dut = GetUniqueName(ch_var->name_hint + ".mmap");
  std::string mmap_addr = GetVarID(ch_var.get());
  if (ch.ports.count("read_addr")) {
    CHECK(!ch.ports.count("write_addr"))
        << "Cannot read/write to same RAM";
    const PortEntry& read_addr = ch.GetPort("read_addr");
    const PortEntry& read_data = ch.GetPort("read_data");
    const PortEntry& read_valid = ch.GetPort("read_valid");
    tvm_vpi_modules_.push_back(dut);
    stream << " // channel setup for " << ch_var << "\n"
           << " tvm_vpi_read_mmap # (\n"
           << " .DATA_WIDTH(" << read_data.dtype.bits() << "),\n"
           << " .ADDR_WIDTH(" << read_addr.dtype.bits() << "),\n"
           << " .BASE_ADDR_WIDTH(" << ch_var.type().bits() << ")\n"
           << " ) " << dut << " (\n"
           << " .clk(clk),\n"
           << " .rst(rst),\n"
           << " .addr(" << read_addr.value << "),\n"
           << " .data_out(" << read_data.value << "),\n"
           << " .mmap_addr(" << mmap_addr << ")\n"
           << " );\n";
    // The read side is driven as always valid.
    PrintAssign(read_valid.value, "1");
  } else if (ch.ports.count("write_addr")) {
    const PortEntry& write_addr = ch.GetPort("write_addr");
    const PortEntry& write_data = ch.GetPort("write_data");
    const PortEntry& write_valid = ch.GetPort("write_valid");
    const PortEntry& write_ready = ch.GetPort("write_ready");
    tvm_vpi_modules_.push_back(dut);
    stream << " // channel setup for " << ch_var << "\n"
           << " tvm_vpi_write_mmap # (\n"
           << " .DATA_WIDTH(" << write_data.dtype.bits() << "),\n"
           << " .ADDR_WIDTH(" << write_addr.dtype.bits() << "),\n"
           << " .BASE_ADDR_WIDTH(" << ch_var.type().bits() << ")\n"
           << " ) " << dut << " (\n"
           << " .clk(clk),\n"
           << " .rst(rst),\n"
           << " .addr(" << write_addr.value << "),\n"
           << " .data_in(" << write_data.value << "),\n"
           << " .en(" << write_valid.value << "),\n"
           << " .mmap_addr(" << mmap_addr << ")\n"
           << " );\n";
    // The write side is driven as always ready.
    PrintAssign(write_ready.value, "1");
    // additional control signals
    for (size_t i = 0; i < ch.block->ctrl_signals.size(); ++i) {
      ControlSignal sig = ch.block->ctrl_signals[i];
      CHECK_EQ(sig->ctrl_type, kComputeFinish);
      std::string port = ch.SignalPortName(i);
      done_sigs_.push_back(ch.GetPort(port).value);
    }
  }
}
void CodeGenVerilog::MakeChannelFIFO(const ChannelEntry& ch) {
Var ch_var = ch.block->channel->handle_var;
std::string dut = GetUniqueName(ch_var->name_hint + ".fifo_reg");
const PortEntry& write_data = ch.GetPort("write_data");
const PortEntry& write_valid = ch.GetPort("write_valid");
const PortEntry& write_ready = ch.GetPort("write_ready");
const PortEntry& read_data = ch.GetPort("read_data");
const PortEntry& read_valid = ch.GetPort("read_valid");
const PortEntry& read_ready = ch.GetPort("read_ready");
CHECK_EQ(write_data.dtype, read_data.dtype);
stream << " // channel setup for " << ch_var << "\n"
<< " `CACHE_REG(" << write_data.dtype.bits()
<< ", " << write_data.value
<< ", " << write_valid.value
<< ", " << write_ready.value
<< ", " << read_data.value
<< ", " << read_valid.value
<< ", " << read_ready.value
<< ")\n";
}
/*! \brief General buffers are rejected with a fatal error. */
void CodeGenVerilog::MakeChannelBuffer(const ChannelEntry& /*ch*/) {
  LOG(FATAL) << "not implemeneted";
}
/*!
 * \brief Look up the channel entry registered for a variable.
 * \param var The handle variable of the channel.
 * \return Pointer to the entry stored in the channel map; a missing
 *         entry is a fatal error.
 */
CodeGenVerilog::ChannelEntry*
CodeGenVerilog::GetChannelInfo(const Variable* var) {
  const auto found = cmap_.find(var);
  const bool known = (found != cmap_.end());
  CHECK(known)
      << "cannot find channel for var " << var->name_hint;
  return &found->second;
}
void CodeGenVerilog::ChannelEntry::AssignPort(
std::string port, std::string value, Type dtype) {
CHECK(!ports.count(port))
<< "port " << port
<< " of channel " << block->channel << " has already been connected";
ports[port] = PortEntry{value, dtype};
}
/*!
 * \brief Look up a connected port of the channel.
 * \param port The port name.
 * \return The stored port entry; an unconnected port is a fatal error.
 */
const CodeGenVerilog::PortEntry&
CodeGenVerilog::ChannelEntry::GetPort(const std::string& port) const {
  const auto found = ports.find(port);
  const bool connected = (found != ports.end());
  CHECK(connected)
      << "port " << port
      << " of channel " << block->channel << " has not been connected";
  return found->second;
}
/*!
 * \brief Name of the port that carries a control signal.
 * \param index Index into the control signals of the channel block;
 *              must be below their count.
 * \return "ctrl_port" followed by the index.
 */
std::string CodeGenVerilog::ChannelEntry::SignalPortName(int index) const {
  CHECK_LT(static_cast<size_t>(index), block->ctrl_signals.size());
  return "ctrl_port" + std::to_string(index);
}
} // namespace verilog
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_verilog.h
* \brief Generate verilog code.
*/
#ifndef TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
#define TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
#include <tvm/base.h>
#include <tvm/ir.h>
#include <tvm/ir_functor_ext.h>
#include <tvm/codegen.h>
#include <tvm/lowered_func.h>
#include <string>
#include <vector>
#include <unordered_map>
#include "verilog_ir.h"
#include "../codegen_source_base.h"
namespace tvm {
namespace codegen {
namespace verilog {
using namespace ir;
/*! \brief The kind of a verilog value or declaration. */
enum VerilogVarType {
  /*! \brief Declared with the `wire` keyword. */
  kWire = 0,
  /*! \brief Declared with the `input` keyword. */
  kInput = 1,
  /*! \brief Declared with the `output` keyword. */
  kOutput = 2,
  /*! \brief Declared with the `reg` keyword. */
  kReg = 3,
  /*! \brief A constant; it has no declaration keyword. */
  kConst = 4
};
/*! \brief A value produced while lowering an expression to verilog. */
struct VerilogValue {
  /*! \brief The identifier (or literal text) of the value */
  std::string vid;
  /*! \brief The kind of the value, kReg unless set otherwise */
  VerilogVarType vtype = kReg;
  /*! \brief The data type it encodes */
  Type dtype;
  /*! \brief Construct an empty value. */
  VerilogValue() {}
  /*!
   * \brief Construct a value from all of its fields.
   * \param id The identifier of the value.
   * \param var_type The kind of the value.
   * \param data_type The data type it encodes.
   */
  VerilogValue(std::string id, VerilogVarType var_type, Type data_type)
      : vid(id), vtype(var_type), dtype(data_type) {}
};
/*! \brief Information of each procedure function generated */
struct VerilogFuncEntry {
/*! \brief The data types of the function arguments, in argument order */
std::vector<Type> arg_types;
/*! \brief The real argument ids of the function, parallel to arg_types */
std::vector<std::string> arg_ids;
/*! \brief The names of the VPI module instances inside the function */
std::vector<std::string> vpi_modules;
};
/*!
* \brief The code module of generated verilog code.
*/
class VerilogCodeGenModule {
public:
/*! \brief The generated code of all modules */
std::string code;
/*! \brief Map from function name to the information recorded for it */
std::unordered_map<std::string, VerilogFuncEntry> fmap;
/*!
* \brief Generate code that appends a simulator main module calling func_name.
* \param func_name The function to be called; it must be a key of fmap.
* \return The generated code: the module code followed by the main module.
*/
std::string AppendSimMain(const std::string& func_name) const;
};
/*!
 * \brief Code generator that turns lowered functions into verilog modules.
 *
 * Expressions are lowered through the ExprFunctor interface into
 * VerilogValue objects; text output and name management come from
 * CodeGenSourceBase.
 */
class CodeGenVerilog :
      public ExprFunctor<VerilogValue(const Expr&)>,
      public CodeGenSourceBase {
 public:
  /*!
   * \brief Initialize the code generator.
   */
  void Init();
  /*!
   * \brief Add the function to the generated module.
   * \param f The function to be compiled.
   */
  void AddFunction(LoweredFunc f);
  /*!
   * \brief Finalize the compilation and return the code.
   * \return The code module.
   */
  VerilogCodeGenModule Finish();
  /*!
   * \brief Transform expression to verilog value.
   * \param n The expression to be lowered.
   * \return The value that holds the result of the expression.
   */
  VerilogValue MakeValue(const Expr& n) {
    return this->VisitExpr(n);
  }
  // Expression visitors, one overload per supported node type.
  VerilogValue VisitExpr_(const Variable* op) final;
  VerilogValue VisitExpr_(const Let* op) final;
  VerilogValue VisitExpr_(const Call* op) final;
  VerilogValue VisitExpr_(const Add* op) final;
  VerilogValue VisitExpr_(const Sub* op) final;
  VerilogValue VisitExpr_(const Mul* op) final;
  VerilogValue VisitExpr_(const Div* op) final;
  VerilogValue VisitExpr_(const Mod* op) final;
  VerilogValue VisitExpr_(const Min* op) final;
  VerilogValue VisitExpr_(const Max* op) final;
  VerilogValue VisitExpr_(const EQ* op) final;
  VerilogValue VisitExpr_(const NE* op) final;
  VerilogValue VisitExpr_(const LT* op) final;
  VerilogValue VisitExpr_(const LE* op) final;
  VerilogValue VisitExpr_(const GT* op) final;
  VerilogValue VisitExpr_(const GE* op) final;
  VerilogValue VisitExpr_(const And* op) final;
  VerilogValue VisitExpr_(const Or* op) final;
  VerilogValue VisitExpr_(const Cast* op) final;
  VerilogValue VisitExpr_(const Not* op) final;
  VerilogValue VisitExpr_(const Select* op) final;
  VerilogValue VisitExpr_(const Ramp* op) final;
  VerilogValue VisitExpr_(const Broadcast* op) final;
  VerilogValue VisitExpr_(const IntImm* op) final;
  VerilogValue VisitExpr_(const UIntImm* op) final;
  VerilogValue VisitExpr_(const FloatImm* op) final;
  VerilogValue VisitExpr_(const StringImm* op) final;

 protected:
  /*! \brief Reset the per-function state before generating f. */
  void InitFuncState(LoweredFunc f);
  /*!
   * \brief Print the declaration of vid.
   * \param vid The identifier being declared.
   * \param vtype The kind of declaration.
   * \param dtype The data type that determines the width.
   * \param suffix Text printed right after the identifier.
   * \param indent Whether to print the indentation first.
   */
  void PrintDecl(const std::string& vid,
                 VerilogVarType vtype,
                 Type dtype,
                 const char* suffix = ";\n",
                 bool indent = true);
  /*! \brief Print a continuous assignment of src to target. */
  void PrintAssign(const std::string& target,
                   const std::string& src);
  /*! \brief Assign the AND of all conds to target, or 1 when there are none. */
  void PrintAssignAnd(const std::string& target,
                      const std::vector<std::string>& conds);
  /*! \brief Print one indented line. */
  void PrintLine(const std::string& line);
  /*! \brief Declare target as a wire of type t and assign src to it. */
  void PrintSSAAssign(const std::string& target,
                      const std::string& src,
                      Type t) final;
  /*! \brief Create the value of the binary expression `a opstr b`. */
  VerilogValue MakeBinary(Type t, VerilogValue a, VerilogValue b, const char* opstr);

 private:
  // Names of a pair of hand shake signals.
  // Either name can be empty, which indicates that the signal
  // is always true or does not need to be taken into account.
  struct SignalEntry {
    std::string valid;
    std::string ready;
  };
  // What a port of a channel is connected to.
  struct PortEntry {
    // The signal (or literal) connected to the port
    std::string value;
    // The data type of the port
    Type dtype;
  };
  // The setup of one channel.
  struct ChannelEntry {
    // The channel block
    ChannelBlock block;
    // Map from port name to its connection.
    std::unordered_map<std::string, PortEntry> ports;
    // Connect port to value; a port can be connected only once.
    void AssignPort(std::string port, std::string value, Type dtype);
    // Get the connection of port; the port must have been connected.
    const PortEntry& GetPort(const std::string& port) const;
    // Name of the port that carries the control signal at index.
    std::string SignalPortName(int index) const;
  };
  // Get the wire SSA value of expression text s.
  VerilogValue GetSSAValue(std::string s, Type dtype) {
    return VerilogValue(SSAGetID(s, dtype), kWire, dtype);
  }
  // codegen the whole pipeline
  void CodeGen(const Pipeline& pipeline);
  // codegen one delay macro
  void MakeDelay(const std::string& dst,
                 const std::string& src,
                 Type dtype,
                 int delay,
                 const std::string& enable);
  // codegen the loop macros
  SignalEntry MakeLoop(const Array<Stmt>& loop);
  // codegen the loop, the input wires and the stall logic of a stage
  void MakeStageInputs(const ComputeBlock& block,
                       const std::string& enable,
                       std::string* out_all_input_valid);
  // codegen a compute block that stores into a channel
  void MakeStore(const ComputeBlock& block, const Store* store);
  // codegen a load statement whose value is stored into a channel
  void MakeLoadToFIFO(const ComputeBlock& block,
                      const Store* store,
                      const Load* load);
  // codegen the unit behind a channel; dispatches to one of the three below
  void MakeChannelUnit(const ChannelEntry& ch);
  void MakeChannelFIFO(const ChannelEntry& ch);
  void MakeChannelBuffer(const ChannelEntry& ch);
  void MakeChannelMemMap(const ChannelEntry& ch);
  // Get the channel entry registered for var
  ChannelEntry* GetChannelInfo(const Variable* var);
  // channel setup map.
  std::unordered_map<const Variable*, ChannelEntry> cmap_;
  // list of vpi modules to be hooked.
  std::vector<std::string> tvm_vpi_modules_;
  // The signals that are combined into done.
  std::vector<std::string> done_sigs_;
  // Information of every generated function, by name.
  std::unordered_map<std::string, VerilogFuncEntry> functions_;
};
} // namespace verilog
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_ir.cc
*/
#include <tvm/ir_pass.h>
#include <tvm/ir_visitor.h>
#include <tvm/ir_mutator.h>
#include <utility>
#include "verilog_ir.h"
#include "../../arithmetic/compute_expr.h"
namespace tvm {
namespace codegen {
namespace verilog {
using namespace ir;
/*!
 * \brief Create a control signal.
 * \param type The type of the control signal.
 * \param advance_size The advance size stored in the node.
 * \return The control signal that refers to the new node.
 */
ControlSignal ControlSignalNode::make(
    ControlSignalType type, int advance_size) {
  NodePtr<ControlSignalNode> node = make_node<ControlSignalNode>();
  node->advance_size = advance_size;
  node->ctrl_type = type;
  return ControlSignal(node);
}
/*!
 * \brief Create a stage input.
 * \param var The variable the input refers to.
 * \param input_type The type of the input.
 * \return The stage input that refers to the new node.
 */
StageInput StageInputNode::make(Var var, StageInputType input_type) {
  auto node = make_node<StageInputNode>();
  node->input_type = input_type;
  node->var = var;
  return StageInput(node);
}
// Mutator that swaps every known stage input for a placeholder variable
// and records the placeholder together with its source in inputs_.
class StageInputReplacer : public IRMutator {
 public:
  explicit StageInputReplacer(
      const std::unordered_map<const Variable*, StageInput>& var_info)
      : var_info_(var_info) {}
  // Variables found in var_info_ become "<name>.sync";
  // all other variables are left untouched.
  Expr Mutate_(const Variable* op, const Expr& e) final {
    auto cached = replace_.find(op);
    if (cached != replace_.end()) {
      return cached->second;
    }
    auto info = var_info_.find(op);
    if (info == var_info_.end()) {
      return e;
    }
    Var placeholder(info->second->var->name_hint + ".sync", op->type);
    inputs_.Set(placeholder, info->second);
    replace_[op] = placeholder;
    return std::move(placeholder);
  }
  // Loads, which must use a zero index and read from a known channel,
  // become "<name>.load.sync".
  Expr Mutate_(const Load* op, const Expr& e) final {
    CHECK(is_zero(op->index))
        << "Load should be in its own stage.";
    const Variable* buffer = op->buffer_var.get();
    auto cached = replace_.find(buffer);
    if (cached != replace_.end()) {
      return cached->second;
    }
    auto info = var_info_.find(buffer);
    CHECK(info != var_info_.end())
        << "Load from unknown channel";
    Var placeholder(info->second->var->name_hint + ".load.sync", op->type);
    inputs_.Set(placeholder, info->second);
    replace_[buffer] = placeholder;
    return std::move(placeholder);
  }
  // Placeholders created so far, mapped to the input they stand for.
  Map<Var, StageInput> inputs_;
  // Replacement already chosen for a variable.
  std::unordered_map<const Variable*, Var> replace_;
  // The variables that are to be replaced.
  const std::unordered_map<const Variable*, StageInput>& var_info_;
};
/*!
 * \brief Visitor that extracts a verilog Pipeline from the body of a LoweredFunc.
 *
 * Every Store reached during the traversal is turned into one ComputeBlock
 * stage. Channels declared by channel read/write scopes, and channels created
 * for handle-typed function arguments, are collected into the channel map of
 * the resulting pipeline.
 */
class PipelineExtractor: public IRVisitor {
public:
/*!
 * \brief Run the extraction on a function.
 * \param f The lowered function to be transformed.
 * \return The pipeline holding the stages, the channels and the arguments of f.
 */
Pipeline Extract(LoweredFunc f) {
// Initialize the memory map channels
// TODO(tqchen) move the logic to explicit specification.
// Only handle-typed arguments are remembered here; see CheckArgHandleAccess.
for (auto arg : f->args) {
if (arg.type().is_handle()) {
arg_handle_[arg.get()] = arg;
}
}
pipeline_ = make_node<PipelineNode>();
this->Visit(f->body);
// setup channels: copy every channel entry gathered during the visit,
// keyed by the handle variable of the channel.
for (const auto &kv : cmap_) {
pipeline_->channels.Set(
kv.second.node->channel->handle_var,
ChannelBlock(kv.second.node));
}
pipeline_->args = f->args;
return Pipeline(pipeline_);
}
// Dispatch on the attribute key: pipeline stage scopes, channel advance
// markers and channel read/write scopes get special treatment, everything
// else is visited with the default behavior.
void Visit_(const AttrStmt* op) final {
if (op->attr_key == attr::pipeline_stage_scope) {
// Pipeline stages cannot be nested.
CHECK(!in_pipeline_stage_);
in_pipeline_stage_ = true;
// Remember the loop depth at which the scope starts together with the
// attribute; Visit_(Store) builds the trigger predicate from that depth.
trigger_.emplace_back(std::make_pair(loop_.size(), op));
IRVisitor::Visit_(op);
trigger_.pop_back();
in_pipeline_stage_ = false;
} else if (op->attr_key == attr::channel_read_advance ||
op->attr_key == attr::channel_write_advance) {
// Advance markers are only active while their body is being visited.
trigger_.emplace_back(std::make_pair(loop_.size(), op));
IRVisitor::Visit_(op);
trigger_.pop_back();
} else if (op->attr_key == attr::channel_read_scope ||
op->attr_key == attr::channel_write_scope) {
Channel ch(op->node.node_);
// Look up the entry of the channel, creating it on first use.
ChannelEntry& cb = cmap_[ch->handle_var.get()];
if (cb.node != nullptr) {
CHECK(cb.node->channel.same_as(ch));
} else {
cb.node = make_node<ChannelBlockNode>();
cb.node->channel = ch;
}
// A channel may have at most one read scope and one write scope, and
// the window given by the attribute value has to be a constant.
if (op->attr_key == attr::channel_read_scope) {
CHECK_EQ(cb.read_ref_count, 0)
<< "One channel can only be read from one consumer";
++cb.read_ref_count;
CHECK(arith::GetConstInt(op->value, &(cb.node->read_window)))
<< "Only supprt constant read window";
} else {
CHECK_EQ(cb.write_ref_count, 0)
<< "One channel can only be write by one producer";
++cb.write_ref_count;
CHECK(arith::GetConstInt(op->value, &(cb.node->write_window)))
<< "Only supprt constant write window";
}
// The channel handle counts as a stage input only inside the scope.
var_info_[ch->handle_var.get()] =
StageInputNode::make(ch->handle_var, kChannel);
IRVisitor::Visit_(op);
var_info_.erase(ch->handle_var.get());
} else {
IRVisitor::Visit_(op);
}
}
// A Block is rejected while inside a pipeline stage.
void Visit_(const Block* op) final {
CHECK(!in_pipeline_stage_)
<< "Do not support serial execution inside pipeline";
IRVisitor::Visit_(op);
}
// Conditionals are not supported by the extractor.
void Visit_(const IfThenElse* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Inside a pipeline stage, a loop is recorded in the loop nest (with its body
// replaced by Evaluate(0)) and its loop variable is registered as a stage
// input for the duration of the visit of the loop.
void Visit_(const For* op) final {
if (in_pipeline_stage_) {
loop_.push_back(
For::make(op->loop_var, op->min, op->extent,
op->for_type, op->device_api, Evaluate::make(0)));
var_info_[op->loop_var.get()] =
StageInputNode::make(Var(op->loop_var.node_), kLoopVar);
IRVisitor::Visit_(op);
var_info_.erase(op->loop_var.get());
loop_.pop_back();
} else {
IRVisitor::Visit_(op);
}
}
// Every Store becomes one compute block: the current loop nest, one signal
// trigger per active trigger_ entry, and the store itself with its stage
// inputs replaced by placeholders.
void Visit_(const Store* op) final {
// Check the access pattern
// arg_write is defined only when the store target is a handle argument.
Channel arg_write =
CheckArgHandleAccess(op->buffer_var.get(), op->value.type(), false);
this->Visit(op->value);
// The replace logic
StageInputReplacer repl(var_info_);
// Setup the compute block.
NodePtr<ComputeBlockNode> compute =
make_node<ComputeBlockNode>();
compute->loop = Array<Stmt>(loop_);
// setup the advance triggers
for (const auto& e : trigger_) {
const AttrStmt* attr = e.second;
Channel ch;
if (attr->attr_key == attr::pipeline_stage_scope) {
// The stage scope only yields a trigger when the store writes to a
// handle argument; otherwise the entry is skipped.
ch = arg_write;
if (!ch.defined()) continue;
} else {
ch = Channel(attr->node.node_);
}
NodePtr<SignalTriggerNode> trigger
= make_node<SignalTriggerNode>();
trigger->channel_var = ch->handle_var;
// predicate for the trigger: conjunction of (loop_var == extent - 1) over
// the loops entered since the trigger was pushed.
// NOTE(review): the loop min is not part of the comparison; presumably
// loops are expected to start at 0 -- confirm.
Expr predicate = const_true();
for (size_t i = e.first; i < loop_.size(); ++i) {
const For* loop = loop_[i].as<For>();
predicate = predicate &&
(loop->loop_var == (loop->extent - 1));
}
trigger->predicate = ir::Simplify(predicate);
// Add the signal back to the channels.
// at() throws when the channel has no entry in cmap_.
ChannelEntry& cb = cmap_.at(ch->handle_var.get());
// The new control signal is appended below, so its index is the current size.
trigger->signal_index = static_cast<int>(cb.node->ctrl_signals.size());
// Grab the advance constant size.
int trigger_size = 0;
if (attr->attr_key == attr::pipeline_stage_scope) {
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kComputeFinish, 0));
} else if (attr->attr_key == attr::channel_read_advance) {
CHECK(arith::GetConstInt(attr->value, &trigger_size))
<< "Only support constant advance size";
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kReadAdvance, trigger_size));
} else {
CHECK(arith::GetConstInt(attr->value, &trigger_size))
<< "Only support constant advance size";
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kWriteAdvance, trigger_size));
}
compute->triggers.push_back(SignalTrigger(trigger));
}
// Check if we are writing to FIFO.
// A store at index zero whose value is a plain Load keeps the Load node and
// only rewrites the load index; all other stores have both value and index
// rewritten.
// NOTE(review): the rebuilt Load is given the predicate of the store
// (op->predicate), not load->predicate -- confirm this is intended.
const Load* load = op->value.as<Load>();
if (is_zero(op->index) && load) {
compute->body = Store::make(
op->buffer_var,
Load::make(load->type, load->buffer_var,
repl.Mutate(load->index), op->predicate),
op->index, op->predicate);
} else {
compute->body = Store::make(
op->buffer_var, repl.Mutate(op->value),
repl.Mutate(op->index), op->predicate);
}
compute->inputs = repl.inputs_;
pipeline_->stages.push_back(ComputeBlock(compute));
}
// Let statements are not supported by the extractor.
void Visit_(const LetStmt* op) final {
LOG(FATAL) << "cannot pass through let";
}
// Evaluate statements are not supported by the extractor.
void Visit_(const Evaluate* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Allocations are rejected inside a pipeline stage.
// NOTE(review): the body of the Allocate is not visited here -- confirm that
// skipping everything below an allocation is intended.
void Visit_(const Allocate* op) final {
CHECK(!in_pipeline_stage_);
}
// Assert statements are not supported by the extractor.
void Visit_(const AssertStmt* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Loads only register the access to a handle argument; the load index is not
// visited.
void Visit_(const Load* op) final {
CheckArgHandleAccess(op->buffer_var.get(), op->type, true);
}
/*!
 * \brief Create the channel of a handle argument on its first access.
 * \param var The buffer variable being accessed.
 * \param dtype The data type passed to ChannelNode::make.
 * \param read_access Whether the access is a read; not used by the body.
 * \return The new channel, or an undefined Channel when var is not a
 *  handle argument of the function.
 */
Channel CheckArgHandleAccess(const Variable* var, Type dtype, bool read_access) {
if (!arg_handle_.count(var)) return Channel();
// Each handle argument may be accessed by a single Load or Store only.
CHECK(!cmap_.count(var))
<< "Multiple access to the same handle";
ChannelEntry& cb = cmap_[var];
cb.node = make_node<ChannelBlockNode>();
cb.node->channel = ChannelNode::make(arg_handle_.at(var), dtype);
return cb.node->channel;
}
private:
// The channel information.
struct ChannelEntry {
// The channel block under construction; nullptr until first use.
NodePtr<ChannelBlockNode> node;
// Number of read scopes seen for the channel.
int read_ref_count{0};
// Number of write scopes seen for the channel.
int write_ref_count{0};
};
// Whether we are inside the pipeline stage.
bool in_pipeline_stage_{false};
// The current loop nest
std::vector<Stmt> loop_;
// Advance signal trigger: (loop depth when pushed, originating attribute)
std::vector<std::pair<size_t, const AttrStmt*> > trigger_;
// Read write scope
// NOTE(review): not referenced by any method of this class.
std::vector<const AttrStmt*> channel_scope_;
// Variables currently visible as stage inputs (loop variables and channel handles).
std::unordered_map<const Variable*, StageInput> var_info_;
// The channel entry, keyed by the handle variable of the channel.
std::unordered_map<const Variable*, ChannelEntry> cmap_;
// The argument handle map
std::unordered_map<const Variable*, Var> arg_handle_;
// The result block.
NodePtr<PipelineNode> pipeline_;
};
// Build the verilog pipeline of a function with a fresh PipelineExtractor.
Pipeline MakePipeline(LoweredFunc f) {
  PipelineExtractor extractor;
  return extractor.Extract(f);
}
} // namespace verilog
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_ir.h
* \brief A lowered IR that resembles verilog blocks,
* This is data structure before final codegen.
*/
#ifndef TVM_CODEGEN_VERILOG_VERILOG_IR_H_
#define TVM_CODEGEN_VERILOG_VERILOG_IR_H_
#include <tvm/ir.h>
#include <tvm/expr.h>
#include <tvm/channel.h>
#include <tvm/lowered_func.h>
#include <vector>
#include <memory>
#include <unordered_map>
namespace tvm {
namespace codegen {
namespace verilog {
/*! \brief The kind of a data input to a compute stage (see StageInputNode). */
enum StageInputType : int {
/*! \brief Data channel input. */
kChannel,
/*! \brief Loop variable generated by compute block. */
kLoopVar,
/*! \brief Global constant. */
kGlobalConst
};
/*! \brief The kind of a control signal attached to a channel (see ControlSignalNode). */
enum ControlSignalType : int {
/*! \brief Read advance signal */
kReadAdvance,
/*! \brief Write advance signal */
kWriteAdvance,
/*! \brief Pipeline stage finish signal */
kComputeFinish
};
class ControlSignal;
class StageInput;
class SignalTrigger;
/*! \brief The control signal of a channel */
struct ControlSignalNode : public Node {
  /*!
   * \brief The control signal type.
   *  Given a default so the field is never read uninitialized,
   *  matching the other brace-initialized fields in this file.
   */
  ControlSignalType ctrl_type{kReadAdvance};
  /*! \brief Advance size of the signal */
  int advance_size{0};
  // visit all attributes
  void VisitAttrs(AttrVisitor* v) final {
    v->Visit("ctrl_type", &ctrl_type);
    v->Visit("advance_size", &advance_size);
  }
  /*!
   * \brief Create a control signal.
   * \param ctrl_type The control signal type.
   * \param advance_size Advance size of the signal.
   * \return The created control signal.
   */
  static ControlSignal make(ControlSignalType ctrl_type, int advance_size);
  static constexpr const char* _type_key = "VerilogControlSignal";
  TVM_DECLARE_NODE_TYPE_INFO(ControlSignalNode, Node);
};
TVM_DEFINE_NODE_REF(ControlSignal, ControlSignalNode);
/*! \brief Information about a channel: its windows and its control signals. */
struct ChannelBlockNode : public Node {
/*! \brief The channel this block refers to */
Channel channel;
/*! \brief Read window */
int read_window{0};
/*! \brief Write window */
int write_window{0};
/*! \brief Control signals in the channel */
Array<ControlSignal> ctrl_signals;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("channel", &channel);
v->Visit("read_window", &read_window);
v->Visit("write_window", &write_window);
v->Visit("ctrl_signals", &ctrl_signals);
}
static constexpr const char* _type_key = "VerilogChannelBlock";
TVM_DECLARE_NODE_TYPE_INFO(ChannelBlockNode, Node);
};
TVM_DEFINE_NODE_REF(ChannelBlock, ChannelBlockNode);
/*!
 * \brief Input to the compute block.
 *  These represent the data values that need to be shared.
 */
struct StageInputNode : public Node {
  /*!
   * \brief The corresponding var of the input
   *  For loop and global const it is the var.
   *  For channel this corresponds to the channel handle.
   */
  Var var;
  /*!
   * \brief The type of the input.
   *  Given a default so the field is never read uninitialized.
   */
  StageInputType input_type{kChannel};
  // visit all attributes
  void VisitAttrs(AttrVisitor* v) final {
    v->Visit("var", &var);
    v->Visit("input_type", &input_type);
  }
  /*!
   * \brief Create a stage input.
   * \param var The corresponding var of the input.
   * \param input_type The type of the input.
   * \return The created stage input.
   */
  static StageInput make(Var var, StageInputType input_type);
  static constexpr const char* _type_key = "VerilogStageInput";
  TVM_DECLARE_NODE_TYPE_INFO(StageInputNode, Node);
};
TVM_DEFINE_NODE_REF(StageInput, StageInputNode);
/*! \brief The trigger signal for a certain channel */
struct SignalTriggerNode : public Node {
  /*! \brief The channel handle variable */
  Var channel_var;
  /*! \brief Boolean predicate to trigger the signal */
  Expr predicate;
  /*!
   * \brief Signal index of the channel.
   *  Zero-initialized like the other integer fields in this file,
   *  so that the field is never read uninitialized.
   */
  int signal_index{0};
  // visit all attributes
  void VisitAttrs(AttrVisitor* v) final {
    v->Visit("channel_var", &channel_var);
    v->Visit("predicate", &predicate);
    v->Visit("signal_index", &signal_index);
  }
  static constexpr const char* _type_key = "VerilogSignalTrigger";
  TVM_DECLARE_NODE_TYPE_INFO(SignalTriggerNode, Node);
};
TVM_DEFINE_NODE_REF(SignalTrigger, SignalTriggerNode);
/*! \brief Compute block for verilog: a body, its loop nest, triggers and inputs. */
struct ComputeBlockNode : public Node {
/*! \brief The body of the block. */
Stmt body;
/*! \brief The loop nest around the body, each is a For with no_op as body */
Array<Stmt> loop;
/*! \brief The channel advance trigger */
Array<SignalTrigger> triggers;
/*! \brief The input variables that need to be synced. */
Map<Var, StageInput> inputs;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("body", &body);
v->Visit("loop", &loop);
v->Visit("triggers", &triggers);
v->Visit("inputs", &inputs);
}
static constexpr const char* _type_key = "VerilogComputeBlock";
TVM_DECLARE_NODE_TYPE_INFO(ComputeBlockNode, Node);
};
TVM_DEFINE_NODE_REF(ComputeBlock, ComputeBlockNode);
/*! \brief Code block for a verilog module: arguments, compute stages and channels. */
struct PipelineNode : public Node {
/*! \brief arguments to the module */
Array<Var> args;
/*! \brief Computation stages */
Array<ComputeBlock> stages;
/*! \brief The data channels, keyed by a Var */
Map<Var, ChannelBlock> channels;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("args", &args);
v->Visit("stages", &stages);
v->Visit("channels", &channels);
}
static constexpr const char* _type_key = "VerilogPipeline";
TVM_DECLARE_NODE_TYPE_INFO(PipelineNode, Node);
};
TVM_DEFINE_NODE_REF(Pipeline, PipelineNode);
/*!
 * \brief Build a lowered verilog pipeline given function.
 * \param f The function to be transformed.
 * \return The created verilog pipeline.
 */
Pipeline MakePipeline(LoweredFunc f);
} // namespace verilog
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_VERILOG_IR_H_
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_module.cc
* \brief Build verilog source code.
*/
#include <tvm/runtime/packed_func.h>
#include <tvm/codegen.h>
#include <mutex>
#include "codegen_verilog.h"
#include "../../runtime/file_util.h"
#include "../../runtime/meta_data.h"
namespace tvm {
namespace codegen {
namespace verilog {
using runtime::TVMArgs;
using runtime::TVMRetValue;
using runtime::PackedFunc;
// Simulator function
class VerilogModuleNode : public runtime::ModuleNode {
public:
VerilogModuleNode() : fmt_("v") {}
const char* type_key() const {
return "verilog";
}
PackedFunc GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) final {
CHECK(sptr_to_self.get() == this);
if (!m_.fmap.count(name)) return PackedFunc();
auto f = [sptr_to_self, name, this](const runtime::TVMArgs& args, TVMRetValue* rv) {
auto* fsim = runtime::Registry::Get("tvm_callback_verilog_simulator");
CHECK(fsim != nullptr)
<< "tvm_callback_verilog_simulator is not registered,"
<<" did you import tvm.addon.verilog?";
std::string code = m_.AppendSimMain(name);
if (const auto* f = runtime::Registry::Get("tvm_callback_verilog_postproc")) {
code = (*f)(code).operator std::string();
}
std::vector<TVMValue> values;
std::vector<int> codes;
TVMValue v;
v.v_str = code.c_str();
values.push_back(v);
codes.push_back(kStr);
for (int i = 0; i < args.num_args; ++i) {
values.push_back(args.values[i]);
codes.push_back(args.type_codes[i]);
}
fsim->CallPacked(TVMArgs(&values[0], &codes[0], args.num_args + 1), rv);
};
return PackedFunc(f);
}
std::string GetSource(const std::string& format) final {
return m_.code;
}
void Init(const Array<LoweredFunc>& funcs) {
CodeGenVerilog cg;
cg.Init();
for (LoweredFunc f : funcs) {
cg.AddFunction(f);
}
m_ = cg.Finish();
}
private:
// the verilog code. data
VerilogCodeGenModule m_;
// format;
std::string fmt_;
};
// Register "codegen.build_verilog": create a VerilogModuleNode, initialize it
// from the first argument and return it wrapped as a runtime::Module.
TVM_REGISTER_API("codegen.build_verilog")
.set_body([](TVMArgs args, TVMRetValue* rv) {
std::shared_ptr<VerilogModuleNode> n =
std::make_shared<VerilogModuleNode>();
n->Init(args[0]);
*rv = runtime::Module(n);
});
} // namespace verilog
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file vpi_device.cc
* \brief Simulated VPI RAM device.
*/
#include <tvm/runtime/registry.h>
#include <tvm/runtime/device_api.h>
#include <tvm/packed_func_ext.h>
#include <cstdlib>
#include <unordered_map>
#include <map>
#include <queue>
#include "vpi_session.h"
namespace tvm {
namespace codegen {
/*!
 * \brief Simulated device ram.
 *
 * Device pointers handed out by this class are byte offsets into an internal
 * buffer rather than host addresses; RealAddr/RealAddrSafe translate them
 * into host pointers.
 */
class VPIDeviceAPI final : public runtime::DeviceAPI {
 public:
  /*!
   * \brief Allocate the simulated ram.
   *  The size in MB is read from the environment variable
   *  TVM_VPI_RAM_SIZE_MB and defaults to 32.
   */
  VPIDeviceAPI() {
    const char* s_ram_size = getenv("TVM_VPI_RAM_SIZE_MB");
    // 32 MB ram by default.
    int ram_size = 32;
    if (s_ram_size != nullptr) {
      ram_size = atoi(s_ram_size);
    }
    // Reject zero/negative/unparsable sizes instead of shifting a
    // non-positive value below.
    CHECK_GT(ram_size, 0)
        << "VPI: TVM_VPI_RAM_SIZE_MB must be a positive integer";
    // The ram is stored as int64_t words: 1 MB == (1 << 17) words.
    ram_.resize(static_cast<size_t>(ram_size) << 17);
    // Allocation starts past offset 0, so offset 0 is never handed out.
    ram_head_ = runtime::kAllocAlignment;
    ram_max_ = ram_.size() * sizeof(int64_t);
    LOG(INFO) << "Initialize VPI simulated ram " << ram_size << "MB ...";
  }
  /*!
   * \brief Convert a device address to a host address.
   * \param addr The device address (byte offset into the ram).
   * \param size Number of bytes to be accessed.
   * \return The host pointer; fails a CHECK when the range exceeds the ram.
   */
  void* RealAddr(const void* addr, size_t size) const {
    int64_t ptr = reinterpret_cast<int64_t>(addr);
    CHECK_LE(ptr + size, ram_max_)
        << "VPI: Illegal memory access";
    return (char*)(&ram_[0]) + ptr;  // NOLINT(*)
  }
  /*!
   * \brief Convert a device address to a host address without failing.
   * \param addr The device address (byte offset into the ram).
   * \param size Number of bytes to be accessed.
   * \return The host pointer, or nullptr when addr + size reaches or
   *  exceeds the end of the ram (one byte stricter than RealAddr).
   */
  void* RealAddrSafe(const void* addr, size_t size) const {
    int64_t ptr = reinterpret_cast<int64_t>(addr);
    if (ptr + size >= ram_max_) return nullptr;
    return (char*)(&ram_[0]) + ptr;  // NOLINT(*)
  }
  void SetDevice(TVMContext ctx) final {}
  // Only the kExist attribute is answered; rv is left untouched otherwise.
  void GetAttr(TVMContext ctx, runtime::DeviceAttrKind kind, TVMRetValue* rv) final {
    if (kind == runtime::kExist) {
      *rv = 1;
    }
  }
  /*!
   * \brief Allocate a block of simulated ram.
   * \param ctx The context (unused).
   * \param size Requested size in bytes.
   * \param alignment Requested alignment; at most runtime::kAllocAlignment.
   * \param type_hint Type hint (unused).
   * \return The device address of the block.
   */
  void* AllocDataSpace(TVMContext ctx,
                       size_t size,
                       size_t alignment,
                       TVMType type_hint) final {
    // always align to runtime::kAllocAlignment.
    CHECK_LE(alignment, runtime::kAllocAlignment);
    alignment = runtime::kAllocAlignment;
    // always allocate block with aligned size.
    // This adds between 1 and alignment bytes, so no block has size zero
    // and two blocks never share the same head.
    size += alignment - (size % alignment);
    // This is not thread safe, but fine for simulation.
    // Reuse the smallest freed block that is large enough.
    auto it = free_blocks_.lower_bound(size);
    if (it != free_blocks_.end()) {
      size_t head = it->second;
      free_blocks_.erase(it);
      Block& b = block_map_.at(head);
      CHECK(b.is_free);
      b.is_free = false;
      return reinterpret_cast<void*>(head);
    } else {
      // No reusable block: carve a new one from the top of the ram.
      CHECK_EQ(ram_head_ % runtime::kAllocAlignment, 0U);
      Block b;
      b.size = size;
      b.is_free = false;
      CHECK_LE(ram_head_ + size, ram_max_)
          << "VPI: Out of memory";
      block_map_[ram_head_] = b;
      void* ret = reinterpret_cast<void*>(ram_head_);
      ram_head_ += size;
      return ret;
    }
  }
  /*!
   * \brief Return a block to the free list.
   * \param ctx The context (unused).
   * \param ptr Device address previously returned by AllocDataSpace.
   */
  void FreeDataSpace(TVMContext ctx, void* ptr) final {
    size_t head = reinterpret_cast<size_t>(ptr);
    Block& b = block_map_.at(head);
    // A second free would register the block twice in free_blocks_ and
    // only fail later, inside an unrelated allocation.
    CHECK(!b.is_free)
        << "VPI: Double free of device memory";
    b.is_free = true;
    free_blocks_.insert({b.size, head});
  }
  /*!
   * \brief Copy data, translating whichever side lives on the VPI device.
   *  Pointers of other device types are used as-is.
   */
  void CopyDataFromTo(const void* from,
                      size_t from_offset,
                      void* to,
                      size_t to_offset,
                      size_t size,
                      TVMContext ctx_from,
                      TVMContext ctx_to,
                      TVMType type_hint,
                      TVMStreamHandle stream) final {
    if (static_cast<int>(ctx_from.device_type) == kDLVPI) {
      from = RealAddr(static_cast<const char*>(from) + from_offset, size);
    }
    if (static_cast<int>(ctx_to.device_type) == kDLVPI) {
      to = RealAddr(static_cast<char*>(to) + to_offset, size);
    }
    memcpy(to, from, size);
  }
  void StreamSync(TVMContext ctx, TVMStreamHandle stream) final {
  }
  /*! \return The global singleton of the device API. */
  static VPIDeviceAPI* Global() {
    static VPIDeviceAPI inst;
    return &inst;
  }

 private:
  // allocator block for reuse
  struct Block {
    // The size of the block
    size_t size{0};
    // Whether this is already freed.
    bool is_free{true};
  };
  // head -> blocks
  std::unordered_map<size_t, Block> block_map_;
  // size -> free heads.
  std::multimap<size_t, size_t> free_blocks_;
  // top of the ram and total ram size in bytes
  size_t ram_head_, ram_max_;
  // The ram space.
  std::vector<int64_t> ram_;
};
/* !\brief vector buffer to help read/write */
class VPIVecBuffer {
public:
// Put data into vec
void put_vec(const VPIHandle& h, size_t nwords,
const void* dptr, size_t size) {
wbuf_.resize(nwords);
vbuf_.resize(nwords);
memcpy(&wbuf_[0], dptr, size);
for (size_t i = 0; i < nwords; ++i) {
vbuf_[i].aval = wbuf_[i];
vbuf_[i].bval = 0;
}
h.put_vec(vbuf_);
}
// read data from vec.
void get_vec(const VPIHandle& h, void* dptr, size_t size) {
h.get_vec(&vbuf_);
wbuf_.resize(vbuf_.size());
for (size_t i = 0; i < vbuf_.size(); ++i) {
wbuf_[i] = vbuf_[i].aval;
CHECK_EQ(vbuf_[i].bval, 0)
<< "Write indetermined value to RAM";
}
memcpy(dptr, &wbuf_[0], size);
}
private:
// Temporal buffers.
std::vector<int32_t> wbuf_;
std::vector<vpi::VPIVecVal> vbuf_;
};
/*!
 * \brief Memory interface for VPI memory.
 *
 * Serves queued read and write tasks against the simulated ram of
 * VPIDeviceAPI, one unit per AtNegEdge callback, through the signals of the
 * verilog module it is attached to.
 */
class VPIMemoryInterface {
public:
// Initialize the FSM.
// Looks up all signals of the module by name and derives the byte width of
// the read and write data ports.
void Init(VPIHandle module) {
device_ = VPIDeviceAPI::Global();
in_rst_ = module["rst"];
// read ports
in_read_dequeue_ = module["read_en"];
out_reg_read_data_ = module["reg_read_data"];
// Write ports
in_write_enqueue_ = module["write_en"];
in_write_data_ = module["write_data_in"];
// Status port
out_reg_read_valid_ = module["reg_read_valid"];
out_reg_write_ready_ = module["reg_write_ready"];
// memory control signal
ctrl_read_req_ = module["host_read_req"];
ctrl_read_addr_ = module["host_read_addr"];
ctrl_read_size_ = module["host_read_size"];
ctrl_write_req_ = module["host_write_req"];
ctrl_write_addr_ = module["host_write_addr"];
ctrl_write_size_ = module["host_write_size"];
// The bit and bytes;
size_t read_bits = out_reg_read_data_.size();
size_t write_bits = in_write_data_.size();
CHECK_EQ(read_bits % 8U, 0)
<< "Read/write unit have to be multiple of 8 bit(bytes)";
CHECK_EQ(write_bits % 8U, 0)
<< "Read/write unit have to be multiple of 8 bit(bytes)";
read_unit_bytes_ = read_bits / 8U;
write_unit_bytes_ = write_bits / 8U;
}
// Callback at neg-edge.
// Handles reset, then FIFO reads and writes, and samples new control
// requests last.
void AtNegEdge() {
// reset: nothing may be in flight; both status outputs are cleared.
if (in_rst_.get_int()) {
CHECK_EQ(pending_read_.size, 0U);
CHECK_EQ(pending_write_.size, 0U);
CHECK(read_tasks_.empty());
CHECK(write_tasks_.empty());
out_reg_write_ready_.put_int(0);
out_reg_read_valid_.put_int(0);
return;
}
// read write tasks
// Produce the next read unit when the current one is dequeued or when
// no valid data is being presented.
if (in_read_dequeue_.get_int() || !out_reg_read_valid_.get_int()) {
ReadFromFIFO();
}
// update write full
// A write is only accepted while write ready is asserted.
if (in_write_enqueue_.get_int()) {
CHECK(out_reg_write_ready_.get_int());
WriteToFIFO();
}
// Write ready is asserted while a write is pending or queued.
if (pending_write_.size || write_tasks_.size()) {
out_reg_write_ready_.put_int(1);
} else {
out_reg_write_ready_.put_int(0);
}
// Control tasks: queue a new read request.
// The address is kept as a device address here and is translated when
// the task becomes the pending one.
if (ctrl_read_req_.get_int()) {
FIFOTask tsk;
tsk.addr = reinterpret_cast<char*>(ctrl_read_addr_.get_int());
tsk.size = static_cast<size_t>(ctrl_read_size_.get_int());
read_tasks_.push(tsk);
}
// Control tasks: queue a new write request.
if (ctrl_write_req_.get_int()) {
FIFOTask tsk;
tsk.addr = reinterpret_cast<char*>(ctrl_write_addr_.get_int());
tsk.size = static_cast<size_t>(ctrl_write_size_.get_int());
write_tasks_.push(tsk);
}
}
private:
// The FIFO tasks
struct FIFOTask {
// Start address of the remaining data.
char* addr{nullptr};
// Number of bytes remaining.
size_t size{0};
};
// handle dequeue event
// Presents the next unit of the pending read on the read data port, or
// clears read valid when there is nothing left to read.
void ReadFromFIFO() {
if (pending_read_.size == 0) {
if (!read_tasks_.empty()) {
pending_read_ = read_tasks_.front();
read_tasks_.pop();
// translate to real memory addr
pending_read_.addr = static_cast<char*>(
device_->RealAddr(
pending_read_.addr, pending_read_.size));
}
}
if (pending_read_.size != 0) {
// The size to be read
size_t nread = std::min(pending_read_.size, read_unit_bytes_);
// Read from the data
// The vector always covers the whole read port, while only nread
// bytes are taken from memory.
size_t nwords = (read_unit_bytes_ + 3) / 4;
vbuf_.put_vec(out_reg_read_data_, nwords,
pending_read_.addr, nread);
// Update the pointer
pending_read_.size -= nread;
pending_read_.addr += nread;
// read into the vector
out_reg_read_valid_.put_int(1);
} else {
out_reg_read_valid_.put_int(0);
}
}
// handle write event
// Stores the value on the write data port at the position of the pending
// write. Nothing is stored when no write is pending or queued.
void WriteToFIFO() {
if (pending_write_.size == 0) {
if (!write_tasks_.empty()) {
pending_write_ = write_tasks_.front();
write_tasks_.pop();
// translate to real memory addr
pending_write_.addr = static_cast<char*>(
device_->RealAddr(
pending_write_.addr, pending_write_.size));
}
}
if (pending_write_.size != 0) {
// write to the ram.
size_t nwrite = std::min(pending_write_.size, write_unit_bytes_);
vbuf_.get_vec(in_write_data_, pending_write_.addr, nwrite);
// Update the pointer
pending_write_.size -= nwrite;
pending_write_.addr += nwrite;
}
}
// Device API
VPIDeviceAPI* device_{nullptr};
// Input reset
VPIHandle in_rst_;
// Read FIFO signal
VPIHandle in_read_dequeue_;
// Write FIFO signal
VPIHandle in_write_enqueue_;
VPIHandle in_write_data_;
// Read memory controller signals
VPIHandle ctrl_read_req_;
VPIHandle ctrl_read_addr_;
VPIHandle ctrl_read_size_;
// Write memory controller signals
VPIHandle ctrl_write_req_;
VPIHandle ctrl_write_addr_;
VPIHandle ctrl_write_size_;
// Read FIFO outputs
VPIHandle out_reg_read_data_;
VPIHandle out_reg_read_valid_;
// Write FIFO outputs
VPIHandle out_reg_write_ready_;
// The read and the write task currently being served.
FIFOTask pending_read_;
FIFOTask pending_write_;
// The read/write task queues.
std::queue<FIFOTask> read_tasks_;
std::queue<FIFOTask> write_tasks_;
// Unit bytes for read/writing; set by Init.
size_t read_unit_bytes_;
size_t write_unit_bytes_;
// Temporal buffers.
VPIVecBuffer vbuf_;
};
/*!
 * \brief Common base of the read and write memory maps.
 *  Binds the address/data signals of a memory map module and translates the
 *  current address into a pointer into the simulated ram.
 */
class VPIMemMapBase {
 public:
  /*!
   * \brief Initialize the connections.
   * \param module The verilog module of the memory map.
   * \param data_port Name of the data signal of the module.
   */
  void Init(VPIHandle module, const std::string& data_port) {
    device_ = VPIDeviceAPI::Global();
    // initialize the connections
    rst_ = module["rst"];
    addr_ = module["addr"];
    data_ = module[data_port];
    mmap_addr_ = module["mmap_addr"];
    size_t unit_bits = data_.size();
    CHECK_EQ(unit_bits % 8U, 0)
        << "Read/write unit have to be multiple of 8 bit(bytes)";
    unit_bytes_ = unit_bits / 8U;
  }
  /*!
   * \brief Get the host address of the unit currently addressed.
   * \return The host pointer, or nullptr when the whole unit does not lie
   *  inside the simulated ram.
   */
  void* RealAddr() {
    // Do the address arithmetic in 64 bit so that large offsets
    // do not overflow int.
    int64_t byte_offset =
        static_cast<int64_t>(addr_.get_int()) *
        static_cast<int64_t>(unit_bytes_);
    int64_t base = static_cast<int64_t>(mmap_addr_.get_int());
    // The callers access unit_bytes_ bytes at the returned pointer,
    // so the whole unit has to be bounds-checked, not only its first byte.
    void* ptr =
        device_->RealAddrSafe(
            reinterpret_cast<void*>(base + byte_offset), unit_bytes_);
    return ptr;
  }

 protected:
  // Device API
  VPIDeviceAPI* device_{nullptr};
  // Signals of the memory map module.
  VPIHandle rst_;
  VPIHandle addr_;
  VPIHandle data_;
  VPIHandle mmap_addr_;
  // Width of the data port in bytes; set by Init.
  size_t unit_bytes_{0};
  // Temporal buffer for vector transfer.
  VPIVecBuffer vbuf_;
};
// Read only memory map: presents the addressed unit on "reg_data".
class VPIReadMemMap : public VPIMemMapBase {
 public:
  void Init(VPIHandle module) {
    VPIMemMapBase::Init(module, "reg_data");
  }
  // Callback at neg-edge; addresses outside of the ram are ignored.
  void AtNegEdge() {
    void* src = RealAddr();
    if (src != nullptr) {
      const size_t word_count = (unit_bytes_ + 3) / 4;
      vbuf_.put_vec(data_, word_count, src, unit_bytes_);
    }
  }
};
// Write only memory map: stores "data_in" at the addressed unit when enabled.
class VPIWriteMemMap : public VPIMemMapBase {
 public:
  void Init(VPIHandle module) {
    VPIMemMapBase::Init(module, "data_in");
    enable_ = module["en"];
  }
  // Callback at neg-edge; writes only when enabled and not in reset.
  void AtNegEdge() {
    // rst is only sampled when the enable signal is set.
    const bool active = enable_.get_int() && !rst_.get_int();
    if (!active) {
      return;
    }
    void* dst = RealAddr();
    CHECK(dst != nullptr)
        << "Illegal write to VPI RAM";
    vbuf_.get_vec(data_, dst, unit_bytes_);
  }

 private:
  // The write enable signal.
  VPIHandle enable_;
};
// Register "device_api.vpi": returns the global VPIDeviceAPI singleton as an
// opaque runtime::DeviceAPI pointer.
TVM_REGISTER_GLOBAL("device_api.vpi")
.set_body([](runtime::TVMArgs args, runtime::TVMRetValue* rv) {
runtime::DeviceAPI* ptr = VPIDeviceAPI::Global();
*rv = static_cast<void*>(ptr);
});
/*!
 * \brief Attach a hook object of type T to a verilog module.
 *  T has to provide Init(VPIHandle) and AtNegEdge().
 * \param args args[0] is the VPIHandle of the module.
 * \param rv Receives a PackedFunc that calls AtNegEdge on the hook object.
 * \tparam T The hook type.
 */
template<typename T>
void TVMVPIHook(runtime::TVMArgs args, runtime::TVMRetValue* rv) {
  VPIHandle module = args[0];
  auto hook = std::make_shared<T>();
  hook->Init(module);
  LOG(INFO) << "Hook " << module.name() << " to tvm vpi simulation...";
  // The closure shares ownership of the hook object.
  PackedFunc callback([hook](const runtime::TVMArgs&, runtime::TVMRetValue*) {
      hook->AtNegEdge();
    });
  *rv = callback;
}
// Hook factories, one per module type. VPISession::make looks them up under
// the name "_vpi_module_" followed by the definition name of the verilog module.
TVM_REGISTER_GLOBAL("_vpi_module_tvm_vpi_mem_interface")
.set_body(TVMVPIHook<VPIMemoryInterface>);
TVM_REGISTER_GLOBAL("_vpi_module_tvm_vpi_read_mmap")
.set_body(TVMVPIHook<VPIReadMemMap>);
TVM_REGISTER_GLOBAL("_vpi_module_tvm_vpi_write_mmap")
.set_body(TVMVPIHook<VPIWriteMemMap>);
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file vpi_session.cc
* \brief IPC session call to verilog simulator via VPI.
*/
#include <tvm/api_registry.h>
#include <memory>
#include "vpi_session.h"
namespace tvm {
namespace codegen {
using namespace vpi;
// helper class to get the node.
class VPISessionEntry {
public:
// Whether in control.
bool in_control{false};
// Internal reader and writer.
common::Pipe reader;
common::Pipe writer;
// internal constructor
VPISessionEntry(int h_pipe_read, int h_pipe_write)
: reader(h_pipe_read), writer(h_pipe_write) {
}
~VPISessionEntry() {
if (in_control) {
VPIReturnCode cd;
writer.Write(kShutDown);
reader.Read(&cd);
}
reader.Close();
writer.Close();
}
void ReadExpect(VPIReturnCode rcode) {
VPIReturnCode code;
CHECK(reader.Read(&code));
CHECK_EQ(code, rcode) << "Error in simulation";
}
};
// Inline implementations: access the node behind the reference
// as its concrete container type.
inline VPISessionNode* VPISession::get() const {
  auto* raw = node_.get();
  return static_cast<VPISessionNode*>(raw);
}
inline VPIHandleNode* VPIHandle::get() const {
  auto* raw = node_.get();
  return static_cast<VPIHandleNode*>(raw);
}
/*!
 * \brief Wrap a raw VPI handle into a VPIHandle bound to a session.
 * \param sess The session the handle belongs to.
 * \param handle The raw handle.
 * \return The created handle.
 */
VPIHandle VPIHandleCreate(
    const std::shared_ptr<VPISessionEntry>& sess,
    VPIRawHandle handle) {
  auto node = make_node<VPIHandleNode>();
  node->handle = handle;
  node->sess = sess;
  return VPIHandle(node);
}
/*!
 * \brief Ask the VPI process for a handle by name.
 * \param sess The session; has to be in control.
 * \param name The name to look up.
 * \param handle The raw handle sent along with the name as lookup scope.
 * \param allow_undefined Whether a failed lookup returns an undefined
 *  handle instead of failing a CHECK.
 * \return The handle found, or an undefined handle.
 */
VPIHandle GetHandleByName(
    const std::shared_ptr<VPISessionEntry>& sess,
    const std::string& name,
    VPIRawHandle handle,
    bool allow_undefined) {
  VPISessionEntry* entry = sess.get();
  CHECK(entry->in_control);
  // Request: command, name, scope handle.
  entry->writer.Write(kGetHandleByName);
  entry->writer.Write(name);
  entry->writer.Write(handle);
  // Reply: status, then the resulting raw handle.
  entry->ReadExpect(kSuccess);
  CHECK(entry->reader.Read(&handle));
  if (handle == nullptr) {
    CHECK(allow_undefined)
        << "Cannot find handle with name=" << name;
    return VPIHandle();
  }
  return VPIHandleCreate(sess, handle);
}
/*!
 * \brief Query a string property of a handle from the VPI process.
 * \param h The handle node.
 * \param code The property code.
 * \return The property value.
 */
std::string VPIGetStrProp(VPIHandleNode* h, int code) {
  VPISessionEntry* entry = h->sess.get();
  CHECK(entry->in_control);
  // Request: command, property code, handle.
  entry->writer.Write(kGetStrProp);
  entry->writer.Write(code);
  entry->writer.Write(h->handle);
  entry->ReadExpect(kSuccess);
  std::string result;
  CHECK(entry->reader.Read(&result));
  return result;
}
/*!
 * \brief Query an integer property of a handle from the VPI process.
 * \param h The handle node.
 * \param code The property code.
 * \return The property value.
 */
int VPIGetIntProp(VPIHandleNode* h, int code) {
  VPISessionEntry* entry = h->sess.get();
  CHECK(entry->in_control);
  // Request: command, property code, handle.
  entry->writer.Write(kGetIntProp);
  entry->writer.Write(code);
  entry->writer.Write(h->handle);
  entry->ReadExpect(kSuccess);
  int result;
  CHECK(entry->reader.Read(&result));
  return result;
}
/*!
 * \brief Create a session on top of a pair of pipes to the VPI process.
 *
 * Reads the list of custom module handles sent by the VPI process, waits
 * for the first positive edge trigger, and creates one callback per module
 * from the global function "_vpi_module_<definition name>".
 *
 * \param h_pipe_read The read pipe handle.
 * \param h_pipe_write The write pipe handle.
 * \return The created session, in control of the simulation.
 */
VPISession VPISession::make(int h_pipe_read, int h_pipe_write) {
  auto n = make_node<VPISessionNode>();
  n->sess = std::make_shared<VPISessionEntry>(h_pipe_read, h_pipe_write);
  n->sess->in_control = true;
  VPISession sess(n);
  // The custom module handles
  std::vector<VPIRawHandle> mod_handles;
  // Check the read like every other read of this file does; a failed read
  // would otherwise silently leave the module list empty.
  CHECK(n->sess->reader.Read(&mod_handles))
      << "Failed to read the module handles from the VPI process";
  n->sess->ReadExpect(kPosEdgeTrigger);
  // start Initialize the callbacks
  for (VPIRawHandle raw_h : mod_handles) {
    VPIHandle h = VPIHandleCreate(n->sess, raw_h);
    CHECK_EQ(VPIGetIntProp(h.get(), kVPIType), kVPIModule)
        << "Expect pass modules to $tvm_session after clk";
    std::string def = VPIGetStrProp(h.get(), kVPIDefName);
    std::string callback_name = "_vpi_module_" + def;
    const PackedFunc* f = runtime::Registry::Get(callback_name);
    CHECK(f != nullptr)
        << "Cannot find definition for tvm vpi module " << def;
    PackedFunc cb = (*f)(h);
    n->posedge_end_callbacks.push_back(cb);
  }
  return sess;
}
// Look up a handle by name from the top level; the handle has to exist.
VPIHandle VPISession::operator[](const std::string& name) const {
  const bool allow_undefined = false;
  return GetHandleByName(get()->sess, name, nullptr, allow_undefined);
}
/*!
 * \brief Look up a handle by name from the top level.
 * \param name The name of the handle.
 * \param allow_undefined Whether a failed lookup returns an undefined
 *  handle (true) or fails a CHECK (false).
 * \return The handle found, or an undefined handle.
 */
VPIHandle VPISession::GetByName(const std::string& name,
                                bool allow_undefined) const {
  // Forward the caller's choice; it used to be hard-coded to true,
  // which silently ignored allow_undefined == false.
  return GetHandleByName(get()->sess, name, nullptr, allow_undefined);
}
void VPISession::yield() {
VPISessionEntry* n = get()->sess.get();
CHECK(n->in_control);
for (const PackedFunc& f : get()->posedge_end_callbacks) {
f();
}
n->writer.Write(kYield);
n->ReadExpect(kSuccess);
n->in_control = false;
n->ReadExpect(kPosEdgeTrigger);
n->in_control = true;
}
void VPISession::shutdown() {
VPISessionEntry* n = get()->sess.get();
if (n->in_control) {
n->writer.Write(kShutDown);
n->ReadExpect(kSuccess);
n->in_control = false;
}
}
int VPIHandle::size() const {
return VPIGetIntProp(get(), kVPISize);
}
void VPIHandle::put_int(int value) {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kPutInt32);
n->writer.Write(h->handle);
n->writer.Write(value);
n->ReadExpect(kSuccess);
}
int VPIHandle::get_int() const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kGetInt32);
n->writer.Write(h->handle);
n->ReadExpect(kSuccess);
int value;
CHECK(n->reader.Read(&value));
return value;
}
std::string VPIHandle::name() const {
return VPIGetStrProp(get(), kVPIFullName);
}
void VPIHandle::put_vec(const std::vector<VPIVecVal>& vec) const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kPutVec);
n->writer.Write(h->handle);
n->writer.Write(vec);
n->ReadExpect(kSuccess);
}
void VPIHandle::get_vec(std::vector<VPIVecVal>* vec) const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kGetVec);
n->writer.Write(h->handle);
n->ReadExpect(kSuccess);
CHECK(n->reader.Read(vec));
}
// Look up a handle by name, using this handle as scope; the handle has to exist.
VPIHandle VPIHandle::operator[](const std::string& name) const {
  VPIHandleNode* self = get();
  const bool allow_undefined = false;
  return GetHandleByName(self->sess, name, self->handle, allow_undefined);
}
// API registration
// Each entry exposes one VPISession/VPIHandle method as a global function;
// args[0] is the session or handle unless noted otherwise.
// Create a session from (read pipe handle, write pipe handle).
TVM_REGISTER_API("_vpi_SessMake")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = VPISession::make(args[0], args[1]);
});
// Look up the handle named args[1] in the session.
TVM_REGISTER_API("_vpi_SessGetHandleByName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPISession().operator[](args[1]);
});
// Yield control back to the simulator.
TVM_REGISTER_API("_vpi_SessYield")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPISession().yield();
});
// Shut the session down.
TVM_REGISTER_API("_vpi_SessShutdown")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPISession().shutdown();
});
// Set the int value args[1] on the handle.
TVM_REGISTER_API("_vpi_HandlePutInt")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPIHandle().put_int(args[1]);
});
// Get the int value of the handle.
TVM_REGISTER_API("_vpi_HandleGetInt")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().get_int();
});
// Get the name of the handle.
TVM_REGISTER_API("_vpi_HandleGetName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().name();
});
// Get the size of the handle.
TVM_REGISTER_API("_vpi_HandleGetSize")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().size();
});
// Look up the handle named args[1] below the handle.
TVM_REGISTER_API("_vpi_HandleGetHandleByName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().operator[](args[1]);
});
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file vpi_session.h
* \brief IPC session call to verilog simulator via VPI.
*/
#ifndef TVM_CODEGEN_VERILOG_VPI_SESSION_H_
#define TVM_CODEGEN_VERILOG_VPI_SESSION_H_
#include <tvm/base.h>
#include <vector>
#include <string>
#include <memory>
#include "../../common/pipe.h"
#include "../../../verilog/tvm_vpi.h"
namespace tvm {
namespace codegen {
// node containers
class VPISessionNode;
class VPIHandleNode;
class VPIHandle;
class VPISessionEntry;
using runtime::PackedFunc;
/*!
 * \brief Environment: reference type for a VPI session.
 *  The referenced container type is VPISessionNode.
 */
class VPISession : public NodeRef {
public:
/*! \brief Default constructor. */
VPISession() {}
/*!
 * \brief Construct from a node pointer.
 * \param n The node pointer passed to the NodeRef base.
 */
explicit VPISession(NodePtr<Node> n) : NodeRef(n) {}
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \return The handle.
 */
VPIHandle operator[](const std::string& name) const;
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \param allow_undefined whether allow undefined
 * \return The handle.
 */
VPIHandle GetByName(const std::string& name, bool allow_undefined) const;
/*!
 * \brief Yield control back to the simulator
 *  Block until next cycle.
 */
void yield();
/*!
 * \brief Shutdown the session.
 */
void shutdown();
/*!
 * \brief Create new session by giving a read and write pipe to VPI process.
 * \param h_pipe_read a read pipe from VPI process.
 * \param h_pipe_write a write pipe from VPI process.
 *  NOTE(review): presumably this is the pipe written *to* the VPI process;
 *  confirm the direction against the implementation.
 * \return The created session.
 */
static VPISession make(int h_pipe_read, int h_pipe_write);
// Internal methods.
using ContainerType = VPISessionNode;
/*! \return Pointer to the underlying VPISessionNode (defined elsewhere). */
inline VPISessionNode* get() const;
};
/*!
 * \brief VPI Handle: reference type for a handle obtained from a session.
 *  The referenced container type is VPIHandleNode.
 */
class VPIHandle : public NodeRef {
public:
/*! \brief Default constructor. */
VPIHandle() {}
/*!
 * \brief Construct from a node pointer.
 * \param n The node pointer passed to the NodeRef base.
 */
explicit VPIHandle(NodePtr<Node> n) : NodeRef(n) {}
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \return The handle.
 */
VPIHandle operator[](const std::string& name) const;
/*! \return number of bits */
int size() const;
/*!
 * \brief Set int value to the handle.
 * \param value The value to set.
 */
void put_int(int value);
/*!
 * \brief Get int value from handle.
 * \return The result int value.
 */
int get_int() const;
/*! \return Name of the handle. */
std::string name() const;
/*!
 * \brief Put a vector of vpi::VPIVecVal values into the handle.
 * \param vec The vector to be put.
 */
void put_vec(const std::vector<vpi::VPIVecVal>& vec) const;
/*!
 * \brief Get a vector of vpi::VPIVecVal values from the handle.
 * \param vec The result data container.
 */
void get_vec(std::vector<vpi::VPIVecVal>* vec) const;
// Internal methods
using ContainerType = VPIHandleNode;
/*! \return Pointer to the underlying VPIHandleNode (defined elsewhere). */
inline VPIHandleNode* get() const;
};
/*!
 * \brief Container for session.
 *  Holds the shared session entry and a list of PackedFunc callbacks.
 */
class VPISessionNode : public Node {
public:
// internal session.
std::shared_ptr<VPISessionEntry> sess;
// callbacks at pos edge end.
std::vector<PackedFunc> posedge_end_callbacks;
// visit all attributes
// The body is empty: no field of this node is reported to the visitor.
void VisitAttrs(AttrVisitor* v) final {
}
// Type key string used for this node type.
static constexpr const char* _type_key = "VPISession";
TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node);
};
/*!
 * \brief Container for handle.
 *  Holds the shared session entry together with the raw VPI handle.
 */
class VPIHandleNode : public Node {
 public:
  // internal session.
  std::shared_ptr<VPISessionEntry> sess;
  // Internal handle.
  // Value-initialized so a freshly constructed node never carries an
  // indeterminate raw handle before the creator assigns the real one.
  vpi::VPIRawHandle handle{};
  // The body is empty: no field of this node is reported to the visitor.
  void VisitAttrs(AttrVisitor* v) final {
  }
  // Type key string used for this node type.
  static constexpr const char* _type_key = "VPIHandle";
  TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node);
};
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_VPI_SESSION_H_
import tvm
from tvm.contrib import verilog
import numpy as np
def lower(s, args, name):
    """Lower schedule ``s`` into a function named ``name``.

    Every entry of ``args`` must be a ``tvm.tensor.Tensor``; a buffer is
    declared for each one and used both as the storage-flatten binding and
    as the argument list handed to ``MakeAPI``.

    Returns the value produced by ``tvm.ir_pass.MakeAPI``.
    """
    tensor_to_buffer = {}
    buffers = []
    for tensor in args:
        assert isinstance(tensor, tvm.tensor.Tensor)
        buffer = tvm.decl_buffer(
            tensor.shape, dtype=tensor.dtype, name=tensor.op.name)
        tensor_to_buffer[tensor] = buffer
        buffers.append(buffer)

    schedule = s.normalize()
    bounds = tvm.schedule.InferBound(schedule)
    body = tvm.schedule.ScheduleOps(schedule, bounds)
    body = tvm.ir_pass.StorageFlatten(body, tensor_to_buffer, 64)
    body = tvm.ir_pass.CanonicalSimplify(body)
    body = tvm.ir_pass.Simplify(body)
    body = tvm.ir_pass.SplitPipeline(body, True)
    return tvm.ir_pass.MakeAPI(body, name, buffers, 0, True)
@tvm.register_func
def tvm_callback_verilog_postproc(code):
    """Hook to inspect the verilog code before it is actually run.

    Prints ``code`` and returns it unchanged.
    """
    print(code)
    return code
def test_add_pipeline():
    """Element-wise int32 add bound to a "pipeline" thread axis.

    The function is lowered and split into host/device parts; when both the
    host and the device targets are enabled it is built, run on the VPI
    context and compared against numpy.
    """
    nn = 128
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A', dtype='int32')
    B = tvm.placeholder((n,), name='B', dtype='int32')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')
    s = tvm.create_schedule(C.op)
    px, x = s[C].split(C.op.axis[0], nparts=1)
    s[C].bind(px, tvm.thread_axis("pipeline"))

    fapi = lower(s, [A, B, C], "myadd")
    fsplits = list(tvm.ir_pass.SplitHostDevice(fapi))
    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
    print("------")

    def check_target(device, host="stackvm"):
        # Skip silently unless both targets are available.
        if not (tvm.module.enabled(host) and tvm.module.enabled(device)):
            return
        ctx = tvm.vpi(0)
        mhost = tvm.codegen.build_module(fsplits[0], host)
        mdev = tvm.codegen.build_module(fsplits[1:], device)
        mhost.import_module(mdev)
        code = mdev.get_source()
        f = mhost.entry_func
        # launch the kernel.
        length = nn
        a = tvm.nd.array(
            (np.random.uniform(size=length) * 128).astype(A.dtype), ctx)
        b = tvm.nd.array(
            (np.random.uniform(size=length) * 128).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(length, dtype=C.dtype), ctx)
        f(a, b, c)
        print("Check correctness...")
        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())

    check_target("verilog")
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_add_pipeline()
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_doublebuff():
    """Drive tvm_buffer.v as a double buffer and check the read-back stream.

    Window size is 16, buffer size is 32.
    """
    window_width = 16
    set_size = 8

    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_buffer_doublebuff.v"),
        verilog.find_file("tvm_buffer.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    write_advance = top.write_advance
    write_addr = top.write_addr
    write_valid = top.write_valid
    write_ready = top.write_ready
    write_data = top.write_data
    read_data = top.read_data
    read_data_valid = top.read_data_valid

    # Simulation input data
    test_data = np.arange(window_width * set_size).astype('int8')
    total = len(test_data)

    # Initial state
    rst.put_int(1)
    write_advance.put_int(0)
    write_addr.put_int(0)
    write_valid.put_int(0)
    write_data.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # Leave the following signals set to true
    sess.yield_until_next_cycle()
    write_valid.put_int(1)

    # Main simulation loop
    write_idx = 0
    read_idx = 0
    while read_idx < total:
        # write logic
        if write_idx >= total:
            # Everything has been written: stop driving the write port.
            write_advance.put_int(0)
            write_valid.put_int(0)
        else:
            write_advance.put_int(0)
            if write_ready.get_int():
                offset = write_idx % window_width
                write_data.put_int(int(test_data[write_idx]))
                write_addr.put_int(offset)
                # Advance the window on its last element.
                if offset == window_width - 1:
                    write_advance.put_int(1)
                write_idx += 1
        # correctness checks
        if read_data_valid.get_int():
            assert(read_data.get_int()==test_data[read_idx])
            read_idx += 1
        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_buffer_doublebuff()
// Testbench wrapping tvm_buffer as a double buffer (two 16-entry windows in a
// 32-entry buffer). The write side and rst are driven from the TVM session;
// the read side is generated locally below.
module main();
    // Parameters
    parameter PER=10;
    // Double buffer parameters
    parameter DATA_WIDTH = 8;
    parameter DEPTH = 32;
    parameter CNTR_WIDTH = 6; // floor(log(32)) + 1
    parameter RD_WINDOW = 16;
    parameter RD_ADVANCE = 16;
    parameter RD_ADDR_WIDTH = 5; // floor(log(16)) + 1
    parameter WR_WINDOW = 16;
    parameter WR_ADVANCE = 16;
    parameter WR_ADDR_WIDTH = 5; // floor(log(16)) + 1
    // Clock & reset
    reg clk;
    reg rst;
    // Read port inputs
    reg read_advance;
    reg [RD_ADDR_WIDTH-1:0] read_addr;
    reg read_ready;
    // Write port inputs
    reg write_advance;
    reg [DATA_WIDTH-1:0] write_data;
    reg [WR_ADDR_WIDTH-1:0] write_addr;
    reg write_valid;
    // Outputs
    wire [DATA_WIDTH-1:0] read_data;
    wire read_valid;
    wire write_ready;
    wire [CNTR_WIDTH-1:0] status_counter;
    // Module instantiation
    tvm_buffer #(
        .DATA_WIDTH(DATA_WIDTH),
        .DEPTH(DEPTH),
        .CNTR_WIDTH(CNTR_WIDTH),
        .RD_WINDOW(RD_WINDOW),
        .RD_ADVANCE(RD_ADVANCE),
        .RD_ADDR_WIDTH(RD_ADDR_WIDTH),
        .WR_WINDOW(WR_WINDOW),
        .WR_ADVANCE(WR_ADVANCE),
        .WR_ADDR_WIDTH(WR_ADDR_WIDTH)
    ) uut (
        .clk(clk),
        .rst(rst),
        .read_advance(read_advance),
        .read_data(read_data),
        .read_addr(read_addr),
        .read_ready(read_ready),
        .read_valid(read_valid),
        .write_advance(write_advance),
        .write_data(write_data),
        .write_addr(write_addr),
        .write_ready(write_ready),
        .write_valid(write_valid),
        .status_counter(status_counter)
    );
    // clock generation
    always begin
        #(PER/2) clk =~ clk;
    end
    // read logic
    always @(posedge clk) begin
        if (rst) begin
            read_advance <= 0;
            read_addr <= 0;
            read_ready <= 0;
        end else begin
            if (read_valid) begin
                read_ready <= 1;
            end else begin
                read_ready <= 0;
            end
            // Request a window advance one cycle before the last address.
            if (read_addr%RD_WINDOW==RD_WINDOW-2) begin
                read_advance <= 1;
            end else begin
                read_advance <= 0;
            end
            // The read address wraps inside the *read* window, so it must use
            // RD_WINDOW (the write-window parameter only happened to match).
            if (read_ready) begin
                read_addr <= (read_addr+1) % RD_WINDOW;
            end else begin
                read_addr <= read_addr % RD_WINDOW;
            end
        end
    end
    // read_data_valid logic: read_ready delayed by one clock
    reg read_data_valid;
    always @(posedge clk) begin
        if (rst)
            read_data_valid <= 0;
        else
            read_data_valid <= read_ready;
    end
    initial begin
        // This will allow tvm session to be called every cycle.
        $tvm_session(clk);
    end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_fifo():
    """Drive tvm_buffer.v as a FIFO and check that data comes out in order."""
    # Find file will search root/verilog and root/tests/verilog
    sess = verilog.session([
        verilog.find_file("test_buffer_fifo.v"),
        verilog.find_file("tvm_buffer.v")
    ])

    # Get the handles by their names
    rst = sess.main.rst
    enq = sess.main.enq
    write_data = sess.main.write_data
    read_data = sess.main.read_data
    read_data_valid = sess.main.read_data_valid

    # Simulation input data
    test_data = np.arange(16).astype('int8')

    # Initial state
    rst.put_int(1)
    enq.put_int(0)
    write_data.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # Main simulation loop
    read_idx = 0
    write_idx = 0
    while read_idx < len(test_data):
        # write logic
        if write_idx < len(test_data):
            enq.put_int(1)
            # Write the actual test vector (not the loop index) so the check
            # below stays valid if test_data is ever changed.
            write_data.put_int(int(test_data[write_idx]))
            write_idx += 1
        else:
            enq.put_int(0)
        # read logic
        if read_data_valid.get_int():
            assert(read_data.get_int()==test_data[read_idx])
            read_idx += 1
        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_buffer_fifo()
// Testbench wrapping tvm_buffer as a plain FIFO (all windows/advances are 1).
// rst, enq and write_data are driven from the TVM session; deq is generated
// locally from read_valid.
module main();
// Parameters
parameter PER=10;
// FIFO parameters
parameter DATA_WIDTH = 8;
parameter DEPTH = 32;
parameter CNTR_WIDTH = 6; // floor(log(32)) + 1
parameter RD_WINDOW = 1;
parameter RD_ADVANCE = 1;
parameter RD_ADDR_WIDTH = 1;
parameter WR_WINDOW = 1;
parameter WR_ADVANCE = 1;
parameter WR_ADDR_WIDTH = 1;
// Clock & reset
// NOTE(review): clk is given no initial value in this module; presumably
// $tvm_session drives it - confirm.
reg clk;
reg rst;
// Module inputs
reg [DATA_WIDTH-1:0] write_data;
// FIFO interface abstraction:
// Connect deq to read_advance and read_ready
// Connect enq to write_advance and write_valid
// Set read_addr and write_addr to 0
reg deq;
reg enq;
// Module outputs
wire [DATA_WIDTH-1:0] read_data;
wire read_valid;
wire write_ready;
wire [CNTR_WIDTH-1:0] status_counter;
// Module instantiation
tvm_buffer #(
.DATA_WIDTH(DATA_WIDTH),
.DEPTH(DEPTH),
.CNTR_WIDTH(CNTR_WIDTH),
.RD_WINDOW(RD_WINDOW),
.RD_ADVANCE(RD_ADVANCE),
.RD_ADDR_WIDTH(RD_ADDR_WIDTH),
.WR_WINDOW(WR_WINDOW),
.WR_ADVANCE(WR_ADVANCE),
.WR_ADDR_WIDTH(WR_ADDR_WIDTH)
) uut (
.clk(clk),
.rst(rst),
.read_advance(deq),
.read_addr({RD_ADDR_WIDTH{1'b0}}),
.read_ready(deq),
.read_valid(read_valid),
.read_data(read_data),
.write_advance(enq),
.write_addr({WR_ADDR_WIDTH{1'b0}}),
.write_ready(write_ready),
.write_valid(enq),
.write_data(write_data),
.status_counter(status_counter)
);
// clock generation: invert clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
// fifo read logic: deq is read_valid registered on the clock, cleared by rst
always @(posedge clk) begin
if (rst)
deq <= 0;
else
deq <= read_valid;
end
// read_data_valid logic: deq delayed by one more clock
reg read_data_valid;
always @(posedge clk) begin
if (rst)
read_data_valid <= 0;
else
read_data_valid <= deq;
end
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_linebuff():
    """Drive tvm_buffer.v as a line buffer and check every window element.

    Window is 8x8, kernel is 3x3.
    """
    window_width = 8
    kernel_width = 3
    kernel_size = kernel_width * kernel_width
    out_width = window_width - kernel_width + 1
    # One read per kernel element per output position.
    total_reads = out_width * out_width * kernel_width * kernel_width

    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_buffer_linebuff.v"),
        verilog.find_file("tvm_buffer.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    write_advance = top.write_advance
    write_valid = top.write_valid
    write_ready = top.write_ready
    write_data = top.write_data
    read_data = top.read_data
    read_data_valid = top.read_data_valid

    # Simulation input data
    test_data = np.arange(window_width * window_width).astype('int8')

    # Initial state
    rst.put_int(1)
    write_advance.put_int(0)
    write_valid.put_int(0)
    write_data.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # Leave the following signals set to true
    sess.yield_until_next_cycle()
    write_advance.put_int(1)
    write_valid.put_int(1)

    # Main simulation loop
    write_idx = 0
    read_idx = 0
    while read_idx < total_reads:
        # write logic
        if write_idx >= len(test_data):
            write_advance.put_int(0)
            write_valid.put_int(0)
        elif write_ready.get_int():
            write_data.put_int(int(test_data[write_idx]))
            write_idx += 1
        # correctness checks
        if read_data_valid.get_int():
            # Derive convolution window indices
            base_idx, offset_idx = divmod(read_idx, kernel_size)
            y_offset, x_offset = divmod(offset_idx, kernel_width)
            pix_index = base_idx + y_offset * window_width + x_offset
            assert(read_data.get_int()==test_data[pix_index])
            read_idx += 1
        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_buffer_linebuff()
// Testbench wrapping tvm_buffer as a line buffer for a 3x3 kernel sliding over
// an 8-wide image. rst and the write side are driven from the TVM session; the
// read address sequence is generated locally below.
module main();
// Parameters
parameter PER=10;
// In this example we perform a 3x3 convolution of an 8x8 input image
// Therefore the window size here is (3-1)*8+3 = 19
parameter IMAGE_WIDTH = 8;
parameter KERNEL_WIDTH = 3;
// Line buffer parameters
parameter DATA_WIDTH = 8;
parameter DEPTH = 20; // (3-1)*8+3+1
parameter CNTR_WIDTH = 5; // floor(log(20)) + 1
parameter RD_WINDOW = 19; // (3-1)*8+3
parameter RD_ADVANCE = 1;
parameter RD_ADDR_WIDTH = 5; // floor(log(19)) + 1
parameter WR_WINDOW = 1;
parameter WR_ADVANCE = 1;
parameter WR_ADDR_WIDTH = 1;
// Clock & reset
// NOTE(review): clk is given no initial value in this module; presumably
// $tvm_session drives it - confirm.
reg clk;
reg rst;
// Read port inputs
reg read_advance;
reg [RD_ADDR_WIDTH-1:0] read_addr;
reg read_ready;
// Write port inputs
reg write_advance;
reg [DATA_WIDTH-1:0] write_data;
reg write_valid;
// Outputs
wire [DATA_WIDTH-1:0] read_data;
wire read_valid;
wire write_ready;
wire [CNTR_WIDTH-1:0] status_counter;
// Module instantiation
tvm_buffer #(
.DATA_WIDTH(DATA_WIDTH),
.DEPTH(DEPTH),
.CNTR_WIDTH(CNTR_WIDTH),
.RD_WINDOW(RD_WINDOW),
.RD_ADVANCE(RD_ADVANCE),
.RD_ADDR_WIDTH(RD_ADDR_WIDTH),
.WR_WINDOW(WR_WINDOW),
.WR_ADVANCE(WR_ADVANCE),
.WR_ADDR_WIDTH(WR_ADDR_WIDTH)
) uut (
.clk(clk),
.rst(rst),
.read_advance(read_advance),
.read_data(read_data),
.read_addr(read_addr),
.read_ready(read_ready),
.read_valid(read_valid),
.write_advance(write_advance),
.write_data(write_data),
.write_addr({WR_ADDR_WIDTH{1'b0}}),
.write_ready(write_ready),
.write_valid(write_valid),
.status_counter(status_counter)
);
// clock generation: invert clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
// read logic
localparam KERNEL_SIZE = KERNEL_WIDTH*KERNEL_WIDTH;
// Position inside the current kernel window, 0 .. KERNEL_SIZE-1.
reg [3:0] read_counter;
always @(posedge clk) begin
if (rst) begin
// Reset to the last position so the first valid cycle wraps to 0.
read_counter <= KERNEL_SIZE-1;
read_advance <= 0;
read_addr <= -1;
read_ready <= 0;
end else begin
if (read_valid) begin
read_counter <= (read_counter+1)%KERNEL_SIZE;
read_ready <= 1;
// Only advance at the last inner loop iteration
if (read_counter==KERNEL_SIZE-2) begin
read_advance <= 1;
end else begin
read_advance <= 0;
end
// Read address should describe a loop
if (read_counter==KERNEL_SIZE-1) begin
// Start of a new kernel window.
read_addr <= 0;
end else if (read_counter%KERNEL_WIDTH==KERNEL_WIDTH-1) begin
// End of a kernel row: jump to the start of the next image row.
read_addr <= read_addr+IMAGE_WIDTH-KERNEL_WIDTH+1;
end else begin
read_addr <= read_addr+1;
end
end else begin
// No valid data: hold position and de-assert the handshake signals.
read_counter <= read_counter;
read_advance <= 0;
read_addr <= read_addr;
read_ready <= 0;
end
end
end
// read_data_valid logic: read_ready delayed by one clock
reg read_data_valid;
always @(posedge clk) begin
if (rst)
read_data_valid <= 0;
else
read_data_valid <= read_ready;
end
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
from testing_util import FIFODelayedWriter, FIFODelayedReader
def run_with_lag(n, read_lag, write_lag):
    """Push ``range(n)`` through test_cache_reg.v with delayed endpoints.

    ``read_lag`` / ``write_lag`` are the lag sequences handed to
    FIFODelayedReader / FIFODelayedWriter. The run is bounded by a timeout
    derived from the lags and ``n``; afterwards the reader must have seen
    0..n-1 in order and the writer must have no data left.
    """
    payload = list(range(n))
    sess = verilog.session([
        verilog.find_file("test_cache_reg.v")
    ])
    top = sess.main
    rst = top.rst
    in_data = top.in_data
    in_valid = top.in_valid
    in_ready = top.in_ready
    out_data = top.out_data
    out_valid = top.out_valid
    out_ready = top.out_ready

    # hook up reader
    reader = FIFODelayedReader(out_data, out_valid, out_ready, read_lag)
    writer = FIFODelayedWriter(in_data, in_valid, in_ready, payload, write_lag)

    # Pulse reset for one cycle, then let one more cycle pass.
    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()

    sess.yield_callbacks.append(reader)
    sess.yield_callbacks.append(writer)

    timeout = sum(read_lag) + sum(write_lag) + n + 10
    for _ in range(timeout):
        sess.yield_until_next_cycle()
        if len(reader.data) == n:
            break

    assert tuple(reader.data) == tuple(range(n))
    assert len(writer.data) == 0
    sess.shutdown()
def test_fifo():
    """Run the cache-register FIFO with a slow reader, a slow writer, and both."""
    n = 20
    slow_read = [3, 4, 8]
    slow_write = [0, 2, 10]
    # slow reader
    run_with_lag(n, read_lag=slow_read, write_lag=[])
    # slow writer
    run_with_lag(n, read_lag=[0], write_lag=slow_write)
    # mix
    run_with_lag(n, read_lag=slow_read, write_lag=slow_write)
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_fifo()
`include "tvm_marcos.v"
// Testbench for the CACHE_REG macro with 32-bit data.
// in_data/in_valid/out_ready are regs to be driven by the test; in_ready,
// out_data and out_valid are wires passed to the macro.
module main();
// Declares clk/rst and the clock generator.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
reg[31:0] in_data;
wire[31:0] out_data;
wire in_ready;
reg in_valid;
reg out_ready;
wire out_valid;
// NOTE(review): CACHE_REG is not defined in this file; presumably it comes
// from the included tvm_marcos.v - confirm.
`CACHE_REG(32, in_data, in_valid, in_ready,
out_data, out_valid, out_ready)
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
def test_counter():
    """Check the example counter counts 0..9 after reset is released."""
    # Start a new session by run simulation on test_counter.v
    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_counter.v"),
        verilog.find_file("example_counter.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    counter = top.counter
    # Not used further; kept because it exercises lookup by item name.
    cnt = top["counter_unit1"]
    assert(counter.name == "main.counter")
    assert(counter.size == 4)

    rst.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()

    for expected in range(10):
        # get value of counter.
        assert(counter.get_int() == expected)
        sess.yield_until_next_cycle()
def test_scratch():
    """Toggle reset whenever the counter reads 3 and print what is observed."""
    sources = [
        verilog.find_file("test_counter.v"),
        verilog.find_file("example_counter.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    rst = sess.main.rst
    counter = sess.main.counter

    rst.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)

    temp = 0
    for _ in range(10):
        if rst.get_int():
            # Reset was asserted: release it and remember the counter value.
            rst.put_int(0)
            temp = counter.get_int()
        elif counter.get_int() == 3:
            rst.put_int(1)
        print("counter=%d, temp=%d" % (counter.get_int(), temp))
        sess.yield_until_next_cycle()
if __name__ == "__main__":
    # Run both tests directly when the file is executed as a script.
    test_scratch()
    test_counter()
`include "tvm_marcos.v"
// Testbench that instantiates the example `counter` module and exposes its
// 4-bit output as the wire `counter`.
module main();
// Declares clk/rst and the clock generator.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
wire[3:0] counter;
// Instance name counter_unit1 is what the Python test looks up by name.
counter counter_unit1(.clk(clk), .rst(rst), .out(counter));
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
def test_loop():
    """Check the nested loop iterators in test_loop.v step as a 3x4 loop nest."""
    sess = verilog.session([
        verilog.find_file("test_loop.v")
    ])

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    iter0 = top.iter0
    iter1 = top.iter1
    ready = top.ready

    rst.put_int(1)
    ready.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()

    for _ in range(0, 1):
        for outer in range(0, 3):
            for inner in range(0, 4):
                # iter1 is the outer iterator, iter0 the inner one.
                assert(iter1.get_int() == outer)
                assert(iter0.get_int() == inner)
                sess.yield_until_next_cycle()
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_loop()
`include "tvm_marcos.v"
// Testbench for the loop macros: two chained NONSTOP_LOOP iterators wrapped by
// WRAP_LOOP_ONCE. `ready` is a reg to be driven by the test.
module main();
// Declares clk/rst and the clock generator.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
reg ready;
wire lp_ready;
// Inner iterator: 4-bit iter0, min 0, extent 4, stepped by lp_ready.
`NONSTOP_LOOP(iter0, 4, 0, lp_ready, iter0_finish, 0, 4)
// Outer iterator: 4-bit iter1, min 0, extent 3, stepped by iter0_finish.
`NONSTOP_LOOP(iter1, 4, 0, iter0_finish, iter1_finish, 0, 3)
// Gate the loop body with valid/ready; iter1_finish is the body-finish input.
`WRAP_LOOP_ONCE(0, valid, ready, iter1_finish, loop_ready)
assign lp_ready = loop_ready;
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
class FIFOReader(object):
    """Auxiliary callback that collects values read from a FIFO.

    Each call samples ``read_valid``; when it is non-zero the current value of
    ``read_data`` is appended to ``self.data``.
    """

    def __init__(self, read_data, read_valid):
        self.read_data = read_data
        self.read_valid = read_valid
        self.data = []

    def __call__(self):
        if not self.read_valid.get_int():
            return
        value = self.read_data.get_int()
        self.data.append(value)
class FIFOWriter(object):
    """Auxiliary callback that feeds a list of values into a FIFO.

    Each call writes the head of ``data`` (and asserts ``write_enable``) when
    there is data left and ``write_pend`` reads non-zero; otherwise it
    de-asserts ``write_enable``. The list passed as ``data`` is consumed in
    place.
    """

    def __init__(self, write_data, write_enable, write_pend, data):
        self.write_data = write_data
        self.write_enable = write_enable
        self.write_pend = write_pend
        self.data = data

    def __call__(self):
        can_write = self.data and self.write_pend.get_int()
        if not can_write:
            self.write_enable.put_int(0)
            return
        self.write_enable.put_int(1)
        self.write_data.put_int(int(self.data[0]))
        del self.data[0]
def test_ram_read():
    """Issue two host read requests and check the streamed-out bytes.

    The first request covers the whole array, the second starts two elements
    in; the reader must observe both ranges back to back.
    """
    n = 10
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.arange(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sources = [
        verilog.find_file("test_vpi_mem_interface.v"),
        verilog.find_file("tvm_vpi_mem_interface.v"),
    ]
    sess = verilog.session(sources)
    top = sess.main
    rst = top.rst
    read_data = top.read_data
    read_valid = top.read_data_valid
    read_en = top.read_en
    host_read_req = top.read_req
    host_read_addr = top.read_addr
    host_read_size = top.read_size

    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # hook up reader
    reader = FIFOReader(read_data, read_valid)
    sess.yield_callbacks.append(reader)

    # request read
    host_read_req.put_int(1)
    host_read_addr.put_int(a_ptr)
    host_read_size.put_int(a.shape[0])
    sess.yield_until_next_cycle()

    # second read request
    host_read_addr.put_int(a_ptr + 2)
    host_read_size.put_int(a.shape[0] - 2)
    sess.yield_until_next_cycle()

    host_read_req.put_int(0)
    read_en.put_int(1)

    # yield until read is done
    for _ in range(a.shape[0] * 3):
        sess.yield_until_next_cycle()
    sess.shutdown()

    # check if result matches
    expected = np.concatenate((a_np, a_np[2:]))
    np.testing.assert_equal(np.array(reader.data), expected)
def test_ram_write():
    """Issue a host write request at an offset and check the written bytes.

    The values ``offset .. n-1`` are streamed into the array starting at
    element ``offset``; afterwards that tail of the array must equal them.
    """
    n = 10
    # write starting at this element offset
    offset = 2
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.zeros(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    # Derive the payload from `offset` instead of repeating the literal, so
    # the data, the request and the final check cannot drift apart.
    w_data = list(range(offset, n))
    # Snapshot taken before the writer consumes w_data in place.
    r_data = np.array(w_data, dtype='int8')
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sess = verilog.session([
        verilog.find_file("test_vpi_mem_interface.v"),
        verilog.find_file("tvm_vpi_mem_interface.v")
    ])
    rst = sess.main.rst
    write_data = sess.main.write_data
    write_en = sess.main.write_en
    write_ready = sess.main.write_data_ready
    host_write_req = sess.main.write_req
    host_write_addr = sess.main.write_addr
    host_write_size = sess.main.write_size

    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # hook up writer
    writer = FIFOWriter(write_data, write_en, write_ready, w_data)
    sess.yield_callbacks.append(writer)

    # request write
    host_write_req.put_int(1)
    host_write_addr.put_int(a_ptr + offset)
    host_write_size.put_int(a.shape[0] - offset)
    sess.yield_until_next_cycle()
    host_write_req.put_int(0)

    # yield until write is done
    for _ in range(a.shape[0] + 2):
        sess.yield_until_next_cycle()
    sess.shutdown()

    # check if result matches
    np.testing.assert_equal(a.asnumpy()[offset:], r_data)
if __name__ == "__main__":
    # Run both tests directly when the file is executed as a script.
    test_ram_read()
    test_ram_write()
// Testbench for tvm_vpi_mem_interface with 8-bit read/write data channels and
// 32-bit host address/size controls. All regs are left to be driven by the test.
module main();
parameter PER = 10;
parameter WIDTH = 8;
// NOTE(review): clk is given no initial value in this module; presumably
// $tvm_session drives it - confirm.
reg clk;
reg rst;
// read channels
reg read_en;
wire [WIDTH-1:0] read_data;
wire read_data_valid;
// write channels
reg write_en;
reg [WIDTH-1:0] write_data;
wire write_data_ready;
// controls
reg read_req;
reg [31:0] read_addr;
reg [31:0] read_size;
reg write_req;
reg [31:0] write_addr;
reg [31:0] write_size;
// clock generation: invert clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
tvm_vpi_mem_interface #
(
.READ_WIDTH(WIDTH),
.WRITE_WIDTH(WIDTH),
.ADDR_WIDTH(32),
.SIZE_WIDTH(32)
)
mem
(
.clk(clk),
.rst(rst),
.read_en(read_en),
.read_data_out(read_data),
.read_data_valid(read_data_valid),
.write_en(write_en),
.write_data_in(write_data),
.write_data_ready(write_data_ready),
.host_read_req(read_req),
.host_read_addr(read_addr),
.host_read_size(read_size),
.host_write_req(write_req),
.host_write_addr(write_addr),
.host_write_size(write_size)
);
initial begin
// pass the `mem` instance to the session to hook it up with simulation
$tvm_session(clk, mem);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_mmap():
    """Read an array through the VPI read mmap, then overwrite it through the
    write mmap and check every element was incremented by one."""
    n = 10
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.arange(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sources = [
        verilog.find_file("test_vpi_mmap.v"),
        verilog.find_file("tvm_vpi_mmap.v"),
    ]
    sess = verilog.session(sources)
    top = sess.main
    rst = top.rst
    read_addr = top.read_addr
    read_data = top.read_data
    write_addr = top.write_addr
    write_data = top.write_data
    write_en = top.write_en
    mmap_addr = top.mmap_addr

    # setup memory map.
    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)
    write_en.put_int(0)
    mmap_addr.put_int(a_ptr)
    sess.yield_until_next_cycle()

    # read test
    for index in range(n):
        read_addr.put_int(index)
        sess.yield_until_next_cycle()
        # read addr get set this cycle
        sess.yield_until_next_cycle()
        # get the data out
        assert(read_data.get_int() == index)

    # write test
    # NOTE(review): the source lost its indentation; the enable is assumed to
    # be dropped again inside every iteration - confirm against the original.
    for index in reversed(range(n)):
        write_addr.put_int(index)
        write_en.put_int(1)
        write_data.put_int(index + 1)
        sess.yield_until_next_cycle()
        write_en.put_int(0)
        sess.yield_until_next_cycle()

    np.testing.assert_equal(a.asnumpy(), a_np + 1)
if __name__ == "__main__":
    # Run the test directly when the file is executed as a script.
    test_mmap()
// Testbench for the VPI memory-map modules: one tvm_vpi_read_mmap and one
// tvm_vpi_write_mmap instance sharing the same mmap_addr base. All regs are
// left to be driven by the test.
module main();
parameter PER = 10;
parameter DATA_WIDTH = 8;
parameter ADDR_WIDTH = 8;
// NOTE(review): clk is given no initial value in this module; presumably
// $tvm_session drives it - confirm.
reg clk;
reg rst;
// read channels
reg [ADDR_WIDTH-1:0] read_addr;
wire [DATA_WIDTH-1:0] read_data;
// write channels
reg [ADDR_WIDTH-1:0] write_addr;
reg [DATA_WIDTH-1:0] write_data;
reg write_en;
// mmap base
reg [31:0] mmap_addr;
// clock generation: invert clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
tvm_vpi_read_mmap #
(
.DATA_WIDTH(DATA_WIDTH),
.ADDR_WIDTH(ADDR_WIDTH)
)
rmmap
(
.clk(clk),
.rst(rst),
.addr(read_addr),
.data_out(read_data),
.mmap_addr(mmap_addr)
);
tvm_vpi_write_mmap #
(
.DATA_WIDTH(DATA_WIDTH),
.ADDR_WIDTH(ADDR_WIDTH)
)
wmmap
(
.clk(clk),
.rst(rst),
.addr(write_addr),
.data_in(write_data),
.en(write_en),
.mmap_addr(mmap_addr)
);
initial begin
// Pass both mmap instances to the session along with the clock.
$tvm_session(clk, rmmap, wmmap);
end
endmodule
"""Common utilities for test"""
class FIFODelayedReader(object):
    """Reader that holds ``read_ready`` low for a given number of cycles
    after each accepted item.

    ``lag`` supplies the wait length used after successive reads, in order;
    once it is exhausted a wait of 0 is used.
    """

    def __init__(self, read_data, read_valid, read_ready, lag):
        self.read_data = read_data
        self.read_valid = read_valid
        self.read_ready = read_ready
        self.read_ready.put_int(1)
        # Stored reversed so the next lag can be taken with pop().
        self.lag = list(reversed(lag))
        self.data = []
        self.wait_counter = 0
        self.wait_state = False

    def __call__(self):
        """Logic as if always at pos-edge"""
        if not self.wait_state:
            accepted = (self.read_ready.get_int() and
                        self.read_valid.get_int())
            if accepted:
                self.data.append(self.read_data.get_int())
                self.wait_counter = self.lag.pop() if self.lag else 0
                self.wait_state = True
        if not self.wait_state:
            return
        if self.wait_counter != 0:
            # Still waiting: keep ready low for another cycle.
            self.wait_counter -= 1
            self.read_ready.put_int(0)
        else:
            self.read_ready.put_int(1)
            self.wait_state = False
class FIFODelayedWriter(object):
    """Writer that waits a given number of cycles before presenting each
    further item to the FIFO.

    ``data`` is copied (in reverse) so items are taken with pop(); ``lag``
    supplies the wait length used after each accepted write, in order, with
    0 used once it is exhausted.
    """

    def __init__(self, write_data, write_valid, write_ready, data, lag):
        self.write_data = write_data
        self.write_valid = write_valid
        self.write_ready = write_ready
        self.write_valid.put_int(0)
        self.lag = list(reversed(lag))
        self.data = list(reversed(data))
        self.wait_counter = 0
        # Start in the waiting state so the first item is presented at once.
        self.wait_state = True

    def __call__(self):
        """Logic as if always at pos-edge"""
        if not self.wait_state and self.write_ready.get_int():
            self.wait_counter = self.lag.pop() if self.lag else 0
            self.wait_state = True
        if not self.wait_state:
            return
        if self.wait_counter != 0:
            # Still waiting: keep valid low and count down.
            self.write_valid.put_int(0)
            self.wait_counter -= 1
        elif self.data:
            self.write_valid.put_int(1)
            self.write_data.put_int(self.data.pop())
            self.wait_state = False
        else:
            # Nothing left to send.
            self.write_valid.put_int(0)
# Verilog Code Guideline
The Verilog backend is still in early alpha and is not yet ready for use.
- Use ```my_port_name``` for variable naming.
- Always use suffix to indicate certain usage.
## Common Suffix
- ```clk```: clock
- ```rst```: reset
- ```in```: input port
- ```out```: output port
- ```en```: enable signal
- ```addr```: address port
- ```valid```: valid signal in FIFO handshake.
- ```ready```: ready signal in FIFO handshake.
// A 4-bit counter that counts up by one on every rising clock edge and is
// cleared while rst is high.
// Used as an example for the test cases.
module counter(
    input clk,
    input rst,
    output [3:0] out
);
    // Count register; exposed unchanged on `out`.
    reg [3:0] counter;
    assign out = counter;

    always @(posedge clk)
        if (rst)
            counter <= 0;
        else
            counter <= counter +1;
endmodule
// Buffer used to add intermediate data buffering in channels
//
// Data within the read/write window is directly accessible via read_addr/write_addr.
// The read_advance/write_advance signals move the read/write pointers forward by
// RD_ADVANCE/WR_ADVANCE (modulo DEPTH).
// The status_counter indicates how many items are currently in the buffer (it is
// only updated when an advance signal is accepted).
// The ready/valid signals are used to implement a handshake protocol:
//   read_valid  is high while status_counter >= RD_WINDOW
//   write_ready is high while status_counter <  DEPTH - WR_WINDOW
//
// Usage: instantiate directly and connect the read and write ports.
module tvm_buffer #(
    parameter DATA_WIDTH = 256,
    parameter DEPTH = 1024,
    parameter CNTR_WIDTH = 10,   // width of the pointers and status counter
    parameter RD_WINDOW = 8,     // set to 1 for FIFO behavior, or DEPTH for SRAM behavior
    parameter RD_ADVANCE = 2,    // window advance (set to 1 for FIFO behavior)
    parameter RD_ADDR_WIDTH = 3, // log base 2 of RD_WINDOW
    parameter WR_WINDOW = 8,     // set to 1 for FIFO behavior, or DEPTH for SRAM behavior
    parameter WR_ADVANCE = 2,    // window advance (set to 1 for FIFO behavior)
    parameter WR_ADDR_WIDTH = 3  // log base 2 of WR_WINDOW
) (
    input clk,
    input rst,
    // Read ports
    input read_advance,                    // Window advance (read pointer)
    input [RD_ADDR_WIDTH-1:0] read_addr,   // Read address offset
    input read_ready,                      // Read ready (dequeue)
    output read_valid,                     // Read valid (not empty)
    // Latched output: declared as a full-width reg here instead of being
    // redeclared as a scalar `reg` in the module body.
    output reg [DATA_WIDTH-1:0] read_data, // Read data port
    // Write ports
    input write_advance,                   // Window advance (write pointer)
    input [WR_ADDR_WIDTH-1:0] write_addr,  // Write address offset
    output write_ready,                    // Write ready (not full)
    input write_valid,                     // Write valid (enqueue)
    input [DATA_WIDTH-1:0] write_data,     // Write data port
    // Other outputs
    // Latched output, declared full-width for the same reason as read_data.
    output reg [CNTR_WIDTH-1:0] status_counter // Number of elements currently in FIFO
);
    // Internal registers (read pointer, write pointer)
    reg[CNTR_WIDTH-1:0] read_ptr;
    reg[CNTR_WIDTH-1:0] write_ptr;
    // RAM instance
    reg [DATA_WIDTH-1:0] ram[DEPTH-1:0];
    // Empty and full logic
    assign read_valid = (status_counter>=RD_WINDOW) ? 1'b1 : 1'b0;
    assign write_ready = (status_counter<(DEPTH-WR_WINDOW)) ? 1'b1 : 1'b0;
    // Counter logic (only affected by enq and deq)
    always @(posedge clk) begin
        // Case 1: system reset
        if (rst==1'b1) begin
            status_counter <= 0;
        // Case 2: simultaneous write advance and read advance and deq
        end else if ((write_advance && write_ready) && (read_advance && read_valid)) begin
            status_counter <= status_counter + (WR_ADVANCE - RD_ADVANCE);
        // Case 3: write advance
        end else if (write_advance && write_ready) begin
            status_counter <= status_counter + WR_ADVANCE;
        // Case 4: deq
        end else if (read_advance && read_valid) begin
            status_counter <= status_counter - RD_ADVANCE;
        // Default
        end else begin
            status_counter <= status_counter;
        end
    end
    // Output logic
    always @(posedge clk) begin
        if (rst==1'b1) begin
            read_data <= 0;
        end else begin
            if(read_ready) begin
                read_data <= ram[(read_ptr+read_addr)%DEPTH];
            end else begin
                read_data <= read_data;
            end
        end
    end
    // RAM writing logic
    always @(posedge clk) begin
        if(write_valid) begin
            ram[((write_ptr+write_addr)%DEPTH)] <= write_data;
        end
    end
    // Read and write pointer logic
    always@(posedge clk) begin
        if (rst==1'b1) begin
            write_ptr <= 0;
            read_ptr <= 0;
        end else begin
            // Increment write pointer by WR_ADVANCE when asserting write_advance
            // When performing a write, no need to update the write pointer
            if (write_advance && write_ready) begin
                write_ptr <= (write_ptr + WR_ADVANCE) % DEPTH;
            end else begin
                write_ptr <= write_ptr;
            end
            // Increment read pointer by RD_ADVANCE when asserting read_advance
            // When performing a read, no need to update the read pointer
            if(read_advance && read_valid) begin
                read_ptr <= (read_ptr + RD_ADVANCE) % DEPTH;
            end else begin
                read_ptr <= read_ptr;
            end
        end
    end
endmodule // tvm_buffer
// Free-running ("nonstop") loop counter.
// Declares register iter (width bits) and wire finish; uses clk and rst
// from the scope where the macro is expanded.
//  - On rst or init, iter is loaded with (min).
//  - While ready is high, iter increments on each clock edge; when it equals
//    (extent)-1 it is reloaded with (min) instead, so the loop keeps running.
//  - finish is high in the cycle where ready is high and iter == (extent)-1,
//    i.e. the next clock edge wraps the counter. Use it to signal the parent
//    loop to advance.
// NOTE(review): the wrap test compares against (extent)-1, not
// (min)+(extent)-1 -- confirm that callers always pass min == 0.
`define NONSTOP_LOOP(iter, width, init, ready, finish, min, extent)\
reg [width-1:0] iter;\
wire finish;\
always@(posedge clk) begin\
if (rst || init) begin\
iter <= (min);\
end else if(ready) begin\
if (iter != ((extent)-1)) begin\
iter <= iter + 1;\
end else begin\
iter <= (min);\
end\
end else begin\
iter <= iter;\
end\
end\
assign finish = (ready && (iter == (extent) - 1));
// Wraps a nonstop loop so that it runs only once per init.
// Declares register valid and wire body_ready; uses clk and rst from the
// scope where the macro is expanded.
//  - valid is set to 1 on rst or init, cleared when body_finish is high,
//    and holds its value otherwise.
//  - body_ready = valid && ready, so the wrapped body stops being driven
//    once it has reported body_finish.
`define WRAP_LOOP_ONCE(init, valid, ready, body_finish, body_ready)\
reg valid;\
wire body_ready;\
always@(posedge clk) begin\
if (rst || init) begin\
valid <= 1;\
end else if(body_finish) begin\
valid <= 0;\
end else begin\
valid <= valid;\
end\
end\
assign body_ready = (valid && ready);
// Assigns dst as src delayed by (delay) un-stalled clock cycles.
// Declares a shift register named <src>_dly_chain of (width)*(delay) bits;
// src is token-pasted into that name, so it must be a plain identifier.
//  - On rst the whole chain is cleared.
//  - When not_stall is high, the lowest slot takes src and (for delay != 1)
//    every other slot takes the value of the slot below it.
//  - When not_stall is low the chain holds its contents.
// dst is only assigned here (top slot of the chain), not declared.
`define DELAY(dst, src, width, delay, not_stall)\
reg [(width)*(delay)-1:0] src``_dly_chain;\
always@(posedge clk) begin\
if(rst) begin\
src``_dly_chain <= 0;\
end else if (not_stall) begin\
src``_dly_chain[(width)-1:0] <= src;\
if((delay) != 1) begin\
src``_dly_chain[(delay)*(width)-1:(width)] <= src``_dly_chain[((delay)-1)*(width)-1:0];\
end\
end else begin\
src``_dly_chain <= src``_dly_chain;\
end\
end\
assign dst = src``_dly_chain[(delay)*(width)-1:((delay)-1)*(width)];
// Declares the test clock and reset signals.
// Expands to: parameter PER = 10, registers named by the clk and rst
// arguments, and an always block that inverts clk every PER/2 time units.
// The macro itself gives clk no initial value and never drives rst;
// presumably both are set by the testbench or the VPI host -- TODO confirm.
`define TVM_DEFINE_TEST_SIGNAL(clk, rst)\
parameter PER = 10;\
reg clk;\
reg rst;\
always begin\
#(PER/2) clk =~ clk;\
end
// Control logic for the buffer/RAM read-valid signal.
// Declares register dst; uses clk and rst from the scope where the macro
// is expanded.
//  - On rst, dst is cleared.
//  - While write_ready is high, dst follows data_valid with one clock of delay.
//  - While write_ready is low, dst retains its value.
// The final line deliberately has no trailing backslash: a continuation there
// would pull whatever line follows the macro into the macro text.
`define BUFFER_READ_VALID_DELAY(dst, data_valid, write_ready)\
  reg dst;\
  always@(posedge clk) begin\
    if(rst) begin\
      dst <= 0;\
    end else if (write_ready) begin\
      dst <= (data_valid);\
    end else begin\
      dst <= dst;\
    end\
  end
// A cache register that adds one cycle of lag to the ready signal.
// This allows the signal to flow more smoothly.
//
// Declares <out_data>_state_ / <out_valid>_state_ (the output stage) and
// <out_data>_overflow_ / <out_valid>_overflow_ (a second slot used when the
// output stage is occupied and the consumer is not ready). Uses clk and rst
// from the scope where the macro is expanded.
//  - rst clears both valid flags.
//  - Overflow slot occupied: when out_ready is high its contents move to the
//    output stage and the slot is cleared; otherwise everything holds.
//  - Overflow slot empty: if the output stage is valid but out_ready is low,
//    the incoming in_data/in_valid are captured in the overflow slot;
//    otherwise they are loaded straight into the output stage.
//  - in_ready is low exactly while the overflow slot is marked valid.
//
// Every continued line must end with the backslash as its very last
// character, so no trailing comments are placed inside the macro body.
`define CACHE_REG(width, in_data, in_valid, in_ready, out_data, out_valid, out_ready)\
  reg [width-1:0] out_data``_state_;\
  reg [width-1:0] out_data``_overflow_;\
  reg out_valid``_state_;\
  reg out_valid``_overflow_;\
  always@(posedge clk) begin\
    if(rst) begin\
      out_valid``_overflow_ <= 0;\
      out_valid``_state_ <= 0;\
    end else if (out_valid``_overflow_) begin\
      if (out_ready) begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_overflow_;\
        out_valid``_overflow_ <= 0;\
        out_data``_overflow_ <= 0;\
      end else begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_state_;\
        out_valid``_overflow_ <= out_valid``_overflow_;\
        out_data``_overflow_ <= out_data``_overflow_;\
      end\
    end else begin\
      if (!out_ready && out_valid``_state_) begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_state_;\
        out_valid``_overflow_ <= in_valid;\
        out_data``_overflow_ <= in_data;\
      end else begin\
        out_valid``_state_ <= in_valid;\
        out_data``_state_ <= in_data;\
        out_valid``_overflow_ <= out_valid``_overflow_;\
        out_data``_overflow_ <= out_data``_overflow_;\
      end\
    end\
  end\
  assign in_ready = !out_valid``_overflow_;\
  assign out_data = out_data``_state_;\
  assign out_valid = out_valid``_state_;
/*!
* Copyright (c) 2017 by Contributors
* \file tvm_vpi.cc
* \brief Messages passed around VPI used for simulation.
*/
#include <dmlc/logging.h>
#include <vpi_user.h>
#include <cstdlib>
#include <memory>
#include <queue>
#include <string>
#include <vector>
#include "tvm_vpi.h"
#include "../src/common/pipe.h"
namespace tvm {
namespace vpi {
// Compile-time consistency checks: the constants declared in tvm_vpi.h must
// agree with the definitions in <vpi_user.h>.
// The raw handle type must have the same size as vpiHandle, because handles
// are converted between the two with static_cast.
static_assert(sizeof(vpiHandle) == sizeof(VPIRawHandle),
"VPI standard");
// type codes
static_assert(vpiModule == kVPIModule, "VPI standard");
// Property codes
static_assert(vpiType == kVPIType, "VPI standard");
static_assert(vpiFullName == kVPIFullName, "VPI standard");
static_assert(vpiSize == kVPISize, "VPI standard");
static_assert(vpiDefName == kVPIDefName, "VPI standard");
// IPC client for VPI
class IPCClient {
public:
// constructor
IPCClient(int64_t hread, int64_t hwrite)
: reader_(hread), writer_(hwrite) {
}
void Init() {
vpiHandle argv = vpi_handle(vpiSysTfCall, 0);
vpiHandle arg_iter = vpi_iterate(vpiArgument, argv);
clock_ = vpi_scan(arg_iter);
std::vector<VPIRawHandle> handles;
while (vpiHandle h = vpi_scan(arg_iter)) {
handles.push_back(h);
}
writer_.Write(handles);
PutInt(clock_, 0);
}
int Callback() {
if (!GetInt(clock_)) {
try {
return AtNegEdge();
} catch (const std::runtime_error& e) {
reader_.Close();
writer_.Close();
vpi_printf("ERROR: encountered %s\n", e.what());
vpi_control(vpiFinish, 1);
return 0;
}
} else {
return 0;
}
}
// called at neg edge.
int AtNegEdge() {
// This is actually called at neg-edge
// The put values won't take effect until next neg-edge.
// This allow us to see the registers before snc
writer_.Write(kPosEdgeTrigger);
VPICallCode rcode;
VPIRawHandle handle;
int32_t index, value;
while (true) {
CHECK(reader_.Read(&rcode));
switch (rcode) {
case kGetHandleByName: {
std::string str;
CHECK(reader_.Read(&str));
CHECK(reader_.Read(&handle));
handle = vpi_handle_by_name(
str.c_str(), static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(handle);
break;
}
case kGetHandleByIndex: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&index));
handle = vpi_handle_by_index(
static_cast<vpiHandle>(handle), index);
writer_.Write(kSuccess);
writer_.Write(handle);
break;
}
case kGetStrProp: {
CHECK(reader_.Read(&value));
CHECK(reader_.Read(&handle));
std::string prop = vpi_get_str(
value, static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(prop);
break;
}
case kGetIntProp: {
CHECK(reader_.Read(&value));
CHECK(reader_.Read(&handle));
value = vpi_get(value, static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(value);
break;
}
case kGetInt32: {
CHECK(reader_.Read(&handle));
value = GetInt(static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(value);
break;
}
case kPutInt32: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&value));
CHECK(handle != clock_) << "Cannot write to clock";
PutInt(static_cast<vpiHandle>(handle), value);
writer_.Write(kSuccess);
break;
}
case kGetVec: {
CHECK(reader_.Read(&handle));
vpiHandle h = static_cast<vpiHandle>(handle);
int bits = vpi_get(vpiSize, h);
int nwords = (bits + 31) / 32;
s_vpi_value value_s;
value_s.format = vpiVectorVal;
vpi_get_value(h, &value_s);
vec_buf_.resize(nwords);
for (size_t i = 0; i < vec_buf_.size(); ++i) {
vec_buf_[i].aval = value_s.value.vector[i].aval;
vec_buf_[i].bval = value_s.value.vector[i].bval;
}
writer_.Write(kSuccess);
writer_.Write(vec_buf_);
break;
}
case kPutVec: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&vec_buf_));
CHECK(handle != clock_) << "Cannot write to clock";
vpiHandle h = static_cast<vpiHandle>(handle);
svec_buf_.resize(vec_buf_.size());
for (size_t i = 0; i < vec_buf_.size(); ++i) {
svec_buf_[i].aval = vec_buf_[i].aval;
svec_buf_[i].bval = vec_buf_[i].bval;
}
s_vpi_value value_s;
s_vpi_time time_s;
time_s.type = vpiSimTime;
time_s.high = 0;
time_s.low = 10;
value_s.format = vpiVectorVal;
value_s.value.vector = &svec_buf_[0];
vpi_put_value(h, &value_s, &time_s, vpiTransportDelay);
writer_.Write(kSuccess);
break;
}
case kYield: {
writer_.Write(kSuccess);
return 0;
}
case kShutDown : {
writer_.Write(kSuccess);
vpi_control(vpiFinish, 0);
return 0;
}
}
}
}
// Create a new FSM from ENV.
static IPCClient* Create() {
const char* d_read = getenv("TVM_DREAD_PIPE");
const char* d_write = getenv("TVM_DWRITE_PIPE");
const char* h_read = getenv("TVM_HREAD_PIPE");
const char* h_write = getenv("TVM_HWRITE_PIPE");
if (d_write == nullptr ||
d_read == nullptr ||
h_read == nullptr ||
h_write == nullptr) {
vpi_printf("ERROR: need environment var TVM_READ_PIPE, TVM_WRITE_PIPE\n");
vpi_control(vpiFinish, 1);
return nullptr;
}
// close host side pipe.
common::Pipe(atoi(h_read)).Close();
common::Pipe(atoi(h_write)).Close();
IPCClient* client = new IPCClient(atoi(d_read), atoi(d_write));
client->Init();
return client;
}
// Get integer from handle.
static int GetInt(vpiHandle h) {
s_vpi_value value_s;
value_s.format = vpiIntVal;
vpi_get_value(h, &value_s);
return value_s.value.integer;
}
// Put integer into handle.
static void PutInt(vpiHandle h, int value) {
s_vpi_value value_s;
s_vpi_time time_s;
time_s.type = vpiSimTime;
time_s.high = 0;
time_s.low = 10;
value_s.format = vpiIntVal;
value_s.value.integer = value;
vpi_put_value(h, &value_s, &time_s, vpiTransportDelay);
}
// Handles
vpiHandle clock_;
// the communicator
common::Pipe reader_, writer_;
// data buf
std::vector<VPIVecVal> vec_buf_;
std::vector<s_vpi_vecval> svec_buf_;
};
} // namespace vpi
} // namespace tvm
extern "C" {
/*!
 * \brief Value-change callback registered on the clock.
 *  Recovers the IPCClient stored in user_data and forwards to its Callback.
 */
static PLI_INT32 tvm_host_clock_cb(p_cb_data cb_data) {
  tvm::vpi::IPCClient* client =
      reinterpret_cast<tvm::vpi::IPCClient*>(cb_data->user_data);
  return client->Callback();
}
/*!
 * \brief calltf routine of the $tvm_session system task.
 *
 *  Creates the IPC client and registers a value-change callback on its
 *  clock handle. The client is handed to the callback through user_data
 *  and is not freed in this function.
 * \param cb Unused.
 * \return always 0.
 */
static PLI_INT32 tvm_init(char* cb) {
  tvm::vpi::IPCClient* client = tvm::vpi::IPCClient::Create();
  if (client == nullptr) {
    vpi_printf("ERROR: cannot initialize host\n");
    vpi_control(vpiFinish, 1);
    return 0;
  }
  // Value-initialize the structures so that fields which are not set
  // explicitly below are not handed to the simulator uninitialized.
  s_vpi_value value_s{};
  s_vpi_time time_s{};
  s_cb_data cb_data_s{};
  time_s.type = vpiSuppressTime;
  value_s.format = vpiIntVal;
  cb_data_s.user_data = reinterpret_cast<char*>(client);
  cb_data_s.reason = cbValueChange;
  cb_data_s.cb_rtn = tvm_host_clock_cb;
  cb_data_s.time = &time_s;
  cb_data_s.value = &value_s;
  cb_data_s.obj = client->clock_;
  vpi_register_cb(&cb_data_s);
  return 0;
}
void tvm_vpi_register() {
s_vpi_systf_data tf_data;
tf_data.type = vpiSysTask;
tf_data.tfname = "$tvm_session";
tf_data.calltf = tvm_init;
tf_data.compiletf = nullptr;
tf_data.sizetf = nullptr;
tf_data.user_data = nullptr;
vpi_register_systf(&tf_data);
}
// Null-terminated table of startup routines; its only entry registers the
// $tvm_session system task.
void (*vlog_startup_routines[])() = {
  tvm_vpi_register,
  nullptr
};
} // extern "C"
/*!
* Copyright (c) 2017 by Contributors
* \file tvm_vpi.h
* \brief Messages passed around VPI used for simulation.
*/
#ifndef VERILOG_TVM_VPI_H_
#define VERILOG_TVM_VPI_H_
namespace tvm {
namespace vpi {
/*!
 * \brief Request codes of the VPI message protocol.
 *  The numeric values are spelled out because they are what is exchanged
 *  between the two sides.
 */
enum VPICallCode : int {
  kGetHandleByName = 0,
  kGetHandleByIndex = 1,
  kGetStrProp = 2,
  kGetIntProp = 3,
  kGetInt32 = 4,
  kPutInt32 = 5,
  kGetVec = 6,
  kPutVec = 7,
  kYield = 8,
  kShutDown = 9
};
/*! \brief Codes sent back in reply to the peer of the VPI message protocol. */
enum VPIReturnCode : int {
// notification that a clock edge was reached and requests can be served
kPosEdgeTrigger = 0,
// the request was handled
kSuccess = 1,
// failure code
kFail = 2
};
// VPI type code as in IEEE standard.
// The value is expected to equal the constant of the same name (with the
// vpi prefix) in <vpi_user.h>.
enum VPITypeCode {
kVPIModule = 32
};
// VPI property code as in IEEE standard.
// The values are expected to equal the constants of the same name (with the
// vpi prefix) in <vpi_user.h>.
enum VPIPropCode {
kVPIType = 1,
kVPIFullName = 3,
kVPISize = 4,
kVPIDefName = 9
};
/*!
 * \brief The vector value used in transmission.
 *  One element carries an aval/bval pair of a VPI vector value.
 */
struct VPIVecVal {
int aval;
int bval;
};
/*! \brief Opaque, user facing VPI handle. */
using VPIRawHandle = void*;
} // namespace vpi
} // namespace tvm
#endif // VERILOG_TVM_VPI_H_
// Memory controller to access TVM VPI simulated RAM.
//
// This module contains only wires and registers, no logic: nothing inside
// it drives the three registers below. The real computation is implemented
// via TVM VPI.
//
// Usage: create an instance and pass it as an additional argument of
// $tvm_session. The RAM logic is then hooked up automatically.
//
module tvm_vpi_mem_interface
#(
parameter READ_WIDTH = 8,
parameter WRITE_WIDTH = 8,
parameter ADDR_WIDTH = 32,
parameter SIZE_WIDTH = 32
)
(
input clk,
input rst,
// Read ports
input read_en, // Read buffer enable
output [READ_WIDTH-1:0] read_data_out, // The data port for read
output read_data_valid, // Read is valid.
// Write ports
input write_en, // Write buffer enable
input [WRITE_WIDTH-1:0] write_data_in, // Input data to write.
output write_data_ready, // Write-side ready flag (original note: "There are still pending write") -- TODO confirm meaning
// Control signal ports to issue tasks
input host_read_req, // Read request
input [ADDR_WIDTH-1:0] host_read_addr, // The address to issue a read task
input [SIZE_WIDTH-1:0] host_read_size, // The size of a read
input host_write_req, // Write request.
input [ADDR_WIDTH-1:0] host_write_addr, // The write address
input [SIZE_WIDTH-1:0] host_write_size // The write size
);
// Registers backing the three outputs; no logic in this module assigns them.
// NOTE(review): presumably written from the VPI side by name -- do not
// rename without checking the host code.
reg [READ_WIDTH-1:0] reg_read_data;
reg reg_read_valid;
reg reg_write_ready;
// Wire the registers up to the output ports.
assign read_data_out = reg_read_data;
assign read_data_valid = reg_read_valid;
assign write_data_ready = reg_write_ready;
endmodule
// TVM mmap maps virtual DRAM into the interface of an SRAM.
// This allows creating testcases that directly access DRAM.
// Read-only memory map, one cycle read.
// Usage: create an instance and pass it as an additional argument of
// $tvm_session.
module tvm_vpi_read_mmap
#(
parameter DATA_WIDTH = 8,
parameter ADDR_WIDTH = 8,
parameter BASE_ADDR_WIDTH = 32
)
(
input clk,
input rst,
// Read ports
input [ADDR_WIDTH-1:0] addr, // Local offset in terms of number of units
output [DATA_WIDTH-1:0] data_out, // The data port for read
// Configure port
input [BASE_ADDR_WIDTH-1:0] mmap_addr // The base address of memory map.
);
// Register backing data_out; no logic in this module assigns it.
// NOTE(review): presumably written from the VPI side -- confirm.
reg [DATA_WIDTH-1:0] reg_data;
assign data_out = reg_data;
endmodule
// Write-only memory map, one cycle write.
// The module body is empty: it only declares ports.
// NOTE(review): presumably the ports are sampled from the VPI side -- confirm.
// Usage: create an instance and pass it as an additional argument of
// $tvm_session.
module tvm_vpi_write_mmap
#(
parameter DATA_WIDTH = 8,
parameter ADDR_WIDTH = 8,
parameter BASE_ADDR_WIDTH = 32
)
(
input clk,
input rst,
// Write ports
input [ADDR_WIDTH-1:0] addr, // Local offset in terms of number of units
input [DATA_WIDTH-1:0] data_in, // The data port for write
input en, // The enable port for write
// Configure port
input [BASE_ADDR_WIDTH-1:0] mmap_addr // The base address of the memory map
);
endmodule
# Compiler and linker flags for VPI modules, as reported by iverilog-vpi.
VPI_CFLAGS=`iverilog-vpi --cflags`
VPI_LDFLAGS=`iverilog-vpi --ldflags`
# All Verilog sources under verilog/.
VER_SRCS = $(wildcard verilog/*.v)
# The VPI library built by the rule below.
VER_LIBS=lib/tvm_vpi.vpi
# Build the VPI library: create the output directory, then compile and link
# only the .cc prerequisites (the header is listed just as a dependency).
lib/tvm_vpi.vpi: verilog/tvm_vpi.cc verilog/tvm_vpi.h
@mkdir -p $(@D)
$(CXX) $(VPI_CFLAGS) $(CFLAGS) -o $@ $(filter %.cc, $^) $(LDFLAGS) $(VPI_LDFLAGS)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment