Unverified Commit e38d00e2 by Tianqi Chen Committed by GitHub

[REFACTOR] Remove stale verilog generator (#2964)

parent 3441b95e
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_verilog.h
* \brief Generate verilog code.
*/
#ifndef TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
#define TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
#include <tvm/base.h>
#include <tvm/ir.h>
#include <tvm/ir_functor_ext.h>
#include <tvm/codegen.h>
#include <tvm/lowered_func.h>
#include <string>
#include <vector>
#include <unordered_map>
#include "verilog_ir.h"
#include "../codegen_source_base.h"
namespace tvm {
namespace codegen {
namespace verilog {
using namespace ir;
/*!
 * \brief The variable type in register.
 * \note Presumably mirrors the verilog declaration kinds (wire/input/output/reg)
 *  plus constants -- TODO confirm against the code generator implementation.
 */
enum VerilogVarType {
kWire,
kInput,
kOutput,
kReg,
kConst
};
/*!
 * \brief The verilog value: a variable id together with its
 *  variable type and the data type it encodes.
 */
struct VerilogValue {
/*! \brief The variable id */
std::string vid;
/*! \brief The variable type; kReg unless set explicitly. */
VerilogVarType vtype{kReg};
/*! \brief The data type it encodes */
Type dtype;
/*! \brief Default constructor: empty id, kReg, default-constructed dtype. */
VerilogValue() {}
/*!
 * \brief Construct a value from all three fields.
 * \param vid The variable id.
 * \param vtype The variable type.
 * \param dtype The data type it encodes.
 */
VerilogValue(std::string vid, VerilogVarType vtype, Type dtype)
: vid(vid), vtype(vtype), dtype(dtype) {}
};
/*! \brief Information of each procedure function generated */
struct VerilogFuncEntry {
/*!
 * \brief The argument types.
 * \note The original comment read "The original functions"; presumably these
 *  are the argument types of the original function -- TODO confirm.
 */
std::vector<Type> arg_types;
/*! \brief The real argument ids of the function */
std::vector<std::string> arg_ids;
/*! \brief The VPI Modules in the function */
std::vector<std::string> vpi_modules;
};
/*!
 * \brief The code module of generated verilog code.
 *
 * Plain data holder: the generated source text plus a per-function
 * table of VerilogFuncEntry records.
 */
class VerilogCodeGenModule {
public:
/*! \brief The code of each module */
std::string code;
/*! \brief Map from function name to its entry */
std::unordered_map<std::string, VerilogFuncEntry> fmap;
/*!
 * \brief Generate code that appends a simulator main function calling func_name.
 * \param func_name The function to be called.
 * \return The generated code.
 */
std::string AppendSimMain(const std::string& func_name) const;
};
/*!
 * \brief Verilog generator
 *
 * Expression visitor that maps each Expr to a VerilogValue, built on top of
 * CodeGenSourceBase. Only declarations live here; the definitions are in
 * the corresponding source file.
 */
class CodeGenVerilog :
public ExprFunctor<VerilogValue(const Expr&)>,
public CodeGenSourceBase {
public:
/*!
 * \brief Initialize the code generator.
 * \note Takes no arguments (an earlier doc comment mentioned an
 *  output_ssa parameter that does not exist in this signature).
 */
void Init();
/*!
 * \brief Add the function to the generated module.
 * \param f The function to be compiled.
 */
void AddFunction(LoweredFunc f);
/*!
 * \brief Finalize the compilation and return the code.
 * \return The code.
 */
VerilogCodeGenModule Finish();
/*!
 * \brief Transform expression to verilog value.
 * \param n The expression to be printed.
 * \return The value produced by dispatching n through VisitExpr.
 */
VerilogValue MakeValue(const Expr& n) {
return VisitExpr(n);
}
// Expression visitors, one per IR node type.
// Each override is marked final, so subclasses cannot replace them.
// expression
VerilogValue VisitExpr_(const Variable* op) final;
VerilogValue VisitExpr_(const Let* op) final;
VerilogValue VisitExpr_(const Call* op) final;
VerilogValue VisitExpr_(const Add* op) final;
VerilogValue VisitExpr_(const Sub* op) final;
VerilogValue VisitExpr_(const Mul* op) final;
VerilogValue VisitExpr_(const Div* op) final;
VerilogValue VisitExpr_(const Mod* op) final;
VerilogValue VisitExpr_(const Min* op) final;
VerilogValue VisitExpr_(const Max* op) final;
VerilogValue VisitExpr_(const EQ* op) final;
VerilogValue VisitExpr_(const NE* op) final;
VerilogValue VisitExpr_(const LT* op) final;
VerilogValue VisitExpr_(const LE* op) final;
VerilogValue VisitExpr_(const GT* op) final;
VerilogValue VisitExpr_(const GE* op) final;
VerilogValue VisitExpr_(const And* op) final;
VerilogValue VisitExpr_(const Or* op) final;
VerilogValue VisitExpr_(const Cast* op) final;
VerilogValue VisitExpr_(const Not* op) final;
VerilogValue VisitExpr_(const Select* op) final;
VerilogValue VisitExpr_(const Ramp* op) final;
VerilogValue VisitExpr_(const Broadcast* op) final;
VerilogValue VisitExpr_(const IntImm* op) final;
VerilogValue VisitExpr_(const UIntImm* op) final;
VerilogValue VisitExpr_(const FloatImm* op) final;
VerilogValue VisitExpr_(const StringImm* op) final;
protected:
// Reset per-function state before generating code for f.
// (presumably called from AddFunction -- TODO confirm in the source file)
void InitFuncState(LoweredFunc f);
// Print a declaration of variable vid with the given variable type and dtype.
// suffix is the text emitted after the declaration (defaults to ";\n");
// indent selects whether the line is indented (defaults to true).
void PrintDecl(const std::string& vid, VerilogVarType vtype, Type dtype,
const char* suffix = ";\n", bool indent = true);
// Print an assignment of src to target.
void PrintAssign(
const std::string& target, const std::string& src);
// Print an assignment of target from the list of conditions conds
// (presumably their logical AND, judging by the name -- TODO confirm).
void PrintAssignAnd(
const std::string& target, const std::vector<std::string>& conds);
// Print a single line of code.
void PrintLine(const std::string& line);
// SSA assignment hook; final override of the base class method.
void PrintSSAAssign(
const std::string& target, const std::string& src, Type t) final;
// make binary op
VerilogValue MakeBinary(Type t, VerilogValue a, VerilogValue b, const char* opstr);
private:
// Hand shake signal name.
// These name can be empty.
// Indicate that the signal is always true
// or do not need to take these signals.
struct SignalEntry {
std::string valid;
std::string ready;
};
// Information about port
struct PortEntry {
// The port value
std::string value;
// The data type
Type dtype;
};
// Channel setup
struct ChannelEntry {
// The channel block
ChannelBlock block;
// The port map, on how port is assigned.
std::unordered_map<std::string, PortEntry> ports;
// Assign port to be value
void AssignPort(std::string port, std::string value, Type dtype);
// Get the entry of the given port
// (presumably a lookup into ports -- TODO confirm in the source file).
const PortEntry& GetPort(const std::string& port) const;
// Signal port name
std::string SignalPortName(int index) const;
};
// Get wire ssa value from s
// The id comes from SSAGetID(s, dtype); the result is always typed kWire.
VerilogValue GetSSAValue(std::string s, Type dtype) {
VerilogValue ret;
ret.vid = SSAGetID(s, dtype);
ret.vtype = kWire;
ret.dtype = dtype;
return ret;
}
// codegen a whole pipeline
void CodeGen(const Pipeline& pipeine);
// codegen the delays
void MakeDelay(const std::string& dst,
const std::string& src,
Type dtype,
int delay,
const std::string& not_stall);
// codegen the loop macros
SignalEntry MakeLoop(const Array<Stmt>& loop);
// codegen the stage inputs of a compute block;
// out_all_input_valid is an output parameter
// (presumably the name of the combined input-valid signal -- TODO confirm).
void MakeStageInputs(const ComputeBlock& block,
const std::string& not_stall,
std::string* out_all_input_valid);
// codegen the store of a compute block
void MakeStore(const ComputeBlock& block, const Store* store);
// Codegen of load statement into FIFO
void MakeLoadToFIFO(const ComputeBlock& block,
const Store* store,
const Load* load);
// Make channel unit.
void MakeChannelUnit(const ChannelEntry& ch);
void MakeChannelFIFO(const ChannelEntry& ch);
void MakeChannelBuffer(const ChannelEntry& ch);
void MakeChannelMemMap(const ChannelEntry& ch);
// Get channel information
ChannelEntry* GetChannelInfo(const Variable* var);
// channel setup map.
std::unordered_map<const Variable*, ChannelEntry> cmap_;
// list of vpi modules to be hooked.
std::vector<std::string> tvm_vpi_modules_;
// The signals for done.
std::vector<std::string> done_sigs_;
// The verilog function.
std::unordered_map<std::string, VerilogFuncEntry> functions_;
};
} // namespace verilog
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_CODEGEN_VERILOG_H_
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_ir.cc
*/
#include <tvm/ir_pass.h>
#include <tvm/ir_visitor.h>
#include <tvm/ir_mutator.h>
#include <utility>
#include "verilog_ir.h"
#include "../../arithmetic/compute_expr.h"
namespace tvm {
namespace codegen {
namespace verilog {
using namespace ir;
// Build a control signal node of the given type and advance size.
ControlSignal ControlSignalNode::make(
    ControlSignalType type, int advance_size) {
  NodePtr<ControlSignalNode> node = make_node<ControlSignalNode>();
  node->advance_size = advance_size;
  node->ctrl_type = type;
  return ControlSignal(node);
}
// Build a stage input node that refers to var and records how it is supplied.
StageInput StageInputNode::make(Var var, StageInputType input_type) {
  auto node = make_node<StageInputNode>();
  node->input_type = input_type;
  node->var = var;
  return StageInput(node);
}
// Mutator that swaps every known stage input for a fresh placeholder
// variable and records the placeholder -> StageInput association.
class StageInputReplacer : public IRMutator {
 public:
  explicit StageInputReplacer(
      const std::unordered_map<const Variable*, StageInput>& var_info)
      : var_info_(var_info) {}

  // Variables listed in var_info_ become "<name>.sync" placeholders;
  // any other variable is returned untouched.
  Expr Mutate_(const Variable* op, const Expr& e) final {
    auto cached = replace_.find(op);
    if (cached != replace_.end()) {
      return cached->second;
    }
    auto info = var_info_.find(op);
    if (info == var_info_.end()) {
      return e;
    }
    Var sync_var(info->second->var->name_hint + ".sync", op->type);
    inputs_.Set(sync_var, info->second);
    replace_[op] = sync_var;
    return std::move(sync_var);
  }

  // A load at index zero from a known channel becomes a
  // "<name>.load.sync" placeholder keyed by the buffer variable.
  Expr Mutate_(const Load* op, const Expr& e) final {
    CHECK(is_zero(op->index))
        << "Load should be in its own stage.";
    const Variable* buffer = op->buffer_var.get();
    auto cached = replace_.find(buffer);
    if (cached != replace_.end()) {
      return cached->second;
    }
    auto info = var_info_.find(buffer);
    CHECK(info != var_info_.end())
        << "Load from unknown channel";
    Var load_var(info->second->var->name_hint + ".load.sync", op->type);
    inputs_.Set(load_var, info->second);
    replace_[buffer] = load_var;
    return std::move(load_var);
  }

  // inputs that get replaced.
  Map<Var, StageInput> inputs_;
  // replacement map
  std::unordered_map<const Variable*, Var> replace_;
  // Variable replacement plan (borrowed; must outlive this object).
  const std::unordered_map<const Variable*, StageInput>& var_info_;
};
/*!
 * \brief Extract module block
 *
 * IR visitor that walks the body of a LoweredFunc and collects a Pipeline:
 * one ComputeBlock per Store statement, plus one ChannelBlock per channel
 * or handle argument that is accessed.
 */
class PipelineExtractor: public IRVisitor {
public:
// Build the pipeline for f: register handle arguments, visit the body,
// then publish the collected channels and the argument list.
Pipeline Extract(LoweredFunc f) {
// Initialize the memory map channels
// TODO(tqchen) move the logic to explicit specification.
for (auto arg : f->args) {
if (arg.type().is_handle()) {
arg_handle_[arg.get()] = arg;
}
}
pipeline_ = make_node<PipelineNode>();
this->Visit(f->body);
// setup channels
for (const auto &kv : cmap_) {
pipeline_->channels.Set(
kv.second.node->channel->handle_var,
ChannelBlock(kv.second.node));
}
pipeline_->args = f->args;
return Pipeline(pipeline_);
}
// Attribute statements drive the extraction state:
//  - pipeline_stage_scope: enters a (non-nested) pipeline stage and pushes a trigger;
//  - channel_{read,write}_advance: pushes a trigger for the body;
//  - channel_{read,write}_scope: registers the channel and its window size,
//    and exposes the channel handle as a stage input while visiting the body.
void Visit_(const AttrStmt* op) final {
if (op->attr_key == attr::pipeline_stage_scope) {
CHECK(!in_pipeline_stage_);
in_pipeline_stage_ = true;
// Remember the loop depth at which the trigger was opened.
trigger_.emplace_back(std::make_pair(loop_.size(), op));
IRVisitor::Visit_(op);
trigger_.pop_back();
in_pipeline_stage_ = false;
} else if (op->attr_key == attr::channel_read_advance ||
op->attr_key == attr::channel_write_advance) {
trigger_.emplace_back(std::make_pair(loop_.size(), op));
IRVisitor::Visit_(op);
trigger_.pop_back();
} else if (op->attr_key == attr::channel_read_scope ||
op->attr_key == attr::channel_write_scope) {
Channel ch(op->node.node_);
// operator[] default-creates the entry on first sight of this handle.
ChannelEntry& cb = cmap_[ch->handle_var.get()];
if (cb.node != nullptr) {
CHECK(cb.node->channel.same_as(ch));
} else {
cb.node = make_node<ChannelBlockNode>();
cb.node->channel = ch;
}
if (op->attr_key == attr::channel_read_scope) {
CHECK_EQ(cb.read_ref_count, 0)
<< "One channel can only be read from one consumer";
++cb.read_ref_count;
CHECK(arith::GetConstInt(op->value, &(cb.node->read_window)))
<< "Only supprt constant read window";
} else {
CHECK_EQ(cb.write_ref_count, 0)
<< "One channel can only be write by one producer";
++cb.write_ref_count;
CHECK(arith::GetConstInt(op->value, &(cb.node->write_window)))
<< "Only supprt constant write window";
}
// The channel is a stage input only for the duration of its scope.
var_info_[ch->handle_var.get()] =
StageInputNode::make(ch->handle_var, kChannel);
IRVisitor::Visit_(op);
var_info_.erase(ch->handle_var.get());
} else {
IRVisitor::Visit_(op);
}
}
// Blocks are only allowed outside a pipeline stage.
void Visit_(const Block* op) final {
CHECK(!in_pipeline_stage_)
<< "Do not support serial execution inside pipeline";
IRVisitor::Visit_(op);
}
// Conditionals are rejected unconditionally.
void Visit_(const IfThenElse* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Inside a pipeline stage, record the loop (with a no-op body) and expose
// its loop variable as a stage input while visiting the loop body.
void Visit_(const For* op) final {
if (in_pipeline_stage_) {
loop_.push_back(
For::make(op->loop_var, op->min, op->extent,
op->for_type, op->device_api, Evaluate::make(0)));
var_info_[op->loop_var.get()] =
StageInputNode::make(Var(op->loop_var.node_), kLoopVar);
IRVisitor::Visit_(op);
var_info_.erase(op->loop_var.get());
loop_.pop_back();
} else {
IRVisitor::Visit_(op);
}
}
// Every store produces one compute block: the current loop nest, the
// signal triggers derived from the active trigger stack, and the store body
// with stage inputs replaced by placeholders.
void Visit_(const Store* op) final {
// Check the access pattern
Channel arg_write =
CheckArgHandleAccess(op->buffer_var.get(), op->value.type(), false);
this->Visit(op->value);
// The replace logic
StageInputReplacer repl(var_info_);
// Setup the compute block.
NodePtr<ComputeBlockNode> compute =
make_node<ComputeBlockNode>();
compute->loop = Array<Stmt>(loop_);
// setup the advance triggers
for (const auto& e : trigger_) {
const AttrStmt* attr = e.second;
Channel ch;
if (attr->attr_key == attr::pipeline_stage_scope) {
// The stage-finish trigger only applies when the store targets a handle argument.
ch = arg_write;
if (!ch.defined()) continue;
} else {
ch = Channel(attr->node.node_);
}
NodePtr<SignalTriggerNode> trigger
= make_node<SignalTriggerNode>();
trigger->channel_var = ch->handle_var;
// predicate for the trigger
// True when every loop opened at or below the trigger's depth is at
// (extent - 1).
Expr predicate = const_true();
for (size_t i = e.first; i < loop_.size(); ++i) {
const For* loop = loop_[i].as<For>();
predicate = predicate &&
(loop->loop_var == (loop->extent - 1));
}
trigger->predicate = ir::Simplify(predicate);
// Add the signal back to the channels.
// at() requires the channel to be registered already.
ChannelEntry& cb = cmap_.at(ch->handle_var.get());
// The new signal is appended below, so its index is the current size.
trigger->signal_index = static_cast<int>(cb.node->ctrl_signals.size());
// Grab the advance constant size.
int trigger_size = 0;
if (attr->attr_key == attr::pipeline_stage_scope) {
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kComputeFinish, 0));
} else if (attr->attr_key == attr::channel_read_advance) {
CHECK(arith::GetConstInt(attr->value, &trigger_size))
<< "Only support constant advance size";
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kReadAdvance, trigger_size));
} else {
CHECK(arith::GetConstInt(attr->value, &trigger_size))
<< "Only support constant advance size";
cb.node->ctrl_signals.push_back(
ControlSignalNode::make(kWriteAdvance, trigger_size));
}
compute->triggers.push_back(SignalTrigger(trigger));
}
// Check if we are writing to FIFO.
const Load* load = op->value.as<Load>();
if (is_zero(op->index) && load) {
// Keep the load itself; only its index goes through the replacer.
// NOTE(review): the rebuilt Load uses the store's predicate (op->predicate),
// not load->predicate -- confirm this is intended.
compute->body = Store::make(
op->buffer_var,
Load::make(load->type, load->buffer_var,
repl.Mutate(load->index), op->predicate),
op->index, op->predicate);
} else {
compute->body = Store::make(
op->buffer_var, repl.Mutate(op->value),
repl.Mutate(op->index), op->predicate);
}
compute->inputs = repl.inputs_;
pipeline_->stages.push_back(ComputeBlock(compute));
}
// Let statements are rejected unconditionally.
void Visit_(const LetStmt* op) final {
LOG(FATAL) << "cannot pass through let";
}
// Evaluate statements are rejected unconditionally.
void Visit_(const Evaluate* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Allocations are only allowed outside a pipeline stage.
// Note that the children of the allocation are not visited.
void Visit_(const Allocate* op) final {
CHECK(!in_pipeline_stage_);
}
// Assertions are rejected unconditionally.
void Visit_(const AssertStmt* op) final {
LOG(FATAL) << "Not implemeneted";
}
// Loads only register a handle-argument access; the index is not visited.
void Visit_(const Load* op) final {
CheckArgHandleAccess(op->buffer_var.get(), op->type, true);
}
// If var is a handle argument of the function, create its channel entry
// (each handle may be accessed only once) and return the new channel;
// otherwise return an undefined Channel.
// NOTE(review): read_access is not used in the body.
Channel CheckArgHandleAccess(const Variable* var, Type dtype, bool read_access) {
if (!arg_handle_.count(var)) return Channel();
CHECK(!cmap_.count(var))
<< "Multiple access to the same handle";
ChannelEntry& cb = cmap_[var];
cb.node = make_node<ChannelBlockNode>();
cb.node->channel = ChannelNode::make(arg_handle_.at(var), dtype);
return cb.node->channel;
}
private:
// The channel information.
struct ChannelEntry {
NodePtr<ChannelBlockNode> node;
int read_ref_count{0};
int write_ref_count{0};
};
// Whether we are inside the pipeline stage.
bool in_pipeline_stage_{false};
// The current loop nest
std::vector<Stmt> loop_;
// Advance signal trigger: (loop depth when opened, attribute statement)
std::vector<std::pair<size_t, const AttrStmt*> > trigger_;
// Read write scope
// NOTE(review): not referenced anywhere else in this class.
std::vector<const AttrStmt*> channel_scope_;
// The stage inputs currently in scope (loop variables and channel handles).
std::unordered_map<const Variable*, StageInput> var_info_;
// The channel entry;
std::unordered_map<const Variable*, ChannelEntry> cmap_;
// The argument handle map
std::unordered_map<const Variable*, Var> arg_handle_;
// The result block.
NodePtr<PipelineNode> pipeline_;
};
// Entry point: run a fresh PipelineExtractor over f and return its result.
Pipeline MakePipeline(LoweredFunc f) {
  PipelineExtractor extractor;
  return extractor.Extract(f);
}
} // namespace verilog
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_ir.h
* \brief A lowered IR that resembles verilog blocks,
* This is data structure before final codegen.
*/
#ifndef TVM_CODEGEN_VERILOG_VERILOG_IR_H_
#define TVM_CODEGEN_VERILOG_VERILOG_IR_H_
#include <tvm/ir.h>
#include <tvm/expr.h>
#include <tvm/channel.h>
#include <tvm/lowered_func.h>
#include <vector>
#include <memory>
#include <unordered_map>
namespace tvm {
namespace codegen {
namespace verilog {
/*! \brief The kind of data input consumed by a compute stage */
enum StageInputType : int {
/*! \brief Data channel input. */
kChannel,
/*! \brief Loop variable generated by compute block. */
kLoopVar,
/*! \brief Global constant. */
kGlobalConst
};
/*! \brief The control signal type */
enum ControlSignalType : int {
// Read advance signal
kReadAdvance,
// Write advance signal
kWriteAdvance,
// Pipeline stage finish signal
kComputeFinish
};
class ControlSignal;
class StageInput;
class SignalTrigger;
/*! \brief The control signal of a channel */
struct ControlSignalNode : public Node {
/*!
 * \brief The control signal type
 * \note No in-class initializer; ControlSignalNode::make always assigns it.
 */
ControlSignalType ctrl_type;
/*! \brief Advance size of the signal */
int advance_size{0};
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("ctrl_type", &ctrl_type);
v->Visit("advance_size", &advance_size);
}
/*!
 * \brief Create a control signal.
 * \param ctrl_type The control signal type.
 * \param advance_size Advance size of the signal.
 * \return The created control signal reference.
 */
static ControlSignal make(ControlSignalType ctrl_type, int advance_size);
static constexpr const char* _type_key = "VerilogControlSignal";
TVM_DECLARE_NODE_TYPE_INFO(ControlSignalNode, Node);
};
TVM_DEFINE_NODE_REF(ControlSignal, ControlSignalNode);
/*! \brief Information about channel. */
struct ChannelBlockNode : public Node {
/*! \brief The channel we refer to */
Channel channel;
/*! \brief Read window */
int read_window{0};
/*! \brief Write window */
int write_window{0};
/*! \brief Control signals in the channel */
Array<ControlSignal> ctrl_signals;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("channel", &channel);
v->Visit("read_window", &read_window);
v->Visit("write_window", &write_window);
v->Visit("ctrl_signals", &ctrl_signals);
}
static constexpr const char* _type_key = "VerilogChannelBlock";
TVM_DECLARE_NODE_TYPE_INFO(ChannelBlockNode, Node);
};
TVM_DEFINE_NODE_REF(ChannelBlock, ChannelBlockNode);
/*!
 * \brief Input to the compute block.
 *  These represent the data values that need to be shared.
 */
struct StageInputNode : public Node {
/*!
 * \brief The corresponding var of the input
 *  For loop and global const it is the var.
 *  For channel this corresponds to the channel handle.
 */
Var var;
/*!
 * \brief The type of the input.
 * \note No in-class initializer; StageInputNode::make always assigns it.
 */
StageInputType input_type;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("var", &var);
v->Visit("input_type", &input_type);
}
/*!
 * \brief Create a stage input.
 * \param var The corresponding var of the input.
 * \param input_type The type of the input.
 * \return The created stage input reference.
 */
static StageInput make(Var var, StageInputType input_type);
static constexpr const char* _type_key = "VerilogStageInput";
TVM_DECLARE_NODE_TYPE_INFO(StageInputNode, Node);
};
TVM_DEFINE_NODE_REF(StageInput, StageInputNode);
/*! \brief The trigger signal for certain channel */
struct SignalTriggerNode : public Node {
  /*! \brief The channel handle variable */
  Var channel_var;
  /*! \brief Boolean predicate to trigger the signal */
  Expr predicate;
  /*!
   * \brief Signal index of the channel.
   *
   * Zero-initialized like the other integer node fields, so a node that is
   * created without an explicit assignment never exposes an indeterminate
   * value through VisitAttrs.
   */
  int signal_index{0};
  // visit all attributes
  void VisitAttrs(AttrVisitor* v) final {
    v->Visit("channel_var", &channel_var);
    v->Visit("predicate", &predicate);
    v->Visit("signal_index", &signal_index);
  }
  static constexpr const char* _type_key = "VerilogSignalTrigger";
  TVM_DECLARE_NODE_TYPE_INFO(SignalTriggerNode, Node);
};
TVM_DEFINE_NODE_REF(SignalTrigger, SignalTriggerNode);
/*! \brief Compute block for verilog: one store statement and its context. */
struct ComputeBlockNode : public Node {
/*! \brief The body of the block. */
Stmt body;
/*! \brief The loop nest around the body, each is a For with no_op as body */
Array<Stmt> loop;
/*! \brief The channel advance trigger */
Array<SignalTrigger> triggers;
/*! \brief The input variables that need to be synced. */
Map<Var, StageInput> inputs;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("body", &body);
v->Visit("loop", &loop);
v->Visit("triggers", &triggers);
v->Visit("inputs", &inputs);
}
static constexpr const char* _type_key = "VerilogComputeBlock";
TVM_DECLARE_NODE_TYPE_INFO(ComputeBlockNode, Node);
};
TVM_DEFINE_NODE_REF(ComputeBlock, ComputeBlockNode);
/*! \brief Code block for a verilog module: arguments, stages and channels. */
struct PipelineNode : public Node {
/*! \brief arguments to the module */
Array<Var> args;
/*! \brief Computation stages */
Array<ComputeBlock> stages;
/*! \brief The data channels, keyed by channel handle variable */
Map<Var, ChannelBlock> channels;
// visit all attributes
void VisitAttrs(AttrVisitor* v) final {
v->Visit("args", &args);
v->Visit("stages", &stages);
v->Visit("channels", &channels);
}
static constexpr const char* _type_key = "VerilogPipeline";
TVM_DECLARE_NODE_TYPE_INFO(PipelineNode, Node);
};
TVM_DEFINE_NODE_REF(Pipeline, PipelineNode);
/*!
 * \brief Build a lowered verilog pipeline given function.
 * \param f The function to be transformed.
 * \return The created verilog pipeline.
 */
Pipeline MakePipeline(LoweredFunc f);
} // namespace verilog
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_VERILOG_IR_H_
/*!
* Copyright (c) 2017 by Contributors
* \file verilog_module.cc
* \brief Build verilog source code.
*/
#include <tvm/runtime/packed_func.h>
#include <tvm/codegen.h>
#include <mutex>
#include "codegen_verilog.h"
#include "../../runtime/file_util.h"
#include "../../runtime/meta_data.h"
namespace tvm {
namespace codegen {
namespace verilog {
using runtime::TVMArgs;
using runtime::TVMRetValue;
using runtime::PackedFunc;
// Simulator function
class VerilogModuleNode : public runtime::ModuleNode {
public:
VerilogModuleNode() : fmt_("v") {}
const char* type_key() const {
return "verilog";
}
PackedFunc GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) final {
CHECK(sptr_to_self.get() == this);
if (!m_.fmap.count(name)) return PackedFunc();
auto f = [sptr_to_self, name, this](const runtime::TVMArgs& args, TVMRetValue* rv) {
auto* fsim = runtime::Registry::Get("tvm_callback_verilog_simulator");
CHECK(fsim != nullptr)
<< "tvm_callback_verilog_simulator is not registered,"
<<" did you import tvm.addon.verilog?";
std::string code = m_.AppendSimMain(name);
if (const auto* f = runtime::Registry::Get("tvm_callback_verilog_postproc")) {
code = (*f)(code).operator std::string();
}
std::vector<TVMValue> values;
std::vector<int> codes;
TVMValue v;
v.v_str = code.c_str();
values.push_back(v);
codes.push_back(kStr);
for (int i = 0; i < args.num_args; ++i) {
values.push_back(args.values[i]);
codes.push_back(args.type_codes[i]);
}
fsim->CallPacked(TVMArgs(&values[0], &codes[0], args.num_args + 1), rv);
};
return PackedFunc(f);
}
std::string GetSource(const std::string& format) final {
return m_.code;
}
void Init(const Array<LoweredFunc>& funcs) {
CodeGenVerilog cg;
cg.Init();
for (LoweredFunc f : funcs) {
cg.AddFunction(f);
}
m_ = cg.Finish();
}
private:
// the verilog code. data
VerilogCodeGenModule m_;
// format;
std::string fmt_;
};
// Global function: build a verilog runtime module from the lowered
// functions passed as the first argument.
TVM_REGISTER_API("codegen.build_verilog")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    auto module_node = std::make_shared<VerilogModuleNode>();
    module_node->Init(args[0]);
    *rv = runtime::Module(module_node);
  });
} // namespace verilog
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file vpi_session.cc
* \brief IPC session call to verilog simulator via VPI.
*/
#include <tvm/api_registry.h>
#include <memory>
#include "vpi_session.h"
namespace tvm {
namespace codegen {
using namespace vpi;
// Shared state of one VPI session: the two pipes plus a flag recording
// whether this side currently has control.
class VPISessionEntry {
public:
// Whether in control.
bool in_control{false};
// Internal reader and writer.
common::Pipe reader;
common::Pipe writer;
// internal constructor
// h_pipe_read / h_pipe_write are the handles wrapped by reader / writer.
VPISessionEntry(int h_pipe_read, int h_pipe_write)
: reader(h_pipe_read), writer(h_pipe_write) {
}
// If still in control, send kShutDown and read one return code before
// closing both pipes. The result of that read is not checked.
~VPISessionEntry() {
if (in_control) {
VPIReturnCode cd;
writer.Write(kShutDown);
reader.Read(&cd);
}
reader.Close();
writer.Close();
}
// Read one return code and fail unless it equals rcode.
void ReadExpect(VPIReturnCode rcode) {
VPIReturnCode code;
CHECK(reader.Read(&code));
CHECK_EQ(code, rcode) << "Error in simulation";
}
};
// Inline implementations
// Access the underlying session node of this reference.
inline VPISessionNode* VPISession::get() const {
  auto* raw = node_.get();
  return static_cast<VPISessionNode*>(raw);
}
// Access the underlying handle node of this reference.
inline VPIHandleNode* VPIHandle::get() const {
  auto* raw = node_.get();
  return static_cast<VPIHandleNode*>(raw);
}
// Wrap a raw handle into a VPIHandle that shares the given session.
VPIHandle VPIHandleCreate(
    const std::shared_ptr<VPISessionEntry>& sess,
    VPIRawHandle handle) {
  auto node = make_node<VPIHandleNode>();
  node->handle = handle;
  node->sess = sess;
  return VPIHandle(node);
}
// Ask the remote side for the handle called `name` relative to `handle`.
// A null result yields an undefined VPIHandle when allow_undefined is set,
// and a check failure otherwise.
VPIHandle GetHandleByName(
    const std::shared_ptr<VPISessionEntry>& sess,
    const std::string& name,
    VPIRawHandle handle,
    bool allow_undefined) {
  VPISessionEntry* session = sess.get();
  CHECK(session->in_control);
  // Request: opcode, name, parent handle.
  session->writer.Write(kGetHandleByName);
  session->writer.Write(name);
  session->writer.Write(handle);
  session->ReadExpect(kSuccess);
  // Reply payload: the raw handle that was found (or null).
  VPIRawHandle found = nullptr;
  CHECK(session->reader.Read(&found));
  if (found == nullptr) {
    CHECK(allow_undefined)
        << "Cannot find handle with name=" << name;
    return VPIHandle();
  }
  return VPIHandleCreate(sess, found);
}
// Query the string property `code` of handle h from the remote side.
std::string VPIGetStrProp(VPIHandleNode* h, int code) {
  VPISessionEntry* session = h->sess.get();
  CHECK(session->in_control);
  // Request: opcode, property code, target handle.
  session->writer.Write(kGetStrProp);
  session->writer.Write(code);
  session->writer.Write(h->handle);
  session->ReadExpect(kSuccess);
  // Reply payload: the property value.
  std::string result;
  CHECK(session->reader.Read(&result));
  return result;
}
// Query the integer property `code` of handle h from the remote side.
int VPIGetIntProp(VPIHandleNode* h, int code) {
  VPISessionEntry* session = h->sess.get();
  CHECK(session->in_control);
  // Request: opcode, property code, target handle.
  session->writer.Write(kGetIntProp);
  session->writer.Write(code);
  session->writer.Write(h->handle);
  session->ReadExpect(kSuccess);
  // Reply payload: the property value.
  int result;
  CHECK(session->reader.Read(&result));
  return result;
}
// Create a session over the given pipe handles and hook up the callbacks
// of every custom module announced by the remote side.
//
// h_pipe_read / h_pipe_write are handed to the session's reader / writer.
// The session starts out in control. The function then reads the list of
// module handles, waits for the first kPosEdgeTrigger, and for each module
// looks up the global function "_vpi_module_<definition name>" and stores
// the PackedFunc it returns as a posedge-end callback.
VPISession VPISession::make(int h_pipe_read, int h_pipe_write) {
  auto n = make_node<VPISessionNode>();
  n->sess = std::make_shared<VPISessionEntry>(h_pipe_read, h_pipe_write);
  n->sess->in_control = true;
  VPISession sess(n);
  // The custom module handles
  std::vector<VPIRawHandle> mod_handles;
  // Fail loudly on a short or broken read, like every other payload read
  // in this file, instead of continuing with a partial handle list.
  CHECK(n->sess->reader.Read(&mod_handles))
      << "Failed to read module handles from the VPI process";
  n->sess->ReadExpect(kPosEdgeTrigger);
  // start Initialize the callbacks
  for (VPIRawHandle raw_h : mod_handles) {
    VPIHandle h = VPIHandleCreate(n->sess, raw_h);
    CHECK_EQ(VPIGetIntProp(h.get(), kVPIType), kVPIModule)
        << "Expect pass modules to $tvm_session after clk";
    std::string def = VPIGetStrProp(h.get(), kVPIDefName);
    std::string callback_name = "_vpi_module_" + def;
    const PackedFunc* f = runtime::Registry::Get(callback_name);
    CHECK(f != nullptr)
        << "Cannot find definition for tvm vpi module " << def;
    PackedFunc cb = (*f)(h);
    n->posedge_end_callbacks.push_back(cb);
  }
  return sess;
}
// Top-level lookup by name; the handle must exist (allow_undefined is false).
VPIHandle VPISession::operator[](const std::string& name) const {
  const std::shared_ptr<VPISessionEntry>& session = get()->sess;
  return GetHandleByName(session, name, nullptr, false);
}
// Top-level lookup by name.
// When allow_undefined is true, a missing handle yields an undefined
// VPIHandle; when false, a missing handle is a check failure.
// The flag is forwarded to GetHandleByName (it used to be ignored and
// treated as always true).
VPIHandle VPISession::GetByName(const std::string& name,
                                bool allow_undefined) const {
  return GetHandleByName(get()->sess, name, nullptr, allow_undefined);
}
void VPISession::yield() {
VPISessionEntry* n = get()->sess.get();
CHECK(n->in_control);
for (const PackedFunc& f : get()->posedge_end_callbacks) {
f();
}
n->writer.Write(kYield);
n->ReadExpect(kSuccess);
n->in_control = false;
n->ReadExpect(kPosEdgeTrigger);
n->in_control = true;
}
void VPISession::shutdown() {
VPISessionEntry* n = get()->sess.get();
if (n->in_control) {
n->writer.Write(kShutDown);
n->ReadExpect(kSuccess);
n->in_control = false;
}
}
int VPIHandle::size() const {
return VPIGetIntProp(get(), kVPISize);
}
void VPIHandle::put_int(int value) {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kPutInt32);
n->writer.Write(h->handle);
n->writer.Write(value);
n->ReadExpect(kSuccess);
}
int VPIHandle::get_int() const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kGetInt32);
n->writer.Write(h->handle);
n->ReadExpect(kSuccess);
int value;
CHECK(n->reader.Read(&value));
return value;
}
std::string VPIHandle::name() const {
return VPIGetStrProp(get(), kVPIFullName);
}
void VPIHandle::put_vec(const std::vector<VPIVecVal>& vec) const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kPutVec);
n->writer.Write(h->handle);
n->writer.Write(vec);
n->ReadExpect(kSuccess);
}
void VPIHandle::get_vec(std::vector<VPIVecVal>* vec) const {
VPIHandleNode* h = get();
VPISessionEntry* n = h->sess.get();
CHECK(n->in_control);
n->writer.Write(kGetVec);
n->writer.Write(h->handle);
n->ReadExpect(kSuccess);
CHECK(n->reader.Read(vec));
}
// Lookup by name relative to this handle; the result must exist.
VPIHandle VPIHandle::operator[](const std::string& name) const {
  VPIHandleNode* node = get();
  VPIRawHandle parent = node->handle;
  return GetHandleByName(node->sess, name, parent, false);
}
// API registration
// Each entry exposes one VPISession / VPIHandle operation as a global
// function. args[0] is the session or handle unless noted otherwise.

// args: (read pipe handle, write pipe handle) -> VPISession
TVM_REGISTER_API("_vpi_SessMake")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = VPISession::make(args[0], args[1]);
});
// args: (session, name) -> VPIHandle; uses operator[], so the handle must exist.
TVM_REGISTER_API("_vpi_SessGetHandleByName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPISession().operator[](args[1]);
});
// args: (session); no return value is set.
TVM_REGISTER_API("_vpi_SessYield")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPISession().yield();
});
// args: (session); no return value is set.
TVM_REGISTER_API("_vpi_SessShutdown")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPISession().shutdown();
});
// args: (handle, int value); no return value is set.
TVM_REGISTER_API("_vpi_HandlePutInt")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator VPIHandle().put_int(args[1]);
});
// args: (handle) -> int
TVM_REGISTER_API("_vpi_HandleGetInt")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().get_int();
});
// args: (handle) -> string
TVM_REGISTER_API("_vpi_HandleGetName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().name();
});
// args: (handle) -> int
TVM_REGISTER_API("_vpi_HandleGetSize")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().size();
});
// args: (handle, name) -> VPIHandle; uses operator[], so the handle must exist.
TVM_REGISTER_API("_vpi_HandleGetHandleByName")
.set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator VPIHandle().operator[](args[1]);
});
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file vpi_session.h
* \brief IPC session call to verilog simulator via VPI.
*/
#ifndef TVM_CODEGEN_VERILOG_VPI_SESSION_H_
#define TVM_CODEGEN_VERILOG_VPI_SESSION_H_
#include <tvm/base.h>
#include <vector>
#include <string>
#include <memory>
#include "../../common/pipe.h"
#include "../../../verilog/tvm_vpi.h"
namespace tvm {
namespace codegen {
// node containers
class VPISessionNode;
class VPIHandleNode;
class VPIHandle;
class VPISessionEntry;
using runtime::PackedFunc;
/*! \brief Reference to a VPI session (container: VPISessionNode). */
class VPISession : public NodeRef {
public:
/*! \brief Default constructor: an undefined session reference. */
VPISession() {}
/*! \brief Construct the reference from a node pointer. */
explicit VPISession(NodePtr<Node> n) : NodeRef(n) {}
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \return The handle.
 */
VPIHandle operator[](const std::string& name) const;
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \param allow_undefined whether allow undefined
 * \return The handle.
 */
VPIHandle GetByName(const std::string& name, bool allow_undefined) const;
/*!
 * \brief Yield control back to the simulator
 *  Block until next cycle.
 */
void yield();
/*!
 * \brief Shutdown the session.
 */
void shutdown();
/*!
 * \brief Create new session by giving a read and write pipe to VPI process.
 * \param h_pipe_read Handle of the pipe the session reads from.
 * \param h_pipe_write Handle of the pipe the session writes to.
 * \return The created session.
 */
static VPISession make(int h_pipe_read, int h_pipe_write);
// Internal methods.
using ContainerType = VPISessionNode;
inline VPISessionNode* get() const;
};
/*! \brief VPI Handle (container: VPIHandleNode). */
class VPIHandle : public NodeRef {
public:
/*! \brief Default constructor: an undefined handle reference. */
VPIHandle() {}
/*! \brief Construct the reference from a node pointer. */
explicit VPIHandle(NodePtr<Node> n) : NodeRef(n) {}
/*!
 * \brief Get handle by name.
 * \param name The name of the handle.
 * \return The handle.
 */
VPIHandle operator[](const std::string& name) const;
/*! \return number of bits */
int size() const;
/*!
 * \brief Set int value to the handle.
 * \param value The value to set.
 */
void put_int(int value);
/*!
 * \brief Get int value from handle.
 * \return The result int value.
 */
int get_int() const;
/*! \return Name of the handle. */
std::string name() const;
/*!
 * \brief Put byte vector into the handle.
 * \param vec The vector to be put.
 */
void put_vec(const std::vector<vpi::VPIVecVal>& vec) const;
/*!
 * \brief Get byte vector from handle.
 * \param vec The result data container.
 */
void get_vec(std::vector<vpi::VPIVecVal>* vec) const;
// Internal methods
using ContainerType = VPIHandleNode;
inline VPIHandleNode* get() const;
};
/*! \brief Container for session. */
class VPISessionNode : public Node {
public:
// internal session.
std::shared_ptr<VPISessionEntry> sess;
// callbacks at pos edge end.
std::vector<PackedFunc> posedge_end_callbacks;
// visit all attributes
// Intentionally empty: no field is exposed to the attribute visitor.
void VisitAttrs(AttrVisitor* v) final {
}
static constexpr const char* _type_key = "VPISession";
TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node);
};
/*! \brief Container for handle */
class VPIHandleNode : public Node {
public:
// internal session.
std::shared_ptr<VPISessionEntry> sess;
// Internal handle
vpi::VPIRawHandle handle;
// visit all attributes
// Intentionally empty: no field is exposed to the attribute visitor.
void VisitAttrs(AttrVisitor* v) final {
}
static constexpr const char* _type_key = "VPIHandle";
TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node);
};
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_VERILOG_VPI_SESSION_H_
import tvm
from tvm.contrib import verilog
import numpy as np
def lower(s, args, name):
    """Lower schedule ``s`` into an API function called ``name``.

    Parameters
    ----------
    s : schedule
        The schedule to lower.
    args : list of tvm.tensor.Tensor
        Tensors that become the function arguments; a buffer is declared
        for each one and bound to it.
    name : str
        Name given to the generated function.

    Returns
    -------
    The function produced by ``tvm.ir_pass.MakeAPI``.
    """
    tensor_to_buffer = {}
    buffers = []
    for tensor in args:
        assert isinstance(tensor, tvm.tensor.Tensor)
        buffer = tvm.decl_buffer(tensor.shape, dtype=tensor.dtype, name=tensor.op.name)
        tensor_to_buffer[tensor] = buffer
        buffers.append(buffer)

    s = s.normalize()
    body = tvm.schedule.ScheduleOps(s, tvm.schedule.InferBound(s))
    body = tvm.ir_pass.StorageFlatten(body, tensor_to_buffer, 64)
    # Two simplification passes, applied in the same order as before.
    for simplify in (tvm.ir_pass.CanonicalSimplify, tvm.ir_pass.Simplify):
        body = simplify(body)
    body = tvm.ir_pass.SplitPipeline(body, True)
    return tvm.ir_pass.MakeAPI(body, name, buffers, 0, True)
@tvm.register_func
def tvm_callback_verilog_postproc(code):
    """Print the generated verilog source and hand it back unchanged.

    Registered through ``tvm.register_func`` under the function's own name.
    """
    generated = code
    print(generated)
    return generated
def test_add_pipeline():
    """Build an element-wise int32 add as a pipeline and check it on the verilog target."""
    nn = 128
    extent = tvm.convert(nn)
    lhs = tvm.placeholder((extent,), name='A', dtype='int32')
    rhs = tvm.placeholder((extent,), name='B', dtype='int32')
    out = tvm.compute(lhs.shape, lambda i: lhs[i] + rhs[i], name='C')
    sched = tvm.create_schedule(out.op)
    # Single outer part, bound to the "pipeline" thread axis.
    outer, _ = sched[out].split(out.op.axis[0], nparts=1)
    sched[out].bind(outer, tvm.thread_axis("pipeline"))
    fapi = lower(sched, [lhs, rhs, out], "myadd")
    fsplits = list(tvm.ir_pass.SplitHostDevice(fapi))
    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
    print("------")

    def check_target(device, host="stackvm"):
        """Build host and device modules, run the kernel and compare with numpy."""
        if not tvm.module.enabled(host) or not tvm.module.enabled(device):
            return
        ctx = tvm.vpi(0)
        host_mod = tvm.codegen.build_module(fsplits[0], host)
        dev_mod = tvm.codegen.build_module(fsplits[1:], device)
        host_mod.import_module(dev_mod)
        # The source is fetched exactly as before; the value itself is not used.
        source = dev_mod.get_source()
        entry = host_mod.entry_func
        # launch the kernel.
        length = nn
        a = tvm.nd.array((np.random.uniform(size=length) * 128).astype(lhs.dtype), ctx)
        b = tvm.nd.array((np.random.uniform(size=length) * 128).astype(lhs.dtype), ctx)
        c = tvm.nd.array(np.zeros(length, dtype=out.dtype), ctx)
        entry(a, b, c)
        print("Check correctness...")
        expected = a.asnumpy() + b.asnumpy()
        tvm.testing.assert_allclose(c.asnumpy(), expected)

    check_target("verilog")
if __name__ == "__main__":
test_add_pipeline()
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_doublebuff():
    """Drive tvm_buffer.v as a double buffer and check every value read back."""
    # Window size is 16, buffer size is 32
    window_width = 16
    set_size = 8

    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_buffer_doublebuff.v"),
        verilog.find_file("tvm_buffer.v"),
    ]
    sess = verilog.session(sources)

    # Look the testbench signals up by name.
    top = sess.main
    rst = top.rst
    write_advance = top.write_advance
    write_addr = top.write_addr
    write_valid = top.write_valid
    write_ready = top.write_ready
    write_data = top.write_data
    read_data = top.read_data
    read_data_valid = top.read_data_valid

    # Simulation input data
    test_data = np.arange(window_width * set_size).astype('int8')
    total = len(test_data)

    # Initial state: reset asserted, all write-side inputs cleared.
    rst.put_int(1)
    for handle in (write_advance, write_addr, write_valid, write_data):
        handle.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # write_valid stays high until all data has been written.
    sess.yield_until_next_cycle()
    write_valid.put_int(1)

    # Main simulation loop
    write_idx = 0
    read_idx = 0
    while read_idx < total:
        # write logic
        if write_idx >= total:
            write_advance.put_int(0)
            write_valid.put_int(0)
        else:
            write_advance.put_int(0)
            if write_ready.get_int():
                slot = write_idx % window_width
                write_data.put_int(int(test_data[write_idx]))
                write_addr.put_int(slot)
                # Advance the write window once its last slot is filled.
                if slot == window_width - 1:
                    write_advance.put_int(1)
                write_idx += 1

        # correctness checks
        if read_data_valid.get_int():
            assert read_data.get_int() == test_data[read_idx]
            read_idx += 1

        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
test_buffer_doublebuff()
// Testbench that instantiates tvm_buffer with 16-entry read and write windows
// over a 32-entry buffer. The write-side registers are left undriven here and
// are set from the Python test through the session; the read side is driven
// by the logic below.
module main();
// Parameters
parameter PER=10;
// Double buffer parameters
parameter DATA_WIDTH = 8;
parameter DEPTH = 32;
parameter CNTR_WIDTH = 6; // floor(log2(32)) + 1
parameter RD_WINDOW = 16;
parameter RD_ADVANCE = 16;
parameter RD_ADDR_WIDTH = 5; // floor(log2(16)) + 1
parameter WR_WINDOW = 16;
parameter WR_ADVANCE = 16;
parameter WR_ADDR_WIDTH = 5; // floor(log2(16)) + 1
// Clock & reset
reg clk;
reg rst;
// Read port inputs
reg read_advance;
reg [RD_ADDR_WIDTH-1:0] read_addr;
reg read_ready;
// Write port inputs of the buffer (registers of this testbench)
reg write_advance;
reg [DATA_WIDTH-1:0] write_data;
reg [WR_ADDR_WIDTH-1:0] write_addr;
reg write_valid;
// Outputs
wire [DATA_WIDTH-1:0] read_data;
wire read_valid;
wire write_ready;
wire [CNTR_WIDTH-1:0] status_counter;
// Module instantiation
tvm_buffer #(
.DATA_WIDTH(DATA_WIDTH),
.DEPTH(DEPTH),
.CNTR_WIDTH(CNTR_WIDTH),
.RD_WINDOW(RD_WINDOW),
.RD_ADVANCE(RD_ADVANCE),
.RD_ADDR_WIDTH(RD_ADDR_WIDTH),
.WR_WINDOW(WR_WINDOW),
.WR_ADVANCE(WR_ADVANCE),
.WR_ADDR_WIDTH(WR_ADDR_WIDTH)
) uut (
.clk(clk),
.rst(rst),
.read_advance(read_advance),
.read_data(read_data),
.read_addr(read_addr),
.read_ready(read_ready),
.read_valid(read_valid),
.write_advance(write_advance),
.write_data(write_data),
.write_addr(write_addr),
.write_ready(write_ready),
.write_valid(write_valid),
.status_counter(status_counter)
);
// clock generation: toggle clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
// read logic
always @(posedge clk) begin
if (rst) begin
read_advance <= 0;
read_addr <= 0;
read_ready <= 0;
end else begin
// read_ready follows read_valid with one cycle of delay
if (read_valid) begin
read_ready <= 1;
end else begin
read_ready <= 0;
end
// assert read_advance when the address is at the second-to-last slot
// of the read window
if (read_addr%RD_WINDOW==RD_WINDOW-2) begin
read_advance <= 1;
end else begin
read_advance <= 0;
end
// NOTE(review): the read address wraps on WR_WINDOW rather than RD_WINDOW;
// both are 16 in this testbench so the result is the same - confirm intent.
if (read_ready) begin
read_addr <= (read_addr+1) % WR_WINDOW;
end else begin
read_addr <= read_addr % WR_WINDOW;
end
end
end
// read_data_valid logic: read_ready delayed by one cycle, cleared on reset
reg read_data_valid;
always @(posedge clk) begin
if (rst)
read_data_valid <= 0;
else
read_data_valid <= read_ready;
end
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_fifo():
    """Drive tvm_buffer.v as a FIFO and check values come out in write order."""
    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_buffer_fifo.v"),
        verilog.find_file("tvm_buffer.v"),
    ]
    sess = verilog.session(sources)

    # Look the testbench signals up by name.
    top = sess.main
    rst = top.rst
    enq = top.enq
    write_data = top.write_data
    read_data = top.read_data
    read_data_valid = top.read_data_valid

    # Simulation input data
    test_data = np.arange(16).astype('int8')
    total = len(test_data)

    # Initial state
    rst.put_int(1)
    enq.put_int(0)
    write_data.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # Main simulation loop
    read_idx = 0
    write_idx = 0
    while read_idx < total:
        # write logic: enqueue one value per cycle until all are written
        if write_idx >= total:
            enq.put_int(0)
        else:
            enq.put_int(1)
            write_data.put_int(write_idx)
            write_idx += 1

        # read logic
        if read_data_valid.get_int():
            assert read_data.get_int() == test_data[read_idx]
            read_idx += 1

        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
test_buffer_fifo()
// Testbench that instantiates tvm_buffer with window and advance sizes of 1,
// so a single enq/deq pair controls it. enq and write_data are left undriven
// here and are set from the Python test through the session.
module main();
// Parameters
parameter PER=10;
// FIFO parameters
parameter DATA_WIDTH = 8;
parameter DEPTH = 32;
parameter CNTR_WIDTH = 6; // floor(log2(32)) + 1
parameter RD_WINDOW = 1;
parameter RD_ADVANCE = 1;
parameter RD_ADDR_WIDTH = 1;
parameter WR_WINDOW = 1;
parameter WR_ADVANCE = 1;
parameter WR_ADDR_WIDTH = 1;
// Clock & reset
reg clk;
reg rst;
// Module inputs
reg [DATA_WIDTH-1:0] write_data;
// FIFO interface abstraction:
// Connect deq to read_advance and read_ready
// Connect enq to write_advance and write_valid
// Set read_addr and write_addr to 0
reg deq;
reg enq;
// Module outputs
wire [DATA_WIDTH-1:0] read_data;
wire read_valid;
wire write_ready;
wire [CNTR_WIDTH-1:0] status_counter;
// Module instantiation
tvm_buffer #(
.DATA_WIDTH(DATA_WIDTH),
.DEPTH(DEPTH),
.CNTR_WIDTH(CNTR_WIDTH),
.RD_WINDOW(RD_WINDOW),
.RD_ADVANCE(RD_ADVANCE),
.RD_ADDR_WIDTH(RD_ADDR_WIDTH),
.WR_WINDOW(WR_WINDOW),
.WR_ADVANCE(WR_ADVANCE),
.WR_ADDR_WIDTH(WR_ADDR_WIDTH)
) uut (
.clk(clk),
.rst(rst),
.read_advance(deq),
.read_addr({RD_ADDR_WIDTH{1'b0}}),
.read_ready(deq),
.read_valid(read_valid),
.read_data(read_data),
.write_advance(enq),
.write_addr({WR_ADDR_WIDTH{1'b0}}),
.write_ready(write_ready),
.write_valid(enq),
.write_data(write_data),
.status_counter(status_counter)
);
// clock generation: toggle clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
// fifo read logic: deq follows read_valid with one cycle of delay
always @(posedge clk) begin
if (rst)
deq <= 0;
else
deq <= read_valid;
end
// read_data_valid logic: deq delayed by one cycle, cleared on reset
reg read_data_valid;
always @(posedge clk) begin
if (rst)
read_data_valid <= 0;
else
read_data_valid <= deq;
end
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_buffer_linebuff():
    """Drive tvm_buffer.v as a line buffer and check each value read against the input."""
    # Window is 8x8, kernel is 3x3
    window_width = 8
    kernel_width = 3
    kernel_size = kernel_width * kernel_width
    positions = window_width - kernel_width + 1
    # Number of reads to check: one per kernel element per kernel position.
    expected_reads = positions * positions * kernel_width * kernel_width

    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_buffer_linebuff.v"),
        verilog.find_file("tvm_buffer.v"),
    ]
    sess = verilog.session(sources)

    # Look the testbench signals up by name.
    top = sess.main
    rst = top.rst
    write_advance = top.write_advance
    write_valid = top.write_valid
    write_ready = top.write_ready
    write_data = top.write_data
    read_data = top.read_data
    read_data_valid = top.read_data_valid

    # Simulation input data
    test_data = np.arange(window_width * window_width).astype('int8')

    # Initial state
    rst.put_int(1)
    for handle in (write_advance, write_valid, write_data):
        handle.put_int(0)

    # De-assert reset
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # These two stay high until all data has been written.
    sess.yield_until_next_cycle()
    write_advance.put_int(1)
    write_valid.put_int(1)

    # Main simulation loop
    write_idx = 0
    read_idx = 0
    while read_idx < expected_reads:
        # write logic
        if write_idx >= len(test_data):
            write_advance.put_int(0)
            write_valid.put_int(0)
        elif write_ready.get_int():
            write_data.put_int(int(test_data[write_idx]))
            write_idx += 1

        # correctness checks
        if read_data_valid.get_int():
            # Derive convolution window indices
            base_idx, offset_idx = divmod(read_idx, kernel_size)
            y_offset, x_offset = divmod(offset_idx, kernel_width)
            pix_index = base_idx + y_offset * window_width + x_offset
            assert read_data.get_int() == test_data[pix_index]
            read_idx += 1

        # step
        sess.yield_until_next_cycle()
if __name__ == "__main__":
test_buffer_linebuff()
// Testbench that instantiates tvm_buffer with a 19-entry read window and a
// single-entry write window. The write-side registers are left undriven here
// and are set from the Python test through the session; the read address
// pattern is generated by the logic below.
module main();
// Parameters
parameter PER=10;
// In this example we perform a 3x3 convolution of an 8x8 input image
// Therefore the window size here is (3-1)*8+3 = 19
parameter IMAGE_WIDTH = 8;
parameter KERNEL_WIDTH = 3;
// Line buffer parameters
parameter DATA_WIDTH = 8;
parameter DEPTH = 20; // (3-1)*8+3+1
parameter CNTR_WIDTH = 5; // floor(log2(20)) + 1
parameter RD_WINDOW = 19; // (3-1)*8+3
parameter RD_ADVANCE = 1;
parameter RD_ADDR_WIDTH = 5; // floor(log2(19)) + 1
parameter WR_WINDOW = 1;
parameter WR_ADVANCE = 1;
parameter WR_ADDR_WIDTH = 1;
// Clock & reset
reg clk;
reg rst;
// Read port inputs
reg read_advance;
reg [RD_ADDR_WIDTH-1:0] read_addr;
reg read_ready;
// Write port inputs of the buffer (registers of this testbench)
reg write_advance;
reg [DATA_WIDTH-1:0] write_data;
reg write_valid;
// Outputs
wire [DATA_WIDTH-1:0] read_data;
wire read_valid;
wire write_ready;
wire [CNTR_WIDTH-1:0] status_counter;
// Module instantiation
tvm_buffer #(
.DATA_WIDTH(DATA_WIDTH),
.DEPTH(DEPTH),
.CNTR_WIDTH(CNTR_WIDTH),
.RD_WINDOW(RD_WINDOW),
.RD_ADVANCE(RD_ADVANCE),
.RD_ADDR_WIDTH(RD_ADDR_WIDTH),
.WR_WINDOW(WR_WINDOW),
.WR_ADVANCE(WR_ADVANCE),
.WR_ADDR_WIDTH(WR_ADDR_WIDTH)
) uut (
.clk(clk),
.rst(rst),
.read_advance(read_advance),
.read_data(read_data),
.read_addr(read_addr),
.read_ready(read_ready),
.read_valid(read_valid),
.write_advance(write_advance),
.write_data(write_data),
.write_addr({WR_ADDR_WIDTH{1'b0}}),
.write_ready(write_ready),
.write_valid(write_valid),
.status_counter(status_counter)
);
// clock generation: toggle clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
// read logic
localparam KERNEL_SIZE = KERNEL_WIDTH*KERNEL_WIDTH;
// read_counter cycles through 0..KERNEL_SIZE-1 while read_valid is high
reg [3:0] read_counter;
always @(posedge clk) begin
if (rst) begin
// Reset to the last counter value so that the first valid cycle
// wraps the counter to 0 and loads read_addr with 0.
read_counter <= KERNEL_SIZE-1;
read_advance <= 0;
read_addr <= -1;
read_ready <= 0;
end else begin
if (read_valid) begin
read_counter <= (read_counter+1)%KERNEL_SIZE;
read_ready <= 1;
// Only advance at the last inner loop iteration
if (read_counter==KERNEL_SIZE-2) begin
read_advance <= 1;
end else begin
read_advance <= 0;
end
// Read address should describe a loop
if (read_counter==KERNEL_SIZE-1) begin
read_addr <= 0;
end else if (read_counter%KERNEL_WIDTH==KERNEL_WIDTH-1) begin
// end of a kernel row: jump to the start of the next image row
read_addr <= read_addr+IMAGE_WIDTH-KERNEL_WIDTH+1;
end else begin
read_addr <= read_addr+1;
end
end else begin
// no valid data: hold counter and address, drop advance and ready
read_counter <= read_counter;
read_advance <= 0;
read_addr <= read_addr;
read_ready <= 0;
end
end
end
// read_data_valid logic: read_ready delayed by one cycle, cleared on reset
reg read_data_valid;
always @(posedge clk) begin
if (rst)
read_data_valid <= 0;
else
read_data_valid <= read_ready;
end
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
from testing_util import FIFODelayedWriter, FIFODelayedReader
def run_with_lag(n, read_lag, write_lag):
    """Push ``range(n)`` through test_cache_reg.v with delayed reader and writer.

    Parameters
    ----------
    n : int
        Number of values to send.
    read_lag : list of int
        Lag values handed to the FIFODelayedReader.
    write_lag : list of int
        Lag values handed to the FIFODelayedWriter.
    """
    data = list(range(n))
    sess = verilog.session([
        verilog.find_file("test_cache_reg.v")
    ])
    top = sess.main
    rst = top.rst
    in_data = top.in_data
    in_valid = top.in_valid
    in_ready = top.in_ready
    out_data = top.out_data
    out_valid = top.out_valid
    out_ready = top.out_ready

    # Create reader and writer first; they are hooked up only after reset.
    reader = FIFODelayedReader(out_data, out_valid, out_ready, read_lag)
    writer = FIFODelayedWriter(in_data, in_valid, in_ready, data, write_lag)

    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()
    sess.yield_callbacks.append(reader)
    sess.yield_callbacks.append(writer)

    # Upper bound on the number of cycles to simulate.
    timeout = sum(read_lag) + sum(write_lag) + n + 10
    cycle = 0
    while cycle < timeout:
        sess.yield_until_next_cycle()
        if len(reader.data) == n:
            break
        cycle += 1

    assert tuple(reader.data) == tuple(range(n))
    assert len(writer.data) == 0
    sess.shutdown()
def test_fifo():
    """Run the cache-register transfer with a slow reader, a slow writer, and both."""
    n = 20
    lag_cases = (
        # slow reader
        ([3, 4, 8], []),
        # slow writer
        ([0], [0, 2, 10]),
        # mix
        ([3, 4, 8], [0, 2, 10]),
    )
    for reader_lag, writer_lag in lag_cases:
        run_with_lag(n, read_lag=reader_lag, write_lag=writer_lag)
if __name__ == "__main__":
test_fifo()
`include "tvm_marcos.v"
// Testbench that places one CACHE_REG instance between a 32-bit input
// channel (in_data/in_valid/in_ready) and a 32-bit output channel
// (out_data/out_valid/out_ready). The reg signals are left undriven here.
module main();
// Declares clk and rst and generates the clock.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
reg[31:0] in_data;
wire[31:0] out_data;
wire in_ready;
reg in_valid;
reg out_ready;
wire out_valid;
`CACHE_REG(32, in_data, in_valid, in_ready,
out_data, out_valid, out_ready)
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
def test_counter():
    """Simulate test_counter.v and check the counter counts up from 0 after reset."""
    # Start a new session by run simulation on test_counter.v
    # Find file will search root/verilog and root/tests/verilog
    sources = [
        verilog.find_file("test_counter.v"),
        verilog.find_file("example_counter.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    counter = top.counter
    # Index lookup of the counter instance; the result is not used further.
    cnt = top["counter_unit1"]
    assert counter.name == "main.counter"
    assert counter.size == 4

    rst.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()

    for expected in range(10):
        # get value of counter.
        assert counter.get_int() == expected
        sess.yield_until_next_cycle()
def test_scratch():
    """Toggle reset from Python whenever the counter reaches 3 and print the values seen."""
    sources = [
        verilog.find_file("test_counter.v"),
        verilog.find_file("example_counter.v"),
    ]
    sess = verilog.session(sources)

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    counter = top.counter

    rst.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)

    temp = 0
    for _ in range(10):
        if rst.get_int():
            # Reset was asserted last cycle: release it and sample the counter.
            rst.put_int(0)
            temp = counter.get_int()
        elif counter.get_int() == 3:
            rst.put_int(1)
        print("counter=%d, temp=%d" % (counter.get_int(), temp))
        sess.yield_until_next_cycle()
if __name__ == "__main__":
test_scratch()
test_counter()
`include "tvm_marcos.v"
// Testbench that instantiates one counter and exposes its 4-bit output as
// the wire "counter".
module main();
// Declares clk and rst and generates the clock.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
wire[3:0] counter;
counter counter_unit1(.clk(clk), .rst(rst), .out(counter));
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
from tvm.contrib import verilog
def test_loop():
    """Simulate test_loop.v and check the two loop iterators step as a 3x4 nest."""
    sess = verilog.session([
        verilog.find_file("test_loop.v")
    ])

    # Get the handles by their names
    top = sess.main
    rst = top.rst
    iter0 = top.iter0
    iter1 = top.iter1
    ready = top.ready

    rst.put_int(1)
    ready.put_int(1)
    # This will advance the cycle to next pos-edge of clk.
    sess.yield_until_next_cycle()
    rst.put_int(0)
    sess.yield_until_next_cycle()

    # iter1 is the outer iterator (0..2), iter0 the inner one (0..3).
    for outer in range(3):
        for inner in range(4):
            assert iter1.get_int() == outer
            assert iter0.get_int() == inner
            sess.yield_until_next_cycle()
if __name__ == "__main__":
test_loop()
`include "tvm_marcos.v"
// Testbench for the loop macros: iter0 (extent 4) is the inner loop, iter1
// (extent 3) advances when iter0 finishes, and WRAP_LOOP_ONCE gates the nest
// so that it stops after iter1 finishes. "ready" is left undriven here.
module main();
// Declares clk and rst and generates the clock.
`TVM_DEFINE_TEST_SIGNAL(clk, rst)
reg ready;
wire lp_ready;
`NONSTOP_LOOP(iter0, 4, 0, lp_ready, iter0_finish, 0, 4)
`NONSTOP_LOOP(iter1, 4, 0, iter0_finish, iter1_finish, 0, 3)
`WRAP_LOOP_ONCE(0, valid, ready, iter1_finish, loop_ready)
// The inner loop only steps while the wrapped loop reports ready.
assign lp_ready = loop_ready;
initial begin
// This will allow tvm session to be called every cycle.
$tvm_session(clk);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
class FIFOReader(object):
    """Callback that collects values from a data handle while a valid handle is set.

    Each call samples ``read_valid``; when it is non-zero the current value
    of ``read_data`` is appended to ``self.data``.
    """

    def __init__(self, read_data, read_valid):
        self.data = []
        self.read_valid = read_valid
        self.read_data = read_data

    def __call__(self):
        if not self.read_valid.get_int():
            return
        value = self.read_data.get_int()
        self.data.append(value)
class FIFOWriter(object):
    """Callback that feeds ``data`` into a write channel one item per call.

    An item is sent only when items remain and ``write_pend`` reads
    non-zero; otherwise ``write_enable`` is cleared. Sent items are removed
    from the front of the list passed in as ``data``.
    """

    def __init__(self, write_data, write_enable, write_pend, data):
        self.data = data
        self.write_pend = write_pend
        self.write_enable = write_enable
        self.write_data = write_data

    def __call__(self):
        # write_pend is only sampled when there is still something to send.
        if not self.data or not self.write_pend.get_int():
            self.write_enable.put_int(0)
            return
        self.write_enable.put_int(1)
        self.write_data.put_int(int(self.data[0]))
        self.data.pop(0)
def test_ram_read():
    """Issue two read requests to the VPI memory interface and check the streamed data."""
    n = 10
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.arange(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    length = a.shape[0]
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sources = [
        verilog.find_file("test_vpi_mem_interface.v"),
        verilog.find_file("tvm_vpi_mem_interface.v"),
    ]
    sess = verilog.session(sources)
    top = sess.main
    rst = top.rst
    read_data = top.read_data
    read_valid = top.read_data_valid
    read_en = top.read_en
    host_read_req = top.read_req
    host_read_addr = top.read_addr
    host_read_size = top.read_size

    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # hook up reader
    reader = FIFOReader(read_data, read_valid)
    sess.yield_callbacks.append(reader)

    # request read
    host_read_req.put_int(1)
    host_read_addr.put_int(a_ptr)
    host_read_size.put_int(length)
    sess.yield_until_next_cycle()

    # second read request
    host_read_addr.put_int(a_ptr + 2)
    host_read_size.put_int(length - 2)
    sess.yield_until_next_cycle()
    host_read_req.put_int(0)
    read_en.put_int(1)

    # yield until read is done
    for _ in range(length * 3):
        sess.yield_until_next_cycle()
    sess.shutdown()

    # check if result matches: the whole array followed by the array from index 2
    expected = np.concatenate((a_np, a_np[2:]))
    np.testing.assert_equal(np.array(reader.data), expected)
def test_ram_write():
    """Stream values into the VPI memory interface and check they land in the array."""
    n = 10
    # write starting from this offset
    offset = 2
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.zeros(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    length = a.shape[0]
    w_data = list(range(2, n))
    # Copy of the values to write; the writer empties w_data as it sends.
    r_data = np.array(w_data, dtype='int8')
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sources = [
        verilog.find_file("test_vpi_mem_interface.v"),
        verilog.find_file("tvm_vpi_mem_interface.v"),
    ]
    sess = verilog.session(sources)
    top = sess.main
    rst = top.rst
    write_data = top.write_data
    write_en = top.write_en
    write_ready = top.write_data_ready
    host_write_req = top.write_req
    host_write_addr = top.write_addr
    host_write_size = top.write_size

    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)

    # hook up writer
    writer = FIFOWriter(write_data, write_en, write_ready, w_data)
    sess.yield_callbacks.append(writer)

    # request write
    host_write_req.put_int(1)
    host_write_addr.put_int(a_ptr + offset)
    host_write_size.put_int(length - offset)
    sess.yield_until_next_cycle()
    host_write_req.put_int(0)

    # yield until write is done
    for _ in range(length + 2):
        sess.yield_until_next_cycle()
    sess.shutdown()

    # check if result matches
    np.testing.assert_equal(a.asnumpy()[2:], r_data)
if __name__ == "__main__":
test_ram_read()
test_ram_write()
// Testbench that instantiates tvm_vpi_mem_interface with 8-bit read and
// write channels and 32-bit address/size controls. All reg signals except
// clk are left undriven here and are set from the Python tests through the
// session.
module main();
parameter PER = 10;
parameter WIDTH = 8;
reg clk;
reg rst;
// read channels
reg read_en;
wire [WIDTH-1:0] read_data;
wire read_data_valid;
// write channels
reg write_en;
reg [WIDTH-1:0] write_data;
wire write_data_ready;
// controls
reg read_req;
reg [31:0] read_addr;
reg [31:0] read_size;
reg write_req;
reg [31:0] write_addr;
reg [31:0] write_size;
// clock generation: toggle clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
tvm_vpi_mem_interface #
(
.READ_WIDTH(WIDTH),
.WRITE_WIDTH(WIDTH),
.ADDR_WIDTH(32),
.SIZE_WIDTH(32)
)
mem
(
.clk(clk),
.rst(rst),
.read_en(read_en),
.read_data_out(read_data),
.read_data_valid(read_data_valid),
.write_en(write_en),
.write_data_in(write_data),
.write_data_ready(write_data_ready),
.host_read_req(read_req),
.host_read_addr(read_addr),
.host_read_size(read_size),
.host_write_req(write_req),
.host_write_addr(write_addr),
.host_write_size(write_size)
);
initial begin
// pass the mem instance to session to hook it up with simulation
$tvm_session(clk, mem);
end
endmodule
import tvm
import numpy as np
from tvm.contrib import verilog
def test_mmap():
    """Read an array through the VPI read mmap, then overwrite it through the write mmap."""
    n = 10
    # context for VPI RAM
    ctx = tvm.vpi(0)
    a_np = np.arange(n).astype('int8')
    a = tvm.nd.array(a_np, ctx)
    # head ptr of a
    a_ptr = int(a.handle[0].data)

    sources = [
        verilog.find_file("test_vpi_mmap.v"),
        verilog.find_file("tvm_vpi_mmap.v"),
    ]
    sess = verilog.session(sources)
    top = sess.main
    rst = top.rst
    read_addr = top.read_addr
    read_data = top.read_data
    write_addr = top.write_addr
    write_data = top.write_data
    write_en = top.write_en
    mmap_addr = top.mmap_addr

    # setup memory map.
    rst.put_int(1)
    sess.yield_until_next_cycle()
    rst.put_int(0)
    write_en.put_int(0)
    mmap_addr.put_int(a_ptr)
    sess.yield_until_next_cycle()

    # read test
    for index in range(n):
        read_addr.put_int(index)
        sess.yield_until_next_cycle()
        # read addr get set this cycle
        sess.yield_until_next_cycle()
        # get the data out
        assert read_data.get_int() == index

    # write test: store index + 1 at every address, highest address first
    for index in reversed(range(n)):
        write_addr.put_int(index)
        write_en.put_int(1)
        write_data.put_int(index + 1)
        sess.yield_until_next_cycle()
        write_en.put_int(0)
        sess.yield_until_next_cycle()

    np.testing.assert_equal(a.asnumpy(), a_np + 1)
if __name__ == "__main__":
test_mmap()
// Testbench that instantiates one tvm_vpi_read_mmap and one
// tvm_vpi_write_mmap sharing the same mmap_addr base. All reg signals except
// clk are left undriven here and are set from the Python test through the
// session.
module main();
parameter PER = 10;
parameter DATA_WIDTH = 8;
parameter ADDR_WIDTH = 8;
reg clk;
reg rst;
// read channels
reg [ADDR_WIDTH-1:0] read_addr;
wire [DATA_WIDTH-1:0] read_data;
// write channels
reg [ADDR_WIDTH-1:0] write_addr;
reg [DATA_WIDTH-1:0] write_data;
reg write_en;
// mmap base
reg [31:0] mmap_addr;
// clock generation: toggle clk every PER/2 time units
always begin
#(PER/2) clk =~ clk;
end
tvm_vpi_read_mmap #
(
.DATA_WIDTH(DATA_WIDTH),
.ADDR_WIDTH(ADDR_WIDTH)
)
rmmap
(
.clk(clk),
.rst(rst),
.addr(read_addr),
.data_out(read_data),
.mmap_addr(mmap_addr)
);
tvm_vpi_write_mmap #
(
.DATA_WIDTH(DATA_WIDTH),
.ADDR_WIDTH(ADDR_WIDTH)
)
wmmap
(
.clk(clk),
.rst(rst),
.addr(write_addr),
.data_in(write_data),
.en(write_en),
.mmap_addr(mmap_addr)
);
initial begin
// pass both mmap instances to session to hook them up with simulation
$tvm_session(clk, rmmap, wmmap);
end
endmodule
"""Common utilities for test"""
class FIFODelayedReader(object):
    """Reader that lowers its ready signal for a given number of cycles after each read.

    ``lag`` lists the delay to apply after each successive read, in order;
    once the list is used up the delay is 0. Values read are collected in
    ``self.data``. ``read_ready`` is set to 1 on construction.
    """

    def __init__(self, read_data, read_valid, read_ready, lag):
        self.read_data = read_data
        self.read_valid = read_valid
        self.read_ready = read_ready
        self.read_ready.put_int(1)
        # Stored reversed so the next delay can be taken with pop().
        self.lag = list(reversed(lag))
        self.data = []
        self.wait_counter = 0
        self.wait_state = False

    def __call__(self):
        """Logic as if always at pos-edge"""
        handshake = (not self.wait_state
                     and self.read_ready.get_int()
                     and self.read_valid.get_int())
        if handshake:
            self.data.append(self.read_data.get_int())
            next_delay = 0
            if self.lag:
                next_delay = self.lag.pop()
            self.wait_counter = next_delay
            self.wait_state = True

        if not self.wait_state:
            return
        if self.wait_counter != 0:
            # Still waiting: count down and keep ready low.
            self.wait_counter -= 1
            self.read_ready.put_int(0)
            return
        self.read_ready.put_int(1)
        self.wait_state = False
class FIFODelayedWriter(object):
    """Writer that waits a given number of cycles before presenting each item.

    ``data`` is sent in order. ``lag`` lists the delay applied after each
    accepted item, in order; once the list is used up the delay is 0.
    ``write_valid`` is set to 0 on construction. ``self.data`` holds the
    items not yet presented.
    """

    def __init__(self, write_data, write_valid, write_ready, data, lag):
        self.write_data = write_data
        self.write_valid = write_valid
        self.write_ready = write_ready
        self.write_valid.put_int(0)
        # Both lists are stored reversed so the next entry can be taken with pop().
        self.lag = list(reversed(lag))
        self.data = list(reversed(data))
        self.wait_counter = 0
        self.wait_state = True

    def __call__(self):
        """Logic as if always at pos-edge"""
        if not self.wait_state and self.write_ready.get_int():
            next_delay = 0
            if self.lag:
                next_delay = self.lag.pop()
            self.wait_counter = next_delay
            self.wait_state = True

        if not self.wait_state:
            return
        if self.wait_counter != 0:
            # Still waiting: keep valid low and count down.
            self.write_valid.put_int(0)
            self.wait_counter -= 1
            return
        if not self.data:
            # Nothing left to send.
            self.write_valid.put_int(0)
            return
        self.write_valid.put_int(1)
        self.write_data.put_int(self.data.pop())
        self.wait_state = False
# Verilog Code Guideline
The verilog backend is still at early alpha and not yet ready to use.
- Use ```my_port_name``` for variable naming.
- Always use a suffix to indicate how a signal is used.
## Common Suffix
- ```clk```: clock
- ```rst```: reset
- ```in```: input port
- ```out```: output port
- ```en```: enable signal
- ```addr```: address port
- ```valid```: valid signal in FIFO handshake.
- ```ready```: ready signal in FIFO handshake.
// A 4-bit counter that counts up by one on every positive clock edge and is
// cleared to 0 while rst is high.
// Used as an example for test cases.
module counter(clk, rst, out);
input clk;
input rst;
output [3:0] out;
// Register holding the current count; it is what "out" exposes.
reg [3:0] counter;
assign out = counter;
always @(posedge clk) begin
if (rst) begin
counter <= 0;
end else begin
// wraps around after 15 because the register is 4 bits wide
counter <= counter +1;
end
end
endmodule
// Buffer used to add intermediate data buffering in channels
//
// Data within the read/write window is directly accessible via
// read_addr/write_addr, which are offsets added to the read/write pointer.
// The read_advance/write_advance signals move the read/write pointer forward
// by RD_ADVANCE/WR_ADVANCE (modulo DEPTH).
// The status_counter indicates how many items are currently in the buffer; it
// only changes when an advance is accepted.
// The ready/valid signals are used to implement a handshake protocol:
//   read_valid  is high while at least RD_WINDOW items are buffered,
//   write_ready is high while fewer than DEPTH-WR_WINDOW items are buffered.
//
// read_data and status_counter are registered outputs. They are declared as
// "output reg" with their full range in the port list; a port declared in an
// ANSI-style port list must not be declared again inside the module body.
module tvm_buffer #(
  parameter DATA_WIDTH = 256,
  parameter DEPTH = 1024,
  parameter CNTR_WIDTH = 10,   // width of the pointers and of status_counter
  parameter RD_WINDOW = 8,     // set to 1 for FIFO behavior, or DEPTH for SRAM behavior
  parameter RD_ADVANCE = 2,    // window advance (set to 1 for FIFO behavior)
  parameter RD_ADDR_WIDTH = 3, // log base 2 of RD_WINDOW
  parameter WR_WINDOW = 8,     // set to 1 for FIFO behavior, or DEPTH for SRAM behavior
  parameter WR_ADVANCE = 2,    // window advance (set to 1 for FIFO behavior)
  parameter WR_ADDR_WIDTH = 3  // log base 2 of WR_WINDOW
) (
  input clk,
  input rst,
  // Read ports
  input read_advance,                   // Window advance (read pointer)
  input [RD_ADDR_WIDTH-1:0] read_addr,  // Read address offset
  input read_ready,                     // Read ready (dequeue)
  output read_valid,                    // Read valid (not empty)
  output reg [DATA_WIDTH-1:0] read_data, // Read data port (registered)
  // Write ports
  input write_advance,                  // Window advance (write pointer)
  input [WR_ADDR_WIDTH-1:0] write_addr, // Write address offset
  output write_ready,                   // Write ready (not full)
  input write_valid,                    // Write valid (enqueue)
  input [DATA_WIDTH-1:0] write_data,    // Write data port
  // Other outputs
  output reg [CNTR_WIDTH-1:0] status_counter // Number of elements currently in the buffer (registered)
);

  // Internal registers (read pointer, write pointer)
  reg [CNTR_WIDTH-1:0] read_ptr;
  reg [CNTR_WIDTH-1:0] write_ptr;

  // RAM instance
  reg [DATA_WIDTH-1:0] ram[DEPTH-1:0];

  // Empty and full logic
  assign read_valid = (status_counter>=RD_WINDOW) ? 1'b1 : 1'b0;
  assign write_ready = (status_counter<(DEPTH-WR_WINDOW)) ? 1'b1 : 1'b0;

  // Counter logic (only affected by accepted advances)
  always @(posedge clk) begin
    // Case 1: system reset
    if (rst==1'b1) begin
      status_counter <= 0;
    // Case 2: simultaneous write advance and read advance
    end else if ((write_advance && write_ready) && (read_advance && read_valid)) begin
      status_counter <= status_counter + (WR_ADVANCE - RD_ADVANCE);
    // Case 3: write advance only
    end else if (write_advance && write_ready) begin
      status_counter <= status_counter + WR_ADVANCE;
    // Case 4: read advance only
    end else if (read_advance && read_valid) begin
      status_counter <= status_counter - RD_ADVANCE;
    // Default: hold
    end else begin
      status_counter <= status_counter;
    end
  end

  // Output logic: latch the addressed RAM entry whenever read_ready is high
  always @(posedge clk) begin
    if (rst==1'b1) begin
      read_data <= 0;
    end else begin
      if(read_ready) begin
        read_data <= ram[(read_ptr+read_addr)%DEPTH];
      end else begin
        read_data <= read_data;
      end
    end
  end

  // RAM writing logic: store write_data at the addressed entry whenever
  // write_valid is high (the RAM itself is not cleared on reset)
  always @(posedge clk) begin
    if(write_valid) begin
      ram[((write_ptr+write_addr)%DEPTH)] <= write_data;
    end
  end

  // Read and write pointer logic
  always@(posedge clk) begin
    if (rst==1'b1) begin
      write_ptr <= 0;
      read_ptr <= 0;
    end else begin
      // Increment write pointer by WR_ADVANCE when asserting write_advance
      // When performing a write, no need to update the write pointer
      if (write_advance && write_ready) begin
        write_ptr <= (write_ptr + WR_ADVANCE) % DEPTH;
      end else begin
        write_ptr <= write_ptr;
      end
      // Increment read pointer by RD_ADVANCE when asserting read_advance
      // When performing a read, no need to update the read pointer
      if(read_advance && read_valid) begin
        read_ptr <= (read_ptr + RD_ADVANCE) % DEPTH;
      end else begin
        read_ptr <= read_ptr;
      end
    end
  end

endmodule // tvm_buffer
// Nonstop version of loop
// Declares a register "iter" of the given width and a wire "finish".
// On reset or when init is high, iter is loaded with min.
// While ready is high, iter increments every cycle and is reloaded with min
// after it reaches extent-1; while ready is low, iter holds its value.
// finish is high when ready is high and iter equals extent-1, i.e. it marks
// the last iteration.
// Use that to signal parent loop to advance.
// The macro expects signals named clk and rst in the enclosing scope.
`define NONSTOP_LOOP(iter, width, init, ready, finish, min, extent)\
reg [width-1:0] iter;\
wire finish;\
always@(posedge clk) begin\
if (rst || init) begin\
iter <= (min);\
end else if(ready) begin\
if (iter != ((extent)-1)) begin\
iter <= iter + 1;\
end else begin\
iter <= (min);\
end\
end else begin\
iter <= iter;\
end\
end\
assign finish = (ready && (iter == (extent) - 1));
// Wrap a nonstop loop into a normal loop that runs only once.
// Declares a register "valid" and a wire "body_ready".
// valid is set on reset or when init is high, and is cleared once
// body_finish is seen; otherwise it holds its value.
// body_ready = valid && ready, so the wrapped body stops being ready after
// it has finished.
// The macro expects signals named clk and rst in the enclosing scope.
`define WRAP_LOOP_ONCE(init, valid, ready, body_finish, body_ready)\
reg valid;\
wire body_ready;\
always@(posedge clk) begin\
if (rst || init) begin\
valid <= 1;\
end else if(body_finish) begin\
valid <= 0;\
end else begin\
valid <= valid;\
end\
end\
assign body_ready = (valid && ready);
// Assign dst as src delayed by specific cycles.
// Declares a register <src>_dly_chain that is width*delay bits wide.
// While not_stall is high, src is shifted into the lowest "width" bits and
// the rest of the chain moves up by "width" bits; while not_stall is low the
// chain holds its value. The chain is cleared on reset.
// dst is driven from the highest "width" bits of the chain.
// The macro expects signals named clk and rst in the enclosing scope.
`define DELAY(dst, src, width, delay, not_stall)\
reg [(width)*(delay)-1:0] src``_dly_chain;\
always@(posedge clk) begin\
if(rst) begin\
src``_dly_chain <= 0;\
end else if (not_stall) begin\
src``_dly_chain[(width)-1:0] <= src;\
if((delay) != 1) begin\
src``_dly_chain[(delay)*(width)-1:(width)] <= src``_dly_chain[((delay)-1)*(width)-1:0];\
end\
end else begin\
src``_dly_chain <= src``_dly_chain;\
end\
end\
assign dst = src``_dly_chain[(delay)*(width)-1:((delay)-1)*(width)];
// TVM generate clock signal
// Declares the parameter PER (10), the registers clk and rst, and an always
// block that toggles clk every PER/2 time units. rst is only declared here.
`define TVM_DEFINE_TEST_SIGNAL(clk, rst)\
parameter PER = 10;\
reg clk;\
reg rst;\
always begin\
#(PER/2) clk =~ clk;\
end
// Control logic on buffer/RAM read valid.
// Declares a register "dst" that follows data_valid with one cycle of delay
// while write_ready is high and retains its value when write_ready == 0.
// dst is cleared on reset.
// The macro expects signals named clk and rst in the enclosing scope.
// The final line of the body carries no line continuation, so the macro ends
// with its own "end" and does not extend into whatever follows it.
`define BUFFER_READ_VALID_DELAY(dst, data_valid, write_ready)\
  reg dst;\
  always@(posedge clk) begin\
    if(rst) begin\
      dst <= 0;\
    end else if (write_ready) begin\
      dst <= (data_valid);\
    end else begin\
      dst <= dst;\
    end\
  end
// A cache register that adds one cycle of lag to the ready signal.
// This allows the signal to flow more smoothly.
//
// Declares two data/valid register pairs derived from the out_* names:
//   <out_data>_state_    / <out_valid>_state_    : value presented on the output
//   <out_data>_overflow_ / <out_valid>_overflow_ : value captured while the
//                                                  output is held
// Behavior per clock edge:
//   - overflow valid, out_ready high : overflow moves into state, overflow clears
//   - overflow valid, out_ready low  : everything holds
//   - overflow empty, output held (state valid and out_ready low) :
//       the input is captured into overflow
//   - otherwise : the input is loaded into state
// in_ready is the inverse of the overflow valid flag.
// Only the two valid flags are cleared on reset.
// The macro expects signals named clk and rst in the enclosing scope.
// Every body line ends with the backslash directly before the newline; no
// comment is placed after a line continuation.
`define CACHE_REG(width, in_data, in_valid, in_ready, out_data, out_valid, out_ready)\
  reg [width-1:0] out_data``_state_;\
  reg [width-1:0] out_data``_overflow_;\
  reg out_valid``_state_;\
  reg out_valid``_overflow_;\
  always@(posedge clk) begin\
    if(rst) begin\
      out_valid``_overflow_ <= 0;\
      out_valid``_state_ <= 0;\
    end else if (out_valid``_overflow_) begin\
      if (out_ready) begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_overflow_;\
        out_valid``_overflow_ <= 0;\
        out_data``_overflow_ <= 0;\
      end else begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_state_;\
        out_valid``_overflow_ <= out_valid``_overflow_;\
        out_data``_overflow_ <= out_data``_overflow_;\
      end\
    end else begin\
      if (!out_ready && out_valid``_state_) begin\
        out_valid``_state_ <= 1;\
        out_data``_state_ <= out_data``_state_;\
        out_valid``_overflow_ <= in_valid;\
        out_data``_overflow_ <= in_data;\
      end else begin\
        out_valid``_state_ <= in_valid;\
        out_data``_state_ <= in_data;\
        out_valid``_overflow_ <= out_valid``_overflow_;\
        out_data``_overflow_ <= out_data``_overflow_;\
      end\
    end\
  end\
  assign in_ready = !out_valid``_overflow_;\
  assign out_data = out_data``_state_;\
  assign out_valid = out_valid``_state_;
/*!
* Copyright (c) 2017 by Contributors
* \file tvm_vpi.cc
* \brief Messages passed around VPI used for simulation.
*/
#include <dmlc/logging.h>
#include <vpi_user.h>
#include <cstdlib>
#include <memory>
#include <queue>
#include <string>
#include <vector>
#include "tvm_vpi.h"
#include "../src/common/pipe.h"
namespace tvm {
namespace vpi {
// Compile-time consistency checks: the constants mirrored in tvm_vpi.h
// have to agree with the definitions that come from vpi_user.h.
// The raw handle type must have the same size as a vpiHandle.
static_assert(sizeof(VPIRawHandle) == sizeof(vpiHandle), "VPI standard");
// Object type codes.
static_assert(kVPIModule == vpiModule, "VPI standard");
// Property codes.
static_assert(kVPIType == vpiType, "VPI standard");
static_assert(kVPIFullName == vpiFullName, "VPI standard");
static_assert(kVPISize == vpiSize, "VPI standard");
static_assert(kVPIDefName == vpiDefName, "VPI standard");
// IPC client for VPI
class IPCClient {
public:
// constructor
IPCClient(int64_t hread, int64_t hwrite)
: reader_(hread), writer_(hwrite) {
}
void Init() {
vpiHandle argv = vpi_handle(vpiSysTfCall, 0);
vpiHandle arg_iter = vpi_iterate(vpiArgument, argv);
clock_ = vpi_scan(arg_iter);
std::vector<VPIRawHandle> handles;
while (vpiHandle h = vpi_scan(arg_iter)) {
handles.push_back(h);
}
writer_.Write(handles);
PutInt(clock_, 0);
}
int Callback() {
if (!GetInt(clock_)) {
try {
return AtNegEdge();
} catch (const std::runtime_error& e) {
reader_.Close();
writer_.Close();
vpi_printf("ERROR: encountered %s\n", e.what());
vpi_control(vpiFinish, 1);
return 0;
}
} else {
return 0;
}
}
// called at neg edge.
int AtNegEdge() {
// This is actually called at neg-edge
// The put values won't take effect until next neg-edge.
// This allow us to see the registers before snc
writer_.Write(kPosEdgeTrigger);
VPICallCode rcode;
VPIRawHandle handle;
int32_t index, value;
while (true) {
CHECK(reader_.Read(&rcode));
switch (rcode) {
case kGetHandleByName: {
std::string str;
CHECK(reader_.Read(&str));
CHECK(reader_.Read(&handle));
handle = vpi_handle_by_name(
str.c_str(), static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(handle);
break;
}
case kGetHandleByIndex: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&index));
handle = vpi_handle_by_index(
static_cast<vpiHandle>(handle), index);
writer_.Write(kSuccess);
writer_.Write(handle);
break;
}
case kGetStrProp: {
CHECK(reader_.Read(&value));
CHECK(reader_.Read(&handle));
std::string prop = vpi_get_str(
value, static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(prop);
break;
}
case kGetIntProp: {
CHECK(reader_.Read(&value));
CHECK(reader_.Read(&handle));
value = vpi_get(value, static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(value);
break;
}
case kGetInt32: {
CHECK(reader_.Read(&handle));
value = GetInt(static_cast<vpiHandle>(handle));
writer_.Write(kSuccess);
writer_.Write(value);
break;
}
case kPutInt32: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&value));
CHECK(handle != clock_) << "Cannot write to clock";
PutInt(static_cast<vpiHandle>(handle), value);
writer_.Write(kSuccess);
break;
}
case kGetVec: {
CHECK(reader_.Read(&handle));
vpiHandle h = static_cast<vpiHandle>(handle);
int bits = vpi_get(vpiSize, h);
int nwords = (bits + 31) / 32;
s_vpi_value value_s;
value_s.format = vpiVectorVal;
vpi_get_value(h, &value_s);
vec_buf_.resize(nwords);
for (size_t i = 0; i < vec_buf_.size(); ++i) {
vec_buf_[i].aval = value_s.value.vector[i].aval;
vec_buf_[i].bval = value_s.value.vector[i].bval;
}
writer_.Write(kSuccess);
writer_.Write(vec_buf_);
break;
}
case kPutVec: {
CHECK(reader_.Read(&handle));
CHECK(reader_.Read(&vec_buf_));
CHECK(handle != clock_) << "Cannot write to clock";
vpiHandle h = static_cast<vpiHandle>(handle);
svec_buf_.resize(vec_buf_.size());
for (size_t i = 0; i < vec_buf_.size(); ++i) {
svec_buf_[i].aval = vec_buf_[i].aval;
svec_buf_[i].bval = vec_buf_[i].bval;
}
s_vpi_value value_s;
s_vpi_time time_s;
time_s.type = vpiSimTime;
time_s.high = 0;
time_s.low = 10;
value_s.format = vpiVectorVal;
value_s.value.vector = &svec_buf_[0];
vpi_put_value(h, &value_s, &time_s, vpiTransportDelay);
writer_.Write(kSuccess);
break;
}
case kYield: {
writer_.Write(kSuccess);
return 0;
}
case kShutDown : {
writer_.Write(kSuccess);
vpi_control(vpiFinish, 0);
return 0;
}
}
}
}
// Create a new FSM from ENV.
static IPCClient* Create() {
const char* d_read = getenv("TVM_DREAD_PIPE");
const char* d_write = getenv("TVM_DWRITE_PIPE");
const char* h_read = getenv("TVM_HREAD_PIPE");
const char* h_write = getenv("TVM_HWRITE_PIPE");
if (d_write == nullptr ||
d_read == nullptr ||
h_read == nullptr ||
h_write == nullptr) {
vpi_printf("ERROR: need environment var TVM_READ_PIPE, TVM_WRITE_PIPE\n");
vpi_control(vpiFinish, 1);
return nullptr;
}
// close host side pipe.
common::Pipe(atoi(h_read)).Close();
common::Pipe(atoi(h_write)).Close();
IPCClient* client = new IPCClient(atoi(d_read), atoi(d_write));
client->Init();
return client;
}
// Get integer from handle.
static int GetInt(vpiHandle h) {
s_vpi_value value_s;
value_s.format = vpiIntVal;
vpi_get_value(h, &value_s);
return value_s.value.integer;
}
// Put integer into handle.
static void PutInt(vpiHandle h, int value) {
s_vpi_value value_s;
s_vpi_time time_s;
time_s.type = vpiSimTime;
time_s.high = 0;
time_s.low = 10;
value_s.format = vpiIntVal;
value_s.value.integer = value;
vpi_put_value(h, &value_s, &time_s, vpiTransportDelay);
}
// Handles
vpiHandle clock_;
// the communicator
common::Pipe reader_, writer_;
// data buf
std::vector<VPIVecVal> vec_buf_;
std::vector<s_vpi_vecval> svec_buf_;
};
} // namespace vpi
} // namespace tvm
extern "C" {
/*!
 * \brief Callback registered on the clock.
 *  Forwards to the IPCClient stored in the user data of the callback.
 * \param cb_data The callback data passed in by the simulator.
 * \return The return value of IPCClient::Callback.
 */
static PLI_INT32 tvm_host_clock_cb(p_cb_data cb_data) {
  auto* client =
      reinterpret_cast<tvm::vpi::IPCClient*>(cb_data->user_data);
  return client->Callback();
}
/*!
 * \brief Call routine of the $tvm_session system task.
 *  Creates the IPC client and registers tvm_host_clock_cb as a
 *  value change callback on the clock of the client.
 * \param cb Unused.
 * \return Always 0.
 */
static PLI_INT32 tvm_init(char* cb) {
  tvm::vpi::IPCClient* client = tvm::vpi::IPCClient::Create();
  if (client == nullptr) {
    vpi_printf("ERROR: canot initalize host\n");
    vpi_control(vpiFinish, 1);
    return 0;
  }
  // time and value descriptors of the callback
  s_vpi_time time_s;
  time_s.type = vpiSuppressTime;
  s_vpi_value value_s;
  value_s.format = vpiIntVal;
  // trigger on every value change of the clock
  s_cb_data cb_data_s;
  cb_data_s.reason = cbValueChange;
  cb_data_s.cb_rtn = tvm_host_clock_cb;
  cb_data_s.obj = client->clock_;
  cb_data_s.time = &time_s;
  cb_data_s.value = &value_s;
  cb_data_s.user_data = reinterpret_cast<char*>(client);
  vpi_register_cb(&cb_data_s);
  return 0;
}
/*!
 * \brief Register the $tvm_session system task with the simulator.
 *  The task has tvm_init as call routine and neither a compile routine,
 *  a size routine nor user data.
 */
void tvm_vpi_register() {
  s_vpi_systf_data tf_data;
  tf_data.tfname = "$tvm_session";
  tf_data.type = vpiSysTask;
  tf_data.calltf = tvm_init;
  tf_data.compiletf = nullptr;
  tf_data.sizetf = nullptr;
  tf_data.user_data = nullptr;
  vpi_register_systf(&tf_data);
}
/*! \brief Null terminated table of startup routines. */
void (*vlog_startup_routines[])() = {
  tvm_vpi_register,
  nullptr
};
} // extern "C"
/*!
* Copyright (c) 2017 by Contributors
* \file tvm_vpi.h
* \brief Messages passed around VPI used for simulation.
*/
#ifndef VERILOG_TVM_VPI_H_
#define VERILOG_TVM_VPI_H_
namespace tvm {
namespace vpi {
/*!
 * \brief Request codes sent to the VPI side.
 *  The values are part of the message format, so they are spelled out.
 */
enum VPICallCode : int {
  /*! \brief Look up a handle by name. */
  kGetHandleByName = 0,
  /*! \brief Look up a handle by index. */
  kGetHandleByIndex = 1,
  /*! \brief Read a string property of a handle. */
  kGetStrProp = 2,
  /*! \brief Read an integer property of a handle. */
  kGetIntProp = 3,
  /*! \brief Read the value of a handle as an integer. */
  kGetInt32 = 4,
  /*! \brief Write an integer value to a handle. */
  kPutInt32 = 5,
  /*! \brief Read the value of a handle as a vector. */
  kGetVec = 6,
  /*! \brief Write a vector value to a handle. */
  kPutVec = 7,
  /*! \brief Hand control back to the simulator. */
  kYield = 8,
  /*! \brief Finish the simulation. */
  kShutDown = 9
};
/*! \brief Codes sent back from the VPI side. */
enum VPIReturnCode : int {
  /*! \brief Sent when the VPI side starts to serve requests. */
  kPosEdgeTrigger = 0,
  /*! \brief The request was served. */
  kSuccess = 1,
  /*! \brief Failure code. */
  kFail = 2
};
/*! \brief VPI type code as in IEEE standard. */
enum VPITypeCode {
  /*! \brief Type code of a module. */
  kVPIModule = 32
};
/*! \brief VPI property code as in IEEE standard. */
enum VPIPropCode {
  /*! \brief The type property. */
  kVPIType = 1,
  /*! \brief The full name property. */
  kVPIFullName = 3,
  /*! \brief The size property. */
  kVPISize = 4,
  /*! \brief The definition name property. */
  kVPIDefName = 9
};
/*!
 * \brief The vector value used in transmission.
 *  One element carries an aval/bval pair of a vector value.
 */
struct VPIVecVal {
/*! \brief The aval word. */
int aval;
/*! \brief The bval word. */
int bval;
};
/*! \brief User facing vpi handle, an opaque pointer. */
using VPIRawHandle = void*;
} // namespace vpi
} // namespace tvm
#endif // VERILOG_TVM_VPI_H_
// Memory controller to access TVM VPI simulated RAM.
//
// This module only contains wires and registers, no logic.
// The real computation is implemented via TVM VPI.
//
// Usage: create an instance and pass it as an additional argument of $tvm_session.
// The RAM logic will then be hooked up automatically.
//
module tvm_vpi_mem_interface
#(
parameter READ_WIDTH = 8,    // Bit width of the read data port
parameter WRITE_WIDTH = 8,   // Bit width of the write data port
parameter ADDR_WIDTH = 32,   // Bit width of the host addresses
parameter SIZE_WIDTH = 32    // Bit width of the host transfer sizes
)
(
input clk,
input rst,
// Read ports
input read_en, // Read buffer enable
output [READ_WIDTH-1:0] read_data_out, // The data port for read
output read_data_valid, // The read data is valid
// Write ports
input write_en, // Write buffer enable
input [WRITE_WIDTH-1:0] write_data_in, // Input data to write
output write_data_ready, // Write ready flag, mirrors reg_write_ready
// Status port
// Control signal ports to issue tasks
input host_read_req, // Read request
input [ADDR_WIDTH-1:0] host_read_addr, // The address to issue a read task
input [SIZE_WIDTH-1:0] host_read_size, // The size of a read
input host_write_req, // Write request
input [ADDR_WIDTH-1:0] host_write_addr, // The write address
input [SIZE_WIDTH-1:0] host_write_size // The write size
);
// Registers behind the output ports; nothing in this module assigns them.
reg [READ_WIDTH-1:0] reg_read_data;
reg reg_read_valid;
reg reg_write_ready;
// Wire the registers up to the output ports.
assign read_data_out = reg_read_data;
assign read_data_valid = reg_read_valid;
assign write_data_ready = reg_write_ready;
endmodule
// TVM mmap maps virtual DRAM into the interface of an SRAM.
// This allows creating test cases that directly access DRAM.
// Read only memory map, one cycle read.
// This module only contains wires and a register, no logic.
// Usage: create an instance and pass it as an additional argument of $tvm_session.
module tvm_vpi_read_mmap
#(
parameter DATA_WIDTH = 8,        // Bit width of the data port
parameter ADDR_WIDTH = 8,        // Bit width of the local address
parameter BASE_ADDR_WIDTH = 32   // Bit width of the base address
)
(
input clk,
input rst,
// Read ports
input [ADDR_WIDTH-1:0] addr, // Local offset in terms of number of units
output [DATA_WIDTH-1:0] data_out, // The data port for read
// Configure port
input [BASE_ADDR_WIDTH-1:0] mmap_addr // The base address of the memory map
);
// Register behind data_out; nothing in this module assigns it.
reg [DATA_WIDTH-1:0] reg_data;
assign data_out = reg_data;
endmodule
// Write only memory map, one cycle write.
// This module only declares ports, it has no body.
// Usage: create an instance and pass it as an additional argument of $tvm_session.
module tvm_vpi_write_mmap
#(
parameter DATA_WIDTH = 8,        // Bit width of the data port
parameter ADDR_WIDTH = 8,        // Bit width of the local address
parameter BASE_ADDR_WIDTH = 32   // Bit width of the base address
)
(
input clk,
input rst,
// Write ports
input [ADDR_WIDTH-1:0] addr, // Local offset in terms of number of units
input [DATA_WIDTH-1:0] data_in, // The data port for write
input en, // The enable port for write
// Configure port
input [BASE_ADDR_WIDTH-1:0] mmap_addr // The base address of the memory map
);
endmodule
# Flags for building VPI modules. The backquoted commands are kept as text
# in the variables and are run by the shell when a recipe uses them.
VPI_CFLAGS=`iverilog-vpi --cflags`
VPI_LDFLAGS=`iverilog-vpi --ldflags`

# All Verilog sources in the verilog directory.
VER_SRCS = $(wildcard verilog/*.v)

# The VPI module built by the rule below.
VER_LIBS=lib/tvm_vpi.vpi

# Build the VPI module. Only the .cc prerequisites are handed to the
# compiler; the header is listed so that changing it triggers a rebuild.
# Recipe lines have to start with a tab character.
lib/tvm_vpi.vpi: verilog/tvm_vpi.cc verilog/tvm_vpi.h
	@mkdir -p $(@D)
	$(CXX) $(VPI_CFLAGS) $(CFLAGS) -o $@ $(filter %.cc, $^) $(LDFLAGS) $(VPI_LDFLAGS)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment