Commit 14142a81 by TABATA, Keiichi Committed by Thierry Moreau

Add initial support for Intel FPGA SDK for OpenCL (AOCL) (#1474)

parent feabd406
......@@ -42,6 +42,9 @@ set(USE_ROCM OFF)
# Whether enable SDAccel runtime
set(USE_SDACCEL OFF)
# Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime
set(USE_AOCL OFF)
# Whether enable OpenCL runtime
set(USE_OPENCL OFF)
......
......@@ -19,6 +19,18 @@ else()
list(APPEND COMPILER_SRCS src/codegen/opt/build_sdaccel_off.cc)
endif(USE_SDACCEL)
# Intel FPGA SDK for OpenCL (AOCL): when enabled, add the AOCL runtime sources;
# otherwise compile the "aocl off" stub into the compiler sources instead.
if(USE_AOCL)
message(STATUS "Build with Intel FPGA SDK for OpenCL support")
file(GLOB RUNTIME_AOCL_SRCS src/runtime/opencl/aocl/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_AOCL_SRCS})
# AOCL requires the OpenCL runtime, so force it on. This has to run before the
# if(USE_OPENCL) check that follows this block.
if(NOT USE_OPENCL)
message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
set(USE_OPENCL ON)
endif()
else()
list(APPEND COMPILER_SRCS src/codegen/opt/build_aocl_off.cc)
endif(USE_AOCL)
if(USE_OPENCL)
find_package(OpenCL REQUIRED)
message(STATUS "Build with OpenCL support")
......
AOCL Backend Example
====================
TVM supports the Intel FPGA SDK for OpenCL, also known as AOCL. This tutorial shows how to use TVM with AOCL.
***Note***: This feature is still experimental. AOCL cannot yet be used to deploy end-to-end neural networks. In addition, compilation has only been tested in the emulation mode of AOCL.
We use two python scripts for this tutorial.
- build.py - a script to synthesize the FPGA bitstream.
```
import tvm
tgt_host="llvm"
tgt="aocl -device=s5_ref -mattr=emulator"
n = tvm.var("n")
A = tvm.placeholder((n,), name='A')
B = tvm.placeholder((n,), name='B')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
px, x = s[C].split(C.op.axis[0], nparts=1)
s[C].bind(px, tvm.thread_axis("pipeline"))
fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
fadd.save("myadd.o")
fadd.imported_modules[0].save("myadd.aocx")
tvm.contrib.cc.create_shared("myadd.so", ["myadd.o"])
```
- run.py - a script to use FPGA as an accelerator.
```
import tvm
import numpy as np
import os
tgt="aocl -device=s5_ref -mattr=emulator"
fadd = tvm.module.load("myadd.so")
fadd_dev = tvm.module.load("myadd.aocx")
fadd.import_module(fadd_dev)
ctx = tvm.context(tgt, 0)
n = 1024
a = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
c = tvm.nd.array(np.zeros(n, dtype="float32"), ctx)
fadd(a, b, c)
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
```
Setup
-----
- Install AOCL 17.1 on Ubuntu 16.04.4 LTS.
- Install BSP for your FPGA device.
- Install FPGA device driver.
- Create an ICD file at /etc/OpenCL/vendors/Altera.icd so that the OpenCL platform can be found.
```
/opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so
```
- Create an FCD file, for example at /opt/Intel/OpenCL/Boards/s5_ref.fcd, so that your FPGA device can be found.
```
/opt/intelFPGA/17.1/hld/board/s5_ref/linux64/lib/libaltera_s5_ref_mmd.so
```
- Set up TVM with AOCL and OpenCL enabled.
Emulation
---------
- Run software emulation
```
export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
python build.py
python run.py
```
- Run on FPGA devices (not tested)
- Change the tgt value to "aocl -device=s5_ref" in both build.py and run.py
```
unset CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA
python build.py
python run.py
```
......@@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;
/*! \brief Extension device types in TVM */
typedef enum {
kDLAOCL = 5,
kDLSDAccel = 6,
kDLVulkan = 7,
kOpenGL = 11,
......
......@@ -96,6 +96,7 @@ class TVMContext(ctypes.Structure):
1 : 'cpu',
2 : 'gpu',
4 : 'opencl',
5 : 'aocl',
6 : 'sdaccel',
7 : 'vulkan',
8 : 'metal',
......@@ -113,6 +114,7 @@ class TVMContext(ctypes.Structure):
'nvptx': 2,
'cl': 4,
'opencl': 4,
'aocl' : 5,
'sdaccel': 6,
'vulkan': 7,
'metal': 8,
......
......@@ -91,6 +91,9 @@ Target CreateTarget(const std::string& target_name,
} else if (target_name == "sdaccel") {
t->device_type = kDLOpenCL;
t->keys_array.push_back(ir::StringImm::make("sdaccel"));
} else if (target_name == "aocl") {
t->device_type = kDLAOCL;
t->keys_array.push_back(ir::StringImm::make("aocl"));
} else if (target_name == "opengl") {
t->device_type = kOpenGL;
t->keys_array.push_back(ir::StringImm::make("opengl"));
......
/*!
* Copyright (c) 2018 by Contributors
* \file codegen_aocl.cc
*/
#include <tvm/build_module.h>
#include <vector>
#include <string>
#include "./codegen_opencl.h"
#include "./build_common.h"
#include "../runtime/opencl/aocl/aocl_module.h"
#include "../runtime/file_util.h"
namespace tvm {
namespace codegen {
/*!
 * \brief Build an AOCL module by offline-compiling generated OpenCL code with `aoc`.
 *
 * The generated kernel source is written to "aocl.cl" in the current working
 * directory, `aoc` is run on it through the shell, and the resulting
 * "aocl.aocx" is read back and wrapped into a runtime module.
 * If a global function named "tvm_callback_opencl_postproc" is registered,
 * the generated source is passed through it before being written.
 *
 * \param funcs The lowered functions to generate OpenCL code for.
 * \param target_str The build target string. It must specify a device name,
 *        which is forwarded to `aoc` as `-board=<name>`. The option
 *        `-mattr=emulator` makes `aoc` run with `-march=emulator`.
 * \return The module produced by AOCLModuleCreate from the compiled binary.
 */
runtime::Module BuildAOCL(Array<LoweredFunc> funcs, std::string target_str) {
  using tvm::runtime::Registry;

  // Validate the target up front so that a bad target fails before any code
  // is generated or any file is written to the working directory.
  Target target = Target::create(target_str);
  if (target->device_name.empty()) {
    LOG(FATAL) << "AOCL device name not specified in build target.";
  }

  // Get code.
  bool output_ssa = false;
  CodeGenOpenCL cg;
  cg.Init(output_ssa);
  for (LoweredFunc f : funcs) {
    cg.AddFunction(f);
  }
  std::string code = cg.Finish();
  if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
    code = (*f)(code).operator std::string();
  }

  // Write a .cl file.
  runtime::SaveBinaryToFile("aocl.cl", code.c_str());

  // Assemble the offline compiler command line.
  std::string cmd = "aoc aocl.cl";
  for (const std::string& option : target->options()) {
    if (option == "-mattr=emulator") {
      cmd += " -march=emulator";
      break;  // the flag only needs to be passed once
    }
  }
  // NOTE(review): the device name is interpolated into a shell command without
  // quoting; target strings are assumed to come from a trusted caller.
  cmd += " -board=" + target->device_name;

  // Compile the .cl file.
  if (system(cmd.c_str()) != 0) {
    LOG(FATAL) << "OpenCL offline compilation error.";
  }

  // Read .aocx file
  std::string aocxbin;
  runtime::LoadBinaryFromFile("aocl.aocx", &aocxbin);
  return AOCLModuleCreate(aocxbin, "aocx", ExtractFuncInfo(funcs), code);
}
// Expose BuildAOCL through the API registry as "codegen.build_aocl".
// args[0]: the lowered functions, args[1]: the target string.
TVM_REGISTER_API("codegen.build_aocl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    Array<LoweredFunc> funcs = args[0];
    std::string target_str = args[1];
    *rv = BuildAOCL(funcs, target_str);
  });
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2018 by Contributors
* Optional module when build aocl is switched to off
*/
#include "../codegen_source_base.h"
#include "../../runtime/opencl/opencl_module.h"
namespace tvm {
namespace runtime {
/*!
 * \brief Stand-in for AOCLModuleCreate used when the AOCL runtime is not built.
 *
 * Logs a warning and returns a device source module tagged "aocl" instead.
 * The \p source argument is accepted for signature compatibility and is not used.
 */
Module AOCLModuleCreate(
    std::string data,
    std::string fmt,
    std::unordered_map<std::string, FunctionInfo> fmap,
    std::string source) {
  LOG(WARNING) << "AOCL runtime not enabled, return a source module...";
  Module fallback = codegen::DeviceSourceModuleCreate(data, fmt, fmap, "aocl");
  return fallback;
}
} // namespace runtime
} // namespace tvm
......@@ -145,7 +145,7 @@ class MemoryAccessVerifier final : protected IRVisitor {
}
/// Check if a given DLDeviceType/TVMDeviceExtType value denotes FPGA device.
/// Both the SDAccel and the AOCL device types are treated as FPGA devices.
static bool IsFPGADevice(int dev_type) {
  return kDLSDAccel == dev_type || kDLAOCL == dev_type;
}
private:
......
......@@ -32,6 +32,7 @@ inline std::string DeviceName(int type) {
case kDLGPU: return "gpu";
case kDLOpenCL: return "opencl";
case kDLSDAccel: return "sdaccel";
case kDLAOCL: return "aocl";
case kDLVulkan: return "vulkan";
case kDLMetal: return "metal";
case kDLVPI: return "vpi";
......
/*!
* Copyright (c) 2018 by Contributors
* \file aocl_common.h
* \brief AOCL common header
*/
#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
#define TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
#include "../opencl_common.h"
namespace tvm {
namespace runtime {
namespace cl {
/*!
 * \brief Process global AOCL workspace.
 *
 * Specialization of OpenCLWorkspace for the AOCL device type (kDLAOCL).
 */
class AOCLWorkspace final : public OpenCLWorkspace {
public:
// override OpenCL device API
// Initialize the base workspace with the AOCL type key, device type and platform name.
void Init() final;
// Return true when ctx refers to an AOCL device.
bool IsOpenCLDevice(TVMContext ctx) final;
// Return the AOCL thread entry obtained from AOCLThreadEntry::ThreadLocal().
OpenCLThreadEntry* GetThreadEntry() final;
// get the global workspace
static const std::shared_ptr<OpenCLWorkspace>& Global();
};
/*! \brief Thread local workspace for AOCL */
class AOCLThreadEntry : public OpenCLThreadEntry {
public:
// constructor: bind the entry to the AOCL device type and the global AOCL workspace
AOCLThreadEntry()
: OpenCLThreadEntry(static_cast<DLDeviceType>(kDLAOCL), AOCLWorkspace::Global()) {}
// get the thread local entry
static AOCLThreadEntry* ThreadLocal();
};
} // namespace cl
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
/*!
* Copyright (c) 2018 by Contributors
* \file aocl_device_api.cc
*/
#include <tvm/runtime/registry.h>
#include <dmlc/thread_local.h>
#include "./aocl_common.h"
namespace tvm {
namespace runtime {
namespace cl {
// Hand out the AOCL thread entry through the OpenCL base-class pointer.
OpenCLThreadEntry* AOCLWorkspace::GetThreadEntry() {
  OpenCLThreadEntry* entry = AOCLThreadEntry::ThreadLocal();
  return entry;
}
// Return the single AOCLWorkspace instance, created on first use and held
// through a pointer to the OpenCLWorkspace base type.
const std::shared_ptr<OpenCLWorkspace>& AOCLWorkspace::Global() {
  static std::shared_ptr<OpenCLWorkspace> workspace(std::make_shared<AOCLWorkspace>());
  return workspace;
}
// Initialize the base OpenCL workspace with the AOCL-specific identifiers.
void AOCLWorkspace::Init() {
  const char* const type_key = "aocl";
  const char* const device_type = "accelerator";
  const char* const platform_name = "Intel(R) FPGA SDK for OpenCL(TM)";
  OpenCLWorkspace::Init(type_key, device_type, platform_name);
}
// A context belongs to this workspace only when its device type is kDLAOCL.
bool AOCLWorkspace::IsOpenCLDevice(TVMContext ctx) {
  const DLDeviceType aocl_type = static_cast<DLDeviceType>(kDLAOCL);
  return ctx.device_type == aocl_type;
}
// Storage type that hands out AOCLThreadEntry objects.
using AOCLThreadStore = dmlc::ThreadLocalStore<AOCLThreadEntry>;
// Return the entry provided by the thread local store.
AOCLThreadEntry* AOCLThreadEntry::ThreadLocal() {
  AOCLThreadEntry* entry = AOCLThreadStore::Get();
  return entry;
}
// Register the AOCL device API: the global workspace is returned as an
// opaque handle pointing at its DeviceAPI base.
TVM_REGISTER_GLOBAL("device_api.aocl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    DeviceAPI* api = AOCLWorkspace::Global().get();
    void* handle = static_cast<void*>(api);
    *rv = handle;
  });
} // namespace cl
} // namespace runtime
} // namespace tvm
/*!
* Copyright (c) 2018 by Contributors
* \file aocl_module.cc
*/
#include <dmlc/memory_io.h>
#include <tvm/runtime/registry.h>
#include <vector>
#include <string>
#include <unordered_map>
#include "./aocl_common.h"
#include "./aocl_module.h"
namespace tvm {
namespace runtime {
/*!
 * \brief OpenCL module node that uses the AOCL workspace.
 *
 * All arguments of the constructor are forwarded unchanged to OpenCLModuleNode.
 */
class AOCLModuleNode : public OpenCLModuleNode {
public:
// constructor: forwards data, format, function map and source to the base class
explicit AOCLModuleNode(std::string data,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap,
std::string source)
: OpenCLModuleNode(data, fmt, fmap, source) {}
// return the global AOCL workspace instead of the plain OpenCL one
const std::shared_ptr<cl::OpenCLWorkspace>& GetGlobalWorkspace() final;
};
const std::shared_ptr<cl::OpenCLWorkspace>& AOCLModuleNode::GetGlobalWorkspace() {
return cl::AOCLWorkspace::Global();
}
// Create an AOCLModuleNode from the given data, initialize it and wrap it
// into a Module.
Module AOCLModuleCreate(
    std::string data,
    std::string fmt,
    std::unordered_map<std::string, FunctionInfo> fmap,
    std::string source) {
  auto node = std::make_shared<AOCLModuleNode>(data, fmt, fmap, source);
  node->Init();
  return Module(node);
}
// Load an AOCL module from a binary file and its accompanying meta data file.
// The module is created with an empty source string.
Module AOCLModuleLoadFile(const std::string& file_name,
                          const std::string& format) {
  const std::string resolved_format = GetFileFormat(file_name, format);
  const std::string meta_path = GetMetaFilePath(file_name);

  std::string binary;
  LoadBinaryFromFile(file_name, &binary);

  std::unordered_map<std::string, FunctionInfo> func_info;
  LoadMetaDataFromFile(meta_path, &func_info);

  return AOCLModuleCreate(binary, resolved_format, func_info, std::string());
}
// Register the loader used for module files in the "aocx" format.
// args[0]: file name, args[1]: format.
TVM_REGISTER_GLOBAL("module.loadfile_aocx")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    std::string file_name = args[0];
    std::string format = args[1];
    *rv = AOCLModuleLoadFile(file_name, format);
  });
} // namespace runtime
} // namespace tvm
/*!
* Copyright (c) 2018 by Contributors
* \file aocl_module.h
* \brief Execution handling of OpenCL kernels for AOCL
*/
#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
#define TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
#include <tvm/runtime/packed_func.h>
#include <memory>
#include <vector>
#include <string>
#include "../../meta_data.h"
namespace tvm {
namespace runtime {
/*!
 * \brief Create an OpenCL module for AOCL from data.
 *
 * \param data The module data.
 * \param fmt The format of the data, can be "aocx"
 * \param fmap The map from function name to the information of each function.
 * \param source The source code accompanying the data; the AOCL build passes
 *        the generated OpenCL code, the file loader passes an empty string.
 * \return The created module.
 */
Module AOCLModuleCreate(
std::string data,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap,
std::string source);
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
......@@ -191,7 +191,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err);
OPENCL_CHECK_ERROR(err);
}
} else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") {
} else if (fmt_ == "xclbin" || fmt_ == "awsxclbin" || fmt_ == "aocx") {
const unsigned char* s = (const unsigned char *)data_.c_str();
size_t len = data_.length();
cl_int err;
......
......@@ -3,6 +3,7 @@ import numpy as np
import os
os.environ["XCL_EMULATION_MODE"] = "1"
os.environ["CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA"] = "1"
@tvm.register_func
def tvm_callback_vhls_postproc(code):
......@@ -43,6 +44,7 @@ def test_exp():
if "AWS_PLATFORM" in os.environ:
check_device("sdaccel -device=" + os.environ.get("AWS_PLATFORM"))
check_device("aocl -device=s5_ref -mattr=emulator")
def test_multi_kernel():
# graph
......@@ -80,6 +82,7 @@ def test_multi_kernel():
d.asnumpy(), a.asnumpy() * 2 + b.asnumpy(), rtol=1e-5)
check_device("sdaccel")
check_device("aocl -device=s5_ref -mattr=emulator")
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment