Commit c7e7e7f5 by Yida Wang Committed by Tianqi Chen

add two more device properties (#1124)

parent 202570e4
......@@ -21,7 +21,9 @@ enum DeviceAttrKind : int {
kWarpSize = 2,
kMaxSharedMemoryPerBlock = 3,
kComputeVersion = 4,
kDeviceName = 5
kDeviceName = 5,
kMaxClockRate = 6,
kMultiProcessorCount = 7
};
/*! \brief Number of bytes each allocation must align to */
......
......@@ -166,6 +166,18 @@ class TVMContext(ctypes.Structure):
return _api_internal._GetDeviceAttr(
self.device_type, self.device_id, 5)
@property
def max_clock_rate(self):
"""Return the maximum clock rate of the device.

Queries device attribute 6 for this context's ``device_type`` and
``device_id``; 6 is ``kMaxClockRate`` in the ``DeviceAttrKind`` enum.
"""
# NOTE(review): the unit looks backend-dependent. The CUDA backend forwards
# cudaDevAttrClockRate and the OpenCL backend forwards
# CL_DEVICE_MAX_CLOCK_FREQUENCY; the vendor docs give kHz and MHz
# respectively -- confirm before comparing values across device types.
# NOTE(review): the Metal, OpenGL, ROCm and Vulkan backends return without
# setting a value for this attribute; presumably this yields None here --
# verify against _GetDeviceAttr.
return _api_internal._GetDeviceAttr(
self.device_type, self.device_id, 6)
@property
def multi_processor_count(self):
"""Return the number of compute units of the device.

Queries device attribute 7 for this context's ``device_type`` and
``device_id``; 7 is ``kMultiProcessorCount`` in the ``DeviceAttrKind``
enum.
"""
# The CUDA backend answers this with cudaDevAttrMultiProcessorCount and the
# OpenCL backend with CL_DEVICE_MAX_COMPUTE_UNITS.
# NOTE(review): the Metal, OpenGL, ROCm and Vulkan backends return without
# setting a value for this attribute; presumably this yields None here --
# verify against _GetDeviceAttr.
return _api_internal._GetDeviceAttr(
self.device_type, self.device_id, 7)
def sync(self):
"""Synchronize until jobs finished at the context."""
check_call(_LIB.TVMSynchronize(self.device_type, self.device_id, None))
......
......@@ -62,6 +62,16 @@ class CUDADeviceAPI final : public DeviceAPI {
*rv = std::string(props.name);
return;
}
case kMaxClockRate: {
CUDA_CALL(cudaDeviceGetAttribute(
&value, cudaDevAttrClockRate, ctx.device_id));
break;
}
case kMultiProcessorCount: {
CUDA_CALL(cudaDeviceGetAttribute(
&value, cudaDevAttrMultiProcessorCount, ctx.device_id));
break;
}
}
*rv = value;
}
......
......@@ -42,6 +42,8 @@ void MetalWorkspace::GetAttr(
case kMaxSharedMemoryPerBlock: return;
case kComputeVersion: return;
case kDeviceName: return;
case kMaxClockRate: return;
case kMultiProcessorCount: return;
case kExist: break;
}
}
......
......@@ -42,6 +42,11 @@ void OpenCLWorkspace::GetAttr(
break;
}
case kWarpSize: {
/* TODO: the warp size of an OpenCL device is not always 1,
e.g. Intel GPUs have a sub-group concept which contains 8 - 32 work items,
corresponding to the number of SIMD entries the hardware configures.
We need to figure out a way to query this information from the hardware.
*/
*rv = 1;
break;
}
......@@ -62,6 +67,22 @@ void OpenCLWorkspace::GetAttr(
*rv = std::string(value);
break;
}
case kMaxClockRate: {
cl_uint value;
OPENCL_CALL(clGetDeviceInfo(
devices[index], CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(cl_uint), &value, nullptr));
*rv = static_cast<int32_t>(value);
break;
}
case kMultiProcessorCount: {
cl_uint value;
OPENCL_CALL(clGetDeviceInfo(
devices[index], CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), &value, nullptr));
*rv = static_cast<int32_t>(value);
break;
}
case kExist: break;
}
}
......
......@@ -176,7 +176,7 @@ class OpenCLModuleNode : public ModuleNode {
class OpenCLWrappedFunc {
public:
// initialize the CUDA function.
// initialize the OpenCL function.
void Init(OpenCLModuleNode* m,
std::shared_ptr<ModuleNode> sptr,
OpenCLModuleNode::KTRefEntry entry,
......
......@@ -98,6 +98,8 @@ void OpenGLWorkspace::GetAttr(
break;
}
case kDeviceName: return;
case kMaxClockRate: return;
case kMultiProcessorCount: return;
}
}
......
......@@ -52,6 +52,8 @@ class ROCMDeviceAPI final : public DeviceAPI {
return;
}
case kDeviceName: return;
case kMaxClockRate: return;
case kMultiProcessorCount: return;
}
*rv = value;
}
......
......@@ -74,6 +74,8 @@ void VulkanWorkspace::GetAttr(
break;
}
case kDeviceName: return;
case kMaxClockRate: return;
case kMultiProcessorCount: return;
case kExist: break;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment