nvcc.py 6.62 KB
Newer Older
1
# pylint: disable=invalid-name
2
"""Utility to invoke nvcc compiler in the system"""
3
from __future__ import absolute_import as _abs
4

5
import subprocess
6 7
import os
import warnings
8
from . import util
9
from .. import ndarray as nd
10 11
from ..api import register_func
from .._ffi.base import py_str
12

13 14 15 16 17
def compile_cuda(code,
                 target="ptx",
                 arch=None,
                 options=None,
                 path_target=None):
    """Compile cuda code with NVCC from env.

    Parameters
    ----------
    code : str
        The cuda code.

    target : str
        The target format, one of "cubin", "ptx" or "fatbin".

    arch : str
        The architecture passed to ``nvcc -arch`` (e.g. "sm_61").
        When None, it is derived from the compute version reported by
        ``nd.gpu(0)``.

    options : str or list of str
        The additional options

    path_target : str, optional
        Output file. When not given, the output is written to a file
        inside a temporary directory.

    Return
    ------
    cubin : bytearray
        The bytearray of the cubin

    Raises
    ------
    ValueError
        If target is not supported, options is neither str nor list, or
        arch is None and ``nd.gpu(0)`` does not exist.
    RuntimeError
        If nvcc exits with a non-zero status or the output file is empty.
    """
    # Validate the arguments first so that bad input is rejected before
    # any temporary file is created or the compiler is launched.
    if target not in ["cubin", "ptx", "fatbin"]:
        raise ValueError("target must be in cubin, ptx, fatbin")

    extra_options = []
    if options:
        if isinstance(options, str):
            extra_options = [options]
        elif isinstance(options, list):
            extra_options = list(options)
        else:
            raise ValueError("options must be str or list of str")

    if arch is None:
        if nd.gpu(0).exist:
            # auto detect the compute arch argument
            arch = "sm_" + "".join(nd.gpu(0).compute_version.split('.'))
        else:
            raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")

    # Keep a reference to the temp dir object until the function returns;
    # the source and (by default) the output live inside it.
    temp = util.tempdir()
    temp_code = temp.relpath("my_kernel.cu")
    temp_target = temp.relpath("my_kernel.%s" % target)

    with open(temp_code, "w") as out_file:
        out_file.write(code)

    file_target = path_target if path_target else temp_target
    cmd = ["nvcc"]
    cmd += ["--%s" % target, "-O3"]
    cmd += ["-arch", arch]
    cmd += extra_options
    cmd += ["-o", file_target]
    cmd += [temp_code]

    # stderr is merged into stdout so the error message contains the
    # complete compiler output.
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (out, _) = proc.communicate()

    if proc.returncode != 0:
        msg = "Compilation error:\n"
        msg += py_str(out)
        raise RuntimeError(msg)

    # Read through a context manager so the handle is closed promptly.
    with open(file_target, "rb") as in_file:
        data = bytearray(in_file.read())
    if not data:
        raise RuntimeError(
            "Compilation error: empty result is generated")
    return data
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112

def find_cuda_path():
    """Utility function to find cuda path

    The lookup order is: the ``CUDA_PATH`` environment variable, the
    location of ``nvcc`` as reported by ``which``, and finally
    ``/usr/local/cuda`` if it contains ``bin/nvcc``.

    Returns
    -------
    path : str
        Path to cuda root.

    Raises
    ------
    RuntimeError
        If none of the lookups succeeds.
    """
    if "CUDA_PATH" in os.environ:
        return os.environ["CUDA_PATH"]
    cmd = ["which", "nvcc"]
    try:
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (out, _) = proc.communicate()
    except OSError:
        # The `which` executable itself could not be launched; treat it
        # the same as "nvcc not on PATH" and try the default location.
        proc = None
    if proc is not None and proc.returncode == 0:
        out = py_str(out)
        # nvcc lives in <cuda root>/bin/nvcc, so go two levels up.
        return os.path.abspath(os.path.join(str(out).strip(), "../.."))
    cuda_path = "/usr/local/cuda"
    if os.path.exists(os.path.join(cuda_path, "bin/nvcc")):
        return cuda_path
    raise RuntimeError("Cannot find cuda path")


113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
def get_cuda_version(cuda_path):
    """Utility function to get cuda version

    Reads the first line of ``<cuda_path>/version.txt`` and parses its
    third space-separated field.

    NOTE: only the first two characters of that field are converted, so
    "9.0.176" yields 9.0 and "10.1.243" yields 10.0 (the minor version is
    not preserved).

    Parameters
    ----------
    cuda_path : str
        Path to cuda root.

    Returns
    -------
    version : float
        The cuda version

    Raises
    ------
    RuntimeError
        If the version file cannot be opened or its content cannot be
        parsed.
    """
    version_file_path = os.path.join(cuda_path, "version.txt")
    try:
        with open(version_file_path) as f:
            version_str = f.readline().replace('\n', '').replace('\r', '')
        return float(version_str.split(" ")[2][:2])
    except (IOError, OSError, IndexError, ValueError):
        # Only I/O and parsing failures are translated; interrupts such as
        # KeyboardInterrupt are allowed to propagate.
        raise RuntimeError("Cannot read cuda version file")


135 136 137 138 139 140 141 142
@register_func("tvm_callback_libdevice_path")
def find_libdevice_path(arch):
    """Utility function to find libdevice

    For CUDA 9.0/9.1 the single ``libdevice.10.bc`` is returned. Otherwise
    the ``nvvm/libdevice`` directory is scanned for the file whose embedded
    architecture number is the largest one not exceeding ``arch``.

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Path to libdevice.

    Raises
    ------
    RuntimeError
        If no suitable libdevice file is found (also propagated from the
        cuda path / cuda version lookups).
    """
    cuda_path = find_cuda_path()
    lib_path = os.path.join(cuda_path, "nvvm/libdevice")
    cuda_ver = get_cuda_version(cuda_path)
    if cuda_ver in (9.0, 9.1):
        return os.path.join(lib_path, "libdevice.10.bc")

    selected_ver = 0
    selected_path = None
    for fn in os.listdir(lib_path):
        if not fn.startswith("libdevice"):
            continue
        try:
            # e.g. "libdevice.compute_30.10.bc" -> 30
            ver = int(fn.split(".")[-3].split("_")[-1])
        except (IndexError, ValueError):
            # Names without an architecture component, such as
            # "libdevice.10.bc", cannot be ranked; skip them here.
            continue
        if selected_ver < ver <= arch:
            selected_ver = ver
            selected_path = fn
    if selected_path is None:
        # Fall back to the architecture-independent file when the
        # directory provides it.
        unified_path = os.path.join(lib_path, "libdevice.10.bc")
        if os.path.exists(unified_path):
            return unified_path
        raise RuntimeError("Cannot find libdevice for arch {}".format(arch))
    return os.path.join(lib_path, selected_path)
168 169 170 171 172 173 174 175


def callback_libdevice_path(arch):
    """Best-effort lookup of the libdevice path.

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Path to libdevice, or an empty string (after emitting a warning)
        when the lookup raises RuntimeError.
    """
    try:
        libdevice = find_libdevice_path(arch)
    except RuntimeError:
        warnings.warn("Cannot find libdevice path")
        libdevice = ""
    return libdevice
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250


def parse_compute_version(compute_version):
    """Split a compute capability string into its major and minor numbers

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "6.0")

    Returns
    -------
    major : int
        major version number
    minor : int
        minor version number

    Raises
    ------
    RuntimeError
        If the string has no minor component or a component is not an
        integer.
    """
    fields = compute_version.split('.')
    try:
        # Only the first two dot-separated fields are used.
        return int(fields[0]), int(fields[1])
    except (IndexError, ValueError) as err:
        raise RuntimeError("Compute version parsing error: " + str(err))


def have_fp16(compute_version):
    """Either fp16 support is provided in the compute capability or not

    Parameters
    ----------
    compute_version: str
        compute capability of a GPU (e.g. "6.0")

    Returns
    -------
    supported : bool
        True for compute capability 5.3, 6.x except 6.1, and 7.0 or newer.
    """
    major, minor = parse_compute_version(compute_version)
    # fp 16 support in reference to:
    # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#arithmetic-instructions
    if major == 5 and minor == 3:
        return True
    # NOTE: exclude compute capability 6.1 devices although it is actually available
    #       to compute fp16, because these devices only have low-rate fp16 performance.
    if major == 6 and minor != 1:
        return True
    # 7.x and every later architecture are accepted, so newer GPUs are not
    # rejected just because they postdate this table.
    if major >= 7:
        return True

    return False

def have_int8(compute_version):
    """Either int8 support is provided in the compute capability or not

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "6.1")

    Returns
    -------
    supported : bool
        True for compute capability 6.1 or newer.
    """
    major, minor = parse_compute_version(compute_version)
    # The int8 dot-product instructions are available from 6.1 onwards,
    # so accept every capability that is not older than 6.1.
    if (major, minor) >= (6, 1):
        return True

    return False

def have_tensorcore(compute_version):
    """Either TensorCore support is provided in the compute capability or not

    Parameters
    ----------
    compute_version : str
        compute capability of a GPU (e.g. "7.0")

    Returns
    -------
    supported : bool
        True for compute capability 7.0 or newer.
    """
    major, _ = parse_compute_version(compute_version)
    # Accept 7.x and every later major version rather than 7.x only.
    if major >= 7:
        return True

    return False