nvcc.py 3.61 KB
Newer Older
1
# pylint: disable=invalid-name
2
"""Utility to invoke nvcc compiler in the system"""
3
from __future__ import absolute_import as _abs
4

5
import subprocess
6 7
import os
import warnings
8
from . import util
9
from .. import ndarray as nd
10 11
from ..api import register_func
from .._ffi.base import py_str
12

13 14 15 16 17
def compile_cuda(code,
                 target="ptx",
                 arch=None,
                 options=None,
                 path_target=None):
    """Compile cuda code with NVCC from env.

    Parameters
    ----------
    code : str
        The cuda code.

    target : str
        The target format, one of "cubin", "ptx" or "fatbin".

    arch : str, optional
        The architecture passed to ``nvcc -arch`` (for example "sm_52").
        When None, it is built from the compute version reported by
        ``nd.gpu(0)``.

    options : str or list of str, optional
        The additional options. A single string is passed to nvcc as one
        argument; a list is appended element by element.

    path_target : str, optional
        Output file. When omitted, the result is written to a temporary file.

    Returns
    -------
    cubin : bytearray
        The bytearray of the cubin

    Raises
    ------
    ValueError
        If ``target`` is not supported, or ``arch`` is None and no GPU is
        available to detect it from.
    RuntimeError
        If nvcc exits with a non-zero status or produces an empty output file.
    """
    # Validate before touching the file system so that bad input does not
    # leave a temporary directory behind.
    if target not in ["cubin", "ptx", "fatbin"]:
        raise ValueError("target must be in cubin, ptx, fatbin")

    # Keep `temp` referenced until the function returns: the temporary files
    # below live inside it.
    temp = util.tempdir()
    temp_code = temp.relpath("my_kernel.cu")
    temp_target = temp.relpath("my_kernel.%s" % target)

    with open(temp_code, "w") as out_file:
        out_file.write(code)

    if arch is None:
        if nd.gpu(0).exist:
            # auto detect the compute arch argument
            arch = "sm_" + "".join(nd.gpu(0).compute_version.split('.'))
        else:
            raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")

    file_target = path_target if path_target else temp_target
    cmd = ["nvcc"]
    cmd += ["--%s" % target, "-O3"]
    cmd += ["-arch", arch]
    cmd += ["-o", file_target]

    if options:
        if isinstance(options, str):
            # `cmd += "-foo"` would add one argument per character.
            cmd += [options]
        else:
            cmd += options
    cmd += [temp_code]

    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    (out, _) = proc.communicate()

    if proc.returncode != 0:
        msg = "Compilation error:\n"
        msg += py_str(out)
        raise RuntimeError(msg)

    # Close the result file deterministically instead of relying on the GC.
    with open(file_target, "rb") as result_file:
        data = bytearray(result_file.read())
    if not data:
        raise RuntimeError(
            "Compilation error: empty result is generated")
    return data


def find_cuda_path():
    """Utility function to find cuda path

    The lookup order is: the ``CUDA_PATH`` environment variable, the
    location of the ``nvcc`` binary reported by ``which``, and finally
    ``/usr/local/cuda``.

    Returns
    -------
    path : str
        Path to cuda root.

    Raises
    ------
    RuntimeError
        If none of the lookups succeeds.
    """
    if "CUDA_PATH" in os.environ:
        return os.environ["CUDA_PATH"]

    try:
        proc = subprocess.Popen(
            ["which", "nvcc"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (out, _) = proc.communicate()
    except OSError:
        # `which` itself is not available; fall through to the default
        # install location instead of leaking an unrelated exception.
        proc = None

    if proc is not None and proc.returncode == 0:
        nvcc_path = py_str(out).strip()
        # nvcc lives in <cuda root>/bin/nvcc, so go two levels up.
        return os.path.abspath(os.path.join(nvcc_path, "../.."))

    cuda_path = "/usr/local/cuda"
    if os.path.exists(os.path.join(cuda_path, "bin/nvcc")):
        return cuda_path
    raise RuntimeError("Cannot find cuda path")


@register_func("tvm_callback_libdevice_path")
def find_libdevice_path(arch):
    """Utility function to find libdevice

    Files under ``<cuda root>/nvvm/libdevice`` whose name carries a
    ``compute_XY`` component are matched against ``arch``: the highest
    version not exceeding ``arch`` wins. When no such file matches, a
    libdevice bitcode file without a per-architecture version in its name
    (for example ``libdevice.10.bc``) is used instead.

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Path to the selected libdevice file.

    Raises
    ------
    RuntimeError
        If the libdevice directory cannot be listed or contains no usable
        file.
    """
    cuda_path = find_cuda_path()
    lib_path = os.path.join(cuda_path, "nvvm/libdevice")
    try:
        # Sorted so that the choice does not depend on directory order.
        candidates = sorted(os.listdir(lib_path))
    except OSError:
        raise RuntimeError("Cannot find libdevice for arch {}".format(arch))

    selected_ver = 0
    selected_path = None
    generic_path = None

    for fn in candidates:
        if not fn.startswith("libdevice"):
            continue
        try:
            # e.g. "libdevice.compute_30.10.bc" -> 30
            ver = int(fn.split(".")[-3].split("_")[-1])
        except (IndexError, ValueError):
            # No per-architecture version in the name; remember the first
            # bitcode file as an architecture-independent fallback.
            if generic_path is None and fn.endswith(".bc"):
                generic_path = fn
            continue
        if selected_ver < ver <= arch:
            selected_ver = ver
            selected_path = fn

    if selected_path is None:
        selected_path = generic_path
    if selected_path is None:
        raise RuntimeError("Cannot find libdevice for arch {}".format(arch))
    return os.path.join(lib_path, selected_path)


def callback_libdevice_path(arch):
    """Best-effort lookup of the libdevice path

    Parameters
    ----------
    arch : int
        The compute architecture in int

    Returns
    -------
    path : str
        Result of find_libdevice_path, or an empty string (after emitting
        a warning) when that lookup raises RuntimeError.
    """
    try:
        libdevice = find_libdevice_path(arch)
    except RuntimeError:
        warnings.warn("Cannot find libdevice path")
        libdevice = ""
    return libdevice