[nvcc] enable multiple arch in one fatbin (#4377)

f8f4ceb2 · Yizhi Liu · Tianqi Chen · 500ff051 · f8f4ceb2 · f8f4ceb2
Commit f8f4ceb2 authored Nov 19, 2019 by Yizhi Liu Committed by Tianqi Chen Nov 19, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 3 deletions

python/tvm/autotvm/measure/measure_methods.py
+10 -2

python/tvm/contrib/nvcc.py
+4 -1

No files found.
--- a/python/tvm/autotvm/measure/measure_methods.py
+++ b/python/tvm/autotvm/measure/measure_methods.py
@@ -582,7 +582,13 @@ def check_remote(target, device_key, host=None, port=None, priority=100, timeout
 @register_func
 def tvm_callback_cuda_compile(code):
    """use nvcc to generate ptx code for better optimization"""
-    ptx = nvcc.compile_cuda(code, target="ptx", arch=AutotvmGlobalScope.current.cuda_target_arch)
+    curr_cuda_target_arch = AutotvmGlobalScope.current.cuda_target_arch
+    # e.g., target arch could be [
+    #   "-gencode", "arch=compute_52,code=sm_52",
+    #   "-gencode", "arch=compute_70,code=sm_70"
+    # ]
+    target = "fatbin" if isinstance(curr_cuda_target_arch, list) else "ptx"
+    ptx = nvcc.compile_cuda(code, target=target, arch=AutotvmGlobalScope.current.cuda_target_arch)
    return ptx


@@ -591,8 +597,10 @@ def set_cuda_target_arch(arch):

    Parameters
    ----------
-    arch: str
+    arch: str or list
        The argument of nvcc -arch. (e.g. "sm_51", "sm_62")
+        It can also be a list of gencode arguments passed to the nvcc command line,
+        e.g., ["-gencode", "arch=compute_52,code=sm_52", "-gencode", "arch=compute_70,code=sm_70"]
    """
    AutotvmGlobalScope.current.cuda_target_arch = arch


--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -74,7 +74,10 @@ def compile_cuda(code,
    file_target = path_target if path_target else temp_target
    cmd = ["nvcc"]
    cmd += ["--%s" % target, "-O3"]
-    cmd += ["-arch", arch]
+    if isinstance(arch, list):
+        cmd += arch
+    else:
+        cmd += ["-arch", arch]

    if options:
        if isinstance(options, str):