"""Tests for the ThreadSync and CoProcSync IR passes (test_pass_storage_sync.py)."""
import tvm

def test_storage_sync():
    """Check that ThreadSync emits a ``tvm_storage_sync`` call.

    Builds a two-stage computation (``A1`` copies ``A``, ``A2`` adds 3),
    puts ``A1`` in the "shared" scope, computes it at the outer split axis
    of ``A2`` (bound to ``blockIdx.x``), runs the statement through
    InferBound / ScheduleOps / StorageFlatten / MakeAPI / SplitHostDevice,
    and then applies ``ThreadSync(f, "shared")``.
    """
    m = tvm.var('m')
    l = tvm.var('l')
    A = tvm.placeholder((m, l), name='A')

    A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')

    s = tvm.create_schedule(A2.op)
    # Only the outer axis is used below; the inner one is left untouched.
    xo, _xi = s[A2].split(A2.op.axis[0], factor=8)
    s[A2].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[A1].compute_at(s[A2], xo)
    s[A1].set_scope("shared")

    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)

    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
    f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)

    flist = tvm.ir_pass.SplitHostDevice(f)
    # NOTE(review): index 1 is presumably the device-side function produced
    # by the split -- confirm against SplitHostDevice's return order.
    f = flist[1]
    f = tvm.ir_pass.ThreadSync(f, "shared")

    # The second statement of the flattened inner body must be the sync call.
    body_list = tvm.make.stmt_list(f.body.body.body.body)
    assert body_list[1].value.name == "tvm_storage_sync"


def test_coproc_sync():
    """Check the barrier and sync calls inserted by CoProcSync.

    Registers a ``MemoryInfo`` node for the "global.cache" scope, builds IR
    in which a buffer allocated in that scope is updated outside and inside
    a ``coproc_scope`` region, runs ``CoProcSync`` and asserts on the names
    and fourth argument of the calls found in the resulting statement list.
    """
    @tvm.register_func("tvm.info.mem.global.cache")
    def meminfo_cache():
        return tvm.make.node(
            "MemoryInfo",
            unit_bits=8,
            max_simd_bits=32,
            max_num_bits=128,
            head_address=tvm.call_extern("handle", "global_cache"))

    # Statement order below matters: the builder emits IR in call order.
    ib = tvm.ir_builder.create()
    n = tvm.var("n")
    cp = tvm.thread_axis((0, 1), "cop")
    A = ib.allocate("float32", 128, name="A", scope="global.cache")
    with ib.for_range(0, n, name="i") as i:
        A[i] = A[i] + 1
        with ib.for_range(0, 8, name="k") as k:
            with ib.for_range(0, 10, name="j") as j:
                ib.scope_attr(cp, "coproc_scope", 1)
                A[j] = A[j + k * 10] + 2
    stmt = ib.get()
    stmt = tvm.ir_pass.CoProcSync(stmt)

    body = stmt.body.body.body
    blist = tvm.make.stmt_list(body)
    # NOTE(review): 80 and 10 presumably are the extents of the regions read
    # (A[j + k * 10], j < 10, k < 8) and written (A[j], j < 10) -- confirm.
    assert blist[1].value.name == "cop.coproc_read_barrier"
    assert blist[1].value.args[3].value == 80
    assert blist[-2].value.name == "cop.coproc_sync"
    assert blist[-1].value.name == "cop.coproc_write_barrier"
    assert blist[-1].value.args[3].value == 10

def test_coproc_sync2():
    """Run CoProcSync on IR that mixes a virtual thread with coproc scopes.

    The test makes no assertions on the result; it only requires that
    ``CoProcSync`` completes without raising.
    """
    # Statement order below matters: the builder emits IR in call order.
    ib = tvm.ir_builder.create()
    n = tvm.var("n")
    cp = tvm.thread_axis((0, 1), "cop")
    ty = tvm.thread_axis("cthread")
    A = ib.allocate("float32", 128, name="A")
    ib.scope_attr(ty, "virtual_thread", 2)
    # Store once under coproc_scope 2 before entering the loop.
    with ib.new_scope():
        ib.scope_attr(cp, "coproc_scope", 2)
        A[ty] = 0.0
    # Inside the loop, store under coproc_scope 1 and then coproc_scope 2.
    with ib.for_range(0, n, name="i") as i:
        with ib.new_scope():
            ib.scope_attr(cp, "coproc_scope", 1)
            A[ty] = 1.0
        with ib.new_scope():
            ib.scope_attr(cp, "coproc_scope", 2)
            A[ty] = 1.0
    stmt = ib.get()
    stmt = tvm.ir_pass.CoProcSync(stmt)

# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    test_coproc_sync()
    test_storage_sync()
    test_coproc_sync2()