test_pass_storage_sync.py 4.81 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
17 18 19
import tvm

def test_storage_sync():
20 21
    m = tvm.var('m')
    l = tvm.var('l')
22 23 24 25 26
    A = tvm.placeholder((m, l), name='A')

    A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')

27
    s = tvm.create_schedule(A2.op)
28 29
    xo, xi = s[A2].split(A2.op.axis[0], factor=8)
    s[A2].bind(xo, tvm.thread_axis("blockIdx.x"))
30 31 32 33
    s[A1].compute_at(s[A2], xo)
    s[A1].set_scope("shared")

    bounds = tvm.schedule.InferBound(s)
34
    assert isinstance(bounds, tvm.container.Map)
35
    stmt = tvm.schedule.ScheduleOps(s, bounds)
36 37
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
38
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
39
    f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
40 41
    flist = tvm.ir_pass.SplitHostDevice(f)
    f = flist[1]
42
    f = tvm.ir_pass.ThreadSync(f, "shared")
43 44 45 46 47
    body_list = tvm.make.stmt_list(f.body.body.body.body)
    assert(body_list[1].value.name == "tvm_storage_sync")


def test_coproc_sync():
48 49 50 51 52 53 54 55
    @tvm.register_func("tvm.info.mem.global.cache")
    def meminfo_cache():
        return tvm.make.node(
            "MemoryInfo",
            unit_bits=8,
            max_simd_bits=32,
            max_num_bits=128,
            head_address=tvm.call_extern("handle", "global_cache"))
56 57 58
    ib = tvm.ir_builder.create()
    n = tvm.var("n")
    cp = tvm.thread_axis((0, 1), "cop")
59
    A = ib.allocate("float32", 128, name="A", scope="global.cache")
60 61
    with ib.for_range(0, n, name="i") as i:
        A[i] = A[i] + 1
62 63 64 65 66 67 68 69 70 71 72 73 74
        with ib.for_range(0, 8, name="k") as k:
            with ib.for_range(0, 10, name="j") as j:
                ib.scope_attr(cp, "coproc_scope", 1)
                A[j] = A[j + k * 10] + 2
    stmt = ib.get()
    stmt = tvm.ir_pass.CoProcSync(stmt)
    body = stmt.body.body.body
    blist = tvm.make.stmt_list(body)
    assert(blist[1].value.name == "cop.coproc_read_barrier")
    assert(blist[1].value.args[3].value == 80)
    assert(blist[-2].value.name == "cop.coproc_sync")
    assert(blist[-1].value.name == "cop.coproc_write_barrier")
    assert(blist[-1].value.args[3].value == 10)
75

76

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
def test_coproc_sync2():
    ib = tvm.ir_builder.create()
    n = tvm.var("n")
    cp = tvm.thread_axis((0, 1), "cop")
    ty = tvm.thread_axis("cthread")
    A = ib.allocate("float32", 128, name="A")
    ib.scope_attr(ty, "virtual_thread", 2)
    with ib.new_scope():
        ib.scope_attr(cp, "coproc_scope", 2)
        A[ty] = 0.0
    with ib.for_range(0, n, name="i") as i:
        with ib.new_scope():
            ib.scope_attr(cp, "coproc_scope", 1)
            A[ty] = 1.0
        with ib.new_scope():
            ib.scope_attr(cp, "coproc_scope", 2)
            A[ty] = 1.0
    stmt = ib.get()
    stmt = tvm.ir_pass.CoProcSync(stmt)

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
def test_coproc_sync3():
    def __check_list(tvm_array, py_list):
        for ti, li in zip(tvm_array, py_list):
            if ti.value != li:
                return False
        return True

    ib = tvm.ir_builder.create()
    n = tvm.var("n")
    cp = tvm.thread_axis((0, 1), "cop")
    A = ib.allocate("float32", 128, name="A", scope="global.cache")
    with ib.for_range(0, n, name="i") as i:
        with ib.for_range(0, n, name="i") as j:
            with ib.new_scope():
                ib.scope_attr(cp, "coproc_scope", 1)
                A[i] = 1.0
            with ib.new_scope():
                ib.scope_attr(cp, "coproc_scope", 2)
                A[i] = 1.0
    with ib.new_scope():
        ib.scope_attr(cp, "coproc_scope", 3)
        A[0] = 0.0
119

120 121 122 123 124 125 126 127 128 129 130
    stmt = ib.get()
    stmt = tvm.ir_pass.CoProcSync(stmt)
    slist = tvm.make.stmt_list(stmt.first.body.body)
    push_st = slist[2]
    slist = tvm.make.stmt_list(slist[-1])
    pop_st = slist[0].body.first

    assert(push_st.value.name == "cop.coproc_dep_push")
    assert(__check_list(push_st.value.args, [2,3]))
    assert(pop_st.value.name == "cop.coproc_dep_pop")
    assert(__check_list(pop_st.value.args, [2,3]))
131

132

133
if __name__ == "__main__":
134
    test_coproc_sync()
135
    test_storage_sync()
136
    test_coproc_sync2()
137
    test_coproc_sync3()