import tvm

def test_lower_warp_mem():
    """Check the result of ``LowerWarpMemory`` on a "warp"-scoped cache stage.

    Builds ``B[i] = A[i] + 3`` over 128 elements, reads ``A`` through a
    cache stage placed in "warp" scope, binds both stages to the same
    ``threadIdx.x`` axis, lowers the schedule, and runs
    ``tvm.ir_pass.LowerWarpMemory`` on the device function.  The test then
    asserts that the lowered body carries the value ``"local"`` and that
    the first extent found one level below it equals 2.
    """
    length = 128
    src = tvm.placeholder((length,), name='A')
    # NOTE: the lambda argument name is kept as ``i`` on purpose; TVM uses
    # it when naming the compute axis.
    dst = tvm.compute((length,), lambda i: src[i] + 3, name='B')

    sched = tvm.create_schedule(dst.op)
    cached = sched.cache_read(src, "warp", [dst])

    # Tile the output: chunks of 32 per block, each chunk split again by 16
    # with the inner part bound to threadIdx.x.
    out_stage = sched[dst]
    block_part, chunk = out_stage.split(dst.op.axis[0], factor=32)
    _, lane = out_stage.split(chunk, factor=16)
    thread_x = tvm.thread_axis("threadIdx.x")
    out_stage.bind(lane, thread_x)
    out_stage.bind(block_part, tvm.thread_axis("blockIdx.x"))

    # Compute the cache once per block and share the same thread axis.
    cache_stage = sched[cached]
    cache_stage.compute_at(out_stage, block_part)
    _, cache_lane = cache_stage.split(cache_stage.op.axis[0], factor=16)
    cache_stage.bind(cache_lane, thread_x)

    lowered = tvm.lower(sched, [src, dst])
    _, device_func = tvm.ir_pass.SplitHostDevice(lowered)
    # Second argument is presumably the warp size -- TODO confirm against
    # the pass signature.
    device_func = tvm.ir_pass.LowerWarpMemory(device_func, 16)

    # NOTE(review): the inspected node looks like the storage-scope
    # attribute wrapping the cache allocation; the extent of 2 is
    # presumably the 32 cached elements spread over 16 lanes -- confirm.
    scoped = device_func.body.body
    assert scoped.value.value == "local"
    assert scoped.body.extents[0].value == 2


# Allow running this test file directly as a script, without a test runner.
if __name__ == "__main__":
    test_lower_warp_mem()