import tvm

def test_virtual_thread():
    """Lower a vthread-bound schedule and run InjectVirtualThread on it.

    Builds a two-stage computation (``A1`` copies ``A``; ``A2`` adds 3 to
    ``A1``), splits the ``A2`` axis into two parts whose outer axis is bound
    to a ``vthread`` thread axis, splits the remaining inner axis by 8 and
    computes ``A1`` at that second outer axis. The schedule is then lowered
    through InferBound, ScheduleOps, StorageFlatten, Simplify and
    InjectVirtualThread.

    The final statement is only printed; the sole assertion is that
    InferBound returns a ``tvm.container.Map``.
    """
    length = tvm.var('m')
    src = tvm.placeholder((length, ), name='A')
    copied = tvm.compute((length,), lambda i: src[i], name='A1')
    shifted = tvm.compute((length,), lambda i: copied[i] + 3, name='A2')

    sched = tvm.create_schedule(shifted.op)
    out_stage = sched[shifted]
    vthread_axis = tvm.thread_axis("vthread", name="vx")

    # First split: two outer parts, with the outer axis bound to the vthread.
    vthread_outer, vthread_inner = out_stage.split(shifted.op.axis[0], nparts=2)
    out_stage.bind(vthread_outer, vthread_axis)

    # Second split: tile what is left by 8 and attach A1 at the tile axis.
    tile_outer, _tile_inner = out_stage.split(vthread_inner, factor=8)
    sched[copied].compute_at(out_stage, tile_outer)

    bound_map = tvm.schedule.InferBound(sched)
    assert isinstance(bound_map, tvm.container.Map)
    scheduled = tvm.schedule.ScheduleOps(sched, bound_map)

    # Bind the external tensors to explicit buffers for StorageFlatten.
    buffer_binds = {
        src: tvm.decl_buffer(src.shape, src.dtype, name='A'),
        shifted: tvm.decl_buffer(shifted.shape, shifted.dtype, name='A2'),
    }
    flattened = tvm.ir_pass.StorageFlatten(scheduled, buffer_binds, 64)
    simplified = tvm.ir_pass.Simplify(flattened)
    print(tvm.ir_pass.InjectVirtualThread(simplified))

# Allow running this test file directly as a script, without a test runner.
if __name__ == "__main__":
    test_virtual_thread()