[VTA] Bug fix for padded load with large inputs (#4293)

* bug fix for padded load with large inputs
* Update TensorLoad.scala
* Update test_vta_insn.py

[VTA] Bug fix for padded load with large inputs (#4293)
* bug fix for padded load with large inputs
* Update TensorLoad.scala
* Update test_vta_insn.py
5b1ca85d · Liangfu Chen · Thierry Moreau · 9e6371fb · 5b1ca85d · 5b1ca85d
Commit 5b1ca85d authored Nov 16, 2019 by Liangfu Chen Committed by Thierry Moreau Nov 15, 2019
Show whitespace changes
Inline Side-by-side

Showing with 16 additions and 11 deletions

vta/hardware/chisel/src/main/scala/core/TensorLoad.scala
+6 -7

vta/tests/python/unittest/test_vta_insn.py
+10 -4

No files found.
--- a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala
+++ b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala
@@ -108,15 +108,16 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)(
          .otherwise {
            state := sIdle
          }
-        }.elsewhen(dataCtrl.io.stride || dataCtrl.io.split) {
+        }.elsewhen(dataCtrl.io.stride) {
          when(dec.xpad_1 =/= 0.U) {
            state := sXPad1
          }.elsewhen(dec.xpad_0 =/= 0.U) {
            state := sXPad0
-            }
-            .otherwise {
+          }.otherwise {
            state := sReadCmd
          }
+        }.elsewhen(dataCtrl.io.split) {
+          state := sReadCmd
        }
      }
    }
@@ -168,13 +169,11 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)(
  xPadCtrl0.io.start := dec.xpad_0 =/= 0.U &
    ((state === sIdle & io.start) |
      (state === sYPad0 & yPadCtrl0.io.done) |
-      (io.vme_rd.data
-        .fire() & ~dataCtrlDone & (dataCtrl.io.stride | dataCtrl.io.split) & dec.xpad_1 === 0.U) |
+      (io.vme_rd.data.fire() & ~dataCtrlDone & dataCtrl.io.stride & dec.xpad_1 === 0.U) |
      (state === sXPad1 & xPadCtrl1.io.done & ~dataCtrlDone))

  xPadCtrl1.io.start := dec.xpad_1 =/= 0.U & io.vme_rd.data.fire() &
-    ((dataCtrl.io.done) |
-      (~dataCtrl.io.done & (dataCtrl.io.stride | dataCtrl.io.split) & dec.xpad_1 =/= 0.U))
+    ((dataCtrl.io.done) | (~dataCtrl.io.done & dataCtrl.io.stride & dec.xpad_1 =/= 0.U))

  yPadCtrl0.io.inst := io.inst
  yPadCtrl1.io.inst := io.inst

--- a/vta/tests/python/unittest/test_vta_insn.py
+++ b/vta/tests/python/unittest/test_vta_insn.py
@@ -24,6 +24,7 @@ import vta
 import vta.testing
 from vta.testing import simulator

+np.random.seed(0xdeadb)

 def test_save_load_out():
    """Test save/store output command"""
@@ -88,11 +89,10 @@ def test_save_load_out():
 def test_padded_load():
    """Test padded load."""
    def _run(env, remote):
+        def check_padded_load(pad_before, pad_after, test_name=None):
            # declare
            n = 3
            m = 5
-        pad_before = [2, 1, 0, 0]
-        pad_after = [1, 2, 0, 0]
            x = tvm.placeholder(
                (n, m, env.BATCH, env.BLOCK_OUT),
                name="x",
@@ -126,7 +126,7 @@ def test_padded_load():
            f = remote.load_module("padded_load.o")
            # verify
            ctx = remote.ext_dev(0)
-        x_np = np.random.randint(-10, 10, size=(
+            x_np = np.random.randint(0, 10, size=(
                n, m, env.BATCH, env.BLOCK_OUT)).astype(x.dtype)
            y_np = np.zeros((n + pad_before[0] + pad_after[0],
                             m + pad_before[1] + pad_after[1],
@@ -147,10 +147,16 @@ def test_padded_load():

            if env.TARGET in ["sim", "tsim"]:
                sim_stats = simulator.stats()
-            print("Padded load execution statistics:")
+                print("Padded {} load execution statistics:".format(test_name))
                for k, v in sim_stats.items():
                    print("\t{:<16}: {:>16}".format(k, v))

+        check_padded_load([2, 0, 0, 0], [0, 0, 0, 0], test_name="Y0")
+        check_padded_load([0, 2, 0, 0], [0, 0, 0, 0], test_name="Y1")
+        check_padded_load([0, 0, 0, 0], [2, 0, 0, 0], test_name="X0")
+        check_padded_load([0, 0, 0, 0], [0, 2, 0, 0], test_name="X1")
+        check_padded_load([1, 1, 0, 0], [1, 1, 0, 0], test_name="all")
+
    vta.testing.run(_run)