Commit 1c83b673 by James Greenhalgh Committed by James Greenhalgh

[Patch AArch64] Fix register constraints for lane intrinsics.

gcc/
	* config/aarch64/aarch64-simd.md
	(aarch64_sqdml<SBINQOPS:as>l_n<mode>_internal): Use
	<vwx> iterator to ensure correct register choice.
	(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Likewise.
	(aarch64_sqdmull_n<mode>): Likewise.
	(aarch64_sqdmull2_n<mode>_internal): Likewise.
	* config/aarch64/arm_neon.h
	(vml<as><q>_lane<q>_<su>16): Use 'x' constraint for element vector.
	(vml<as><q>_n_<su>16): Likewise.
	(vml<as>l_high_lane<q>_<su>16): Likewise.
	(vml<as>l_high_n_<su>16): Likewise.
	(vml<as>l_lane<q>_<su>16): Likewise.
	(vml<as>l_n_<su>16): Likewise.
	(vmul<q>_lane<q>_<su>16): Likewise.
	(vmul<q>_n_<su>16): Likewise.
	(vmull_lane<q>_<su>16): Likewise.
	(vmull_n_<su>16): Likewise.
	(vmull_high_lane<q>_<su>16): Likewise.
	(vmull_high_n_<su>16): Likewise.
	(vqrdmulh<q>_n_s16): Likewise.

From-SVN: r202322
parent bb1ae543
2013-09-06 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_sqdml<SBINQOPS:as>l_n<mode>_internal): Use
<vwx> iterator to ensure correct register choice.
(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Likewise.
(aarch64_sqdmull_n<mode>): Likewise.
(aarch64_sqdmull2_n<mode>_internal): Likewise.
* config/aarch64/arm_neon.h
(vml<as><q>_lane<q>_<su>16): Use 'x' constraint for element vector.
(vml<as><q>_n_<su>16): Likewise.
(vml<as>l_high_lane<q>_<su>16): Likewise.
(vml<as>l_high_n_<su>16): Likewise.
(vml<as>l_lane<q>_<su>16): Likewise.
(vml<as>l_n_<su>16): Likewise.
(vmul<q>_lane<q>_<su>16): Likewise.
(vmul<q>_n_<su>16): Likewise.
(vmull_lane<q>_<su>16): Likewise.
(vmull_n_<su>16): Likewise.
(vmull_high_lane<q>_<su>16): Likewise.
(vmull_high_n_<su>16): Likewise.
(vqrdmulh<q>_n_s16): Likewise.
2013-09-06 Tejas Belagod <tejas.belagod@arm.com> 2013-09-06 Tejas Belagod <tejas.belagod@arm.com>
* config/aarch64/arm_neon.h: Fix all vdup<bhsd>_lane<q> intrinsics to * config/aarch64/arm_neon.h: Fix all vdup<bhsd>_lane<q> intrinsics to
......
...@@ -2797,7 +2797,7 @@ ...@@ -2797,7 +2797,7 @@
(match_operand:VD_HSI 2 "register_operand" "w")) (match_operand:VD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE> (sign_extend:<VWIDE>
(vec_duplicate:VD_HSI (vec_duplicate:VD_HSI
(match_operand:<VEL> 3 "register_operand" "w")))) (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))] (const_int 1))))]
"TARGET_SIMD" "TARGET_SIMD"
"sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
...@@ -2955,7 +2955,7 @@ ...@@ -2955,7 +2955,7 @@
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE> (sign_extend:<VWIDE>
(vec_duplicate:<VHALF> (vec_duplicate:<VHALF>
(match_operand:<VEL> 3 "register_operand" "w")))) (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))] (const_int 1))))]
"TARGET_SIMD" "TARGET_SIMD"
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
...@@ -3083,7 +3083,7 @@ ...@@ -3083,7 +3083,7 @@
(match_operand:VD_HSI 1 "register_operand" "w")) (match_operand:VD_HSI 1 "register_operand" "w"))
(sign_extend:<VWIDE> (sign_extend:<VWIDE>
(vec_duplicate:VD_HSI (vec_duplicate:VD_HSI
(match_operand:<VEL> 2 "register_operand" "w"))) (match_operand:<VEL> 2 "register_operand" "<vwx>")))
) )
(const_int 1)))] (const_int 1)))]
"TARGET_SIMD" "TARGET_SIMD"
...@@ -3193,7 +3193,7 @@ ...@@ -3193,7 +3193,7 @@
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE> (sign_extend:<VWIDE>
(vec_duplicate:<VHALF> (vec_duplicate:<VHALF>
(match_operand:<VEL> 2 "register_operand" "w"))) (match_operand:<VEL> 2 "register_operand" "<vwx>")))
) )
(const_int 1)))] (const_int 1)))]
"TARGET_SIMD" "TARGET_SIMD"
......
...@@ -7146,7 +7146,7 @@ vld1q_dup_u64 (const uint64_t * a) ...@@ -7146,7 +7146,7 @@ vld1q_dup_u64 (const uint64_t * a)
int16x4_t result; \ int16x4_t result; \
__asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7174,7 +7174,7 @@ vld1q_dup_u64 (const uint64_t * a) ...@@ -7174,7 +7174,7 @@ vld1q_dup_u64 (const uint64_t * a)
uint16x4_t result; \ uint16x4_t result; \
__asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7202,7 +7202,7 @@ vld1q_dup_u64 (const uint64_t * a) ...@@ -7202,7 +7202,7 @@ vld1q_dup_u64 (const uint64_t * a)
int16x4_t result; \ int16x4_t result; \
__asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7230,7 +7230,7 @@ vld1q_dup_u64 (const uint64_t * a) ...@@ -7230,7 +7230,7 @@ vld1q_dup_u64 (const uint64_t * a)
uint16x4_t result; \ uint16x4_t result; \
__asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \ __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7267,7 +7267,7 @@ vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) ...@@ -7267,7 +7267,7 @@ vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
int16x4_t result; int16x4_t result;
__asm__ ("mla %0.4h,%2.4h,%3.h[0]" __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7289,7 +7289,7 @@ vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) ...@@ -7289,7 +7289,7 @@ vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
uint16x4_t result; uint16x4_t result;
__asm__ ("mla %0.4h,%2.4h,%3.h[0]" __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7380,7 +7380,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7380,7 +7380,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7408,7 +7408,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7408,7 +7408,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7436,7 +7436,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7436,7 +7436,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7464,7 +7464,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7464,7 +7464,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7489,7 +7489,7 @@ vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) ...@@ -7489,7 +7489,7 @@ vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
int32x4_t result; int32x4_t result;
__asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7511,7 +7511,7 @@ vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) ...@@ -7511,7 +7511,7 @@ vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
uint32x4_t result; uint32x4_t result;
__asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7602,7 +7602,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -7602,7 +7602,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7630,7 +7630,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -7630,7 +7630,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7658,7 +7658,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -7658,7 +7658,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7686,7 +7686,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -7686,7 +7686,7 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7711,7 +7711,7 @@ vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) ...@@ -7711,7 +7711,7 @@ vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
int32x4_t result; int32x4_t result;
__asm__ ("smlal %0.4s,%2.4h,%3.h[0]" __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7733,7 +7733,7 @@ vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) ...@@ -7733,7 +7733,7 @@ vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
uint32x4_t result; uint32x4_t result;
__asm__ ("umlal %0.4s,%2.4h,%3.h[0]" __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7839,7 +7839,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7839,7 +7839,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
int16x8_t result; \ int16x8_t result; \
__asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7867,7 +7867,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7867,7 +7867,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7895,7 +7895,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7895,7 +7895,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
int16x8_t result; \ int16x8_t result; \
__asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7923,7 +7923,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -7923,7 +7923,7 @@ vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -7972,7 +7972,7 @@ vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) ...@@ -7972,7 +7972,7 @@ vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
int16x8_t result; int16x8_t result;
__asm__ ("mla %0.8h,%2.8h,%3.h[0]" __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -7994,7 +7994,7 @@ vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) ...@@ -7994,7 +7994,7 @@ vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
uint16x8_t result; uint16x8_t result;
__asm__ ("mla %0.8h,%2.8h,%3.h[0]" __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8100,7 +8100,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) ...@@ -8100,7 +8100,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
int16x4_t result; \ int16x4_t result; \
__asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8128,7 +8128,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) ...@@ -8128,7 +8128,7 @@ vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
uint16x4_t result; \ uint16x4_t result; \
__asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \ __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8165,7 +8165,7 @@ vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) ...@@ -8165,7 +8165,7 @@ vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
int16x4_t result; int16x4_t result;
__asm__ ("mls %0.4h, %2.4h, %3.h[0]" __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8187,7 +8187,7 @@ vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) ...@@ -8187,7 +8187,7 @@ vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
uint16x4_t result; uint16x4_t result;
__asm__ ("mls %0.4h, %2.4h, %3.h[0]" __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8278,7 +8278,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8278,7 +8278,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8306,7 +8306,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8306,7 +8306,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8334,7 +8334,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8334,7 +8334,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8362,7 +8362,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8362,7 +8362,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8387,7 +8387,7 @@ vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) ...@@ -8387,7 +8387,7 @@ vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
int32x4_t result; int32x4_t result;
__asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8409,7 +8409,7 @@ vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) ...@@ -8409,7 +8409,7 @@ vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
uint32x4_t result; uint32x4_t result;
__asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8500,7 +8500,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -8500,7 +8500,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8528,7 +8528,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -8528,7 +8528,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8556,7 +8556,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -8556,7 +8556,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8584,7 +8584,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ...@@ -8584,7 +8584,7 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8609,7 +8609,7 @@ vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) ...@@ -8609,7 +8609,7 @@ vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
int32x4_t result; int32x4_t result;
__asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8631,7 +8631,7 @@ vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) ...@@ -8631,7 +8631,7 @@ vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
uint32x4_t result; uint32x4_t result;
__asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8737,7 +8737,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8737,7 +8737,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
int16x8_t result; \ int16x8_t result; \
__asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8765,7 +8765,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8765,7 +8765,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \ __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
: "=w"(result) \ : "=w"(result) \
: "0"(a_), "w"(b_), "w"(c_), "i"(d) \ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8808,7 +8808,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8808,7 +8808,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
int16x8_t __result; \ int16x8_t __result; \
__asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
: "=w"(__result) \ : "=w"(__result) \
: "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
: /* No clobbers */); \ : /* No clobbers */); \
__result; \ __result; \
}) })
...@@ -8836,7 +8836,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ...@@ -8836,7 +8836,7 @@ vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
uint16x8_t __result; \ uint16x8_t __result; \
__asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \ __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
: "=w"(__result) \ : "=w"(__result) \
: "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \ : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
: /* No clobbers */); \ : /* No clobbers */); \
__result; \ __result; \
}) })
...@@ -8874,7 +8874,7 @@ vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) ...@@ -8874,7 +8874,7 @@ vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
float64x2_t t1; float64x2_t t1;
__asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
: "=w"(result), "=w"(t1) : "=w"(result), "=w"(t1)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8885,7 +8885,7 @@ vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) ...@@ -8885,7 +8885,7 @@ vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
int16x8_t result; int16x8_t result;
__asm__ ("mls %0.8h, %2.8h, %3.h[0]" __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8907,7 +8907,7 @@ vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) ...@@ -8907,7 +8907,7 @@ vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
uint16x8_t result; uint16x8_t result;
__asm__ ("mls %0.8h, %2.8h, %3.h[0]" __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
: "=w"(result) : "=w"(result)
: "0"(a), "w"(b), "w"(c) : "0"(a), "w"(b), "x"(c)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -9648,7 +9648,7 @@ vmul_n_s16 (int16x4_t a, int16_t b) ...@@ -9648,7 +9648,7 @@ vmul_n_s16 (int16x4_t a, int16_t b)
int16x4_t result; int16x4_t result;
__asm__ ("mul %0.4h,%1.4h,%2.h[0]" __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -9670,7 +9670,7 @@ vmul_n_u16 (uint16x4_t a, uint16_t b) ...@@ -9670,7 +9670,7 @@ vmul_n_u16 (uint16x4_t a, uint16_t b)
uint16x4_t result; uint16x4_t result;
__asm__ ("mul %0.4h,%1.4h,%2.h[0]" __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -9707,7 +9707,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) ...@@ -9707,7 +9707,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9733,7 +9733,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) ...@@ -9733,7 +9733,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9759,7 +9759,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) ...@@ -9759,7 +9759,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9785,7 +9785,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) ...@@ -9785,7 +9785,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9809,7 +9809,7 @@ vmull_high_n_s16 (int16x8_t a, int16_t b) ...@@ -9809,7 +9809,7 @@ vmull_high_n_s16 (int16x8_t a, int16_t b)
int32x4_t result; int32x4_t result;
__asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -9831,7 +9831,7 @@ vmull_high_n_u16 (uint16x8_t a, uint16_t b) ...@@ -9831,7 +9831,7 @@ vmull_high_n_u16 (uint16x8_t a, uint16_t b)
uint32x4_t result; uint32x4_t result;
__asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -9932,7 +9932,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) ...@@ -9932,7 +9932,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9958,7 +9958,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) ...@@ -9958,7 +9958,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9984,7 +9984,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) ...@@ -9984,7 +9984,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b)
int32x4_t result; \ int32x4_t result; \
__asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10010,7 +10010,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) ...@@ -10010,7 +10010,7 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b)
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10034,7 +10034,7 @@ vmull_n_s16 (int16x4_t a, int16_t b) ...@@ -10034,7 +10034,7 @@ vmull_n_s16 (int16x4_t a, int16_t b)
int32x4_t result; int32x4_t result;
__asm__ ("smull %0.4s,%1.4h,%2.h[0]" __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -10056,7 +10056,7 @@ vmull_n_u16 (uint16x4_t a, uint16_t b) ...@@ -10056,7 +10056,7 @@ vmull_n_u16 (uint16x4_t a, uint16_t b)
uint32x4_t result; uint32x4_t result;
__asm__ ("umull %0.4s,%1.4h,%2.h[0]" __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -10183,7 +10183,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) ...@@ -10183,7 +10183,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
int16x8_t result; \ int16x8_t result; \
__asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10209,7 +10209,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) ...@@ -10209,7 +10209,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \ __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10261,7 +10261,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) ...@@ -10261,7 +10261,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
int16x8_t result; \ int16x8_t result; \
__asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10287,7 +10287,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) ...@@ -10287,7 +10287,7 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \ __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
: "=w"(result) \ : "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \ : "w"(a_), "x"(b_), "i"(c) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -10333,7 +10333,7 @@ vmulq_n_s16 (int16x8_t a, int16_t b) ...@@ -10333,7 +10333,7 @@ vmulq_n_s16 (int16x8_t a, int16_t b)
int16x8_t result; int16x8_t result;
__asm__ ("mul %0.8h,%1.8h,%2.h[0]" __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -10355,7 +10355,7 @@ vmulq_n_u16 (uint16x8_t a, uint16_t b) ...@@ -10355,7 +10355,7 @@ vmulq_n_u16 (uint16x8_t a, uint16_t b)
uint16x8_t result; uint16x8_t result;
__asm__ ("mul %0.8h,%1.8h,%2.h[0]" __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -11821,7 +11821,7 @@ vqrdmulh_n_s16 (int16x4_t a, int16_t b) ...@@ -11821,7 +11821,7 @@ vqrdmulh_n_s16 (int16x4_t a, int16_t b)
int16x4_t result; int16x4_t result;
__asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -11843,7 +11843,7 @@ vqrdmulhq_n_s16 (int16x8_t a, int16_t b) ...@@ -11843,7 +11843,7 @@ vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
int16x8_t result; int16x8_t result;
__asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
: "=w"(result) : "=w"(result)
: "w"(a), "w"(b) : "w"(a), "x"(b)
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment