Commit 38047d90 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Fix vld1<q>_* asm constraints in arm_neon.h

gcc/
	* config/aarch64/arm_neon.h (vld1<q>_lane*): Fix constraints.
	(vld1<q>_dup_<sufp><8, 16, 32, 64>): Likewise.
	(vld1<q>_<sufp><8, 16, 32, 64>): Likewise.

From-SVN: r198250
parent 13f39b2e
2013-04-24 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h (vld1<q>_lane*): Fix constraints.
(vld1<q>_dup_<sufp><8, 16, 32, 64>): Likewise.
(vld1<q>_<sufp><8, 16, 32, 64>): Likewise.
2013-04-24 Paolo Carlini <paolo.carlini@oracle.com> 2013-04-24 Paolo Carlini <paolo.carlini@oracle.com>
* doc/cpp.texi: Document __GXX_EXPERIMENTAL_CXX1Y__. * doc/cpp.texi: Document __GXX_EXPERIMENTAL_CXX1Y__.
......
...@@ -8390,10 +8390,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ...@@ -8390,10 +8390,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * a) vld1_dup_f32 (const float32_t * a)
{ {
float32x2_t result; float32x2_t result;
__asm__ ("ld1r {%0.2s},[%1]" __asm__ ("ld1r {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8401,10 +8401,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ...@@ -8401,10 +8401,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t * a) vld1_dup_f64 (const float64_t * a)
{ {
float64x1_t result; float64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1r {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8412,10 +8412,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ...@@ -8412,10 +8412,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t * a) vld1_dup_p8 (const poly8_t * a)
{ {
poly8x8_t result; poly8x8_t result;
__asm__ ("ld1r {%0.8b},[%1]" __asm__ ("ld1r {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8423,10 +8423,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ...@@ -8423,10 +8423,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t * a) vld1_dup_p16 (const poly16_t * a)
{ {
poly16x4_t result; poly16x4_t result;
__asm__ ("ld1r {%0.4h},[%1]" __asm__ ("ld1r {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8434,10 +8434,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ...@@ -8434,10 +8434,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t * a) vld1_dup_s8 (const int8_t * a)
{ {
int8x8_t result; int8x8_t result;
__asm__ ("ld1r {%0.8b},[%1]" __asm__ ("ld1r {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8445,10 +8445,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ...@@ -8445,10 +8445,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t * a) vld1_dup_s16 (const int16_t * a)
{ {
int16x4_t result; int16x4_t result;
__asm__ ("ld1r {%0.4h},[%1]" __asm__ ("ld1r {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8456,10 +8456,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ...@@ -8456,10 +8456,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t * a) vld1_dup_s32 (const int32_t * a)
{ {
int32x2_t result; int32x2_t result;
__asm__ ("ld1r {%0.2s},[%1]" __asm__ ("ld1r {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8467,10 +8467,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ...@@ -8467,10 +8467,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t * a) vld1_dup_s64 (const int64_t * a)
{ {
int64x1_t result; int64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1r {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8478,10 +8478,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ...@@ -8478,10 +8478,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t * a) vld1_dup_u8 (const uint8_t * a)
{ {
uint8x8_t result; uint8x8_t result;
__asm__ ("ld1r {%0.8b},[%1]" __asm__ ("ld1r {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8489,10 +8489,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ...@@ -8489,10 +8489,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t * a) vld1_dup_u16 (const uint16_t * a)
{ {
uint16x4_t result; uint16x4_t result;
__asm__ ("ld1r {%0.4h},[%1]" __asm__ ("ld1r {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8500,10 +8500,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ...@@ -8500,10 +8500,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t * a) vld1_dup_u32 (const uint32_t * a)
{ {
uint32x2_t result; uint32x2_t result;
__asm__ ("ld1r {%0.2s},[%1]" __asm__ ("ld1r {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8511,10 +8511,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) ...@@ -8511,10 +8511,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t * a) vld1_dup_u64 (const uint64_t * a)
{ {
uint64x1_t result; uint64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1r {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8522,10 +8522,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ...@@ -8522,10 +8522,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * a) vld1_f32 (const float32_t * a)
{ {
float32x2_t result; float32x2_t result;
__asm__ ("ld1 {%0.2s},[%1]" __asm__ ("ld1 {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8533,10 +8533,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ...@@ -8533,10 +8533,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t * a) vld1_f64 (const float64_t * a)
{ {
float64x1_t result; float64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1 {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8546,9 +8546,9 @@ vld1_f64 (const float64_t * a) ...@@ -8546,9 +8546,9 @@ vld1_f64 (const float64_t * a)
float32x2_t b_ = (b); \ float32x2_t b_ = (b); \
const float32_t * a_ = (a); \ const float32_t * a_ = (a); \
float32x2_t result; \ float32x2_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8559,9 +8559,9 @@ vld1_f64 (const float64_t * a) ...@@ -8559,9 +8559,9 @@ vld1_f64 (const float64_t * a)
float64x1_t b_ = (b); \ float64x1_t b_ = (b); \
const float64_t * a_ = (a); \ const float64_t * a_ = (a); \
float64x1_t result; \ float64x1_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8572,9 +8572,9 @@ vld1_f64 (const float64_t * a) ...@@ -8572,9 +8572,9 @@ vld1_f64 (const float64_t * a)
poly8x8_t b_ = (b); \ poly8x8_t b_ = (b); \
const poly8_t * a_ = (a); \ const poly8_t * a_ = (a); \
poly8x8_t result; \ poly8x8_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8585,9 +8585,9 @@ vld1_f64 (const float64_t * a) ...@@ -8585,9 +8585,9 @@ vld1_f64 (const float64_t * a)
poly16x4_t b_ = (b); \ poly16x4_t b_ = (b); \
const poly16_t * a_ = (a); \ const poly16_t * a_ = (a); \
poly16x4_t result; \ poly16x4_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8598,9 +8598,9 @@ vld1_f64 (const float64_t * a) ...@@ -8598,9 +8598,9 @@ vld1_f64 (const float64_t * a)
int8x8_t b_ = (b); \ int8x8_t b_ = (b); \
const int8_t * a_ = (a); \ const int8_t * a_ = (a); \
int8x8_t result; \ int8x8_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8611,9 +8611,9 @@ vld1_f64 (const float64_t * a) ...@@ -8611,9 +8611,9 @@ vld1_f64 (const float64_t * a)
int16x4_t b_ = (b); \ int16x4_t b_ = (b); \
const int16_t * a_ = (a); \ const int16_t * a_ = (a); \
int16x4_t result; \ int16x4_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8624,9 +8624,9 @@ vld1_f64 (const float64_t * a) ...@@ -8624,9 +8624,9 @@ vld1_f64 (const float64_t * a)
int32x2_t b_ = (b); \ int32x2_t b_ = (b); \
const int32_t * a_ = (a); \ const int32_t * a_ = (a); \
int32x2_t result; \ int32x2_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8637,9 +8637,9 @@ vld1_f64 (const float64_t * a) ...@@ -8637,9 +8637,9 @@ vld1_f64 (const float64_t * a)
int64x1_t b_ = (b); \ int64x1_t b_ = (b); \
const int64_t * a_ = (a); \ const int64_t * a_ = (a); \
int64x1_t result; \ int64x1_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8650,9 +8650,9 @@ vld1_f64 (const float64_t * a) ...@@ -8650,9 +8650,9 @@ vld1_f64 (const float64_t * a)
uint8x8_t b_ = (b); \ uint8x8_t b_ = (b); \
const uint8_t * a_ = (a); \ const uint8_t * a_ = (a); \
uint8x8_t result; \ uint8x8_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8663,9 +8663,9 @@ vld1_f64 (const float64_t * a) ...@@ -8663,9 +8663,9 @@ vld1_f64 (const float64_t * a)
uint16x4_t b_ = (b); \ uint16x4_t b_ = (b); \
const uint16_t * a_ = (a); \ const uint16_t * a_ = (a); \
uint16x4_t result; \ uint16x4_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8676,9 +8676,9 @@ vld1_f64 (const float64_t * a) ...@@ -8676,9 +8676,9 @@ vld1_f64 (const float64_t * a)
uint32x2_t b_ = (b); \ uint32x2_t b_ = (b); \
const uint32_t * a_ = (a); \ const uint32_t * a_ = (a); \
uint32x2_t result; \ uint32x2_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8689,9 +8689,9 @@ vld1_f64 (const float64_t * a) ...@@ -8689,9 +8689,9 @@ vld1_f64 (const float64_t * a)
uint64x1_t b_ = (b); \ uint64x1_t b_ = (b); \
const uint64_t * a_ = (a); \ const uint64_t * a_ = (a); \
uint64x1_t result; \ uint64x1_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8700,10 +8700,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ...@@ -8700,10 +8700,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t * a) vld1_p8 (const poly8_t * a)
{ {
poly8x8_t result; poly8x8_t result;
__asm__ ("ld1 {%0.8b}, [%1]" __asm__ ("ld1 {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8711,10 +8711,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ...@@ -8711,10 +8711,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t * a) vld1_p16 (const poly16_t * a)
{ {
poly16x4_t result; poly16x4_t result;
__asm__ ("ld1 {%0.4h}, [%1]" __asm__ ("ld1 {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
: /* No clobbers */); : /* No clobbers */);
return result; return result;
} }
...@@ -8722,10 +8722,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ...@@ -8722,10 +8722,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t * a) vld1_s8 (const int8_t * a)
{ {
int8x8_t result; int8x8_t result;
__asm__ ("ld1 {%0.8b},[%1]" __asm__ ("ld1 {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8733,10 +8733,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ...@@ -8733,10 +8733,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t * a) vld1_s16 (const int16_t * a)
{ {
int16x4_t result; int16x4_t result;
__asm__ ("ld1 {%0.4h},[%1]" __asm__ ("ld1 {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8744,10 +8744,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ...@@ -8744,10 +8744,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t * a) vld1_s32 (const int32_t * a)
{ {
int32x2_t result; int32x2_t result;
__asm__ ("ld1 {%0.2s},[%1]" __asm__ ("ld1 {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8755,10 +8755,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ...@@ -8755,10 +8755,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t * a) vld1_s64 (const int64_t * a)
{ {
int64x1_t result; int64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1 {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8766,10 +8766,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ...@@ -8766,10 +8766,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t * a) vld1_u8 (const uint8_t * a)
{ {
uint8x8_t result; uint8x8_t result;
__asm__ ("ld1 {%0.8b},[%1]" __asm__ ("ld1 {%0.8b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8777,10 +8777,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ...@@ -8777,10 +8777,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t * a) vld1_u16 (const uint16_t * a)
{ {
uint16x4_t result; uint16x4_t result;
__asm__ ("ld1 {%0.4h},[%1]" __asm__ ("ld1 {%0.4h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8788,10 +8788,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ...@@ -8788,10 +8788,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t * a) vld1_u32 (const uint32_t * a)
{ {
uint32x2_t result; uint32x2_t result;
__asm__ ("ld1 {%0.2s},[%1]" __asm__ ("ld1 {%0.2s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8799,10 +8799,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) ...@@ -8799,10 +8799,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t * a) vld1_u64 (const uint64_t * a)
{ {
uint64x1_t result; uint64x1_t result;
__asm__ ("ld1 {%0.1d},[%1]" __asm__ ("ld1 {%0.1d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8810,10 +8810,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ...@@ -8810,10 +8810,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a) vld1q_dup_f32 (const float32_t * a)
{ {
float32x4_t result; float32x4_t result;
__asm__ ("ld1r {%0.4s},[%1]" __asm__ ("ld1r {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8821,10 +8821,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ...@@ -8821,10 +8821,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a) vld1q_dup_f64 (const float64_t * a)
{ {
float64x2_t result; float64x2_t result;
__asm__ ("ld1r {%0.2d},[%1]" __asm__ ("ld1r {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8832,10 +8832,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ...@@ -8832,10 +8832,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a) vld1q_dup_p8 (const poly8_t * a)
{ {
poly8x16_t result; poly8x16_t result;
__asm__ ("ld1r {%0.16b},[%1]" __asm__ ("ld1r {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8843,10 +8843,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ...@@ -8843,10 +8843,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a) vld1q_dup_p16 (const poly16_t * a)
{ {
poly16x8_t result; poly16x8_t result;
__asm__ ("ld1r {%0.8h},[%1]" __asm__ ("ld1r {%0.8h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8854,10 +8854,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ...@@ -8854,10 +8854,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a) vld1q_dup_s8 (const int8_t * a)
{ {
int8x16_t result; int8x16_t result;
__asm__ ("ld1r {%0.16b},[%1]" __asm__ ("ld1r {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8865,10 +8865,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ...@@ -8865,10 +8865,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a) vld1q_dup_s16 (const int16_t * a)
{ {
int16x8_t result; int16x8_t result;
__asm__ ("ld1r {%0.8h},[%1]" __asm__ ("ld1r {%0.8h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8876,10 +8876,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ...@@ -8876,10 +8876,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a) vld1q_dup_s32 (const int32_t * a)
{ {
int32x4_t result; int32x4_t result;
__asm__ ("ld1r {%0.4s},[%1]" __asm__ ("ld1r {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8887,10 +8887,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ...@@ -8887,10 +8887,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a) vld1q_dup_s64 (const int64_t * a)
{ {
int64x2_t result; int64x2_t result;
__asm__ ("ld1r {%0.2d},[%1]" __asm__ ("ld1r {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8898,10 +8898,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ...@@ -8898,10 +8898,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a) vld1q_dup_u8 (const uint8_t * a)
{ {
uint8x16_t result; uint8x16_t result;
__asm__ ("ld1r {%0.16b},[%1]" __asm__ ("ld1r {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8909,10 +8909,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ...@@ -8909,10 +8909,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a) vld1q_dup_u16 (const uint16_t * a)
{ {
uint16x8_t result; uint16x8_t result;
__asm__ ("ld1r {%0.8h},[%1]" __asm__ ("ld1r {%0.8h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8920,10 +8920,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ...@@ -8920,10 +8920,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a) vld1q_dup_u32 (const uint32_t * a)
{ {
uint32x4_t result; uint32x4_t result;
__asm__ ("ld1r {%0.4s},[%1]" __asm__ ("ld1r {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8931,10 +8931,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ...@@ -8931,10 +8931,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a) vld1q_dup_u64 (const uint64_t * a)
{ {
uint64x2_t result; uint64x2_t result;
__asm__ ("ld1r {%0.2d},[%1]" __asm__ ("ld1r {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(*a)
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8942,10 +8942,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ...@@ -8942,10 +8942,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * a) vld1q_f32 (const float32_t * a)
{ {
float32x4_t result; float32x4_t result;
__asm__ ("ld1 {%0.4s},[%1]" __asm__ ("ld1 {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8953,10 +8953,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ...@@ -8953,10 +8953,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t * a) vld1q_f64 (const float64_t * a)
{ {
float64x2_t result; float64x2_t result;
__asm__ ("ld1 {%0.2d},[%1]" __asm__ ("ld1 {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -8966,9 +8966,9 @@ vld1q_f64 (const float64_t * a) ...@@ -8966,9 +8966,9 @@ vld1q_f64 (const float64_t * a)
float32x4_t b_ = (b); \ float32x4_t b_ = (b); \
const float32_t * a_ = (a); \ const float32_t * a_ = (a); \
float32x4_t result; \ float32x4_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8979,9 +8979,9 @@ vld1q_f64 (const float64_t * a) ...@@ -8979,9 +8979,9 @@ vld1q_f64 (const float64_t * a)
float64x2_t b_ = (b); \ float64x2_t b_ = (b); \
const float64_t * a_ = (a); \ const float64_t * a_ = (a); \
float64x2_t result; \ float64x2_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -8992,9 +8992,9 @@ vld1q_f64 (const float64_t * a) ...@@ -8992,9 +8992,9 @@ vld1q_f64 (const float64_t * a)
poly8x16_t b_ = (b); \ poly8x16_t b_ = (b); \
const poly8_t * a_ = (a); \ const poly8_t * a_ = (a); \
poly8x16_t result; \ poly8x16_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9005,9 +9005,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9005,9 +9005,9 @@ vld1q_f64 (const float64_t * a)
poly16x8_t b_ = (b); \ poly16x8_t b_ = (b); \
const poly16_t * a_ = (a); \ const poly16_t * a_ = (a); \
poly16x8_t result; \ poly16x8_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9018,9 +9018,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9018,9 +9018,9 @@ vld1q_f64 (const float64_t * a)
int8x16_t b_ = (b); \ int8x16_t b_ = (b); \
const int8_t * a_ = (a); \ const int8_t * a_ = (a); \
int8x16_t result; \ int8x16_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9031,9 +9031,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9031,9 +9031,9 @@ vld1q_f64 (const float64_t * a)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
const int16_t * a_ = (a); \ const int16_t * a_ = (a); \
int16x8_t result; \ int16x8_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9044,9 +9044,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9044,9 +9044,9 @@ vld1q_f64 (const float64_t * a)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
const int32_t * a_ = (a); \ const int32_t * a_ = (a); \
int32x4_t result; \ int32x4_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9057,9 +9057,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9057,9 +9057,9 @@ vld1q_f64 (const float64_t * a)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
const int64_t * a_ = (a); \ const int64_t * a_ = (a); \
int64x2_t result; \ int64x2_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9070,9 +9070,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9070,9 +9070,9 @@ vld1q_f64 (const float64_t * a)
uint8x16_t b_ = (b); \ uint8x16_t b_ = (b); \
const uint8_t * a_ = (a); \ const uint8_t * a_ = (a); \
uint8x16_t result; \ uint8x16_t result; \
__asm__ ("ld1 {%0.b}[%3],[%1]" \ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9083,9 +9083,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9083,9 +9083,9 @@ vld1q_f64 (const float64_t * a)
uint16x8_t b_ = (b); \ uint16x8_t b_ = (b); \
const uint16_t * a_ = (a); \ const uint16_t * a_ = (a); \
uint16x8_t result; \ uint16x8_t result; \
__asm__ ("ld1 {%0.h}[%3],[%1]" \ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9096,9 +9096,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9096,9 +9096,9 @@ vld1q_f64 (const float64_t * a)
uint32x4_t b_ = (b); \ uint32x4_t b_ = (b); \
const uint32_t * a_ = (a); \ const uint32_t * a_ = (a); \
uint32x4_t result; \ uint32x4_t result; \
__asm__ ("ld1 {%0.s}[%3],[%1]" \ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9109,9 +9109,9 @@ vld1q_f64 (const float64_t * a) ...@@ -9109,9 +9109,9 @@ vld1q_f64 (const float64_t * a)
uint64x2_t b_ = (b); \ uint64x2_t b_ = (b); \
const uint64_t * a_ = (a); \ const uint64_t * a_ = (a); \
uint64x2_t result; \ uint64x2_t result; \
__asm__ ("ld1 {%0.d}[%3],[%1]" \ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \ : "=w"(result) \
: "r"(a_), "0"(b_), "i"(c) \ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \ : /* No clobbers */); \
result; \ result; \
}) })
...@@ -9120,10 +9120,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ...@@ -9120,10 +9120,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t * a) vld1q_p8 (const poly8_t * a)
{ {
poly8x16_t result; poly8x16_t result;
__asm__ ("ld1 {%0.16b},[%1]" __asm__ ("ld1 {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9131,10 +9131,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ...@@ -9131,10 +9131,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t * a) vld1q_p16 (const poly16_t * a)
{ {
poly16x8_t result; poly16x8_t result;
__asm__ ("ld1 {%0.8h},[%1]" __asm__ ("ld1 {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9142,10 +9142,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ...@@ -9142,10 +9142,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t * a) vld1q_s8 (const int8_t * a)
{ {
int8x16_t result; int8x16_t result;
__asm__ ("ld1 {%0.16b},[%1]" __asm__ ("ld1 {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9153,10 +9153,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ...@@ -9153,10 +9153,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t * a) vld1q_s16 (const int16_t * a)
{ {
int16x8_t result; int16x8_t result;
__asm__ ("ld1 {%0.8h},[%1]" __asm__ ("ld1 {%0.8h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9164,10 +9164,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ...@@ -9164,10 +9164,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t * a) vld1q_s32 (const int32_t * a)
{ {
int32x4_t result; int32x4_t result;
__asm__ ("ld1 {%0.4s},[%1]" __asm__ ("ld1 {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9175,10 +9175,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ...@@ -9175,10 +9175,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t * a) vld1q_s64 (const int64_t * a)
{ {
int64x2_t result; int64x2_t result;
__asm__ ("ld1 {%0.2d},[%1]" __asm__ ("ld1 {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9186,10 +9186,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ...@@ -9186,10 +9186,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t * a) vld1q_u8 (const uint8_t * a)
{ {
uint8x16_t result; uint8x16_t result;
__asm__ ("ld1 {%0.16b},[%1]" __asm__ ("ld1 {%0.16b}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9197,10 +9197,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ...@@ -9197,10 +9197,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t * a) vld1q_u16 (const uint16_t * a)
{ {
uint16x8_t result; uint16x8_t result;
__asm__ ("ld1 {%0.8h},[%1]" __asm__ ("ld1 {%0.8h}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9208,10 +9208,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ...@@ -9208,10 +9208,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t * a) vld1q_u32 (const uint32_t * a)
{ {
uint32x4_t result; uint32x4_t result;
__asm__ ("ld1 {%0.4s},[%1]" __asm__ ("ld1 {%0.4s}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
...@@ -9219,10 +9219,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ...@@ -9219,10 +9219,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t * a) vld1q_u64 (const uint64_t * a)
{ {
uint64x2_t result; uint64x2_t result;
__asm__ ("ld1 {%0.2d},[%1]" __asm__ ("ld1 {%0.2d}, %1"
: "=w"(result) : "=w"(result)
: "r"(a) : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
: "memory"); : /* No clobbers */);
return result; return result;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment