Commit e58703e2 by Richard Sandiford

[AArch64] Add sign and zero extension for partial SVE modes

This patch adds support for extending from partial SVE modes
to both full vector modes and wider partial modes.

Some tests now need --param aarch64-sve-compare-costs=0 to force
the original full-vector code.
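
As a concrete illustration (a minimal sketch modelled on the new
extend_1.c-style tests added below; the function name is arbitrary),
a loop like the following can now keep uint8_t elements in 64-bit
containers, loading them with an extending ld1b, doing the 8-bit add
directly, and widening the result with a single predicated UXTB
instead of a chain of full-vector unpacks:

#include <stdint.h>

/* With -O2 -ftree-vectorize on an SVE target, src2's bytes can be
   loaded straight into 64-bit containers (ld1b z.d), incremented as
   bytes (add z.b, z.b, #1), and widened in place (uxtb z.d), per the
   scan-assembler expectations in the new tests.  */
void
f (uint64_t *restrict dst, uint64_t *restrict src1,
   uint8_t *restrict src2, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] += src1[i] + (uint8_t) (src2[i] + 1);
}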

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_HSDI): New mode iterator.
	(narrower_mask): Handle VNx4HI, VNx2HI and VNx2SI.
	* config/aarch64/aarch64-sve.md
	(<ANY_EXTEND:optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2): New pattern.
	(*<ANY_EXTEND:optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2): Likewise.
	(@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Update
	comment.  Avoid new narrower_mask ambiguity.
	(@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Likewise.
	(*cond_uxt<mode>_2): Update comment.
	(*cond_uxt<mode>_any): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/cost_model_1.c: Expect the loop to be
	vectorized with bytes stored in 32-bit containers.
	* gcc.target/aarch64/sve/extend_1.c: New test.
	* gcc.target/aarch64/sve/extend_2.c: New test.
	* gcc.target/aarch64/sve/extend_3.c: New test.
	* gcc.target/aarch64/sve/extend_4.c: New test.
	* gcc.target/aarch64/sve/load_const_offset_3.c: Add
	--param aarch64-sve-compare-costs=0.
	* gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_store_1_run.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
	* gcc.target/aarch64/sve/mask_struct_store_2_run.c: Likewise.
	* gcc.target/aarch64/sve/unpack_unsigned_1.c: Likewise.
	* gcc.target/aarch64/sve/unpack_unsigned_1_run.c: Likewise.

From-SVN: r278342
@@ -71,8 +71,7 @@
 ;; == Unary arithmetic
 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
 ;; ---- [INT] General unary arithmetic corresponding to unspecs
-;; ---- [INT] Sign extension
-;; ---- [INT] Zero extension
+;; ---- [INT] Sign and zero extension
 ;; ---- [INT] Logical inverse
 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
 ;; ---- [FP] General unary arithmetic corresponding to unspecs
@@ -2812,15 +2811,44 @@
 )
 
 ;; -------------------------------------------------------------------------
-;; ---- [INT] Sign extension
+;; ---- [INT] Sign and zero extension
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - SXTB
 ;; - SXTH
 ;; - SXTW
+;; - UXTB
+;; - UXTH
+;; - UXTW
 ;; -------------------------------------------------------------------------
 
-;; Predicated SXT[BHW].
+;; Unpredicated sign and zero extension from a narrower mode.
+(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
+  [(set (match_operand:SVE_HSDI 0 "register_operand")
+	(unspec:SVE_HSDI
+	  [(match_dup 2)
+	   (ANY_EXTEND:SVE_HSDI
+	     (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+  {
+    operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
+  }
+)
+
+;; Predicated sign and zero extension from a narrower mode.
+(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
+  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+	(unspec:SVE_HSDI
+	  [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
+	   (ANY_EXTEND:SVE_HSDI
+	     (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+  "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
+)
+
+;; Predicated truncate-and-sign-extend operations.
 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
@@ -2829,11 +2857,12 @@
	     (truncate:SVE_PARTIAL_I
	       (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
	  UNSPEC_PRED_X))]
-  "TARGET_SVE && (~<narrower_mask> & <self_mask>) == 0"
+  "TARGET_SVE
+   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
   "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
 )
-;; Predicated SXT[BHW] with merging.
+;; Predicated truncate-and-sign-extend operations with merging.
 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_HSDI
@@ -2843,7 +2872,8 @@
	       (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
-  "TARGET_SVE && (~<narrower_mask> & <self_mask>) == 0"
+  "TARGET_SVE
+   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
   "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
@@ -2851,17 +2881,11 @@
   [(set_attr "movprfx" "*,yes,yes")]
 )
 
-;; -------------------------------------------------------------------------
-;; ---- [INT] Zero extension
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - UXTB
-;; - UXTH
-;; - UXTW
-;; -------------------------------------------------------------------------
-
-;; Match UXT[BHW] as a conditional AND of a constant, merging with the
+;; Predicated truncate-and-zero-extend operations, merging with the
 ;; first input.
+;;
+;; The canonical form of this operation is an AND of a constant rather
+;; than (zero_extend (truncate ...)).
 (define_insn "*cond_uxt<mode>_2"
   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
@@ -2878,7 +2902,7 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
-;; Match UXT[BHW] as a conditional AND of a constant, merging with an
+;; Predicated truncate-and-zero-extend operations, merging with an
 ;; independent value.
 ;;
 ;; The earlyclobber isn't needed for the first alternative, but omitting
......
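
For reference, the predicated extensions matched above operate lane-wise
within the wider containers. Below is a rough C model of what, say,
"uxtb z0.s, p0/m, z1.s" computes; this is a hand-written sketch of the
architectural semantics, not GCC code, and all names are invented:

#include <stdint.h>
#include <stdbool.h>

/* Model of a merging UXTB on 32-bit containers: for each active lane,
   zero-extend the low byte of the source element; inactive lanes keep
   the destination's previous contents.  */
static void
model_uxtb_s (uint32_t *zd, const uint32_t *zs, const bool *pg, int lanes)
{
  for (int i = 0; i < lanes; ++i)
    if (pg[i])
      zd[i] = (uint8_t) zs[i];
}

int
main (void)
{
  uint32_t zd[4] = { 111, 222, 333, 444 };
  const uint32_t zs[4] = { 0x1ff, 0x201, 0x0ab, 0x3cd };
  const bool pg[4] = { true, false, true, true };
  model_uxtb_s (zd, zs, pg, 4);
  /* zd is now { 0xff, 222, 0xab, 0xcd }: inactive lane 1 is unchanged.  */
  return 0;
}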
@@ -359,6 +359,11 @@
 		    VNx4SI VNx2SI
 		    VNx2DI])
 
+;; SVE integer vector modes whose elements are 16 bits or wider.
+(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
+				VNx4SI VNx2SI
+				VNx2DI])
+
 ;; Modes involved in extending or truncating SVE data, for 8 elements per
 ;; 128-bit block.
 (define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -1364,9 +1369,10 @@
 			   (VNx2HI "0x22")
 			   (VNx2SI "0x24")])
 
-;; For full vector modes, the mask of narrower modes, encoded as above.
-(define_mode_attr narrower_mask [(VNx8HI "0x81")
-				 (VNx4SI "0x43")
+;; For SVE_HSDI vector modes, the mask of narrower modes, encoded as above.
+(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
+				 (VNx2HI "0x21")
+				 (VNx4SI "0x43") (VNx2SI "0x23")
 				 (VNx2DI "0x27")])
 
 ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
......
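
These encodings make the new pattern guard easy to read: the values
suggest that self_mask puts the element count in the high nibble and the
element size in bytes in the low nibble (VNx2HI = 0x22, VNx2SI = 0x24),
and narrower_mask for a mode is then the OR of the self_masks of the
same-count modes with narrower elements (VNx2DI = 0x27 = 0x21|0x22|0x24).
On that reading, (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0
accepts exactly the pairs with matching lane counts and a strictly
narrower source element. A quick C sanity check, with values copied from
the tables above:

#include <assert.h>

int
main (void)
{
  /* self_mask values from iterators.md.  */
  unsigned vnx2hi_self = 0x22, vnx2si_self = 0x24;
  /* narrower_mask values from the hunk above.  */
  unsigned vnx2di_narrower = 0x27, vnx4si_narrower = 0x43;

  /* VNx2SI -> VNx2DI: same lane count, narrower elements: accepted.  */
  assert ((~vnx2di_narrower & vnx2si_self) == 0);
  /* VNx2HI -> VNx4SI: lane counts differ: rejected.  */
  assert ((~vnx4si_narrower & vnx2hi_self) != 0);
  return 0;
}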
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ /* { dg-options "-O2 -ftree-vectorize" } */
void void
f (unsigned int *restrict x, unsigned int *restrict y, f (unsigned int *restrict x, unsigned int *restrict y,
...@@ -8,5 +8,4 @@ f (unsigned int *restrict x, unsigned int *restrict y, ...@@ -8,5 +8,4 @@ f (unsigned int *restrict x, unsigned int *restrict y,
x[i] = x[i] + y[i] + z[i]; x[i] = x[i] + y[i] + z[i];
} }
/* { dg-final { scan-tree-dump "not vectorized: estimated iteration count too small" vect } } */ /* { dg-final { scan-assembler {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x2\]\n} } } */
/* { dg-final { scan-tree-dump "vectorized 0 loops" vect } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define TEST_LOOP(TYPE1, TYPE2) \
void \
f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1, \
TYPE2 *restrict src2, int n) \
{ \
for (int i = 0; i < n; ++i) \
dst[i] += src1[i] + (TYPE2) (src2[i] + 1); \
}
#define TEST_ALL(T) \
T (uint16_t, uint8_t) \
T (uint32_t, uint8_t) \
T (uint64_t, uint8_t) \
T (uint32_t, uint16_t) \
T (uint64_t, uint16_t) \
T (uint64_t, uint32_t)
TEST_ALL (TEST_LOOP)
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define TEST_LOOP(TYPE1, TYPE2) \
void \
f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1, \
TYPE2 *restrict src2, int n) \
{ \
for (int i = 0; i < n; ++i) \
dst[i] += src1[i] + (TYPE2) (src2[i] + 1); \
}
#define TEST_ALL(T) \
T (int16_t, int8_t) \
T (int32_t, int8_t) \
T (int64_t, int8_t) \
T (int32_t, int16_t) \
T (int64_t, int16_t) \
T (int64_t, int32_t)
TEST_ALL (TEST_LOOP)
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtb\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtb\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtb\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
#include <stdint.h>
void
f (uint64_t *dst, uint32_t *restrict src1, uint16_t *restrict src2,
uint8_t *restrict src3)
{
for (int i = 0; i < 7; ++i)
dst[i] += (uint32_t) (src1[i] + (uint16_t) (src2[i]
+ (uint8_t) (src3[i] + 1)));
}
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
#include <stdint.h>
void
f (int64_t *dst, int32_t *restrict src1, int16_t *restrict src2,
int8_t *restrict src3)
{
for (int i = 0; i < 7; ++i)
dst[i] += (int32_t) (src1[i] + (int16_t) (src2[i]
+ (int8_t) (src3[i] + 1)));
}
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtb\tz[0-9]+\.h,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 1 } } */
/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -save-temps -msve-vector-bits=256" } */
+/* { dg-options "-O2 -ftree-vectorize -save-temps -msve-vector-bits=256 --param aarch64-sve-compare-costs=0" } */
 
 #include "load_const_offset_2.c"
......
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
......
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include "mask_struct_store_1.c"
......
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
......
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 #include "mask_struct_store_2.c"
......
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
......
 /* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
 
 #include "unpack_unsigned_1.c"
......