Commit d5d27976 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Fix vcvt_high_f64_f32 and vcvt_figh_f32_f64 intrinsics.

gcc/

	* config/aarch64/aarch64-simd.md
	(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
	(aarch64_float_truncate_hi_v4sf_le): New.
	(aarch64_float_truncate_hi_v4sf_be): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New.

From-SVN: r228044
parent 07dc170b
2015-09-23 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
(aarch64_float_truncate_hi_v4sf_le): New.
(aarch64_float_truncate_hi_v4sf_be): Likewise.
2015-09-23 Richard Biener <rguenther@suse.de>
* tree-ssa-structalias.c (intra_create_variable_infos): Build
......@@ -1703,6 +1703,15 @@
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behaviour is as required.
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "")
(match_operand:VQ_HSF 1 "register_operand" "")]
......@@ -1757,17 +1766,42 @@
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_truncate_hi_<Vdbl>"
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDF 1 "register_operand" "0")
(float_truncate:VDF
(match_operand:<VWIDE> 2 "register_operand" "w"))))]
"TARGET_SIMD"
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(float_truncate:VDF
(match_operand:<VWIDE> 2 "register_operand" "w"))
(match_operand:VDF 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
[(match_operand:<VDBL> 0 "register_operand" "=w")
(match_operand:VDF 1 "register_operand" "0")
(match_operand:<VWIDE> 2 "register_operand" "w")]
"TARGET_SIMD"
{
rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
? gen_aarch64_float_truncate_hi_<Vdbl>_be
: gen_aarch64_float_truncate_hi_<Vdbl>_le;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_expand "vec_pack_trunc_v2df"
[(set (match_operand:V4SF 0 "register_operand")
(vec_concat:V4SF
......
2015-09-23 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New.
2015-09-23 Richard Biener <rguenther@suse.de>
* g++.dg/tree-ssa/restrict2.C: Un-XFAIL testcase.
......
/* { dg-skip-if "" { arm*-*-* } } */
#include "arm_neon.h"
void abort (void);
void
foo (void)
{
/* Test vcvt_high_f32_f64. */
float32x2_t arg1;
float64x2_t arg2;
float32x4_t result;
arg1 = vcreate_f32 (UINT64_C (0x3f0db5793f6e1892));
arg2 = vcombine_f64 (vcreate_f64 (UINT64_C (0x3fe8e49d23fb575d)),
vcreate_f64 (UINT64_C (0x3fd921291b3df73e)));
// Expect: "result" = 3ec909483f4724e93f0db5793f6e1892
result = vcvt_high_f32_f64 (arg1, arg2);
float32_t got;
float32_t exp;
/* Lane 0. */
got = vgetq_lane_f32 (result, 0);
exp = ((float32_t) 0.9300624132156372);
if (((((exp / got) < ((float32_t) 0.999))
|| ((exp / got) > ((float32_t) 1.001)))
&& (((exp - got) < ((float32_t) -1.0e-4))
|| ((exp - got) > ((float32_t) 1.0e-4)))))
abort ();
/* Lane 1. */
got = vgetq_lane_f32 (result, 1);
exp = ((float32_t) 0.5535503029823303);
if (((((exp / got) < ((float32_t) 0.999))
|| ((exp / got) > ((float32_t) 1.001)))
&& (((exp - got) < ((float32_t) -1.0e-4))
|| ((exp - got) > ((float32_t) 1.0e-4)))))
abort ();
/* Lane 2. */
got = vgetq_lane_f32 (result, 2);
exp = ((float32_t) 0.7779069617051665);
if (((((exp / got) < ((float32_t) 0.999))
|| ((exp / got) > ((float32_t) 1.001)))
&& (((exp - got) < ((float32_t) -1.0e-4))
|| ((exp - got) > ((float32_t) 1.0e-4)))))
abort ();
/* Lane 3. */
got = vgetq_lane_f32 (result, 3);
exp = ((float32_t) 0.3926489606891329);
if (((((exp / got) < ((float32_t) 0.999))
|| ((exp / got) > ((float32_t) 1.001)))
&& (((exp - got) < ((float32_t) -1.0e-4))
|| ((exp - got) > ((float32_t) 1.0e-4)))))
abort ();
}
void
bar (void)
{
/* Test vcvt_high_f64_f32. */
float32x4_t arg1;
float64x2_t result;
arg1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f7c5cf13f261f74)),
vcreate_f32 (UINT64_C (0x3e3a7bc03f6ccc1d)));
// Expect: "result" = 3fc74f78000000003fed9983a0000000
result = vcvt_high_f64_f32 (arg1);
float64_t got;
float64_t exp;
/* Lane 0. */
got = vgetq_lane_f64 (result, 0);
exp = 0.9249895215034485;
if (((((exp / got) < 0.999)
|| ((exp / got) > 1.001))
&& (((exp - got) < -1.0e-4)
|| ((exp - got) > 1.0e-4))))
abort ();
/* Lane 1. */
got = vgetq_lane_f64 (result, 1);
exp = 0.1821126937866211;
if (((((exp / got) < 0.999)
|| ((exp / got) > 1.001))
&& (((exp - got) < -1.0e-4)
|| ((exp - got) > 1.0e-4))))
abort ();
}
int
main (int argc, char **argv)
{
foo ();
bar ();
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment