[AArch64] Fix vcvt_high_f64_f32 and vcvt_high_f32_f64 intrinsics.

gcc/ * config/aarch64/aarch64-simd.md (aarch64_float_truncate_hi_v4sf): Rewrite as an expand. (aarch64_float_truncate_hi_v4sf_le): New. (aarch64_float_truncate_hi_v4sf_be): Likewise. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New. From-SVN: r228044

[AArch64] Fix vcvt_high_f64_f32 and vcvt_high_f32_f64 intrinsics.
gcc/ * config/aarch64/aarch64-simd.md (aarch64_float_truncate_hi_v4sf): Rewrite as an expand. (aarch64_float_truncate_hi_v4sf_le): New. (aarch64_float_truncate_hi_v4sf_be): Likewise. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New. From-SVN: r228044
d5d27976 · James Greenhalgh · James Greenhalgh · 07dc170b · d5d27976 · d5d27976
Commit d5d27976 authored Sep 23, 2015 by James Greenhalgh Committed by James Greenhalgh Sep 23, 2015
Hide whitespace changes
Inline Side-by-side

Showing with 145 additions and 2 deletions

gcc/ChangeLog
+7 -0

gcc/config/aarch64/aarch64-simd.md
+36 -2

gcc/testsuite/ChangeLog
+4 -0

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
+98 -0

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2015-09-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd.md
+	(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
+	(aarch64_float_truncate_hi_v4sf_le): New.
+	(aarch64_float_truncate_hi_v4sf_be): Likewise.
+
 2015-09-23  Richard Biener  <rguenther@suse.de>

 	* tree-ssa-structalias.c (intra_create_variable_infos): Build
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1703,6 +1703,15 @@
  [(set_attr "type" "neon_fp_cvt_widen_s")]
 )

+;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
+;; is inconsistent with vector ordering elsewhere in the compiler, in that
+;; the meaning of HI and LO changes depending on the target endianness.
+;; While elsewhere we map the higher numbered elements of a vector to
+;; the lower architectural lanes of the vector, for these patterns we want
+;; to always treat "hi" as referring to the higher architectural lanes.
+;; Consequently, while the patterns below look inconsistent with our
+;; other big-endian patterns their behaviour is as required.
+
 (define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
@@ -1757,17 +1766,42 @@
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )

-(define_insn "aarch64_float_truncate_hi_<Vdbl>"
+(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"  ; Little-endian form: operand 1 (tied to the output) is the first vec_concat half.
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
 	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )

+(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"  ; Big-endian form of the fcvtn2 pattern.
+  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+    (vec_concat:<VDBL>
+      (float_truncate:VDF  ; Narrowed operand 2 is the first vec_concat half here.
+	(match_operand:<VWIDE> 2 "register_operand" "w"))
+      (match_operand:VDF 1 "register_operand" "0")))]  ; Operand 1 is tied to the output.
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
+  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
+)
+
+(define_expand "aarch64_float_truncate_hi_<Vdbl>"  ; Endianness-neutral entry point for the _le/_be insns.
+  [(match_operand:<VDBL> 0 "register_operand")  ; Predicates only: constraints are ignored at
+   (match_operand:VDF 1 "register_operand")  ; expand time, so the "0" tie and "w" classes
+   (match_operand:<VWIDE> 2 "register_operand")]  ; are stated on the _le/_be insns instead.
+  "TARGET_SIMD"
+{  /* Emit the insn whose vec_concat operand order matches BYTES_BIG_ENDIAN.  */
+  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
+			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
+  emit_insn (gen (operands[0], operands[1], operands[2]));
+  DONE;
+}
+)
+
 (define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2015-09-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New.
+
 2015-09-23  Richard Biener  <rguenther@suse.de>

 	* g++.dg/tree-ssa/restrict2.C: Un-XFAIL testcase.

--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include "arm_neon.h"
+
+void abort (void);
+
+void
+foo (void)
+{
+  /* Test vcvt_high_f32_f64: lanes 0-1 of the result come from arg1, lanes 2-3 from narrowing arg2.  */
+  float32x2_t arg1;
+  float64x2_t arg2;
+  float32x4_t result;
+  arg1 = vcreate_f32 (UINT64_C (0x3f0db5793f6e1892));
+  arg2 = vcombine_f64 (vcreate_f64 (UINT64_C (0x3fe8e49d23fb575d)),
+		       vcreate_f64 (UINT64_C (0x3fd921291b3df73e)));
+  //  Expect: "result" = 3ec909483f4724e93f0db5793f6e1892 (written lane 3 first, lane 0 last)
+  result = vcvt_high_f32_f64 (arg1, arg2);
+  float32_t got;
+  float32_t exp;
+
+  /* Lane 0 (low float of arg1).  Abort only if both the 0.1% relative and the 1.0e-4 absolute check fail.  */
+  got = vgetq_lane_f32 (result, 0);
+  exp = ((float32_t) 0.9300624132156372);
+  if (((((exp / got) < ((float32_t) 0.999))
+	 || ((exp / got) > ((float32_t) 1.001)))
+     && (((exp - got) < ((float32_t) -1.0e-4))
+	 || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 1 (high float of arg1); same tolerance check as lane 0.  */
+  got = vgetq_lane_f32 (result, 1);
+  exp = ((float32_t) 0.5535503029823303);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+     && (((exp - got) < ((float32_t) -1.0e-4))
+	   || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 2 (first double of arg2, narrowed to float); same tolerance check.  */
+  got = vgetq_lane_f32 (result, 2);
+  exp = ((float32_t) 0.7779069617051665);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+      && (((exp - got) < ((float32_t) -1.0e-4))
+	  || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+
+  /* Lane 3 (second double of arg2, narrowed to float); same tolerance check.  */
+  got = vgetq_lane_f32 (result, 3);
+  exp = ((float32_t) 0.3926489606891329);
+  if (((((exp / got) < ((float32_t) 0.999))
+	  || ((exp / got) > ((float32_t) 1.001)))
+      && (((exp - got) < ((float32_t) -1.0e-4))
+	  || ((exp - got) > ((float32_t) 1.0e-4)))))
+    abort ();
+}
+
+void
+bar (void)
+{
+  /* Test vcvt_high_f64_f32: the two floats in the second half of arg1 are widened to doubles.  */
+  float32x4_t arg1;
+  float64x2_t result;
+  arg1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f7c5cf13f261f74)),
+		       vcreate_f32 (UINT64_C (0x3e3a7bc03f6ccc1d)));
+  //  Expect: "result" = 3fc74f78000000003fed9983a0000000 (written lane 1 first, lane 0 last)
+  result = vcvt_high_f64_f32 (arg1);
+
+  float64_t got;
+  float64_t exp;
+
+  /* Lane 0 (widened from lane 2 of arg1).  Abort only if both the 0.1% relative and the 1.0e-4 absolute check fail.  */
+  got = vgetq_lane_f64 (result, 0);
+  exp = 0.9249895215034485;
+  if (((((exp / got) < 0.999)
+	 || ((exp / got) > 1.001))
+     && (((exp - got) < -1.0e-4)
+	 || ((exp - got) > 1.0e-4))))
+    abort ();
+
+  /* Lane 1 (widened from lane 3 of arg1); same tolerance check as lane 0.  */
+  got = vgetq_lane_f64 (result, 1);
+  exp = 0.1821126937866211;
+  if (((((exp / got) < 0.999)
+	  || ((exp / got) > 1.001))
+      && (((exp - got) < -1.0e-4)
+	  || ((exp - got) > 1.0e-4))))
+    abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+  foo ();  /* vcvt_high_f32_f64 lane checks; calls abort () on a mismatch.  */
+  bar ();  /* vcvt_high_f64_f32 lane checks; calls abort () on a mismatch.  */
+  return 0;
+}