Commit 93c590ee by Michael Collison

neon.md (widen_<us>sum<mode>): New patterns where mode is VQI to improve mixed mode vectorization.

2016-04-25  Michael Collison  <michael.collison@linaro.org>

    * config/arm/neon.md (widen_<us>sum<mode>): New patterns where
    mode is VQI to improve mixed mode vectorization.
    * config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New
    define_insn to match low half of signed vaddw.
    * config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New
    define_insn to match high half of signed vaddw.
    * config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New
    define_insn to match low half of unsigned vaddw.
    * config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New
    define_insn to match high half of unsigned vaddw.
    * config/arm/arm.c (arm_simd_vect_par_cnst_half): New function.
    (arm_simd_check_vect_par_cnst_half_p): Likewise.
    * config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype
    for new function.
    (arm_simd_check_vect_par_cnst_half_p): Likewise.
    * config/arm/predicates.md (vect_par_constant_high): Support
    big endian and simplify by calling
    arm_simd_check_vect_par_cnst_half_p.
    (vect_par_constant_low): Likewise.
    * testsuite/gcc.target/arm/neon-vaddws16.c: New test.
    * testsuite/gcc.target/arm/neon-vaddws32.c: New test.
    * testsuite/gcc.target/arm/neon-vaddwu16.c: New test.
    * testsuite/gcc.target/arm/neon-vaddwu32.c: New test.
    * testsuite/gcc.target/arm/neon-vaddwu8.c: New test.
    * testsuite/lib/target-supports.exp
      (check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate
      that ARM NEON supports vector widening sum of HImode to SImode.

From-SVN: r235402
parent 151a1607
2016-04-25 Michael Collison <michael.collison@linaro.org>
* config/arm/neon.md (widen_<us>sum<mode>): New patterns where
mode is VQI to improve mixed mode vectorization.
* config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New
define_insn to match low half of signed vaddw.
* config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New
define_insn to match high half of signed vaddw.
* config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New
define_insn to match low half of unsigned vaddw.
* config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New
define_insn to match high half of unsigned vaddw.
* config/arm/arm.c (arm_simd_vect_par_cnst_half): New function.
(arm_simd_check_vect_par_cnst_half_p): Likewise.
* config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype
for new function.
(arm_simd_check_vect_par_cnst_half_p): Likewise.
* config/arm/predicates.md (vect_par_constant_high): Support
big endian and simplify by calling
arm_simd_check_vect_par_cnst_half_p.
(vect_par_constant_low): Likewise.
2016-04-25 Uros Bizjak <ubizjak@gmail.com> 2016-04-25 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*lea<mode>_general_4): Use const_0_to_3_operand * config/i386/i386.md (*lea<mode>_general_4): Use const_0_to_3_operand
......
...@@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p ...@@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p
ATTRIBUTE_UNUSED); ATTRIBUTE_UNUSED);
extern void arm_init_builtins (void); extern void arm_init_builtins (void);
extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
bool high);
#ifdef RTX_CODE #ifdef RTX_CODE
extern bool arm_vector_mode_supported_p (machine_mode); extern bool arm_vector_mode_supported_p (machine_mode);
extern bool arm_small_register_classes_for_mode_p (machine_mode); extern bool arm_small_register_classes_for_mode_p (machine_mode);
......
...@@ -30302,4 +30302,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, ...@@ -30302,4 +30302,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
return; return;
} }
/* Build and return a PARALLEL whose elements are the CONST_INT lane
   numbers of one half of a vector of mode MODE.  HIGH selects the
   architectural high half when true and the architectural low half
   when false.

   The architecture and GCC do not agree on lane numbering for every
   endianness, so the mask that describes a given architectural half
   depends on BYTES_BIG_ENDIAN: an instruction that selects the
   architectural low lanes on a big-endian target must be described with
   a mask selecting GCC's high lanes.  For a four-lane vector:

                     Big-Endian            Little-Endian

   GCC               0   1   2   3         3   2   1   0
                   | x | x | x | x |     | x | x | x | x |
   Architecture      3   2   1   0         3   2   1   0

   Low Mask:           { 2, 3 }              { 0, 1 }
   High Mask:          { 0, 1 }              { 2, 3 }  */
rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  const int half = GET_MODE_NUNITS (mode) / 2;
  rtvec lanes = rtvec_alloc (half);

  /* GCC's upper lane numbers describe the architectural high half on
     little-endian and the architectural low half on big-endian.  */
  const bool want_upper = BYTES_BIG_ENDIAN ? !high : high;
  const int first_lane = want_upper ? half : 0;

  for (int lane = 0; lane < half; lane++)
    RTVEC_ELT (lanes, lane) = GEN_INT (first_lane + lane);

  return gen_rtx_PARALLEL (mode, lanes);
}
/* Return true if OP holds exactly the lane numbers that
   arm_simd_vect_par_cnst_half builds for MODE and HIGH, i.e. the
   architectural high (HIGH == TRUE) or low (HIGH == FALSE) half of a
   vector of mode MODE.  See the diagram above
   arm_simd_vect_par_cnst_half for how those lane numbers depend on
   endianness.

   OP is accessed as a vector-bearing rtx (XVECLEN/XVECEXP on operand 0);
   its code is not checked here.  Return false if MODE is not a vector
   mode, if the element counts differ, or if any element of OP is not a
   CONST_INT equal to the corresponding expected lane number.  */
bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  /* Reject non-vector modes before building the reference PARALLEL so
     that no RTL is allocated for a mode that is going to be refused.  */
  if (!VECTOR_MODE_P (mode))
    return false;

  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);

  if (count_op != count_ideal)
    return false;

  for (int i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      /* Every selector element must be a constant and must match the
	 expected lane number at the same position.  */
      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }

  return true;
}
#include "gt-arm.h" #include "gt-arm.h"
...@@ -1204,16 +1204,133 @@ ...@@ -1204,16 +1204,133 @@
;; Widening operations ;; Widening operations
;; Signed widening sum for the VQI modes: operand 0 = operand 2 plus the
;; sign-extended elements of operand 1.  Expanded as a copy of the
;; accumulator into operand 0 (when they differ) followed by two
;; accumulating insns, one for each half of operand 1.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
	(plus:<V_double_width>
	 (sign_extend:<V_double_width>
	  (match_operand:VQI 1 "s_register_operand" ""))
	 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    rtx lo_lanes = arm_simd_vect_par_cnst_half (src_mode, false);
    rtx hi_lanes = arm_simd_vect_par_cnst_half (src_mode, true);

    /* Both insns below accumulate in place into operands[0], so seed it
       with the incoming accumulator first.  */
    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
							 operands[1],
							 lo_lanes,
							 operands[0]));
    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
							 operands[1],
							 hi_lanes,
							 operands[0]));
    DONE;
  }
)
;; Signed vaddw on the lanes of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_low.  Operand 3 is the accumulator and
;; is tied to the output.  The modifier printed for operand 1 (%e1 or
;; %f1) depends on BYTES_BIG_ENDIAN.
;; NOTE(review): the name iterates VQI and VW independently; presumably
;; only the <mode><V_half> pairing requested by the expander is
;; meaningful -- confirm.
(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
	(plus:<VW:V_widen>
	 (sign_extend:<VW:V_widen>
	  (vec_select:VW
	   (match_operand:VQI 1 "s_register_operand" "%w")
	   (match_operand:VQI 2 "vect_par_constant_low" "")))
	 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_s_elem>\t%q0, %q3, %f1";
    return "vaddw.<V_s_elem>\t%q0, %q3, %e1";
  }
  [(set_attr "type" "neon_add_widen")])
;; Signed vaddw on the lanes of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_high.  Operand 3 is the accumulator and
;; is tied to the output.  The modifier printed for operand 1 (%e1 or
;; %f1) depends on BYTES_BIG_ENDIAN.
;; NOTE(review): the name iterates VQI and VW independently; presumably
;; only the <mode><V_half> pairing requested by the expander is
;; meaningful -- confirm.
(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
	(plus:<VW:V_widen>
	 (sign_extend:<VW:V_widen>
	  (vec_select:VW
	   (match_operand:VQI 1 "s_register_operand" "%w")
	   (match_operand:VQI 2 "vect_par_constant_high" "")))
	 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_s_elem>\t%q0, %q3, %e1";
    return "vaddw.<V_s_elem>\t%q0, %q3, %f1";
  }
  [(set_attr "type" "neon_add_widen")])
(define_insn "widen_ssum<mode>3" (define_insn "widen_ssum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w") [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (sign_extend:<V_widen> (plus:<V_widen>
(match_operand:VW 1 "s_register_operand" "%w")) (sign_extend:<V_widen>
(match_operand:<V_widen> 2 "s_register_operand" "w")))] (match_operand:VW 1 "s_register_operand" "%w"))
(match_operand:<V_widen> 2 "s_register_operand" "w")))]
"TARGET_NEON" "TARGET_NEON"
"vaddw.<V_s_elem>\t%q0, %q2, %P1" "vaddw.<V_s_elem>\t%q0, %q2, %P1"
[(set_attr "type" "neon_add_widen")] [(set_attr "type" "neon_add_widen")]
) )
;; Unsigned widening sum for the VQI modes: operand 0 = operand 2 plus the
;; zero-extended elements of operand 1.  Expanded as a copy of the
;; accumulator into operand 0 (when they differ) followed by two
;; accumulating insns, one for each half of operand 1.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
	(plus:<V_double_width>
	 (zero_extend:<V_double_width>
	  (match_operand:VQI 1 "s_register_operand" ""))
	 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    rtx lo_lanes = arm_simd_vect_par_cnst_half (src_mode, false);
    rtx hi_lanes = arm_simd_vect_par_cnst_half (src_mode, true);

    /* Both insns below accumulate in place into operands[0], so seed it
       with the incoming accumulator first.  */
    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
							 operands[1],
							 lo_lanes,
							 operands[0]));
    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
							 operands[1],
							 hi_lanes,
							 operands[0]));
    DONE;
  }
)
;; Unsigned vaddw on the lanes of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_low.  Operand 3 is the accumulator and
;; is tied to the output.  The modifier printed for operand 1 (%e1 or
;; %f1) depends on BYTES_BIG_ENDIAN.
;; NOTE(review): the name iterates VQI and VW independently; presumably
;; only the <mode><V_half> pairing requested by the expander is
;; meaningful -- confirm.
(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
	(plus:<VW:V_widen>
	 (zero_extend:<VW:V_widen>
	  (vec_select:VW
	   (match_operand:VQI 1 "s_register_operand" "%w")
	   (match_operand:VQI 2 "vect_par_constant_low" "")))
	 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_u_elem>\t%q0, %q3, %f1";
    return "vaddw.<V_u_elem>\t%q0, %q3, %e1";
  }
  [(set_attr "type" "neon_add_widen")])
;; Unsigned vaddw on the lanes of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_high.  Operand 3 is the accumulator and
;; is tied to the output.  The modifier printed for operand 1 (%e1 or
;; %f1) depends on BYTES_BIG_ENDIAN.
;; NOTE(review): the name iterates VQI and VW independently; presumably
;; only the <mode><V_half> pairing requested by the expander is
;; meaningful -- confirm.
(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
	(plus:<VW:V_widen>
	 (zero_extend:<VW:V_widen>
	  (vec_select:VW
	   (match_operand:VQI 1 "s_register_operand" "%w")
	   (match_operand:VQI 2 "vect_par_constant_high" "")))
	 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_u_elem>\t%q0, %q3, %e1";
    return "vaddw.<V_u_elem>\t%q0, %q3, %f1";
  }
  [(set_attr "type" "neon_add_widen")])
(define_insn "widen_usum<mode>3" (define_insn "widen_usum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w") [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (zero_extend:<V_widen> (plus:<V_widen> (zero_extend:<V_widen>
......
...@@ -612,59 +612,13 @@ ...@@ -612,59 +612,13 @@
(define_special_predicate "vect_par_constant_high" (define_special_predicate "vect_par_constant_high"
(match_code "parallel") (match_code "parallel")
{ {
HOST_WIDE_INT count = XVECLEN (op, 0); return arm_simd_check_vect_par_cnst_half_p (op, mode, true);
int i;
int base = GET_MODE_NUNITS (mode);
if ((count < 1)
|| (count != base/2))
return false;
if (!VECTOR_MODE_P (mode))
return false;
for (i = 0; i < count; i++)
{
rtx elt = XVECEXP (op, 0, i);
int val;
if (!CONST_INT_P (elt))
return false;
val = INTVAL (elt);
if (val != (base/2) + i)
return false;
}
return true;
}) })
(define_special_predicate "vect_par_constant_low" (define_special_predicate "vect_par_constant_low"
(match_code "parallel") (match_code "parallel")
{ {
HOST_WIDE_INT count = XVECLEN (op, 0); return arm_simd_check_vect_par_cnst_half_p (op, mode, false);
int i;
int base = GET_MODE_NUNITS (mode);
if ((count < 1)
|| (count != base/2))
return false;
if (!VECTOR_MODE_P (mode))
return false;
for (i = 0; i < count; i++)
{
rtx elt = XVECEXP (op, 0, i);
int val;
if (!CONST_INT_P (elt))
return false;
val = INTVAL (elt);
if (val != i)
return false;
}
return true;
}) })
(define_predicate "const_double_vcvt_power_of_two_reciprocal" (define_predicate "const_double_vcvt_power_of_two_reciprocal"
......
2016-04-25 Michael Collison <michael.collison@arm.com>
* testsuite/gcc.target/arm/neon-vaddws16.c: New test.
* testsuite/gcc.target/arm/neon-vaddws32.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu16.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu32.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu8.c: New test.
* testsuite/lib/target-supports.exp
(check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate
that ARM NEON supports vector widening sum of HImode to SImode.
2016-04-23 Jakub Jelinek <jakub@redhat.com> 2016-04-23 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/70712 PR sanitizer/70712
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O3" } */
/* { dg-add-options arm_neon } */
/* Sum the first LEN elements of X, with LEN rounded down to a multiple
   of 32, accumulating the short elements into an int.  DUMMY is unused.
   The dg-final directive of this test expects a vaddw.s16 instruction in
   the generated assembly.  */
int
t6 (int len, void * dummy, short * __restrict x)
{
len = len & ~31;
int result = 0;
/* Empty volatile asm placed between the setup and the loop.  */
__asm volatile ("");
for (int i = 0; i < len; i++)
result += x[i];
return result;
}
/* { dg-final { scan-assembler "vaddw\.s16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O3" } */
/* { dg-add-options arm_neon } */
/* Sum the first LEN elements of X, with LEN rounded down to a multiple
   of 32, accumulating the int elements into a long long.  The sum is
   converted to int on return.  DUMMY is unused.  The dg-final directive
   of this test expects a vaddw.s32 instruction in the generated
   assembly.  */
int
t6 (int len, void * dummy, int * __restrict x)
{
len = len & ~31;
long long result = 0;
/* Empty volatile asm placed between the setup and the loop.  */
__asm volatile ("");
for (int i = 0; i < len; i++)
result += x[i];
return result;
}
/* { dg-final { scan-assembler "vaddw\.s32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O3" } */
/* { dg-add-options arm_neon } */
/* Sum the first LEN elements of X, with LEN rounded down to a multiple
   of 32, accumulating the unsigned short elements into an unsigned int.
   The sum is converted to int on return.  DUMMY is unused.  The dg-final
   directive below expects a vaddw.u16 instruction in the generated
   assembly.  */
int
t6 (int len, void * dummy, unsigned short * __restrict x)
{
  len = len & ~31;
  unsigned int result = 0;
  /* Empty volatile asm placed between the setup and the loop.  */
  __asm volatile ("");
  for (int i = 0; i < len; i++)
    result += x[i];
  return result;
}

/* { dg-final { scan-assembler "vaddw\.u16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O3" } */
/* { dg-add-options arm_neon } */
/* Sum the first LEN elements of X, with LEN rounded down to a multiple
   of 32, accumulating the unsigned int elements into an unsigned long
   long.  The sum is converted to int on return.  DUMMY is unused.  The
   dg-final directive of this test expects a vaddw.u32 instruction in the
   generated assembly.  */
int
t6 (int len, void * dummy, unsigned int * __restrict x)
{
len = len & ~31;
unsigned long long result = 0;
/* Empty volatile asm placed between the setup and the loop.  */
__asm volatile ("");
for (int i = 0; i < len; i++)
result += x[i];
return result;
}
/* { dg-final { scan-assembler "vaddw\.u32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O3" } */
/* { dg-add-options arm_neon } */
/* Sum the first LEN elements of X, with LEN rounded down to a multiple
   of 32, accumulating the char elements into an unsigned short.  DUMMY
   is unused.  The dg-final directive of this test expects a vaddw.u8
   instruction in the generated assembly.
   NOTE(review): X is plain char, so the expected unsigned (.u8) form
   presumably relies on char being unsigned on the target -- confirm.  */
int
t6 (int len, void * dummy, char * __restrict x)
{
len = len & ~31;
unsigned short result = 0;
/* Empty volatile asm placed between the setup and the loop.  */
__asm volatile ("");
for (int i = 0; i < len; i++)
result += x[i];
return result;
}
/* { dg-final { scan-assembler "vaddw\.u8" } } */
...@@ -4348,6 +4348,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { ...@@ -4348,6 +4348,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
set et_vect_widen_sum_hi_to_si_pattern_saved 0 set et_vect_widen_sum_hi_to_si_pattern_saved 0
if { [istarget powerpc*-*-*] if { [istarget powerpc*-*-*]
|| [istarget aarch64*-*-*] || [istarget aarch64*-*-*]
|| ([istarget arm*-*-*] &&
[check_effective_target_arm_neon_ok])
|| [istarget ia64-*-*] } { || [istarget ia64-*-*] } {
set et_vect_widen_sum_hi_to_si_pattern_saved 1 set et_vect_widen_sum_hi_to_si_pattern_saved 1
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment