Commit fa03d576, authored and committed by Prathamesh Kulkarni

re PR middle-end/91272 ([SVE] Use fully-masked loops for CLASTB reductions)

2019-10-28  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	PR middle-end/91272
	* tree-vect-stmts.c (vectorizable_condition): Support
	EXTRACT_LAST_REDUCTION with fully-masked loops.

testsuite/
	* gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
	* gcc.target/aarch64/sve/clastb_2.c: Likewise.
	* gcc.target/aarch64/sve/clastb_3.c: Likewise.
	* gcc.target/aarch64/sve/clastb_4.c: Likewise.
	* gcc.target/aarch64/sve/clastb_5.c: Likewise.
	* gcc.target/aarch64/sve/clastb_6.c: Likewise.
	* gcc.target/aarch64/sve/clastb_7.c: Likewise.
	* gcc.target/aarch64/sve/clastb_8.c: Likewise.

From-SVN: r277524
parent 96d9d162
2019-10-28 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
PR middle-end/91272
* tree-vect-stmts.c (vectorizable_condition): Support
EXTRACT_LAST_REDUCTION with fully-masked loops.
2019-10-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/92252
2019-10-28 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
PR middle-end/91272
* gcc.target/aarch64/sve/clastb_1.c: Add dg-scan.
* gcc.target/aarch64/sve/clastb_2.c: Likewise.
* gcc.target/aarch64/sve/clastb_3.c: Likewise.
* gcc.target/aarch64/sve/clastb_4.c: Likewise.
* gcc.target/aarch64/sve/clastb_5.c: Likewise.
* gcc.target/aarch64/sve/clastb_6.c: Likewise.
* gcc.target/aarch64/sve/clastb_7.c: Likewise.
* gcc.target/aarch64/sve/clastb_8.c: Likewise.
2019-10-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/92252
......
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define N 32
......@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
return last;
}
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#include <stdint.h>
......@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE uint8_t
#include "clastb_2.c"
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tb[0-9]+, p[0-7], b[0-9]+, z[0-9]+\.b} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE int16_t
#include "clastb_2.c"
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE uint64_t
#include "clastb_2.c"
/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define N 32
......@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
return last;
}
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#define TYPE double
#include "clastb_6.c"
/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
#include <stdint.h>
......@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
TEST_TYPE (uint32_t);
TEST_TYPE (uint64_t);
/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
/* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
/* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
......
......@@ -10050,16 +10050,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return false;
}
}
if (loop_vinfo
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
&& reduction_type == EXTRACT_LAST_REDUCTION)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't yet use a fully-masked loop for"
" EXTRACT_LAST_REDUCTION.\n");
LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
}
if (expand_vec_cond_expr_p (vectype, comp_vectype,
cond_code))
{
......@@ -10089,31 +10079,31 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
/* Handle cond expr. */
for (j = 0; j < ncopies; j++)
{
tree loop_mask = NULL_TREE;
bool swap_cond_operands = false;
/* See whether another part of the vectorized code applies a loop
mask to the condition, or to its inverse. */
vec_loop_masks *masks = NULL;
if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
{
scalar_cond_masked_key cond (cond_expr, ncopies);
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
}
if (reduction_type == EXTRACT_LAST_REDUCTION)
masks = &LOOP_VINFO_MASKS (loop_vinfo);
else
{
bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
cond.code = invert_tree_comparison (cond.code, honor_nans);
scalar_cond_masked_key cond (cond_expr, ncopies);
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
masks = &LOOP_VINFO_MASKS (loop_vinfo);
else
{
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
vectype, j);
cond_code = cond.code;
swap_cond_operands = true;
bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
cond.code = invert_tree_comparison (cond.code, honor_nans);
if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
masks = &LOOP_VINFO_MASKS (loop_vinfo);
cond_code = cond.code;
swap_cond_operands = true;
}
}
}
}
......@@ -10248,28 +10238,10 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vec != { 0, ... } (masked in the MASK_LOAD,
unmasked in the VEC_COND_EXPR). */
if (loop_mask)
{
if (COMPARISON_CLASS_P (vec_compare))
{
tree tmp = make_ssa_name (vec_cmp_type);
tree op0 = TREE_OPERAND (vec_compare, 0);
tree op1 = TREE_OPERAND (vec_compare, 1);
gassign *g = gimple_build_assign (tmp,
TREE_CODE (vec_compare),
op0, op1);
vect_finish_stmt_generation (stmt_info, g, gsi);
vec_compare = tmp;
}
tree tmp2 = make_ssa_name (vec_cmp_type);
gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
vec_compare, loop_mask);
vect_finish_stmt_generation (stmt_info, g, gsi);
vec_compare = tmp2;
}
/* Force vec_compare to be an SSA_NAME rather than a comparison,
in cases where that's necessary. */
if (reduction_type == EXTRACT_LAST_REDUCTION)
if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
{
if (!is_gimple_val (vec_compare))
{
......@@ -10279,6 +10251,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
vec_compare = vec_compare_name;
}
if (must_invert_cmp_result)
{
tree vec_compare_name = make_ssa_name (vec_cmp_type);
......@@ -10288,6 +10261,24 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
vec_compare = vec_compare_name;
}
if (masks)
{
unsigned vec_num = vec_oprnds0.length ();
tree loop_mask
= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
vectype, vec_num * j + i);
tree tmp2 = make_ssa_name (vec_cmp_type);
gassign *g
= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
loop_mask);
vect_finish_stmt_generation (stmt_info, g, gsi);
vec_compare = tmp2;
}
}
if (reduction_type == EXTRACT_LAST_REDUCTION)
{
gcall *new_stmt = gimple_build_call_internal
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
vec_then_clause);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment