Commit 6ae6116f by Richard Henderson, committed by Richard Henderson

Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation

        * tree-vect-stmts.c (supportable_widening_operation): Expand
        WIDEN_MULT_EXPR via VEC_WIDEN_MULT_EVEN/ODD_EXPR if possible.

From-SVN: r189408
parent 00f07b86
2012-07-10  Richard Henderson  <rth@redhat.com>

        * tree-vect-stmts.c (supportable_widening_operation): Expand
        WIDEN_MULT_EXPR via VEC_WIDEN_MULT_EVEN/ODD_EXPR if possible.

        * expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
        (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
        * optabs.c (can_mult_highpart_p): New.
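As a reader aid (not part of the commit), here is a minimal stand-alone C sketch of the two result layouts that the comment in the diff below contrasts: the in-order lo/hi split versus the even/odd split produced by a mult_even/mult_odd sequence, and why a reduction, which consumes every element exactly once, cannot tell them apart. Names and element counts are illustrative only.

/* Scalar model of the two layouts for a widening multiply of eight
   int16_t elements into eight int32_t results, split across two
   vectors of four.  Not GCC code; purely illustrative.  */
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  int16_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int16_t b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
  int32_t lo_hi[2][4], even_odd[2][4];

  for (int i = 0; i < 4; i++)
    {
      /* Lo/hi expansion: results keep their original order,
         vect1 = [res1..res4], vect2 = [res5..res8].  */
      lo_hi[0][i] = (int32_t) a[i] * b[i];
      lo_hi[1][i] = (int32_t) a[i + 4] * b[i + 4];

      /* Even/odd expansion: vect1 = [res1,res3,res5,res7],
         vect2 = [res2,res4,res6,res8].  */
      even_odd[0][i] = (int32_t) a[2 * i] * b[2 * i];
      even_odd[1][i] = (int32_t) a[2 * i + 1] * b[2 * i + 1];
    }

  /* A reduction sums every element exactly once, so both layouts
     yield the same result; the order only matters when the widened
     results are consumed in their original order.  */
  int64_t sum_lo_hi = 0, sum_even_odd = 0;
  for (int v = 0; v < 2; v++)
    for (int i = 0; i < 4; i++)
      {
        sum_lo_hi += lo_hi[v][i];
        sum_even_odd += even_odd[v][i];
      }
  printf ("lo/hi sum = %lld, even/odd sum = %lld\n",
          (long long) sum_lo_hi, (long long) sum_even_odd);
  return 0;
}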
gcc/tree-vect-stmts.c
@@ -6199,7 +6199,8 @@ vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
 bool
 supportable_widening_operation (enum tree_code code, gimple stmt,
                                 tree vectype_out, tree vectype_in,
-                                tree *decl1, tree *decl2,
+                                tree *decl1 ATTRIBUTE_UNUSED,
+                                tree *decl2 ATTRIBUTE_UNUSED,
                                 enum tree_code *code1, enum tree_code *code2,
                                 int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
@@ -6207,7 +6208,6 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *vect_loop = NULL;
-  bool ordered_p;
   enum machine_mode vec_mode;
   enum insn_code icode1, icode2;
   optab optab1, optab2;
@@ -6223,56 +6223,60 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
   if (loop_info)
     vect_loop = LOOP_VINFO_LOOP (loop_info);
 
-  /* The result of a vectorized widening operation usually requires two vectors
-     (because the widened results do not fit into one vector). The generated
-     vector results would normally be expected to be generated in the same
-     order as in the original scalar computation, i.e. if 8 results are
-     generated in each vector iteration, they are to be organized as follows:
-      vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
-
-     However, in the special case that the result of the widening operation is
-     used in a reduction computation only, the order doesn't matter (because
-     when vectorizing a reduction we change the order of the computation).
-     Some targets can take advantage of this and generate more efficient code.
-     For example, targets like Altivec, that support widen_mult using a sequence
-     of {mult_even,mult_odd} generate the following vectors:
-      vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
-
-     When vectorizing outer-loops, we execute the inner-loop sequentially
-     (each vectorized inner-loop iteration contributes to VF outer-loop
-     iterations in parallel).  We therefore don't allow to change the order
-     of the computation in the inner-loop during outer-loop vectorization.  */
-
-  if (vect_loop
-      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
-      && !nested_in_vect_loop_p (vect_loop, stmt))
-    ordered_p = false;
-  else
-    ordered_p = true;
-
-  if (!ordered_p
-      && code == WIDEN_MULT_EXPR
-      && targetm.vectorize.builtin_mul_widen_even
-      && targetm.vectorize.builtin_mul_widen_even (vectype)
-      && targetm.vectorize.builtin_mul_widen_odd
-      && targetm.vectorize.builtin_mul_widen_odd (vectype))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "Unordered widening operation detected.");
-
-      *code1 = *code2 = CALL_EXPR;
-      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
-      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
-      return true;
-    }
-
   switch (code)
     {
     case WIDEN_MULT_EXPR:
+      /* The result of a vectorized widening operation usually requires
+         two vectors (because the widened results do not fit into one vector).
+         The generated vector results would normally be expected to be
+         generated in the same order as in the original scalar computation,
+         i.e. if 8 results are generated in each vector iteration, they are
+         to be organized as follows:
+                vect1: [res1,res2,res3,res4],
+                vect2: [res5,res6,res7,res8].
+
+         However, in the special case that the result of the widening
+         operation is used in a reduction computation only, the order doesn't
+         matter (because when vectorizing a reduction we change the order of
+         the computation).  Some targets can take advantage of this and
+         generate more efficient code.  For example, targets like Altivec,
+         that support widen_mult using a sequence of {mult_even,mult_odd}
+         generate the following vectors:
+                vect1: [res1,res3,res5,res7],
+                vect2: [res2,res4,res6,res8].
+
+         When vectorizing outer-loops, we execute the inner-loop sequentially
+         (each vectorized inner-loop iteration contributes to VF outer-loop
+         iterations in parallel).  We therefore don't allow to change the
+         order of the computation in the inner-loop during outer-loop
+         vectorization.  */
+      /* TODO: Another case in which order doesn't *really* matter is when we
+         widen and then contract again, e.g. (short)((int)x * y >> 8).
+         Normally, pack_trunc performs an even/odd permute, whereas the
+         repack from an even/odd expansion would be an interleave, which
+         would be significantly simpler for e.g. AVX2.  */
+      /* In any case, in order to avoid duplicating the code below, recurse
+         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
+         are properly set up for the caller.  If we fail, we'll continue with
+         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
+      if (vect_loop
+          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+          && !nested_in_vect_loop_p (vect_loop, stmt)
+          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
+                                             stmt, vectype_out, vectype_in,
+                                             NULL, NULL, code1, code2,
+                                             multi_step_cvt, interm_types))
+        return true;
       c1 = VEC_WIDEN_MULT_LO_EXPR;
       c2 = VEC_WIDEN_MULT_HI_EXPR;
       break;
 
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+      /* Support the recursion induced just above.  */
+      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
+      c2 = VEC_WIDEN_MULT_ODD_EXPR;
+      break;
+
     case WIDEN_LSHIFT_EXPR:
       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
@@ -6298,7 +6302,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
       gcc_unreachable ();
     }
 
-  if (BYTES_BIG_ENDIAN)
+  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
     {
       enum tree_code ctmp = c1;
       c1 = c2;
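The TODO comment in the hunk above refers to widen-then-contract sequences like (short)((int)x * y >> 8), where the widened products are narrowed again immediately, so their intermediate order is never observable. A minimal stand-alone C sketch of that scalar pattern (illustrative only, not code from this commit):

/* Scalar form of the widen-then-contract pattern from the TODO comment:
   each product is widened to 32 bits, shifted, and truncated back to
   16 bits, so the order of the widened intermediates does not matter.  */
#include <stdint.h>
#include <stddef.h>

void
scale_q8 (int16_t *dst, const int16_t *x, const int16_t *y, size_t n)
{
  for (size_t i = 0; i < n; i++)
    dst[i] = (int16_t) (((int32_t) x[i] * y[i]) >> 8);
}

As the TODO notes, when this loop is vectorized with an even/odd widening multiply, the repack back to the narrow type could be an interleave instead of pack_trunc's even/odd permute, which would be simpler on targets such as AVX2.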